return 0;
fbio = bio;
- cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+ cluster = blk_queue_cluster(q);
seg_size = 0;
nr_phys_segs = 0;
for_each_bio(bio) {
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
struct bio *nxt)
{
- if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+ if (!blk_queue_cluster(q))
return 0;
if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
int nsegs, cluster;
nsegs = 0;
- cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+ cluster = blk_queue_cluster(q);
/*
* for each bio in rq
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_round_stats(cpu, part);
part_dec_in_flight(part, rq_data_dir(req));
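+ /* req->part pinned a reference to the partition; drop it now */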
+ hd_struct_put(part);
part_stat_unlock();
}
}
unsigned count;
unsigned total_weight;
u64 min_vdisktime;
- struct rb_node *active;
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
.count = 0, .min_vdisktime = 0, }
*/
struct cfq_queue {
/* reference count */
- atomic_t ref;
+ int ref;
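+ /* (always manipulated under the queue_lock, so no atomic_t is needed) */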
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
/* group service_tree key */
u64 vdisktime;
unsigned int weight;
- bool on_st;
/* number of cfqq currently on this group */
int nr_cfqq;
struct blkio_group blkg;
#ifdef CONFIG_CFQ_GROUP_IOSCHED
struct hlist_node cfqd_node;
- atomic_t ref;
+ int ref;
#endif
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
u64 vdisktime = st->min_vdisktime;
struct cfq_group *cfqg;
- if (st->active) {
- cfqg = rb_entry_cfqg(st->active);
- vdisktime = cfqg->vdisktime;
- }
-
if (st->left) {
cfqg = rb_entry_cfqg(st->left);
vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
- return 0;
+ return false;
if (time_before(jiffies, cfqq->slice_end))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
struct rb_node *n;
cfqg->nr_cfqq++;
- if (cfqg->on_st)
+ if (!RB_EMPTY_NODE(&cfqg->rb_node))
return;
/*
cfqg->vdisktime = st->min_vdisktime;
__cfq_group_service_tree_add(st, cfqg);
- cfqg->on_st = true;
st->total_weight += cfqg->weight;
}
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
- if (st->active == &cfqg->rb_node)
- st->active = NULL;
-
BUG_ON(cfqg->nr_cfqq < 1);
cfqg->nr_cfqq--;
return;
cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
- cfqg->on_st = false;
st->total_weight -= cfqg->weight;
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
* elevator which will be dropped by either elevator exit
* or cgroup deletion path depending on who is exiting first.
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
/*
* Add group onto cgroup list. It might happen that bdi->dev is
- * not initiliazed yet. Initialize this new group without major
+ * not initialized yet. Initialize this new group without major
* and minor info and this info will be filled in once a new thread
* comes for IO. See code above.
*/
static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
{
- atomic_inc(&cfqg->ref);
+ cfqg->ref++;
return cfqg;
}
cfqq->cfqg = cfqg;
/* cfqq reference on cfqg */
- atomic_inc(&cfqq->cfqg->ref);
+ cfqq->cfqg->ref++;
}
static void cfq_put_cfqg(struct cfq_group *cfqg)
struct cfq_rb_root *st;
int i, j;
- BUG_ON(atomic_read(&cfqg->ref) <= 0);
- if (!atomic_dec_and_test(&cfqg->ref))
+ BUG_ON(cfqg->ref <= 0);
+ cfqg->ref--;
+ if (cfqg->ref)
return;
for_each_cfqg_st(cfqg, i, j, st)
- BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+ BUG_ON(!RB_EMPTY_ROOT(&st->rb));
kfree(cfqg);
}
cfq_group_service_tree_del(cfqd, cfqq->cfqg);
cfqq->orig_cfqg = cfqq->cfqg;
cfqq->cfqg = &cfqd->root_group;
- atomic_inc(&cfqd->root_group.ref);
+ cfqd->root_group.ref++;
group_changed = 1;
} else if (!cfqd->cfq_group_isolation
&& cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
if (cfqq == cfqd->active_queue)
cfqd->active_queue = NULL;
- if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
- cfqd->grp_service_tree.active = NULL;
-
if (cfqd->active_cic) {
put_io_context(cfqd->active_cic->ioc);
cfqd->active_cic = NULL;
* in their service tree.
*/
if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
- return 1;
+ return true;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
- return 0;
+ return false;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
int process_refs, io_refs;
io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
- process_refs = atomic_read(&cfqq->ref) - io_refs;
+ process_refs = cfqq->ref - io_refs;
BUG_ON(process_refs < 0);
return process_refs;
}
*/
if (new_process_refs >= process_refs) {
cfqq->new_cfqq = new_cfqq;
- atomic_add(process_refs, &new_cfqq->ref);
+ new_cfqq->ref += process_refs;
} else {
new_cfqq->new_cfqq = cfqq;
- atomic_add(new_process_refs, &cfqq->ref);
+ cfqq->ref += new_process_refs;
}
}
unsigned count;
struct cfq_rb_root *st;
unsigned group_slice;
-
- if (!cfqg) {
- cfqd->serving_prio = IDLE_WORKLOAD;
- cfqd->workload_expires = jiffies + 1;
- return;
- }
+ enum wl_prio_t original_prio = cfqd->serving_prio;
/* Choose next priority. RT > BE > IDLE */
if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
return;
}
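+ /* the serving priority class changed: the old workload choice no longer applies */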
+ if (original_prio != cfqd->serving_prio)
+ goto new_workload;
+
/*
* For RT and BE, we have to choose also the type
* (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
if (count && !time_after(jiffies, cfqd->workload_expires))
return;
+ new_workload:
/* otherwise select new workload type */
cfqd->serving_type =
cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
if (RB_EMPTY_ROOT(&st->rb))
return NULL;
cfqg = cfq_rb_first_group(st);
- st->active = &cfqg->rb_node;
update_min_vdisktime(st);
return cfqg;
}
goto keep_queue;
}
+ /*
+ * This is a deep seek queue, but the device is much faster than
+ * the queue can deliver: don't idle here.
+ */
+ if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
+ (cfq_cfqq_slice_new(cfqq) ||
+ (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+ cfq_clear_cfqq_deep(cfqq);
+ cfq_clear_cfqq_idle_window(cfqq);
+ }
+
if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
cfqq = NULL;
goto keep_queue;
{
/* the queue hasn't finished any request, can't estimate */
if (cfq_cfqq_slice_new(cfqq))
- return 1;
+ return true;
if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
cfqq->slice_end))
- return 1;
+ return true;
- return 0;
+ return false;
}
static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
struct cfq_data *cfqd = cfqq->cfqd;
struct cfq_group *cfqg, *orig_cfqg;
- BUG_ON(atomic_read(&cfqq->ref) <= 0);
+ BUG_ON(cfqq->ref <= 0);
- if (!atomic_dec_and_test(&cfqq->ref))
+ cfqq->ref--;
+ if (cfqq->ref)
return;
cfq_log_cfqq(cfqd, cfqq, "put_queue");
RB_CLEAR_NODE(&cfqq->p_node);
INIT_LIST_HEAD(&cfqq->fifo);
- atomic_set(&cfqq->ref, 0);
+ cfqq->ref = 0;
cfqq->cfqd = cfqd;
cfq_mark_cfqq_prio_changed(cfqq);
* pin the queue now that it's allocated, scheduler exit will prune it
*/
if (!is_sync && !(*async_cfqq)) {
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
*async_cfqq = cfqq;
}
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
return cfqq;
}
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
return true;
+ /* An idle queue should not be idle now for some reason */
+ if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
+ return true;
+
if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
return false;
}
cfqq->allocated[rw]++;
- atomic_inc(&cfqq->ref);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-
+ cfqq->ref++;
rq->elevator_private = cic;
rq->elevator_private2 = cfqq;
rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
+
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
return 0;
queue_fail:
if (!cfqd)
return NULL;
+ /*
+ * No need to take the queue_lock in this routine, since we are
+ * initializing the I/O scheduler and nobody else is using cfqd yet
+ */
cfqd->cic_index = i;
/* Init root service tree */
* Take a reference to root group which we never drop. This is just
* to make sure that cfq_put_cfqg() does not try to kfree root group
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
rcu_read_lock();
cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
(void *)cfqd, 0);
* will not attempt to free it.
*/
cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
- atomic_inc(&cfqd->oom_cfqq.ref);
+ cfqd->oom_cfqq.ref++;
cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
INIT_LIST_HEAD(&cfqd->cic_list);
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
#include <linux/blktrace_api.h>
#include <asm/uaccess.h>
return -EINVAL;
if (get_user(n, (int __user *) arg))
return -EFAULT;
- if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0)
+ if (!(mode & FMODE_EXCL) &&
+ blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
return -EBUSY;
ret = set_blocksize(bdev, n);
if (!(mode & FMODE_EXCL))
- bd_release(bdev);
+ blkdev_put(bdev, mode | FMODE_EXCL);
return ret;
case BLKPG:
ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
BUG_ON(d->dm_dev.bdev);
- bdev = open_by_devnum(dev, d->dm_dev.mode);
+ bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
- r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
- if (r)
- blkdev_put(bdev, d->dm_dev.mode);
- else
- d->dm_dev.bdev = bdev;
- return r;
+
+ r = bd_link_disk_holder(bdev, dm_disk(md));
+ if (r) {
+ blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL);
+ return r;
+ }
+
+ d->dm_dev.bdev = bdev;
+ return 0;
}
/*
if (!d->dm_dev.bdev)
return;
- bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
- blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
+ blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
d->dm_dev.bdev = NULL;
}
*/
if (q->merge_bvec_fn && !ti->type->merge)
- limits->max_sectors =
- min_not_zero(limits->max_sectors,
- (unsigned int) (PAGE_SIZE >> 9));
+ blk_limits_max_hw_sectors(limits,
+ (unsigned int) (PAGE_SIZE >> 9));
return 0;
}
EXPORT_SYMBOL_GPL(dm_set_device_limits);
*/
q->limits = *limits;
- if (limits->no_cluster)
- queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
- else
- queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
-
if (!dm_table_supports_discards(t))
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
else
bio_put(bio);
}
-static void submit_flushes(mddev_t *mddev)
+static void md_submit_flush_data(struct work_struct *ws);
+
+static void submit_flushes(struct work_struct *ws)
{
+ mddev_t *mddev = container_of(ws, mddev_t, flush_work);
mdk_rdev_t *rdev;
+ INIT_WORK(&mddev->flush_work, md_submit_flush_data);
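+ /*
+ * Bias flush_pending by one so it cannot reach zero (which would
+ * schedule md_submit_flush_data) until the final decrement below,
+ * after all per-device flush bios have been submitted.
+ */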
+ atomic_set(&mddev->flush_pending, 1);
rcu_read_lock();
list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
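+ /* drop the initial bias; the last dropper schedules md_submit_flush_data */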
+ if (atomic_dec_and_test(&mddev->flush_pending))
+ queue_work(md_wq, &mddev->flush_work);
}
static void md_submit_flush_data(struct work_struct *ws)
mddev_t *mddev = container_of(ws, mddev_t, flush_work);
struct bio *bio = mddev->flush_bio;
- atomic_set(&mddev->flush_pending, 1);
-
if (bio->bi_size == 0)
/* an empty barrier - all done */
bio_endio(bio, 0);
if (mddev->pers->make_request(mddev, bio))
generic_make_request(bio);
}
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- mddev->flush_bio = NULL;
- wake_up(&mddev->sb_wait);
- }
+
+ mddev->flush_bio = NULL;
+ wake_up(&mddev->sb_wait);
}
void md_flush_request(mddev_t *mddev, struct bio *bio)
mddev->flush_bio = bio;
spin_unlock_irq(&mddev->write_lock);
- atomic_set(&mddev->flush_pending, 1);
- INIT_WORK(&mddev->flush_work, md_submit_flush_data);
-
- submit_flushes(mddev);
-
- if (atomic_dec_and_test(&mddev->flush_pending))
- queue_work(md_wq, &mddev->flush_work);
+ INIT_WORK(&mddev->flush_work, submit_flushes);
+ queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
md_super_wait(rdev->mddev);
- return num_sectors / 2; /* kB for sysfs */
+ return num_sectors;
}
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
md_super_wait(rdev->mddev);
- return num_sectors / 2; /* kB for sysfs */
+ return num_sectors;
}
static struct super_type super_types[] = {
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
list_add_rcu(&rdev->same_set, &mddev->disks);
- bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+ bd_link_disk_holder(rdev->bdev, mddev->gendisk);
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;
MD_BUG();
return;
}
- bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ shared ? (mdk_rdev_t *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
printk(KERN_ERR "md: could not open %s.\n",
__bdevname(dev, b));
return PTR_ERR(bdev);
}
- err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
- if (err) {
- printk(KERN_ERR "md: could not bd_claim %s.\n",
- bdevname(bdev, b));
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return err;
- }
if (!shared)
set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
rdev->bdev = NULL;
if (!bdev)
MD_BUG();
- bd_release(bdev);
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
void md_autodetect_dev(dev_t dev);
goto abort;
mddev->queue->queuedata = mddev;
- /* Can be unlocked because the queue is new: no concurrency */
- queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
-
blk_queue_make_request(mddev->queue, md_make_request);
disk = alloc_disk(1 << shift);
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_bitmap_group))
printk(KERN_DEBUG "pointless warning\n");
+
+ blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
abort:
mutex_unlock(&disks_mutex);
if (!error && mddev->kobj.sd) {
PTR_ERR(rdev));
return PTR_ERR(rdev);
}
- /* set save_raid_disk if appropriate */
+ /* set saved_raid_disk if appropriate */
if (!mddev->persistent) {
if (info->state & (1<<MD_DISK_SYNC) &&
info->raid_disk < mddev->raid_disks)
} else
super_types[mddev->major_version].
validate_super(mddev, rdev);
- rdev->saved_raid_disk = rdev->raid_disk;
+ if (test_bit(In_sync, &rdev->flags))
+ rdev->saved_raid_disk = rdev->raid_disk;
+ else
+ rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags); /* just to be sure */
if (info->state & (1<<MD_DISK_WRITEMOSTLY))
|| kthread_should_stop(),
thread->timeout);
- clear_bit(THREAD_WAKEUP, &thread->flags);
-
- thread->run(thread->mddev);
+ if (test_and_clear_bit(THREAD_WAKEUP, &thread->flags))
+ thread->run(thread->mddev);
}
return 0;
}
if (scsi_target_is_busy(starget)) {
- if (list_empty(&sdev->starved_entry)) {
+ if (list_empty(&sdev->starved_entry))
list_add_tail(&sdev->starved_entry,
&shost->starved_list);
- return 0;
- }
+ return 0;
}
/* We're OK to process the command, so we can't be starved */
INIT_LIST_HEAD(&cmd->eh_entry);
- /*
- * Set the serial numbers back to zero
- */
- cmd->serial_number = 0;
-
atomic_inc(&cmd->device->iodone_cnt);
if (cmd->result)
atomic_inc(&cmd->device->ioerr_cnt);
blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
- /* New queue, no concurrency on queue_flags */
if (!shost->use_clustering)
- queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
+ q->limits.cluster = 0;
/*
* set a reasonable default alignment on word boundaries: the
* in.
*
* Returns zero if unsuccessful or an error if TUR failed. For
- * removable media, a return of NOT_READY or UNIT_ATTENTION is
- * translated to success, with the ->changed flag updated.
+ * removable media, UNIT_ATTENTION sets ->changed flag.
**/
int
scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
} while (scsi_sense_valid(sshdr) &&
sshdr->sense_key == UNIT_ATTENTION && --retries);
- if (!sshdr)
- /* could not allocate sense buffer, so can't process it */
- return result;
-
- if (sdev->removable && scsi_sense_valid(sshdr) &&
- (sshdr->sense_key == UNIT_ATTENTION ||
- sshdr->sense_key == NOT_READY)) {
- sdev->changed = 1;
- result = 0;
- }
if (!sshdr_external)
kfree(sshdr);
return result;
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/delay.h>
-#include <linux/smp_lock.h>
#include <linux/mutex.h>
#include <linux/string_helpers.h>
#include <linux/async.h>
* quietly refuse to do anything to a changed disc until
* the changed bit has been reset
*/
- /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */
+ /* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */
goto out;
}
*/
if (!scsi_device_online(sdp)) {
set_media_not_present(sdkp);
- retval = 1;
goto out;
}
sshdr);
}
- /*
- * Unable to test, unit probably not ready. This usually
- * means there is no disc in the drive. Mark as changed,
- * and we will figure it out later once the drive is
- * available again.
- */
- if (retval || (scsi_sense_valid(sshdr) &&
- /* 0x3a is medium not present */
- sshdr->asc == 0x3a)) {
+ if (retval) {
set_media_not_present(sdkp);
- retval = 1;
goto out;
}
*/
sdkp->media_present = 1;
- retval = sdp->changed;
- sdp->changed = 0;
out:
- if (retval != sdkp->previous_state)
+ /*
+ * Report a media change under the following conditions:
+ *
+ * Medium is present now but wasn't present before.
+ * Medium was present before but isn't present now.
+ * Medium was present at all times, but it changed while
+ * we weren't looking (sdp->changed is set).
+ *
+ * If there was no medium before and there is no medium now then
+ * don't report a change, even if a medium was inserted and removed
+ * while we weren't looking.
+ */
+ retval = (sdkp->media_present != sdkp->previous_state ||
+ (sdkp->media_present && sdp->changed));
+ if (retval)
sdev_evt_send_simple(sdp, SDEV_EVT_MEDIA_CHANGE, GFP_KERNEL);
- sdkp->previous_state = retval;
+ sdkp->previous_state = sdkp->media_present;
+
+ /* sdp->changed indicates medium was changed or is not present */
+ sdp->changed = !sdkp->media_present;
kfree(sshdr);
return retval;
}
u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
u64 bad_lba;
int info_valid;
+ /*
+ * resid is optional but mostly filled in. When it's unused,
+ * its value is zero, so we assume the whole buffer was transferred
+ */
+ unsigned int transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd);
+ unsigned int good_bytes;
if (scmd->request->cmd_type != REQ_TYPE_FS)
return 0;
/* This computation should always be done in terms of
* the resolution of the device's medium.
*/
- return (bad_lba - start_lba) * scmd->device->sector_size;
+ good_bytes = (bad_lba - start_lba) * scmd->device->sector_size;
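+ /* never report more progress than the device actually transferred */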
+ return min(good_bytes, transferred);
}
/**
int old_rcd = sdkp->RCD;
int old_dpofua = sdkp->DPOFUA;
- if (sdp->skip_ms_page_8)
- goto defaults;
-
- if (sdp->type == TYPE_RBC) {
+ if (sdp->skip_ms_page_8) {
+ if (sdp->type == TYPE_RBC)
+ goto defaults;
+ else {
+ modepage = 0x3F;
+ dbd = 0;
+ }
+ } else if (sdp->type == TYPE_RBC) {
modepage = 6;
dbd = 8;
} else {
*/
if (len < 3)
goto bad_sense;
- if (len > 20)
- len = 20;
-
- /* Take headers and block descriptors into account */
- len += data.header_length + data.block_descriptor_length;
- if (len > SD_BUF_SIZE)
- goto bad_sense;
+ else if (len > SD_BUF_SIZE) {
+ sd_printk(KERN_NOTICE, sdkp, "Truncating mode parameter "
+ "data from %d to %d bytes\n", len, SD_BUF_SIZE);
+ len = SD_BUF_SIZE;
+ }
/* Get the data */
res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr);
if (scsi_status_is_good(res)) {
int offset = data.header_length + data.block_descriptor_length;
- if (offset >= SD_BUF_SIZE - 2) {
- sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n");
- goto defaults;
+ while (offset < len) {
+ u8 page_code = buffer[offset] & 0x3F;
+ u8 spf = buffer[offset] & 0x40;
+
+ if (page_code == 8 || page_code == 6) {
+ /* We're interested only in the first 3 bytes. */
+ if (len - offset <= 2) {
+ sd_printk(KERN_ERR, sdkp, "Incomplete "
+ "mode parameter data\n");
+ goto defaults;
+ } else {
+ modepage = page_code;
+ goto Page_found;
+ }
+ } else {
+ /* Go to the next page */
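+ /* SPF set means a 4-byte subpage header, SPF clear a 2-byte page header */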
+ if (spf && len - offset > 3)
+ offset += 4 + (buffer[offset+2] << 8) +
+ buffer[offset+3];
+ else if (!spf && len - offset > 1)
+ offset += 2 + buffer[offset+1];
+ else {
+ sd_printk(KERN_ERR, sdkp, "Incomplete "
+ "mode parameter data\n");
+ goto defaults;
+ }
+ }
}
- if ((buffer[offset] & 0x3f) != modepage) {
+ if (modepage == 0x3F) {
+ sd_printk(KERN_ERR, sdkp, "No Caching mode page "
+ "present\n");
+ goto defaults;
+ } else if ((buffer[offset] & 0x3f) != modepage) {
sd_printk(KERN_ERR, sdkp, "Got wrong page\n");
goto defaults;
}
-
+ Page_found:
if (modepage == 8) {
sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0);
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
-#include <linux/smp_lock.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
return &ei->vfs_inode;
}
-static void bdev_destroy_inode(struct inode *inode)
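+/* free the inode via RCU so lockless (RCU-walk) path lookups stay safe */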
+static void bdev_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
+static void bdev_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
mutex_init(&bdev->bd_mutex);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
- #ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_list);
- #endif
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
}
static struct file_system_type bd_type = {
else if (bdev->bd_contains == bdev)
return true; /* is a whole device which isn't held */
- else if (whole->bd_holder == bd_claim)
+ else if (whole->bd_holder == bd_may_claim)
return true; /* is a partition of a device that is being partitioned */
else if (whole->bd_holder != NULL)
return false; /* is a partition of a held device */
}
}
- /* releases bdev_lock */
- static void __bd_abort_claiming(struct block_device *whole, void *holder)
- {
- BUG_ON(whole->bd_claiming != holder);
- whole->bd_claiming = NULL;
- wake_up_bit(&whole->bd_claiming, 0);
-
- spin_unlock(&bdev_lock);
- bdput(whole);
- }
-
- /**
- * bd_abort_claiming - abort claiming a block device
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Abort a claiming block started by bd_start_claiming(). Note that
- * @whole is not the block device to be claimed but the whole device
- * returned by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
- static void bd_abort_claiming(struct block_device *whole, void *holder)
- {
- spin_lock(&bdev_lock);
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- }
-
- /* increment holders when we have a legitimate claim. requires bdev_lock */
- static void __bd_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
- {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- /**
- * bd_finish_claiming - finish claiming a block device
- * @bdev: block device of interest (passed to bd_start_claiming())
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Finish a claiming block started by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
- static void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder)
- {
- spin_lock(&bdev_lock);
- BUG_ON(!bd_may_claim(bdev, whole, holder));
- __bd_claim(bdev, whole, holder);
- __bd_abort_claiming(whole, holder); /* not actually an abort */
- }
-
- /**
- * bd_claim - claim a block device
- * @bdev: block device to claim
- * @holder: holder trying to claim @bdev
- *
- * Try to claim @bdev which must have been opened successfully.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * 0 if successful, -EBUSY if @bdev is already claimed.
- */
- int bd_claim(struct block_device *bdev, void *holder)
- {
- struct block_device *whole = bdev->bd_contains;
- int res;
-
- might_sleep();
-
- spin_lock(&bdev_lock);
- res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0)
- __bd_claim(bdev, whole, holder);
- spin_unlock(&bdev_lock);
-
- return res;
- }
- EXPORT_SYMBOL(bd_claim);
-
- void bd_release(struct block_device *bdev)
- {
- spin_lock(&bdev_lock);
- if (!--bdev->bd_contains->bd_holders)
- bdev->bd_contains->bd_holder = NULL;
- if (!--bdev->bd_holders)
- bdev->bd_holder = NULL;
- spin_unlock(&bdev_lock);
- }
-
- EXPORT_SYMBOL(bd_release);
-
#ifdef CONFIG_SYSFS
- /*
- * Functions for bd_claim_by_kobject / bd_release_from_kobject
- *
- * If a kobject is passed to bd_claim_by_kobject()
- * and the kobject has a parent directory,
- * following symlinks are created:
- * o from the kobject to the claimed bdev
- * o from "holders" directory of the bdev to the parent of the kobject
- * bd_release_from_kobject() removes these symlinks.
- *
- * Example:
- * If /dev/dm-0 maps to /dev/sda, kobject corresponding to
- * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
- * /sys/block/dm-0/slaves/sda --> /sys/block/sda
- * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
- */
-
static int add_symlink(struct kobject *from, struct kobject *to)
{
- if (!from || !to)
- return 0;
return sysfs_create_link(from, to, kobject_name(to));
}
static void del_symlink(struct kobject *from, struct kobject *to)
{
- if (!from || !to)
- return;
sysfs_remove_link(from, kobject_name(to));
}
- /*
- * 'struct bd_holder' contains pointers to kobjects symlinked by
- * bd_claim_by_kobject.
- * It's connected to bd_holder_list which is protected by bdev->bd_sem.
- */
- struct bd_holder {
- struct list_head list; /* chain of holders of the bdev */
- int count; /* references from the holder */
- struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
- struct kobject *hdev; /* e.g. "/block/dm-0" */
- struct kobject *hdir; /* e.g. "/block/sda/holders" */
- struct kobject *sdev; /* e.g. "/block/sda" */
- };
-
- /*
- * Get references of related kobjects at once.
- * Returns 1 on success. 0 on failure.
- *
- * Should call bd_holder_release_dirs() after successful use.
- */
- static int bd_holder_grab_dirs(struct block_device *bdev,
- struct bd_holder *bo)
- {
- if (!bdev || !bo)
- return 0;
-
- bo->sdir = kobject_get(bo->sdir);
- if (!bo->sdir)
- return 0;
-
- bo->hdev = kobject_get(bo->sdir->parent);
- if (!bo->hdev)
- goto fail_put_sdir;
-
- bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
- if (!bo->sdev)
- goto fail_put_hdev;
-
- bo->hdir = kobject_get(bdev->bd_part->holder_dir);
- if (!bo->hdir)
- goto fail_put_sdev;
-
- return 1;
-
- fail_put_sdev:
- kobject_put(bo->sdev);
- fail_put_hdev:
- kobject_put(bo->hdev);
- fail_put_sdir:
- kobject_put(bo->sdir);
-
- return 0;
- }
-
- /* Put references of related kobjects at once. */
- static void bd_holder_release_dirs(struct bd_holder *bo)
- {
- kobject_put(bo->hdir);
- kobject_put(bo->sdev);
- kobject_put(bo->hdev);
- kobject_put(bo->sdir);
- }
-
- static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
- {
- struct bd_holder *bo;
-
- bo = kzalloc(sizeof(*bo), GFP_KERNEL);
- if (!bo)
- return NULL;
-
- bo->count = 1;
- bo->sdir = kobj;
-
- return bo;
- }
-
- static void free_bd_holder(struct bd_holder *bo)
- {
- kfree(bo);
- }
-
/**
- * find_bd_holder - find matching struct bd_holder from the block device
+ * bd_link_disk_holder - create symlinks between holding disk and slave bdev
+ * @bdev: the claimed slave bdev
+ * @disk: the holding disk
*
- * @bdev: struct block device to be searched
- * @bo: target struct bd_holder
- *
- * Returns matching entry with @bo in @bdev->bd_holder_list.
- * If found, increment the reference count and return the pointer.
- * If not found, returns NULL.
- */
- static struct bd_holder *find_bd_holder(struct block_device *bdev,
- struct bd_holder *bo)
- {
- struct bd_holder *tmp;
-
- list_for_each_entry(tmp, &bdev->bd_holder_list, list)
- if (tmp->sdir == bo->sdir) {
- tmp->count++;
- return tmp;
- }
-
- return NULL;
- }
-
- /**
- * add_bd_holder - create sysfs symlinks for bd_claim() relationship
- *
- * @bdev: block device to be bd_claimed
- * @bo: preallocated and initialized by alloc_bd_holder()
- *
- * Add @bo to @bdev->bd_holder_list, create symlinks.
- *
- * Returns 0 if symlinks are created.
- * Returns -ve if something fails.
- */
- static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
- {
- int err;
-
- if (!bo)
- return -EINVAL;
-
- if (!bd_holder_grab_dirs(bdev, bo))
- return -EBUSY;
-
- err = add_symlink(bo->sdir, bo->sdev);
- if (err)
- return err;
-
- err = add_symlink(bo->hdir, bo->hdev);
- if (err) {
- del_symlink(bo->sdir, bo->sdev);
- return err;
- }
-
- list_add_tail(&bo->list, &bdev->bd_holder_list);
- return 0;
- }
-
- /**
- * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
+ * This function creates the following sysfs symlinks.
*
- * @bdev: block device to be bd_claimed
- * @kobj: holder's kobject
+ * - from "slaves" directory of the holder @disk to the claimed @bdev
+ * - from "holders" directory of the @bdev to the holder @disk
*
- * If there is matching entry with @kobj in @bdev->bd_holder_list
- * and no other bd_claim() from the same kobject,
- * remove the struct bd_holder from the list, delete symlinks for it.
+ * For example, if /dev/dm-0 maps to /dev/sda and the disk for dm-0 is
+ * passed to bd_link_disk_holder(), then:
*
- * Returns a pointer to the struct bd_holder when it's removed from the list
- * and ready to be freed.
- * Returns NULL if matching claim isn't found or there is other bd_claim()
- * by the same kobject.
- */
- static struct bd_holder *del_bd_holder(struct block_device *bdev,
- struct kobject *kobj)
- {
- struct bd_holder *bo;
-
- list_for_each_entry(bo, &bdev->bd_holder_list, list) {
- if (bo->sdir == kobj) {
- bo->count--;
- BUG_ON(bo->count < 0);
- if (!bo->count) {
- list_del(&bo->list);
- del_symlink(bo->sdir, bo->sdev);
- del_symlink(bo->hdir, bo->hdev);
- bd_holder_release_dirs(bo);
- return bo;
- }
- break;
- }
- }
-
- return NULL;
- }
-
- /**
- * bd_claim_by_kobject - bd_claim() with additional kobject signature
+ * /sys/block/dm-0/slaves/sda --> /sys/block/sda
+ * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
*
- * @bdev: block device to be claimed
- * @holder: holder's signature
- * @kobj: holder's kobject
+ * The caller must have claimed @bdev before calling this function and
+ * must ensure that both @bdev and @disk are valid during the creation and
+ * lifetime of these symlinks.
*
- * Do bd_claim() and if it succeeds, create sysfs symlinks between
- * the bdev and the holder's kobject.
- * Use bd_release_from_kobject() when relesing the claimed bdev.
+ * CONTEXT:
+ * Might sleep.
*
- * Returns 0 on success. (same as bd_claim())
- * Returns errno on failure.
+ * RETURNS:
+ * 0 on success, -errno on failure.
*/
- static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
- struct kobject *kobj)
+ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
- int err;
- struct bd_holder *bo, *found;
-
- if (!kobj)
- return -EINVAL;
-
- bo = alloc_bd_holder(kobj);
- if (!bo)
- return -ENOMEM;
+ int ret = 0;
mutex_lock(&bdev->bd_mutex);
- err = bd_claim(bdev, holder);
- if (err)
- goto fail;
+ WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
- found = find_bd_holder(bdev, bo);
- if (found)
- goto fail;
+ /* FIXME: remove the following once add_disk() handles errors */
+ if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
+ goto out_unlock;
- err = add_bd_holder(bdev, bo);
- if (err)
- bd_release(bdev);
- else
- bo = NULL;
- fail:
- mutex_unlock(&bdev->bd_mutex);
- free_bd_holder(bo);
- return err;
- }
+ ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ if (ret)
+ goto out_unlock;
- /**
- * bd_release_from_kobject - bd_release() with additional kobject signature
- *
- * @bdev: block device to be released
- * @kobj: holder's kobject
- *
- * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
- */
- static void bd_release_from_kobject(struct block_device *bdev,
- struct kobject *kobj)
- {
- if (!kobj)
- return;
+ ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret) {
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ goto out_unlock;
+ }
- mutex_lock(&bdev->bd_mutex);
- bd_release(bdev);
- free_bd_holder(del_bd_holder(bdev, kobj));
+ bdev->bd_holder_disk = disk;
+ out_unlock:
mutex_unlock(&bdev->bd_mutex);
+ return ret;
}
+ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
- /**
- * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
- *
- * @bdev: block device to be claimed
- * @holder: holder's signature
- * @disk: holder's gendisk
- *
- * Call bd_claim_by_kobject() with getting @disk->slave_dir.
- */
- int bd_claim_by_disk(struct block_device *bdev, void *holder,
- struct gendisk *disk)
+ static void bd_unlink_disk_holder(struct block_device *bdev)
{
- return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
- }
- EXPORT_SYMBOL_GPL(bd_claim_by_disk);
+ struct gendisk *disk = bdev->bd_holder_disk;
- /**
- * bd_release_from_disk - wrapper function for bd_release_from_kobject()
- *
- * @bdev: block device to be claimed
- * @disk: holder's gendisk
- *
- * Call bd_release_from_kobject() and put @disk->slave_dir.
- */
- void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
- {
- bd_release_from_kobject(bdev, disk->slave_dir);
- kobject_put(disk->slave_dir);
- }
- EXPORT_SYMBOL_GPL(bd_release_from_disk);
- #endif
+ bdev->bd_holder_disk = NULL;
+ if (!disk)
+ return;
- /*
- * Tries to open block device by device number. Use it ONLY if you
- * really do not have anything better - i.e. when you are behind a
- * truly sucky interface and all you are given is a device number. _Never_
- * to be used for internal purposes. If you ever need it - reconsider
- * your API.
- */
- struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
- {
- struct block_device *bdev = bdget(dev);
- int err = -ENOMEM;
- if (bdev)
- err = blkdev_get(bdev, mode);
- return err ? ERR_PTR(err) : bdev;
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
}
-
- EXPORT_SYMBOL(open_by_devnum);
+ #else
+ static inline void bd_unlink_disk_holder(struct block_device *bdev)
+ { }
+ #endif
/**
* flush_disk - invalidates all buffer-cache entries on a disk
{
struct gendisk *disk = bdev->bd_disk;
const struct block_device_operations *bdops = disk->fops;
+ unsigned int events;
- if (!bdops->media_changed)
- return 0;
- if (!bdops->media_changed(bdev->bd_disk))
+ events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
+ DISK_EVENT_EJECT_REQUEST);
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
flush_disk(bdev);
return ret;
}
- int blkdev_get(struct block_device *bdev, fmode_t mode)
+ /**
+ * blkdev_get - open a block device
+ * @bdev: block_device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
+ * opened with exclusive access. Specifying %FMODE_EXCL with %NULL
+ * @holder is invalid. Exclusive opens may nest for the same @holder.
+ *
+ * On success, the reference count of @bdev is unchanged. On failure,
+ * @bdev is put.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
- return __blkdev_get(bdev, mode, 0);
+ struct block_device *whole = NULL;
+ int res;
+
+ WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
+
+ if ((mode & FMODE_EXCL) && holder) {
+ whole = bd_start_claiming(bdev, holder);
+ if (IS_ERR(whole)) {
+ bdput(bdev);
+ return PTR_ERR(whole);
+ }
+ }
+
+ res = __blkdev_get(bdev, mode, 0);
+
+ /* __blkdev_get() may alter read only status, check it afterwards */
+ if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ __blkdev_put(bdev, mode, 0);
+ res = -EACCES;
+ }
+
+ if (whole) {
+ /* finish claiming */
+ mutex_lock(&bdev->bd_mutex);
+ spin_lock(&bdev_lock);
+
+ if (!res) {
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ /*
+ * Note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder
+ * will be set to bd_may_claim before being
+ * set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_may_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+ }
+
+ /* tell others that we're done */
+ BUG_ON(whole->bd_claiming != holder);
+ whole->bd_claiming = NULL;
+ wake_up_bit(&whole->bd_claiming, 0);
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * Block event polling for write claims. Any write
+ * holder makes the write_holder state stick until all
+ * are released. This is good enough and tracking
+ * individual writeable references is too fragile given
+ * the way @mode is used in blkdev_get/put().
+ */
+ if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+ bdev->bd_write_holder = true;
+ disk_block_events(bdev->bd_disk);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(whole);
+ }
+
+ return res;
}
EXPORT_SYMBOL(blkdev_get);
+ /**
+ * blkdev_get_by_path - open a block device by name
+ * @path: path to the block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by the device file at @path. @mode
+ * and @holder are identical to blkdev_get().
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder)
+ {
+ struct block_device *bdev;
+ int err;
+
+ bdev = lookup_bdev(path);
+ if (IS_ERR(bdev))
+ return bdev;
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ return bdev;
+ }
+ EXPORT_SYMBOL(blkdev_get_by_path);
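+
+ /*
+  * A minimal usage sketch (illustrative only; the holder may be any
+  * cookie the caller owns, e.g. its super_block):
+  *
+  *	bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+  *				  holder);
+  *	if (IS_ERR(bdev))
+  *		return PTR_ERR(bdev);
+  *	...
+  *	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+  */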
+
+ /**
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by device number @dev. @mode and
+ * @holder are identical to blkdev_get().
+ *
+ * Use it ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a
+ * device number. _Never_ to be used for internal purposes. If you
+ * ever need it - reconsider your API.
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+ {
+ struct block_device *bdev;
+ int err;
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return ERR_PTR(-ENOMEM);
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ return bdev;
+ }
+ EXPORT_SYMBOL(blkdev_get_by_dev);
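+
+ /*
+  * Sketch of the by-devnum variant (cf. the md and ext3/ext4 call sites
+  * converted elsewhere in this series):
+  *
+  *	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
+  *	if (IS_ERR(bdev))
+  *		return PTR_ERR(bdev);
+  */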
+
static int blkdev_open(struct inode * inode, struct file * filp)
{
- struct block_device *whole = NULL;
struct block_device *bdev;
- int res;
/*
* Preserve backwards compatibility and allow large file access
if (bdev == NULL)
return -ENOMEM;
- if (filp->f_mode & FMODE_EXCL) {
- whole = bd_start_claiming(bdev, filp);
- if (IS_ERR(whole)) {
- bdput(bdev);
- return PTR_ERR(whole);
- }
- }
-
filp->f_mapping = bdev->bd_inode->i_mapping;
- res = blkdev_get(bdev, filp->f_mode);
-
- if (whole) {
- if (res == 0)
- bd_finish_claiming(bdev, whole, filp);
- else
- bd_abort_claiming(whole, filp);
- }
-
- return res;
+ return blkdev_get(bdev, filp->f_mode, filp);
}
static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_part_count--;
if (!--bdev->bd_openers) {
+ WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
}
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
+ if (mode & FMODE_EXCL) {
+ bool bdev_free;
+
+ /*
+ * Release a claim on the device. The holder fields
+ * are protected with bdev_lock. bd_mutex is to
+ * synchronize disk_holder unlinking.
+ */
+ mutex_lock(&bdev->bd_mutex);
+ spin_lock(&bdev_lock);
+
+ WARN_ON_ONCE(--bdev->bd_holders < 0);
+ WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
+
+ /* bd_contains might point to self, check in a separate step */
+ if ((bdev_free = !bdev->bd_holders))
+ bdev->bd_holder = NULL;
+ if (!bdev->bd_contains->bd_holders)
+ bdev->bd_contains->bd_holder = NULL;
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * If this was the last claim, remove holder link and
+ * unblock event polling if it was a write holder.
+ */
+ if (bdev_free) {
+ bd_unlink_disk_holder(bdev);
+ if (bdev->bd_write_holder) {
+ disk_unblock_events(bdev->bd_disk);
+ bdev->bd_write_holder = false;
+ } else
+ disk_check_events(bdev->bd_disk);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ } else
+ disk_check_events(bdev->bd_disk);
+
return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(filp->f_mapping->host);
- if (bdev->bd_holder == filp)
- bd_release(bdev);
+
return blkdev_put(bdev, filp->f_mode);
}
}
EXPORT_SYMBOL(lookup_bdev);
- /**
- * open_bdev_exclusive - open a block device by name and set it up for use
- *
- * @path: special file representing the block device
- * @mode: FMODE_... combination to pass be used
- * @holder: owner for exclusion
- *
- * Open the blockdevice described by the special file at @path, claim it
- * for the @holder.
- */
- struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
- {
- struct block_device *bdev, *whole;
- int error;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
-
- whole = bd_start_claiming(bdev, holder);
- if (IS_ERR(whole)) {
- bdput(bdev);
- return whole;
- }
-
- error = blkdev_get(bdev, mode);
- if (error)
- goto out_abort_claiming;
-
- error = -EACCES;
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
- goto out_blkdev_put;
-
- bd_finish_claiming(bdev, whole, holder);
- return bdev;
-
- out_blkdev_put:
- blkdev_put(bdev, mode);
- out_abort_claiming:
- bd_abort_claiming(whole, holder);
- return ERR_PTR(error);
- }
-
- EXPORT_SYMBOL(open_bdev_exclusive);
-
- /**
- * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
- *
- * @bdev: blockdevice to close
- * @mode: mode, must match that used to open.
- *
- * This is the counterpart to open_bdev_exclusive().
- */
- void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
- {
- bd_release(bdev);
- blkdev_put(bdev, mode);
- }
-
- EXPORT_SYMBOL(close_bdev_exclusive);
-
int __invalidate_device(struct block_device *bdev)
{
struct super_block *sb = get_super(bdev);
device->fs_devices = fs_devices;
fs_devices->num_devices++;
- } else if (strcmp(device->name, path)) {
+ } else if (!device->name || strcmp(device->name, path)) {
name = kstrdup(path, GFP_NOFS);
if (!name)
return -ENOMEM;
kfree(device->name);
device->name = name;
+ if (device->missing) {
+ fs_devices->missing_devices--;
+ device->missing = 0;
+ }
}
if (found_transid > fs_devices->latest_trans) {
continue;
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
fs_devices->open_devices--;
}
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
fs_devices->open_devices--;
}
if (device->writeable) {
int seeding = 1;
int ret = 0;
+ flags |= FMODE_EXCL;
+
list_for_each_entry(device, head, dev_list) {
if (device->bdev)
continue;
if (!device->name)
continue;
- bdev = open_bdev_exclusive(device->name, flags, holder);
+ bdev = blkdev_get_by_path(device->name, flags, holder);
if (IS_ERR(bdev)) {
printk(KERN_INFO "open %s failed\n", device->name);
goto error;
error_brelse:
brelse(bh);
error_close:
- close_bdev_exclusive(bdev, FMODE_READ);
+ blkdev_put(bdev, flags);
error:
continue;
}
mutex_lock(&uuid_mutex);
- bdev = open_bdev_exclusive(path, flags, holder);
+ flags |= FMODE_EXCL;
+ bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
brelse(bh);
error_close:
- close_bdev_exclusive(bdev, flags);
+ blkdev_put(bdev, flags);
error:
mutex_unlock(&uuid_mutex);
return ret;
goto out;
}
} else {
- bdev = open_bdev_exclusive(device_path, FMODE_READ,
- root->fs_info->bdev_holder);
+ bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
+ root->fs_info->bdev_holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
goto out;
device->fs_devices->num_devices--;
+ if (device->missing)
+ root->fs_info->fs_devices->missing_devices--;
+
next_device = list_entry(root->fs_info->fs_devices->devices.next,
struct btrfs_device, dev_list);
if (device->bdev == root->fs_info->sb->s_bdev)
root->fs_info->fs_devices->latest_bdev = next_device->bdev;
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
device->fs_devices->open_devices--;
}
brelse(bh);
error_close:
if (bdev)
- close_bdev_exclusive(bdev, FMODE_READ);
+ blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
return -EINVAL;
- bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
+ bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
+ root->fs_info->bdev_holder);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
error:
- close_bdev_exclusive(bdev, 0);
+ blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev) {
mutex_unlock(&uuid_mutex);
up_write(&sb->s_umount);
device->devid = devid;
device->work.func = pending_bios_fn;
device->fs_devices = fs_devices;
+ device->missing = 1;
fs_devices->num_devices++;
+ fs_devices->missing_devices++;
spin_lock_init(&device->io_lock);
INIT_LIST_HEAD(&device->dev_alloc_list);
memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
device = add_missing_dev(root, devid, dev_uuid);
if (!device)
return -ENOMEM;
+ } else if (!device->missing) {
+ /*
+ * this happens when a device that was properly setup
+ * in the device info lists suddenly goes bad.
+ * device->bdev is NULL, and so we have to set
+ * device->missing to one here
+ */
+ root->fs_info->fs_devices->missing_devices++;
+ device->missing = 1;
}
}
int writeable;
int in_fs_metadata;
+ int missing;
spinlock_t io_lock;
struct block_device *bdev;
- /* the mode sent to open_bdev_exclusive */
+ /* the mode sent to blkdev_get */
fmode_t mode;
char *name;
u64 num_devices;
u64 open_devices;
u64 rw_devices;
+ u64 missing_devices;
u64 total_rw_bytes;
struct block_device *latest_bdev;
} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
/* index in the above */
- static inline int major_to_index(int major)
+ static inline int major_to_index(unsigned major)
{
return major % CHRDEV_MAJOR_HASH_SIZE;
}
return ret;
}
-int cdev_index(struct inode *inode)
-{
- int idx;
- struct kobject *kobj;
-
- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
- if (!kobj)
- return -1;
- kobject_put(kobj);
- return idx;
-}
-
void cd_forget(struct inode *inode)
{
spin_lock(&cdev_lock);
EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
-EXPORT_SYMBOL(cdev_index);
EXPORT_SYMBOL(__register_chrdev);
EXPORT_SYMBOL(__unregister_chrdev);
EXPORT_SYMBOL(directly_mappable_cdev_bdi);
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
void ext3_msg(struct super_block *sb, const char *prefix,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+
va_end(args);
}
sb->s_id);
}
-void ext3_error (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
ext3_handle_error(sb);
* case we take the easy way out and panic immediately.
*/
-void ext3_abort (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_abort(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (test_opt(sb, ERRORS_PANIC))
journal_abort(EXT3_SB(sb)->s_journal, -EIO);
}
-void ext3_warning (struct super_block * sb, const char * function,
- const char * fmt, ...)
+void ext3_warning(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
- sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
}
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
goto fail;
return bdev;
*/
static int ext3_blkdev_put(struct block_device *bdev)
{
- bd_release(bdev);
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
return &ei->vfs_inode;
}
+static void ext3_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
static void ext3_destroy_inode(struct inode *inode)
{
if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
false);
dump_stack();
}
- kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+ call_rcu(&inode->i_rcu, ext3_i_callback);
}
static void init_once(void *foo)
goto failed_mount;
}
- if (generic_check_addressable(sb->s_blocksize_bits,
- le32_to_cpu(es->s_blocks_count))) {
+ err = generic_check_addressable(sb->s_blocksize_bits,
+ le32_to_cpu(es->s_blocks_count));
+ if (err) {
ext3_msg(sb, KERN_ERR,
"error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8)
ext3_msg(sb, KERN_ERR,
"error: CONFIG_LBDAF not enabled");
+ ret = err;
goto failed_mount;
}
if (bdev == NULL)
return NULL;
- if (bd_claim(bdev, sb)) {
- ext3_msg(sb, KERN_ERR,
- "error: failed to claim external journal device");
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return NULL;
- }
-
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
EXT3_SB(sb)->s_journal = journal;
ext3_clear_journal_err(sb, es);
- if (journal_devnum &&
+ if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
void __ext4_error(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
- sb->s_id, function, line, current->comm);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
va_end(args);
ext4_handle_error(sb);
const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
save_error_info(inode->i_sb, function, line);
va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
inode->i_sb->s_id, function, line, inode->i_ino);
if (block)
- printk("block %llu: ", block);
- printk("comm %s: ", current->comm);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu: ", block);
+ printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
}
void ext4_error_file(struct file *file, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, ext4_fsblk_t block,
+ const char *fmt, ...)
{
va_list args;
+ struct va_format vaf;
struct ext4_super_block *es;
struct inode *inode = file->f_dentry->d_inode;
char pathname[80], *path;
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (!path)
+ if (IS_ERR(path))
path = "(unknown)";
printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu "
- "(comm %s path %s): ",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path);
- vprintk(fmt, args);
- printk("\n");
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
+ inode->i_sb->s_id, function, line, inode->i_ino);
+ if (block)
+ printk(KERN_CONT "block %llu: ", block);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
va_end(args);
ext4_handle_error(inode->i_sb);
panic("EXT4-fs panic from previous error\n");
}
-void ext4_msg (struct super_block * sb, const char *prefix,
- const char *fmt, ...)
+void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
va_end(args);
}
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
- sb->s_id, function, line);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
+ sb->s_id, function, line, &vaf);
va_end(args);
}
__releases(bitlock)
__acquires(bitlock)
{
+ struct va_format vaf;
va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
+
va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
sb->s_id, function, line, grp);
if (ino)
- printk("inode %lu: ", ino);
+ printk(KERN_CONT "inode %lu: ", ino);
if (block)
- printk("block %llu:", (unsigned long long) block);
- vprintk(fmt, args);
- printk("\n");
+ printk(KERN_CONT "block %llu:", (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
va_end(args);
if (test_opt(sb, ERRORS_CONT)) {
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
goto fail;
return bdev;
*/
static int ext4_blkdev_put(struct block_device *bdev)
{
- bd_release(bdev);
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
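The ext4_blkdev_get()/ext4_blkdev_put() changes track the new block layer API: blkdev_get_by_dev() takes the exclusivity request (FMODE_EXCL) and a holder cookie at open time, replacing the old open_by_devnum() + bd_claim() dance, and the matching blkdev_put() must be passed the same mode, FMODE_EXCL included. A hedged sketch of the pairing, with the superblock as holder as in this patch:

	struct block_device *bdev;

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	/* ... use the device; 'sb' identifies the exclusive holder ... */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);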
static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
- /*
- * Note: We can be called before EXT4_SB(sb)->s_journal is set,
- * therefore it can be null here. Don't check it, just initialize
- * jinode.
- */
- jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
- ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
#endif
+ ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_completed_io_list);
spin_lock_init(&ei->i_completed_io_lock);
ei->cur_aio_dio = NULL;
return drop;
}
+static void ext4_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+}
+
static void ext4_destroy_inode(struct inode *inode)
{
ext4_ioend_wait(inode);
true);
dump_stack();
}
- kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+ call_rcu(&inode->i_rcu, ext4_i_callback);
}
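With RCU-walk path lookup, other CPUs may still be dereferencing an inode after the last reference is dropped, so its slab object must not be reused until a grace period has elapsed. The ext4_i_callback()/call_rcu() pair above is the now-standard shape for any filesystem with a private inode cache; generically (the foo_* names are placeholders):

	static void foo_i_callback(struct rcu_head *head)
	{
		struct inode *inode = container_of(head, struct inode, i_rcu);

		INIT_LIST_HEAD(&inode->i_dentry);  /* i_dentry unions with i_rcu */
		kmem_cache_free(foo_inode_cachep, FOO_I(inode));
	}

	static void foo_destroy_inode(struct inode *inode)
	{
		call_rcu(&inode->i_rcu, foo_i_callback);
	}

The i_dentry/i_rcu union in struct inode (see the fs.h hunk later in this series) is what makes this free of charge space-wise.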
static void init_once(void *foo)
end_writeback(inode);
dquot_drop(inode);
ext4_discard_preallocations(inode);
- if (EXT4_JOURNAL(inode))
- jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode);
+ if (EXT4_I(inode)->jinode) {
+ jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
+ EXT4_I(inode)->jinode);
+ jbd2_free_inode(EXT4_I(inode)->jinode);
+ EXT4_I(inode)->jinode = NULL;
+ }
}
static inline void ext4_show_quota_options(struct seq_file *seq,
!(def_mount_opts & EXT4_DEFM_NODELALLOC))
seq_puts(seq, ",nodelalloc");
+ if (test_opt(sb, MBLK_IO_SUBMIT))
+ seq_puts(seq, ",mblk_io_submit");
if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/*
.quota_write = ext4_quota_write,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
- .trim_fs = ext4_trim_fs
};
static const struct super_operations ext4_nojournal_sops = {
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc,
- Opt_block_validity, Opt_noblock_validity,
+ Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+ Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard,
{Opt_resize, "resize"},
{Opt_delalloc, "delalloc"},
{Opt_nodelalloc, "nodelalloc"},
+ {Opt_mblk_io_submit, "mblk_io_submit"},
+ {Opt_nomblk_io_submit, "nomblk_io_submit"},
{Opt_block_validity, "block_validity"},
{Opt_noblock_validity, "noblock_validity"},
{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
sbi->s_qf_names[qtype] = NULL;
return 0;
}
- set_opt(sbi->s_mount_opt, QUOTA);
+ set_opt(sb, QUOTA);
return 1;
}
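The long run of mechanical conversions that follows comes from set_opt()/clear_opt() being redefined to take the superblock rather than the flags word, which lets a second flags word (s_mount_opt2, introduced alongside) hide behind the same interface. The ext4.h definitions are roughly the following (a sketch; the header wraps the lines differently):

	#define set_opt(sb, opt)    (EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_##opt)
	#define clear_opt(sb, opt)  (EXT4_SB(sb)->s_mount_opt &= ~EXT4_MOUNT_##opt)
	#define set_opt2(sb, opt)   (EXT4_SB(sb)->s_mount_opt2 |= EXT4_MOUNT2_##opt)
	#define clear_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 &= ~EXT4_MOUNT2_##opt)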
switch (token) {
case Opt_bsd_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, MINIX_DF);
+ clear_opt(sb, MINIX_DF);
break;
case Opt_minix_df:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, MINIX_DF);
+ set_opt(sb, MINIX_DF);
break;
case Opt_grpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
break;
case Opt_nogrpid:
ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
- clear_opt(sbi->s_mount_opt, GRPID);
+ clear_opt(sb, GRPID);
break;
case Opt_resuid:
/* *sb_block = match_int(&args[0]); */
break;
case Opt_err_panic:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ set_opt(sb, ERRORS_PANIC);
break;
case Opt_err_ro:
- clear_opt(sbi->s_mount_opt, ERRORS_CONT);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ clear_opt(sb, ERRORS_CONT);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_RO);
break;
case Opt_err_cont:
- clear_opt(sbi->s_mount_opt, ERRORS_RO);
- clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ clear_opt(sb, ERRORS_RO);
+ clear_opt(sb, ERRORS_PANIC);
+ set_opt(sb, ERRORS_CONT);
break;
case Opt_nouid32:
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
break;
case Opt_debug:
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
break;
case Opt_oldalloc:
- set_opt(sbi->s_mount_opt, OLDALLOC);
+ set_opt(sb, OLDALLOC);
break;
case Opt_orlov:
- clear_opt(sbi->s_mount_opt, OLDALLOC);
+ clear_opt(sb, OLDALLOC);
break;
#ifdef CONFIG_EXT4_FS_XATTR
case Opt_user_xattr:
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
break;
case Opt_nouser_xattr:
- clear_opt(sbi->s_mount_opt, XATTR_USER);
+ clear_opt(sb, XATTR_USER);
break;
#else
case Opt_user_xattr:
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
case Opt_acl:
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
break;
case Opt_noacl:
- clear_opt(sbi->s_mount_opt, POSIX_ACL);
+ clear_opt(sb, POSIX_ACL);
break;
#else
case Opt_acl:
"Cannot specify journal on remount");
return 0;
}
- set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
+ set_opt(sb, UPDATE_JOURNAL);
break;
case Opt_journal_dev:
if (is_remount) {
*journal_devnum = option;
break;
case Opt_journal_checksum:
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_journal_async_commit:
- set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
- set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
+ set_opt(sb, JOURNAL_ASYNC_COMMIT);
+ set_opt(sb, JOURNAL_CHECKSUM);
break;
case Opt_noload:
- set_opt(sbi->s_mount_opt, NOLOAD);
+ set_opt(sb, NOLOAD);
break;
case Opt_commit:
if (match_int(&args[0], &option))
return 0;
}
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ clear_opt(sb, DATA_FLAGS);
sbi->s_mount_opt |= data_opt;
}
break;
case Opt_data_err_abort:
- set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ set_opt(sb, DATA_ERR_ABORT);
break;
case Opt_data_err_ignore:
- clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+ clear_opt(sb, DATA_ERR_ABORT);
break;
#ifdef CONFIG_QUOTA
case Opt_usrjquota:
break;
case Opt_quota:
case Opt_usrquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, USRQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, USRQUOTA);
break;
case Opt_grpquota:
- set_opt(sbi->s_mount_opt, QUOTA);
- set_opt(sbi->s_mount_opt, GRPQUOTA);
+ set_opt(sb, QUOTA);
+ set_opt(sb, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_loaded(sb)) {
"options when quota turned on");
return 0;
}
- clear_opt(sbi->s_mount_opt, QUOTA);
- clear_opt(sbi->s_mount_opt, USRQUOTA);
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, QUOTA);
+ clear_opt(sb, USRQUOTA);
+ clear_opt(sb, GRPQUOTA);
break;
#else
case Opt_quota:
sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
break;
case Opt_nobarrier:
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_barrier:
if (args[0].from) {
} else
option = 1; /* No argument, default to 1 */
if (option)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
else
- clear_opt(sbi->s_mount_opt, BARRIER);
+ clear_opt(sb, BARRIER);
break;
case Opt_ignore:
break;
"Ignoring deprecated bh option");
break;
case Opt_i_version:
- set_opt(sbi->s_mount_opt, I_VERSION);
+ set_opt(sb, I_VERSION);
sb->s_flags |= MS_I_VERSION;
break;
case Opt_nodelalloc:
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
+ break;
+ case Opt_mblk_io_submit:
+ set_opt(sb, MBLK_IO_SUBMIT);
+ break;
+ case Opt_nomblk_io_submit:
+ clear_opt(sb, MBLK_IO_SUBMIT);
break;
case Opt_stripe:
if (match_int(&args[0], &option))
sbi->s_stripe = option;
break;
case Opt_delalloc:
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
break;
case Opt_block_validity:
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
break;
case Opt_noblock_validity:
- clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ clear_opt(sb, BLOCK_VALIDITY);
break;
case Opt_inode_readahead_blks:
if (match_int(&args[0], &option))
option);
break;
case Opt_noauto_da_alloc:
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb, NO_AUTO_DA_ALLOC);
break;
case Opt_auto_da_alloc:
if (args[0].from) {
} else
option = 1; /* No argument, default to 1 */
if (option)
- clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+ clear_opt(sb, NO_AUTO_DA_ALLOC);
else
- set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ set_opt(sb, NO_AUTO_DA_ALLOC);
break;
case Opt_discard:
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
break;
case Opt_nodiscard:
- clear_opt(sbi->s_mount_opt, DISCARD);
+ clear_opt(sb, DISCARD);
break;
case Opt_dioread_nolock:
- set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ set_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_dioread_lock:
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
break;
case Opt_init_inode_table:
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (args[0].from) {
if (match_int(&args[0], &option))
return 0;
sbi->s_li_wait_mult = option;
break;
case Opt_noinit_inode_table:
- clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ clear_opt(sb, INIT_INODE_TABLE);
break;
default:
ext4_msg(sb, KERN_ERR,
#ifdef CONFIG_QUOTA
if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
- clear_opt(sbi->s_mount_opt, USRQUOTA);
+ clear_opt(sb, USRQUOTA);
if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
- clear_opt(sbi->s_mount_opt, GRPQUOTA);
+ clear_opt(sb, GRPQUOTA);
if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
ext4_msg(sb, KERN_ERR, "old and new quota "
ext4_commit_super(sb, 1);
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
- "bpg=%lu, ipg=%lu, mo=%04x]\n",
+ "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
sb->s_blocksize,
sbi->s_groups_count,
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
- sbi->s_mount_opt);
+ sbi->s_mount_opt, sbi->s_mount_opt2);
return res;
}
size = flex_group_count * sizeof(struct flex_groups);
sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
- sbi->s_flex_groups = vmalloc(size);
- if (sbi->s_flex_groups)
- memset(sbi->s_flex_groups, 0, size);
- }
- if (sbi->s_flex_groups == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory for "
- "%u flex groups", flex_group_count);
- goto failed;
+ sbi->s_flex_groups = vzalloc(size);
+ if (sbi->s_flex_groups == NULL) {
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %u flex groups",
+ flex_group_count);
+ goto failed;
+ }
}
for (i = 0; i < sbi->s_groups_count; i++) {
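vzalloc() folds the old vmalloc()-then-memset() fallback into a single zeroing call. The kzalloc-then-vzalloc cascade is the usual idiom for tables that are normally small but can outgrow what the page allocator will hand out; the matching free path has to ask which allocator won, e.g. (sketch):

	if (is_vmalloc_addr(sbi->s_flex_groups))
		vfree(sbi->s_flex_groups);
	else
		kfree(sbi->s_flex_groups);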
struct ext4_li_request *elr;
mutex_lock(&ext4_li_info->li_list_mtx);
- if (list_empty(&ext4_li_info->li_request_list))
- return;
-
list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
elr = list_entry(pos, struct ext4_li_request,
lr_request);
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
- int ret;
+ int ret = 0;
if (sbi->s_li_request != NULL)
return 0;
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
- set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ set_opt(sb, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
- set_opt(sbi->s_mount_opt, DEBUG);
+ set_opt(sb, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
"2.6.38");
- set_opt(sbi->s_mount_opt, GRPID);
+ set_opt(sb, GRPID);
}
if (def_mount_opts & EXT4_DEFM_UID16)
- set_opt(sbi->s_mount_opt, NO_UID32);
+ set_opt(sb, NO_UID32);
#ifdef CONFIG_EXT4_FS_XATTR
if (def_mount_opts & EXT4_DEFM_XATTR_USER)
- set_opt(sbi->s_mount_opt, XATTR_USER);
+ set_opt(sb, XATTR_USER);
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (def_mount_opts & EXT4_DEFM_ACL)
- set_opt(sbi->s_mount_opt, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
#endif
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ set_opt(sb, WRITEBACK_DATA);
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
- set_opt(sbi->s_mount_opt, ERRORS_PANIC);
+ set_opt(sb, ERRORS_PANIC);
else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
- set_opt(sbi->s_mount_opt, ERRORS_CONT);
+ set_opt(sb, ERRORS_CONT);
else
- set_opt(sbi->s_mount_opt, ERRORS_RO);
+ set_opt(sb, ERRORS_RO);
if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
- set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+ set_opt(sb, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
- set_opt(sbi->s_mount_opt, DISCARD);
+ set_opt(sb, DISCARD);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
- set_opt(sbi->s_mount_opt, BARRIER);
+ set_opt(sb, BARRIER);
/*
* enable delayed allocation by default
*/
if (!IS_EXT3_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
- set_opt(sbi->s_mount_opt, DELALLOC);
+ set_opt(sb, DELALLOC);
if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
&journal_devnum, &journal_ioprio, NULL, 0)) {
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
- ret = generic_check_addressable(sb->s_blocksize_bits,
+ err = generic_check_addressable(sb->s_blocksize_bits,
ext4_blocks_count(es));
- if (ret) {
+ if (err) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
+ ret = err;
goto failed_mount;
}
"suppressed and not mounted read-only");
goto failed_mount_wq;
} else {
- clear_opt(sbi->s_mount_opt, DATA_FLAGS);
- set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ clear_opt(sb, DATA_FLAGS);
+ set_opt(sb, WRITEBACK_DATA);
sbi->s_journal = NULL;
needs_recovery = 0;
goto no_journal;
*/
if (jbd2_journal_check_available_features
(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
+ set_opt(sb, ORDERED_DATA);
else
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
+ set_opt(sb, JOURNAL_DATA);
break;
case EXT4_MOUNT_ORDERED_DATA:
(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
"requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DELALLOC);
+ clear_opt(sb, DELALLOC);
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - requested data journaling mode");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
if (sb->s_blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
"option - block size is too small");
- clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+ clear_opt(sb, DIOREAD_NOLOCK);
}
}
if (bdev == NULL)
return NULL;
- if (bd_claim(bdev, sb)) {
- ext4_msg(sb, KERN_ERR,
- "failed to claim external journal device");
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return NULL;
- }
-
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
return 0;
}
+/*
+ * Structure to save mount options for ext4_remount's benefit
+ */
+struct ext4_mount_options {
+ unsigned long s_mount_opt;
+ unsigned long s_mount_opt2;
+ uid_t s_resuid;
+ gid_t s_resgid;
+ unsigned long s_commit_interval;
+ u32 s_min_batch_time, s_max_batch_time;
+#ifdef CONFIG_QUOTA
+ int s_jquota_fmt;
+ char *s_qf_names[MAXQUOTAS];
+#endif
+};
+
static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
+ old_opts.s_mount_opt2 = sbi->s_mount_opt2;
old_opts.s_resuid = sbi->s_resuid;
old_opts.s_resgid = sbi->s_resgid;
old_opts.s_commit_interval = sbi->s_commit_interval;
restore_opts:
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
+ sbi->s_mount_opt2 = old_opts.s_mount_opt2;
sbi->s_resuid = old_opts.s_resuid;
sbi->s_resgid = old_opts.s_resgid;
sbi->s_commit_interval = old_opts.s_commit_interval;
iput(inode);
return -ENOMEM;
}
- dentry->d_op = &gfs2_dops;
*dptr = dentry;
return 0;
}
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
+ sb->s_d_op = &gfs2_dops;
sb->s_export_op = &gfs2_export_ops;
sb->s_xattr = gfs2_xattr_handlers;
sb->s_qcop = &gfs2_quotactl_ops;
{
struct block_device *bdev;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
int error;
struct gfs2_args args;
struct gfs2_sbd *sdp;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
goto error_bdev;
if (s->s_root)
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
deactivate_locked_super(s);
return ERR_PTR(error);
error_bdev:
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
return ERR_PTR(error);
}
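The gfs2 conversion above (mirrored by nilfs2 below) is the by-path flavour of the same API change: blkdev_get_by_path() subsumes open_bdev_exclusive(), with FMODE_EXCL spelled out in the mode and fs_type doubling as the holder cookie, while close_bdev_exclusive() becomes blkdev_put() with the identical mode. The mode value must therefore be kept around until release; in sketch form:

	mode = FMODE_READ | FMODE_EXCL;
	if (!(flags & MS_RDONLY))
		mode |= FMODE_WRITE;
	bdev = blkdev_get_by_path(dev_name, mode, fs_type);  /* holder = fs_type */
	...
	blkdev_put(bdev, mode);  /* same mode, FMODE_EXCL included */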
struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
struct page *page = buf->page;
size_t size;
- int ret;
-
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
size = sd->len;
goto out_dput_new;
if (svc_msnfs(ffhp) &&
- ((atomic_read(&odentry->d_count) > 1)
- || (atomic_read(&ndentry->d_count) > 1))) {
+ ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
host_err = -EPERM;
goto out_dput_new;
}
if (type != S_IFDIR) { /* It's UNLINK */
#ifdef MSNFS
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (atomic_read(&rdentry->d_count) > 1)) {
+ (rdentry->d_count > 1)) {
host_err = -EPERM;
} else
#endif
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
-#include <linux/kobject.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include "nilfs.h"
const char *fmt, ...)
{
struct nilfs_sb_info *sbi = NILFS_SB(sb);
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
if (!(sb->s_flags & MS_RDONLY)) {
void nilfs_warning(struct super_block *sb, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- printk(KERN_WARNING "NILFS warning (device %s): %s: ",
- sb->s_id, function);
- vprintk(fmt, args);
- printk("\n");
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_WARNING "NILFS warning (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
va_end(args);
}
return &ii->vfs_inode;
}
-void nilfs_destroy_inode(struct inode *inode)
+static void nilfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+
if (mdi) {
kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
kfree(mdi);
kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}
+void nilfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, nilfs_i_callback);
+}
+
static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
{
struct the_nilfs *nilfs = sbi->s_nilfs;
static int nilfs_tree_was_touched(struct dentry *root_dentry)
{
- return atomic_read(&root_dentry->d_count) > 1;
+ return root_dentry->d_count > 1;
}
/**
struct nilfs_sb_info *sbi = NILFS_SB(sb);
struct the_nilfs *nilfs = sbi->s_nilfs;
unsigned long old_sb_flags;
- struct nilfs_mount_options old_opts;
+ unsigned long old_mount_opt;
int err;
old_sb_flags = sb->s_flags;
- old_opts.mount_opt = sbi->s_mount_opt;
+ old_mount_opt = sbi->s_mount_opt;
if (!parse_options(data, sb, 1)) {
err = -EINVAL;
restore_opts:
sb->s_flags = old_sb_flags;
- sbi->s_mount_opt = old_opts.mount_opt;
+ sbi->s_mount_opt = old_mount_opt;
return err;
}
{
struct nilfs_super_data sd;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
struct dentry *root_dentry;
int err, s_new = false;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(sd.bdev))
return ERR_CAST(sd.bdev);
}
if (!s_new)
- close_bdev_exclusive(sd.bdev, mode);
+ blkdev_put(sd.bdev, mode);
return root_dentry;
failed:
if (!s_new)
- close_bdev_exclusive(sd.bdev, mode);
+ blkdev_put(sd.bdev, mode);
return ERR_PTR(err);
}
#define O2HB_DB_TYPE_REGION_LIVENODES 4
#define O2HB_DB_TYPE_REGION_NUMBER 5
#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
+#define O2HB_DB_TYPE_REGION_PINNED 7
struct o2hb_debug_buf {
int db_type;
int db_size;
#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
#define O2HB_DEBUG_REGION_NUMBER "num"
#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
+#define O2HB_DEBUG_REGION_PINNED "pinned"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
+/*
+ * o2hb_dependent_users tracks the number of registered callbacks that depend
+ * on heartbeat. o2net and o2dlm both register heartbeat callbacks, but only
+ * o2dlm actually depends on the heartbeat: it must not stop while a dlm
+ * domain is still active.
+ */
+unsigned int o2hb_dependent_users;
+
+/*
+ * In global heartbeat mode, all regions are pinned if there are one or more
+ * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
+ * regions are unpinned if the region count exceeds the cut off or the number
+ * of dependent users falls to zero.
+ */
+#define O2HB_PIN_CUT_OFF 3
+
+/*
+ * In local heartbeat mode, we assume the dlm domain name to be the same as
+ * region uuid. This is true for domains created for the file system but not
+ * necessarily true for userdlm domains. This is a known limitation.
+ *
+ * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
+ * works for both file system and userdlm domains.
+ */
+static int o2hb_region_pin(const char *region_uuid);
+static void o2hb_region_unpin(const char *region_uuid);
+
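Taken together, the two comments above reduce pinning to a predicate over two counters, evaluated under o2hb_live_lock. A hypothetical helper capturing the invariant (not part of this patch; o2hb_pop_count() and O2HB_PIN_CUT_OFF are used exactly this way in the hunks below):

	/* sketch: quorum regions stay pinned iff someone depends on heartbeat
	 * and the quorum region count is at or below the cut off */
	static int o2hb_should_pin(void)
	{
		assert_spin_locked(&o2hb_live_lock);
		return o2hb_dependent_users &&
		       o2hb_pop_count(&o2hb_quorum_region_bitmap,
				      O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF;
	}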
/* Only sets a new threshold if there are no active regions.
*
* No locking or otherwise interesting code is required for reading
struct config_item hr_item;
struct list_head hr_all_item;
- unsigned hr_unclean_stop:1;
+ unsigned hr_unclean_stop:1,
+ hr_item_pinned:1,
+ hr_item_dropped:1;
/* protected by the hr_callback_sem */
struct task_struct *hr_task;
struct dentry *hr_debug_livenodes;
struct dentry *hr_debug_regnum;
struct dentry *hr_debug_elapsed_time;
+ struct dentry *hr_debug_pinned;
struct o2hb_debug_buf *hr_db_livenodes;
struct o2hb_debug_buf *hr_db_regnum;
struct o2hb_debug_buf *hr_db_elapsed_time;
+ struct o2hb_debug_buf *hr_db_pinned;
/* let the person setting up hb wait for it to return until it
* has reached a 'steady' state. This will be fixed when we have
static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
{
- cancel_delayed_work(®->hr_write_timeout_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(®->hr_write_timeout_work);
}
static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
config_item_name(®->hr_item));
set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
+
+ /*
+ * If global heartbeat active, unpin all regions if the
+ * region count > CUT_OFF
+ */
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
+ o2hb_region_unpin(NULL);
}
static int o2hb_check_slot(struct o2hb_region *reg,
set_user_nice(current, -20);
+ /* Pin node */
+ o2nm_depend_this_node();
+
while (!kthread_should_stop() && !reg->hr_unclean_stop) {
/* We track the time spent inside
* o2hb_do_disk_heartbeat so that we avoid more than
mlog_errno(ret);
}
+ /* Unpin node */
+ o2nm_undepend_this_node();
+
mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
return 0;
reg->hr_last_timeout_start));
goto done;
+ case O2HB_DB_TYPE_REGION_PINNED:
+ reg = (struct o2hb_region *)db->db_data;
+ out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
+ !!reg->hr_item_pinned);
+ goto done;
+
default:
goto done;
}
memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
+ o2hb_dependent_users = 0;
+
return o2hb_debug_init();
}
debugfs_remove(reg->hr_debug_livenodes);
debugfs_remove(reg->hr_debug_regnum);
debugfs_remove(reg->hr_debug_elapsed_time);
+ debugfs_remove(reg->hr_debug_pinned);
debugfs_remove(reg->hr_debug_dir);
spin_lock(&o2hb_live_lock);
goto out;
reg->hr_bdev = I_BDEV(filp->f_mapping->host);
- ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ);
+ ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
if (ret) {
reg->hr_bdev = NULL;
goto out;
goto bail;
}
+ reg->hr_debug_pinned =
+ o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
+ reg->hr_debug_dir,
+ &(reg->hr_db_pinned),
+ sizeof(*(reg->hr_db_pinned)),
+ O2HB_DB_TYPE_REGION_PINNED,
+ 0, 0, reg);
+ if (!reg->hr_debug_pinned) {
+ mlog_errno(ret);
+ goto bail;
+ }
+
ret = 0;
bail:
return ret;
if (reg == NULL)
return ERR_PTR(-ENOMEM);
- if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
+ if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto free;
+ }
spin_lock(&o2hb_live_lock);
reg->hr_region_num = 0;
O2NM_MAX_REGIONS);
if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
spin_unlock(&o2hb_live_lock);
- return ERR_PTR(-EFBIG);
+ ret = -EFBIG;
+ goto free;
}
set_bit(reg->hr_region_num, o2hb_region_bitmap);
}
ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
if (ret) {
config_item_put(®->hr_item);
- return ERR_PTR(ret);
+ goto free;
}
return ®->hr_item;
+free:
+ kfree(reg);
+ return ERR_PTR(ret);
}
static void o2hb_heartbeat_group_drop_item(struct config_group *group,
{
struct task_struct *hb_task;
struct o2hb_region *reg = to_o2hb_region(item);
+ int quorum_region = 0;
/* stop the thread when the user removes the region dir */
spin_lock(&o2hb_live_lock);
if (o2hb_global_heartbeat_active()) {
clear_bit(reg->hr_region_num, o2hb_region_bitmap);
clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+ if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+ quorum_region = 1;
+ clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
}
hb_task = reg->hr_task;
reg->hr_task = NULL;
+ reg->hr_item_dropped = 1;
spin_unlock(&o2hb_live_lock);
if (hb_task)
if (o2hb_global_heartbeat_active())
printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
config_item_name(®->hr_item));
+
config_item_put(item);
+
+ if (!o2hb_global_heartbeat_active() || !quorum_region)
+ return;
+
+ /*
+ * If global heartbeat active and there are dependent users,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ spin_lock(&o2hb_live_lock);
+
+ if (!o2hb_dependent_users)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
struct o2hb_heartbeat_group_attribute {
}
EXPORT_SYMBOL_GPL(o2hb_setup_callback);
-static struct o2hb_region *o2hb_find_region(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only pin the matching region. In global we pin all the active
+ * regions.
+ */
+static int o2hb_region_pin(const char *region_uuid)
{
- struct o2hb_region *p, *reg = NULL;
+ int ret = 0, found = 0;
+ struct o2hb_region *reg;
+ char *uuid;
assert_spin_locked(&o2hb_live_lock);
- list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
- if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
- reg = p;
- break;
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(®->hr_item);
+
+ /* local heartbeat */
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
}
+
+ if (reg->hr_item_pinned || reg->hr_item_dropped)
+ goto skip_pin;
+
+ /* Ignore ENOENT only for local hb (userdlm domain) */
+ ret = o2nm_depend_item(®->hr_item);
+ if (!ret) {
+ mlog(ML_CLUSTER, "Pin region %s\n", uuid);
+ reg->hr_item_pinned = 1;
+ } else {
+ if (ret == -ENOENT && found)
+ ret = 0;
+ else {
+ mlog(ML_ERROR, "Pin region %s fails with %d\n",
+ uuid, ret);
+ break;
+ }
+ }
+skip_pin:
+ if (found)
+ break;
}
- return reg;
+ return ret;
}
-static int o2hb_region_get(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only unpin the matching region. In global we unpin all the
+ * active regions.
+ */
+static void o2hb_region_unpin(const char *region_uuid)
{
- int ret = 0;
struct o2hb_region *reg;
+ char *uuid;
+ int found = 0;
- spin_lock(&o2hb_live_lock);
+ assert_spin_locked(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
- if (!reg)
- ret = -ENOENT;
- spin_unlock(&o2hb_live_lock);
+ list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+ uuid = config_item_name(®->hr_item);
+ if (region_uuid) {
+ if (strcmp(region_uuid, uuid))
+ continue;
+ found = 1;
+ }
- if (ret)
- goto out;
+ if (reg->hr_item_pinned) {
+ mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
+ o2nm_undepend_item(®->hr_item);
+ reg->hr_item_pinned = 0;
+ }
+ if (found)
+ break;
+ }
+}
- ret = o2nm_depend_this_node();
- if (ret)
- goto out;
+static int o2hb_region_inc_user(const char *region_uuid)
+{
+ int ret = 0;
- ret = o2nm_depend_item(®->hr_item);
- if (ret)
- o2nm_undepend_this_node();
+ spin_lock(&o2hb_live_lock);
-out:
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ ret = o2hb_region_pin(region_uuid);
+ goto unlock;
+ }
+
+ /*
+ * if global heartbeat active and this is the first dependent user,
+ * pin all regions if quorum region count <= CUT_OFF
+ */
+ o2hb_dependent_users++;
+ if (o2hb_dependent_users > 1)
+ goto unlock;
+
+ if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+ O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+ ret = o2hb_region_pin(NULL);
+
+unlock:
+ spin_unlock(&o2hb_live_lock);
return ret;
}
-static void o2hb_region_put(const char *region_uuid)
+static void o2hb_region_dec_user(const char *region_uuid)
{
- struct o2hb_region *reg;
-
spin_lock(&o2hb_live_lock);
- reg = o2hb_find_region(region_uuid);
+ /* local heartbeat */
+ if (!o2hb_global_heartbeat_active()) {
+ o2hb_region_unpin(region_uuid);
+ goto unlock;
+ }
- spin_unlock(&o2hb_live_lock);
+ /*
+ * if global heartbeat active and there are no dependent users,
+ * unpin all quorum regions
+ */
+ o2hb_dependent_users--;
+ if (!o2hb_dependent_users)
+ o2hb_region_unpin(NULL);
- if (reg) {
- o2nm_undepend_item(®->hr_item);
- o2nm_undepend_this_node();
- }
+unlock:
+ spin_unlock(&o2hb_live_lock);
}
int o2hb_register_callback(const char *region_uuid,
}
if (region_uuid) {
- ret = o2hb_region_get(region_uuid);
- if (ret)
+ ret = o2hb_region_inc_user(region_uuid);
+ if (ret) {
+ mlog_errno(ret);
goto out;
+ }
}
down_write(&o2hb_callback_sem);
up_write(&o2hb_callback_sem);
ret = 0;
out:
- mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
ret, __builtin_return_address(0), hc);
return ret;
}
{
BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
- mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+ mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
__builtin_return_address(0), hc);
/* XXX Can this happen _with_ a region reference? */
return;
if (region_uuid)
- o2hb_region_put(region_uuid);
+ o2hb_region_dec_user(region_uuid);
down_write(&o2hb_callback_sem);
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/writeback.h>
result = 0;
if (journal->j_dev_bd != NULL) {
- if (journal->j_dev_bd->bd_dev != super->s_dev)
- bd_release(journal->j_dev_bd);
result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
journal->j_dev_bd = NULL;
}
{
int result;
dev_t jdev;
- fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
+ fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
char b[BDEVNAME_SIZE];
result = 0;
/* there is no "jdev" option and journal is on separate device */
if (!jdev_name || !jdev_name[0]) {
- journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
+ if (jdev == super->s_dev)
+ blkdev_mode &= ~FMODE_EXCL;
+ journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
+ journal);
journal->j_dev_mode = blkdev_mode;
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
"cannot init journal device '%s': %i",
__bdevname(jdev, b), result);
return result;
- } else if (jdev != super->s_dev) {
- result = bd_claim(journal->j_dev_bd, journal);
- if (result) {
- blkdev_put(journal->j_dev_bd, blkdev_mode);
- return result;
- }
-
+ } else if (jdev != super->s_dev)
set_blocksize(journal->j_dev_bd, super->s_blocksize);
- }
return 0;
}
journal->j_dev_mode = blkdev_mode;
- journal->j_dev_bd = open_bdev_exclusive(jdev_name,
- blkdev_mode, journal);
+ journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL;
{
struct file *file = sd->u.file;
loff_t pos = sd->pos;
- int ret, more;
-
- ret = buf->ops->confirm(pipe, buf);
- if (!ret) {
- more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
- if (file->f_op && file->f_op->sendpage)
- ret = file->f_op->sendpage(file, buf->page, buf->offset,
- sd->len, &pos, more);
- else
- ret = -EINVAL;
- }
+ int more;
- return ret;
+ if (unlikely(!(file->f_op && file->f_op->sendpage)))
+ return -EINVAL;
+
+ more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+ return file->f_op->sendpage(file, buf->page, buf->offset,
+ sd->len, &pos, more);
}
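With buf->ops->confirm() hoisted out of the individual actors and into splice_from_pipe_feed() (see the hunk below), an actor may assume the pipe buffer is already uptodate and simply consume it. Under the new contract a minimal write-style actor looks like this (hypothetical name; same shape as the write_pipe_buf hunk further down):

	static int my_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			    struct splice_desc *sd)
	{
		void *src;
		int ret;

		/* no ->confirm() here: splice_from_pipe_feed() already ran it */
		src = buf->ops->map(pipe, buf, 0);
		ret = kernel_write(sd->u.file, src + buf->offset, sd->len, sd->pos);
		buf->ops->unmap(pipe, buf, src);
		return ret;
	}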
/*
void *fsdata;
int ret;
- /*
- * make sure the data in this buffer is uptodate
- */
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
-
offset = sd->pos & ~PAGE_CACHE_MASK;
this_len = sd->len;
if (sd->len > sd->total_len)
sd->len = sd->total_len;
- ret = actor(pipe, buf, sd);
- if (ret <= 0) {
+ ret = buf->ops->confirm(pipe, buf);
+ if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
return ret;
}
+
+ ret = actor(pipe, buf, sd);
+ if (ret <= 0)
+ return ret;
+
buf->offset += ret;
buf->len -= ret;
int ret;
void *data;
- ret = buf->ops->confirm(pipe, buf);
- if (ret)
- return ret;
-
data = buf->ops->map(pipe, buf, 0);
ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
buf->ops->unmap(pipe, buf, data);
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
-/*
- * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
- * location, so checking ->i_pipe is not enough to verify that this is a
- * pipe.
- */
-static inline struct pipe_inode_info *pipe_info(struct inode *inode)
-{
- if (S_ISFIFO(inode->i_mode))
- return inode->i_pipe;
-
- return NULL;
-}
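pipe_info() is not gone; it moved into the pipe core as get_pipe_info() so callers outside fs/splice.c can share it, and the call sites below simply switch over. Its body presumably keeps the S_ISFIFO check for the reason the deleted comment gives (sketch under that assumption):

	struct pipe_inode_info *get_pipe_info(struct file *file)
	{
		struct inode *i = file->f_path.dentry->d_inode;

		/* i_pipe shares storage with i_bdev/i_cdev, so check the mode */
		return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
	}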
/*
* Determine where to splice to/from.
loff_t offset, *off;
long ret;
- ipipe = pipe_info(in->f_path.dentry->d_inode);
- opipe = pipe_info(out->f_path.dentry->d_inode);
+ ipipe = get_pipe_info(in);
+ opipe = get_pipe_info(out);
if (ipipe && opipe) {
if (off_in || off_out)
char *src;
int ret;
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
-
/*
* See if we can use the atomic maps, by prefaulting in the
* pages and doing an atomic copy
int error;
long ret;
- pipe = pipe_info(file->f_path.dentry->d_inode);
+ pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
};
long ret;
- pipe = pipe_info(file->f_path.dentry->d_inode);
+ pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
static long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags)
{
- struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
- struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
+ struct pipe_inode_info *ipipe = get_pipe_info(in);
+ struct pipe_inode_info *opipe = get_pipe_info(out);
int ret = -EINVAL;
/*
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
+#include <linux/rculist_bl.h>
#include "internal.h"
INIT_LIST_HEAD(&s->s_files);
#endif
INIT_LIST_HEAD(&s->s_instances);
- INIT_HLIST_HEAD(&s->s_anon);
+ INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
{
struct block_device *bdev;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
int error = 0;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
/*
* s_umount nests inside bd_mutex during
- * __invalidate_device(). close_bdev_exclusive()
- * acquires bd_mutex and can't be called under
- * s_umount. Drop s_umount temporarily. This is safe
- * as we're holding an active reference.
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
*/
up_write(&s->s_umount);
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
down_write(&s->s_umount);
} else {
char b[BDEVNAME_SIZE];
error_s:
error = PTR_ERR(s);
error_bdev:
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
error:
return ERR_PTR(error);
}
bdev->bd_super = NULL;
generic_shutdown_super(sb);
sync_blockdev(bdev);
- close_bdev_exclusive(bdev, mode);
+ WARN_ON_ONCE(!(mode & FMODE_EXCL));
+ blkdev_put(bdev, mode | FMODE_EXCL);
}
EXPORT_SYMBOL(kill_block_super);
return mnt;
err:
- mntput(mnt);
+ mntput_long(mnt);
return ERR_PTR(err);
}
{
int error = 0;
- *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
+ *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ mp);
if (IS_ERR(*bdevp)) {
error = PTR_ERR(*bdevp);
printk("XFS: Invalid device [%s], error=%d\n", name, error);
struct block_device *bdev)
{
if (bdev)
- close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
/*
struct xfs_ail *ailp,
xfs_lsn_t threshold_lsn)
{
- ailp->xa_target = threshold_lsn;
- wake_up_process(ailp->xa_task);
+ /* only ever move the target forwards */
+ if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
+ ailp->xa_target = threshold_lsn;
+ wake_up_process(ailp->xa_task);
+ }
}
STATIC int
long tout = 0; /* milliseconds */
while (!kthread_should_stop()) {
- schedule_timeout_interruptible(tout ?
- msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+ /*
+ * for short sleeps indicating congestion, don't allow us to
+ * get woken early. Otherwise all we do is bang on the AIL lock
+ * without making progress.
+ */
+ if (tout && tout <= 20)
+ __set_current_state(TASK_KILLABLE);
+ else
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
/* swsusp */
try_to_freeze();
* Slab object creation initialisation for the XFS inode.
* This covers only the idempotent fields in the XFS inode;
* all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly intialise
+ * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
* when freeing the inode.
*/
*/
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+ lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+ &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
xfs_inactive(ip);
}
void *elevator_private3;
struct gendisk *rq_disk;
+ struct hd_struct *part;
unsigned long start_time;
#ifdef CONFIG_BLK_CGROUP
unsigned long long start_time_ns;
unsigned char misaligned;
unsigned char discard_misaligned;
- unsigned char no_cluster;
+ unsigned char cluster;
signed char discard_zeroes_data;
};
#endif
};
-#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
- (1 << QUEUE_FLAG_CLUSTER) | \
(1 << QUEUE_FLAG_STACKABLE) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_ADD_RANDOM))
#define rq_data_dir(rq) ((rq)->cmd_flags & 1)
+static inline unsigned int blk_queue_cluster(struct request_queue *q)
+{
+ return q->limits.cluster;
+}
+
/*
* We regard a request as sync, if either a read or a sync write
*/
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
- extern void register_disk(struct gendisk *dev);
extern void generic_make_request(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_put_request(struct request *);
extern void blk_cleanup_queue(struct request_queue *);
extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
void **, unsigned long *);
+ unsigned int (*check_events) (struct gendisk *disk,
+ unsigned int clearing);
+ /* ->media_changed() is DEPRECATED, use ->check_events() instead */
int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
#define SEEK_MAX SEEK_END
struct fstrim_range {
- uint64_t start;
- uint64_t len;
- uint64_t minlen;
+ __u64 start;
+ __u64 len;
+ __u64 minlen;
};
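The uint64_t → __u64 change matters because struct fstrim_range is userspace ABI, consumed through the FITRIM ioctl, and exported headers must use the __u64 family rather than the C99 types. Typical userspace usage, as a hedged sketch:

	#include <limits.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>           /* struct fstrim_range, FITRIM */

	struct fstrim_range range = {
		.start  = 0,
		.len    = ULLONG_MAX,   /* trim the whole filesystem */
		.minlen = 0,
	};

	if (ioctl(fd, FITRIM, &range) < 0)
		perror("FITRIM");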
/* And dynamically-tunable limits and defaults: */
#include <linux/path.h>
#include <linux/stat.h>
#include <linux/cache.h>
-#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/prio_tree.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fiemap.h>
+#include <linux/rculist_bl.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
struct iovec;
struct nameidata;
struct kiocb;
+struct kobject;
struct pipe_inode_info;
struct poll_table_struct;
struct kstatfs;
sector_t (*bmap)(struct address_space *, sector_t);
void (*invalidatepage) (struct page *, unsigned long);
int (*releasepage) (struct page *, gfp_t);
+ void (*freepage)(struct page *);
ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
loff_t offset, unsigned long nr_segs);
int (*get_xip_mem)(struct address_space *, pgoff_t, int,
void * bd_claiming;
void * bd_holder;
int bd_holders;
+ bool bd_write_holder;
#ifdef CONFIG_SYSFS
- struct list_head bd_holder_list;
+ struct gendisk * bd_holder_disk; /* for sysfs slave linking */
#endif
struct block_device * bd_contains;
unsigned bd_block_size;
#define ACL_NOT_CACHED ((void *)(-1))
struct inode {
+ /* RCU path lookup touches following: */
+ umode_t i_mode;
+ uid_t i_uid;
+ gid_t i_gid;
+ const struct inode_operations *i_op;
+ struct super_block *i_sb;
+
+ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
+ unsigned int i_flags;
+ struct mutex i_mutex;
+
+ unsigned long i_state;
+ unsigned long dirtied_when; /* jiffies of first dirtying */
+
struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
- struct list_head i_dentry;
+ union {
+ struct list_head i_dentry;
+ struct rcu_head i_rcu;
+ };
unsigned long i_ino;
atomic_t i_count;
unsigned int i_nlink;
- uid_t i_uid;
- gid_t i_gid;
dev_t i_rdev;
unsigned int i_blkbits;
u64 i_version;
struct timespec i_ctime;
blkcnt_t i_blocks;
unsigned short i_bytes;
- umode_t i_mode;
- spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
- struct mutex i_mutex;
struct rw_semaphore i_alloc_sem;
- const struct inode_operations *i_op;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
- struct super_block *i_sb;
struct file_lock *i_flock;
struct address_space *i_mapping;
struct address_space i_data;
struct hlist_head i_fsnotify_marks;
#endif
- unsigned long i_state;
- unsigned long dirtied_when; /* jiffies of first dirtying */
-
- unsigned int i_flags;
-
#ifdef CONFIG_IMA
/* protected by i_lock */
unsigned int i_readcount; /* struct files open RO */
const struct xattr_handler **s_xattr;
struct list_head s_inodes; /* all inodes */
- struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
#ifdef CONFIG_SMP
struct list_head __percpu *s_files;
#else
struct list_head s_files;
#endif
- /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
+ /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
* generic_show_options()
*/
char __rcu *s_options;
+ const struct dentry_operations *s_d_op; /* default d_op for dentries */
};
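s_d_op gives the VFS a per-superblock default for dentry operations, so a filesystem such as gfs2 (converted earlier in this series) sets it once at mount time instead of assigning dentry->d_op at every allocation site. The intended effect inside dentry allocation is roughly the following, though the exact dcache placement is not shown in this patch:

	/* on dentry allocation: inherit the superblock-wide default, if set */
	if (dentry->d_sb->s_d_op)
		d_set_d_op(dentry, dentry->d_sb->s_d_op);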
extern struct timespec current_fs_time(struct super_block *sb);
int (*setlease)(struct file *, long, struct file_lock **);
};
+#define IPERM_FLAG_RCU 0x0001
+
struct inode_operations {
- int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+ void * (*follow_link) (struct dentry *, struct nameidata *);
+ int (*permission) (struct inode *, int, unsigned int);
+ int (*check_acl)(struct inode *, int, unsigned int);
+
+ int (*readlink) (struct dentry *, char __user *,int);
+ void (*put_link) (struct dentry *, struct nameidata *, void *);
+
+ int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct inode *,struct dentry *,const char *);
int (*mknod) (struct inode *,struct dentry *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
- int (*readlink) (struct dentry *, char __user *,int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
void (*truncate) (struct inode *);
- int (*permission) (struct inode *, int);
- int (*check_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
loff_t len);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
-};
+} ____cacheline_aligned;
struct seq_file;
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
- int (*trim_fs) (struct super_block *, struct fstrim_range *);
};
/*
int (*set)(struct super_block *,void *),
void *data);
extern struct dentry *mount_pseudo(struct file_system_type *, char *,
- const struct super_operations *ops, unsigned long);
+ const struct super_operations *ops,
+ const struct dentry_operations *dops,
+ unsigned long);
extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
static inline void sb_mark_dirty(struct super_block *sb)
extern void bd_set_size(struct block_device *, loff_t size);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
- extern struct block_device *open_by_devnum(dev_t, fmode_t);
extern void invalidate_bdev(struct block_device *);
extern int sync_blockdev(struct block_device *bdev);
extern struct super_block *freeze_bdev(struct block_device *);
extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
- extern int blkdev_get(struct block_device *, fmode_t);
- extern int blkdev_put(struct block_device *, fmode_t);
- extern int bd_claim(struct block_device *, void *);
- extern void bd_release(struct block_device *);
+ extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder);
+ extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
+ void *holder);
+ extern int blkdev_put(struct block_device *bdev, fmode_t mode);
#ifdef CONFIG_SYSFS
- extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
- extern void bd_release_from_disk(struct block_device *, struct gendisk *);
+ extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
#else
- #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
- #define bd_release_from_disk(bdev, disk) bd_release(bdev)
+ static inline int bd_link_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+ {
+ return 0;
+ }
#endif
#endif
extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
- extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
- extern void close_bdev_exclusive(struct block_device *, fmode_t);
extern void blkdev_show(struct seq_file *,off_t);
#else
#endif
extern int notify_change(struct dentry *, struct iattr *);
extern int inode_permission(struct inode *, int);
-extern int generic_permission(struct inode *, int,
- int (*check_acl)(struct inode *, int));
+extern int generic_permission(struct inode *, int, unsigned int,
+ int (*check_acl)(struct inode *, int, unsigned int));
static inline bool execute_ok(struct inode *inode)
{
extern void end_writeback(struct inode *);
extern void __destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
+extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_suid(struct file *);
{
ino_t res;
+ /*
+ * Don't strictly need d_lock here? If the parent ino could change
+ * then surely we'd have a deeper race in the caller?
+ */
spin_lock(&dentry->d_lock);
res = dentry->d_parent->d_inode->i_ino;
spin_unlock(&dentry->d_lock);
#define UNMAP 0x42
#define READ_TOC 0x43
#define READ_HEADER 0x44
+ #define GET_EVENT_STATUS_NOTIFICATION 0x4a
#define LOG_SELECT 0x4c
#define LOG_SENSE 0x4d
#define XDWRITEREAD_10 0x53
#define PERSISTENT_RESERVE_OUT 0x5f
#define VARIABLE_LENGTH_CMD 0x7f
#define REPORT_LUNS 0xa0
+#define SECURITY_PROTOCOL_IN 0xa2
#define MAINTENANCE_IN 0xa3
#define MAINTENANCE_OUT 0xa4
#define MOVE_MEDIUM 0xa5
#define EXCHANGE_MEDIUM 0xa6
#define READ_12 0xa8
#define WRITE_12 0xaa
+#define READ_MEDIA_SERIAL_NUMBER 0xab
#define WRITE_VERIFY_12 0xae
#define VERIFY_12 0xaf
#define SEARCH_HIGH_12 0xb0
#define SEARCH_EQUAL_12 0xb1
#define SEARCH_LOW_12 0xb2
+#define SECURITY_PROTOCOL_OUT 0xb5
#define READ_ELEMENT_STATUS 0xb8
#define SEND_VOLUME_TAG 0xb6
#define WRITE_LONG_2 0xea
+#define EXTENDED_COPY 0x83
+#define RECEIVE_COPY_RESULTS 0x84
+#define ACCESS_CONTROL_IN 0x86
+#define ACCESS_CONTROL_OUT 0x87
#define READ_16 0x88
#define WRITE_16 0x8a
+#define READ_ATTRIBUTE 0x8c
+#define WRITE_ATTRIBUTE 0x8d
#define VERIFY_16 0x8f
#define WRITE_SAME_16 0x93
#define SERVICE_ACTION_IN 0x9e
/* values for service action in */
#define SAI_READ_CAPACITY_16 0x10
#define SAI_GET_LBA_STATUS 0x12
+/* values for VARIABLE_LENGTH_CMD service action codes
+ * see spc4r17 Section D.3.5, table D.7 and D.8 */
+#define VLC_SA_RECEIVE_CREDENTIAL 0x1800
/* values for maintenance in */
+#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
#define MI_REPORT_TARGET_PGS 0x0a
+#define MI_REPORT_ALIASES 0x0b
+#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c
+#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d
+#define MI_REPORT_PRIORITY 0x0e
+#define MI_REPORT_TIMESTAMP 0x0f
+#define MI_MANAGEMENT_PROTOCOL_IN 0x10
/* values for maintenance out */
+#define MO_SET_IDENTIFYING_INFORMATION 0x06
#define MO_SET_TARGET_PGS 0x0a
+#define MO_CHANGE_ALIASES 0x0b
+#define MO_SET_PRIORITY 0x0e
+#define MO_SET_TIMESTAMP 0x0f
+#define MO_MANAGEMENT_PROTOCOL_OUT 0x10
/* values for variable length command */
+#define XDREAD_32 0x03
+#define XDWRITE_32 0x04
+#define XPWRITE_32 0x06
+#define XDWRITEREAD_32 0x07
#define READ_32 0x09
#define VERIFY_32 0x0a
#define WRITE_32 0x0b
*
*
* This file is released under the GPLv2.
*
#include "power.h"
-#define HIBERNATE_SIG "LINHIB0001"
+#define HIBERNATE_SIG "S1SUSPEND"
/*
* The swap map is a data structure used for keeping track of each page
return res;
root_swap = res;
- res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
+ res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
if (res)
return res;
{
unsigned int m;
int error = 0;
+ struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page;
+ size_t i, off, unc_len, cmp_len;
+ unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
- page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ for (i = 0; i < LZO_CMP_PAGES; i++) {
+ page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page[i]) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+
+ while (i)
+ free_page((unsigned long)page[--i]);
+
+ return -ENOMEM;
+ }
}
unc = vmalloc(LZO_UNC_SIZE);
if (!unc) {
printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- free_page((unsigned long)page);
+
+ for (i = 0; i < LZO_CMP_PAGES; i++)
+ free_page((unsigned long)page[i]);
+
return -ENOMEM;
}
cmp = vmalloc(LZO_CMP_SIZE);
if (!cmp) {
printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
+
vfree(unc);
- free_page((unsigned long)page);
+ for (i = 0; i < LZO_CMP_PAGES; i++)
+ free_page((unsigned long)page[i]);
+
return -ENOMEM;
}
if (!m)
m = 1;
nr_pages = 0;
+ bio = NULL;
do_gettimeofday(&start);
error = snapshot_write_next(snapshot);
goto out_finish;
for (;;) {
- error = swap_read_page(handle, page, NULL); /* sync */
+ error = swap_read_page(handle, page[0], NULL); /* sync */
if (error)
break;
- cmp_len = *(size_t *)page;
+ cmp_len = *(size_t *)page[0];
if (unlikely(!cmp_len ||
cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
printk(KERN_ERR "PM: Invalid LZO compressed length\n");
break;
}
- memcpy(cmp, page, PAGE_SIZE);
- for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- error = swap_read_page(handle, page, NULL); /* sync */
+ for (off = PAGE_SIZE, i = 1;
+ off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
+ error = swap_read_page(handle, page[i], &bio);
if (error)
goto out_finish;
+ }
- memcpy(cmp + off, page, PAGE_SIZE);
+ error = hib_wait_on_bio_chain(&bio); /* need all data now */
+ if (error)
+ goto out_finish;
+
+ for (off = 0, i = 0;
+ off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
+ memcpy(cmp + off, page[i], PAGE_SIZE);
}
unc_len = LZO_UNC_SIZE;
vfree(cmp);
vfree(unc);
- free_page((unsigned long)page);
+ for (i = 0; i < LZO_CMP_PAGES; i++)
+ free_page((unsigned long)page[i]);
return error;
}
/**
* swsusp_read - read the hibernation image.
* @flags_p: flags passed by the "frozen" kernel in the image header should
- * be written into this memeory location
+ * be written into this memory location
*/
int swsusp_read(unsigned int *flags_p)
{
int error;
- hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+ hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+ FMODE_READ, NULL);
if (!IS_ERR(hib_resume_bdev)) {
set_blocksize(hib_resume_bdev, PAGE_SIZE);
clear_page(swsusp_header);