Merge tag 'md/4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
author    Linus Torvalds <[email protected]>
          Fri, 20 May 2016 00:25:13 +0000 (17:25 -0700)
committer Linus Torvalds <[email protected]>
          Fri, 20 May 2016 00:25:13 +0000 (17:25 -0700)
Pull MD updates from Shaohua Li:
 "Several patches from Guoqing fixing md-cluster bugs and several
  patches from Heinz fixing dm-raid bugs"

* tag 'md/4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md-cluster: check the return value of process_recvd_msg
  md-cluster: gather resync infos and enable recv_thread after bitmap is ready
  md: set MD_CHANGE_PENDING in an atomic region
  md: raid5: add prerequisite to run underneath dm-raid
  md: raid10: add prerequisite to run underneath dm-raid
  md: md.c: fix oops in mddev_suspend for raid0
  md-cluster: fix ifnullfree.cocci warnings
  md-cluster/bitmap: unplug bitmap to sync dirty pages to disk
  md-cluster/bitmap: fix wrong page num in bitmap_file_clear_bit and bitmap_file_set_bit
  md-cluster/bitmap: fix wrong calculation of offset
  md-cluster: sync bitmap when node received RESYNCING msg
  md-cluster: always setup in-memory bitmap
  md-cluster: wakeup thread if activated a spare disk
  md-cluster: change array_sectors and update size are not supported
  md-cluster: fix locking when node joins cluster during message broadcast
  md-cluster: unregister thread if err happened
  md-cluster: wake up thread to continue recovery
  md-cluster: make resync_finish only be called after pers->sync_request
  md-cluster: change resync lock from asynchronous to synchronous

drivers/md/md.c
drivers/md/raid5-cache.c

diff --combined drivers/md/md.c
index c9a475c33cc7401dc067716e6cf3aaf648c46d93,a79462dcd5e1a7dc53fb21034f456f0f1dcae802..866825f10b4c933f75eb4cbaff46c624f6267826
@@@ -307,7 -307,7 +307,7 @@@ static blk_qc_t md_make_request(struct 
   */
  void mddev_suspend(struct mddev *mddev)
  {
-       WARN_ON_ONCE(current == mddev->thread->tsk);
+       WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
        if (mddev->suspended++)
                return;
        synchronize_rcu();
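
This first hunk is the "fix oops in mddev_suspend for raid0" patch: raid0 runs without a personality thread, so mddev->thread is NULL and the old WARN_ON_ONCE dereferenced a NULL pointer. A minimal userspace model of the short-circuit guard (struct layout and harness are illustrative, not the kernel's):

    /* mddev->thread is NULL for raid0; the new check short-circuits
     * before ever touching thread->tsk. */
    #include <assert.h>
    #include <stddef.h>

    struct md_thread { const void *tsk; };
    struct mddev { struct md_thread *thread; };

    int main(void)
    {
            struct mddev raid0 = { .thread = NULL };
            const void *current_task = (const void *)0x1;

            /* old form: current == mddev->thread->tsk  ->  NULL deref   */
            /* new form: left operand is false, right is never evaluated */
            assert(!(raid0.thread && current_task == raid0.thread->tsk));
            return 0;
    }
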
@@@ -2291,19 -2291,24 +2291,24 @@@ void md_update_sb(struct mddev *mddev, 
                return;
        }
  
+ repeat:
        if (mddev_is_clustered(mddev)) {
                if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
                        force_change = 1;
+               if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+                       nospares = 1;
                ret = md_cluster_ops->metadata_update_start(mddev);
                /* Has someone else updated the sb? */
                if (!does_sb_need_changing(mddev)) {
                        if (ret == 0)
                                md_cluster_ops->metadata_update_cancel(mddev);
-                       clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+                       bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+                                                        BIT(MD_CHANGE_DEVS) |
+                                                        BIT(MD_CHANGE_CLEAN));
                        return;
                }
        }
- repeat:
        /* First make sure individual recovery_offsets are correct */
        rdev_for_each(rdev, mddev) {
                if (rdev->raid_disk >= 0 &&
        md_super_wait(mddev);
        /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
  
-       spin_lock(&mddev->lock);
+       if (mddev_is_clustered(mddev) && ret == 0)
+               md_cluster_ops->metadata_update_finish(mddev);
        if (mddev->in_sync != sync_req ||
-           test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
+           !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+                              BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
                /* have to write it out again */
-               spin_unlock(&mddev->lock);
                goto repeat;
-       }
-       clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-       spin_unlock(&mddev->lock);
        wake_up(&mddev->sb_wait);
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                clear_bit(BlockedBadBlocks, &rdev->flags);
                wake_up(&rdev->blocked_wait);
        }
-       if (mddev_is_clustered(mddev) && ret == 0)
-               md_cluster_ops->metadata_update_finish(mddev);
  }
  EXPORT_SYMBOL(md_update_sb);
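
The bit_clear_unless() calls above implement "set MD_CHANGE_PENDING in an atomic region": PENDING is dropped only if no new DEVS/CLEAN change arrived in the meantime, otherwise md_update_sb() loops back to repeat:. A userspace sketch of the helper's semantics (the real macro is a cmpxchg loop in include/linux/bitops.h; this model is illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Clear the 'clear' bits in *flags unless any 'test' bit is set;
     * return true when the clear actually happened. */
    static bool bit_clear_unless_model(atomic_ulong *flags,
                                       unsigned long clear,
                                       unsigned long test)
    {
            unsigned long old = atomic_load(flags);

            for (;;) {
                    if (old & test)
                            return false; /* re-dirtied, caller must write the sb again */
                    if (atomic_compare_exchange_weak(flags, &old, old & ~clear))
                            return true;
                    /* CAS failed and reloaded 'old'; re-check and retry */
            }
    }
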
  
@@@ -4816,6 -4817,10 +4817,10 @@@ array_size_store(struct mddev *mddev, c
        if (err)
                return err;
  
+       /* cluster raid doesn't support changing array_sectors */
+       if (mddev_is_clustered(mddev))
+               return -EINVAL;
        if (strncmp(buf, "default", 7) == 0) {
                if (mddev->pers)
                        sectors = mddev->pers->size(mddev, 0, 0);
@@@ -5039,7 -5044,7 +5044,7 @@@ static int md_alloc(dev_t dev, char *na
        disk->fops = &md_fops;
        disk->private_data = mddev;
        disk->queue = mddev->queue;
 -      blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
 +      blk_queue_write_cache(mddev->queue, true, true);
        /* Allow extended partitions.  This makes the
         * 'mdp' device redundant, but we can't really
         * remove it now.
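
The blk_queue_write_cache() hunk is the 4.7 block-layer conversion: instead of ORing REQ_FLUSH | REQ_FUA into the queue's flush flags, a driver now declares its cache features with two booleans. A kernel-style fragment showing the call (the surrounding driver context is hypothetical):

    #include <linux/blkdev.h>

    static void example_init_queue(struct request_queue *q)
    {
            /* wc=true: device has a volatile write cache, so the block
             * layer must issue flushes; fua=true: FUA writes supported. */
            blk_queue_write_cache(q, true, true);
    }
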
@@@ -6437,6 -6442,10 +6442,10 @@@ static int update_size(struct mddev *md
        int rv;
        int fit = (num_sectors == 0);
  
+       /* cluster raid doesn't support updating the size */
+       if (mddev_is_clustered(mddev))
+               return -EINVAL;
        if (mddev->pers->resize == NULL)
                return -EINVAL;
        /* The "num_sectors" is the number of sectors of each device that
@@@ -7785,7 -7794,7 +7794,7 @@@ void md_do_sync(struct md_thread *threa
        struct md_rdev *rdev;
        char *desc, *action = NULL;
        struct blk_plug plug;
-       bool cluster_resync_finished = false;
+       int ret;
  
        /* just in case the thread restarts... */
        if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
                return;
        }
  
+       if (mddev_is_clustered(mddev)) {
+               ret = md_cluster_ops->resync_start(mddev);
+               if (ret)
+                       goto skip;
+               if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+                       test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
+                       test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
+                    && ((unsigned long long)mddev->curr_resync_completed
+                        < (unsigned long long)mddev->resync_max_sectors))
+                       goto skip;
+       }
        if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
                if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
                        desc = "data-check";
                mddev->curr_resync_completed = mddev->curr_resync;
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
-       /* tell personality and other nodes that we are finished */
-       if (mddev_is_clustered(mddev)) {
-               md_cluster_ops->resync_finish(mddev);
-               cluster_resync_finished = true;
-       }
        mddev->pers->sync_request(mddev, max_sectors, &skipped);
  
        if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
                }
        }
   skip:
-       set_bit(MD_CHANGE_DEVS, &mddev->flags);
        if (mddev_is_clustered(mddev) &&
-           test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-           !cluster_resync_finished)
+           ret == 0) {
+               /* set CHANGE_PENDING here since another update may be
+                * needed, so other nodes are informed */
+               set_mask_bits(&mddev->flags, 0,
+                             BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
+               md_wakeup_thread(mddev->thread);
+               wait_event(mddev->sb_wait,
+                          !test_bit(MD_CHANGE_PENDING, &mddev->flags));
                md_cluster_ops->resync_finish(mddev);
+       } else
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
  
        spin_lock(&mddev->lock);
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
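
The md_do_sync() rework replaces the early resync_finish with a publish-and-wait pattern: set_mask_bits() raises CHANGE_PENDING and CHANGE_DEVS in one atomic step, the md thread is woken to write the superblock, and resync_finish() runs only after sb_wait sees PENDING cleared, so other cluster nodes observe a consistent superblock first. A userspace model of set_mask_bits() (the real macro is a cmpxchg loop in include/linux/bitops.h):

    #include <stdatomic.h>

    /* Atomically replace the 'mask' bits of *flags with 'bits'. */
    static void set_mask_bits_model(atomic_ulong *flags,
                                    unsigned long mask, unsigned long bits)
    {
            unsigned long old = atomic_load(flags);

            while (!atomic_compare_exchange_weak(flags, &old,
                                                 (old & ~mask) | bits))
                    ; /* a concurrent writer won the race; retry with fresh 'old' */
    }

With mask == 0, as in the md patches, the call degenerates to an atomic OR of 'bits'.
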
@@@ -8226,18 -8249,9 +8249,9 @@@ static void md_start_sync(struct work_s
        struct mddev *mddev = container_of(ws, struct mddev, del_work);
        int ret = 0;
  
-       if (mddev_is_clustered(mddev)) {
-               ret = md_cluster_ops->resync_start(mddev);
-               if (ret) {
-                       mddev->sync_thread = NULL;
-                       goto out;
-               }
-       }
        mddev->sync_thread = md_register_thread(md_do_sync,
                                                mddev,
                                                "resync");
- out:
        if (!mddev->sync_thread) {
                if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
                        printk(KERN_ERR "%s: could not start resync"
@@@ -8536,6 -8550,7 +8550,7 @@@ EXPORT_SYMBOL(md_finish_reshape)
  int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                       int is_new)
  {
+       struct mddev *mddev = rdev->mddev;
        int rv;
        if (is_new)
                s += rdev->new_data_offset;
        if (rv == 0) {
                /* Make sure they get written out promptly */
                sysfs_notify_dirent_safe(rdev->sysfs_state);
-               set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
-               set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
+               set_mask_bits(&mddev->flags, 0,
+                             BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
                md_wakeup_thread(rdev->mddev->thread);
                return 1;
        } else
@@@ -8680,6 -8695,11 +8695,11 @@@ static void check_sb_changes(struct mdd
                                ret = remove_and_add_spares(mddev, rdev2);
                                pr_info("Activated spare: %s\n",
                                                bdevname(rdev2->bdev,b));
+                               /* wake up mddev->thread here, so the array can
+                                * perform a resync with the newly activated disk */
+                               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                               md_wakeup_thread(mddev->thread);
                        }
                        /* device faulty
                         * We just want to do the minimum to mark the disk
diff --combined drivers/md/raid5-cache.c
index 26f14970a858c9b6426a0e4e360124d083710f3d,ac51bc5ecb16539aa7ea40382cc804a0778ba45f..e889e2deb7b3525ff226a7d7053df1fe8c54f115
@@@ -712,8 -712,8 +712,8 @@@ static void r5l_write_super_and_discard
         * in_teardown check workaround this issue.
         */
        if (!log->in_teardown) {
-               set_bit(MD_CHANGE_DEVS, &mddev->flags);
-               set_bit(MD_CHANGE_PENDING, &mddev->flags);
+               set_mask_bits(&mddev->flags, 0,
+                             BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait,
                        !test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
@@@ -1188,7 -1188,6 +1188,7 @@@ ioerr
  
  int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
  {
 +      struct request_queue *q = bdev_get_queue(rdev->bdev);
        struct r5l_log *log;
  
        if (PAGE_SIZE != 4096)
                return -ENOMEM;
        log->rdev = rdev;
  
 -      log->need_cache_flush = (rdev->bdev->bd_disk->queue->flush_flags != 0);
 +      log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0;
  
        log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
                                       sizeof(rdev->mddev->uuid));
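
This last hunk is the consumer side of the same write-cache API change: flush_flags is gone, so raid5-cache now asks whether the log device advertises a volatile write cache through the queue flag. A kernel-style sketch of the pattern (helper name and context are illustrative):

    #include <linux/blkdev.h>

    static bool dev_needs_cache_flush(struct block_device *bdev)
    {
            struct request_queue *q = bdev_get_queue(bdev);

            /* QUEUE_FLAG_WC is set when the driver called
             * blk_queue_write_cache(q, true, ...). */
            return test_bit(QUEUE_FLAG_WC, &q->queue_flags);
    }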