]> Git Repo - linux.git/blobdiff - drivers/md/md.c
[PATCH] md: allow a manual resync with md
[linux.git] / drivers / md / md.c
index 9ecf51ee596fb296f64a75f8372828e0d2701012..37400873b8796d7885acbc3c349a8ac3c69993e5 100644 (file)
@@ -181,7 +181,7 @@ static void mddev_put(mddev_t *mddev)
        if (!mddev->raid_disks && list_empty(&mddev->disks)) {
                list_del(&mddev->all_mddevs);
                blk_put_queue(mddev->queue);
-               kfree(mddev);
+               kobject_unregister(&mddev->kobj);
        }
        spin_unlock(&all_mddevs_lock);
 }
@@ -711,6 +711,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
         */
        int i;
        int active=0, working=0,failed=0,spare=0,nr_disks=0;
+       unsigned int fixdesc=0;
 
        rdev->sb_size = MD_SB_BYTES;
 
@@ -758,16 +759,28 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        sb->disks[0].state = (1<<MD_DISK_REMOVED);
        ITERATE_RDEV(mddev,rdev2,tmp) {
                mdp_disk_t *d;
+               int desc_nr;
                if (rdev2->raid_disk >= 0 && rdev2->in_sync && !rdev2->faulty)
-                       rdev2->desc_nr = rdev2->raid_disk;
+                       desc_nr = rdev2->raid_disk;
                else
-                       rdev2->desc_nr = next_spare++;
+                       desc_nr = next_spare++;
+               if (desc_nr != rdev2->desc_nr) {
+                       fixdesc |= (1 << desc_nr);
+                       rdev2->desc_nr = desc_nr;
+                       if (rdev2->raid_disk >= 0) {
+                               char nm[20];
+                               sprintf(nm, "rd%d", rdev2->raid_disk);
+                               sysfs_remove_link(&mddev->kobj, nm);
+                       }
+                       sysfs_remove_link(&rdev2->kobj, "block");
+                       kobject_del(&rdev2->kobj);
+               }
                d = &sb->disks[rdev2->desc_nr];
                nr_disks++;
                d->number = rdev2->desc_nr;
                d->major = MAJOR(rdev2->bdev->bd_dev);
                d->minor = MINOR(rdev2->bdev->bd_dev);
-               if (rdev2->raid_disk >= 0 && rdev->in_sync && !rdev2->faulty)
+               if (rdev2->raid_disk >= 0 && rdev2->in_sync && !rdev2->faulty)
                        d->raid_disk = rdev2->raid_disk;
                else
                        d->raid_disk = rdev2->desc_nr; /* compatibility */
@@ -787,7 +800,22 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                if (test_bit(WriteMostly, &rdev2->flags))
                        d->state |= (1<<MD_DISK_WRITEMOSTLY);
        }
-       
+       if (fixdesc)
+               ITERATE_RDEV(mddev,rdev2,tmp)
+                       if (fixdesc & (1<<rdev2->desc_nr)) {
+                               snprintf(rdev2->kobj.name, KOBJ_NAME_LEN, "dev%d",
+                                        rdev2->desc_nr);
+                               kobject_add(&rdev2->kobj);
+                               sysfs_create_link(&rdev2->kobj,
+                                                 &rdev2->bdev->bd_disk->kobj,
+                                                 "block");
+                               if (rdev2->raid_disk >= 0) {
+                                       char nm[20];
+                                       sprintf(nm, "rd%d", rdev2->raid_disk);
+                                       sysfs_create_link(&mddev->kobj,
+                                                         &rdev2->kobj, nm);
+                               }
+                       }
        /* now set the "removed" and "faulty" bits on any missing devices */
        for (i=0 ; i < mddev->raid_disks ; i++) {
                mdp_disk_t *d = &sb->disks[i];
@@ -1147,6 +1175,13 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
        list_add(&rdev->same_set, &mddev->disks);
        rdev->mddev = mddev;
        printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
+
+       rdev->kobj.k_name = NULL;
+       snprintf(rdev->kobj.name, KOBJ_NAME_LEN, "dev%d", rdev->desc_nr);
+       rdev->kobj.parent = kobject_get(&mddev->kobj);
+       kobject_add(&rdev->kobj);
+
+       sysfs_create_link(&rdev->kobj, &rdev->bdev->bd_disk->kobj, "block");
        return 0;
 }
 
@@ -1160,6 +1195,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
        list_del_init(&rdev->same_set);
        printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
        rdev->mddev = NULL;
+       sysfs_remove_link(&rdev->kobj, "block");
+       kobject_del(&rdev->kobj);
 }
 
 /*
@@ -1215,7 +1252,7 @@ static void export_rdev(mdk_rdev_t * rdev)
        md_autodetect_dev(rdev->bdev->bd_dev);
 #endif
        unlock_rdev(rdev);
-       kfree(rdev);
+       kobject_put(&rdev->kobj);
 }
 
 static void kick_rdev_from_array(mdk_rdev_t * rdev)
@@ -1414,6 +1451,94 @@ repeat:
 
 }
 
+struct rdev_sysfs_entry {
+       struct attribute attr;
+       ssize_t (*show)(mdk_rdev_t *, char *);
+       ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
+};
+
+static ssize_t
+rdev_show_state(mdk_rdev_t *rdev, char *page)
+{
+       char *sep = "";
+       int len=0;
+
+       if (rdev->faulty) {
+               len+= sprintf(page+len, "%sfaulty",sep);
+               sep = ",";
+       }
+       if (rdev->in_sync) {
+               len += sprintf(page+len, "%sin_sync",sep);
+               sep = ",";
+       }
+       if (!rdev->faulty && !rdev->in_sync) {
+               len += sprintf(page+len, "%sspare", sep);
+               sep = ",";
+       }
+       return len+sprintf(page+len, "\n");
+}
+
+static struct rdev_sysfs_entry rdev_state = {
+       .attr = {.name = "state", .mode = S_IRUGO },
+       .show = rdev_show_state,
+};
+
+static ssize_t
+rdev_show_super(mdk_rdev_t *rdev, char *page)
+{
+       if (rdev->sb_loaded && rdev->sb_size) {
+               memcpy(page, page_address(rdev->sb_page), rdev->sb_size);
+               return rdev->sb_size;
+       } else
+               return 0;
+}
+static struct rdev_sysfs_entry rdev_super = {
+       .attr = {.name = "super", .mode = S_IRUGO },
+       .show = rdev_show_super,
+};
+static struct attribute *rdev_default_attrs[] = {
+       &rdev_state.attr,
+       &rdev_super.attr,
+       NULL,
+};
+static ssize_t
+rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+       struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
+       mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+
+       if (!entry->show)
+               return -EIO;
+       return entry->show(rdev, page);
+}
+
+static ssize_t
+rdev_attr_store(struct kobject *kobj, struct attribute *attr,
+             const char *page, size_t length)
+{
+       struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
+       mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+
+       if (!entry->store)
+               return -EIO;
+       return entry->store(rdev, page, length);
+}
+
+static void rdev_free(struct kobject *ko)
+{
+       mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
+       kfree(rdev);
+}
+static struct sysfs_ops rdev_sysfs_ops = {
+       .show           = rdev_attr_show,
+       .store          = rdev_attr_store,
+};
+static struct kobj_type rdev_ktype = {
+       .release        = rdev_free,
+       .sysfs_ops      = &rdev_sysfs_ops,
+       .default_attrs  = rdev_default_attrs,
+};
+
 /*
  * Import a device. If 'super_format' >= 0, then sanity check the superblock
  *
@@ -1445,6 +1570,10 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
        if (err)
                goto abort_free;
 
+       rdev->kobj.parent = NULL;
+       rdev->kobj.ktype = &rdev_ktype;
+       kobject_init(&rdev->kobj);
+
        rdev->desc_nr = -1;
        rdev->faulty = 0;
        rdev->in_sync = 0;
@@ -1551,6 +1680,136 @@ static void analyze_sbs(mddev_t * mddev)
 
 }
 
+struct md_sysfs_entry {
+       struct attribute attr;
+       ssize_t (*show)(mddev_t *, char *);
+       ssize_t (*store)(mddev_t *, const char *, size_t);
+};
+
+static ssize_t
+md_show_level(mddev_t *mddev, char *page)
+{
+       mdk_personality_t *p = mddev->pers;
+       if (p == NULL)
+               return 0;
+       if (mddev->level >= 0)
+               return sprintf(page, "RAID-%d\n", mddev->level);
+       else
+               return sprintf(page, "%s\n", p->name);
+}
+
+static struct md_sysfs_entry md_level = {
+       .attr = {.name = "level", .mode = S_IRUGO },
+       .show = md_show_level,
+};
+
+static ssize_t
+md_show_rdisks(mddev_t *mddev, char *page)
+{
+       return sprintf(page, "%d\n", mddev->raid_disks);
+}
+
+static struct md_sysfs_entry md_raid_disks = {
+       .attr = {.name = "raid_disks", .mode = S_IRUGO },
+       .show = md_show_rdisks,
+};
+
+static ssize_t
+md_show_scan(mddev_t *mddev, char *page)
+{
+       char *type = "none";
+       if (mddev->recovery &
+           ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED))) {
+               if (mddev->recovery & (1<<MD_RECOVERY_SYNC)) {
+                       if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+                               type = "resync";
+                       else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+                               type = "check";
+                       else
+                               type = "repair";
+               } else
+                       type = "recover";
+       }
+       return sprintf(page, "%s\n", type);
+}
+
+static ssize_t
+md_store_scan(mddev_t *mddev, const char *page, size_t len)
+{
+       int canscan=0;
+       if (mddev->recovery &
+           ((1<<MD_RECOVERY_RUNNING) || (1<<MD_RECOVERY_NEEDED)))
+               return -EBUSY;
+       down(&mddev->reconfig_sem);
+       if (mddev->pers && mddev->pers->sync_request)
+               canscan=1;
+       up(&mddev->reconfig_sem);
+       if (!canscan)
+               return -EINVAL;
+
+       if (strcmp(page, "check")==0 || strcmp(page, "check\n")==0)
+               set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       else if (strcmp(page, "repair")!=0 && strcmp(page, "repair\n")!=0)
+               return -EINVAL;
+       set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+       set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_wakeup_thread(mddev->thread);
+       return len;
+}
+
+static struct md_sysfs_entry md_scan_mode = {
+       .attr = {.name = "scan_mode", .mode = S_IRUGO|S_IWUSR },
+       .show = md_show_scan,
+       .store = md_store_scan,
+};
+
+static struct attribute *md_default_attrs[] = {
+       &md_level.attr,
+       &md_raid_disks.attr,
+       &md_scan_mode.attr,
+       NULL,
+};
+
+static ssize_t
+md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+       struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
+       mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+
+       if (!entry->show)
+               return -EIO;
+       return entry->show(mddev, page);
+}
+
+static ssize_t
+md_attr_store(struct kobject *kobj, struct attribute *attr,
+             const char *page, size_t length)
+{
+       struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
+       mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+
+       if (!entry->store)
+               return -EIO;
+       return entry->store(mddev, page, length);
+}
+
+static void md_free(struct kobject *ko)
+{
+       mddev_t *mddev = container_of(ko, mddev_t, kobj);
+       kfree(mddev);
+}
+
+static struct sysfs_ops md_sysfs_ops = {
+       .show   = md_attr_show,
+       .store  = md_attr_store,
+};
+static struct kobj_type md_ktype = {
+       .release        = md_free,
+       .sysfs_ops      = &md_sysfs_ops,
+       .default_attrs  = md_default_attrs,
+};
+
 int mdp_major = 0;
 
 static struct kobject *md_probe(dev_t dev, int *part, void *data)
@@ -1592,6 +1851,11 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
        add_disk(disk);
        mddev->gendisk = disk;
        up(&disks_sem);
+       mddev->kobj.parent = kobject_get(&disk->kobj);
+       mddev->kobj.k_name = NULL;
+       snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md");
+       mddev->kobj.ktype = &md_ktype;
+       kobject_register(&mddev->kobj);
        return NULL;
 }
 
@@ -1736,6 +2000,13 @@ static int do_md_run(mddev_t * mddev)
        mddev->safemode_timer.data = (unsigned long) mddev;
        mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */
        mddev->in_sync = 1;
+
+       ITERATE_RDEV(mddev,rdev,tmp)
+               if (rdev->raid_disk >= 0) {
+                       char nm[20];
+                       sprintf(nm, "rd%d", rdev->raid_disk);
+                       sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+               }
        
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
@@ -1857,9 +2128,18 @@ static int do_md_stop(mddev_t * mddev, int ro)
         * Free resources if final stop
         */
        if (!ro) {
+               mdk_rdev_t *rdev;
+               struct list_head *tmp;
                struct gendisk *disk;
                printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
 
+               ITERATE_RDEV(mddev,rdev,tmp)
+                       if (rdev->raid_disk >= 0) {
+                               char nm[20];
+                               sprintf(nm, "rd%d", rdev->raid_disk);
+                               sysfs_remove_link(&mddev->kobj, nm);
+                       }
+
                export_array(mddev);
 
                mddev->array_size = 0;
@@ -3626,7 +3906,8 @@ static void md_do_sync(mddev_t *mddev)
 
        is_mddev_idle(mddev); /* this also initializes IO event counters */
        /* we don't use the checkpoint if there's a bitmap */
-       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap)
+       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap
+           && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
                j = mddev->recovery_cp;
        else
                j = 0;
@@ -3864,9 +4145,13 @@ void md_check_recovery(mddev_t *mddev)
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                        goto unlock;
                }
-               if (mddev->recovery)
-                       /* probably just the RECOVERY_NEEDED flag */
-                       mddev->recovery = 0;
+               /* Clear some bits that don't mean anything, but
+                * might be left set
+                */
+               clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
+               clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
+               clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
                /* no recovery is running.
                 * remove any failed drives, then
@@ -3878,29 +4163,39 @@ void md_check_recovery(mddev_t *mddev)
                        if (rdev->raid_disk >= 0 &&
                            (rdev->faulty || ! rdev->in_sync) &&
                            atomic_read(&rdev->nr_pending)==0) {
-                               if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0)
+                               if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) {
+                                       char nm[20];
+                                       sprintf(nm,"rd%d", rdev->raid_disk);
+                                       sysfs_remove_link(&mddev->kobj, nm);
                                        rdev->raid_disk = -1;
+                               }
                        }
 
                if (mddev->degraded) {
                        ITERATE_RDEV(mddev,rdev,rtmp)
                                if (rdev->raid_disk < 0
                                    && !rdev->faulty) {
-                                       if (mddev->pers->hot_add_disk(mddev,rdev))
+                                       if (mddev->pers->hot_add_disk(mddev,rdev)) {
+                                               char nm[20];
+                                               sprintf(nm, "rd%d", rdev->raid_disk);
+                                               sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
                                                spares++;
-                                       else
+                                       else
                                                break;
                                }
                }
 
-               if (!spares && (mddev->recovery_cp == MaxSector )) {
-                       /* nothing we can do ... */
+               if (spares) {
+                       clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+                       clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+               } else if (mddev->recovery_cp < MaxSector) {
+                       set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+               } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+                       /* nothing to be done ... */
                        goto unlock;
-               }
+
                if (mddev->pers->sync_request) {
                        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-                       if (!spares)
-                               set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                        if (spares && mddev->bitmap && ! mddev->bitmap->file) {
                                /* We are adding a device or devices to an array
                                 * which has the bitmap stored on all devices.
This page took 0.045785 seconds and 4 git commands to generate.