1 /******************************************************************************
2 *******************************************************************************
4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5 ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
7 ** This copyrighted material is made available to anyone wishing to use,
8 ** modify, copy, or redistribute it subject to the terms and conditions
9 ** of the GNU General Public License v.2.
11 *******************************************************************************
12 ******************************************************************************/
14 #include "dlm_internal.h"
15 #include "lockspace.h"
24 #include "requestqueue.h"
/* Mutex held by dlm_new_lockspace() and dlm_release_lockspace() around their work. */
29 static struct mutex ls_lock;
/* List of lockspaces, linked through each lockspace's ls_list member. */
30 static struct list_head lslist;
/* Spinlock taken around the lslist traversals and modifications in this file. */
31 static spinlock_t lslist_lock;
/* Task handle that dlm_scand_stop() passes to kthread_stop(). */
32 static struct task_struct * scand_task;
/*
 * Store handler wired to the "control" attribute.  Parses buf as an integer
 * with simple_strtol() (base 0, so the prefix selects the base), looks the
 * lockspace up again through its ls_local_handle and later calls
 * dlm_put_lockspace() on it.
 * NOTE(review): the code that acts on the parsed value and the return
 * statement are not visible in this excerpt.
 */
35 static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
38 int n = simple_strtol(buf, NULL, 0);
40 ls = dlm_find_lockspace_local(ls->ls_local_handle);
54 dlm_put_lockspace(ls);
/*
 * Store handler wired to the "event_done" attribute.  Records the integer
 * parsed from buf in ls_uevent_result, sets LSFL_UEVENT_WAIT and wakes the
 * ls_uevent_wait queue; do_uevent() waits on that queue for that flag.
 */
58 static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
60 ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
61 set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
62 wake_up(&ls->ls_uevent_wait);
/*
 * Show handler: formats ls_global_id as unsigned decimal plus a newline into
 * buf (bounded by PAGE_SIZE) and returns the snprintf() result.
 */
66 static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
68 return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
/*
 * Store handler wired to the "id" attribute: sets ls_global_id to the
 * unsigned value parsed from buf (base chosen by prefix).
 * NOTE(review): the return statement is not visible in this excerpt.
 */
71 static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
73 ls->ls_global_id = simple_strtoul(buf, NULL, 0);
/*
 * Show handler wired to the "nodir" attribute: prints the value of
 * dlm_no_directory(ls) as unsigned decimal plus a newline into buf.
 */
77 static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
79 return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
/*
 * Store handler wired to the "nodir" attribute: parses buf into val and
 * sets LSFL_NODIR in ls_flags.
 * NOTE(review): presumably the bit is only set for a particular val; the
 * condition and the return statement are not visible in this excerpt.
 */
82 static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
84 int val = simple_strtoul(buf, NULL, 0);
86 set_bit(LSFL_NODIR, &ls->ls_flags);
/*
 * Show handler wired to the "recover_status" attribute: prints the value
 * returned by dlm_recover_status(ls) in hexadecimal plus a newline.
 */
90 static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
92 uint32_t status = dlm_recover_status(ls);
93 return snprintf(buf, PAGE_SIZE, "%x\n", status);
/*
 * Show handler wired to the "recover_nodeid" attribute: prints
 * ls_recover_nodeid as signed decimal plus a newline.
 */
96 static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
98 return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
/*
 * Members used as struct dlm_attr by the definitions below: an embedded
 * struct attribute plus optional per-lockspace show/store callbacks.
 * NOTE(review): the struct header line is not visible in this excerpt.
 */
102 struct attribute attr;
103 ssize_t (*show)(struct dlm_ls *, char *);
104 ssize_t (*store)(struct dlm_ls *, const char *, size_t);
/* "control" attribute: mode S_IWUSR, no show callback, writes go to dlm_control_store(). */
107 static struct dlm_attr dlm_attr_control = {
108 .attr = {.name = "control", .mode = S_IWUSR},
109 .store = dlm_control_store
/* "event_done" attribute: mode S_IWUSR, no show callback, writes go to dlm_event_store(). */
112 static struct dlm_attr dlm_attr_event = {
113 .attr = {.name = "event_done", .mode = S_IWUSR},
114 .store = dlm_event_store
/*
 * "id" attribute: mode S_IRUGO | S_IWUSR, writes go to dlm_id_store().
 * NOTE(review): the .show initializer is not visible in this excerpt.
 */
117 static struct dlm_attr dlm_attr_id = {
118 .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
120 .store = dlm_id_store
/* "nodir" attribute: mode S_IRUGO | S_IWUSR, served by dlm_nodir_show() and dlm_nodir_store(). */
123 static struct dlm_attr dlm_attr_nodir = {
124 .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
125 .show = dlm_nodir_show,
126 .store = dlm_nodir_store
/* "recover_status" attribute: mode S_IRUGO, no store callback, reads go to dlm_recover_status_show(). */
129 static struct dlm_attr dlm_attr_recover_status = {
130 .attr = {.name = "recover_status", .mode = S_IRUGO},
131 .show = dlm_recover_status_show
/* "recover_nodeid" attribute: mode S_IRUGO, no store callback, reads go to dlm_recover_nodeid_show(). */
134 static struct dlm_attr dlm_attr_recover_nodeid = {
135 .attr = {.name = "recover_nodeid", .mode = S_IRUGO},
136 .show = dlm_recover_nodeid_show
/*
 * Attribute table installed as dlm_ktype.default_attrs.  Each entry points
 * at the struct attribute embedded in one of the dlm_attr objects above.
 * NOTE(review): some entries and the terminator are not visible in this excerpt.
 */
139 static struct attribute *dlm_attrs[] = {
140 &dlm_attr_control.attr,
141 &dlm_attr_event.attr,
143 &dlm_attr_nodir.attr,
144 &dlm_attr_recover_status.attr,
145 &dlm_attr_recover_nodeid.attr,
/*
 * sysfs_ops show entry point (see dlm_attr_ops).  Recovers the lockspace
 * from its embedded ls_kobj and the dlm_attr from its embedded attr, then
 * forwards to the attribute's show callback.  Returns 0 when the attribute
 * has no show callback.
 */
149 static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
152 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
153 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
154 return a->show ? a->show(ls, buf) : 0;
/*
 * sysfs_ops store entry point (see dlm_attr_ops).  Recovers the lockspace
 * and the dlm_attr with container_of() and forwards to the attribute's
 * store callback.  Returns len, i.e. reports the whole write as consumed,
 * when the attribute has no store callback.
 */
157 static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
158 const char *buf, size_t len)
160 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
161 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
162 return a->store ? a->store(ls, buf, len) : len;
/*
 * Release callback installed in dlm_ktype.  Recovers the lockspace that
 * embeds kobject k.
 * NOTE(review): what is done with ls is not visible in this excerpt;
 * presumably it is freed here, as the comment in release_lockspace() says
 * the ls structure is freed when the kobject is done with.
 */
165 static void lockspace_kobj_release(struct kobject *k)
167 struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
/* sysfs operations for lockspace kobjects: both directions go through the dispatchers above. */
171 static const struct sysfs_ops dlm_attr_ops = {
172 .show = dlm_attr_show,
173 .store = dlm_attr_store,
/*
 * kobject type passed to kobject_init_and_add() for ls_kobj in
 * new_lockspace(): default attributes, sysfs dispatchers and release hook.
 */
176 static struct kobj_type dlm_ktype = {
177 .default_attrs = dlm_attrs,
178 .sysfs_ops = &dlm_attr_ops,
179 .release = lockspace_kobj_release,
/* kset created in dlm_lockspace_init(), assigned to each ls_kobj.kset, unregistered in dlm_lockspace_exit(). */
182 static struct kset *dlm_kset;
/*
 * Emit a KOBJ_ONLINE or KOBJ_OFFLINE uevent for the lockspace and wait
 * (interruptibly) until LSFL_UEVENT_WAIT is set, clearing it on wake-up;
 * dlm_event_store() sets that bit and wakes ls_uevent_wait.  The result
 * stored in ls_uevent_result is then picked up as the error value.
 * NOTE(review): the branches selecting ONLINE vs OFFLINE (presumably on
 * `in`) and the conditions around the result handling and error logging are
 * not visible in this excerpt.
 */
184 static int do_uevent(struct dlm_ls *ls, int in)
189 kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
191 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
193 log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
195 /* dlm_controld will see the uevent, do the necessary group management
196 and then write to sysfs to wake us */
198 error = wait_event_interruptible(ls->ls_uevent_wait,
199 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
201 log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
206 error = ls->ls_uevent_result;
209 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
210 error, ls->ls_uevent_result);
/*
 * kset uevent callback (see dlm_uevent_ops): adds a LOCKSPACE=<ls_name>
 * variable to the uevent environment of the lockspace's kobject.
 */
214 static int dlm_uevent(struct kset *kset, struct kobject *kobj,
215 struct kobj_uevent_env *env)
217 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
219 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
/* uevent operations handed to kset_create_and_add() in dlm_lockspace_init(). */
223 static struct kset_uevent_ops dlm_uevent_ops = {
224 .uevent = dlm_uevent,
/*
 * Initialise the file-scope state (ls_lock, lslist, lslist_lock) and create
 * the "dlm" kset under kernel_kobj with dlm_uevent_ops.
 * NOTE(review): the failure check guarding the warning and the return
 * values are not visible in this excerpt.
 */
227 int __init dlm_lockspace_init(void)
230 mutex_init(&ls_lock);
231 INIT_LIST_HEAD(&lslist);
232 spin_lock_init(&lslist_lock);
234 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
236 printk(KERN_WARNING "%s: can not create kset\n", __func__);
/* Unregister the kset created by dlm_lockspace_init(). */
242 void dlm_lockspace_exit(void)
244 kset_unregister(dlm_kset);
/*
 * Walk lslist under lslist_lock looking for a lockspace whose ls_scan_time
 * plus dlm_config.ci_scan_secs seconds (in jiffies) has been reached.  The
 * lock is dropped both inside the matching branch and after the loop.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching lockspace, or NULL when none is due.
 */
247 static struct dlm_ls *find_ls_to_scan(void)
251 spin_lock(&lslist_lock);
252 list_for_each_entry(ls, &lslist, ls_list) {
253 if (time_after_eq(jiffies, ls->ls_scan_time +
254 dlm_config.ci_scan_secs * HZ)) {
255 spin_unlock(&lslist_lock);
259 spin_unlock(&lslist_lock);
/*
 * Thread function started by dlm_scand_start().  Loops until
 * kthread_should_stop(): asks find_ls_to_scan() for a lockspace and, when
 * dlm_lock_recovery_try() succeeds, stamps ls_scan_time with jiffies, runs
 * dlm_scan_timeout() and dlm_scan_waiters(), and calls dlm_unlock_recovery().
 * NOTE(review): several lines are missing here.  Presumably ls_scan_time is
 * pushed back by HZ when the recovery lock is not obtained, and the
 * ci_scan_secs sleep runs when no lockspace is due -- confirm against the
 * full source.
 */
263 static int dlm_scand(void *data)
267 while (!kthread_should_stop()) {
268 ls = find_ls_to_scan();
270 if (dlm_lock_recovery_try(ls)) {
271 ls->ls_scan_time = jiffies;
273 dlm_scan_timeout(ls);
274 dlm_scan_waiters(ls);
275 dlm_unlock_recovery(ls);
277 ls->ls_scan_time += HZ;
281 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
/*
 * Start the scanning thread: runs dlm_scand() in a kthread named "dlm_scand".
 * NOTE(review): the error check on p and the store into scand_task are not
 * visible in this excerpt.
 */
286 static int dlm_scand_start(void)
288 struct task_struct *p;
291 p = kthread_run(dlm_scand, NULL, "dlm_scand");
/* Stop the scanning thread recorded in scand_task via kthread_stop(). */
299 static void dlm_scand_stop(void)
301 kthread_stop(scand_task);
/*
 * Search lslist, under lslist_lock, for the lockspace whose ls_global_id
 * equals id.
 * NOTE(review): what happens on a match (presumably a reference is taken)
 * and the return statement are not visible in this excerpt.
 */
304 struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
308 spin_lock(&lslist_lock);
310 list_for_each_entry(ls, &lslist, ls_list) {
311 if (ls->ls_global_id == id) {
318 spin_unlock(&lslist_lock);
/*
 * Search lslist, under lslist_lock, for the lockspace whose ls_local_handle
 * equals the given handle (new_lockspace() sets ls_local_handle to ls itself).
 * NOTE(review): what happens on a match (presumably a reference is taken)
 * and the return statement are not visible in this excerpt.
 */
322 struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
326 spin_lock(&lslist_lock);
327 list_for_each_entry(ls, &lslist, ls_list) {
328 if (ls->ls_local_handle == lockspace) {
335 spin_unlock(&lslist_lock);
/*
 * Search lslist, under lslist_lock, for the lockspace whose ls_device.minor
 * equals minor.
 * NOTE(review): what happens on a match (presumably a reference is taken)
 * and the return statement are not visible in this excerpt.
 */
339 struct dlm_ls *dlm_find_lockspace_device(int minor)
343 spin_lock(&lslist_lock);
344 list_for_each_entry(ls, &lslist, ls_list) {
345 if (ls->ls_device.minor == minor) {
352 spin_unlock(&lslist_lock);
/*
 * Called after the dlm_find_lockspace_local() lookups in dlm_control_store()
 * and dlm_release_lockspace().  Takes lslist_lock around an update that is
 * not visible in this excerpt.
 * NOTE(review): presumably it drops ls_count, the counter remove_lockspace()
 * tests -- confirm against the full source.
 */
356 void dlm_put_lockspace(struct dlm_ls *ls)
358 spin_lock(&lslist_lock);
360 spin_unlock(&lslist_lock);
/*
 * Take ls off lslist once ls_count has reached zero.  Under lslist_lock:
 * if ls_count is 0, warn when ls_create_count is still non-zero, unlink
 * ls_list and unlock; otherwise just unlock.
 * NOTE(review): the surrounding retry/return logic is not visible in this
 * excerpt; presumably the function loops until ls_count is 0.
 */
363 static void remove_lockspace(struct dlm_ls *ls)
366 spin_lock(&lslist_lock);
367 if (ls->ls_count == 0) {
368 WARN_ON(ls->ls_create_count != 0);
369 list_del(&ls->ls_list);
370 spin_unlock(&lslist_lock);
373 spin_unlock(&lslist_lock);
/*
 * Start the service threads: dlm_scand first, then the lowcomms layer.
 * Each failure is reported with log_print().
 * NOTE(review): the error checks, any unwinding and the return statement
 * are not visible in this excerpt.
 */
378 static int threads_start(void)
382 error = dlm_scand_start();
384 log_print("cannot start dlm_scand thread %d", error);
388 /* Thread for sending/receiving messages for all lockspaces */
389 error = dlm_lowcomms_start();
391 log_print("cannot start dlm lowcomms %d", error);
/*
 * NOTE(review): only the signature is visible in this excerpt; presumably
 * the counterpart of threads_start() -- confirm against the full source.
 */
403 static void threads_stop(void)
/*
 * Create the lockspace called name, or reuse one that is already on lslist.
 *
 * Visible steps, in order:
 *  - check the name length against DLM_LOCKSPACE_LEN and require lvblen to
 *    be a non-zero multiple of 8;
 *  - take a module reference and require the dlm user daemon;
 *  - when ops and ops_result are given and recovery callbacks are disabled
 *    in dlm_config, report -EOPNOTSUPP through ops_result;
 *  - when recovery callbacks are enabled and cluster is given, compare it
 *    with the configured cluster name and log a mismatch;
 *  - search lslist for a lockspace with the same name; DLM_LSFL_NEWEXCL is
 *    tested there and ls_create_count is incremented for a reused entry;
 *  - otherwise allocate and initialise a struct dlm_ls, add it to lslist,
 *    start the callback (DLM_LSFL_FS only) and recoverd threads, register
 *    the kobject, and join the group with do_uevent(ls, 1) followed by a
 *    wait on ls_members_done.
 * The trailing statements undo those steps.
 * NOTE(review): the goto labels, several conditions and all return
 * statements are not visible in this excerpt, so the exact error paths and
 * return values must be confirmed against the full source.
 */
409 static int new_lockspace(const char *name, const char *cluster,
410 uint32_t flags, int lvblen,
411 const struct dlm_lockspace_ops *ops, void *ops_arg,
412 int *ops_result, dlm_lockspace_t **lockspace)
417 int namelen = strlen(name);
419 if (namelen > DLM_LOCKSPACE_LEN)
422 if (!lvblen || (lvblen % 8))
425 if (!try_module_get(THIS_MODULE))
428 if (!dlm_user_daemon_available()) {
429 log_print("dlm user daemon not available");
434 if (ops && ops_result) {
435 if (!dlm_config.ci_recover_callbacks)
436 *ops_result = -EOPNOTSUPP;
441 if (dlm_config.ci_recover_callbacks && cluster &&
442 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
443 log_print("dlm cluster name %s mismatch %s",
444 dlm_config.ci_cluster_name, cluster);
/* Look for an existing lockspace with the same name (length compared first, then bytes). */
451 spin_lock(&lslist_lock);
452 list_for_each_entry(ls, &lslist, ls_list) {
453 WARN_ON(ls->ls_create_count <= 0);
454 if (ls->ls_namelen != namelen)
456 if (memcmp(ls->ls_name, name, namelen))
458 if (flags & DLM_LSFL_NEWEXCL) {
462 ls->ls_create_count++;
467 spin_unlock(&lslist_lock);
/* Zeroed allocation sized for the struct plus namelen extra bytes for the name. */
474 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
477 memcpy(ls->ls_name, name, namelen);
478 ls->ls_namelen = namelen;
479 ls->ls_lvblen = lvblen;
482 ls->ls_scan_time = jiffies;
484 if (ops && dlm_config.ci_recover_callbacks) {
486 ls->ls_ops_arg = ops_arg;
489 if (flags & DLM_LSFL_TIMEWARN)
490 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
492 /* ls_exflags are forced to match among nodes, and we don't
493 need to require all nodes to have some flags set */
494 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
/* Resource table: ci_rsbtbl_size buckets, each with empty keep/toss trees and its own spinlock. */
497 size = dlm_config.ci_rsbtbl_size;
498 ls->ls_rsbtbl_size = size;
500 ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
503 for (i = 0; i < size; i++) {
504 ls->ls_rsbtbl[i].keep.rb_node = NULL;
505 ls->ls_rsbtbl[i].toss.rb_node = NULL;
506 spin_lock_init(&ls->ls_rsbtbl[i].lock);
509 idr_init(&ls->ls_lkbidr);
510 spin_lock_init(&ls->ls_lkbidr_spin);
512 INIT_LIST_HEAD(&ls->ls_waiters);
513 mutex_init(&ls->ls_waiters_mutex);
514 INIT_LIST_HEAD(&ls->ls_orphans);
515 mutex_init(&ls->ls_orphans_mutex);
516 INIT_LIST_HEAD(&ls->ls_timeout);
517 mutex_init(&ls->ls_timeout_mutex);
519 INIT_LIST_HEAD(&ls->ls_new_rsb);
520 spin_lock_init(&ls->ls_new_rsb_spin);
522 INIT_LIST_HEAD(&ls->ls_nodes);
523 INIT_LIST_HEAD(&ls->ls_nodes_gone);
524 ls->ls_num_nodes = 0;
525 ls->ls_low_nodeid = 0;
526 ls->ls_total_weight = 0;
527 ls->ls_node_array = NULL;
529 memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
530 ls->ls_stub_rsb.res_ls = ls;
532 ls->ls_debug_rsb_dentry = NULL;
533 ls->ls_debug_waiters_dentry = NULL;
535 init_waitqueue_head(&ls->ls_uevent_wait);
536 ls->ls_uevent_result = 0;
537 init_completion(&ls->ls_members_done);
538 ls->ls_members_result = -1;
540 mutex_init(&ls->ls_cb_mutex);
541 INIT_LIST_HEAD(&ls->ls_cb_delay);
543 ls->ls_recoverd_task = NULL;
544 mutex_init(&ls->ls_recoverd_active);
545 spin_lock_init(&ls->ls_recover_lock);
546 spin_lock_init(&ls->ls_rcom_spin);
547 get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
548 ls->ls_recover_status = 0;
549 ls->ls_recover_seq = 0;
550 ls->ls_recover_args = NULL;
551 init_rwsem(&ls->ls_in_recovery);
552 init_rwsem(&ls->ls_recv_active);
553 INIT_LIST_HEAD(&ls->ls_requestqueue);
554 mutex_init(&ls->ls_requestqueue_mutex);
555 mutex_init(&ls->ls_clear_proc_locks);
557 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
558 if (!ls->ls_recover_buf)
562 ls->ls_num_slots = 0;
563 ls->ls_slots_size = 0;
566 INIT_LIST_HEAD(&ls->ls_recover_list);
567 spin_lock_init(&ls->ls_recover_list_lock);
568 ls->ls_recover_list_count = 0;
569 ls->ls_local_handle = ls;
570 init_waitqueue_head(&ls->ls_wait_general);
571 INIT_LIST_HEAD(&ls->ls_root_list);
572 init_rwsem(&ls->ls_root_sem);
/* ls_in_recovery is write-locked here, before the lockspace becomes visible on lslist. */
574 down_write(&ls->ls_in_recovery);
576 spin_lock(&lslist_lock);
577 ls->ls_create_count = 1;
578 list_add(&ls->ls_list, &lslist);
579 spin_unlock(&lslist_lock);
581 if (flags & DLM_LSFL_FS) {
582 error = dlm_callback_start(ls);
584 log_error(ls, "can't start dlm_callback %d", error);
589 /* needs to find ls in lslist */
590 error = dlm_recoverd_start(ls);
592 log_error(ls, "can't start dlm_recoverd %d", error);
596 ls->ls_kobj.kset = dlm_kset;
597 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
601 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
603 /* let kobject handle freeing of ls if there's an error */
606 /* This uevent triggers dlm_controld in userspace to add us to the
607 group of nodes that are members of this lockspace (managed by the
608 cluster infrastructure.) Once it's done that, it tells us who the
609 current lockspace members are (via configfs) and then tells the
610 lockspace to start running (via sysfs) in dlm_ls_start(). */
612 error = do_uevent(ls, 1);
616 wait_for_completion(&ls->ls_members_done);
617 error = ls->ls_members_result;
621 dlm_create_debug_file(ls);
623 log_debug(ls, "join complete");
629 dlm_clear_members(ls);
630 kfree(ls->ls_node_array);
632 dlm_recoverd_stop(ls);
634 dlm_callback_stop(ls);
636 spin_lock(&lslist_lock);
637 list_del(&ls->ls_list);
638 spin_unlock(&lslist_lock);
639 kfree(ls->ls_recover_buf);
641 idr_destroy(&ls->ls_lkbidr);
642 vfree(ls->ls_rsbtbl);
645 kobject_put(&ls->ls_kobj);
649 module_put(THIS_MODULE);
/*
 * Public entry point for creating or joining a lockspace.  With ls_lock
 * held, starts the service threads through threads_start() and then
 * forwards all arguments to new_lockspace().
 * NOTE(review): the error checks between the two calls, any handling of the
 * new_lockspace() result and the return statement are not visible in this
 * excerpt.
 */
653 int dlm_new_lockspace(const char *name, const char *cluster,
654 uint32_t flags, int lvblen,
655 const struct dlm_lockspace_ops *ops, void *ops_arg,
656 int *ops_result, dlm_lockspace_t **lockspace)
660 mutex_lock(&ls_lock);
662 error = threads_start();
666 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
667 ops_result, lockspace);
675 mutex_unlock(&ls_lock);
/*
 * idr_for_each() callback used by lockspace_busy() when force == 1.  Treats
 * p as a struct dlm_lkb and tests whether lkb_nodeid is zero.
 * NOTE(review): the return values are not visible in this excerpt;
 * presumably non-zero for a zero lkb_nodeid (a local lock) -- confirm.
 */
679 static int lkb_idr_is_local(int id, void *p, void *data)
681 struct dlm_lkb *lkb = p;
683 if (!lkb->lkb_nodeid)
/*
 * idr_for_each() callback passed by lockspace_busy() for ls_lkbidr.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * reports every entry as a match -- confirm against the full source.
 */
688 static int lkb_idr_is_any(int id, void *p, void *data)
/*
 * idr_for_each() callback used by release_lockspace() to dispose of each
 * lkb.  Frees the lvb buffer with dlm_free_lvb() only when the lkb has one
 * and carries the DLM_IFL_MSTCPY flag.
 * NOTE(review): the freeing of the lkb itself and the return value are not
 * visible in this excerpt.
 */
693 static int lkb_idr_free(int id, void *p, void *data)
695 struct dlm_lkb *lkb = p;
697 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
698 dlm_free_lvb(lkb->lkb_lvbptr);
704 /* NOTE: We check the lkbidr here rather than the resource table.
705 This is because there may be LKBs queued as ASTs that have been unlinked
706 from their RSBs and are pending deletion once the AST has been delivered */
/*
 * Report whether the lockspace still has lkbs that should block a release
 * at the given force level.  Under ls_lkbidr_spin, walks ls_lkbidr with
 * lkb_idr_is_any, or with lkb_idr_is_local when force == 1.
 * NOTE(review): the first condition (presumably force == 0), the remaining
 * branch and the return statement are not visible in this excerpt.
 */
708 static int lockspace_busy(struct dlm_ls *ls, int force)
712 spin_lock(&ls->ls_lkbidr_spin);
714 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
715 } else if (force == 1) {
716 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
720 spin_unlock(&ls->ls_lkbidr_spin);
/*
 * Drop one creation count on ls and tear the lockspace down when it was
 * the last one.
 *
 * Visible steps: query lockspace_busy(ls, force); under lslist_lock, set
 * ls_create_count to 0 when it was 1, or decrement it (keeping the new
 * value in rv) when it was greater than 1; log "no remove" with rv.  The
 * teardown then deregisters the device, stops recoverd and the callback
 * thread, removes ls from lslist, deletes the debug file, frees the
 * recovery buffer, every lkb in ls_lkbidr, every rsb in the keep and toss
 * trees and on ls_new_rsb, the request queue, recovery args, member lists
 * and node array, and finally drops the kobject and module references.
 * NOTE(review): the conditions tying `busy` and `rv` to an early return,
 * the statement guarded by `force < 3 && dlm_user_daemon_available()`, the
 * rsb free calls and the return value are not visible in this excerpt.
 */
724 static int release_lockspace(struct dlm_ls *ls, int force)
730 busy = lockspace_busy(ls, force);
732 spin_lock(&lslist_lock);
733 if (ls->ls_create_count == 1) {
737 /* remove_lockspace takes ls off lslist */
738 ls->ls_create_count = 0;
741 } else if (ls->ls_create_count > 1) {
742 rv = --ls->ls_create_count;
746 spin_unlock(&lslist_lock);
749 log_debug(ls, "release_lockspace no remove %d", rv);
753 dlm_device_deregister(ls);
755 if (force < 3 && dlm_user_daemon_available())
758 dlm_recoverd_stop(ls);
760 dlm_callback_stop(ls);
762 remove_lockspace(ls);
764 dlm_delete_debug_file(ls);
766 kfree(ls->ls_recover_buf);
769 * Free all lkb's in idr
772 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
773 idr_remove_all(&ls->ls_lkbidr);
774 idr_destroy(&ls->ls_lkbidr);
777 * Free all rsb's on rsbtbl[] lists
780 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
781 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
782 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
783 rb_erase(n, &ls->ls_rsbtbl[i].keep);
787 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
788 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
789 rb_erase(n, &ls->ls_rsbtbl[i].toss);
794 vfree(ls->ls_rsbtbl);
796 while (!list_empty(&ls->ls_new_rsb)) {
797 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
799 list_del(&rsb->res_hashchain);
804 * Free structures on any other lists
807 dlm_purge_requestqueue(ls);
808 kfree(ls->ls_recover_args);
809 dlm_clear_members(ls);
810 dlm_clear_members_gone(ls);
811 kfree(ls->ls_node_array);
812 log_debug(ls, "release_lockspace final free");
813 kobject_put(&ls->ls_kobj);
814 /* The ls structure will be freed when the kobject is done with */
816 module_put(THIS_MODULE);
821 * Called when a system has released all its locks and is not going to use the
822 * lockspace any longer. We free everything we're managing for this lockspace.
823 * Remaining nodes will go through the recovery process as if we'd died. The
824 * lockspace must continue to function as usual, participating in recoveries,
825 * until this returns.
827 * Force has 4 possible values:
828 * 0 - don't destroy lockspace if it has any LKBs
829 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
830 * 2 - destroy lockspace regardless of LKBs
831 * 3 - destroy lockspace as part of a forced shutdown
/*
 * Public entry point for releasing a lockspace.  Resolves the handle with
 * dlm_find_lockspace_local(), immediately calls dlm_put_lockspace() on the
 * result, and then runs release_lockspace(ls, force) with ls_lock held.
 * NOTE(review): the NULL check after the lookup, any follow-up on the
 * release result and the return statement are not visible in this excerpt.
 */
834 int dlm_release_lockspace(void *lockspace, int force)
839 ls = dlm_find_lockspace_local(lockspace);
842 dlm_put_lockspace(ls);
844 mutex_lock(&ls_lock);
845 error = release_lockspace(ls, force);
850 mutex_unlock(&ls_lock);
/*
 * Walk lslist under lslist_lock, testing LSFL_RUNNING on each lockspace.
 * For a lockspace selected by that test the list lock is dropped and an
 * error is logged that no userland control daemon is present.
 * NOTE(review): the statement guarded by the LSFL_RUNNING test (presumably
 * a continue), the actual stop call and the loop restart are not visible in
 * this excerpt.
 */
855 void dlm_stop_lockspaces(void)
860 spin_lock(&lslist_lock);
861 list_for_each_entry(ls, &lslist, ls_list) {
862 if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
864 spin_unlock(&lslist_lock);
865 log_error(ls, "no userland control daemon, stopping lockspace");
869 spin_unlock(&lslist_lock);