]>
Commit | Line | Data |
---|---|---|
e7fd4179 DT |
1 | /****************************************************************************** |
2 | ******************************************************************************* | |
3 | ** | |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | |
6 | ** | |
7 | ** This copyrighted material is made available to anyone wishing to use, | |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | |
9 | ** of the GNU General Public License v.2. | |
10 | ** | |
11 | ******************************************************************************* | |
12 | ******************************************************************************/ | |
13 | ||
14 | #include "dlm_internal.h" | |
15 | #include "lockspace.h" | |
16 | #include "member.h" | |
17 | #include "lowcomms.h" | |
18 | #include "rcom.h" | |
19 | #include "config.h" | |
20 | #include "memory.h" | |
21 | #include "recover.h" | |
22 | #include "util.h" | |
23 | #include "lock.h" | |
24 | #include "dir.h" | |
25 | ||
e7fd4179 DT |
26 | /* |
27 | * We use the upper 16 bits of the hash value to select the directory node. | |
28 | * Low bits are used for distribution of rsb's among hash buckets on each node. | |
29 | * | |
30 | * To give the exact range wanted (0 to ls_total_weight-1), we apply a modulus | |
31 | * of ls_total_weight to the hash value. This value in the desired range is | |
32 | * used as an index into ls_node_array to give the particular nodeid. | |
33 | */ | |
34 | ||
35 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) | |
36 | { | |
c04fecb4 | 37 | uint32_t node; |
e7fd4179 | 38 | |
c04fecb4 DT |
39 | if (ls->ls_num_nodes == 1) |
40 | return dlm_our_nodeid(); | |
41 | else { | |
e7fd4179 | 42 | node = (hash >> 16) % ls->ls_total_weight; |
c04fecb4 | 43 | return ls->ls_node_array[node]; |
e7fd4179 | 44 | } |
e7fd4179 DT |
45 | } |
46 | ||
47 | int dlm_dir_nodeid(struct dlm_rsb *r) | |
48 | { | |
c04fecb4 | 49 | return r->res_dir_nodeid; |
e7fd4179 DT |
50 | } |
51 | ||
c04fecb4 | 52 | void dlm_recover_dir_nodeid(struct dlm_ls *ls) |
e7fd4179 | 53 | { |
c04fecb4 | 54 | struct dlm_rsb *r; |
e7fd4179 | 55 | |
c04fecb4 DT |
56 | down_read(&ls->ls_root_sem); |
57 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | |
58 | r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); | |
e7fd4179 | 59 | } |
c04fecb4 | 60 | up_read(&ls->ls_root_sem); |
e7fd4179 DT |
61 | } |
62 | ||
63 | int dlm_recover_directory(struct dlm_ls *ls) | |
64 | { | |
65 | struct dlm_member *memb; | |
e7fd4179 | 66 | char *b, *last_name = NULL; |
c04fecb4 | 67 | int error = -ENOMEM, last_len, nodeid, result; |
e7fd4179 | 68 | uint16_t namelen; |
c04fecb4 | 69 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; |
e7fd4179 | 70 | |
075f0177 | 71 | log_rinfo(ls, "dlm_recover_directory"); |
e7fd4179 DT |
72 | |
73 | if (dlm_no_directory(ls)) | |
74 | goto out_status; | |
75 | ||
573c24c4 | 76 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); |
e7fd4179 DT |
77 | if (!last_name) |
78 | goto out; | |
79 | ||
80 | list_for_each_entry(memb, &ls->ls_nodes, list) { | |
c04fecb4 DT |
81 | if (memb->nodeid == dlm_our_nodeid()) |
82 | continue; | |
83 | ||
e7fd4179 DT |
84 | memset(last_name, 0, DLM_RESNAME_MAXLEN); |
85 | last_len = 0; | |
86 | ||
87 | for (;;) { | |
cd9df1aa | 88 | int left; |
e7fd4179 DT |
89 | error = dlm_recovery_stopped(ls); |
90 | if (error) | |
91 | goto out_free; | |
92 | ||
93 | error = dlm_rcom_names(ls, memb->nodeid, | |
94 | last_name, last_len); | |
95 | if (error) | |
96 | goto out_free; | |
97 | ||
c04fecb4 | 98 | cond_resched(); |
e7fd4179 DT |
99 | |
100 | /* | |
101 | * pick namelen/name pairs out of received buffer | |
102 | */ | |
103 | ||
4007685c | 104 | b = ls->ls_recover_buf->rc_buf; |
cd9df1aa AV |
105 | left = ls->ls_recover_buf->rc_header.h_length; |
106 | left -= sizeof(struct dlm_rcom); | |
e7fd4179 DT |
107 | |
108 | for (;;) { | |
cd9df1aa AV |
109 | __be16 v; |
110 | ||
111 | error = -EINVAL; | |
112 | if (left < sizeof(__be16)) | |
113 | goto out_free; | |
114 | ||
115 | memcpy(&v, b, sizeof(__be16)); | |
116 | namelen = be16_to_cpu(v); | |
117 | b += sizeof(__be16); | |
118 | left -= sizeof(__be16); | |
e7fd4179 DT |
119 | |
120 | /* namelen of 0xFFFFF marks end of names for | |
121 | this node; namelen of 0 marks end of the | |
122 | buffer */ | |
123 | ||
124 | if (namelen == 0xFFFF) | |
125 | goto done; | |
126 | if (!namelen) | |
127 | break; | |
128 | ||
cd9df1aa AV |
129 | if (namelen > left) |
130 | goto out_free; | |
131 | ||
132 | if (namelen > DLM_RESNAME_MAXLEN) | |
133 | goto out_free; | |
134 | ||
c04fecb4 DT |
135 | error = dlm_master_lookup(ls, memb->nodeid, |
136 | b, namelen, | |
137 | DLM_LU_RECOVER_DIR, | |
138 | &nodeid, &result); | |
139 | if (error) { | |
140 | log_error(ls, "recover_dir lookup %d", | |
141 | error); | |
e7fd4179 | 142 | goto out_free; |
c04fecb4 DT |
143 | } |
144 | ||
145 | /* The name was found in rsbtbl, but the | |
146 | * master nodeid is different from | |
147 | * memb->nodeid which says it is the master. | |
148 | * This should not happen. */ | |
149 | ||
150 | if (result == DLM_LU_MATCH && | |
151 | nodeid != memb->nodeid) { | |
152 | count_bad++; | |
153 | log_error(ls, "recover_dir lookup %d " | |
154 | "nodeid %d memb %d bad %u", | |
155 | result, nodeid, memb->nodeid, | |
156 | count_bad); | |
157 | print_hex_dump_bytes("dlm_recover_dir ", | |
158 | DUMP_PREFIX_NONE, | |
159 | b, namelen); | |
160 | } | |
161 | ||
162 | /* The name was found in rsbtbl, and the | |
163 | * master nodeid matches memb->nodeid. */ | |
164 | ||
165 | if (result == DLM_LU_MATCH && | |
166 | nodeid == memb->nodeid) { | |
167 | count_match++; | |
168 | } | |
169 | ||
170 | /* The name was not found in rsbtbl and was | |
171 | * added with memb->nodeid as the master. */ | |
172 | ||
173 | if (result == DLM_LU_ADD) { | |
174 | count_add++; | |
175 | } | |
e7fd4179 | 176 | |
e7fd4179 | 177 | last_len = namelen; |
e7fd4179 DT |
178 | memcpy(last_name, b, namelen); |
179 | b += namelen; | |
cd9df1aa | 180 | left -= namelen; |
e7fd4179 DT |
181 | count++; |
182 | } | |
183 | } | |
c04fecb4 | 184 | done: |
e7fd4179 DT |
185 | ; |
186 | } | |
187 | ||
188 | out_status: | |
189 | error = 0; | |
c04fecb4 DT |
190 | dlm_set_recover_status(ls, DLM_RS_DIR); |
191 | ||
075f0177 | 192 | log_rinfo(ls, "dlm_recover_directory %u in %u new", |
c04fecb4 | 193 | count, count_add); |
e7fd4179 DT |
194 | out_free: |
195 | kfree(last_name); | |
196 | out: | |
e7fd4179 DT |
197 | return error; |
198 | } | |
199 | ||
85f0379a DT |
200 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
201 | { | |
202 | struct dlm_rsb *r; | |
7210cb7a DT |
203 | uint32_t hash, bucket; |
204 | int rv; | |
205 | ||
206 | hash = jhash(name, len, 0); | |
207 | bucket = hash & (ls->ls_rsbtbl_size - 1); | |
208 | ||
209 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | |
c04fecb4 | 210 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); |
7210cb7a DT |
211 | if (rv) |
212 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, | |
c04fecb4 | 213 | name, len, &r); |
7210cb7a DT |
214 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
215 | ||
216 | if (!rv) | |
217 | return r; | |
85f0379a DT |
218 | |
219 | down_read(&ls->ls_root_sem); | |
220 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | |
221 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { | |
222 | up_read(&ls->ls_root_sem); | |
c04fecb4 | 223 | log_debug(ls, "find_rsb_root revert to root_list %s", |
7210cb7a | 224 | r->res_name); |
85f0379a DT |
225 | return r; |
226 | } | |
227 | } | |
228 | up_read(&ls->ls_root_sem); | |
229 | return NULL; | |
230 | } | |
231 | ||
232 | /* Find the rsb where we left off (or start again), then send rsb names | |
233 | for rsb's we're master of and whose directory node matches the requesting | |
234 | node. inbuf is the rsb name last sent, inlen is the name's length */ | |
e7fd4179 DT |
235 | |
236 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | |
237 | char *outbuf, int outlen, int nodeid) | |
238 | { | |
239 | struct list_head *list; | |
85f0379a DT |
240 | struct dlm_rsb *r; |
241 | int offset = 0, dir_nodeid; | |
cd8e4679 | 242 | __be16 be_namelen; |
e7fd4179 | 243 | |
e7fd4179 | 244 | down_read(&ls->ls_root_sem); |
85f0379a DT |
245 | |
246 | if (inlen > 1) { | |
247 | r = find_rsb_root(ls, inbuf, inlen); | |
248 | if (!r) { | |
249 | inbuf[inlen - 1] = '\0'; | |
250 | log_error(ls, "copy_master_names from %d start %d %s", | |
251 | nodeid, inlen, inbuf); | |
252 | goto out; | |
253 | } | |
254 | list = r->res_root_list.next; | |
255 | } else { | |
e7fd4179 | 256 | list = ls->ls_root_list.next; |
85f0379a | 257 | } |
e7fd4179 DT |
258 | |
259 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { | |
260 | r = list_entry(list, struct dlm_rsb, res_root_list); | |
261 | if (r->res_nodeid) | |
262 | continue; | |
263 | ||
264 | dir_nodeid = dlm_dir_nodeid(r); | |
265 | if (dir_nodeid != nodeid) | |
266 | continue; | |
267 | ||
268 | /* | |
269 | * The block ends when we can't fit the following in the | |
270 | * remaining buffer space: | |
271 | * namelen (uint16_t) + | |
272 | * name (r->res_length) + | |
273 | * end-of-block record 0x0000 (uint16_t) | |
274 | */ | |
275 | ||
276 | if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { | |
277 | /* Write end-of-block record */ | |
cd8e4679 HH |
278 | be_namelen = cpu_to_be16(0); |
279 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | |
280 | offset += sizeof(__be16); | |
c04fecb4 | 281 | ls->ls_recover_dir_sent_msg++; |
e7fd4179 DT |
282 | goto out; |
283 | } | |
284 | ||
285 | be_namelen = cpu_to_be16(r->res_length); | |
cd8e4679 HH |
286 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
287 | offset += sizeof(__be16); | |
e7fd4179 DT |
288 | memcpy(outbuf + offset, r->res_name, r->res_length); |
289 | offset += r->res_length; | |
c04fecb4 | 290 | ls->ls_recover_dir_sent_res++; |
e7fd4179 DT |
291 | } |
292 | ||
293 | /* | |
294 | * If we've reached the end of the list (and there's room) write a | |
295 | * terminating record. | |
296 | */ | |
297 | ||
298 | if ((list == &ls->ls_root_list) && | |
299 | (offset + sizeof(uint16_t) <= outlen)) { | |
cd8e4679 HH |
300 | be_namelen = cpu_to_be16(0xFFFF); |
301 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | |
302 | offset += sizeof(__be16); | |
c04fecb4 | 303 | ls->ls_recover_dir_sent_msg++; |
e7fd4179 | 304 | } |
e7fd4179 DT |
305 | out: |
306 | up_read(&ls->ls_root_sem); | |
307 | } | |
308 |