]>
Commit | Line | Data |
---|---|---|
b4d0d230 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
9cc6fc50 DH |
2 | /* Handle fileserver selection and rotation. |
3 | * | |
4 | * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. | |
5 | * Written by David Howells ([email protected]) | |
9cc6fc50 DH |
6 | */ |
7 | ||
8 | #include <linux/kernel.h> | |
9 | #include <linux/slab.h> | |
d2ddc776 DH |
10 | #include <linux/fs.h> |
11 | #include <linux/sched.h> | |
12 | #include <linux/delay.h> | |
13 | #include <linux/sched/signal.h> | |
9cc6fc50 | 14 | #include "internal.h" |
d2ddc776 | 15 | #include "afs_fs.h" |
fe245c8f | 16 | #include "protocol_uae.h" |
9cc6fc50 | 17 | |
495f2ae9 DH |
18 | void afs_clear_server_states(struct afs_operation *op) |
19 | { | |
20 | unsigned int i; | |
21 | ||
22 | if (op->server_states) { | |
23 | for (i = 0; i < op->server_list->nr_servers; i++) | |
24 | afs_put_endpoint_state(op->server_states[i].endpoint_state, | |
25 | afs_estate_trace_put_server_state); | |
26 | kfree(op->server_states); | |
27 | } | |
28 | } | |
29 | ||
d2ddc776 DH |
30 | /* |
31 | * Begin iteration through a server list, starting with the vnode's last used | |
32 | * server if possible, or the last recorded good server if not. | |
33 | */ | |
a310082f | 34 | static bool afs_start_fs_iteration(struct afs_operation *op, |
d2ddc776 DH |
35 | struct afs_vnode *vnode) |
36 | { | |
20325960 DH |
37 | struct afs_server *server; |
38 | void *cb_server; | |
d2ddc776 DH |
39 | int i; |
40 | ||
495f2ae9 DH |
41 | trace_afs_rotate(op, afs_rotate_trace_start, 0); |
42 | ||
e49c7b2f | 43 | read_lock(&op->volume->servers_lock); |
8a070a96 DH |
44 | op->server_list = afs_get_serverlist( |
45 | rcu_dereference_protected(op->volume->servers, | |
46 | lockdep_is_held(&op->volume->servers_lock))); | |
e49c7b2f | 47 | read_unlock(&op->volume->servers_lock); |
d2ddc776 | 48 | |
495f2ae9 DH |
49 | op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]), |
50 | GFP_KERNEL); | |
51 | if (!op->server_states) { | |
52 | afs_op_nomem(op); | |
53 | trace_afs_rotate(op, afs_rotate_trace_nomem, 0); | |
54 | return false; | |
55 | } | |
56 | ||
57 | rcu_read_lock(); | |
58 | for (i = 0; i < op->server_list->nr_servers; i++) { | |
59 | struct afs_endpoint_state *estate; | |
60 | struct afs_server_state *s = &op->server_states[i]; | |
61 | ||
62 | server = op->server_list->servers[i].server; | |
63 | estate = rcu_dereference(server->endpoint_state); | |
64 | s->endpoint_state = afs_get_endpoint_state(estate, | |
65 | afs_estate_trace_get_server_state); | |
66 | s->probe_seq = estate->probe_seq; | |
67 | s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1; | |
68 | init_waitqueue_entry(&s->probe_waiter, current); | |
69 | afs_get_address_preferences(op->net, estate->addresses); | |
70 | } | |
71 | rcu_read_unlock(); | |
72 | ||
73 | ||
905b8615 | 74 | op->untried_servers = (1UL << op->server_list->nr_servers) - 1; |
495f2ae9 | 75 | op->server_index = -1; |
3bf0fb6f | 76 | |
20325960 DH |
77 | cb_server = vnode->cb_server; |
78 | if (cb_server) { | |
d2ddc776 | 79 | /* See if the vnode's preferred record is still available */ |
a310082f | 80 | for (i = 0; i < op->server_list->nr_servers; i++) { |
20325960 DH |
81 | server = op->server_list->servers[i].server; |
82 | if (server == cb_server) { | |
905b8615 | 83 | op->server_index = i; |
d2ddc776 DH |
84 | goto found_interest; |
85 | } | |
86 | } | |
87 | ||
88 | /* If we have a lock outstanding on a server that's no longer | |
89 | * serving this vnode, then we can't switch to another server | |
90 | * and have to return an error. | |
91 | */ | |
a310082f | 92 | if (op->flags & AFS_OPERATION_CUR_ONLY) { |
2de5599f | 93 | afs_op_set_error(op, -ESTALE); |
495f2ae9 | 94 | trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0); |
d2ddc776 DH |
95 | return false; |
96 | } | |
97 | ||
98 | /* Note that the callback promise is effectively broken */ | |
99 | write_seqlock(&vnode->cb_lock); | |
20325960 DH |
100 | ASSERTCMP(cb_server, ==, vnode->cb_server); |
101 | vnode->cb_server = NULL; | |
453924de | 102 | if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) |
d2ddc776 DH |
103 | vnode->cb_break++; |
104 | write_sequnlock(&vnode->cb_lock); | |
d2ddc776 DH |
105 | } |
106 | ||
107 | found_interest: | |
d2ddc776 DH |
108 | return true; |
109 | } | |
110 | ||
111 | /* | |
112 | * Post volume busy note. | |
113 | */ | |
28f4c580 | 114 | static void afs_busy(struct afs_operation *op, u32 abort_code) |
d2ddc776 DH |
115 | { |
116 | const char *m; | |
117 | ||
118 | switch (abort_code) { | |
119 | case VOFFLINE: m = "offline"; break; | |
120 | case VRESTARTING: m = "restarting"; break; | |
121 | case VSALVAGING: m = "being salvaged"; break; | |
122 | default: m = "busy"; break; | |
123 | } | |
0fafdc9f | 124 | |
28f4c580 DH |
125 | pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n", |
126 | op->volume->vid, op->volume->name, &op->server->uuid, m); | |
d2ddc776 DH |
127 | } |
128 | ||
129 | /* | |
130 | * Sleep and retry the operation to the same fileserver. | |
131 | */ | |
a310082f | 132 | static bool afs_sleep_and_retry(struct afs_operation *op) |
d2ddc776 | 133 | { |
495f2ae9 | 134 | trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0); |
e49c7b2f | 135 | if (!(op->flags & AFS_OPERATION_UNINTR)) { |
20b8391f DH |
136 | msleep_interruptible(1000); |
137 | if (signal_pending(current)) { | |
2de5599f | 138 | afs_op_set_error(op, -ERESTARTSYS); |
20b8391f DH |
139 | return false; |
140 | } | |
141 | } else { | |
142 | msleep(1000); | |
d2ddc776 DH |
143 | } |
144 | ||
145 | return true; | |
146 | } | |
147 | ||
148 | /* | |
149 | * Select the fileserver to use. May be called multiple times to rotate | |
150 | * through the fileservers. | |
151 | */ | |
a310082f | 152 | bool afs_select_fileserver(struct afs_operation *op) |
d2ddc776 | 153 | { |
f49b594d | 154 | struct afs_addr_list *alist; |
d2ddc776 | 155 | struct afs_server *server; |
e49c7b2f | 156 | struct afs_vnode *vnode = op->file[0].vnode; |
f49b594d | 157 | unsigned long set, failed; |
aa453bec | 158 | s32 abort_code = op->call_abort_code; |
495f2ae9 DH |
159 | int best_prio = 0; |
160 | int error = op->call_error, addr_index, i, j; | |
d2ddc776 | 161 | |
075171fd DH |
162 | op->nr_iterations++; |
163 | ||
98f9fda2 | 164 | _enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d", |
075171fd | 165 | op->debug_id, op->nr_iterations, op->volume->vid, |
98f9fda2 DH |
166 | op->server_index, op->untried_servers, |
167 | op->addr_index, op->addr_tried, | |
aa453bec | 168 | error, abort_code); |
d2ddc776 | 169 | |
a310082f | 170 | if (op->flags & AFS_OPERATION_STOP) { |
495f2ae9 | 171 | trace_afs_rotate(op, afs_rotate_trace_stopped, 0); |
d2ddc776 DH |
172 | _leave(" = f [stopped]"); |
173 | return false; | |
174 | } | |
175 | ||
075171fd DH |
176 | if (op->nr_iterations == 0) |
177 | goto start; | |
744bcd71 | 178 | |
495f2ae9 DH |
179 | WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error); |
180 | trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error); | |
af9a5b49 | 181 | |
d2ddc776 | 182 | /* Evaluate the result of the previous operation, if there was one. */ |
aa453bec | 183 | switch (op->call_error) { |
d2ddc776 | 184 | case 0: |
28f4c580 DH |
185 | clear_bit(AFS_SE_VOLUME_OFFLINE, |
186 | &op->server_list->servers[op->server_index].flags); | |
187 | clear_bit(AFS_SE_VOLUME_BUSY, | |
188 | &op->server_list->servers[op->server_index].flags); | |
aa453bec | 189 | op->cumul_error.responded = true; |
453924de DH |
190 | |
191 | /* We succeeded, but we may need to redo the op from another | |
192 | * server if we're looking at a set of RO volumes where some of | |
193 | * the servers have not yet been brought up to date lest we | |
194 | * regress the data. We only switch to the new version once | |
195 | * >=50% of the servers are updated. | |
196 | */ | |
197 | error = afs_update_volume_state(op); | |
198 | if (error != 0) { | |
199 | if (error == 1) { | |
200 | afs_sleep_and_retry(op); | |
201 | goto restart_from_beginning; | |
202 | } | |
203 | afs_op_set_error(op, error); | |
204 | goto failed; | |
205 | } | |
aa453bec | 206 | fallthrough; |
d2ddc776 DH |
207 | default: |
208 | /* Success or local failure. Stop. */ | |
2de5599f | 209 | afs_op_set_error(op, error); |
a310082f | 210 | op->flags |= AFS_OPERATION_STOP; |
495f2ae9 | 211 | trace_afs_rotate(op, afs_rotate_trace_stop, error); |
e7f680f4 | 212 | _leave(" = f [okay/local %d]", error); |
d2ddc776 DH |
213 | return false; |
214 | ||
215 | case -ECONNABORTED: | |
216 | /* The far side rejected the operation on some grounds. This | |
217 | * might involve the server being busy or the volume having been moved. | |
fe245c8f DH |
218 | * |
219 | * Note that various V* errors should not be sent to a cache manager | |
220 | * by a fileserver as they should be translated to more modern UAE* | |
221 | * errors instead. IBM AFS and OpenAFS fileservers, however, do leak | |
222 | * these abort codes. | |
d2ddc776 | 223 | */ |
495f2ae9 | 224 | trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code); |
aa453bec DH |
225 | op->cumul_error.responded = true; |
226 | switch (abort_code) { | |
d2ddc776 DH |
227 | case VNOVOL: |
228 | /* This fileserver doesn't know about the volume. | |
229 | * - May indicate that the VL is wrong - retry once and compare | |
230 | * the results. | |
231 | * - May indicate that the fileserver couldn't attach to the vol. | |
fe245c8f DH |
232 | * - The volume might have been temporarily removed so that it can |
233 | * be replaced by a volume restore. "vos" might have ended one | |
234 | * transaction and has yet to create the next. | |
235 | * - The volume might not be blessed or might not be in-service | |
236 | * (administrative action). | |
d2ddc776 | 237 | */ |
a310082f | 238 | if (op->flags & AFS_OPERATION_VNOVOL) { |
aa453bec | 239 | afs_op_accumulate_error(op, -EREMOTEIO, abort_code); |
3d9fa911 | 240 | goto next_server; |
d2ddc776 DH |
241 | } |
242 | ||
e49c7b2f | 243 | write_lock(&op->volume->servers_lock); |
905b8615 | 244 | op->server_list->vnovol_mask |= 1 << op->server_index; |
e49c7b2f | 245 | write_unlock(&op->volume->servers_lock); |
d2ddc776 | 246 | |
e49c7b2f DH |
247 | set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags); |
248 | error = afs_check_volume_status(op->volume, op); | |
2de5599f DH |
249 | if (error < 0) { |
250 | afs_op_set_error(op, error); | |
251 | goto failed; | |
252 | } | |
d2ddc776 | 253 | |
e49c7b2f | 254 | if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) { |
2de5599f | 255 | afs_op_set_error(op, -ENOMEDIUM); |
d2ddc776 DH |
256 | goto failed; |
257 | } | |
258 | ||
259 | /* If the server list didn't change, then assume that | |
260 | * it's the fileserver having trouble. | |
261 | */ | |
8a070a96 | 262 | if (rcu_access_pointer(op->volume->servers) == op->server_list) { |
aa453bec | 263 | afs_op_accumulate_error(op, -EREMOTEIO, abort_code); |
3d9fa911 | 264 | goto next_server; |
d2ddc776 DH |
265 | } |
266 | ||
267 | /* Try again */ | |
a310082f | 268 | op->flags |= AFS_OPERATION_VNOVOL; |
d2ddc776 DH |
269 | _leave(" = t [vnovol]"); |
270 | return true; | |
271 | ||
d2ddc776 | 272 | case VVOLEXISTS: |
d2ddc776 | 273 | case VONLINE: |
fe245c8f DH |
274 | /* These should not be returned from the fileserver. */ |
275 | pr_warn("Fileserver returned unexpected abort %d\n", | |
aa453bec DH |
276 | abort_code); |
277 | afs_op_accumulate_error(op, -EREMOTEIO, abort_code); | |
fe245c8f DH |
278 | goto next_server; |
279 | ||
280 | case VNOSERVICE: | |
281 | /* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver | |
282 | * if the volume was neither in-service nor administratively | |
283 | * blessed. All usage was replaced by VNOVOL because AFS 3.1 and | |
284 | * earlier cache managers did not handle VNOSERVICE and assumed | |
285 | * it was the client OSes errno 105. | |
286 | * | |
287 | * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the | |
288 | * fileserver idle dead time error which was sent in place of | |
289 | * RX_CALL_TIMEOUT (-3). The error was intended to be sent if the | |
290 | * fileserver took too long to send a reply to the client. | |
291 | * RX_CALL_TIMEOUT would have caused the cache manager to mark the | |
292 | * server down whereas VNOSERVICE since AFS 3.2 would cause cache | |
293 | * manager to temporarily (up to 15 minutes) mark the volume | |
294 | * instance as unusable. | |
295 | * | |
296 | * The idle dead logic resulted in cache inconsistency since a | |
297 | * state changing call that the cache manager assumed was dead | |
298 | * could still be processed to completion by the fileserver. This | |
299 | * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer | |
300 | * returned. However, many 1.4.8 through 1.6.24 fileservers are | |
301 | * still in existence. | |
302 | * | |
303 | * AuriStorFS fileservers have never returned VNOSERVICE. | |
304 | * | |
305 | * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT. | |
306 | */ | |
307 | case RX_CALL_TIMEOUT: | |
aa453bec | 308 | afs_op_accumulate_error(op, -ETIMEDOUT, abort_code); |
d2ddc776 DH |
309 | goto next_server; |
310 | ||
fe245c8f DH |
311 | case VSALVAGING: /* This error should not be leaked to cache managers |
312 | * but is from OpenAFS demand attach fileservers. | |
313 | * It should be treated as an alias for VOFFLINE. | |
314 | */ | |
315 | case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */ | |
d2ddc776 | 316 | case VOFFLINE: |
fe245c8f DH |
317 | /* The volume is in use by the volserver or another volume utility |
318 | * for an operation that might alter the contents. The volume is | |
319 | * expected to come back but it might take a long time (could be | |
320 | * days). | |
321 | */ | |
28f4c580 DH |
322 | if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE, |
323 | &op->server_list->servers[op->server_index].flags)) { | |
324 | afs_busy(op, abort_code); | |
325 | clear_bit(AFS_SE_VOLUME_BUSY, | |
326 | &op->server_list->servers[op->server_index].flags); | |
d2ddc776 | 327 | } |
a310082f | 328 | if (op->flags & AFS_OPERATION_NO_VSLEEP) { |
2de5599f | 329 | afs_op_set_error(op, -EADV); |
d2ddc776 DH |
330 | goto failed; |
331 | } | |
d2ddc776 DH |
332 | goto busy; |
333 | ||
fe245c8f | 334 | case VRESTARTING: /* The fileserver is either shutting down or starting up. */ |
d2ddc776 | 335 | case VBUSY: |
fe245c8f DH |
336 | /* The volume is in use by the volserver or another volume |
337 | * utility for an operation that is not expected to alter the | |
338 | * contents of the volume. VBUSY does not need to be returned | |
339 | * for a ROVOL or BACKVOL bound to an ITBusy volserver | |
340 | * transaction. The fileserver is permitted to continue serving | |
341 | * content from ROVOLs and BACKVOLs during an ITBusy transaction | |
342 | * because the content will not change. However, many fileserver | |
343 | * releases do return VBUSY for ROVOL and BACKVOL instances under | |
344 | * many circumstances. | |
345 | * | |
346 | * Retry after going round all the servers unless we have a file | |
347 | * lock we need to maintain. | |
d2ddc776 | 348 | */ |
a310082f | 349 | if (op->flags & AFS_OPERATION_NO_VSLEEP) { |
2de5599f | 350 | afs_op_set_error(op, -EBUSY); |
d2ddc776 DH |
351 | goto failed; |
352 | } | |
28f4c580 DH |
353 | if (!test_and_set_bit(AFS_SE_VOLUME_BUSY, |
354 | &op->server_list->servers[op->server_index].flags)) { | |
355 | afs_busy(op, abort_code); | |
356 | clear_bit(AFS_SE_VOLUME_OFFLINE, | |
357 | &op->server_list->servers[op->server_index].flags); | |
d2ddc776 DH |
358 | } |
359 | busy: | |
a310082f DH |
360 | if (op->flags & AFS_OPERATION_CUR_ONLY) { |
361 | if (!afs_sleep_and_retry(op)) | |
d2ddc776 DH |
362 | goto failed; |
363 | ||
fe245c8f | 364 | /* Retry with same server & address */ |
d2ddc776 DH |
365 | _leave(" = t [vbusy]"); |
366 | return true; | |
367 | } | |
368 | ||
a310082f | 369 | op->flags |= AFS_OPERATION_VBUSY; |
d2ddc776 DH |
370 | goto next_server; |
371 | ||
372 | case VMOVED: | |
373 | /* The volume migrated to another server. We consider | |
374 | * consider all locks and callbacks broken and request | |
375 | * an update from the VLDB. | |
376 | * | |
377 | * We also limit the number of VMOVED hops we will | |
378 | * honour, just in case someone sets up a loop. | |
379 | */ | |
a310082f | 380 | if (op->flags & AFS_OPERATION_VMOVED) { |
2de5599f | 381 | afs_op_set_error(op, -EREMOTEIO); |
d2ddc776 DH |
382 | goto failed; |
383 | } | |
a310082f | 384 | op->flags |= AFS_OPERATION_VMOVED; |
d2ddc776 | 385 | |
e49c7b2f DH |
386 | set_bit(AFS_VOLUME_WAIT, &op->volume->flags); |
387 | set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags); | |
388 | error = afs_check_volume_status(op->volume, op); | |
2de5599f DH |
389 | if (error < 0) { |
390 | afs_op_set_error(op, error); | |
391 | goto failed; | |
392 | } | |
d2ddc776 DH |
393 | |
394 | /* If the server list didn't change, then the VLDB is | |
395 | * out of sync with the fileservers. This is hopefully | |
396 | * a temporary condition, however, so we don't want to | |
397 | * permanently block access to the file. | |
398 | * | |
399 | * TODO: Try other fileservers if we can. | |
400 | * | |
401 | * TODO: Retry a few times with sleeps. | |
402 | */ | |
8a070a96 | 403 | if (rcu_access_pointer(op->volume->servers) == op->server_list) { |
aa453bec | 404 | afs_op_accumulate_error(op, -ENOMEDIUM, abort_code); |
d2ddc776 DH |
405 | goto failed; |
406 | } | |
407 | ||
408 | goto restart_from_beginning; | |
409 | ||
eb8eae65 DH |
410 | case UAEIO: |
411 | case VIO: | |
aa453bec | 412 | afs_op_accumulate_error(op, -EREMOTEIO, abort_code); |
eb8eae65 DH |
413 | if (op->volume->type != AFSVL_RWVOL) |
414 | goto next_server; | |
415 | goto failed; | |
416 | ||
fe245c8f DH |
417 | case VDISKFULL: |
418 | case UAENOSPC: | |
419 | /* The partition is full. Only applies to RWVOLs. | |
420 | * Translate locally and return ENOSPC. | |
421 | * No replicas to failover to. | |
422 | */ | |
2de5599f | 423 | afs_op_set_error(op, -ENOSPC); |
fe245c8f DH |
424 | goto failed_but_online; |
425 | ||
426 | case VOVERQUOTA: | |
427 | case UAEDQUOT: | |
428 | /* Volume is full. Only applies to RWVOLs. | |
429 | * Translate locally and return EDQUOT. | |
430 | * No replicas to failover to. | |
431 | */ | |
2de5599f | 432 | afs_op_set_error(op, -EDQUOT); |
fe245c8f DH |
433 | goto failed_but_online; |
434 | ||
d2ddc776 | 435 | default: |
aa453bec | 436 | afs_op_accumulate_error(op, error, abort_code); |
fe245c8f | 437 | failed_but_online: |
28f4c580 DH |
438 | clear_bit(AFS_SE_VOLUME_OFFLINE, |
439 | &op->server_list->servers[op->server_index].flags); | |
440 | clear_bit(AFS_SE_VOLUME_BUSY, | |
441 | &op->server_list->servers[op->server_index].flags); | |
d2ddc776 DH |
442 | goto failed; |
443 | } | |
444 | ||
e7f680f4 DH |
445 | case -ETIMEDOUT: |
446 | case -ETIME: | |
2de5599f | 447 | if (afs_op_error(op) != -EDESTADDRREQ) |
e7f680f4 | 448 | goto iterate_address; |
df561f66 | 449 | fallthrough; |
4584ae96 DH |
450 | case -ERFKILL: |
451 | case -EADDRNOTAVAIL: | |
d2ddc776 DH |
452 | case -ENETUNREACH: |
453 | case -EHOSTUNREACH: | |
4584ae96 | 454 | case -EHOSTDOWN: |
d2ddc776 | 455 | case -ECONNREFUSED: |
d2ddc776 | 456 | _debug("no conn"); |
aa453bec | 457 | afs_op_accumulate_error(op, error, 0); |
d2ddc776 | 458 | goto iterate_address; |
1a025028 | 459 | |
adc9613f DH |
460 | case -ENETRESET: |
461 | pr_warn("kAFS: Peer reset %s (op=%x)\n", | |
462 | op->type ? op->type->name : "???", op->debug_id); | |
463 | fallthrough; | |
1a025028 DH |
464 | case -ECONNRESET: |
465 | _debug("call reset"); | |
2de5599f | 466 | afs_op_set_error(op, error); |
1a025028 | 467 | goto failed; |
d2ddc776 DH |
468 | } |
469 | ||
470 | restart_from_beginning: | |
495f2ae9 | 471 | trace_afs_rotate(op, afs_rotate_trace_restart, 0); |
d2ddc776 | 472 | _debug("restart"); |
495f2ae9 | 473 | op->estate = NULL; |
20325960 | 474 | op->server = NULL; |
495f2ae9 DH |
475 | afs_clear_server_states(op); |
476 | op->server_states = NULL; | |
e49c7b2f | 477 | afs_put_serverlist(op->net, op->server_list); |
a310082f | 478 | op->server_list = NULL; |
d2ddc776 DH |
479 | start: |
480 | _debug("start"); | |
495f2ae9 | 481 | ASSERTCMP(op->estate, ==, NULL); |
d2ddc776 DH |
482 | /* See if we need to do an update of the volume record. Note that the |
483 | * volume may have moved or even have been deleted. | |
484 | */ | |
e49c7b2f | 485 | error = afs_check_volume_status(op->volume, op); |
495f2ae9 | 486 | trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error); |
2de5599f DH |
487 | if (error < 0) { |
488 | afs_op_set_error(op, error); | |
489 | goto failed; | |
490 | } | |
d2ddc776 | 491 | |
a310082f | 492 | if (!afs_start_fs_iteration(op, vnode)) |
d2ddc776 | 493 | goto failed; |
d2ddc776 | 494 | |
e49c7b2f | 495 | _debug("__ VOL %llx __", op->volume->vid); |
3bf0fb6f DH |
496 | |
497 | pick_server: | |
905b8615 | 498 | _debug("pick [%lx]", op->untried_servers); |
495f2ae9 | 499 | ASSERTCMP(op->estate, ==, NULL); |
3bf0fb6f | 500 | |
495f2ae9 DH |
501 | error = afs_wait_for_fs_probes(op, op->server_states, |
502 | !(op->flags & AFS_OPERATION_UNINTR)); | |
503 | switch (error) { | |
504 | case 0: /* No untried responsive servers and no outstanding probes */ | |
505 | trace_afs_rotate(op, afs_rotate_trace_probe_none, 0); | |
506 | goto no_more_servers; | |
507 | case 1: /* Got a response */ | |
508 | trace_afs_rotate(op, afs_rotate_trace_probe_response, 0); | |
509 | break; | |
510 | case 2: /* Probe data superseded */ | |
511 | trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0); | |
512 | goto restart_from_beginning; | |
513 | default: | |
514 | trace_afs_rotate(op, afs_rotate_trace_probe_error, error); | |
2de5599f DH |
515 | afs_op_set_error(op, error); |
516 | goto failed; | |
517 | } | |
3bf0fb6f | 518 | |
495f2ae9 DH |
519 | /* Pick the untried server with the highest priority untried endpoint. |
520 | * If we have outstanding callbacks, we stick with the server we're | |
521 | * already using if we can. | |
3bf0fb6f | 522 | */ |
20325960 | 523 | if (op->server) { |
905b8615 DH |
524 | _debug("server %u", op->server_index); |
525 | if (test_bit(op->server_index, &op->untried_servers)) | |
3bf0fb6f | 526 | goto selected_server; |
20325960 DH |
527 | op->server = NULL; |
528 | _debug("no server"); | |
3bf0fb6f DH |
529 | } |
530 | ||
495f2ae9 | 531 | rcu_read_lock(); |
905b8615 | 532 | op->server_index = -1; |
495f2ae9 | 533 | best_prio = -1; |
a310082f | 534 | for (i = 0; i < op->server_list->nr_servers; i++) { |
495f2ae9 | 535 | struct afs_endpoint_state *es; |
d3acd81e | 536 | struct afs_server_entry *se = &op->server_list->servers[i]; |
495f2ae9 | 537 | struct afs_addr_list *sal; |
d3acd81e | 538 | struct afs_server *s = se->server; |
3bf0fb6f | 539 | |
905b8615 | 540 | if (!test_bit(i, &op->untried_servers) || |
d3acd81e | 541 | test_bit(AFS_SE_EXCLUDED, &se->flags) || |
f3c130e6 | 542 | !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) |
3bf0fb6f | 543 | continue; |
da0e01cc | 544 | es = op->server_states[i].endpoint_state; |
495f2ae9 DH |
545 | sal = es->addresses; |
546 | ||
547 | afs_get_address_preferences_rcu(op->net, sal); | |
548 | for (j = 0; j < sal->nr_addrs; j++) { | |
da0e01cc DH |
549 | if (es->failed_set & (1 << j)) |
550 | continue; | |
495f2ae9 DH |
551 | if (!sal->addrs[j].peer) |
552 | continue; | |
553 | if (sal->addrs[j].prio > best_prio) { | |
554 | op->server_index = i; | |
555 | best_prio = sal->addrs[j].prio; | |
556 | } | |
3bf0fb6f DH |
557 | } |
558 | } | |
495f2ae9 | 559 | rcu_read_unlock(); |
3bf0fb6f | 560 | |
905b8615 | 561 | if (op->server_index == -1) |
3bf0fb6f DH |
562 | goto no_more_servers; |
563 | ||
564 | selected_server: | |
495f2ae9 DH |
565 | trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio); |
566 | _debug("use %d prio %u", op->server_index, best_prio); | |
905b8615 | 567 | __clear_bit(op->server_index, &op->untried_servers); |
3bf0fb6f | 568 | |
d2ddc776 DH |
569 | /* We're starting on a different fileserver from the list. We need to |
570 | * check it, create a callback intercept, find its address list and | |
571 | * probe its capabilities before we use it. | |
572 | */ | |
495f2ae9 | 573 | ASSERTCMP(op->estate, ==, NULL); |
905b8615 | 574 | server = op->server_list->servers[op->server_index].server; |
d2ddc776 | 575 | |
f49b594d | 576 | if (!afs_check_server_record(op, server, op->key)) |
d2ddc776 DH |
577 | goto failed; |
578 | ||
579 | _debug("USING SERVER: %pU", &server->uuid); | |
580 | ||
8409f67b | 581 | op->flags |= AFS_OPERATION_RETRY_SERVER; |
20325960 DH |
582 | op->server = server; |
583 | if (vnode->cb_server != server) { | |
584 | vnode->cb_server = server; | |
453924de DH |
585 | vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break); |
586 | atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE); | |
20325960 | 587 | } |
d2ddc776 | 588 | |
8409f67b | 589 | retry_server: |
98f9fda2 DH |
590 | op->addr_tried = 0; |
591 | op->addr_index = -1; | |
d2ddc776 DH |
592 | |
593 | iterate_address: | |
d2ddc776 DH |
594 | /* Iterate over the current server's address list to try and find an |
595 | * address on which it will respond to us. | |
596 | */ | |
495f2ae9 DH |
597 | op->estate = op->server_states[op->server_index].endpoint_state; |
598 | set = READ_ONCE(op->estate->responsive_set); | |
599 | failed = READ_ONCE(op->estate->failed_set); | |
600 | _debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed); | |
f49b594d | 601 | set &= ~(failed | op->addr_tried); |
495f2ae9 | 602 | trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set); |
98f9fda2 | 603 | if (!set) |
495f2ae9 DH |
604 | goto wait_for_more_probe_results; |
605 | ||
606 | alist = op->estate->addresses; | |
83505bde DH |
607 | best_prio = -1; |
608 | addr_index = 0; | |
495f2ae9 | 609 | for (i = 0; i < alist->nr_addrs; i++) { |
da0e01cc DH |
610 | if (!(set & (1 << i))) |
611 | continue; | |
495f2ae9 DH |
612 | if (alist->addrs[i].prio > best_prio) { |
613 | addr_index = i; | |
614 | best_prio = alist->addrs[i].prio; | |
615 | } | |
616 | } | |
d2ddc776 | 617 | |
83505bde | 618 | alist->preferred = addr_index; |
98f9fda2 DH |
619 | |
620 | op->addr_index = addr_index; | |
621 | set_bit(addr_index, &op->addr_tried); | |
3bf0fb6f | 622 | |
16069e13 DH |
623 | op->volsync.creation = TIME64_MIN; |
624 | op->volsync.update = TIME64_MIN; | |
aa453bec | 625 | op->call_responded = false; |
98f9fda2 DH |
626 | _debug("address [%u] %u/%u %pISp", |
627 | op->server_index, addr_index, alist->nr_addrs, | |
628 | rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer)); | |
fe4d774c DH |
629 | _leave(" = t"); |
630 | return true; | |
d2ddc776 | 631 | |
495f2ae9 DH |
632 | wait_for_more_probe_results: |
633 | error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, | |
634 | !(op->flags & AFS_OPERATION_UNINTR)); | |
f94d5420 | 635 | if (error == 1) |
495f2ae9 | 636 | goto iterate_address; |
f94d5420 MD |
637 | if (!error) |
638 | goto restart_from_beginning; | |
495f2ae9 | 639 | |
8409f67b DH |
640 | /* We've now had a failure to respond on all of a server's addresses - |
641 | * immediately probe them again and consider retrying the server. | |
642 | */ | |
495f2ae9 | 643 | trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0); |
8409f67b DH |
644 | afs_probe_fileserver(op->net, op->server); |
645 | if (op->flags & AFS_OPERATION_RETRY_SERVER) { | |
495f2ae9 | 646 | error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried, |
f49b594d | 647 | !(op->flags & AFS_OPERATION_UNINTR)); |
8409f67b | 648 | switch (error) { |
f94d5420 | 649 | case 1: |
8409f67b | 650 | op->flags &= ~AFS_OPERATION_RETRY_SERVER; |
f94d5420 | 651 | trace_afs_rotate(op, afs_rotate_trace_retry_server, 1); |
8409f67b | 652 | goto retry_server; |
f94d5420 MD |
653 | case 0: |
654 | trace_afs_rotate(op, afs_rotate_trace_retry_server, 0); | |
655 | goto restart_from_beginning; | |
8409f67b | 656 | case -ERESTARTSYS: |
2de5599f DH |
657 | afs_op_set_error(op, error); |
658 | goto failed; | |
8409f67b DH |
659 | case -ETIME: |
660 | case -EDESTADDRREQ: | |
661 | goto next_server; | |
662 | } | |
663 | } | |
664 | ||
16280a15 | 665 | next_server: |
495f2ae9 | 666 | trace_afs_rotate(op, afs_rotate_trace_next_server, 0); |
16280a15 | 667 | _debug("next"); |
495f2ae9 | 668 | op->estate = NULL; |
3bf0fb6f | 669 | goto pick_server; |
16280a15 | 670 | |
3bf0fb6f | 671 | no_more_servers: |
16280a15 DH |
672 | /* That's all the servers poked to no good effect. Try again if some |
673 | * of them were busy. | |
674 | */ | |
495f2ae9 DH |
675 | trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0); |
676 | if (op->flags & AFS_OPERATION_VBUSY) { | |
677 | afs_sleep_and_retry(op); | |
678 | op->flags &= ~AFS_OPERATION_VBUSY; | |
16280a15 | 679 | goto restart_from_beginning; |
495f2ae9 | 680 | } |
16280a15 | 681 | |
f49b594d | 682 | rcu_read_lock(); |
a310082f | 683 | for (i = 0; i < op->server_list->nr_servers; i++) { |
f49b594d | 684 | struct afs_endpoint_state *estate; |
3bf0fb6f | 685 | |
da0e01cc | 686 | estate = op->server_states[i].endpoint_state; |
f49b594d | 687 | error = READ_ONCE(estate->error); |
aa453bec | 688 | if (error < 0) |
f49b594d | 689 | afs_op_accumulate_error(op, error, estate->abort_code); |
3bf0fb6f | 690 | } |
f49b594d | 691 | rcu_read_unlock(); |
3bf0fb6f | 692 | |
d2ddc776 | 693 | failed: |
495f2ae9 | 694 | trace_afs_rotate(op, afs_rotate_trace_failed, 0); |
a310082f | 695 | op->flags |= AFS_OPERATION_STOP; |
83505bde | 696 | op->estate = NULL; |
2de5599f | 697 | _leave(" = f [failed %d]", afs_op_error(op)); |
d2ddc776 DH |
698 | return false; |
699 | } | |
700 | ||
744bcd71 DH |
701 | /* |
702 | * Dump cursor state in the case of the error being EDESTADDRREQ. | |
703 | */ | |
e49c7b2f | 704 | void afs_dump_edestaddrreq(const struct afs_operation *op) |
744bcd71 DH |
705 | { |
706 | static int count; | |
707 | int i; | |
708 | ||
709 | if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) | |
710 | return; | |
711 | count++; | |
712 | ||
713 | rcu_read_lock(); | |
714 | ||
715 | pr_notice("EDESTADDR occurred\n"); | |
aa453bec | 716 | pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n", |
e49c7b2f | 717 | op->file[0].cb_break_before, |
aa453bec DH |
718 | op->file[1].cb_break_before, op->flags, op->cumul_error.error); |
719 | pr_notice("OP: ut=%lx ix=%d ni=%u\n", | |
905b8615 | 720 | op->untried_servers, op->server_index, op->nr_iterations); |
aa453bec DH |
721 | pr_notice("OP: call er=%d ac=%d r=%u\n", |
722 | op->call_error, op->call_abort_code, op->call_responded); | |
744bcd71 | 723 | |
a310082f DH |
724 | if (op->server_list) { |
725 | const struct afs_server_list *sl = op->server_list; | |
f49b594d | 726 | |
495f2ae9 DH |
727 | pr_notice("FC: SL nr=%u vnov=%hx\n", |
728 | sl->nr_servers, sl->vnovol_mask); | |
744bcd71 DH |
729 | for (i = 0; i < sl->nr_servers; i++) { |
730 | const struct afs_server *s = sl->servers[i].server; | |
f49b594d DH |
731 | const struct afs_endpoint_state *e = |
732 | rcu_dereference(s->endpoint_state); | |
733 | const struct afs_addr_list *a = e->addresses; | |
734 | ||
744bcd71 DH |
735 | pr_notice("FC: server fl=%lx av=%u %pU\n", |
736 | s->flags, s->addr_version, &s->uuid); | |
f49b594d DH |
737 | pr_notice("FC: - pq=%x R=%lx F=%lx\n", |
738 | e->probe_seq, e->responsive_set, e->failed_set); | |
739 | if (a) { | |
3bf0fb6f | 740 | pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n", |
744bcd71 DH |
741 | a->version, |
742 | a->nr_ipv4, a->nr_addrs, a->max_addrs, | |
3bf0fb6f | 743 | a->preferred); |
f49b594d | 744 | if (a == e->addresses) |
744bcd71 DH |
745 | pr_notice("FC: - current\n"); |
746 | } | |
747 | } | |
748 | } | |
749 | ||
f49b594d | 750 | pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index); |
744bcd71 DH |
751 | rcu_read_unlock(); |
752 | } |