]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
d67ae825 TH |
2 | /* |
3 | * Device operations for the pnfs nfs4 file layout driver. | |
4 | * | |
5 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | |
6 | * | |
7 | * Tao Peng <[email protected]> | |
8 | */ | |
9 | ||
10 | #include <linux/nfs_fs.h> | |
11 | #include <linux/vmalloc.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/sunrpc/addr.h> | |
14 | ||
15 | #include "../internal.h" | |
16 | #include "../nfs4session.h" | |
17 | #include "flexfilelayout.h" | |
18 | ||
19 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
20 | ||
15d03055 TM |
21 | static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; |
22 | static unsigned int dataserver_retrans; | |
d67ae825 | 23 | |
65990d1a FI |
24 | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); |
25 | ||
d67ae825 TH |
26 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) |
27 | { | |
65990d1a | 28 | if (!IS_ERR_OR_NULL(mirror_ds)) |
d67ae825 TH |
29 | nfs4_put_deviceid_node(&mirror_ds->id_node); |
30 | } | |
31 | ||
32 | void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | |
33 | { | |
34 | nfs4_print_deviceid(&mirror_ds->id_node.deviceid); | |
35 | nfs4_pnfs_ds_put(mirror_ds->ds); | |
1feb2616 | 36 | kfree(mirror_ds->ds_versions); |
84a80f62 | 37 | kfree_rcu(mirror_ds, id_node.rcu); |
d67ae825 TH |
38 | } |
39 | ||
40 | /* Decode opaque device data and construct new_ds using it */ | |
41 | struct nfs4_ff_layout_ds * | |
42 | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |
43 | gfp_t gfp_flags) | |
44 | { | |
45 | struct xdr_stream stream; | |
46 | struct xdr_buf buf; | |
47 | struct page *scratch; | |
48 | struct list_head dsaddrs; | |
49 | struct nfs4_pnfs_ds_addr *da; | |
50 | struct nfs4_ff_layout_ds *new_ds = NULL; | |
51 | struct nfs4_ff_ds_version *ds_versions = NULL; | |
52 | u32 mp_count; | |
53 | u32 version_count; | |
54 | __be32 *p; | |
55 | int i, ret = -ENOMEM; | |
56 | ||
57 | /* set up xdr stream */ | |
58 | scratch = alloc_page(gfp_flags); | |
59 | if (!scratch) | |
60 | goto out_err; | |
61 | ||
62 | new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); | |
63 | if (!new_ds) | |
64 | goto out_scratch; | |
65 | ||
66 | nfs4_init_deviceid_node(&new_ds->id_node, | |
67 | server, | |
68 | &pdev->dev_id); | |
69 | INIT_LIST_HEAD(&dsaddrs); | |
70 | ||
71 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); | |
72 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | |
73 | ||
74 | /* multipath count */ | |
75 | p = xdr_inline_decode(&stream, 4); | |
76 | if (unlikely(!p)) | |
77 | goto out_err_drain_dsaddrs; | |
78 | mp_count = be32_to_cpup(p); | |
79 | dprintk("%s: multipath ds count %d\n", __func__, mp_count); | |
80 | ||
81 | for (i = 0; i < mp_count; i++) { | |
82 | /* multipath ds */ | |
83 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, | |
84 | &stream, gfp_flags); | |
85 | if (da) | |
86 | list_add_tail(&da->da_node, &dsaddrs); | |
87 | } | |
88 | if (list_empty(&dsaddrs)) { | |
89 | dprintk("%s: no suitable DS addresses found\n", | |
90 | __func__); | |
91 | ret = -ENOMEDIUM; | |
92 | goto out_err_drain_dsaddrs; | |
93 | } | |
94 | ||
95 | /* version count */ | |
96 | p = xdr_inline_decode(&stream, 4); | |
97 | if (unlikely(!p)) | |
98 | goto out_err_drain_dsaddrs; | |
99 | version_count = be32_to_cpup(p); | |
100 | dprintk("%s: version count %d\n", __func__, version_count); | |
101 | ||
6396bb22 KC |
102 | ds_versions = kcalloc(version_count, |
103 | sizeof(struct nfs4_ff_ds_version), | |
d67ae825 TH |
104 | gfp_flags); |
105 | if (!ds_versions) | |
106 | goto out_scratch; | |
107 | ||
108 | for (i = 0; i < version_count; i++) { | |
109 | /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + | |
110 | * tightly_coupled(4) */ | |
111 | p = xdr_inline_decode(&stream, 20); | |
112 | if (unlikely(!p)) | |
113 | goto out_err_drain_dsaddrs; | |
114 | ds_versions[i].version = be32_to_cpup(p++); | |
115 | ds_versions[i].minor_version = be32_to_cpup(p++); | |
116 | ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); | |
117 | ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); | |
118 | ds_versions[i].tightly_coupled = be32_to_cpup(p); | |
119 | ||
120 | if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) | |
121 | ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; | |
122 | if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) | |
123 | ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; | |
124 | ||
a7878ca1 TM |
125 | /* |
126 | * check for valid major/minor combination. | |
127 | * currently we support dataserver which talk: | |
128 | * v3, v4.0, v4.1, v4.2 | |
129 | */ | |
130 | if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) || | |
131 | (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) { | |
d67ae825 TH |
132 | dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, |
133 | i, ds_versions[i].version, | |
134 | ds_versions[i].minor_version); | |
135 | ret = -EPROTONOSUPPORT; | |
136 | goto out_err_drain_dsaddrs; | |
137 | } | |
138 | ||
139 | dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", | |
140 | __func__, i, ds_versions[i].version, | |
141 | ds_versions[i].minor_version, | |
142 | ds_versions[i].rsize, | |
143 | ds_versions[i].wsize, | |
144 | ds_versions[i].tightly_coupled); | |
145 | } | |
146 | ||
147 | new_ds->ds_versions = ds_versions; | |
148 | new_ds->ds_versions_cnt = version_count; | |
149 | ||
150 | new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | |
151 | if (!new_ds->ds) | |
152 | goto out_err_drain_dsaddrs; | |
153 | ||
154 | /* If DS was already in cache, free ds addrs */ | |
155 | while (!list_empty(&dsaddrs)) { | |
156 | da = list_first_entry(&dsaddrs, | |
157 | struct nfs4_pnfs_ds_addr, | |
158 | da_node); | |
159 | list_del_init(&da->da_node); | |
160 | kfree(da->da_remotestr); | |
161 | kfree(da); | |
162 | } | |
163 | ||
164 | __free_page(scratch); | |
165 | return new_ds; | |
166 | ||
167 | out_err_drain_dsaddrs: | |
168 | while (!list_empty(&dsaddrs)) { | |
169 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | |
170 | da_node); | |
171 | list_del_init(&da->da_node); | |
172 | kfree(da->da_remotestr); | |
173 | kfree(da); | |
174 | } | |
175 | ||
176 | kfree(ds_versions); | |
177 | out_scratch: | |
178 | __free_page(scratch); | |
179 | out_err: | |
180 | kfree(new_ds); | |
181 | ||
182 | dprintk("%s ERROR: returning %d\n", __func__, ret); | |
183 | return NULL; | |
184 | } | |
185 | ||
889d94d4 TM |
186 | static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, |
187 | struct nfs4_deviceid_node *devid) | |
188 | { | |
1c48cee8 | 189 | nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); |
889d94d4 TM |
190 | if (!ff_layout_has_available_ds(lseg)) |
191 | pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, | |
192 | lseg); | |
193 | } | |
194 | ||
195 | static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, | |
65990d1a FI |
196 | struct nfs4_ff_layout_mirror *mirror, |
197 | bool create) | |
889d94d4 | 198 | { |
65990d1a FI |
199 | if (mirror == NULL || IS_ERR(mirror->mirror_ds)) |
200 | goto outerr; | |
201 | if (mirror->mirror_ds == NULL) { | |
202 | if (create) { | |
203 | struct nfs4_deviceid_node *node; | |
204 | struct pnfs_layout_hdr *lh = lseg->pls_layout; | |
205 | struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); | |
206 | ||
207 | node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), | |
208 | &mirror->devid, lh->plh_lc_cred, | |
209 | GFP_KERNEL); | |
210 | if (node) | |
211 | mirror_ds = FF_LAYOUT_MIRROR_DS(node); | |
212 | ||
213 | /* check for race with another call to this function */ | |
214 | if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && | |
215 | mirror_ds != ERR_PTR(-ENODEV)) | |
216 | nfs4_put_deviceid_node(node); | |
217 | } else | |
218 | goto outerr; | |
889d94d4 | 219 | } |
f17f8a14 TM |
220 | |
221 | if (IS_ERR(mirror->mirror_ds)) | |
222 | goto outerr; | |
223 | ||
889d94d4 TM |
224 | if (mirror->mirror_ds->ds == NULL) { |
225 | struct nfs4_deviceid_node *devid; | |
226 | devid = &mirror->mirror_ds->id_node; | |
227 | ff_layout_mark_devid_invalid(lseg, devid); | |
228 | return false; | |
229 | } | |
230 | return true; | |
65990d1a FI |
231 | outerr: |
232 | pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); | |
233 | return false; | |
889d94d4 TM |
234 | } |
235 | ||
d67ae825 TH |
236 | static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, |
237 | u64 offset, u64 length) | |
238 | { | |
239 | u64 end; | |
240 | ||
17822b20 TM |
241 | end = max_t(u64, pnfs_end_offset(err->offset, err->length), |
242 | pnfs_end_offset(offset, length)); | |
d67ae825 TH |
243 | err->offset = min_t(u64, err->offset, offset); |
244 | err->length = end - err->offset; | |
245 | } | |
246 | ||
b819ed4b TM |
247 | static int |
248 | ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, | |
249 | const struct nfs4_ff_layout_ds_err *e2) | |
d67ae825 | 250 | { |
b819ed4b TM |
251 | int ret; |
252 | ||
253 | if (e1->opnum != e2->opnum) | |
254 | return e1->opnum < e2->opnum ? -1 : 1; | |
255 | if (e1->status != e2->status) | |
256 | return e1->status < e2->status ? -1 : 1; | |
93b717fd TM |
257 | ret = memcmp(e1->stateid.data, e2->stateid.data, |
258 | sizeof(e1->stateid.data)); | |
b819ed4b TM |
259 | if (ret != 0) |
260 | return ret; | |
261 | ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); | |
262 | if (ret != 0) | |
263 | return ret; | |
17822b20 | 264 | if (pnfs_end_offset(e1->offset, e1->length) < e2->offset) |
b819ed4b | 265 | return -1; |
17822b20 | 266 | if (e1->offset > pnfs_end_offset(e2->offset, e2->length)) |
b819ed4b TM |
267 | return 1; |
268 | /* If ranges overlap or are contiguous, they are the same */ | |
269 | return 0; | |
d67ae825 TH |
270 | } |
271 | ||
b819ed4b | 272 | static void |
d67ae825 TH |
273 | ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, |
274 | struct nfs4_ff_layout_ds_err *dserr) | |
275 | { | |
b819ed4b TM |
276 | struct nfs4_ff_layout_ds_err *err, *tmp; |
277 | struct list_head *head = &flo->error_list; | |
278 | int match; | |
279 | ||
280 | /* Do insertion sort w/ merges */ | |
281 | list_for_each_entry_safe(err, tmp, &flo->error_list, list) { | |
282 | match = ff_ds_error_match(err, dserr); | |
283 | if (match < 0) | |
284 | continue; | |
285 | if (match > 0) { | |
286 | /* Add entry "dserr" _before_ entry "err" */ | |
287 | head = &err->list; | |
d67ae825 TH |
288 | break; |
289 | } | |
b819ed4b TM |
290 | /* Entries match, so merge "err" into "dserr" */ |
291 | extend_ds_error(dserr, err->offset, err->length); | |
cb067935 | 292 | list_replace(&err->list, &dserr->list); |
b819ed4b | 293 | kfree(err); |
cb067935 | 294 | return; |
d67ae825 TH |
295 | } |
296 | ||
b819ed4b | 297 | list_add_tail(&dserr->list, head); |
d67ae825 TH |
298 | } |
299 | ||
300 | int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, | |
301 | struct nfs4_ff_layout_mirror *mirror, u64 offset, | |
302 | u64 length, int status, enum nfs_opnum4 opnum, | |
303 | gfp_t gfp_flags) | |
304 | { | |
305 | struct nfs4_ff_layout_ds_err *dserr; | |
d67ae825 TH |
306 | |
307 | if (status == 0) | |
308 | return 0; | |
309 | ||
310 | if (mirror->mirror_ds == NULL) | |
311 | return -EINVAL; | |
312 | ||
d67ae825 TH |
313 | dserr = kmalloc(sizeof(*dserr), gfp_flags); |
314 | if (!dserr) | |
315 | return -ENOMEM; | |
316 | ||
317 | INIT_LIST_HEAD(&dserr->list); | |
318 | dserr->offset = offset; | |
319 | dserr->length = length; | |
320 | dserr->status = status; | |
321 | dserr->opnum = opnum; | |
322 | nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); | |
323 | memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, | |
324 | NFS4_DEVICEID4_SIZE); | |
325 | ||
326 | spin_lock(&flo->generic_hdr.plh_inode->i_lock); | |
b819ed4b | 327 | ff_layout_add_ds_error_locked(flo, dserr); |
d67ae825 | 328 | spin_unlock(&flo->generic_hdr.plh_inode->i_lock); |
d67ae825 TH |
329 | |
330 | return 0; | |
331 | } | |
332 | ||
57f3f4c0 JL |
333 | static struct rpc_cred * |
334 | ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) | |
335 | { | |
3064b686 | 336 | struct rpc_cred *cred, __rcu **pcred; |
57f3f4c0 | 337 | |
3064b686 JL |
338 | if (iomode == IOMODE_READ) |
339 | pcred = &mirror->ro_cred; | |
340 | else | |
341 | pcred = &mirror->rw_cred; | |
57f3f4c0 JL |
342 | |
343 | rcu_read_lock(); | |
344 | do { | |
345 | cred = rcu_dereference(*pcred); | |
346 | if (!cred) | |
347 | break; | |
348 | ||
349 | cred = get_rpccred_rcu(cred); | |
350 | } while(!cred); | |
351 | rcu_read_unlock(); | |
352 | return cred; | |
353 | } | |
354 | ||
d67ae825 TH |
355 | struct nfs_fh * |
356 | nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) | |
357 | { | |
358 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | |
359 | struct nfs_fh *fh = NULL; | |
d67ae825 | 360 | |
65990d1a | 361 | if (!ff_layout_mirror_valid(lseg, mirror, false)) { |
889d94d4 | 362 | pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", |
d67ae825 | 363 | __func__, mirror_idx); |
d67ae825 TH |
364 | goto out; |
365 | } | |
366 | ||
367 | /* FIXME: For now assume there is only 1 version available for the DS */ | |
368 | fh = &mirror->fh_versions[0]; | |
369 | out: | |
370 | return fh; | |
371 | } | |
372 | ||
bb21ce0a TM |
373 | int |
374 | nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg, | |
375 | u32 mirror_idx, | |
376 | nfs4_stateid *stateid) | |
377 | { | |
378 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | |
379 | ||
380 | if (!ff_layout_mirror_valid(lseg, mirror, false)) { | |
381 | pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", | |
382 | __func__, mirror_idx); | |
383 | goto out; | |
384 | } | |
385 | ||
386 | nfs4_stateid_copy(stateid, &mirror->stateid); | |
387 | return 1; | |
388 | out: | |
389 | return 0; | |
390 | } | |
391 | ||
95e2b7e9 JL |
392 | /** |
393 | * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call | |
394 | * @lseg: the layout segment we're operating on | |
395 | * @ds_idx: index of the DS to use | |
396 | * @fail_return: return layout on connect failure? | |
397 | * | |
398 | * Try to prepare a DS connection to accept an RPC call. This involves | |
399 | * selecting a mirror to use and connecting the client to it if it's not | |
400 | * already connected. | |
401 | * | |
402 | * Since we only need a single functioning mirror to satisfy a read, we don't | |
403 | * want to return the layout if there is one. For writes though, any down | |
404 | * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish | |
405 | * between the two cases. | |
406 | * | |
407 | * Returns a pointer to a connected DS object on success or NULL on failure. | |
408 | */ | |
d67ae825 TH |
409 | struct nfs4_pnfs_ds * |
410 | nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
411 | bool fail_return) | |
412 | { | |
413 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
414 | struct nfs4_pnfs_ds *ds = NULL; | |
415 | struct nfs4_deviceid_node *devid; | |
416 | struct inode *ino = lseg->pls_layout->plh_inode; | |
417 | struct nfs_server *s = NFS_SERVER(ino); | |
418 | unsigned int max_payload; | |
a33e4b03 | 419 | int status; |
d67ae825 | 420 | |
65990d1a | 421 | if (!ff_layout_mirror_valid(lseg, mirror, true)) { |
889d94d4 | 422 | pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", |
d67ae825 | 423 | __func__, ds_idx); |
d67ae825 TH |
424 | goto out; |
425 | } | |
426 | ||
427 | devid = &mirror->mirror_ds->id_node; | |
428 | if (ff_layout_test_devid_unavailable(devid)) | |
3dc14735 | 429 | goto out_fail; |
d67ae825 TH |
430 | |
431 | ds = mirror->mirror_ds->ds; | |
432 | /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ | |
433 | smp_rmb(); | |
434 | if (ds->ds_clp) | |
90a0be00 | 435 | goto out; |
d67ae825 TH |
436 | |
437 | /* FIXME: For now we assume the server sent only one version of NFS | |
438 | * to use for the DS. | |
439 | */ | |
a33e4b03 | 440 | status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, |
d67ae825 TH |
441 | dataserver_retrans, |
442 | mirror->mirror_ds->ds_versions[0].version, | |
7d38de3f | 443 | mirror->mirror_ds->ds_versions[0].minor_version); |
d67ae825 TH |
444 | |
445 | /* connect success, check rsize/wsize limit */ | |
260f32ad | 446 | if (!status) { |
d67ae825 TH |
447 | max_payload = |
448 | nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), | |
449 | NULL); | |
450 | if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) | |
451 | mirror->mirror_ds->ds_versions[0].rsize = max_payload; | |
452 | if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) | |
453 | mirror->mirror_ds->ds_versions[0].wsize = max_payload; | |
3dc14735 | 454 | goto out; |
d67ae825 | 455 | } |
da066f3f | 456 | out_fail: |
3dc14735 TM |
457 | ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), |
458 | mirror, lseg->pls_range.offset, | |
459 | lseg->pls_range.length, NFS4ERR_NXIO, | |
460 | OP_ILLEGAL, GFP_NOIO); | |
3dc14735 TM |
461 | if (fail_return || !ff_layout_has_available_ds(lseg)) |
462 | pnfs_error_mark_layout_for_return(ino, lseg); | |
463 | ds = NULL; | |
d67ae825 TH |
464 | out: |
465 | return ds; | |
466 | } | |
467 | ||
468 | struct rpc_cred * | |
469 | ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
470 | struct rpc_cred *mdscred) | |
471 | { | |
472 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
547a6376 | 473 | struct rpc_cred *cred; |
d67ae825 | 474 | |
10ec57e4 | 475 | if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { |
57f3f4c0 JL |
476 | cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode); |
477 | if (!cred) | |
478 | cred = get_rpccred(mdscred); | |
479 | } else { | |
480 | cred = get_rpccred(mdscred); | |
481 | } | |
d67ae825 TH |
482 | return cred; |
483 | } | |
484 | ||
485 | /** | |
486 | * Find or create a DS rpc client with th MDS server rpc client auth flavor | |
487 | * in the nfs_client cl_ds_clients list. | |
488 | */ | |
489 | struct rpc_clnt * | |
490 | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, | |
491 | struct nfs_client *ds_clp, struct inode *inode) | |
492 | { | |
493 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | |
494 | ||
495 | switch (mirror->mirror_ds->ds_versions[0].version) { | |
496 | case 3: | |
497 | /* For NFSv3 DS, flavor is set when creating DS connections */ | |
498 | return ds_clp->cl_rpcclient; | |
499 | case 4: | |
500 | return nfs4_find_or_create_ds_client(ds_clp, inode); | |
501 | default: | |
502 | BUG(); | |
503 | } | |
504 | } | |
505 | ||
5b9b3c85 TM |
506 | void ff_layout_free_ds_ioerr(struct list_head *head) |
507 | { | |
508 | struct nfs4_ff_layout_ds_err *err; | |
509 | ||
510 | while (!list_empty(head)) { | |
511 | err = list_first_entry(head, | |
512 | struct nfs4_ff_layout_ds_err, | |
513 | list); | |
514 | list_del(&err->list); | |
515 | kfree(err); | |
516 | } | |
517 | } | |
518 | ||
d67ae825 | 519 | /* called with inode i_lock held */ |
5b9b3c85 | 520 | int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head) |
d67ae825 | 521 | { |
5b9b3c85 | 522 | struct nfs4_ff_layout_ds_err *err; |
d67ae825 TH |
523 | __be32 *p; |
524 | ||
5b9b3c85 | 525 | list_for_each_entry(err, head, list) { |
d67ae825 | 526 | /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) |
d1354907 TM |
527 | * + array length + deviceid(NFS4_DEVICEID4_SIZE) |
528 | * + status(4) + opnum(4) | |
d67ae825 TH |
529 | */ |
530 | p = xdr_reserve_space(xdr, | |
d1354907 | 531 | 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); |
d67ae825 TH |
532 | if (unlikely(!p)) |
533 | return -ENOBUFS; | |
534 | p = xdr_encode_hyper(p, err->offset); | |
535 | p = xdr_encode_hyper(p, err->length); | |
536 | p = xdr_encode_opaque_fixed(p, &err->stateid, | |
537 | NFS4_STATEID_SIZE); | |
d1354907 TM |
538 | /* Encode 1 error */ |
539 | *p++ = cpu_to_be32(1); | |
d67ae825 TH |
540 | p = xdr_encode_opaque_fixed(p, &err->deviceid, |
541 | NFS4_DEVICEID4_SIZE); | |
542 | *p++ = cpu_to_be32(err->status); | |
543 | *p++ = cpu_to_be32(err->opnum); | |
5b9b3c85 | 544 | dprintk("%s: offset %llu length %llu status %d op %d\n", |
d67ae825 | 545 | __func__, err->offset, err->length, err->status, |
5b9b3c85 | 546 | err->opnum); |
d67ae825 TH |
547 | } |
548 | ||
549 | return 0; | |
550 | } | |
551 | ||
5b9b3c85 TM |
552 | static |
553 | unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, | |
554 | const struct pnfs_layout_range *range, | |
555 | struct list_head *head, | |
556 | unsigned int maxnum) | |
557 | { | |
558 | struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); | |
559 | struct inode *inode = lo->plh_inode; | |
560 | struct nfs4_ff_layout_ds_err *err, *n; | |
561 | unsigned int ret = 0; | |
562 | ||
563 | spin_lock(&inode->i_lock); | |
564 | list_for_each_entry_safe(err, n, &flo->error_list, list) { | |
565 | if (!pnfs_is_range_intersecting(err->offset, | |
566 | pnfs_end_offset(err->offset, err->length), | |
567 | range->offset, | |
568 | pnfs_end_offset(range->offset, range->length))) | |
569 | continue; | |
570 | if (!maxnum) | |
571 | break; | |
572 | list_move(&err->list, head); | |
573 | maxnum--; | |
574 | ret++; | |
575 | } | |
576 | spin_unlock(&inode->i_lock); | |
577 | return ret; | |
578 | } | |
579 | ||
580 | unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, | |
581 | const struct pnfs_layout_range *range, | |
582 | struct list_head *head, | |
583 | unsigned int maxnum) | |
584 | { | |
585 | unsigned int ret; | |
586 | ||
587 | ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); | |
588 | /* If we're over the max, discard all remaining entries */ | |
589 | if (ret == maxnum) { | |
590 | LIST_HEAD(discard); | |
591 | do_layout_fetch_ds_ioerr(lo, range, &discard, -1); | |
592 | ff_layout_free_ds_ioerr(&discard); | |
593 | } | |
594 | return ret; | |
595 | } | |
596 | ||
81d6dc8b | 597 | static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
d67ae825 TH |
598 | { |
599 | struct nfs4_ff_layout_mirror *mirror; | |
600 | struct nfs4_deviceid_node *devid; | |
81d6dc8b | 601 | u32 idx; |
d67ae825 TH |
602 | |
603 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | |
604 | mirror = FF_LAYOUT_COMP(lseg, idx); | |
65990d1a FI |
605 | if (mirror) { |
606 | if (!mirror->mirror_ds) | |
607 | return true; | |
608 | if (IS_ERR(mirror->mirror_ds)) | |
609 | continue; | |
d67ae825 TH |
610 | devid = &mirror->mirror_ds->id_node; |
611 | if (!ff_layout_test_devid_unavailable(devid)) | |
612 | return true; | |
613 | } | |
614 | } | |
615 | ||
616 | return false; | |
617 | } | |
618 | ||
81d6dc8b TM |
619 | static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
620 | { | |
621 | struct nfs4_ff_layout_mirror *mirror; | |
622 | struct nfs4_deviceid_node *devid; | |
623 | u32 idx; | |
624 | ||
625 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | |
626 | mirror = FF_LAYOUT_COMP(lseg, idx); | |
65990d1a | 627 | if (!mirror || IS_ERR(mirror->mirror_ds)) |
81d6dc8b | 628 | return false; |
65990d1a FI |
629 | if (!mirror->mirror_ds) |
630 | continue; | |
81d6dc8b TM |
631 | devid = &mirror->mirror_ds->id_node; |
632 | if (ff_layout_test_devid_unavailable(devid)) | |
633 | return false; | |
634 | } | |
635 | ||
636 | return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; | |
637 | } | |
638 | ||
65990d1a | 639 | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) |
81d6dc8b TM |
640 | { |
641 | if (lseg->pls_range.iomode == IOMODE_READ) | |
642 | return ff_read_layout_has_available_ds(lseg); | |
643 | /* Note: RW layout needs all mirrors available */ | |
644 | return ff_rw_layout_has_available_ds(lseg); | |
645 | } | |
646 | ||
3b13b4b3 TH |
647 | bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg) |
648 | { | |
649 | return ff_layout_no_fallback_to_mds(lseg) || | |
650 | ff_layout_has_available_ds(lseg); | |
651 | } | |
652 | ||
fb1084e3 TH |
653 | bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg) |
654 | { | |
655 | return lseg->pls_range.iomode == IOMODE_RW && | |
656 | ff_layout_no_read_on_rw(lseg); | |
657 | } | |
658 | ||
d67ae825 TH |
659 | module_param(dataserver_retrans, uint, 0644); |
660 | MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " | |
661 | "retries a request before it attempts further " | |
662 | " recovery action."); | |
663 | module_param(dataserver_timeo, uint, 0644); | |
664 | MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " | |
665 | "NFSv4.1 client waits for a response from a " | |
666 | " data server before it retries an NFS request."); |