]> Git Repo - linux.git/blob - fs/nfs/blocklayout/dev.c
Linux 6.14-rc3
[linux.git] / fs / nfs / blocklayout / dev.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2014-2016 Christoph Hellwig.
4  */
5 #include <linux/sunrpc/svc.h>
6 #include <linux/blkdev.h>
7 #include <linux/nfs4.h>
8 #include <linux/nfs_fs.h>
9 #include <linux/nfs_xdr.h>
10 #include <linux/pr.h>
11
12 #include "blocklayout.h"
13 #include "../nfs4trace.h"
14
15 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
16
17 static void bl_unregister_scsi(struct pnfs_block_dev *dev)
18 {
19         struct block_device *bdev = file_bdev(dev->bdev_file);
20         const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
21         int status;
22
23         status = ops->pr_register(bdev, dev->pr_key, 0, false);
24         if (status)
25                 trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
26         else
27                 trace_bl_pr_key_unreg(bdev, dev->pr_key);
28 }
29
30 static bool bl_register_scsi(struct pnfs_block_dev *dev)
31 {
32         struct block_device *bdev = file_bdev(dev->bdev_file);
33         const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
34         int status;
35
36         if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
37                 return true;
38
39         status = ops->pr_register(bdev, 0, dev->pr_key, true);
40         if (status) {
41                 trace_bl_pr_key_reg_err(bdev, dev->pr_key, status);
42                 return false;
43         }
44         trace_bl_pr_key_reg(bdev, dev->pr_key);
45         return true;
46 }
47
48 static void bl_unregister_dev(struct pnfs_block_dev *dev)
49 {
50         u32 i;
51
52         if (dev->nr_children) {
53                 for (i = 0; i < dev->nr_children; i++)
54                         bl_unregister_dev(&dev->children[i]);
55                 return;
56         }
57
58         if (dev->type == PNFS_BLOCK_VOLUME_SCSI &&
59                 test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
60                 bl_unregister_scsi(dev);
61 }
62
63 bool bl_register_dev(struct pnfs_block_dev *dev)
64 {
65         u32 i;
66
67         if (dev->nr_children) {
68                 for (i = 0; i < dev->nr_children; i++) {
69                         if (!bl_register_dev(&dev->children[i])) {
70                                 while (i > 0)
71                                         bl_unregister_dev(&dev->children[--i]);
72                                 return false;
73                         }
74                 }
75                 return true;
76         }
77
78         if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
79                 return bl_register_scsi(dev);
80         return true;
81 }
82
83 static void
84 bl_free_device(struct pnfs_block_dev *dev)
85 {
86         bl_unregister_dev(dev);
87
88         if (dev->nr_children) {
89                 int i;
90
91                 for (i = 0; i < dev->nr_children; i++)
92                         bl_free_device(&dev->children[i]);
93                 kfree(dev->children);
94         } else {
95                 if (dev->bdev_file)
96                         fput(dev->bdev_file);
97         }
98 }
99
100 void
101 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
102 {
103         struct pnfs_block_dev *dev =
104                 container_of(d, struct pnfs_block_dev, node);
105
106         bl_free_device(dev);
107         kfree_rcu(dev, node.rcu);
108 }
109
110 static int
111 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
112 {
113         __be32 *p;
114         int i;
115
116         p = xdr_inline_decode(xdr, 4);
117         if (!p)
118                 return -EIO;
119         b->type = be32_to_cpup(p++);
120
121         switch (b->type) {
122         case PNFS_BLOCK_VOLUME_SIMPLE:
123                 p = xdr_inline_decode(xdr, 4);
124                 if (!p)
125                         return -EIO;
126                 b->simple.nr_sigs = be32_to_cpup(p++);
127                 if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) {
128                         dprintk("Bad signature count: %d\n", b->simple.nr_sigs);
129                         return -EIO;
130                 }
131
132                 b->simple.len = 4 + 4;
133                 for (i = 0; i < b->simple.nr_sigs; i++) {
134                         p = xdr_inline_decode(xdr, 8 + 4);
135                         if (!p)
136                                 return -EIO;
137                         p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
138                         b->simple.sigs[i].sig_len = be32_to_cpup(p++);
139                         if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
140                                 pr_info("signature too long: %d\n",
141                                         b->simple.sigs[i].sig_len);
142                                 return -EIO;
143                         }
144
145                         p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
146                         if (!p)
147                                 return -EIO;
148                         memcpy(&b->simple.sigs[i].sig, p,
149                                 b->simple.sigs[i].sig_len);
150
151                         b->simple.len += 8 + 4 + \
152                                 (XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
153                 }
154                 break;
155         case PNFS_BLOCK_VOLUME_SLICE:
156                 p = xdr_inline_decode(xdr, 8 + 8 + 4);
157                 if (!p)
158                         return -EIO;
159                 p = xdr_decode_hyper(p, &b->slice.start);
160                 p = xdr_decode_hyper(p, &b->slice.len);
161                 b->slice.volume = be32_to_cpup(p++);
162                 break;
163         case PNFS_BLOCK_VOLUME_CONCAT:
164                 p = xdr_inline_decode(xdr, 4);
165                 if (!p)
166                         return -EIO;
167
168                 b->concat.volumes_count = be32_to_cpup(p++);
169                 if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
170                         dprintk("Too many volumes: %d\n", b->concat.volumes_count);
171                         return -EIO;
172                 }
173
174                 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
175                 if (!p)
176                         return -EIO;
177                 for (i = 0; i < b->concat.volumes_count; i++)
178                         b->concat.volumes[i] = be32_to_cpup(p++);
179                 break;
180         case PNFS_BLOCK_VOLUME_STRIPE:
181                 p = xdr_inline_decode(xdr, 8 + 4);
182                 if (!p)
183                         return -EIO;
184
185                 p = xdr_decode_hyper(p, &b->stripe.chunk_size);
186                 b->stripe.volumes_count = be32_to_cpup(p++);
187                 if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) {
188                         dprintk("Too many volumes: %d\n", b->stripe.volumes_count);
189                         return -EIO;
190                 }
191
192                 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
193                 if (!p)
194                         return -EIO;
195                 for (i = 0; i < b->stripe.volumes_count; i++)
196                         b->stripe.volumes[i] = be32_to_cpup(p++);
197                 break;
198         case PNFS_BLOCK_VOLUME_SCSI:
199                 p = xdr_inline_decode(xdr, 4 + 4 + 4);
200                 if (!p)
201                         return -EIO;
202                 b->scsi.code_set = be32_to_cpup(p++);
203                 b->scsi.designator_type = be32_to_cpup(p++);
204                 b->scsi.designator_len = be32_to_cpup(p++);
205                 p = xdr_inline_decode(xdr, b->scsi.designator_len);
206                 if (!p)
207                         return -EIO;
208                 if (b->scsi.designator_len > 256)
209                         return -EIO;
210                 memcpy(&b->scsi.designator, p, b->scsi.designator_len);
211                 p = xdr_inline_decode(xdr, 8);
212                 if (!p)
213                         return -EIO;
214                 p = xdr_decode_hyper(p, &b->scsi.pr_key);
215                 break;
216         default:
217                 dprintk("unknown volume type!\n");
218                 return -EIO;
219         }
220
221         return 0;
222 }
223
224 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
225                 struct pnfs_block_dev_map *map)
226 {
227         map->start = dev->start;
228         map->len = dev->len;
229         map->disk_offset = dev->disk_offset;
230         map->bdev = file_bdev(dev->bdev_file);
231         return true;
232 }
233
234 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
235                 struct pnfs_block_dev_map *map)
236 {
237         int i;
238
239         for (i = 0; i < dev->nr_children; i++) {
240                 struct pnfs_block_dev *child = &dev->children[i];
241
242                 if (child->start > offset ||
243                     child->start + child->len <= offset)
244                         continue;
245
246                 child->map(child, offset - child->start, map);
247                 return true;
248         }
249
250         dprintk("%s: ran off loop!\n", __func__);
251         return false;
252 }
253
254 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
255                 struct pnfs_block_dev_map *map)
256 {
257         struct pnfs_block_dev *child;
258         u64 chunk;
259         u32 chunk_idx;
260         u64 disk_offset;
261
262         chunk = div_u64(offset, dev->chunk_size);
263         div_u64_rem(chunk, dev->nr_children, &chunk_idx);
264
265         if (chunk_idx >= dev->nr_children) {
266                 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
267                         __func__, chunk_idx, offset, dev->chunk_size);
268                 /* error, should not happen */
269                 return false;
270         }
271
272         /* truncate offset to the beginning of the stripe */
273         offset = chunk * dev->chunk_size;
274
275         /* disk offset of the stripe */
276         disk_offset = div_u64(offset, dev->nr_children);
277
278         child = &dev->children[chunk_idx];
279         child->map(child, disk_offset, map);
280
281         map->start += offset;
282         map->disk_offset += disk_offset;
283         map->len = dev->chunk_size;
284         return true;
285 }
286
287 static int
288 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
289                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
290
291
292 static int
293 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
294                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
295 {
296         struct pnfs_block_volume *v = &volumes[idx];
297         struct file *bdev_file;
298         dev_t dev;
299
300         dev = bl_resolve_deviceid(server, v, gfp_mask);
301         if (!dev)
302                 return -EIO;
303
304         bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
305                                        NULL, NULL);
306         if (IS_ERR(bdev_file)) {
307                 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
308                         MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file));
309                 return PTR_ERR(bdev_file);
310         }
311         d->bdev_file = bdev_file;
312         d->len = bdev_nr_bytes(file_bdev(bdev_file));
313         d->map = bl_map_simple;
314
315         printk(KERN_INFO "pNFS: using block device %s\n",
316                 file_bdev(bdev_file)->bd_disk->disk_name);
317         return 0;
318 }
319
320 static bool
321 bl_validate_designator(struct pnfs_block_volume *v)
322 {
323         switch (v->scsi.designator_type) {
324         case PS_DESIGNATOR_EUI64:
325                 if (v->scsi.code_set != PS_CODE_SET_BINARY)
326                         return false;
327
328                 if (v->scsi.designator_len != 8 &&
329                     v->scsi.designator_len != 10 &&
330                     v->scsi.designator_len != 16)
331                         return false;
332
333                 return true;
334         case PS_DESIGNATOR_NAA:
335                 if (v->scsi.code_set != PS_CODE_SET_BINARY)
336                         return false;
337
338                 if (v->scsi.designator_len != 8 &&
339                     v->scsi.designator_len != 16)
340                         return false;
341
342                 return true;
343         case PS_DESIGNATOR_T10:
344         case PS_DESIGNATOR_NAME:
345                 pr_err("pNFS: unsupported designator "
346                         "(code set %d, type %d, len %d.\n",
347                         v->scsi.code_set,
348                         v->scsi.designator_type,
349                         v->scsi.designator_len);
350                 return false;
351         default:
352                 pr_err("pNFS: invalid designator "
353                         "(code set %d, type %d, len %d.\n",
354                         v->scsi.code_set,
355                         v->scsi.designator_type,
356                         v->scsi.designator_len);
357                 return false;
358         }
359 }
360
361 static struct file *
362 bl_open_path(struct pnfs_block_volume *v, const char *prefix)
363 {
364         struct file *bdev_file;
365         const char *devname;
366
367         devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
368                         prefix, v->scsi.designator_len, v->scsi.designator);
369         if (!devname)
370                 return ERR_PTR(-ENOMEM);
371
372         bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
373                                         NULL, NULL);
374         if (IS_ERR(bdev_file)) {
375                 dprintk("failed to open device %s (%ld)\n",
376                         devname, PTR_ERR(bdev_file));
377         }
378
379         kfree(devname);
380         return bdev_file;
381 }
382
383 static int
384 bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
385                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
386 {
387         struct pnfs_block_volume *v = &volumes[idx];
388         struct block_device *bdev;
389         const struct pr_ops *ops;
390         struct file *bdev_file;
391         int error;
392
393         if (!bl_validate_designator(v))
394                 return -EINVAL;
395
396         /*
397          * Try to open the RH/Fedora specific dm-mpath udev path first, as the
398          * wwn- links will only point to the first discovered SCSI device there.
399          * On other distributions like Debian, the default SCSI by-id path will
400          * point to the dm-multipath device if one exists.
401          */
402         bdev_file = bl_open_path(v, "dm-uuid-mpath-0x");
403         if (IS_ERR(bdev_file))
404                 bdev_file = bl_open_path(v, "wwn-0x");
405         if (IS_ERR(bdev_file))
406                 bdev_file = bl_open_path(v, "nvme-eui.");
407         if (IS_ERR(bdev_file)) {
408                 pr_warn("pNFS: no device found for volume %*phN\n",
409                         v->scsi.designator_len, v->scsi.designator);
410                 return PTR_ERR(bdev_file);
411         }
412         d->bdev_file = bdev_file;
413         bdev = file_bdev(bdev_file);
414
415         d->len = bdev_nr_bytes(bdev);
416         d->map = bl_map_simple;
417         d->pr_key = v->scsi.pr_key;
418
419         if (d->len == 0)
420                 return -ENODEV;
421
422         ops = bdev->bd_disk->fops->pr_ops;
423         if (!ops) {
424                 pr_err("pNFS: block device %s does not support reservations.",
425                                 bdev->bd_disk->disk_name);
426                 error = -EINVAL;
427                 goto out_blkdev_put;
428         }
429
430         return 0;
431
432 out_blkdev_put:
433         fput(d->bdev_file);
434         return error;
435 }
436
437 static int
438 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
439                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
440 {
441         struct pnfs_block_volume *v = &volumes[idx];
442         int ret;
443
444         ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
445         if (ret)
446                 return ret;
447
448         d->disk_offset = v->slice.start;
449         d->len = v->slice.len;
450         return 0;
451 }
452
453 static int
454 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
455                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
456 {
457         struct pnfs_block_volume *v = &volumes[idx];
458         u64 len = 0;
459         int ret, i;
460
461         d->children = kcalloc(v->concat.volumes_count,
462                         sizeof(struct pnfs_block_dev), gfp_mask);
463         if (!d->children)
464                 return -ENOMEM;
465
466         for (i = 0; i < v->concat.volumes_count; i++) {
467                 ret = bl_parse_deviceid(server, &d->children[i],
468                                 volumes, v->concat.volumes[i], gfp_mask);
469                 if (ret)
470                         return ret;
471
472                 d->nr_children++;
473                 d->children[i].start += len;
474                 len += d->children[i].len;
475         }
476
477         d->len = len;
478         d->map = bl_map_concat;
479         return 0;
480 }
481
482 static int
483 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
484                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
485 {
486         struct pnfs_block_volume *v = &volumes[idx];
487         u64 len = 0;
488         int ret, i;
489
490         d->children = kcalloc(v->stripe.volumes_count,
491                         sizeof(struct pnfs_block_dev), gfp_mask);
492         if (!d->children)
493                 return -ENOMEM;
494
495         for (i = 0; i < v->stripe.volumes_count; i++) {
496                 ret = bl_parse_deviceid(server, &d->children[i],
497                                 volumes, v->stripe.volumes[i], gfp_mask);
498                 if (ret)
499                         return ret;
500
501                 d->nr_children++;
502                 len += d->children[i].len;
503         }
504
505         d->len = len;
506         d->chunk_size = v->stripe.chunk_size;
507         d->map = bl_map_stripe;
508         return 0;
509 }
510
511 static int
512 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
513                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
514 {
515         d->type = volumes[idx].type;
516
517         switch (d->type) {
518         case PNFS_BLOCK_VOLUME_SIMPLE:
519                 return bl_parse_simple(server, d, volumes, idx, gfp_mask);
520         case PNFS_BLOCK_VOLUME_SLICE:
521                 return bl_parse_slice(server, d, volumes, idx, gfp_mask);
522         case PNFS_BLOCK_VOLUME_CONCAT:
523                 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
524         case PNFS_BLOCK_VOLUME_STRIPE:
525                 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
526         case PNFS_BLOCK_VOLUME_SCSI:
527                 return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
528         default:
529                 dprintk("unsupported volume type: %d\n", d->type);
530                 return -EIO;
531         }
532 }
533
534 struct nfs4_deviceid_node *
535 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
536                 gfp_t gfp_mask)
537 {
538         struct nfs4_deviceid_node *node = NULL;
539         struct pnfs_block_volume *volumes;
540         struct pnfs_block_dev *top;
541         struct xdr_stream xdr;
542         struct xdr_buf buf;
543         struct page *scratch;
544         int nr_volumes, ret, i;
545         __be32 *p;
546
547         scratch = alloc_page(gfp_mask);
548         if (!scratch)
549                 goto out;
550
551         xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
552         xdr_set_scratch_page(&xdr, scratch);
553
554         p = xdr_inline_decode(&xdr, sizeof(__be32));
555         if (!p)
556                 goto out_free_scratch;
557         nr_volumes = be32_to_cpup(p++);
558
559         volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
560                           gfp_mask);
561         if (!volumes)
562                 goto out_free_scratch;
563
564         for (i = 0; i < nr_volumes; i++) {
565                 ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
566                 if (ret < 0)
567                         goto out_free_volumes;
568         }
569
570         top = kzalloc(sizeof(*top), gfp_mask);
571         if (!top)
572                 goto out_free_volumes;
573
574         ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
575
576         node = &top->node;
577         nfs4_init_deviceid_node(node, server, &pdev->dev_id);
578         if (ret)
579                 nfs4_mark_deviceid_unavailable(node);
580
581 out_free_volumes:
582         kfree(volumes);
583 out_free_scratch:
584         __free_page(scratch);
585 out:
586         return node;
587 }
This page took 0.065628 seconds and 4 git commands to generate.