Git Repo - J-linux.git/blob - fs/xfs/xfs_fsmap.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / fs / xfs / xfs_fsmap.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <[email protected]>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_trans.h"
15 #include "xfs_btree.h"
16 #include "xfs_rmap_btree.h"
17 #include "xfs_trace.h"
18 #include "xfs_rmap.h"
19 #include "xfs_alloc.h"
20 #include "xfs_bit.h"
21 #include <linux/fsmap.h>
22 #include "xfs_fsmap.h"
23 #include "xfs_refcount.h"
24 #include "xfs_refcount_btree.h"
25 #include "xfs_alloc_btree.h"
26 #include "xfs_rtbitmap.h"
27 #include "xfs_ag.h"
28 #include "xfs_rtgroup.h"
29
30 /* Convert an xfs_fsmap to an fsmap. */
31 static void
32 xfs_fsmap_from_internal(
33         struct fsmap            *dest,
34         struct xfs_fsmap        *src)
35 {
36         dest->fmr_device = src->fmr_device;
37         dest->fmr_flags = src->fmr_flags;
38         dest->fmr_physical = BBTOB(src->fmr_physical);
39         dest->fmr_owner = src->fmr_owner;
40         dest->fmr_offset = BBTOB(src->fmr_offset);
41         dest->fmr_length = BBTOB(src->fmr_length);
42         dest->fmr_reserved[0] = 0;
43         dest->fmr_reserved[1] = 0;
44         dest->fmr_reserved[2] = 0;
45 }
46
47 /* Convert an fsmap to an xfs_fsmap. */
48 static void
49 xfs_fsmap_to_internal(
50         struct xfs_fsmap        *dest,
51         struct fsmap            *src)
52 {
53         dest->fmr_device = src->fmr_device;
54         dest->fmr_flags = src->fmr_flags;
55         dest->fmr_physical = BTOBBT(src->fmr_physical);
56         dest->fmr_owner = src->fmr_owner;
57         dest->fmr_offset = BTOBBT(src->fmr_offset);
58         dest->fmr_length = BTOBBT(src->fmr_length);
59 }
60
61 /* Convert an fsmap owner into an rmapbt owner. */
62 static int
63 xfs_fsmap_owner_to_rmap(
64         struct xfs_rmap_irec    *dest,
65         const struct xfs_fsmap  *src)
66 {
67         if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
68                 dest->rm_owner = src->fmr_owner;
69                 return 0;
70         }
71
72         switch (src->fmr_owner) {
73         case 0:                 /* "lowest owner id possible" */
74         case -1ULL:             /* "highest owner id possible" */
75                 dest->rm_owner = src->fmr_owner;
76                 break;
77         case XFS_FMR_OWN_FREE:
78                 dest->rm_owner = XFS_RMAP_OWN_NULL;
79                 break;
80         case XFS_FMR_OWN_UNKNOWN:
81                 dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
82                 break;
83         case XFS_FMR_OWN_FS:
84                 dest->rm_owner = XFS_RMAP_OWN_FS;
85                 break;
86         case XFS_FMR_OWN_LOG:
87                 dest->rm_owner = XFS_RMAP_OWN_LOG;
88                 break;
89         case XFS_FMR_OWN_AG:
90                 dest->rm_owner = XFS_RMAP_OWN_AG;
91                 break;
92         case XFS_FMR_OWN_INOBT:
93                 dest->rm_owner = XFS_RMAP_OWN_INOBT;
94                 break;
95         case XFS_FMR_OWN_INODES:
96                 dest->rm_owner = XFS_RMAP_OWN_INODES;
97                 break;
98         case XFS_FMR_OWN_REFC:
99                 dest->rm_owner = XFS_RMAP_OWN_REFC;
100                 break;
101         case XFS_FMR_OWN_COW:
102                 dest->rm_owner = XFS_RMAP_OWN_COW;
103                 break;
104         case XFS_FMR_OWN_DEFECTIVE:     /* not implemented */
105                 /* fall through */
106         default:
107                 return -EINVAL;
108         }
109         return 0;
110 }
111
112 /* Convert an rmapbt owner into an fsmap owner. */
113 static int
114 xfs_fsmap_owner_from_frec(
115         struct xfs_fsmap                *dest,
116         const struct xfs_fsmap_irec     *frec)
117 {
118         dest->fmr_flags = 0;
119         if (!XFS_RMAP_NON_INODE_OWNER(frec->owner)) {
120                 dest->fmr_owner = frec->owner;
121                 return 0;
122         }
123         dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
124
125         switch (frec->owner) {
126         case XFS_RMAP_OWN_FS:
127                 dest->fmr_owner = XFS_FMR_OWN_FS;
128                 break;
129         case XFS_RMAP_OWN_LOG:
130                 dest->fmr_owner = XFS_FMR_OWN_LOG;
131                 break;
132         case XFS_RMAP_OWN_AG:
133                 dest->fmr_owner = XFS_FMR_OWN_AG;
134                 break;
135         case XFS_RMAP_OWN_INOBT:
136                 dest->fmr_owner = XFS_FMR_OWN_INOBT;
137                 break;
138         case XFS_RMAP_OWN_INODES:
139                 dest->fmr_owner = XFS_FMR_OWN_INODES;
140                 break;
141         case XFS_RMAP_OWN_REFC:
142                 dest->fmr_owner = XFS_FMR_OWN_REFC;
143                 break;
144         case XFS_RMAP_OWN_COW:
145                 dest->fmr_owner = XFS_FMR_OWN_COW;
146                 break;
147         case XFS_RMAP_OWN_NULL: /* "free" */
148                 dest->fmr_owner = XFS_FMR_OWN_FREE;
149                 break;
150         default:
151                 ASSERT(0);
152                 return -EFSCORRUPTED;
153         }
154         return 0;
155 }
156
157 /* getfsmap query state */
158 struct xfs_getfsmap_info {
159         struct xfs_fsmap_head   *head;
160         struct fsmap            *fsmap_recs;    /* mapping records */
161         struct xfs_buf          *agf_bp;        /* AGF, for refcount queries */
162         struct xfs_group        *group;         /* group info, if applicable */
163         xfs_daddr_t             next_daddr;     /* next daddr we expect */
164         /* daddr of low fsmap key when we're using the rtbitmap */
165         xfs_daddr_t             low_daddr;
166         /* daddr of high fsmap key, or the last daddr on the device */
167         xfs_daddr_t             end_daddr;
168         u64                     missing_owner;  /* owner of holes */
169         u32                     dev;            /* device id */
170         /*
171          * Low rmap key for the query.  If low.rm_blockcount is nonzero, this
172          * is the second (or later) call to retrieve the recordset in pieces.
173          * xfs_getfsmap_rec_before_start will compare all records retrieved
174          * by the rmapbt query to filter out any records that start before
175          * the last record.
176          */
177         struct xfs_rmap_irec    low;
178         struct xfs_rmap_irec    high;           /* high rmap key */
179         bool                    last;           /* last extent? */
180 };
181
182 /* Associate a device with a getfsmap handler. */
183 struct xfs_getfsmap_dev {
184         u32                     dev;
185         int                     (*fn)(struct xfs_trans *tp,
186                                       const struct xfs_fsmap *keys,
187                                       struct xfs_getfsmap_info *info);
188         sector_t                nr_sectors;
189 };
190
191 /* Compare two getfsmap device handlers. */
192 static int
193 xfs_getfsmap_dev_compare(
194         const void                      *p1,
195         const void                      *p2)
196 {
197         const struct xfs_getfsmap_dev   *d1 = p1;
198         const struct xfs_getfsmap_dev   *d2 = p2;
199
200         return d1->dev - d2->dev;
201 }
202
203 /* Decide if this mapping is shared. */
204 STATIC int
205 xfs_getfsmap_is_shared(
206         struct xfs_trans                *tp,
207         struct xfs_getfsmap_info        *info,
208         const struct xfs_fsmap_irec     *frec,
209         bool                            *stat)
210 {
211         struct xfs_mount                *mp = tp->t_mountp;
212         struct xfs_btree_cur            *cur;
213         xfs_agblock_t                   fbno;
214         xfs_extlen_t                    flen;
215         int                             error;
216
217         *stat = false;
218         if (!xfs_has_reflink(mp))
219                 return 0;
220         /* rt files will have no perag structure */
221         if (!info->group)
222                 return 0;
223
224         /* Are there any shared blocks here? */
225         flen = 0;
226         cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
227                         to_perag(info->group));
228
229         error = xfs_refcount_find_shared(cur, frec->rec_key,
230                         XFS_BB_TO_FSBT(mp, frec->len_daddr), &fbno, &flen,
231                         false);
232
233         xfs_btree_del_cursor(cur, error);
234         if (error)
235                 return error;
236
237         *stat = flen > 0;
238         return 0;
239 }
240
241 static inline void
242 xfs_getfsmap_format(
243         struct xfs_mount                *mp,
244         struct xfs_fsmap                *xfm,
245         struct xfs_getfsmap_info        *info)
246 {
247         struct fsmap                    *rec;
248
249         trace_xfs_getfsmap_mapping(mp, xfm);
250
251         rec = &info->fsmap_recs[info->head->fmh_entries++];
252         xfs_fsmap_from_internal(rec, xfm);
253 }
254
255 static inline bool
256 xfs_getfsmap_frec_before_start(
257         struct xfs_getfsmap_info        *info,
258         const struct xfs_fsmap_irec     *frec)
259 {
260         if (info->low_daddr != XFS_BUF_DADDR_NULL)
261                 return frec->start_daddr < info->low_daddr;
262         if (info->low.rm_blockcount) {
263                 struct xfs_rmap_irec    rec = {
264                         .rm_startblock  = frec->rec_key,
265                         .rm_owner       = frec->owner,
266                         .rm_flags       = frec->rm_flags,
267                 };
268
269                 return xfs_rmap_compare(&rec, &info->low) < 0;
270         }
271
272         return false;
273 }
274
275 /*
276  * Format a reverse mapping for getfsmap, having translated rm_startblock
277  * into the appropriate daddr units.  Pass in a nonzero @len_daddr if the
278  * length could be larger than rm_blockcount in struct xfs_rmap_irec.
279  */
280 STATIC int
281 xfs_getfsmap_helper(
282         struct xfs_trans                *tp,
283         struct xfs_getfsmap_info        *info,
284         const struct xfs_fsmap_irec     *frec)
285 {
286         struct xfs_fsmap                fmr;
287         struct xfs_mount                *mp = tp->t_mountp;
288         bool                            shared;
289         int                             error = 0;
290
291         if (fatal_signal_pending(current))
292                 return -EINTR;
293
294         /*
295          * Filter out records that start before our startpoint, if the
296          * caller requested that.
297          */
298         if (xfs_getfsmap_frec_before_start(info, frec))
299                 goto out;
300
301         /* Are we just counting mappings? */
302         if (info->head->fmh_count == 0) {
303                 if (info->head->fmh_entries == UINT_MAX)
304                         return -ECANCELED;
305
306                 if (frec->start_daddr > info->next_daddr)
307                         info->head->fmh_entries++;
308
309                 if (info->last)
310                         return 0;
311
312                 info->head->fmh_entries++;
313                 goto out;
314         }
315
316         /*
317          * If the record starts past the last physical block we saw,
318          * then we've found a gap.  Report the gap as being owned by
319          * whatever the caller specified is the missing owner.
320          */
321         if (frec->start_daddr > info->next_daddr) {
322                 if (info->head->fmh_entries >= info->head->fmh_count)
323                         return -ECANCELED;
324
325                 fmr.fmr_device = info->dev;
326                 fmr.fmr_physical = info->next_daddr;
327                 fmr.fmr_owner = info->missing_owner;
328                 fmr.fmr_offset = 0;
329                 fmr.fmr_length = frec->start_daddr - info->next_daddr;
330                 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
331                 xfs_getfsmap_format(mp, &fmr, info);
332         }
333
334         if (info->last)
335                 goto out;
336
337         /* Fill out the extent we found */
338         if (info->head->fmh_entries >= info->head->fmh_count)
339                 return -ECANCELED;
340
341         trace_xfs_fsmap_mapping(mp, info->dev,
342                         info->group ? info->group->xg_gno : NULLAGNUMBER,
343                         frec);
344
345         fmr.fmr_device = info->dev;
346         fmr.fmr_physical = frec->start_daddr;
347         error = xfs_fsmap_owner_from_frec(&fmr, frec);
348         if (error)
349                 return error;
350         fmr.fmr_offset = XFS_FSB_TO_BB(mp, frec->offset);
351         fmr.fmr_length = frec->len_daddr;
352         if (frec->rm_flags & XFS_RMAP_UNWRITTEN)
353                 fmr.fmr_flags |= FMR_OF_PREALLOC;
354         if (frec->rm_flags & XFS_RMAP_ATTR_FORK)
355                 fmr.fmr_flags |= FMR_OF_ATTR_FORK;
356         if (frec->rm_flags & XFS_RMAP_BMBT_BLOCK)
357                 fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
358         if (fmr.fmr_flags == 0) {
359                 error = xfs_getfsmap_is_shared(tp, info, frec, &shared);
360                 if (error)
361                         return error;
362                 if (shared)
363                         fmr.fmr_flags |= FMR_OF_SHARED;
364         }
365
366         xfs_getfsmap_format(mp, &fmr, info);
367 out:
368         info->next_daddr = max(info->next_daddr,
369                                frec->start_daddr + frec->len_daddr);
370         return 0;
371 }
372
373 static inline int
374 xfs_getfsmap_group_helper(
375         struct xfs_getfsmap_info        *info,
376         struct xfs_trans                *tp,
377         struct xfs_group                *xg,
378         xfs_agblock_t                   startblock,
379         xfs_extlen_t                    blockcount,
380         struct xfs_fsmap_irec           *frec)
381 {
382         /*
383          * For an info->last query, we're looking for a gap between the last
384          * mapping emitted and the high key specified by userspace.  If the
385          * user's query spans less than 1 fsblock, then info->high and
386          * info->low will have the same rm_startblock, which causes rec_daddr
387          * and next_daddr to be the same.  Therefore, use the end_daddr that
388          * we calculated from userspace's high key to synthesize the record.
389          * Note that if the btree query found a mapping, there won't be a gap.
390          */
391         if (info->last)
392                 frec->start_daddr = info->end_daddr + 1;
393         else
394                 frec->start_daddr = xfs_gbno_to_daddr(xg, startblock);
395
396         frec->len_daddr = XFS_FSB_TO_BB(xg->xg_mount, blockcount);
397         return xfs_getfsmap_helper(tp, info, frec);
398 }
399
400 /* Transform a rmapbt irec into a fsmap */
401 STATIC int
402 xfs_getfsmap_rmapbt_helper(
403         struct xfs_btree_cur            *cur,
404         const struct xfs_rmap_irec      *rec,
405         void                            *priv)
406 {
407         struct xfs_fsmap_irec           frec = {
408                 .owner                  = rec->rm_owner,
409                 .offset                 = rec->rm_offset,
410                 .rm_flags               = rec->rm_flags,
411                 .rec_key                = rec->rm_startblock,
412         };
413         struct xfs_getfsmap_info        *info = priv;
414
415         return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
416                         rec->rm_startblock, rec->rm_blockcount, &frec);
417 }
418
419 /* Transform a bnobt irec into a fsmap */
420 STATIC int
421 xfs_getfsmap_datadev_bnobt_helper(
422         struct xfs_btree_cur            *cur,
423         const struct xfs_alloc_rec_incore *rec,
424         void                            *priv)
425 {
426         struct xfs_fsmap_irec           frec = {
427                 .owner                  = XFS_RMAP_OWN_NULL, /* "free" */
428                 .rec_key                = rec->ar_startblock,
429         };
430         struct xfs_getfsmap_info        *info = priv;
431
432         return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
433                         rec->ar_startblock, rec->ar_blockcount, &frec);
434 }
435
436 /* Set rmap flags based on the getfsmap flags */
437 static void
438 xfs_getfsmap_set_irec_flags(
439         struct xfs_rmap_irec    *irec,
440         const struct xfs_fsmap  *fmr)
441 {
442         irec->rm_flags = 0;
443         if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
444                 irec->rm_flags |= XFS_RMAP_ATTR_FORK;
445         if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
446                 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
447         if (fmr->fmr_flags & FMR_OF_PREALLOC)
448                 irec->rm_flags |= XFS_RMAP_UNWRITTEN;
449 }
450
451 static inline bool
452 rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
453 {
454         if (!xfs_has_reflink(mp))
455                 return true;
456         if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
457                 return true;
458         if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
459                            XFS_RMAP_UNWRITTEN))
460                 return true;
461         return false;
462 }
463
464 /* Execute a getfsmap query against the regular data device. */
465 STATIC int
466 __xfs_getfsmap_datadev(
467         struct xfs_trans                *tp,
468         const struct xfs_fsmap          *keys,
469         struct xfs_getfsmap_info        *info,
470         int                             (*query_fn)(struct xfs_trans *,
471                                                     struct xfs_getfsmap_info *,
472                                                     struct xfs_btree_cur **,
473                                                     void *),
474         void                            *priv)
475 {
476         struct xfs_mount                *mp = tp->t_mountp;
477         struct xfs_perag                *pag = NULL;
478         struct xfs_btree_cur            *bt_cur = NULL;
479         xfs_fsblock_t                   start_fsb;
480         xfs_fsblock_t                   end_fsb;
481         xfs_agnumber_t                  start_ag, end_ag;
482         uint64_t                        eofs;
483         int                             error = 0;
484
485         eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
486         if (keys[0].fmr_physical >= eofs)
487                 return 0;
488         start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
489         end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
490
491         /*
492          * Convert the fsmap low/high keys to AG based keys.  Initialize
493          * low to the fsmap low key and max out the high key to the end
494          * of the AG.
495          */
496         info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
497         error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
498         if (error)
499                 return error;
500         info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
501         xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
502
503         /* Adjust the low key if we are continuing from where we left off. */
504         if (info->low.rm_blockcount == 0) {
505                 /* No previous record from which to continue */
506         } else if (rmap_not_shareable(mp, &info->low)) {
507                 /* Last record seen was an unshareable extent */
508                 info->low.rm_owner = 0;
509                 info->low.rm_offset = 0;
510
511                 start_fsb += info->low.rm_blockcount;
512                 if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
513                         return 0;
514         } else {
515                 /* Last record seen was a shareable file data extent */
516                 info->low.rm_offset += info->low.rm_blockcount;
517         }
518         info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
519
520         info->high.rm_startblock = -1U;
521         info->high.rm_owner = ULLONG_MAX;
522         info->high.rm_offset = ULLONG_MAX;
523         info->high.rm_blockcount = 0;
524         info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
525
526         start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
527         end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
528
529         while ((pag = xfs_perag_next_range(mp, pag, start_ag, end_ag))) {
530                 /*
531                  * Set the AG high key from the fsmap high key if this
532                  * is the last AG that we're querying.
533                  */
534                 info->group = pag_group(pag);
535                 if (pag_agno(pag) == end_ag) {
536                         info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
537                                         end_fsb);
538                         info->high.rm_offset = XFS_BB_TO_FSBT(mp,
539                                         keys[1].fmr_offset);
540                         error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
541                         if (error)
542                                 break;
543                         xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
544                 }
545
546                 if (bt_cur) {
547                         xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
548                         bt_cur = NULL;
549                         xfs_trans_brelse(tp, info->agf_bp);
550                         info->agf_bp = NULL;
551                 }
552
553                 error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
554                 if (error)
555                         break;
556
557                 trace_xfs_fsmap_low_group_key(mp, info->dev, pag_agno(pag),
558                                 &info->low);
559                 trace_xfs_fsmap_high_group_key(mp, info->dev, pag_agno(pag),
560                                 &info->high);
561
562                 error = query_fn(tp, info, &bt_cur, priv);
563                 if (error)
564                         break;
565
566                 /*
567                  * Set the AG low key to the start of the AG prior to
568                  * moving on to the next AG.
569                  */
570                 if (pag_agno(pag) == start_ag)
571                         memset(&info->low, 0, sizeof(info->low));
572
573                 /*
574                  * If this is the last AG, report any gap at the end of it
575                  * before we drop the reference to the perag when the loop
576                  * terminates.
577                  */
578                 if (pag_agno(pag) == end_ag) {
579                         info->last = true;
580                         error = query_fn(tp, info, &bt_cur, priv);
581                         if (error)
582                                 break;
583                 }
584                 info->group = NULL;
585         }
586
587         if (bt_cur)
588                 xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
589                                                          XFS_BTREE_NOERROR);
590         if (info->agf_bp) {
591                 xfs_trans_brelse(tp, info->agf_bp);
592                 info->agf_bp = NULL;
593         }
594         if (info->group) {
595                 xfs_perag_rele(pag);
596                 info->group = NULL;
597         } else if (pag) {
598                 /* loop termination case */
599                 xfs_perag_rele(pag);
600         }
601
602         return error;
603 }
604
605 /* Actually query the rmap btree. */
606 STATIC int
607 xfs_getfsmap_datadev_rmapbt_query(
608         struct xfs_trans                *tp,
609         struct xfs_getfsmap_info        *info,
610         struct xfs_btree_cur            **curpp,
611         void                            *priv)
612 {
613         /* Report any gap at the end of the last AG. */
614         if (info->last)
615                 return xfs_getfsmap_rmapbt_helper(*curpp, &info->high, info);
616
617         /* Allocate cursor for this AG and query_range it. */
618         *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
619                         to_perag(info->group));
620         return xfs_rmap_query_range(*curpp, &info->low, &info->high,
621                         xfs_getfsmap_rmapbt_helper, info);
622 }
623
624 /* Execute a getfsmap query against the regular data device rmapbt. */
625 STATIC int
626 xfs_getfsmap_datadev_rmapbt(
627         struct xfs_trans                *tp,
628         const struct xfs_fsmap          *keys,
629         struct xfs_getfsmap_info        *info)
630 {
631         info->missing_owner = XFS_FMR_OWN_FREE;
632         return __xfs_getfsmap_datadev(tp, keys, info,
633                         xfs_getfsmap_datadev_rmapbt_query, NULL);
634 }
635
636 /* Actually query the bno btree. */
637 STATIC int
638 xfs_getfsmap_datadev_bnobt_query(
639         struct xfs_trans                *tp,
640         struct xfs_getfsmap_info        *info,
641         struct xfs_btree_cur            **curpp,
642         void                            *priv)
643 {
644         struct xfs_alloc_rec_incore     *key = priv;
645
646         /* Report any gap at the end of the last AG. */
647         if (info->last)
648                 return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);
649
650         /* Allocate cursor for this AG and query_range it. */
651         *curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp,
652                         to_perag(info->group));
653         key->ar_startblock = info->low.rm_startblock;
654         key[1].ar_startblock = info->high.rm_startblock;
655         return xfs_alloc_query_range(*curpp, key, &key[1],
656                         xfs_getfsmap_datadev_bnobt_helper, info);
657 }
658
659 /* Execute a getfsmap query against the regular data device's bnobt. */
660 STATIC int
661 xfs_getfsmap_datadev_bnobt(
662         struct xfs_trans                *tp,
663         const struct xfs_fsmap          *keys,
664         struct xfs_getfsmap_info        *info)
665 {
666         struct xfs_alloc_rec_incore     akeys[2];
667
668         memset(akeys, 0, sizeof(akeys));
669         info->missing_owner = XFS_FMR_OWN_UNKNOWN;
670         return __xfs_getfsmap_datadev(tp, keys, info,
671                         xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
672 }
673
674 /* Execute a getfsmap query against the log device. */
675 STATIC int
676 xfs_getfsmap_logdev(
677         struct xfs_trans                *tp,
678         const struct xfs_fsmap          *keys,
679         struct xfs_getfsmap_info        *info)
680 {
681         struct xfs_fsmap_irec           frec = {
682                 .start_daddr            = 0,
683                 .rec_key                = 0,
684                 .owner                  = XFS_RMAP_OWN_LOG,
685         };
686         struct xfs_mount                *mp = tp->t_mountp;
687         xfs_fsblock_t                   start_fsb, end_fsb;
688         uint64_t                        eofs;
689
690         eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
691         if (keys[0].fmr_physical >= eofs)
692                 return 0;
693         start_fsb = XFS_BB_TO_FSBT(mp,
694                                 keys[0].fmr_physical + keys[0].fmr_length);
695         end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
696
697         /* Adjust the low key if we are continuing from where we left off. */
698         if (keys[0].fmr_length > 0)
699                 info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
700
701         trace_xfs_fsmap_low_linear_key(mp, info->dev, start_fsb);
702         trace_xfs_fsmap_high_linear_key(mp, info->dev, end_fsb);
703
704         if (start_fsb > 0)
705                 return 0;
706
707         /* Fabricate an rmap entry for the external log device. */
708         frec.len_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
709         return xfs_getfsmap_helper(tp, info, &frec);
710 }
711
712 #ifdef CONFIG_XFS_RT
713 /* Transform a rtbitmap "record" into a fsmap */
714 STATIC int
715 xfs_getfsmap_rtdev_rtbitmap_helper(
716         struct xfs_rtgroup              *rtg,
717         struct xfs_trans                *tp,
718         const struct xfs_rtalloc_rec    *rec,
719         void                            *priv)
720 {
721         struct xfs_fsmap_irec           frec = {
722                 .owner                  = XFS_RMAP_OWN_NULL, /* "free" */
723         };
724         struct xfs_mount                *mp = rtg_mount(rtg);
725         struct xfs_getfsmap_info        *info = priv;
726         xfs_rtblock_t                   start_rtb =
727                                 xfs_rtx_to_rtb(rtg, rec->ar_startext);
728         uint64_t                        rtbcount =
729                                 xfs_rtbxlen_to_blen(mp, rec->ar_extcount);
730
731         /*
732          * For an info->last query, we're looking for a gap between the last
733          * mapping emitted and the high key specified by userspace.  If the
734          * user's query spans less than 1 fsblock, then info->high and
735          * info->low will have the same rm_startblock, which causes rec_daddr
736          * and next_daddr to be the same.  Therefore, use the end_daddr that
737          * we calculated from userspace's high key to synthesize the record.
738          * Note that if the btree query found a mapping, there won't be a gap.
739          */
740         if (info->last)
741                 frec.start_daddr = info->end_daddr + 1;
742         else
743                 frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb);
744
745         frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount);
746         return xfs_getfsmap_helper(tp, info, &frec);
747 }
748
749 /* Execute a getfsmap query against the realtime device rtbitmap. */
750 STATIC int
751 xfs_getfsmap_rtdev_rtbitmap(
752         struct xfs_trans                *tp,
753         const struct xfs_fsmap          *keys,
754         struct xfs_getfsmap_info        *info)
755 {
756         struct xfs_mount                *mp = tp->t_mountp;
757         xfs_rtblock_t                   start_rtbno, end_rtbno;
758         xfs_rtxnum_t                    start_rtx, end_rtx;
759         xfs_rgnumber_t                  start_rgno, end_rgno;
760         struct xfs_rtgroup              *rtg = NULL;
761         uint64_t                        eofs;
762         int                             error;
763
764         eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
765         if (keys[0].fmr_physical >= eofs)
766                 return 0;
767
768         info->missing_owner = XFS_FMR_OWN_UNKNOWN;
769
770         /* Adjust the low key if we are continuing from where we left off. */
771         start_rtbno = xfs_daddr_to_rtb(mp,
772                         keys[0].fmr_physical + keys[0].fmr_length);
773         if (keys[0].fmr_length > 0) {
774                 info->low_daddr = xfs_rtb_to_daddr(mp, start_rtbno);
775                 if (info->low_daddr >= eofs)
776                         return 0;
777         }
778         start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
779         start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);
780
781         end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
782         end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);
783
784         trace_xfs_fsmap_low_linear_key(mp, info->dev, start_rtbno);
785         trace_xfs_fsmap_high_linear_key(mp, info->dev, end_rtbno);
786
787         end_rtx = -1ULL;
788
789         while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
790                 if (rtg_rgno(rtg) == end_rgno)
791                         end_rtx = xfs_rtb_to_rtx(mp,
792                                         end_rtbno + mp->m_sb.sb_rextsize - 1);
793
794                 info->group = rtg_group(rtg);
795                 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
796                 error = xfs_rtalloc_query_range(rtg, tp, start_rtx, end_rtx,
797                                 xfs_getfsmap_rtdev_rtbitmap_helper, info);
798                 if (error)
799                         break;
800
801                 /*
802                  * Report any gaps at the end of the rtbitmap by simulating a
803                  * zero-length free extent starting at the rtx after the end
804                  * of the query range.
805                  */
806                 if (rtg_rgno(rtg) == end_rgno) {
807                         struct xfs_rtalloc_rec  ahigh = {
808                                 .ar_startext    = min(end_rtx + 1,
809                                                       rtg->rtg_extents),
810                         };
811
812                         info->last = true;
813                         error = xfs_getfsmap_rtdev_rtbitmap_helper(rtg, tp,
814                                         &ahigh, info);
815                         if (error)
816                                 break;
817                 }
818
819                 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
820                 info->group = NULL;
821                 start_rtx = 0;
822         }
823
824         /* loop termination case */
825         if (rtg) {
826                 if (info->group) {
827                         xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
828                         info->group = NULL;
829                 }
830                 xfs_rtgroup_rele(rtg);
831         }
832
833         return error;
834 }
835 #endif /* CONFIG_XFS_RT */
836
837 /* Do we recognize the device? */
838 STATIC bool
839 xfs_getfsmap_is_valid_device(
840         struct xfs_mount        *mp,
841         struct xfs_fsmap        *fm)
842 {
843         if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
844             fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
845                 return true;
846         if (mp->m_logdev_targp &&
847             fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
848                 return true;
849         if (mp->m_rtdev_targp &&
850             fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
851                 return true;
852         return false;
853 }
854
855 /* Ensure that the low key is less than the high key. */
856 STATIC bool
857 xfs_getfsmap_check_keys(
858         struct xfs_fsmap                *low_key,
859         struct xfs_fsmap                *high_key)
860 {
861         if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
862                 if (low_key->fmr_offset)
863                         return false;
864         }
865         if (high_key->fmr_flags != -1U &&
866             (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
867                                     FMR_OF_EXTENT_MAP))) {
868                 if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
869                         return false;
870         }
871         if (high_key->fmr_length && high_key->fmr_length != -1ULL)
872                 return false;
873
874         if (low_key->fmr_device > high_key->fmr_device)
875                 return false;
876         if (low_key->fmr_device < high_key->fmr_device)
877                 return true;
878
879         if (low_key->fmr_physical > high_key->fmr_physical)
880                 return false;
881         if (low_key->fmr_physical < high_key->fmr_physical)
882                 return true;
883
884         if (low_key->fmr_owner > high_key->fmr_owner)
885                 return false;
886         if (low_key->fmr_owner < high_key->fmr_owner)
887                 return true;
888
889         if (low_key->fmr_offset > high_key->fmr_offset)
890                 return false;
891         if (low_key->fmr_offset < high_key->fmr_offset)
892                 return true;
893
894         return false;
895 }
896
/*
 * Number of device slots getfsmap works with: two when realtime support is
 * not configured at build time, three when it is.
 */
#ifndef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	2
#else
#define XFS_GETFSMAP_DEVS	3
#endif /* CONFIG_XFS_RT */
905
906 /*
907  * Get filesystem's extents as described in head, and format for output. Fills
908  * in the supplied records array until there are no more reverse mappings to
909  * return or head.fmh_entries == head.fmh_count.  In the second case, this
910  * function returns -ECANCELED to indicate that more records would have been
911  * returned.
912  *
913  * Key to Confusion
914  * ----------------
915  * There are multiple levels of keys and counters at work here:
916  * xfs_fsmap_head.fmh_keys      -- low and high fsmap keys passed in;
917  *                                 these reflect fs-wide sector addrs.
918  * dkeys                        -- fmh_keys used to query each device;
919  *                                 these are fmh_keys but w/ the low key
920  *                                 bumped up by fmr_length.
921  * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
922  *                                 is how we detect gaps in the fsmap
923                                    records and report them.
924  * xfs_getfsmap_info.low/high   -- per-AG low/high keys computed from
925  *                                 dkeys; used to query the metadata.
926  */
927 STATIC int
928 xfs_getfsmap(
929         struct xfs_mount                *mp,
930         struct xfs_fsmap_head           *head,
931         struct fsmap                    *fsmap_recs)
932 {
933         struct xfs_trans                *tp = NULL;
934         struct xfs_fsmap                dkeys[2];       /* per-dev keys */
935         struct xfs_getfsmap_dev         handlers[XFS_GETFSMAP_DEVS];
936         struct xfs_getfsmap_info        info = {
937                 .fsmap_recs             = fsmap_recs,
938                 .head                   = head,
939         };
940         bool                            use_rmap;
941         int                             i;
942         int                             error = 0;
943
944         if (head->fmh_iflags & ~FMH_IF_VALID)
945                 return -EINVAL;
946         if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
947             !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
948                 return -EINVAL;
949         if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
950                 return -EINVAL;
951
952         use_rmap = xfs_has_rmapbt(mp) &&
953                    has_capability_noaudit(current, CAP_SYS_ADMIN);
954         head->fmh_entries = 0;
955
956         /* Set up our device handlers. */
957         memset(handlers, 0, sizeof(handlers));
958         handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
959         handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
960         if (use_rmap)
961                 handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
962         else
963                 handlers[0].fn = xfs_getfsmap_datadev_bnobt;
964         if (mp->m_logdev_targp != mp->m_ddev_targp) {
965                 handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
966                                                        mp->m_sb.sb_logblocks);
967                 handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
968                 handlers[1].fn = xfs_getfsmap_logdev;
969         }
970 #ifdef CONFIG_XFS_RT
971         if (mp->m_rtdev_targp) {
972                 handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
973                 handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
974                 handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
975         }
976 #endif /* CONFIG_XFS_RT */
977
978         xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
979                         xfs_getfsmap_dev_compare);
980
981         /*
982          * To continue where we left off, we allow userspace to use the
983          * last mapping from a previous call as the low key of the next.
984          * This is identified by a non-zero length in the low key. We
985          * have to increment the low key in this scenario to ensure we
986          * don't return the same mapping again, and instead return the
987          * very next mapping.
988          *
989          * If the low key mapping refers to file data, the same physical
990          * blocks could be mapped to several other files/offsets.
991          * According to rmapbt record ordering, the minimal next
992          * possible record for the block range is the next starting
993          * offset in the same inode. Therefore, each fsmap backend bumps
994          * the file offset to continue the search appropriately.  For
995          * all other low key mapping types (attr blocks, metadata), each
996          * fsmap backend bumps the physical offset as there can be no
997          * other mapping for the same physical block range.
998          */
999         dkeys[0] = head->fmh_keys[0];
1000         memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
1001
1002         info.next_daddr = head->fmh_keys[0].fmr_physical +
1003                           head->fmh_keys[0].fmr_length;
1004
1005         /* For each device we support... */
1006         for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
1007                 /* Is this device within the range the user asked for? */
1008                 if (!handlers[i].fn)
1009                         continue;
1010                 if (head->fmh_keys[0].fmr_device > handlers[i].dev)
1011                         continue;
1012                 if (head->fmh_keys[1].fmr_device < handlers[i].dev)
1013                         break;
1014
1015                 /*
1016                  * If this device number matches the high key, we have to pass
1017                  * the high key to the handler to limit the query results, and
1018                  * set the end_daddr so that we can synthesize records at the
1019                  * end of the query range or device.
1020                  */
1021                 if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
1022                         dkeys[1] = head->fmh_keys[1];
1023                         info.end_daddr = min(handlers[i].nr_sectors - 1,
1024                                              dkeys[1].fmr_physical);
1025                 } else {
1026                         info.end_daddr = handlers[i].nr_sectors - 1;
1027                 }
1028
1029                 /*
1030                  * If the device number exceeds the low key, zero out the low
1031                  * key so that we get everything from the beginning.
1032                  */
1033                 if (handlers[i].dev > head->fmh_keys[0].fmr_device)
1034                         memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
1035
1036                 /*
1037                  * Grab an empty transaction so that we can use its recursive
1038                  * buffer locking abilities to detect cycles in the rmapbt
1039                  * without deadlocking.
1040                  */
1041                 error = xfs_trans_alloc_empty(mp, &tp);
1042                 if (error)
1043                         break;
1044
1045                 info.dev = handlers[i].dev;
1046                 info.last = false;
1047                 info.group = NULL;
1048                 info.low_daddr = XFS_BUF_DADDR_NULL;
1049                 info.low.rm_blockcount = 0;
1050                 error = handlers[i].fn(tp, dkeys, &info);
1051                 if (error)
1052                         break;
1053                 xfs_trans_cancel(tp);
1054                 tp = NULL;
1055                 info.next_daddr = 0;
1056         }
1057
1058         if (tp)
1059                 xfs_trans_cancel(tp);
1060         head->fmh_oflags = FMH_OF_DEV_T;
1061         return error;
1062 }
1063
1064 int
1065 xfs_ioc_getfsmap(
1066         struct xfs_inode        *ip,
1067         struct fsmap_head       __user *arg)
1068 {
1069         struct xfs_fsmap_head   xhead = {0};
1070         struct fsmap_head       head;
1071         struct fsmap            *recs;
1072         unsigned int            count;
1073         __u32                   last_flags = 0;
1074         bool                    done = false;
1075         int                     error;
1076
1077         if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
1078                 return -EFAULT;
1079         if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
1080             memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
1081                        sizeof(head.fmh_keys[0].fmr_reserved)) ||
1082             memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
1083                        sizeof(head.fmh_keys[1].fmr_reserved)))
1084                 return -EINVAL;
1085
1086         /*
1087          * Use an internal memory buffer so that we don't have to copy fsmap
1088          * data to userspace while holding locks.  Start by trying to allocate
1089          * up to 128k for the buffer, but fall back to a single page if needed.
1090          */
1091         count = min_t(unsigned int, head.fmh_count,
1092                         131072 / sizeof(struct fsmap));
1093         recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
1094         if (!recs) {
1095                 count = min_t(unsigned int, head.fmh_count,
1096                                 PAGE_SIZE / sizeof(struct fsmap));
1097                 recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
1098                 if (!recs)
1099                         return -ENOMEM;
1100         }
1101
1102         xhead.fmh_iflags = head.fmh_iflags;
1103         xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
1104         xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
1105
1106         trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
1107         trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
1108
1109         head.fmh_entries = 0;
1110         do {
1111                 struct fsmap __user     *user_recs;
1112                 struct fsmap            *last_rec;
1113
1114                 user_recs = &arg->fmh_recs[head.fmh_entries];
1115                 xhead.fmh_entries = 0;
1116                 xhead.fmh_count = min_t(unsigned int, count,
1117                                         head.fmh_count - head.fmh_entries);
1118
1119                 /* Run query, record how many entries we got. */
1120                 error = xfs_getfsmap(ip->i_mount, &xhead, recs);
1121                 switch (error) {
1122                 case 0:
1123                         /*
1124                          * There are no more records in the result set.  Copy
1125                          * whatever we got to userspace and break out.
1126                          */
1127                         done = true;
1128                         break;
1129                 case -ECANCELED:
1130                         /*
1131                          * The internal memory buffer is full.  Copy whatever
1132                          * records we got to userspace and go again if we have
1133                          * not yet filled the userspace buffer.
1134                          */
1135                         error = 0;
1136                         break;
1137                 default:
1138                         goto out_free;
1139                 }
1140                 head.fmh_entries += xhead.fmh_entries;
1141                 head.fmh_oflags = xhead.fmh_oflags;
1142
1143                 /*
1144                  * If the caller wanted a record count or there aren't any
1145                  * new records to return, we're done.
1146                  */
1147                 if (head.fmh_count == 0 || xhead.fmh_entries == 0)
1148                         break;
1149
1150                 /* Copy all the records we got out to userspace. */
1151                 if (copy_to_user(user_recs, recs,
1152                                  xhead.fmh_entries * sizeof(struct fsmap))) {
1153                         error = -EFAULT;
1154                         goto out_free;
1155                 }
1156
1157                 /* Remember the last record flags we copied to userspace. */
1158                 last_rec = &recs[xhead.fmh_entries - 1];
1159                 last_flags = last_rec->fmr_flags;
1160
1161                 /* Set up the low key for the next iteration. */
1162                 xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
1163                 trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
1164         } while (!done && head.fmh_entries < head.fmh_count);
1165
1166         /*
1167          * If there are no more records in the query result set and we're not
1168          * in counting mode, mark the last record returned with the LAST flag.
1169          */
1170         if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
1171                 struct fsmap __user     *user_rec;
1172
1173                 last_flags |= FMR_OF_LAST;
1174                 user_rec = &arg->fmh_recs[head.fmh_entries - 1];
1175
1176                 if (copy_to_user(&user_rec->fmr_flags, &last_flags,
1177                                         sizeof(last_flags))) {
1178                         error = -EFAULT;
1179                         goto out_free;
1180                 }
1181         }
1182
1183         /* copy back header */
1184         if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
1185                 error = -EFAULT;
1186                 goto out_free;
1187         }
1188
1189 out_free:
1190         kvfree(recs);
1191         return error;
1192 }
This page took 0.097136 seconds and 4 git commands to generate.