Merge tag 'phy-fixes-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy
[linux.git] / fs / bcachefs / fs.c
CommitLineData
1c6fdbd8
KO
1// SPDX-License-Identifier: GPL-2.0
2#ifndef NO_BCACHEFS_FS
3
4#include "bcachefs.h"
5#include "acl.h"
07a1006a 6#include "bkey_buf.h"
1c6fdbd8
KO
7#include "btree_update.h"
8#include "buckets.h"
9#include "chardev.h"
10#include "dirent.h"
d4bf5eec 11#include "errcode.h"
1c6fdbd8
KO
12#include "extents.h"
13#include "fs.h"
96385742 14#include "fs-common.h"
1c6fdbd8
KO
15#include "fs-io.h"
16#include "fs-ioctl.h"
dbbfca9f
KO
17#include "fs-io-buffered.h"
18#include "fs-io-direct.h"
19#include "fs-io-pagecache.h"
1c6fdbd8
KO
20#include "fsck.h"
21#include "inode.h"
1809b8cb 22#include "io_read.h"
1c6fdbd8
KO
23#include "journal.h"
24#include "keylist.h"
25#include "quota.h"
8e877caa 26#include "snapshot.h"
1c6fdbd8
KO
27#include "super.h"
28#include "xattr.h"
747d1d6c 29#include "trace.h"
1c6fdbd8
KO
30
31#include <linux/aio.h>
32#include <linux/backing-dev.h>
33#include <linux/exportfs.h>
34#include <linux/fiemap.h>
929d9543 35#include <linux/fs_context.h>
1c6fdbd8
KO
36#include <linux/module.h>
37#include <linux/pagemap.h>
38#include <linux/posix_acl.h>
39#include <linux/random.h>
40#include <linux/seq_file.h>
41#include <linux/statfs.h>
ffcf9ec7 42#include <linux/string.h>
1c6fdbd8
KO
43#include <linux/xattr.h>
44
45static struct kmem_cache *bch2_inode_cache;
46
32b26e8c 47static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
1c6fdbd8 48 struct bch_inode_info *,
9ca4853b
KO
49 struct bch_inode_unpacked *,
50 struct bch_subvolume *);
1c6fdbd8 51
32b26e8c 52void bch2_inode_update_after_write(struct btree_trans *trans,
1c6fdbd8
KO
53 struct bch_inode_info *inode,
54 struct bch_inode_unpacked *bi,
55 unsigned fields)
56{
32b26e8c
KO
57 struct bch_fs *c = trans->c;
58
59 BUG_ON(bi->bi_inum != inode->v.i_ino);
60
385f0c05 61 bch2_assert_pos_locked(trans, BTREE_ID_inodes, POS(0, bi->bi_inum));
32b26e8c 62
b43a0f60 63 set_nlink(&inode->v, bch2_inode_nlink_get(bi));
1c6fdbd8
KO
64 i_uid_write(&inode->v, bi->bi_uid);
65 i_gid_write(&inode->v, bi->bi_gid);
66 inode->v.i_mode = bi->bi_mode;
67
68 if (fields & ATTR_ATIME)
9e877052 69 inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
1c6fdbd8 70 if (fields & ATTR_MTIME)
9e877052 71 inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
1c6fdbd8
KO
72 if (fields & ATTR_CTIME)
73 inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
74
75 inode->ei_inode = *bi;
2ea90048
KO
76
77 bch2_inode_flags_to_vfs(inode);
1c6fdbd8
KO
78}
79
2ea90048
KO
80int __must_check bch2_write_inode(struct bch_fs *c,
81 struct bch_inode_info *inode,
82 inode_set_fn set,
83 void *p, unsigned fields)
1c6fdbd8 84{
6bd68ec2 85 struct btree_trans *trans = bch2_trans_get(c);
67e0dd8f 86 struct btree_iter iter = { NULL };
1c6fdbd8
KO
87 struct bch_inode_unpacked inode_u;
88 int ret;
1c6fdbd8 89retry:
6bd68ec2 90 bch2_trans_begin(trans);
1c6fdbd8 91
6bd68ec2 92 ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
5dd8c60e 93 BTREE_ITER_intent) ?:
6bd68ec2
KO
94 (set ? set(trans, inode, &inode_u, p) : 0) ?:
95 bch2_inode_write(trans, &iter, &inode_u) ?:
cb52d23e 96 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
1c6fdbd8
KO
97
98 /*
99 * the btree node lock protects inode->ei_inode, not ei_update_lock;
100 * this is important for inode updates via bchfs_write_index_update
101 */
102 if (!ret)
6bd68ec2 103 bch2_inode_update_after_write(trans, inode, &inode_u, fields);
1c6fdbd8 104
6bd68ec2 105 bch2_trans_iter_exit(trans, &iter);
47c46c95 106
549d173c 107 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
47c46c95
KO
108 goto retry;
109
e47a390a 110 bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
112d21fd 111 "%s: inode %llu:%llu not found when updating",
3ed94062 112 bch2_err_str(ret),
711bf946
KO
113 inode_inum(inode).subvol,
114 inode_inum(inode).inum);
115
6bd68ec2 116 bch2_trans_put(trans);
1c6fdbd8
KO
117 return ret < 0 ? ret : 0;
118}
119
0f5254aa
KO
120int bch2_fs_quota_transfer(struct bch_fs *c,
121 struct bch_inode_info *inode,
122 struct bch_qid new_qid,
123 unsigned qtypes,
124 enum quota_acct_mode mode)
125{
126 unsigned i;
127 int ret;
128
129 qtypes &= enabled_qtypes(c);
130
131 for (i = 0; i < QTYP_NR; i++)
132 if (new_qid.q[i] == inode->ei_qid.q[i])
133 qtypes &= ~(1U << i);
134
135 if (!qtypes)
136 return 0;
137
138 mutex_lock(&inode->ei_quota_lock);
139
140 ret = bch2_quota_transfer(c, qtypes, new_qid,
141 inode->ei_qid,
142 inode->v.i_blocks +
143 inode->ei_quota_reserved,
144 mode);
145 if (!ret)
146 for (i = 0; i < QTYP_NR; i++)
147 if (qtypes & (1 << i))
148 inode->ei_qid.q[i] = new_qid.q[i];
149
150 mutex_unlock(&inode->ei_quota_lock);
151
152 return ret;
153}
154
112d21fd 155static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
284ae18c 156{
112d21fd 157 return a.subvol == b.subvol && a.inum == b.inum;
284ae18c
KO
158}
159
9d861787
KO
160static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
161{
162 const subvol_inum *inum = data;
163
164 return jhash(&inum->inum, sizeof(inum->inum), seed);
165}
166
167static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed)
168{
169 const struct bch_inode_info *inode = data;
170
171 return bch2_vfs_inode_hash_fn(&inode->ei_inum, sizeof(inode->ei_inum), seed);
172}
173
112d21fd
KO
174static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
175 const void *obj)
284ae18c 176{
112d21fd
KO
177 const struct bch_inode_info *inode = obj;
178 const subvol_inum *v = arg->key;
284ae18c 179
112d21fd 180 return !subvol_inum_eq(inode->ei_inum, *v);
284ae18c
KO
181}
182
/*
 * rhashtable parameters for the table of in-memory VFS inodes.
 *
 * The key is the whole subvol_inum (ei_inum) and comparison checks both of
 * its fields, but both hash functions hash the inode number only, so entries
 * sharing an inode number across subvolumes land in the same bucket.
 */
static const struct rhashtable_params bch2_vfs_inodes_params = {
	.head_offset		= offsetof(struct bch_inode_info, hash),
	.key_offset		= offsetof(struct bch_inode_info, ei_inum),
	.key_len		= sizeof(subvol_inum),
	.hashfn			= bch2_vfs_inode_hash_fn,
	.obj_hashfn		= bch2_vfs_inode_obj_hash_fn,
	.obj_cmpfn		= bch2_vfs_inode_cmp_fn,
	.automatic_shrinking	= true,
};
192
/*
 * Check whether inode number p.offset is open in some subvolume whose
 * snapshot satisfies bch2_snapshot_is_ancestor(c, snap, p.snapshot).
 *
 * Returns nonzero (true) if such an open inode was found, 0 if not, or a
 * negative error code. Always returns 0 before the filesystem has started.
 *
 * The hash table is walked under rcu_read_lock() collecting subvolume IDs
 * into a darray; the snapshot checks run afterwards, outside the RCU section.
 */
int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
{
	struct bch_fs *c = trans->c;
	struct rhashtable *ht = &c->vfs_inodes_table;
	subvol_inum inum = (subvol_inum) { .inum = p.offset };
	DARRAY(u32) subvols;
	int ret = 0;

	if (!test_bit(BCH_FS_started, &c->flags))
		return false;

	darray_init(&subvols);
restart_from_top:

	/*
	 * Tweaked version of __rhashtable_lookup(); we need to get a list of
	 * subvolumes in which the given inode number is open.
	 *
	 * For this to work, we don't include the subvolume ID in the key that
	 * we hash - all inodes with the same inode number regardless of
	 * subvolume will hash to the same slot.
	 *
	 * This will be less than ideal if the same file is ever open
	 * simultaneously in many different snapshots:
	 */
	rcu_read_lock();
	struct rhash_lock_head __rcu *const *bkt;
	struct rhash_head *he;
	unsigned int hash;
	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
restart:
	hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params);
	bkt = rht_bucket(tbl, hash);
	do {
		struct bch_inode_info *inode;

		rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
			if (inode->ei_inum.inum == inum.inum) {
				/* non-blocking push first: we are inside an RCU read section */
				ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
						      GFP_NOWAIT|__GFP_NOWARN);
				if (ret) {
					/*
					 * Leave the RCU section, grow the array,
					 * discard what was collected and rescan.
					 */
					rcu_read_unlock();
					ret = darray_make_room(&subvols, 1);
					if (ret)
						goto err;
					subvols.nr = 0;
					goto restart_from_top;
				}
			}
		}
		/* An object might have been moved to a different hash chain,
		 * while we walk along it - better check and retry.
		 */
	} while (he != RHT_NULLS_MARKER(bkt));

	/* Ensure we see any new tables. */
	smp_rmb();

	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (unlikely(tbl))
		goto restart;
	rcu_read_unlock();

	/* Stop at the first subvolume whose snapshot passes the ancestor check. */
	darray_for_each(subvols, i) {
		u32 snap;
		ret = bch2_subvolume_get_snapshot(trans, *i, &snap);
		if (ret)
			goto err;

		ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot);
		if (ret)
			break;
	}
err:
	darray_exit(&subvols);
	return ret;
}
9b23fdbd 270
9d861787 271static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
284ae18c 272{
6b63a948 273 return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
284ae18c
KO
274}
275
/*
 * Wait for an inode that is being freed to leave the hash table.
 *
 * Releases inode->v.i_lock (the caller in this file takes it before calling).
 * Sleeps for at most 10 seconds, and only if @inode is still the entry hashed
 * under @inum after the lock has been dropped. The wakeup is expected from
 * inode_wake_up_bit(..., __I_NEW) in bch2_inode_hash_remove().
 */
static void __wait_on_freeing_inode(struct bch_fs *c,
				    struct bch_inode_info *inode,
				    subvol_inum inum)
{
	wait_queue_head_t *wq;
	struct wait_bit_queue_entry wait;

	/* queue ourselves before dropping i_lock so a wakeup cannot be missed */
	wq = inode_bit_waitqueue(&wait, &inode->v, __I_NEW);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode->v.i_lock);

	if (__bch2_inode_hash_find(c, inum) == inode)
		schedule_timeout(HZ * 10);
	finish_wait(wq, &wait.wq_entry);
}
291
54f77024
KO
292static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans,
293 subvol_inum inum)
112d21fd
KO
294{
295 struct bch_inode_info *inode;
296repeat:
297 inode = __bch2_inode_hash_find(c, inum);
298 if (inode) {
299 spin_lock(&inode->v.i_lock);
300 if (!test_bit(EI_INODE_HASHED, &inode->ei_flags)) {
301 spin_unlock(&inode->v.i_lock);
302 return NULL;
303 }
304 if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) {
54f77024 305 if (!trans) {
6b63a948 306 __wait_on_freeing_inode(c, inode, inum);
54f77024
KO
307 } else {
308 bch2_trans_unlock(trans);
6b63a948 309 __wait_on_freeing_inode(c, inode, inum);
54f77024
KO
310 int ret = bch2_trans_relock(trans);
311 if (ret)
312 return ERR_PTR(ret);
313 }
112d21fd
KO
314 goto repeat;
315 }
316 __iget(&inode->v);
317 spin_unlock(&inode->v.i_lock);
318 }
319
320 return inode;
321}
322
323static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode)
324{
325 spin_lock(&inode->v.i_lock);
326 bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags);
327 spin_unlock(&inode->v.i_lock);
328
329 if (remove) {
330 int ret = rhashtable_remove_fast(&c->vfs_inodes_table,
331 &inode->hash, bch2_vfs_inodes_params);
332 BUG_ON(ret);
333 inode->v.i_hash.pprev = NULL;
6b63a948
KO
334 /*
335 * This pairs with the bch2_inode_hash_find() ->
336 * __wait_on_freeing_inode() path
337 */
338 inode_wake_up_bit(&inode->v, __I_NEW);
112d21fd 339 }
16005147
KO
340}
341
54f77024
KO
342static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
343 struct btree_trans *trans,
344 struct bch_inode_info *inode)
1c6fdbd8 345{
112d21fd
KO
346 struct bch_inode_info *old = inode;
347
348 set_bit(EI_INODE_HASHED, &inode->ei_flags);
349retry:
9d861787
KO
350 if (unlikely(rhashtable_lookup_insert_key(&c->vfs_inodes_table,
351 &inode->ei_inum,
112d21fd
KO
352 &inode->hash,
353 bch2_vfs_inodes_params))) {
54f77024 354 old = bch2_inode_hash_find(c, trans, inode->ei_inum);
112d21fd
KO
355 if (!old)
356 goto retry;
357
358 clear_bit(EI_INODE_HASHED, &inode->ei_flags);
1c6fdbd8 359
dd908648
KO
360 /*
361 * bcachefs doesn't use I_NEW; we have no use for it since we
362 * only insert fully created inodes in the inode hash table. But
363 * discard_new_inode() expects it to be set...
364 */
99c87fe0 365 inode->v.i_state |= I_NEW;
b02f973e
KO
366 /*
367 * We don't want bch2_evict_inode() to delete the inode on disk,
368 * we just raced and had another inode in cache. Normally new
369 * inodes don't have nlink == 0 - except tmpfiles do...
370 */
371 set_nlink(&inode->v, 1);
d93ff5fa 372 discard_new_inode(&inode->v);
54f77024 373 return old;
a91bc5e5 374 } else {
112d21fd
KO
375 inode_fake_hash(&inode->v);
376
377 inode_sb_list_add(&inode->v);
378
a91bc5e5
KO
379 mutex_lock(&c->vfs_inodes_lock);
380 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
381 mutex_unlock(&c->vfs_inodes_lock);
54f77024 382 return inode;
a91bc5e5 383 }
a91bc5e5 384}
32b26e8c 385
/*
 * Evaluate @_do with the memory-allocation scope flags @_flags applied via
 * memalloc_flags_save(), then restore the saved state and yield the value of
 * @_do.
 *
 * The save is paired with memalloc_flags_restore(), its direct counterpart,
 * rather than the narrower-named memalloc_noreclaim_restore(). @_do is
 * parenthesized so any expression can be passed safely.
 */
#define memalloc_flags_do(_flags, _do)						\
({										\
	unsigned _saved_flags = memalloc_flags_save(_flags);			\
	typeof(_do) _ret = (_do);						\
	memalloc_flags_restore(_saved_flags);					\
	_ret;									\
})
393
/*
 * Must never be called: hitting this is a bug. Inodes in this file are
 * allocated through __bch2_new_inode(), which calls alloc_inode_sb() itself.
 * NOTE(review): presumably installed as the superblock's ->alloc_inode hook
 * so that generic VFS allocation paths trip the BUG() - confirm.
 */
static struct inode *bch2_alloc_inode(struct super_block *sb)
{
	BUG();
}
398
9897713f 399static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c, gfp_t gfp)
b769590f 400{
082330c3 401 struct bch_inode_info *inode = alloc_inode_sb(c->vfs_sb,
9897713f 402 bch2_inode_cache, gfp);
b769590f
KO
403 if (!inode)
404 return NULL;
405
406 inode_init_once(&inode->v);
407 mutex_init(&inode->ei_update_lock);
408 two_state_lock_init(&inode->ei_pagecache_lock);
409 INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
7124a898 410 inode->ei_flags = 0;
b769590f 411 mutex_init(&inode->ei_quota_lock);
7124a898 412 memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
b769590f 413
9897713f 414 if (unlikely(inode_init_always_gfp(c->vfs_sb, &inode->v, gfp))) {
b769590f
KO
415 kmem_cache_free(bch2_inode_cache, inode);
416 return NULL;
417 }
418
419 return inode;
420}
421
a91bc5e5
KO
422/*
423 * Allocate a new inode, dropping/retaking btree locks if necessary:
424 */
425static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
426{
9897713f 427 struct bch_inode_info *inode = __bch2_new_inode(trans->c, GFP_NOWAIT);
32b26e8c 428
a91bc5e5 429 if (unlikely(!inode)) {
9897713f 430 int ret = drop_locks_do(trans, (inode = __bch2_new_inode(trans->c, GFP_NOFS)) ? 0 : -ENOMEM);
0e42f381
KO
431 if (ret && inode) {
432 __destroy_inode(&inode->v);
433 kmem_cache_free(bch2_inode_cache, inode);
434 }
a91bc5e5
KO
435 if (ret)
436 return ERR_PTR(ret);
1c6fdbd8
KO
437 }
438
a91bc5e5
KO
439 return inode;
440}
9edbcc72 441
54f77024
KO
442static struct bch_inode_info *bch2_inode_hash_init_insert(struct btree_trans *trans,
443 subvol_inum inum,
444 struct bch_inode_unpacked *bi,
445 struct bch_subvolume *subvol)
446{
447 struct bch_inode_info *inode = bch2_new_inode(trans);
448 if (IS_ERR(inode))
449 return inode;
450
451 bch2_vfs_inode_init(trans, inum, inode, bi, subvol);
452
453 return bch2_inode_hash_insert(trans->c, trans, inode);
454
455}
456
a91bc5e5
KO
457struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
458{
54f77024 459 struct bch_inode_info *inode = bch2_inode_hash_find(c, NULL, inum);
a91bc5e5
KO
460 if (inode)
461 return &inode->v;
462
463 struct btree_trans *trans = bch2_trans_get(c);
464
465 struct bch_inode_unpacked inode_u;
466 struct bch_subvolume subvol;
467 int ret = lockrestart_do(trans,
468 bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
469 bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
54f77024 470 PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));
a91bc5e5 471 bch2_trans_put(trans);
1c6fdbd8 472
a91bc5e5 473 return ret ? ERR_PTR(ret) : &inode->v;
1c6fdbd8
KO
474}
475
/*
 * Create a new inode in @dir and return its VFS inode.
 *
 * With BCH_CREATE_TMPFILE set in @flags no dirent name is passed to
 * bch2_create_trans() and the directory's ei_update_lock is not taken;
 * otherwise the name comes from @dentry and @dir's VFS inode is refreshed
 * (mtime/ctime) after the commit.
 *
 * Returns the new inode, already inserted in the inode hash table, or an
 * ERR_PTR. The preallocated ACLs are released on every exit path except the
 * early posix_acl_create() failure.
 */
struct bch_inode_info *
__bch2_create(struct mnt_idmap *idmap,
	      struct bch_inode_info *dir, struct dentry *dentry,
	      umode_t mode, dev_t rdev, subvol_inum snapshot_src,
	      unsigned flags)
{
	struct bch_fs *c = dir->v.i_sb->s_fs_info;
	struct btree_trans *trans;
	struct bch_inode_unpacked dir_u;
	struct bch_inode_info *inode;
	struct bch_inode_unpacked inode_u;
	struct posix_acl *default_acl = NULL, *acl = NULL;
	subvol_inum inum;
	struct bch_subvolume subvol;
	u64 journal_seq = 0;
	kuid_t kuid;
	kgid_t kgid;
	int ret;

	/*
	 * preallocate acls + vfs inode before btree transaction, so that
	 * nothing can fail after the transaction succeeds:
	 */
#ifdef CONFIG_BCACHEFS_POSIX_ACL
	ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
	if (ret)
		return ERR_PTR(ret);
#endif
	inode = __bch2_new_inode(c, GFP_NOFS);
	if (unlikely(!inode)) {
		inode = ERR_PTR(-ENOMEM);
		goto err;
	}

	bch2_inode_init_early(c, &inode_u);

	if (!(flags & BCH_CREATE_TMPFILE))
		mutex_lock(&dir->ei_update_lock);

	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);

	kuid = mapped_fsuid(idmap, i_user_ns(&dir->v));
	kgid = mapped_fsgid(idmap, i_user_ns(&dir->v));
	ret   = bch2_subvol_is_ro_trans(trans, dir->ei_inum.subvol) ?:
		bch2_create_trans(trans,
				  inode_inum(dir), &dir_u, &inode_u,
				  !(flags & BCH_CREATE_TMPFILE)
				  ? &dentry->d_name : NULL,
				  from_kuid(i_user_ns(&dir->v), kuid),
				  from_kgid(i_user_ns(&dir->v), kgid),
				  mode, rdev,
				  default_acl, acl, snapshot_src, flags) ?:
		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
				KEY_TYPE_QUOTA_PREALLOC);
	if (unlikely(ret))
		goto err_before_quota;

	/* inodes created with their own subvolume carry it in bi_subvol */
	inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol;
	inum.inum = inode_u.bi_inum;

	ret   = bch2_subvolume_get(trans, inum.subvol, true,
				   BTREE_ITER_with_updates, &subvol) ?:
		bch2_trans_commit(trans, NULL, &journal_seq, 0);
	if (unlikely(ret)) {
		/* undo the inode quota charge taken above */
		bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
				KEY_TYPE_QUOTA_WARN);
err_before_quota:
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			goto retry;
		goto err_trans;
	}

	if (!(flags & BCH_CREATE_TMPFILE)) {
		bch2_inode_update_after_write(trans, dir, &dir_u,
					      ATTR_MTIME|ATTR_CTIME);
		mutex_unlock(&dir->ei_update_lock);
	}

	bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);

	set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
	set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);

	/*
	 * we must insert the new inode into the inode cache before calling
	 * bch2_trans_exit() and dropping locks, else we could race with another
	 * thread pulling the inode in and modifying it:
	 *
	 * also, calling bch2_inode_hash_insert() without passing in the
	 * transaction object is sketchy - if we could ever end up in
	 * __wait_on_freeing_inode(), we'd risk deadlock.
	 *
	 * But that shouldn't be possible, since we still have the inode locked
	 * that we just created, and we _really_ can't take a transaction
	 * restart here.
	 */
	inode = bch2_inode_hash_insert(c, NULL, inode);
	bch2_trans_put(trans);
err:
	posix_acl_release(default_acl);
	posix_acl_release(acl);
	return inode;
err_trans:
	if (!(flags & BCH_CREATE_TMPFILE))
		mutex_unlock(&dir->ei_update_lock);

	bch2_trans_put(trans);
	/* dispose of the preallocated, never-hashed VFS inode */
	make_bad_inode(&inode->v);
	iput(&inode->v);
	inode = ERR_PTR(ret);
	goto err;
}
590
591/* methods */
592
/*
 * Look up @name in directory @dir inside @trans and return a referenced VFS
 * inode for the dirent's target, instantiating it if it is not cached.
 *
 * Returns the inode or an ERR_PTR. A positive return from
 * bch2_dirent_read_target() is mapped to -ENOENT. A dirent whose target inode
 * is missing, or whose inode fails the backpointer check below, is reported
 * as a filesystem inconsistency.
 */
static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
			subvol_inum dir, struct bch_hash_info *dir_hash_info,
			const struct qstr *name)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dirent_iter = {};
	subvol_inum inum = {};
	struct printbuf buf = PRINTBUF;

	struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
					     dir_hash_info, dir, name, 0);
	int ret = bkey_err(k);
	if (ret)
		return ERR_PTR(ret);

	ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum);
	if (ret > 0)
		ret = -ENOENT;
	if (ret)
		goto err;

	/* fast path: already in the inode cache (may also be an ERR_PTR) */
	struct bch_inode_info *inode = bch2_inode_hash_find(c, trans, inum);
	if (inode)
		goto out;

	struct bch_subvolume subvol;
	struct bch_inode_unpacked inode_u;
	ret =   bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
		bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
		PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol));

	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
				c, "dirent to missing inode:\n %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
	if (ret)
		goto err;

	/* regular files may have hardlinks: */
	if (bch2_fs_inconsistent_on(bch2_inode_should_have_bp(&inode_u) &&
				    !bkey_eq(k.k->p, POS(inode_u.bi_dir, inode_u.bi_dir_offset)),
				    c,
				    "dirent points to inode that does not point back:\n %s",
				    (bch2_bkey_val_to_text(&buf, c, k),
				     prt_printf(&buf, "\n "),
				     bch2_inode_unpacked_to_text(&buf, &inode_u),
				     buf.buf))) {
		ret = -ENOENT;
		goto err;
	}
out:
	bch2_trans_iter_exit(trans, &dirent_iter);
	printbuf_exit(&buf);
	return inode;
err:
	/* shared cleanup lives at out:, so just convert ret and jump back */
	inode = ERR_PTR(ret);
	goto out;
}
650
1c6fdbd8
KO
651static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
652 unsigned int flags)
653{
654 struct bch_fs *c = vdir->i_sb->s_fs_info;
655 struct bch_inode_info *dir = to_bch_ei(vdir);
07bca3bd 656 struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
1c6fdbd8 657
737cd174 658 struct bch_inode_info *inode;
a0d11fee 659 bch2_trans_do(c,
737cd174
KO
660 PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir),
661 &hash, &dentry->d_name)));
662 if (IS_ERR(inode))
663 inode = NULL;
1c6fdbd8 664
737cd174 665 return d_splice_alias(&inode->v, dentry);
1c6fdbd8
KO
666}
667
821a99b7
KO
668static int bch2_mknod(struct mnt_idmap *idmap,
669 struct inode *vdir, struct dentry *dentry,
670 umode_t mode, dev_t rdev)
1c6fdbd8
KO
671{
672 struct bch_inode_info *inode =
42d23732
KO
673 __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
674 (subvol_inum) { 0 }, 0);
1c6fdbd8
KO
675
676 if (IS_ERR(inode))
5c1ef830 677 return bch2_err_class(PTR_ERR(inode));
1c6fdbd8
KO
678
679 d_instantiate(dentry, &inode->v);
680 return 0;
681}
682
821a99b7
KO
683static int bch2_create(struct mnt_idmap *idmap,
684 struct inode *vdir, struct dentry *dentry,
685 umode_t mode, bool excl)
686{
687 return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
688}
689
1c6fdbd8
KO
690static int __bch2_link(struct bch_fs *c,
691 struct bch_inode_info *inode,
692 struct bch_inode_info *dir,
693 struct dentry *dentry)
694{
184b1dc1 695 struct bch_inode_unpacked dir_u, inode_u;
1c6fdbd8
KO
696 int ret;
697
19ee5f2a 698 mutex_lock(&inode->ei_update_lock);
f369de82 699 struct btree_trans *trans = bch2_trans_get(c);
58677a1d 700
6bd68ec2
KO
701 ret = commit_do(trans, NULL, NULL, 0,
702 bch2_link_trans(trans,
6fed42bb
KO
703 inode_inum(dir), &dir_u,
704 inode_inum(inode), &inode_u,
5f0e4ae1 705 &dentry->d_name));
1c6fdbd8 706
184b1dc1 707 if (likely(!ret)) {
6bd68ec2 708 bch2_inode_update_after_write(trans, dir, &dir_u,
184b1dc1 709 ATTR_MTIME|ATTR_CTIME);
6bd68ec2 710 bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
184b1dc1 711 }
1c6fdbd8 712
6bd68ec2 713 bch2_trans_put(trans);
19ee5f2a 714 mutex_unlock(&inode->ei_update_lock);
1c6fdbd8
KO
715 return ret;
716}
717
718static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
719 struct dentry *dentry)
720{
721 struct bch_fs *c = vdir->i_sb->s_fs_info;
722 struct bch_inode_info *dir = to_bch_ei(vdir);
723 struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
724 int ret;
4e1ec2cc
KO
725
726 lockdep_assert_held(&inode->v.i_rwsem);
1c6fdbd8 727
112d21fd
KO
728 ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
729 bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
0d72ab35 730 __bch2_link(c, inode, dir, dentry);
1c6fdbd8 731 if (unlikely(ret))
1a1c93e7 732 return bch2_err_class(ret);
1c6fdbd8
KO
733
734 ihold(&inode->v);
735 d_instantiate(dentry, &inode->v);
736 return 0;
737}
738
42d23732 739int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
2027875b 740 bool deleting_snapshot)
1c6fdbd8
KO
741{
742 struct bch_fs *c = vdir->i_sb->s_fs_info;
743 struct bch_inode_info *dir = to_bch_ei(vdir);
744 struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
745 struct bch_inode_unpacked dir_u, inode_u;
1c6fdbd8
KO
746 int ret;
747
168f4c5f 748 bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
58677a1d 749
f369de82
KO
750 struct btree_trans *trans = bch2_trans_get(c);
751
6bd68ec2 752 ret = commit_do(trans, NULL, NULL,
cb52d23e 753 BCH_TRANS_COMMIT_no_enospc,
6bd68ec2 754 bch2_unlink_trans(trans,
2d33036c
KO
755 inode_inum(dir), &dir_u,
756 &inode_u, &dentry->d_name,
757 deleting_snapshot));
758 if (unlikely(ret))
759 goto err;
96385742 760
6bd68ec2 761 bch2_inode_update_after_write(trans, dir, &dir_u,
2d33036c 762 ATTR_MTIME|ATTR_CTIME);
6bd68ec2 763 bch2_inode_update_after_write(trans, inode, &inode_u,
2d33036c 764 ATTR_MTIME);
1c6fdbd8 765
2d33036c
KO
766 if (inode_u.bi_subvol) {
767 /*
768 * Subvolume deletion is asynchronous, but we still want to tell
769 * the VFS that it's been deleted here:
770 */
771 set_nlink(&inode->v, 0);
772 }
773err:
6bd68ec2 774 bch2_trans_put(trans);
f369de82 775 bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
1c6fdbd8
KO
776
777 return ret;
778}
779
42d23732
KO
780static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
781{
0d72ab35
KO
782 struct bch_inode_info *dir= to_bch_ei(vdir);
783 struct bch_fs *c = dir->v.i_sb->s_fs_info;
784
112d21fd 785 int ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
0d72ab35 786 __bch2_unlink(vdir, dentry, false);
1a1c93e7 787 return bch2_err_class(ret);
42d23732
KO
788}
789
1c6fdbd8
KO
790static int bch2_symlink(struct mnt_idmap *idmap,
791 struct inode *vdir, struct dentry *dentry,
792 const char *symname)
793{
794 struct bch_fs *c = vdir->i_sb->s_fs_info;
795 struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
796 int ret;
797
6fed42bb 798 inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
42d23732 799 (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
3e3e02e6 800 if (IS_ERR(inode))
5c1ef830 801 return bch2_err_class(PTR_ERR(inode));
1c6fdbd8
KO
802
803 inode_lock(&inode->v);
804 ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
805 inode_unlock(&inode->v);
806
807 if (unlikely(ret))
808 goto err;
809
810 ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
811 if (unlikely(ret))
812 goto err;
813
1c6fdbd8
KO
814 ret = __bch2_link(c, inode, dir, dentry);
815 if (unlikely(ret))
816 goto err;
817
818 d_instantiate(dentry, &inode->v);
819 return 0;
820err:
821 iput(&inode->v);
1a1c93e7 822 return bch2_err_class(ret);
1c6fdbd8
KO
823}
824
825static int bch2_mkdir(struct mnt_idmap *idmap,
826 struct inode *vdir, struct dentry *dentry, umode_t mode)
827{
821a99b7 828 return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
1c6fdbd8
KO
829}
830
1c6fdbd8
KO
831static int bch2_rename2(struct mnt_idmap *idmap,
832 struct inode *src_vdir, struct dentry *src_dentry,
833 struct inode *dst_vdir, struct dentry *dst_dentry,
834 unsigned flags)
835{
836 struct bch_fs *c = src_vdir->i_sb->s_fs_info;
96385742
KO
837 struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
838 struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
839 struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
840 struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
1c6fdbd8 841 struct bch_inode_unpacked dst_dir_u, src_dir_u;
4645855d 842 struct bch_inode_unpacked src_inode_u, dst_inode_u, *whiteout_inode_u;
6bd68ec2 843 struct btree_trans *trans;
96385742
KO
844 enum bch_rename_mode mode = flags & RENAME_EXCHANGE
845 ? BCH_RENAME_EXCHANGE
846 : dst_dentry->d_inode
847 ? BCH_RENAME_OVERWRITE : BCH_RENAME;
4645855d 848 bool whiteout = !!(flags & RENAME_WHITEOUT);
1c6fdbd8
KO
849 int ret;
850
4645855d 851 if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE|RENAME_WHITEOUT))
1c6fdbd8
KO
852 return -EINVAL;
853
96385742
KO
854 if (mode == BCH_RENAME_OVERWRITE) {
855 ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
1c6fdbd8
KO
856 0, LLONG_MAX);
857 if (ret)
858 return ret;
859 }
860
168f4c5f 861 bch2_lock_inodes(INODE_UPDATE_LOCK,
96385742
KO
862 src_dir,
863 dst_dir,
864 src_inode,
865 dst_inode);
866
f369de82
KO
867 trans = bch2_trans_get(c);
868
112d21fd
KO
869 ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?:
870 bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol);
0d72ab35 871 if (ret)
74ec2f30 872 goto err_tx_restart;
0d72ab35 873
96385742
KO
874 if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
875 ret = bch2_fs_quota_transfer(c, src_inode,
876 dst_dir->ei_qid,
96012e14
KO
877 1 << QTYP_PRJ,
878 KEY_TYPE_QUOTA_PREALLOC);
879 if (ret)
880 goto err;
881 }
882
96385742
KO
883 if (mode == BCH_RENAME_EXCHANGE &&
884 inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
885 ret = bch2_fs_quota_transfer(c, dst_inode,
886 src_dir->ei_qid,
96012e14
KO
887 1 << QTYP_PRJ,
888 KEY_TYPE_QUOTA_PREALLOC);
889 if (ret)
890 goto err;
891 }
4645855d
SF
892retry:
893 bch2_trans_begin(trans);
96012e14 894
4645855d
SF
895 ret = bch2_rename_trans(trans,
896 inode_inum(src_dir), &src_dir_u,
897 inode_inum(dst_dir), &dst_dir_u,
898 &src_inode_u,
899 &dst_inode_u,
900 &src_dentry->d_name,
901 &dst_dentry->d_name,
902 mode);
1c6fdbd8 903 if (unlikely(ret))
4645855d
SF
904 goto err_tx_restart;
905
906 if (whiteout) {
907 whiteout_inode_u = bch2_trans_kmalloc_nomemzero(trans, sizeof(*whiteout_inode_u));
908 ret = PTR_ERR_OR_ZERO(whiteout_inode_u);
909 if (unlikely(ret))
910 goto err_tx_restart;
911 bch2_inode_init_early(c, whiteout_inode_u);
912
913 ret = bch2_create_trans(trans,
914 inode_inum(src_dir), &src_dir_u,
915 whiteout_inode_u,
916 &src_dentry->d_name,
917 from_kuid(i_user_ns(&src_dir->v), current_fsuid()),
918 from_kgid(i_user_ns(&src_dir->v), current_fsgid()),
919 S_IFCHR|WHITEOUT_MODE, 0,
920 NULL, NULL, (subvol_inum) { 0 }, 0) ?:
921 bch2_quota_acct(c, bch_qid(whiteout_inode_u), Q_INO, 1,
922 KEY_TYPE_QUOTA_PREALLOC);
923 if (unlikely(ret))
924 goto err_tx_restart;
925 }
926
927 ret = bch2_trans_commit(trans, NULL, NULL, 0);
928 if (unlikely(ret)) {
929err_tx_restart:
930 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
931 goto retry;
1c6fdbd8 932 goto err;
4645855d 933 }
1c6fdbd8 934
96385742
KO
935 BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
936 BUG_ON(dst_inode &&
937 dst_inode->v.i_ino != dst_inode_u.bi_inum);
938
6bd68ec2 939 bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
1c6fdbd8 940 ATTR_MTIME|ATTR_CTIME);
1c6fdbd8 941
68a2054d 942 if (src_dir != dst_dir)
6bd68ec2 943 bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
1c6fdbd8 944 ATTR_MTIME|ATTR_CTIME);
1c6fdbd8 945
6bd68ec2 946 bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
1c6fdbd8 947 ATTR_CTIME);
96385742 948
68a2054d 949 if (dst_inode)
6bd68ec2 950 bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
1c6fdbd8
KO
951 ATTR_CTIME);
952err:
6bd68ec2 953 bch2_trans_put(trans);
96012e14 954
96385742
KO
955 bch2_fs_quota_transfer(c, src_inode,
956 bch_qid(&src_inode->ei_inode),
96012e14
KO
957 1 << QTYP_PRJ,
958 KEY_TYPE_QUOTA_NOCHECK);
96385742
KO
959 if (dst_inode)
960 bch2_fs_quota_transfer(c, dst_inode,
961 bch_qid(&dst_inode->ei_inode),
96012e14
KO
962 1 << QTYP_PRJ,
963 KEY_TYPE_QUOTA_NOCHECK);
964
168f4c5f 965 bch2_unlock_inodes(INODE_UPDATE_LOCK,
96385742
KO
966 src_dir,
967 dst_dir,
968 src_inode,
969 dst_inode);
1c6fdbd8 970
1a1c93e7 971 return bch2_err_class(ret);
1c6fdbd8
KO
972}
973
58677a1d
KO
974static void bch2_setattr_copy(struct mnt_idmap *idmap,
975 struct bch_inode_info *inode,
976 struct bch_inode_unpacked *bi,
977 struct iattr *attr)
1c6fdbd8
KO
978{
979 struct bch_fs *c = inode->v.i_sb->s_fs_info;
58677a1d 980 unsigned int ia_valid = attr->ia_valid;
c24adfa0
HL
981 kuid_t kuid;
982 kgid_t kgid;
1c6fdbd8 983
c24adfa0
HL
984 if (ia_valid & ATTR_UID) {
985 kuid = from_vfsuid(idmap, i_user_ns(&inode->v), attr->ia_vfsuid);
986 bi->bi_uid = from_kuid(i_user_ns(&inode->v), kuid);
987 }
988 if (ia_valid & ATTR_GID) {
989 kgid = from_vfsgid(idmap, i_user_ns(&inode->v), attr->ia_vfsgid);
990 bi->bi_gid = from_kgid(i_user_ns(&inode->v), kgid);
991 }
1c6fdbd8 992
68a507a2
KO
993 if (ia_valid & ATTR_SIZE)
994 bi->bi_size = attr->ia_size;
995
1c6fdbd8 996 if (ia_valid & ATTR_ATIME)
58677a1d 997 bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
1c6fdbd8 998 if (ia_valid & ATTR_MTIME)
58677a1d 999 bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
1c6fdbd8 1000 if (ia_valid & ATTR_CTIME)
58677a1d 1001 bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
1c6fdbd8
KO
1002
1003 if (ia_valid & ATTR_MODE) {
58677a1d 1004 umode_t mode = attr->ia_mode;
1c6fdbd8 1005 kgid_t gid = ia_valid & ATTR_GID
c24adfa0 1006 ? kgid
1c6fdbd8
KO
1007 : inode->v.i_gid;
1008
c24adfa0
HL
1009 if (!in_group_or_capable(idmap, &inode->v,
1010 make_vfsgid(idmap, i_user_ns(&inode->v), gid)))
1c6fdbd8
KO
1011 mode &= ~S_ISGID;
1012 bi->bi_mode = mode;
1013 }
1c6fdbd8
KO
1014}
1015
68a507a2
KO
1016int bch2_setattr_nonsize(struct mnt_idmap *idmap,
1017 struct bch_inode_info *inode,
1018 struct iattr *attr)
1c6fdbd8
KO
1019{
1020 struct bch_fs *c = inode->v.i_sb->s_fs_info;
0f5254aa 1021 struct bch_qid qid;
6bd68ec2 1022 struct btree_trans *trans;
67e0dd8f 1023 struct btree_iter inode_iter = { NULL };
1c6fdbd8
KO
1024 struct bch_inode_unpacked inode_u;
1025 struct posix_acl *acl = NULL;
c24adfa0
HL
1026 kuid_t kuid;
1027 kgid_t kgid;
1c6fdbd8
KO
1028 int ret;
1029
1030 mutex_lock(&inode->ei_update_lock);
1031
0f5254aa
KO
1032 qid = inode->ei_qid;
1033
c24adfa0
HL
1034 if (attr->ia_valid & ATTR_UID) {
1035 kuid = from_vfsuid(idmap, i_user_ns(&inode->v), attr->ia_vfsuid);
1036 qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), kuid);
1037 }
1c6fdbd8 1038
c24adfa0
HL
1039 if (attr->ia_valid & ATTR_GID) {
1040 kgid = from_vfsgid(idmap, i_user_ns(&inode->v), attr->ia_vfsgid);
1041 qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), kgid);
1042 }
1c6fdbd8 1043
0f5254aa
KO
1044 ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
1045 KEY_TYPE_QUOTA_PREALLOC);
1046 if (ret)
1047 goto err;
1c6fdbd8 1048
6bd68ec2 1049 trans = bch2_trans_get(c);
1c6fdbd8 1050retry:
6bd68ec2 1051 bch2_trans_begin(trans);
1c6fdbd8
KO
1052 kfree(acl);
1053 acl = NULL;
1054
6bd68ec2 1055 ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
5dd8c60e 1056 BTREE_ITER_intent);
58677a1d
KO
1057 if (ret)
1058 goto btree_err;
1059
1060 bch2_setattr_copy(idmap, inode, &inode_u, attr);
1061
1062 if (attr->ia_valid & ATTR_MODE) {
6bd68ec2 1063 ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
6fed42bb 1064 inode_u.bi_mode, &acl);
58677a1d
KO
1065 if (ret)
1066 goto btree_err;
1067 }
1068
6bd68ec2
KO
1069 ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?:
1070 bch2_trans_commit(trans, NULL, NULL,
cb52d23e 1071 BCH_TRANS_COMMIT_no_enospc);
58677a1d 1072btree_err:
6bd68ec2 1073 bch2_trans_iter_exit(trans, &inode_iter);
50dc0f69 1074
549d173c 1075 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1c6fdbd8
KO
1076 goto retry;
1077 if (unlikely(ret))
1078 goto err_trans;
1079
6bd68ec2 1080 bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
1c6fdbd8
KO
1081
1082 if (acl)
1083 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
1084err_trans:
6bd68ec2 1085 bch2_trans_put(trans);
1c6fdbd8
KO
1086err:
1087 mutex_unlock(&inode->ei_update_lock);
1088
5c1ef830 1089 return bch2_err_class(ret);
1c6fdbd8
KO
1090}
1091
1092static int bch2_getattr(struct mnt_idmap *idmap,
1093 const struct path *path, struct kstat *stat,
1094 u32 request_mask, unsigned query_flags)
1095{
1096 struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
1097 struct bch_fs *c = inode->v.i_sb->s_fs_info;
c24adfa0
HL
1098 vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, &inode->v);
1099 vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, &inode->v);
1c6fdbd8
KO
1100
1101 stat->dev = inode->v.i_sb->s_dev;
1102 stat->ino = inode->v.i_ino;
1103 stat->mode = inode->v.i_mode;
1104 stat->nlink = inode->v.i_nlink;
c24adfa0
HL
1105 stat->uid = vfsuid_into_kuid(vfsuid);
1106 stat->gid = vfsgid_into_kgid(vfsgid);
1c6fdbd8
KO
1107 stat->rdev = inode->v.i_rdev;
1108 stat->size = i_size_read(&inode->v);
9e877052
LT
1109 stat->atime = inode_get_atime(&inode->v);
1110 stat->mtime = inode_get_mtime(&inode->v);
1c6fdbd8
KO
1111 stat->ctime = inode_get_ctime(&inode->v);
1112 stat->blksize = block_bytes(c);
1113 stat->blocks = inode->v.i_blocks;
1114
112d21fd 1115 stat->subvol = inode->ei_inum.subvol;
2a82bb02
KO
1116 stat->result_mask |= STATX_SUBVOL;
1117
95924420
HL
1118 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {
1119 stat->result_mask |= STATX_DIOALIGN;
1120 /*
1121 * this is incorrect; we should be tracking this in superblock,
1122 * and checking the alignment of open devices
1123 */
1124 stat->dio_mem_align = SECTOR_SIZE;
1125 stat->dio_offset_align = block_bytes(c);
1126 }
1127
1c6fdbd8
KO
1128 if (request_mask & STATX_BTIME) {
1129 stat->result_mask |= STATX_BTIME;
1130 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
1131 }
1132
103ffe9a 1133 if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
1c6fdbd8 1134 stat->attributes |= STATX_ATTR_IMMUTABLE;
4a1d8d3e
KO
1135 stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
1136
103ffe9a 1137 if (inode->ei_inode.bi_flags & BCH_INODE_append)
1c6fdbd8 1138 stat->attributes |= STATX_ATTR_APPEND;
4a1d8d3e
KO
1139 stat->attributes_mask |= STATX_ATTR_APPEND;
1140
103ffe9a 1141 if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
1c6fdbd8 1142 stat->attributes |= STATX_ATTR_NODUMP;
4a1d8d3e 1143 stat->attributes_mask |= STATX_ATTR_NODUMP;
1c6fdbd8
KO
1144
1145 return 0;
1146}
1147
1148static int bch2_setattr(struct mnt_idmap *idmap,
1149 struct dentry *dentry, struct iattr *iattr)
1150{
1151 struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
0d72ab35 1152 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1c6fdbd8
KO
1153 int ret;
1154
1155 lockdep_assert_held(&inode->v.i_rwsem);
1156
112d21fd 1157 ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
0d72ab35 1158 setattr_prepare(idmap, dentry, iattr);
1c6fdbd8
KO
1159 if (ret)
1160 return ret;
1161
1162 return iattr->ia_valid & ATTR_SIZE
5902cc28 1163 ? bchfs_truncate(idmap, inode, iattr)
1c6fdbd8
KO
1164 : bch2_setattr_nonsize(idmap, inode, iattr);
1165}
1166
1167static int bch2_tmpfile(struct mnt_idmap *idmap,
1168 struct inode *vdir, struct file *file, umode_t mode)
1169{
1170 struct bch_inode_info *inode =
1171 __bch2_create(idmap, to_bch_ei(vdir),
6fed42bb 1172 file->f_path.dentry, mode, 0,
42d23732 1173 (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
1c6fdbd8
KO
1174
1175 if (IS_ERR(inode))
5c1ef830 1176 return bch2_err_class(PTR_ERR(inode));
1c6fdbd8
KO
1177
1178 d_mark_tmpfile(file, &inode->v);
1179 d_instantiate(file->f_path.dentry, &inode->v);
1180 return finish_open_simple(file, 0);
1181}
1182
5b6d40e2
KO
1183static int bch2_fill_extent(struct bch_fs *c,
1184 struct fiemap_extent_info *info,
99aaf570 1185 struct bkey_s_c k, unsigned flags)
1c6fdbd8 1186{
e7b854b1 1187 if (bkey_extent_is_direct_data(k.k)) {
99aaf570 1188 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1742237b
KO
1189 const union bch_extent_entry *entry;
1190 struct extent_ptr_decoded p;
1c6fdbd8
KO
1191 int ret;
1192
76426098
KO
1193 if (k.k->type == KEY_TYPE_reflink_v)
1194 flags |= FIEMAP_EXTENT_SHARED;
1195
99aaf570 1196 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
1c6fdbd8 1197 int flags2 = 0;
1742237b 1198 u64 offset = p.ptr.offset;
1c6fdbd8 1199
79203111
KO
1200 if (p.ptr.unwritten)
1201 flags2 |= FIEMAP_EXTENT_UNWRITTEN;
1202
1742237b 1203 if (p.crc.compression_type)
1c6fdbd8
KO
1204 flags2 |= FIEMAP_EXTENT_ENCODED;
1205 else
1742237b 1206 offset += p.crc.offset;
1c6fdbd8 1207
8244f320
KO
1208 if ((offset & (block_sectors(c) - 1)) ||
1209 (k.k->size & (block_sectors(c) - 1)))
1c6fdbd8
KO
1210 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
1211
1212 ret = fiemap_fill_next_extent(info,
99aaf570 1213 bkey_start_offset(k.k) << 9,
1742237b 1214 offset << 9,
99aaf570 1215 k.k->size << 9, flags|flags2);
1c6fdbd8
KO
1216 if (ret)
1217 return ret;
1218 }
1219
1220 return 0;
e7b854b1
KO
1221 } else if (bkey_extent_is_inline_data(k.k)) {
1222 return fiemap_fill_next_extent(info,
1223 bkey_start_offset(k.k) << 9,
1224 0, k.k->size << 9,
1225 flags|
1226 FIEMAP_EXTENT_DATA_INLINE);
99aaf570 1227 } else if (k.k->type == KEY_TYPE_reservation) {
1c6fdbd8 1228 return fiemap_fill_next_extent(info,
99aaf570
KO
1229 bkey_start_offset(k.k) << 9,
1230 0, k.k->size << 9,
1c6fdbd8
KO
1231 flags|
1232 FIEMAP_EXTENT_DELALLOC|
1233 FIEMAP_EXTENT_UNWRITTEN);
1234 } else {
1235 BUG();
1236 }
1237}
1238
1239static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
1240 u64 start, u64 len)
1241{
1242 struct bch_fs *c = vinode->i_sb->s_fs_info;
1243 struct bch_inode_info *ei = to_bch_ei(vinode);
6bd68ec2 1244 struct btree_trans *trans;
67e0dd8f 1245 struct btree_iter iter;
1c6fdbd8 1246 struct bkey_s_c k;
07a1006a 1247 struct bkey_buf cur, prev;
76426098 1248 unsigned offset_into_extent, sectors;
1c6fdbd8
KO
1249 bool have_extent = false;
1250 int ret = 0;
1251
1252 ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
1253 if (ret)
1254 return ret;
1255
9a0ec045 1256 struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
1c6fdbd8
KO
1257 if (start + len < start)
1258 return -EINVAL;
1259
6fed42bb
KO
1260 start >>= 9;
1261
07a1006a
KO
1262 bch2_bkey_buf_init(&cur);
1263 bch2_bkey_buf_init(&prev);
6bd68ec2 1264 trans = bch2_trans_get(c);
6fed42bb 1265
6bd68ec2 1266 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
1a3158ec 1267 POS(ei->v.i_ino, start), 0);
6fed42bb 1268
e1c4d2f0 1269 while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
5ff75ccb
KO
1270 enum btree_id data_btree = BTREE_ID_extents;
1271
1a3158ec
KO
1272 bch2_trans_begin(trans);
1273
1274 u32 snapshot;
1275 ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
1276 if (ret)
e1c4d2f0 1277 continue;
1a3158ec
KO
1278
1279 bch2_btree_iter_set_snapshot(&iter, snapshot);
1280
1281 k = bch2_btree_iter_peek_upto(&iter, end);
1282 ret = bkey_err(k);
1283 if (ret)
e1c4d2f0 1284 continue;
1a3158ec
KO
1285
1286 if (!k.k)
1287 break;
1288
7d5224fc
KO
1289 if (!bkey_extent_is_data(k.k) &&
1290 k.k->type != KEY_TYPE_reservation) {
67e0dd8f 1291 bch2_btree_iter_advance(&iter);
7d5224fc
KO
1292 continue;
1293 }
99aaf570 1294
67e0dd8f 1295 offset_into_extent = iter.pos.offset -
76426098
KO
1296 bkey_start_offset(k.k);
1297 sectors = k.k->size - offset_into_extent;
1298
07a1006a 1299 bch2_bkey_buf_reassemble(&cur, c, k);
13dcd4ab 1300
6bd68ec2 1301 ret = bch2_read_indirect_extent(trans, &data_btree,
22d8a33d 1302 &offset_into_extent, &cur);
76426098 1303 if (ret)
e1c4d2f0 1304 continue;
76426098 1305
13dcd4ab 1306 k = bkey_i_to_s_c(cur.k);
07a1006a 1307 bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
13dcd4ab 1308
76426098
KO
1309 sectors = min(sectors, k.k->size - offset_into_extent);
1310
e7b854b1
KO
1311 bch2_cut_front(POS(k.k->p.inode,
1312 bkey_start_offset(k.k) +
1313 offset_into_extent),
1314 cur.k);
35189e09 1315 bch2_key_resize(&cur.k->k, sectors);
67e0dd8f 1316 cur.k->k.p = iter.pos;
35189e09 1317 cur.k->k.p.offset += cur.k->k.size;
76426098 1318
7d5224fc 1319 if (have_extent) {
6bd68ec2 1320 bch2_trans_unlock(trans);
7d5224fc 1321 ret = bch2_fill_extent(c, info,
35189e09 1322 bkey_i_to_s_c(prev.k), 0);
7d5224fc
KO
1323 if (ret)
1324 break;
1c6fdbd8 1325 }
76426098 1326
35189e09 1327 bkey_copy(prev.k, cur.k);
7d5224fc
KO
1328 have_extent = true;
1329
67e0dd8f
KO
1330 bch2_btree_iter_set_pos(&iter,
1331 POS(iter.pos.inode, iter.pos.offset + sectors));
99aaf570 1332 }
6bd68ec2 1333 bch2_trans_iter_exit(trans, &iter);
7d5224fc 1334
a83e108f 1335 if (!ret && have_extent) {
6bd68ec2 1336 bch2_trans_unlock(trans);
35189e09 1337 ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
99aaf570 1338 FIEMAP_EXTENT_LAST);
a83e108f 1339 }
7d5224fc 1340
6bd68ec2 1341 bch2_trans_put(trans);
07a1006a
KO
1342 bch2_bkey_buf_exit(&cur, c);
1343 bch2_bkey_buf_exit(&prev, c);
1c6fdbd8
KO
1344 return ret < 0 ? ret : 0;
1345}
1346
/* VM operations that bch2_mmap() installs on a mapping of a bcachefs file. */
1347static const struct vm_operations_struct bch_vm_ops = {
1348 .fault = bch2_page_fault,
1349 .map_pages = filemap_map_pages,
1350 .page_mkwrite = bch2_page_mkwrite,
1351};
1352
/*
 * ->mmap: note the access with file_accessed() and point the vma at
 * bch_vm_ops.  Always returns 0.
 */
1353static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
1354{
1355 file_accessed(file);
1356
1357 vma->vm_ops = &bch_vm_ops;
1358 return 0;
1359}
1360
1361/* Directories: */
1362
/*
 * ->llseek for directories: generic_file_llseek_size() with S64_MAX passed
 * for both of its size arguments.
 */
1363static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
1364{
1365 return generic_file_llseek_size(file, offset, whence,
1366 S64_MAX, S64_MAX);
1367}
1368
1369static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
1370{
96385742
KO
1371 struct bch_inode_info *inode = file_bch_inode(file);
1372 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1373
1374 if (!dir_emit_dots(file, ctx))
1375 return 0;
1c6fdbd8 1376
cf904c8d 1377 int ret = bch2_readdir(c, inode_inum(inode), ctx);
e691b391 1378
cf904c8d 1379 bch_err_fn(c, ret);
e691b391 1380 return bch2_err_class(ret);
1c6fdbd8
KO
1381}
1382
0d72ab35
KO
1383static int bch2_open(struct inode *vinode, struct file *file)
1384{
1385 if (file->f_flags & (O_WRONLY|O_RDWR)) {
1386 struct bch_inode_info *inode = to_bch_ei(vinode);
1387 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1388
112d21fd 1389 int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol);
0d72ab35
KO
1390 if (ret)
1391 return ret;
1392 }
1393
54429c90
YT
1394 file->f_mode |= FMODE_CAN_ODIRECT;
1395
0d72ab35
KO
1396 return generic_file_open(vinode, file);
1397}
1398
/*
 * File operations for regular files; installed as i_fop for S_IFREG inodes in
 * bch2_vfs_inode_init().
 */
1c6fdbd8 1399static const struct file_operations bch_file_operations = {
0d72ab35 1400 .open = bch2_open,
1c6fdbd8
KO
1401 .llseek = bch2_llseek,
1402 .read_iter = bch2_read_iter,
1403 .write_iter = bch2_write_iter,
1404 .mmap = bch2_mmap,
c6cab97c 1405 .get_unmapped_area = thp_get_unmapped_area,
1c6fdbd8
KO
1406 .fsync = bch2_fsync,
1407 .splice_read = filemap_splice_read,
1408 .splice_write = iter_file_splice_write,
1409 .fallocate = bch2_fallocate_dispatch,
1410 .unlocked_ioctl = bch2_fs_file_ioctl,
1411#ifdef CONFIG_COMPAT
1412 .compat_ioctl = bch2_compat_fs_ioctl,
1413#endif
76426098 1414 .remap_file_range = bch2_remap_file_range,
1c6fdbd8
KO
1415};
1416
/*
 * Inode operations for regular files; installed as i_op for S_IFREG inodes in
 * bch2_vfs_inode_init().  The ACL hooks exist only with
 * CONFIG_BCACHEFS_POSIX_ACL.
 */
1417static const struct inode_operations bch_file_inode_operations = {
1418 .getattr = bch2_getattr,
1419 .setattr = bch2_setattr,
1420 .fiemap = bch2_fiemap,
1421 .listxattr = bch2_xattr_list,
1422#ifdef CONFIG_BCACHEFS_POSIX_ACL
f39bae2e 1423 .get_inode_acl = bch2_get_acl,
1c6fdbd8
KO
1424 .set_acl = bch2_set_acl,
1425#endif
1426};
1427
/*
 * Inode operations for directories; installed as i_op for S_IFDIR inodes in
 * bch2_vfs_inode_init().  Note that ->rmdir and ->unlink share bch2_unlink().
 */
1428static const struct inode_operations bch_dir_inode_operations = {
1429 .lookup = bch2_lookup,
1430 .create = bch2_create,
1431 .link = bch2_link,
1432 .unlink = bch2_unlink,
1433 .symlink = bch2_symlink,
1434 .mkdir = bch2_mkdir,
821a99b7 1435 .rmdir = bch2_unlink,
1c6fdbd8
KO
1436 .mknod = bch2_mknod,
1437 .rename = bch2_rename2,
1438 .getattr = bch2_getattr,
1439 .setattr = bch2_setattr,
1440 .tmpfile = bch2_tmpfile,
1441 .listxattr = bch2_xattr_list,
1442#ifdef CONFIG_BCACHEFS_POSIX_ACL
f39bae2e 1443 .get_inode_acl = bch2_get_acl,
1c6fdbd8
KO
1444 .set_acl = bch2_set_acl,
1445#endif
1446};
1447
/*
 * File operations for open directories; installed as i_fop for S_IFDIR inodes
 * in bch2_vfs_inode_init().
 */
1448static const struct file_operations bch_dir_file_operations = {
1449 .llseek = bch2_dir_llseek,
1450 .read = generic_read_dir,
1451 .iterate_shared = bch2_vfs_readdir,
1452 .fsync = bch2_fsync,
1453 .unlocked_ioctl = bch2_fs_file_ioctl,
1454#ifdef CONFIG_COMPAT
1455 .compat_ioctl = bch2_compat_fs_ioctl,
1456#endif
1457};
1458
/*
 * Inode operations for symlinks; installed as i_op for S_IFLNK inodes in
 * bch2_vfs_inode_init().  The target is read with the generic
 * page_get_link().
 */
1459static const struct inode_operations bch_symlink_inode_operations = {
1460 .get_link = page_get_link,
1461 .getattr = bch2_getattr,
1462 .setattr = bch2_setattr,
1463 .listxattr = bch2_xattr_list,
1464#ifdef CONFIG_BCACHEFS_POSIX_ACL
f39bae2e 1465 .get_inode_acl = bch2_get_acl,
1c6fdbd8
KO
1466 .set_acl = bch2_set_acl,
1467#endif
1468};
1469
/*
 * Inode operations for every inode type that is not a regular file, directory
 * or symlink (the default case of the switch in bch2_vfs_inode_init()).
 */
1470static const struct inode_operations bch_special_inode_operations = {
1471 .getattr = bch2_getattr,
1472 .setattr = bch2_setattr,
1473 .listxattr = bch2_xattr_list,
1474#ifdef CONFIG_BCACHEFS_POSIX_ACL
f39bae2e 1475 .get_inode_acl = bch2_get_acl,
1c6fdbd8
KO
1476 .set_acl = bch2_set_acl,
1477#endif
1478};
1479
/*
 * Page cache operations; bch2_vfs_inode_init() installs these on the mapping
 * of every inode.  Folio migration is wired up only with CONFIG_MIGRATION.
 */
1480static const struct address_space_operations bch_address_space_operations = {
1c6fdbd8
KO
1481 .read_folio = bch2_read_folio,
1482 .writepages = bch2_writepages,
1483 .readahead = bch2_readahead,
e1036a2a 1484 .dirty_folio = filemap_dirty_folio,
1c6fdbd8
KO
1485 .write_begin = bch2_write_begin,
1486 .write_end = bch2_write_end,
1487 .invalidate_folio = bch2_invalidate_folio,
1488 .release_folio = bch2_release_folio,
1c6fdbd8
KO
1489#ifdef CONFIG_MIGRATION
1490 .migrate_folio = filemap_migrate_folio,
1491#endif
af7628d6 1492 .error_remove_folio = generic_error_remove_folio,
1c6fdbd8
KO
1493};
1494
85e95ca7
KO
/*
 * File handle payload identifying one inode: inode number, subvolume and
 * inode generation (filled in by bch2_inode_to_fid()).  Packed; its size in
 * u32 words is the file handle length checked by bcachefs_fid_valid().
 */
1495struct bcachefs_fid {
1496 u64 inum;
1497 u32 subvol;
1498 u32 gen;
1499} __packed;
1500
/*
 * File handle payload for FILEID_BCACHEFS_WITH_PARENT: the inode itself
 * (@fid) followed by the directory it was encoded relative to (@dir).
 */
1501struct bcachefs_fid_with_parent {
1502 struct bcachefs_fid fid;
1503 struct bcachefs_fid dir;
1504} __packed;
1505
1506static int bcachefs_fid_valid(int fh_len, int fh_type)
1c6fdbd8 1507{
85e95ca7
KO
1508 switch (fh_type) {
1509 case FILEID_BCACHEFS_WITHOUT_PARENT:
1510 return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1511 case FILEID_BCACHEFS_WITH_PARENT:
1512 return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1513 default:
1514 return false;
1515 }
1516}
1517
1518static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1519{
1520 return (struct bcachefs_fid) {
112d21fd
KO
1521 .inum = inode->ei_inum.inum,
1522 .subvol = inode->ei_inum.subvol,
85e95ca7
KO
1523 .gen = inode->ei_inode.bi_generation,
1524 };
1525}
1526
1527static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
1528 struct inode *vdir)
1529{
1530 struct bch_inode_info *inode = to_bch_ei(vinode);
1531 struct bch_inode_info *dir = to_bch_ei(vdir);
8bf77197 1532 int min_len;
85e95ca7
KO
1533
1534 if (!S_ISDIR(inode->v.i_mode) && dir) {
1535 struct bcachefs_fid_with_parent *fid = (void *) fh;
1c6fdbd8 1536
8bf77197
JK
1537 min_len = sizeof(*fid) / sizeof(u32);
1538 if (*len < min_len) {
1539 *len = min_len;
1540 return FILEID_INVALID;
1541 }
1542
85e95ca7
KO
1543 fid->fid = bch2_inode_to_fid(inode);
1544 fid->dir = bch2_inode_to_fid(dir);
1c6fdbd8 1545
8bf77197 1546 *len = min_len;
85e95ca7
KO
1547 return FILEID_BCACHEFS_WITH_PARENT;
1548 } else {
1549 struct bcachefs_fid *fid = (void *) fh;
1550
8bf77197
JK
1551 min_len = sizeof(*fid) / sizeof(u32);
1552 if (*len < min_len) {
1553 *len = min_len;
1554 return FILEID_INVALID;
1555 }
85e95ca7
KO
1556 *fid = bch2_inode_to_fid(inode);
1557
8bf77197 1558 *len = min_len;
85e95ca7
KO
1559 return FILEID_BCACHEFS_WITHOUT_PARENT;
1560 }
1561}
1562
1563static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1564 struct bcachefs_fid fid)
1565{
1566 struct bch_fs *c = sb->s_fs_info;
1567 struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1568 .subvol = fid.subvol,
1569 .inum = fid.inum,
1570 });
1571 if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1c6fdbd8 1572 iput(vinode);
85e95ca7 1573 vinode = ERR_PTR(-ESTALE);
1c6fdbd8
KO
1574 }
1575 return vinode;
1576}
1577
85e95ca7 1578static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1c6fdbd8
KO
1579 int fh_len, int fh_type)
1580{
85e95ca7
KO
1581 struct bcachefs_fid *fid = (void *) _fid;
1582
1583 if (!bcachefs_fid_valid(fh_len, fh_type))
1584 return NULL;
1585
1586 return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1c6fdbd8
KO
1587}
1588
85e95ca7 1589static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1c6fdbd8
KO
1590 int fh_len, int fh_type)
1591{
85e95ca7
KO
1592 struct bcachefs_fid_with_parent *fid = (void *) _fid;
1593
1594 if (!bcachefs_fid_valid(fh_len, fh_type) ||
1595 fh_type != FILEID_BCACHEFS_WITH_PARENT)
1596 return NULL;
1597
1598 return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1599}
1600
1601static struct dentry *bch2_get_parent(struct dentry *child)
1602{
1603 struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1604 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1605 subvol_inum parent_inum = {
1606 .subvol = inode->ei_inode.bi_parent_subvol ?:
112d21fd 1607 inode->ei_inum.subvol,
85e95ca7
KO
1608 .inum = inode->ei_inode.bi_dir,
1609 };
1610
85e95ca7
KO
1611 return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1612}
1613
1614static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
1615{
1616 struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1617 struct bch_inode_info *dir = to_bch_ei(parent->d_inode);
1618 struct bch_fs *c = inode->v.i_sb->s_fs_info;
6bd68ec2 1619 struct btree_trans *trans;
85e95ca7
KO
1620 struct btree_iter iter1;
1621 struct btree_iter iter2;
1622 struct bkey_s_c k;
1623 struct bkey_s_c_dirent d;
1624 struct bch_inode_unpacked inode_u;
1625 subvol_inum target;
1626 u32 snapshot;
01a7e74f
JA
1627 struct qstr dirent_name;
1628 unsigned name_len = 0;
85e95ca7
KO
1629 int ret;
1630
1631 if (!S_ISDIR(dir->v.i_mode))
1632 return -EINVAL;
1633
6bd68ec2 1634 trans = bch2_trans_get(c);
85e95ca7 1635
6bd68ec2 1636 bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
85e95ca7 1637 POS(dir->ei_inode.bi_inum, 0), 0);
6bd68ec2 1638 bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
85e95ca7
KO
1639 POS(dir->ei_inode.bi_inum, 0), 0);
1640retry:
6bd68ec2 1641 bch2_trans_begin(trans);
85e95ca7 1642
112d21fd 1643 ret = bch2_subvolume_get_snapshot(trans, dir->ei_inum.subvol, &snapshot);
85e95ca7
KO
1644 if (ret)
1645 goto err;
1646
1647 bch2_btree_iter_set_snapshot(&iter1, snapshot);
1648 bch2_btree_iter_set_snapshot(&iter2, snapshot);
1649
6bd68ec2 1650 ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
85e95ca7
KO
1651 if (ret)
1652 goto err;
1653
1654 if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
1655 bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
1656
1657 k = bch2_btree_iter_peek_slot(&iter1);
1658 ret = bkey_err(k);
1659 if (ret)
1660 goto err;
1661
1662 if (k.k->type != KEY_TYPE_dirent) {
e47a390a 1663 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
85e95ca7
KO
1664 goto err;
1665 }
1666
1667 d = bkey_s_c_to_dirent(k);
6bd68ec2 1668 ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
85e95ca7 1669 if (ret > 0)
e47a390a 1670 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
85e95ca7
KO
1671 if (ret)
1672 goto err;
1673
112d21fd 1674 if (subvol_inum_eq(target, inode->ei_inum))
85e95ca7
KO
1675 goto found;
1676 } else {
1677 /*
1678 * File with multiple hardlinks and our backref is to the wrong
1679 * directory - linear search:
1680 */
1681 for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
1682 if (k.k->p.inode > dir->ei_inode.bi_inum)
1683 break;
1684
1685 if (k.k->type != KEY_TYPE_dirent)
1686 continue;
1687
1688 d = bkey_s_c_to_dirent(k);
6bd68ec2 1689 ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
85e95ca7
KO
1690 if (ret < 0)
1691 break;
1692 if (ret)
1693 continue;
1694
112d21fd 1695 if (subvol_inum_eq(target, inode->ei_inum))
85e95ca7
KO
1696 goto found;
1697 }
1698 }
1699
1700 ret = -ENOENT;
1701 goto err;
1702found:
01a7e74f 1703 dirent_name = bch2_dirent_get_name(d);
85e95ca7 1704
01a7e74f
JA
1705 name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
1706 memcpy(name, dirent_name.name, name_len);
85e95ca7
KO
1707 name[name_len] = '\0';
1708err:
549d173c 1709 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
85e95ca7
KO
1710 goto retry;
1711
6bd68ec2
KO
1712 bch2_trans_iter_exit(trans, &iter1);
1713 bch2_trans_iter_exit(trans, &iter2);
1714 bch2_trans_put(trans);
85e95ca7
KO
1715
1716 return ret;
1c6fdbd8
KO
1717}
1718
/* File handle export operations, implemented by the functions above. */
1719static const struct export_operations bch_export_ops = {
85e95ca7
KO
1720 .encode_fh = bch2_encode_fh,
1721 .fh_to_dentry = bch2_fh_to_dentry,
1722 .fh_to_parent = bch2_fh_to_parent,
1723 .get_parent = bch2_get_parent,
1724 .get_name = bch2_get_name,
1c6fdbd8
KO
1725};
1726
54f77024
KO
1727static void bch2_vfs_inode_init(struct btree_trans *trans,
1728 subvol_inum inum,
1c6fdbd8 1729 struct bch_inode_info *inode,
9ca4853b
KO
1730 struct bch_inode_unpacked *bi,
1731 struct bch_subvolume *subvol)
1c6fdbd8 1732{
112d21fd
KO
1733 inode->v.i_ino = inum.inum;
1734 inode->ei_inum = inum;
1735 inode->ei_inode.bi_inum = inum.inum;
32b26e8c 1736 bch2_inode_update_after_write(trans, inode, bi, ~0);
1c6fdbd8
KO
1737
1738 inode->v.i_blocks = bi->bi_sectors;
1739 inode->v.i_ino = bi->bi_inum;
1740 inode->v.i_rdev = bi->bi_dev;
1741 inode->v.i_generation = bi->bi_generation;
1742 inode->v.i_size = bi->bi_size;
1743
33c74e41 1744 inode->ei_flags = 0;
1c6fdbd8 1745 inode->ei_quota_reserved = 0;
0f5254aa 1746 inode->ei_qid = bch_qid(bi);
1c6fdbd8 1747
bd4da046
YT
1748 if (BCH_SUBVOLUME_SNAP(subvol))
1749 set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1750
1c6fdbd8
KO
1751 inode->v.i_mapping->a_ops = &bch_address_space_operations;
1752
1753 switch (inode->v.i_mode & S_IFMT) {
1754 case S_IFREG:
1755 inode->v.i_op = &bch_file_inode_operations;
1756 inode->v.i_fop = &bch_file_operations;
1757 break;
1758 case S_IFDIR:
1759 inode->v.i_op = &bch_dir_inode_operations;
1760 inode->v.i_fop = &bch_dir_file_operations;
1761 break;
1762 case S_IFLNK:
1763 inode_nohighmem(&inode->v);
1764 inode->v.i_op = &bch_symlink_inode_operations;
1765 break;
1766 default:
1767 init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
1768 inode->v.i_op = &bch_special_inode_operations;
1769 break;
1770 }
550a6a49
KO
1771
1772 mapping_set_large_folios(inode->v.i_mapping);
1c6fdbd8
KO
1773}
1774
36aa49d3 1775static void bch2_free_inode(struct inode *vinode)
1c6fdbd8 1776{
36aa49d3 1777 kmem_cache_free(bch2_inode_cache, to_bch_ei(vinode));
1c6fdbd8
KO
1778}
1779
791236b8
JA
1780static int inode_update_times_fn(struct btree_trans *trans,
1781 struct bch_inode_info *inode,
1c6fdbd8
KO
1782 struct bch_inode_unpacked *bi,
1783 void *p)
1784{
1785 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1786
9e877052
LT
1787 bi->bi_atime = timespec_to_bch2_time(c, inode_get_atime(&inode->v));
1788 bi->bi_mtime = timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
1c6fdbd8
KO
1789 bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
1790
1791 return 0;
1792}
1793
1794static int bch2_vfs_write_inode(struct inode *vinode,
1795 struct writeback_control *wbc)
1796{
1797 struct bch_fs *c = vinode->i_sb->s_fs_info;
1798 struct bch_inode_info *inode = to_bch_ei(vinode);
1799 int ret;
1800
1801 mutex_lock(&inode->ei_update_lock);
2ea90048
KO
1802 ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1803 ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1c6fdbd8
KO
1804 mutex_unlock(&inode->ei_update_lock);
1805
5c1ef830 1806 return bch2_err_class(ret);
1c6fdbd8
KO
1807}
1808
/*
 * Tear down a VFS inode that is being evicted.
 *
 * An inode with no links left (and not marked bad) is also deleted from the
 * filesystem: its space and inode quota are released and bch2_inode_rm() is
 * called.  The point at which the inode is removed from the VFS inode hash
 * table depends on whether it is being deleted - see the comments in the
 * body.  Finally the inode is unlinked from c->vfs_inodes_list.
 */
1809static void bch2_evict_inode(struct inode *vinode)
1810{
1811 struct bch_fs *c = vinode->i_sb->s_fs_info;
1812 struct bch_inode_info *inode = to_bch_ei(vinode);
112d21fd
KO
1813 bool delete = !inode->v.i_nlink && !is_bad_inode(&inode->v);
1814
1815 /*
1816 * evict() has waited for outstanding writeback, we'll do no more IO
1817 * through this inode: it's safe to remove from VFS inode hashtable here
1818 *
1819 * Do that now so that other threads aren't blocked from pulling it back
1820 * in, there's no reason for them to be:
1821 */
1822 if (!delete)
1823 bch2_inode_hash_remove(c, inode);
1c6fdbd8
KO
1824
1825 truncate_inode_pages_final(&inode->v.i_data);
1826
1827 clear_inode(&inode->v);
1828
1829 BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1830
112d21fd 1831 if (delete) {
/* give back the quota charged for this inode's blocks and for the inode itself */
1c6fdbd8 1832 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
26609b61 1833 KEY_TYPE_QUOTA_WARN);
1c6fdbd8 1834 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
26609b61 1835 KEY_TYPE_QUOTA_WARN);
7c8f6f98 1836 bch2_inode_rm(c, inode_inum(inode));
112d21fd
KO
1837
1838 /*
1839 * If we are deleting, we need it present in the vfs hash table
1840 * so that fsck can check if unlinked inodes are still open:
1841 */
1842 bch2_inode_hash_remove(c, inode);
1c6fdbd8 1843 }
9edbcc72
KO
1844
1845 mutex_lock(&c->vfs_inodes_lock);
1846 list_del_init(&inode->ei_vfs_inode_list);
1847 mutex_unlock(&c->vfs_inodes_lock);
1c6fdbd8
KO
1848}
1849
/*
 * Push the cached VFS inodes whose subvolume id (ei_inum.subvol) is in @s out
 * of the inode cache.
 *
 * c->vfs_inodes_list is scanned repeatedly under c->vfs_inodes_lock.  Inodes
 * that could be grabbed are collected in @grabbed and, once the lock has been
 * dropped, marked with d_mark_dontcache(), have their dentry aliases pruned
 * and are released again.  The function returns after two consecutive passes
 * in which no inode was grabbed.
 */
9edbcc72 1850void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
41f9b7d3 1851{
defd9e39 1852 struct bch_inode_info *inode;
9edbcc72
KO
1853 DARRAY(struct bch_inode_info *) grabbed;
1854 bool clean_pass = false, this_pass_clean;
41f9b7d3 1855
9edbcc72
KO
1856 /*
1857 * Initially, we scan for inodes without I_DONTCACHE, then mark them to
1858 * be pruned with d_mark_dontcache().
1859 *
1860 * Once we've had a clean pass where we didn't find any inodes without
1861 * I_DONTCACHE, we wait for them to be freed:
1862 */
41f9b7d3 1863
9edbcc72
KO
1864 darray_init(&grabbed);
/* the result is not checked here; pushes below use darray_push_gfp() and handle failure */
1865 darray_make_room(&grabbed, 1024);
41f9b7d3
KO
1866again:
1867 cond_resched();
9edbcc72
KO
1868 this_pass_clean = true;
1869
1870 mutex_lock(&c->vfs_inodes_lock);
1871 list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
112d21fd 1872 if (!snapshot_list_has_id(s, inode->ei_inum.subvol))
41f9b7d3
KO
1873 continue;
1874
9edbcc72 1875 if (!(inode->v.i_state & I_DONTCACHE) &&
b0e8c75e
KO
1876 !(inode->v.i_state & I_FREEING) &&
1877 igrab(&inode->v)) {
9edbcc72
KO
1878 this_pass_clean = false;
1879
/* if the push fails, drop the reference just taken and end this pass early */
b0e8c75e
KO
1880 if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
1881 iput(&inode->v);
9edbcc72 1882 break;
b0e8c75e 1883 }
9edbcc72 1884 } else if (clean_pass && this_pass_clean) {
/* nothing grabbed so far in this pass or the last: sleep on this inode's __I_NEW bit waitqueue, then rescan */
0fe340a9
CB
1885 struct wait_bit_queue_entry wqe;
1886 struct wait_queue_head *wq_head;
41f9b7d3 1887
0fe340a9
CB
1888 wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
1889 prepare_to_wait_event(wq_head, &wqe.wq_entry,
1890 TASK_UNINTERRUPTIBLE);
9edbcc72
KO
1891 mutex_unlock(&c->vfs_inodes_lock);
1892
41f9b7d3 1893 schedule();
0fe340a9 1894 finish_wait(wq_head, &wqe.wq_entry);
41f9b7d3
KO
1895 goto again;
1896 }
9edbcc72
KO
1897 }
1898 mutex_unlock(&c->vfs_inodes_lock);
41f9b7d3 1899
b0e8c75e
KO
1900 darray_for_each(grabbed, i) {
1901 inode = *i;
1902 d_mark_dontcache(&inode->v);
1903 d_prune_aliases(&inode->v);
1904 iput(&inode->v);
1905 }
9edbcc72
KO
1906 grabbed.nr = 0;
1907
/* keep scanning until two passes in a row grabbed nothing */
1908 if (!clean_pass || !this_pass_clean) {
1909 clean_pass = this_pass_clean;
1910 goto again;
41f9b7d3 1911 }
9edbcc72
KO
1912
1913 darray_exit(&grabbed);
41f9b7d3
KO
1914}
1915
1c6fdbd8
KO
1916static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1917{
1918 struct super_block *sb = dentry->d_sb;
1919 struct bch_fs *c = sb->s_fs_info;
5663a415 1920 struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
5b650fd1 1921 unsigned shift = sb->s_blocksize_bits - 9;
df082b3a
KO
1922 /*
1923 * this assumes inodes take up 64 bytes, which is a decent average
1924 * number:
1925 */
1926 u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1c6fdbd8
KO
1927
1928 buf->f_type = BCACHEFS_STATFS_MAGIC;
1929 buf->f_bsize = sb->s_blocksize;
5663a415 1930 buf->f_blocks = usage.capacity >> shift;
47924527
KO
1931 buf->f_bfree = usage.free >> shift;
1932 buf->f_bavail = avail_factor(usage.free) >> shift;
df082b3a
KO
1933
1934 buf->f_files = usage.nr_inodes + avail_inodes;
1935 buf->f_ffree = avail_inodes;
1c6fdbd8 1936
f8f8fb44 1937 buf->f_fsid = uuid_to_fsid(c->sb.user_uuid.b);
1c6fdbd8
KO
1938 buf->f_namelen = BCH_NAME_MAX;
1939
1940 return 0;
1941}
1942
1943static int bch2_sync_fs(struct super_block *sb, int wait)
1944{
1945 struct bch_fs *c = sb->s_fs_info;
5c1ef830 1946 int ret;
1c6fdbd8 1947
747d1d6c
YT
1948 trace_bch2_sync_fs(sb, wait);
1949
416f6852
KO
1950 if (c->opts.journal_flush_disabled)
1951 return 0;
1952
1c6fdbd8
KO
1953 if (!wait) {
1954 bch2_journal_flush_async(&c->journal, NULL);
1955 return 0;
1956 }
1957
5c1ef830
KO
1958 ret = bch2_journal_flush(&c->journal);
1959 return bch2_err_class(ret);
1c6fdbd8
KO
1960}
1961
1962static struct bch_fs *bch2_path_to_fs(const char *path)
1963{
1964 struct bch_fs *c;
1965 dev_t dev;
1966 int ret;
1967
1968 ret = lookup_bdev(path, &dev);
1969 if (ret)
1970 return ERR_PTR(ret);
1971
1972 c = bch2_dev_to_fs(dev);
d5e4dcc2 1973 if (c)
1c6fdbd8 1974 closure_put(&c->cl);
d5e4dcc2 1975 return c ?: ERR_PTR(-ENOENT);
1c6fdbd8
KO
1976}
1977
929d9543
TB
1978static int bch2_remount(struct super_block *sb, int *flags,
1979 struct bch_opts opts)
1c6fdbd8
KO
1980{
1981 struct bch_fs *c = sb->s_fs_info;
929d9543 1982 int ret = 0;
1c6fdbd8 1983
62719cf3
KO
1984 opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
1985
1c6fdbd8 1986 if (opts.read_only != c->opts.read_only) {
1ada1606 1987 down_write(&c->state_lock);
1c6fdbd8
KO
1988
1989 if (opts.read_only) {
1990 bch2_fs_read_only(c);
1991
1992 sb->s_flags |= SB_RDONLY;
1993 } else {
134915f3
KO
1994 ret = bch2_fs_read_write(c);
1995 if (ret) {
1996 bch_err(c, "error going rw: %i", ret);
1ada1606 1997 up_write(&c->state_lock);
5c1ef830
KO
1998 ret = -EINVAL;
1999 goto err;
1c6fdbd8
KO
2000 }
2001
2002 sb->s_flags &= ~SB_RDONLY;
2003 }
2004
2005 c->opts.read_only = opts.read_only;
2006
1ada1606 2007 up_write(&c->state_lock);
1c6fdbd8
KO
2008 }
2009
96dea3d5 2010 if (opt_defined(opts, errors))
1c6fdbd8 2011 c->opts.errors = opts.errors;
5c1ef830
KO
2012err:
2013 return bch2_err_class(ret);
1c6fdbd8
KO
2014}
2015
625104ea
KO
2016static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
2017{
2018 struct bch_fs *c = root->d_sb->s_fs_info;
625104ea
KO
2019 bool first = true;
2020
9fea2274 2021 for_each_online_member(c, ca) {
625104ea
KO
2022 if (!first)
2023 seq_putc(seq, ':');
2024 first = false;
63807d95 2025 seq_puts(seq, ca->disk_sb.sb_name);
625104ea
KO
2026 }
2027
2028 return 0;
2029}
2030
1c6fdbd8
KO
2031static int bch2_show_options(struct seq_file *seq, struct dentry *root)
2032{
2033 struct bch_fs *c = root->d_sb->s_fs_info;
fa8e94fa 2034 struct printbuf buf = PRINTBUF;
1c6fdbd8 2035
3621ecc1
KO
2036 bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
2037 OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE);
2038 printbuf_nul_terminate(&buf);
07cf8bac 2039 seq_printf(seq, ",%s", buf.buf);
1c6fdbd8 2040
3621ecc1 2041 int ret = buf.allocation_failure ? -ENOMEM : 0;
fa8e94fa
KO
2042 printbuf_exit(&buf);
2043 return ret;
1c6fdbd8
KO
2044}
2045
d5e4dcc2
KO
2046static void bch2_put_super(struct super_block *sb)
2047{
2048 struct bch_fs *c = sb->s_fs_info;
2049
2050 __bch2_fs_stop(c);
2051}
2052
7239f8e0
BF
2053/*
2054 * bcachefs doesn't currently integrate intwrite freeze protection but the
2055 * internal write references serve the same purpose. Therefore reuse the
2056 * read-only transition code to perform the quiesce. The caveat is that we don't
2057 * currently have the ability to block tasks that want a write reference while
2058 * the superblock is frozen. This is fine for now, but we should either add
2059 * blocking support or find a way to integrate sb_start_intwrite() and friends.
2060 */
2061static int bch2_freeze(struct super_block *sb)
2062{
2063 struct bch_fs *c = sb->s_fs_info;
2064
2065 down_write(&c->state_lock);
2066 bch2_fs_read_only(c);
2067 up_write(&c->state_lock);
2068 return 0;
2069}
2070
2071static int bch2_unfreeze(struct super_block *sb)
2072{
2073 struct bch_fs *c = sb->s_fs_info;
2074 int ret;
2075
3c471b65 2076 if (test_bit(BCH_FS_emergency_ro, &c->flags))
57962305
BF
2077 return 0;
2078
7239f8e0
BF
2079 down_write(&c->state_lock);
2080 ret = bch2_fs_read_write(c);
2081 up_write(&c->state_lock);
2082 return ret;
2083}
2084
1c6fdbd8
KO
2085static const struct super_operations bch_super_operations = {
2086 .alloc_inode = bch2_alloc_inode,
36aa49d3 2087 .free_inode = bch2_free_inode,
1c6fdbd8
KO
2088 .write_inode = bch2_vfs_write_inode,
2089 .evict_inode = bch2_evict_inode,
2090 .sync_fs = bch2_sync_fs,
2091 .statfs = bch2_statfs,
625104ea 2092 .show_devname = bch2_show_devname,
1c6fdbd8 2093 .show_options = bch2_show_options,
1c6fdbd8
KO
2094 .put_super = bch2_put_super,
2095 .freeze_fs = bch2_freeze,
2096 .unfreeze_fs = bch2_unfreeze,
1c6fdbd8
KO
2097};
2098
1c6fdbd8
KO
2099static int bch2_set_super(struct super_block *s, void *data)
2100{
2101 s->s_fs_info = data;
2102 return 0;
2103}
2104
d5e4dcc2
KO
2105static int bch2_noset_super(struct super_block *s, void *data)
2106{
2107 return -EBUSY;
2108}
2109
806ebf2a
KO
2110typedef DARRAY(struct bch_fs *) darray_fs;
2111
d5e4dcc2
KO
2112static int bch2_test_super(struct super_block *s, void *data)
2113{
2114 struct bch_fs *c = s->s_fs_info;
806ebf2a 2115 darray_fs *d = data;
d5e4dcc2
KO
2116
2117 if (!c)
2118 return false;
2119
806ebf2a
KO
2120 darray_for_each(*d, i)
2121 if (c != *i)
d5e4dcc2
KO
2122 return false;
2123 return true;
2124}
2125
25ee25e6 2126static int bch2_fs_get_tree(struct fs_context *fc)
1c6fdbd8
KO
2127{
2128 struct bch_fs *c;
1c6fdbd8
KO
2129 struct super_block *sb;
2130 struct inode *vinode;
25ee25e6
KO
2131 struct bch2_opts_parse *opts_parse = fc->fs_private;
2132 struct bch_opts opts = opts_parse->opts;
5645c32c
KO
2133 darray_str devs;
2134 darray_fs devs_to_fs = {};
1c6fdbd8
KO
2135 int ret;
2136
25ee25e6 2137 opt_set(opts, read_only, (fc->sb_flags & SB_RDONLY) != 0);
5645c32c 2138 opt_set(opts, nostart, true);
1c6fdbd8 2139
25ee25e6
KO
2140 if (!fc->source || strlen(fc->source) == 0)
2141 return -EINVAL;
044c8c9e 2142
25ee25e6 2143 ret = bch2_split_devs(fc->source, &devs);
806ebf2a 2144 if (ret)
25ee25e6 2145 return ret;
1c6fdbd8 2146
806ebf2a
KO
2147 darray_for_each(devs, i) {
2148 ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i));
5645c32c
KO
2149 if (ret)
2150 goto err;
1c6fdbd8
KO
2151 }
2152
25ee25e6 2153 sb = sget(fc->fs_type, bch2_test_super, bch2_noset_super, fc->sb_flags|SB_NOSEC, &devs_to_fs);
d5e4dcc2
KO
2154 if (!IS_ERR(sb))
2155 goto got_sb;
2156
806ebf2a 2157 c = bch2_fs_open(devs.data, devs.nr, opts);
5645c32c
KO
2158 ret = PTR_ERR_OR_ZERO(c);
2159 if (ret)
2160 goto err;
a10e677a
KO
2161
2162 /* Some options can't be parsed until after the fs is started: */
5645c32c 2163 opts = bch2_opts_empty();
25ee25e6 2164 ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);
5645c32c
KO
2165 if (ret)
2166 goto err_stop_fs;
a10e677a
KO
2167
2168 bch2_opts_apply(&c->opts, opts);
2169
5645c32c
KO
2170 ret = bch2_fs_start(c);
2171 if (ret)
2172 goto err_stop_fs;
d5e4dcc2 2173
25ee25e6 2174 sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c);
5645c32c
KO
2175 ret = PTR_ERR_OR_ZERO(sb);
2176 if (ret)
2177 goto err_stop_fs;
d5e4dcc2 2178got_sb:
d5e4dcc2 2179 c = sb->s_fs_info;
1c6fdbd8 2180
d5e4dcc2 2181 if (sb->s_root) {
25ee25e6 2182 if ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY) {
1c6fdbd8
KO
2183 ret = -EBUSY;
2184 goto err_put_super;
2185 }
2186 goto out;
2187 }
2188
5b6d40e2
KO
2189 sb->s_blocksize = block_bytes(c);
2190 sb->s_blocksize_bits = ilog2(block_bytes(c));
1c6fdbd8
KO
2191 sb->s_maxbytes = MAX_LFS_FILESIZE;
2192 sb->s_op = &bch_super_operations;
2193 sb->s_export_op = &bch_export_ops;
2194#ifdef CONFIG_BCACHEFS_QUOTA
2195 sb->s_qcop = &bch2_quotactl_operations;
2196 sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
2197#endif
2198 sb->s_xattr = bch2_xattr_handlers;
2199 sb->s_magic = BCACHEFS_STATFS_MAGIC;
595c1e9b
KO
2200 sb->s_time_gran = c->sb.nsec_per_time_unit;
2201 sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
2202 sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
29223b5a 2203 sb->s_uuid = c->sb.user_uuid;
9ac3e660 2204 sb->s_shrink->seeks = 0;
1c6fdbd8 2205 c->vfs_sb = sb;
3e3e02e6 2206 strscpy(sb->s_id, c->name, sizeof(sb->s_id));
1c6fdbd8
KO
2207
2208 ret = super_setup_bdi(sb);
2209 if (ret)
2210 goto err_put_super;
2211
2212 sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
2213
9fea2274 2214 for_each_online_member(c, ca) {
1c6fdbd8
KO
2215 struct block_device *bdev = ca->disk_sb.bdev;
2216
2217 /* XXX: create an anonymous device for multi device filesystems */
2218 sb->s_bdev = bdev;
2219 sb->s_dev = bdev->bd_dev;
2220 percpu_ref_put(&ca->io_ref);
2221 break;
2222 }
2223
ddc7dd62
KO
2224 c->dev = sb->s_dev;
2225
1c6fdbd8
KO
2226#ifdef CONFIG_BCACHEFS_POSIX_ACL
2227 if (c->opts.acl)
2228 sb->s_flags |= SB_POSIXACL;
2229#endif
2230
ecae0bd5 2231 sb->s_shrink->seeks = 0;
e3f2db39 2232
284ae18c 2233 vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
d4bf5eec 2234 ret = PTR_ERR_OR_ZERO(vinode);
cf904c8d
KO
2235 bch_err_msg(c, ret, "mounting: error getting root inode");
2236 if (ret)
1c6fdbd8 2237 goto err_put_super;
1c6fdbd8
KO
2238
2239 sb->s_root = d_make_root(vinode);
2240 if (!sb->s_root) {
619f5bee 2241 bch_err(c, "error mounting: error allocating root dentry");
1c6fdbd8
KO
2242 ret = -ENOMEM;
2243 goto err_put_super;
2244 }
2245
2246 sb->s_flags |= SB_ACTIVE;
2247out:
25ee25e6 2248 fc->root = dget(sb->s_root);
83208cbf 2249err:
5645c32c
KO
2250 darray_exit(&devs_to_fs);
2251 bch2_darray_str_exit(&devs);
0f1f7324
KO
2252 if (ret)
2253 pr_err("error: %s", bch2_err_str(ret));
83208cbf
KO
2254 /*
2255 * On an inconsistency error in recovery we might see an -EROFS derived
2256 * errorcode (from the journal), but we don't want to return that to
2257 * userspace as that causes util-linux to retry the mount RO - which is
2258 * confusing:
2259 */
2260 if (bch2_err_matches(ret, EROFS) && ret != -EROFS)
2261 ret = -EIO;
25ee25e6 2262 return bch2_err_class(ret);
5645c32c
KO
2263
2264err_stop_fs:
2265 bch2_fs_stop(c);
2266 goto err;
2267
2268err_put_super:
2269 __bch2_fs_stop(c);
2270 deactivate_locked_super(sb);
2271 goto err;
1c6fdbd8
KO
2272}
2273
2274static void bch2_kill_sb(struct super_block *sb)
2275{
2276 struct bch_fs *c = sb->s_fs_info;
2277
2278 generic_shutdown_super(sb);
178c4873 2279 bch2_fs_free(c);
1c6fdbd8
KO
2280}
2281
929d9543
TB
2282static void bch2_fs_context_free(struct fs_context *fc)
2283{
2284 struct bch2_opts_parse *opts = fc->fs_private;
2285
2286 if (opts) {
2287 printbuf_exit(&opts->parse_later);
2288 kfree(opts);
2289 }
2290}
2291
2292static int bch2_fs_parse_param(struct fs_context *fc,
2293 struct fs_parameter *param)
2294{
2295 /*
2296 * the "source" param, i.e., the name of the device(s) to mount,
2297 * is handled by the VFS layer.
2298 */
2299 if (!strcmp(param->key, "source"))
2300 return -ENOPARAM;
2301
2302 struct bch2_opts_parse *opts = fc->fs_private;
2303 struct bch_fs *c = NULL;
2304
2305 /* for reconfigure, we already have a struct bch_fs */
2306 if (fc->root)
2307 c = fc->root->d_sb->s_fs_info;
2308
2309 int ret = bch2_parse_one_mount_opt(c, &opts->opts,
2310 &opts->parse_later, param->key,
2311 param->string);
2312
2313 return bch2_err_class(ret);
2314}
2315
929d9543
TB
2316static int bch2_fs_reconfigure(struct fs_context *fc)
2317{
2318 struct super_block *sb = fc->root->d_sb;
2319 struct bch2_opts_parse *opts = fc->fs_private;
2320
2321 return bch2_remount(sb, &fc->sb_flags, opts->opts);
2322}
2323
2324static const struct fs_context_operations bch2_context_ops = {
2325 .free = bch2_fs_context_free,
2326 .parse_param = bch2_fs_parse_param,
2327 .get_tree = bch2_fs_get_tree,
2328 .reconfigure = bch2_fs_reconfigure,
2329};
2330
2331static int bch2_init_fs_context(struct fs_context *fc)
2332{
2333 struct bch2_opts_parse *opts = kzalloc(sizeof(*opts), GFP_KERNEL);
2334
2335 if (!opts)
2336 return -ENOMEM;
2337
2338 opts->parse_later = PRINTBUF;
2339
2340 fc->ops = &bch2_context_ops;
2341 fc->fs_private = opts;
2342
2343 return 0;
2344}
2345
112d21fd
KO
2346void bch2_fs_vfs_exit(struct bch_fs *c)
2347{
2348 if (c->vfs_inodes_table.tbl)
2349 rhashtable_destroy(&c->vfs_inodes_table);
2350}
2351
2352int bch2_fs_vfs_init(struct bch_fs *c)
2353{
2354 return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params);
2355}
2356
1c6fdbd8 2357static struct file_system_type bcache_fs_type = {
929d9543
TB
2358 .owner = THIS_MODULE,
2359 .name = "bcachefs",
2360 .init_fs_context = bch2_init_fs_context,
2361 .kill_sb = bch2_kill_sb,
c24adfa0 2362 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
1c6fdbd8
KO
2363};
2364
2365MODULE_ALIAS_FS("bcachefs");
2366
2367void bch2_vfs_exit(void)
2368{
2369 unregister_filesystem(&bcache_fs_type);
3e3e02e6 2370 kmem_cache_destroy(bch2_inode_cache);
1c6fdbd8
KO
2371}
2372
2373int __init bch2_vfs_init(void)
2374{
2375 int ret = -ENOMEM;
2376
094c6a9f
YT
2377 bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT |
2378 SLAB_ACCOUNT);
1c6fdbd8
KO
2379 if (!bch2_inode_cache)
2380 goto err;
2381
2382 ret = register_filesystem(&bcache_fs_type);
2383 if (ret)
2384 goto err;
2385
2386 return 0;
2387err:
2388 bch2_vfs_exit();
2389 return ret;
2390}
2391
2392#endif /* NO_BCACHEFS_FS */
This page took 1.403538 seconds and 4 git commands to generate.