]> Git Repo - J-linux.git/blob - fs/ceph/super.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / fs / ceph / super.c
1 // SPDX-License-Identifier: GPL-2.0-only
2
3 #include <linux/ceph/ceph_debug.h>
4
5 #include <linux/backing-dev.h>
6 #include <linux/ctype.h>
7 #include <linux/fs.h>
8 #include <linux/inet.h>
9 #include <linux/in6.h>
10 #include <linux/module.h>
11 #include <linux/mount.h>
12 #include <linux/fs_context.h>
13 #include <linux/fs_parser.h>
14 #include <linux/sched.h>
15 #include <linux/seq_file.h>
16 #include <linux/slab.h>
17 #include <linux/statfs.h>
18 #include <linux/string.h>
19
20 #include "super.h"
21 #include "mds_client.h"
22 #include "cache.h"
23 #include "crypto.h"
24
25 #include <linux/ceph/ceph_features.h>
26 #include <linux/ceph/decode.h>
27 #include <linux/ceph/mon_client.h>
28 #include <linux/ceph/auth.h>
29 #include <linux/ceph/debugfs.h>
30
31 #include <uapi/linux/magic.h>
32
/*
 * File-scope list head together with a spinlock.
 * NOTE(review): presumably ceph_fsc_list tracks ceph_fs_client instances
 * and ceph_fsc_lock serialises access to it -- the users are outside this
 * part of the file, confirm there.
 */
static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list);
35
36 /*
37  * Ceph superblock operations
38  *
39  * Handle the basics of mounting, unmounting.
40  */
41
42 /*
43  * super ops
44  */
45 static void ceph_put_super(struct super_block *s)
46 {
47         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s);
48
49         doutc(fsc->client, "begin\n");
50         ceph_fscrypt_free_dummy_policy(fsc);
51         ceph_mdsc_close_sessions(fsc->mdsc);
52         doutc(fsc->client, "done\n");
53 }
54
55 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
56 {
57         struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry));
58         struct ceph_mon_client *monc = &fsc->client->monc;
59         struct ceph_statfs st;
60         int i, err;
61         u64 data_pool;
62
63         doutc(fsc->client, "begin\n");
64         if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
65                 data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
66         } else {
67                 data_pool = CEPH_NOPOOL;
68         }
69
70         err = ceph_monc_do_statfs(monc, data_pool, &st);
71         if (err < 0)
72                 return err;
73
74         /* fill in kstatfs */
75         buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
76
77         /*
78          * Express utilization in terms of large blocks to avoid
79          * overflow on 32-bit machines.
80          */
81         buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
82
83         /*
84          * By default use root quota for stats; fallback to overall filesystem
85          * usage if using 'noquotadf' mount option or if the root dir doesn't
86          * have max_bytes quota set.
87          */
88         if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
89             !ceph_quota_update_statfs(fsc, buf)) {
90                 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
91                 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
92                 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
93         }
94
95         /*
96          * NOTE: for the time being, we make bsize == frsize to humor
97          * not-yet-ancient versions of glibc that are broken.
98          * Someday, we will probably want to report a real block
99          * size...  whatever that may mean for a network file system!
100          */
101         buf->f_bsize = buf->f_frsize;
102
103         buf->f_files = le64_to_cpu(st.num_objects);
104         buf->f_ffree = -1;
105         buf->f_namelen = NAME_MAX;
106
107         /* Must convert the fsid, for consistent values across arches */
108         buf->f_fsid.val[0] = 0;
109         mutex_lock(&monc->mutex);
110         for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
111                 buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
112         mutex_unlock(&monc->mutex);
113
114         /* fold the fs_cluster_id into the upper bits */
115         buf->f_fsid.val[1] = monc->fs_cluster_id;
116
117         doutc(fsc->client, "done\n");
118         return 0;
119 }
120
121 static int ceph_sync_fs(struct super_block *sb, int wait)
122 {
123         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
124         struct ceph_client *cl = fsc->client;
125
126         if (!wait) {
127                 doutc(cl, "(non-blocking)\n");
128                 ceph_flush_dirty_caps(fsc->mdsc);
129                 ceph_flush_cap_releases(fsc->mdsc);
130                 doutc(cl, "(non-blocking) done\n");
131                 return 0;
132         }
133
134         doutc(cl, "(blocking)\n");
135         ceph_osdc_sync(&fsc->client->osdc);
136         ceph_mdsc_sync(fsc->mdsc);
137         doutc(cl, "(blocking) done\n");
138         return 0;
139 }
140
141 /*
142  * mount options
143  */
/*
 * Tokens for the mount parameters in ceph_mount_parameters; these are the
 * values switched on in ceph_parse_mount_param().
 */
enum {
	/* integer arguments */
	Opt_wsize,
	Opt_rsize,
	Opt_rasize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_caps_max,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,

	/* string arguments */
	Opt_snapdirname,
	Opt_mds_namespace,
	Opt_recover_session,
	Opt_source,
	Opt_mon_addr,
	Opt_test_dummy_encryption,

	/* boolean flags */
	Opt_dirstat,
	Opt_rbytes,
	Opt_asyncreaddir,
	Opt_dcache,
	Opt_ino32,
	Opt_fscache,
	Opt_poolperm,
	Opt_require_active_mds,
	Opt_acl,
	Opt_quotadf,
	Opt_copyfrom,
	Opt_wsync,
	Opt_pagecache,
	Opt_sparseread,
};
177
/* Values accepted by the "recover_session" mount option. */
enum ceph_recover_session_mode {
	ceph_recover_session_no,	/* "no" */
	ceph_recover_session_clean,	/* "clean" */
};
182
183 static const struct constant_table ceph_param_recover[] = {
184         { "no",         ceph_recover_session_no },
185         { "clean",      ceph_recover_session_clean },
186         {}
187 };
188
189 static const struct fs_parameter_spec ceph_mount_parameters[] = {
190         fsparam_flag_no ("acl",                         Opt_acl),
191         fsparam_flag_no ("asyncreaddir",                Opt_asyncreaddir),
192         fsparam_s32     ("caps_max",                    Opt_caps_max),
193         fsparam_u32     ("caps_wanted_delay_max",       Opt_caps_wanted_delay_max),
194         fsparam_u32     ("caps_wanted_delay_min",       Opt_caps_wanted_delay_min),
195         fsparam_u32     ("write_congestion_kb",         Opt_congestion_kb),
196         fsparam_flag_no ("copyfrom",                    Opt_copyfrom),
197         fsparam_flag_no ("dcache",                      Opt_dcache),
198         fsparam_flag_no ("dirstat",                     Opt_dirstat),
199         fsparam_flag_no ("fsc",                         Opt_fscache), // fsc|nofsc
200         fsparam_string  ("fsc",                         Opt_fscache), // fsc=...
201         fsparam_flag_no ("ino32",                       Opt_ino32),
202         fsparam_string  ("mds_namespace",               Opt_mds_namespace),
203         fsparam_string  ("mon_addr",                    Opt_mon_addr),
204         fsparam_flag_no ("poolperm",                    Opt_poolperm),
205         fsparam_flag_no ("quotadf",                     Opt_quotadf),
206         fsparam_u32     ("rasize",                      Opt_rasize),
207         fsparam_flag_no ("rbytes",                      Opt_rbytes),
208         fsparam_u32     ("readdir_max_bytes",           Opt_readdir_max_bytes),
209         fsparam_u32     ("readdir_max_entries",         Opt_readdir_max_entries),
210         fsparam_enum    ("recover_session",             Opt_recover_session, ceph_param_recover),
211         fsparam_flag_no ("require_active_mds",          Opt_require_active_mds),
212         fsparam_u32     ("rsize",                       Opt_rsize),
213         fsparam_string  ("snapdirname",                 Opt_snapdirname),
214         fsparam_string  ("source",                      Opt_source),
215         fsparam_flag    ("test_dummy_encryption",       Opt_test_dummy_encryption),
216         fsparam_string  ("test_dummy_encryption",       Opt_test_dummy_encryption),
217         fsparam_u32     ("wsize",                       Opt_wsize),
218         fsparam_flag_no ("wsync",                       Opt_wsync),
219         fsparam_flag_no ("pagecache",                   Opt_pagecache),
220         fsparam_flag_no ("sparseread",                  Opt_sparseread),
221         {}
222 };
223
/*
 * Parsing state stored in fs_context->fs_private: the two option sets
 * that the parameter parsers fill in.
 */
struct ceph_parse_opts_ctx {
	struct ceph_options		*copts;	/* passed to ceph_parse_param() / ceph_parse_mon_ips() */
	struct ceph_mount_options	*opts;	/* filesystem-level mount options */
};
228
/*
 * Canonicalize @path in place: collapse every run of '/' into a single
 * slash and drop a trailing slash, except when the slash is all that is
 * left.
 *
 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
 */
static void canonicalize_path(char *path)
{
	const char *in;
	char *out = path;

	for (in = path; *in != '\0'; in++) {
		/* skip a slash that directly follows one we already kept */
		if (*in == '/' && out > path && out[-1] == '/')
			continue;
		*out++ = *in;
	}

	/* strip the trailing slash unless the result is just "/" */
	if (out - path > 1 && out[-1] == '/')
		out--;
	*out = '\0';
}
248
249 /*
250  * Check if the mds namespace in ceph_mount_options matches
251  * the passed in namespace string. First time match (when
252  * ->mds_namespace is NULL) is treated specially, since
253  * ->mds_namespace needs to be initialized by the caller.
254  */
255 static int namespace_equals(struct ceph_mount_options *fsopt,
256                             const char *namespace, size_t len)
257 {
258         return !(fsopt->mds_namespace &&
259                  (strlen(fsopt->mds_namespace) != len ||
260                   strncmp(fsopt->mds_namespace, namespace, len)));
261 }
262
263 static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
264                                  struct fs_context *fc)
265 {
266         int r;
267         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
268         struct ceph_mount_options *fsopt = pctx->opts;
269
270         if (*dev_name_end != ':')
271                 return invalfc(fc, "separator ':' missing in source");
272
273         r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
274                                pctx->copts, fc->log.log, ',');
275         if (r)
276                 return r;
277
278         fsopt->new_dev_syntax = false;
279         return 0;
280 }
281
282 static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
283                                  struct fs_context *fc)
284 {
285         size_t len;
286         struct ceph_fsid fsid;
287         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
288         struct ceph_options *opts = pctx->copts;
289         struct ceph_mount_options *fsopt = pctx->opts;
290         const char *name_start = dev_name;
291         const char *fsid_start, *fs_name_start;
292
293         if (*dev_name_end != '=') {
294                 dout("separator '=' missing in source");
295                 return -EINVAL;
296         }
297
298         fsid_start = strchr(dev_name, '@');
299         if (!fsid_start)
300                 return invalfc(fc, "missing cluster fsid");
301         len = fsid_start - name_start;
302         kfree(opts->name);
303         opts->name = kstrndup(name_start, len, GFP_KERNEL);
304         if (!opts->name)
305                 return -ENOMEM;
306         dout("using %s entity name", opts->name);
307
308         ++fsid_start; /* start of cluster fsid */
309         fs_name_start = strchr(fsid_start, '.');
310         if (!fs_name_start)
311                 return invalfc(fc, "missing file system name");
312
313         if (ceph_parse_fsid(fsid_start, &fsid))
314                 return invalfc(fc, "Invalid FSID");
315
316         ++fs_name_start; /* start of file system name */
317         len = dev_name_end - fs_name_start;
318
319         if (!namespace_equals(fsopt, fs_name_start, len))
320                 return invalfc(fc, "Mismatching mds_namespace");
321         kfree(fsopt->mds_namespace);
322         fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
323         if (!fsopt->mds_namespace)
324                 return -ENOMEM;
325         dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
326
327         fsopt->new_dev_syntax = true;
328         return 0;
329 }
330
331 /*
332  * Parse the source parameter for new device format. Distinguish the device
333  * spec from the path. Try parsing new device format and fallback to old
334  * format if needed.
335  *
336  * New device syntax will looks like:
337  *     <device_spec>=/<path>
338  * where
339  *     <device_spec> is [email protected]
340  *     <path> is optional, but if present must begin with '/'
341  * (monitor addresses are passed via mount option)
342  *
343  * Old device syntax is:
344  *     <server_spec>[,<server_spec>...]:[<path>]
345  * where
346  *     <server_spec> is <ip>[:<port>]
347  *     <path> is optional, but if present must begin with '/'
348  */
349 static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
350 {
351         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
352         struct ceph_mount_options *fsopt = pctx->opts;
353         char *dev_name = param->string, *dev_name_end;
354         int ret;
355
356         dout("'%s'\n", dev_name);
357         if (!dev_name || !*dev_name)
358                 return invalfc(fc, "Empty source");
359
360         dev_name_end = strchr(dev_name, '/');
361         if (dev_name_end) {
362                 /*
363                  * The server_path will include the whole chars from userland
364                  * including the leading '/'.
365                  */
366                 kfree(fsopt->server_path);
367                 fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
368                 if (!fsopt->server_path)
369                         return -ENOMEM;
370
371                 canonicalize_path(fsopt->server_path);
372         } else {
373                 dev_name_end = dev_name + strlen(dev_name);
374         }
375
376         dev_name_end--;         /* back up to separator */
377         if (dev_name_end < dev_name)
378                 return invalfc(fc, "Path missing in source");
379
380         dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
381         if (fsopt->server_path)
382                 dout("server path '%s'\n", fsopt->server_path);
383
384         dout("trying new device syntax");
385         ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
386         if (ret) {
387                 if (ret != -EINVAL)
388                         return ret;
389                 dout("trying old device syntax");
390                 ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
391                 if (ret)
392                         return ret;
393         }
394
395         fc->source = param->string;
396         param->string = NULL;
397         return 0;
398 }
399
400 static int ceph_parse_mon_addr(struct fs_parameter *param,
401                                struct fs_context *fc)
402 {
403         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
404         struct ceph_mount_options *fsopt = pctx->opts;
405
406         kfree(fsopt->mon_addr);
407         fsopt->mon_addr = param->string;
408         param->string = NULL;
409
410         return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
411                                   pctx->copts, fc->log.log, '/');
412 }
413
414 static int ceph_parse_mount_param(struct fs_context *fc,
415                                   struct fs_parameter *param)
416 {
417         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
418         struct ceph_mount_options *fsopt = pctx->opts;
419         struct fs_parse_result result;
420         unsigned int mode;
421         int token, ret;
422
423         ret = ceph_parse_param(param, pctx->copts, fc->log.log);
424         if (ret != -ENOPARAM)
425                 return ret;
426
427         token = fs_parse(fc, ceph_mount_parameters, param, &result);
428         dout("%s: fs_parse '%s' token %d\n",__func__, param->key, token);
429         if (token < 0)
430                 return token;
431
432         switch (token) {
433         case Opt_snapdirname:
434                 if (strlen(param->string) > NAME_MAX)
435                         return invalfc(fc, "snapdirname too long");
436                 kfree(fsopt->snapdir_name);
437                 fsopt->snapdir_name = param->string;
438                 param->string = NULL;
439                 break;
440         case Opt_mds_namespace:
441                 if (!namespace_equals(fsopt, param->string, strlen(param->string)))
442                         return invalfc(fc, "Mismatching mds_namespace");
443                 kfree(fsopt->mds_namespace);
444                 fsopt->mds_namespace = param->string;
445                 param->string = NULL;
446                 break;
447         case Opt_recover_session:
448                 mode = result.uint_32;
449                 if (mode == ceph_recover_session_no)
450                         fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER;
451                 else if (mode == ceph_recover_session_clean)
452                         fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER;
453                 else
454                         BUG();
455                 break;
456         case Opt_source:
457                 if (fc->source)
458                         return invalfc(fc, "Multiple sources specified");
459                 return ceph_parse_source(param, fc);
460         case Opt_mon_addr:
461                 return ceph_parse_mon_addr(param, fc);
462         case Opt_wsize:
463                 if (result.uint_32 < PAGE_SIZE ||
464                     result.uint_32 > CEPH_MAX_WRITE_SIZE)
465                         goto out_of_range;
466                 fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE);
467                 break;
468         case Opt_rsize:
469                 if (result.uint_32 < PAGE_SIZE ||
470                     result.uint_32 > CEPH_MAX_READ_SIZE)
471                         goto out_of_range;
472                 fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE);
473                 break;
474         case Opt_rasize:
475                 fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE);
476                 break;
477         case Opt_caps_wanted_delay_min:
478                 if (result.uint_32 < 1)
479                         goto out_of_range;
480                 fsopt->caps_wanted_delay_min = result.uint_32;
481                 break;
482         case Opt_caps_wanted_delay_max:
483                 if (result.uint_32 < 1)
484                         goto out_of_range;
485                 fsopt->caps_wanted_delay_max = result.uint_32;
486                 break;
487         case Opt_caps_max:
488                 if (result.int_32 < 0)
489                         goto out_of_range;
490                 fsopt->caps_max = result.int_32;
491                 break;
492         case Opt_readdir_max_entries:
493                 if (result.uint_32 < 1)
494                         goto out_of_range;
495                 fsopt->max_readdir = result.uint_32;
496                 break;
497         case Opt_readdir_max_bytes:
498                 if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0)
499                         goto out_of_range;
500                 fsopt->max_readdir_bytes = result.uint_32;
501                 break;
502         case Opt_congestion_kb:
503                 if (result.uint_32 < 1024) /* at least 1M */
504                         goto out_of_range;
505                 fsopt->congestion_kb = result.uint_32;
506                 break;
507         case Opt_dirstat:
508                 if (!result.negated)
509                         fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
510                 else
511                         fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
512                 break;
513         case Opt_rbytes:
514                 if (!result.negated)
515                         fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
516                 else
517                         fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
518                 break;
519         case Opt_asyncreaddir:
520                 if (!result.negated)
521                         fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
522                 else
523                         fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
524                 break;
525         case Opt_dcache:
526                 if (!result.negated)
527                         fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
528                 else
529                         fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
530                 break;
531         case Opt_ino32:
532                 if (!result.negated)
533                         fsopt->flags |= CEPH_MOUNT_OPT_INO32;
534                 else
535                         fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
536                 break;
537
538         case Opt_fscache:
539 #ifdef CONFIG_CEPH_FSCACHE
540                 kfree(fsopt->fscache_uniq);
541                 fsopt->fscache_uniq = NULL;
542                 if (result.negated) {
543                         fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
544                 } else {
545                         fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
546                         fsopt->fscache_uniq = param->string;
547                         param->string = NULL;
548                 }
549                 break;
550 #else
551                 return invalfc(fc, "fscache support is disabled");
552 #endif
553         case Opt_poolperm:
554                 if (!result.negated)
555                         fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
556                 else
557                         fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
558                 break;
559         case Opt_require_active_mds:
560                 if (!result.negated)
561                         fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
562                 else
563                         fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
564                 break;
565         case Opt_quotadf:
566                 if (!result.negated)
567                         fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
568                 else
569                         fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
570                 break;
571         case Opt_copyfrom:
572                 if (!result.negated)
573                         fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM;
574                 else
575                         fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM;
576                 break;
577         case Opt_acl:
578                 if (!result.negated) {
579 #ifdef CONFIG_CEPH_FS_POSIX_ACL
580                         fc->sb_flags |= SB_POSIXACL;
581 #else
582                         return invalfc(fc, "POSIX ACL support is disabled");
583 #endif
584                 } else {
585                         fc->sb_flags &= ~SB_POSIXACL;
586                 }
587                 break;
588         case Opt_wsync:
589                 if (!result.negated)
590                         fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS;
591                 else
592                         fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
593                 break;
594         case Opt_pagecache:
595                 if (result.negated)
596                         fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
597                 else
598                         fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
599                 break;
600         case Opt_sparseread:
601                 if (result.negated)
602                         fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD;
603                 else
604                         fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD;
605                 break;
606         case Opt_test_dummy_encryption:
607 #ifdef CONFIG_FS_ENCRYPTION
608                 fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy);
609                 ret = fscrypt_parse_test_dummy_encryption(param,
610                                                 &fsopt->dummy_enc_policy);
611                 if (ret == -EINVAL) {
612                         warnfc(fc, "Value of option \"%s\" is unrecognized",
613                                param->key);
614                 } else if (ret == -EEXIST) {
615                         warnfc(fc, "Conflicting test_dummy_encryption options");
616                         ret = -EINVAL;
617                 }
618 #else
619                 warnfc(fc,
620                        "FS encryption not supported: test_dummy_encryption mount option ignored");
621 #endif
622                 break;
623         default:
624                 BUG();
625         }
626         return 0;
627
628 out_of_range:
629         return invalfc(fc, "%s out of range", param->key);
630 }
631
632 static void destroy_mount_options(struct ceph_mount_options *args)
633 {
634         dout("destroy_mount_options %p\n", args);
635         if (!args)
636                 return;
637
638         kfree(args->snapdir_name);
639         kfree(args->mds_namespace);
640         kfree(args->server_path);
641         kfree(args->fscache_uniq);
642         kfree(args->mon_addr);
643         fscrypt_free_dummy_policy(&args->dummy_enc_policy);
644         kfree(args);
645 }
646
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Two NULL pointers compare equal.  When only @s2 is NULL the result is
 * -1, when only @s1 is NULL it is 1.  Otherwise the result is that of
 * strcmp(@s1, @s2).
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;	/* only s2 is NULL */
	return s2 ? 1 : 0;	/* only s1 is NULL, or both are */
}
657
658 static int compare_mount_options(struct ceph_mount_options *new_fsopt,
659                                  struct ceph_options *new_opt,
660                                  struct ceph_fs_client *fsc)
661 {
662         struct ceph_mount_options *fsopt1 = new_fsopt;
663         struct ceph_mount_options *fsopt2 = fsc->mount_options;
664         int ofs = offsetof(struct ceph_mount_options, snapdir_name);
665         int ret;
666
667         ret = memcmp(fsopt1, fsopt2, ofs);
668         if (ret)
669                 return ret;
670
671         ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
672         if (ret)
673                 return ret;
674
675         ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
676         if (ret)
677                 return ret;
678
679         ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
680         if (ret)
681                 return ret;
682
683         ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
684         if (ret)
685                 return ret;
686
687         ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
688         if (ret)
689                 return ret;
690
691         return ceph_compare_options(new_opt, fsc->client);
692 }
693
694 /**
695  * ceph_show_options - Show mount options in /proc/mounts
696  * @m: seq_file to write to
697  * @root: root of that (sub)tree
698  */
699 static int ceph_show_options(struct seq_file *m, struct dentry *root)
700 {
701         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb);
702         struct ceph_mount_options *fsopt = fsc->mount_options;
703         size_t pos;
704         int ret;
705
706         /* a comma between MNT/MS and client options */
707         seq_putc(m, ',');
708         pos = m->count;
709
710         ret = ceph_print_client_options(m, fsc->client, false);
711         if (ret)
712                 return ret;
713
714         /* retract our comma if no client options */
715         if (m->count == pos)
716                 m->count--;
717
718         if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
719                 seq_puts(m, ",dirstat");
720         if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
721                 seq_puts(m, ",rbytes");
722         if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
723                 seq_puts(m, ",noasyncreaddir");
724         if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
725                 seq_puts(m, ",nodcache");
726         if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
727                 seq_puts(m, ",ino32");
728         if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
729                 seq_show_option(m, "fsc", fsopt->fscache_uniq);
730         }
731         if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
732                 seq_puts(m, ",nopoolperm");
733         if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
734                 seq_puts(m, ",noquotadf");
735
736 #ifdef CONFIG_CEPH_FS_POSIX_ACL
737         if (root->d_sb->s_flags & SB_POSIXACL)
738                 seq_puts(m, ",acl");
739         else
740                 seq_puts(m, ",noacl");
741 #endif
742
743         if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
744                 seq_puts(m, ",copyfrom");
745
746         /* dump mds_namespace when old device syntax is in use */
747         if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
748                 seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
749
750         if (fsopt->mon_addr)
751                 seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
752
753         if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
754                 seq_show_option(m, "recover_session", "clean");
755
756         if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
757                 seq_puts(m, ",wsync");
758         if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
759                 seq_puts(m, ",nopagecache");
760         if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
761                 seq_puts(m, ",sparseread");
762
763         fscrypt_show_test_dummy_encryption(m, ',', root->d_sb);
764
765         if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
766                 seq_printf(m, ",wsize=%u", fsopt->wsize);
767         if (fsopt->rsize != CEPH_MAX_READ_SIZE)
768                 seq_printf(m, ",rsize=%u", fsopt->rsize);
769         if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
770                 seq_printf(m, ",rasize=%u", fsopt->rasize);
771         if (fsopt->congestion_kb != default_congestion_kb())
772                 seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
773         if (fsopt->caps_max)
774                 seq_printf(m, ",caps_max=%d", fsopt->caps_max);
775         if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
776                 seq_printf(m, ",caps_wanted_delay_min=%u",
777                          fsopt->caps_wanted_delay_min);
778         if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
779                 seq_printf(m, ",caps_wanted_delay_max=%u",
780                            fsopt->caps_wanted_delay_max);
781         if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
782                 seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
783         if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
784                 seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
785         if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
786                 seq_show_option(m, "snapdirname", fsopt->snapdir_name);
787
788         return 0;
789 }
790
791 /*
792  * handle any mon messages the standard library doesn't understand.
793  * return error if we don't either.
794  */
795 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
796 {
797         struct ceph_fs_client *fsc = client->private;
798         int type = le16_to_cpu(msg->hdr.type);
799
800         switch (type) {
801         case CEPH_MSG_MDS_MAP:
802                 ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
803                 return 0;
804         case CEPH_MSG_FS_MAP_USER:
805                 ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
806                 return 0;
807         default:
808                 return -1;
809         }
810 }
811
812 /*
813  * create a new fs client
814  *
815  * Success or not, this function consumes @fsopt and @opt.
816  */
817 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
818                                         struct ceph_options *opt)
819 {
820         struct ceph_fs_client *fsc;
821         int err;
822
823         fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
824         if (!fsc) {
825                 err = -ENOMEM;
826                 goto fail;
827         }
828
829         fsc->client = ceph_create_client(opt, fsc);
830         if (IS_ERR(fsc->client)) {
831                 err = PTR_ERR(fsc->client);
832                 goto fail;
833         }
834         opt = NULL; /* fsc->client now owns this */
835
836         fsc->client->extra_mon_dispatch = extra_mon_dispatch;
837         ceph_set_opt(fsc->client, ABORT_ON_FULL);
838
839         if (!fsopt->mds_namespace) {
840                 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
841                                    0, true);
842         } else {
843                 ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
844                                    0, false);
845         }
846
847         fsc->mount_options = fsopt;
848
849         fsc->sb = NULL;
850         fsc->mount_state = CEPH_MOUNT_MOUNTING;
851         fsc->filp_gen = 1;
852         fsc->have_copy_from2 = true;
853
854         atomic_long_set(&fsc->writeback_count, 0);
855         fsc->write_congested = false;
856
857         err = -ENOMEM;
858         /*
859          * The number of concurrent works can be high but they don't need
860          * to be processed in parallel, limit concurrency.
861          */
862         fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
863         if (!fsc->inode_wq)
864                 goto fail_client;
865         fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
866         if (!fsc->cap_wq)
867                 goto fail_inode_wq;
868
869         hash_init(fsc->async_unlink_conflict);
870         spin_lock_init(&fsc->async_unlink_conflict_lock);
871
872         spin_lock(&ceph_fsc_lock);
873         list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
874         spin_unlock(&ceph_fsc_lock);
875
876         return fsc;
877
878 fail_inode_wq:
879         destroy_workqueue(fsc->inode_wq);
880 fail_client:
881         ceph_destroy_client(fsc->client);
882 fail:
883         kfree(fsc);
884         if (opt)
885                 ceph_destroy_options(opt);
886         destroy_mount_options(fsopt);
887         return ERR_PTR(err);
888 }
889
890 static void flush_fs_workqueues(struct ceph_fs_client *fsc)
891 {
892         flush_workqueue(fsc->inode_wq);
893         flush_workqueue(fsc->cap_wq);
894 }
895
896 static void destroy_fs_client(struct ceph_fs_client *fsc)
897 {
898         doutc(fsc->client, "%p\n", fsc);
899
900         spin_lock(&ceph_fsc_lock);
901         list_del(&fsc->metric_wakeup);
902         spin_unlock(&ceph_fsc_lock);
903
904         ceph_mdsc_destroy(fsc);
905         destroy_workqueue(fsc->inode_wq);
906         destroy_workqueue(fsc->cap_wq);
907
908         destroy_mount_options(fsc->mount_options);
909
910         ceph_destroy_client(fsc->client);
911
912         kfree(fsc);
913         dout("%s: %p done\n", __func__, fsc);
914 }
915
916 /*
917  * caches
918  */
919 struct kmem_cache *ceph_inode_cachep;
920 struct kmem_cache *ceph_cap_cachep;
921 struct kmem_cache *ceph_cap_snap_cachep;
922 struct kmem_cache *ceph_cap_flush_cachep;
923 struct kmem_cache *ceph_dentry_cachep;
924 struct kmem_cache *ceph_file_cachep;
925 struct kmem_cache *ceph_dir_file_cachep;
926 struct kmem_cache *ceph_mds_request_cachep;
927 mempool_t *ceph_wb_pagevec_pool;
928
929 static void ceph_inode_init_once(void *foo)
930 {
931         struct ceph_inode_info *ci = foo;
932         inode_init_once(&ci->netfs.inode);
933 }
934
935 static int __init init_caches(void)
936 {
937         int error = -ENOMEM;
938
939         ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
940                                       sizeof(struct ceph_inode_info),
941                                       __alignof__(struct ceph_inode_info),
942                                       SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
943                                       ceph_inode_init_once);
944         if (!ceph_inode_cachep)
945                 return -ENOMEM;
946
947         ceph_cap_cachep = KMEM_CACHE(ceph_cap, 0);
948         if (!ceph_cap_cachep)
949                 goto bad_cap;
950         ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, 0);
951         if (!ceph_cap_snap_cachep)
952                 goto bad_cap_snap;
953         ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
954                                            SLAB_RECLAIM_ACCOUNT);
955         if (!ceph_cap_flush_cachep)
956                 goto bad_cap_flush;
957
958         ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
959                                         SLAB_RECLAIM_ACCOUNT);
960         if (!ceph_dentry_cachep)
961                 goto bad_dentry;
962
963         ceph_file_cachep = KMEM_CACHE(ceph_file_info, 0);
964         if (!ceph_file_cachep)
965                 goto bad_file;
966
967         ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, 0);
968         if (!ceph_dir_file_cachep)
969                 goto bad_dir_file;
970
971         ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, 0);
972         if (!ceph_mds_request_cachep)
973                 goto bad_mds_req;
974
975         ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10,
976             (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *));
977         if (!ceph_wb_pagevec_pool)
978                 goto bad_pagevec_pool;
979
980         return 0;
981
982 bad_pagevec_pool:
983         kmem_cache_destroy(ceph_mds_request_cachep);
984 bad_mds_req:
985         kmem_cache_destroy(ceph_dir_file_cachep);
986 bad_dir_file:
987         kmem_cache_destroy(ceph_file_cachep);
988 bad_file:
989         kmem_cache_destroy(ceph_dentry_cachep);
990 bad_dentry:
991         kmem_cache_destroy(ceph_cap_flush_cachep);
992 bad_cap_flush:
993         kmem_cache_destroy(ceph_cap_snap_cachep);
994 bad_cap_snap:
995         kmem_cache_destroy(ceph_cap_cachep);
996 bad_cap:
997         kmem_cache_destroy(ceph_inode_cachep);
998         return error;
999 }
1000
1001 static void destroy_caches(void)
1002 {
1003         /*
1004          * Make sure all delayed rcu free inodes are flushed before we
1005          * destroy cache.
1006          */
1007         rcu_barrier();
1008
1009         kmem_cache_destroy(ceph_inode_cachep);
1010         kmem_cache_destroy(ceph_cap_cachep);
1011         kmem_cache_destroy(ceph_cap_snap_cachep);
1012         kmem_cache_destroy(ceph_cap_flush_cachep);
1013         kmem_cache_destroy(ceph_dentry_cachep);
1014         kmem_cache_destroy(ceph_file_cachep);
1015         kmem_cache_destroy(ceph_dir_file_cachep);
1016         kmem_cache_destroy(ceph_mds_request_cachep);
1017         mempool_destroy(ceph_wb_pagevec_pool);
1018 }
1019
1020 static void __ceph_umount_begin(struct ceph_fs_client *fsc)
1021 {
1022         ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
1023         ceph_mdsc_force_umount(fsc->mdsc);
1024         fsc->filp_gen++; // invalidate open files
1025 }
1026
1027 /*
1028  * ceph_umount_begin - initiate forced umount.  Tear down the
1029  * mount, skipping steps that may hang while waiting for server(s).
1030  */
1031 void ceph_umount_begin(struct super_block *sb)
1032 {
1033         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1034
1035         doutc(fsc->client, "starting forced umount\n");
1036         if (!fsc)
1037                 return;
1038         fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
1039         __ceph_umount_begin(fsc);
1040 }
1041
1042 static const struct super_operations ceph_super_ops = {
1043         .alloc_inode    = ceph_alloc_inode,
1044         .free_inode     = ceph_free_inode,
1045         .write_inode    = ceph_write_inode,
1046         .drop_inode     = generic_delete_inode,
1047         .evict_inode    = ceph_evict_inode,
1048         .sync_fs        = ceph_sync_fs,
1049         .put_super      = ceph_put_super,
1050         .show_options   = ceph_show_options,
1051         .statfs         = ceph_statfs,
1052         .umount_begin   = ceph_umount_begin,
1053 };
1054
1055 /*
1056  * Bootstrap mount by opening the root directory.  Note the mount
1057  * @started time from caller, and time out if this takes too long.
1058  */
1059 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
1060                                        const char *path,
1061                                        unsigned long started)
1062 {
1063         struct ceph_client *cl = fsc->client;
1064         struct ceph_mds_client *mdsc = fsc->mdsc;
1065         struct ceph_mds_request *req = NULL;
1066         int err;
1067         struct dentry *root;
1068
1069         /* open dir */
1070         doutc(cl, "opening '%s'\n", path);
1071         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
1072         if (IS_ERR(req))
1073                 return ERR_CAST(req);
1074         req->r_path1 = kstrdup(path, GFP_NOFS);
1075         if (!req->r_path1) {
1076                 root = ERR_PTR(-ENOMEM);
1077                 goto out;
1078         }
1079
1080         req->r_ino1.ino = CEPH_INO_ROOT;
1081         req->r_ino1.snap = CEPH_NOSNAP;
1082         req->r_started = started;
1083         req->r_timeout = fsc->client->options->mount_timeout;
1084         req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
1085         req->r_num_caps = 2;
1086         err = ceph_mdsc_do_request(mdsc, NULL, req);
1087         if (err == 0) {
1088                 struct inode *inode = req->r_target_inode;
1089                 req->r_target_inode = NULL;
1090                 doutc(cl, "success\n");
1091                 root = d_make_root(inode);
1092                 if (!root) {
1093                         root = ERR_PTR(-ENOMEM);
1094                         goto out;
1095                 }
1096                 doutc(cl, "success, root dentry is %p\n", root);
1097         } else {
1098                 root = ERR_PTR(err);
1099         }
1100 out:
1101         ceph_mdsc_put_request(req);
1102         return root;
1103 }
1104
#ifdef CONFIG_FS_ENCRYPTION
/*
 * Move the test_dummy_encryption policy parsed into @fsopt over to the
 * fs client behind @sb.
 *
 * Returns 0 when @fsopt carries no policy, when it equals the policy the
 * client already has, or after the policy has been installed.  Returns
 * -EINVAL when a remount tries to introduce a policy or when the
 * requested policy conflicts with the installed one.
 */
static int ceph_apply_test_dummy_encryption(struct super_block *sb,
					    struct fs_context *fc,
					    struct ceph_mount_options *fsopt)
{
	struct ceph_fs_client *fsc = sb->s_fs_info;

	/* nothing requested */
	if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy))
		return 0;

	/* A policy is already in place: the requested one must agree. */
	if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
		if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
						 &fsc->fsc_dummy_enc_policy))
			return 0;
		errorfc(fc, "Conflicting test_dummy_encryption options");
		return -EINVAL;
	}

	/* No policy in place yet: a remount must not change that. */
	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
						 &fsc->fsc_dummy_enc_policy))
			return 0;
		errorfc(fc, "Can't set test_dummy_encryption on remount");
		return -EINVAL;
	}

	/* hand the policy to the client and clear the copy in @fsopt */
	fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy;
	memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy));

	warnfc(fc, "test_dummy_encryption mode enabled");
	return 0;
}
#else
/* Built without CONFIG_FS_ENCRYPTION: nothing to apply. */
static int ceph_apply_test_dummy_encryption(struct super_block *sb,
					    struct fs_context *fc,
					    struct ceph_mount_options *fsopt)
{
	return 0;
}
#endif
1148
1149 /*
1150  * mount: join the ceph cluster, and open root directory.
1151  */
1152 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
1153                                       struct fs_context *fc)
1154 {
1155         struct ceph_client *cl = fsc->client;
1156         int err;
1157         unsigned long started = jiffies;  /* note the start time */
1158         struct dentry *root;
1159
1160         doutc(cl, "mount start %p\n", fsc);
1161         mutex_lock(&fsc->client->mount_mutex);
1162
1163         if (!fsc->sb->s_root) {
1164                 const char *path = fsc->mount_options->server_path ?
1165                                      fsc->mount_options->server_path + 1 : "";
1166
1167                 err = __ceph_open_session(fsc->client, started);
1168                 if (err < 0)
1169                         goto out;
1170
1171                 /* setup fscache */
1172                 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
1173                         err = ceph_fscache_register_fs(fsc, fc);
1174                         if (err < 0)
1175                                 goto out;
1176                 }
1177
1178                 err = ceph_apply_test_dummy_encryption(fsc->sb, fc,
1179                                                        fsc->mount_options);
1180                 if (err)
1181                         goto out;
1182
1183                 doutc(cl, "mount opening path '%s'\n", path);
1184
1185                 ceph_fs_debugfs_init(fsc);
1186
1187                 root = open_root_dentry(fsc, path, started);
1188                 if (IS_ERR(root)) {
1189                         err = PTR_ERR(root);
1190                         goto out;
1191                 }
1192                 fsc->sb->s_root = dget(root);
1193         } else {
1194                 root = dget(fsc->sb->s_root);
1195         }
1196
1197         fsc->mount_state = CEPH_MOUNT_MOUNTED;
1198         doutc(cl, "mount success\n");
1199         mutex_unlock(&fsc->client->mount_mutex);
1200         return root;
1201
1202 out:
1203         mutex_unlock(&fsc->client->mount_mutex);
1204         ceph_fscrypt_free_dummy_policy(fsc);
1205         return ERR_PTR(err);
1206 }
1207
1208 static int ceph_set_super(struct super_block *s, struct fs_context *fc)
1209 {
1210         struct ceph_fs_client *fsc = s->s_fs_info;
1211         struct ceph_client *cl = fsc->client;
1212         int ret;
1213
1214         doutc(cl, "%p\n", s);
1215
1216         s->s_maxbytes = MAX_LFS_FILESIZE;
1217
1218         s->s_xattr = ceph_xattr_handlers;
1219         fsc->sb = s;
1220         fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
1221
1222         s->s_op = &ceph_super_ops;
1223         s->s_d_op = &ceph_dentry_ops;
1224         s->s_export_op = &ceph_export_ops;
1225
1226         s->s_time_gran = 1;
1227         s->s_time_min = 0;
1228         s->s_time_max = U32_MAX;
1229         s->s_flags |= SB_NODIRATIME | SB_NOATIME;
1230
1231         ceph_fscrypt_set_ops(s);
1232
1233         ret = set_anon_super_fc(s, fc);
1234         if (ret != 0)
1235                 fsc->sb = NULL;
1236         return ret;
1237 }
1238
1239 /*
1240  * share superblock if same fs AND options
1241  */
1242 static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
1243 {
1244         struct ceph_fs_client *new = fc->s_fs_info;
1245         struct ceph_mount_options *fsopt = new->mount_options;
1246         struct ceph_options *opt = new->client->options;
1247         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1248         struct ceph_client *cl = fsc->client;
1249
1250         doutc(cl, "%p\n", sb);
1251
1252         if (compare_mount_options(fsopt, opt, fsc)) {
1253                 doutc(cl, "monitor(s)/mount options don't match\n");
1254                 return 0;
1255         }
1256         if ((opt->flags & CEPH_OPT_FSID) &&
1257             ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) {
1258                 doutc(cl, "fsid doesn't match\n");
1259                 return 0;
1260         }
1261         if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) {
1262                 doutc(cl, "flags differ\n");
1263                 return 0;
1264         }
1265
1266         if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) {
1267                 doutc(cl, "client is blocklisted (and CLEANRECOVER is not set)\n");
1268                 return 0;
1269         }
1270
1271         if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
1272                 doutc(cl, "client has been forcibly unmounted\n");
1273                 return 0;
1274         }
1275
1276         return 1;
1277 }
1278
1279 /*
1280  * construct our own bdi so we can control readahead, etc.
1281  */
1282 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
1283
1284 static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
1285 {
1286         int err;
1287
1288         err = super_setup_bdi_name(sb, "ceph-%ld",
1289                                    atomic_long_inc_return(&bdi_seq));
1290         if (err)
1291                 return err;
1292
1293         /* set ra_pages based on rasize mount option? */
1294         sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
1295
1296         /* set io_pages based on max osd read size */
1297         sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
1298
1299         return 0;
1300 }
1301
1302 static int ceph_get_tree(struct fs_context *fc)
1303 {
1304         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1305         struct ceph_mount_options *fsopt = pctx->opts;
1306         struct super_block *sb;
1307         struct ceph_fs_client *fsc;
1308         struct dentry *res;
1309         int (*compare_super)(struct super_block *, struct fs_context *) =
1310                 ceph_compare_super;
1311         int err;
1312
1313         dout("ceph_get_tree\n");
1314
1315         if (!fc->source)
1316                 return invalfc(fc, "No source");
1317         if (fsopt->new_dev_syntax && !fsopt->mon_addr)
1318                 return invalfc(fc, "No monitor address");
1319
1320         /* create client (which we may/may not use) */
1321         fsc = create_fs_client(pctx->opts, pctx->copts);
1322         pctx->opts = NULL;
1323         pctx->copts = NULL;
1324         if (IS_ERR(fsc)) {
1325                 err = PTR_ERR(fsc);
1326                 goto out_final;
1327         }
1328
1329         err = ceph_mdsc_init(fsc);
1330         if (err < 0)
1331                 goto out;
1332
1333         if (ceph_test_opt(fsc->client, NOSHARE))
1334                 compare_super = NULL;
1335
1336         fc->s_fs_info = fsc;
1337         sb = sget_fc(fc, compare_super, ceph_set_super);
1338         fc->s_fs_info = NULL;
1339         if (IS_ERR(sb)) {
1340                 err = PTR_ERR(sb);
1341                 goto out;
1342         }
1343
1344         if (ceph_sb_to_fs_client(sb) != fsc) {
1345                 destroy_fs_client(fsc);
1346                 fsc = ceph_sb_to_fs_client(sb);
1347                 dout("get_sb got existing client %p\n", fsc);
1348         } else {
1349                 dout("get_sb using new client %p\n", fsc);
1350                 err = ceph_setup_bdi(sb, fsc);
1351                 if (err < 0)
1352                         goto out_splat;
1353         }
1354
1355         res = ceph_real_mount(fsc, fc);
1356         if (IS_ERR(res)) {
1357                 err = PTR_ERR(res);
1358                 goto out_splat;
1359         }
1360
1361         doutc(fsc->client, "root %p inode %p ino %llx.%llx\n", res,
1362                     d_inode(res), ceph_vinop(d_inode(res)));
1363         fc->root = fsc->sb->s_root;
1364         return 0;
1365
1366 out_splat:
1367         if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
1368                 pr_info("No mds server is up or the cluster is laggy\n");
1369                 err = -EHOSTUNREACH;
1370         }
1371
1372         ceph_mdsc_close_sessions(fsc->mdsc);
1373         deactivate_locked_super(sb);
1374         goto out_final;
1375
1376 out:
1377         destroy_fs_client(fsc);
1378 out_final:
1379         dout("ceph_get_tree fail %d\n", err);
1380         return err;
1381 }
1382
1383 static void ceph_free_fc(struct fs_context *fc)
1384 {
1385         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1386
1387         if (pctx) {
1388                 destroy_mount_options(pctx->opts);
1389                 ceph_destroy_options(pctx->copts);
1390                 kfree(pctx);
1391         }
1392 }
1393
1394 static int ceph_reconfigure_fc(struct fs_context *fc)
1395 {
1396         int err;
1397         struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1398         struct ceph_mount_options *fsopt = pctx->opts;
1399         struct super_block *sb = fc->root->d_sb;
1400         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1401
1402         err = ceph_apply_test_dummy_encryption(sb, fc, fsopt);
1403         if (err)
1404                 return err;
1405
1406         if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
1407                 ceph_set_mount_opt(fsc, ASYNC_DIROPS);
1408         else
1409                 ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
1410
1411         if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
1412                 ceph_set_mount_opt(fsc, SPARSEREAD);
1413         else
1414                 ceph_clear_mount_opt(fsc, SPARSEREAD);
1415
1416         if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
1417                 kfree(fsc->mount_options->mon_addr);
1418                 fsc->mount_options->mon_addr = fsopt->mon_addr;
1419                 fsopt->mon_addr = NULL;
1420                 pr_notice_client(fsc->client,
1421                         "monitor addresses recorded, but not used for reconnection");
1422         }
1423
1424         sync_filesystem(sb);
1425         return 0;
1426 }
1427
1428 static const struct fs_context_operations ceph_context_ops = {
1429         .free           = ceph_free_fc,
1430         .parse_param    = ceph_parse_mount_param,
1431         .get_tree       = ceph_get_tree,
1432         .reconfigure    = ceph_reconfigure_fc,
1433 };
1434
1435 /*
1436  * Set up the filesystem mount context.
1437  */
1438 static int ceph_init_fs_context(struct fs_context *fc)
1439 {
1440         struct ceph_parse_opts_ctx *pctx;
1441         struct ceph_mount_options *fsopt;
1442
1443         pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
1444         if (!pctx)
1445                 return -ENOMEM;
1446
1447         pctx->copts = ceph_alloc_options();
1448         if (!pctx->copts)
1449                 goto nomem;
1450
1451         pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL);
1452         if (!pctx->opts)
1453                 goto nomem;
1454
1455         fsopt = pctx->opts;
1456         fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
1457
1458         fsopt->wsize = CEPH_MAX_WRITE_SIZE;
1459         fsopt->rsize = CEPH_MAX_READ_SIZE;
1460         fsopt->rasize = CEPH_RASIZE_DEFAULT;
1461         fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
1462         if (!fsopt->snapdir_name)
1463                 goto nomem;
1464
1465         fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
1466         fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
1467         fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
1468         fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
1469         fsopt->congestion_kb = default_congestion_kb();
1470
1471 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1472         fc->sb_flags |= SB_POSIXACL;
1473 #endif
1474
1475         fc->fs_private = pctx;
1476         fc->ops = &ceph_context_ops;
1477         return 0;
1478
1479 nomem:
1480         destroy_mount_options(pctx->opts);
1481         ceph_destroy_options(pctx->copts);
1482         kfree(pctx);
1483         return -ENOMEM;
1484 }
1485
1486 /*
1487  * Return true if it successfully increases the blocker counter,
1488  * or false if the mdsc is in stopping and flushed state.
1489  */
1490 static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
1491 {
1492         spin_lock(&mdsc->stopping_lock);
1493         if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
1494                 spin_unlock(&mdsc->stopping_lock);
1495                 return false;
1496         }
1497         atomic_inc(&mdsc->stopping_blockers);
1498         spin_unlock(&mdsc->stopping_lock);
1499         return true;
1500 }
1501
1502 static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
1503 {
1504         spin_lock(&mdsc->stopping_lock);
1505         if (!atomic_dec_return(&mdsc->stopping_blockers) &&
1506             mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
1507                 complete_all(&mdsc->stopping_waiter);
1508         spin_unlock(&mdsc->stopping_lock);
1509 }
1510
1511 /* For metadata IO requests */
1512 bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
1513                                    struct ceph_mds_session *session)
1514 {
1515         mutex_lock(&session->s_mutex);
1516         inc_session_sequence(session);
1517         mutex_unlock(&session->s_mutex);
1518
1519         return __inc_stopping_blocker(mdsc);
1520 }
1521
/* Metadata IO counterpart of ceph_inc_mds_stopping_blocker(). */
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1526
1527 /* For data IO requests */
1528 bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc)
1529 {
1530         return __inc_stopping_blocker(mdsc);
1531 }
1532
/* Data IO counterpart of ceph_inc_osd_stopping_blocker(). */
void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1537
1538 static void ceph_kill_sb(struct super_block *s)
1539 {
1540         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s);
1541         struct ceph_client *cl = fsc->client;
1542         struct ceph_mds_client *mdsc = fsc->mdsc;
1543         bool wait;
1544
1545         doutc(cl, "%p\n", s);
1546
1547         ceph_mdsc_pre_umount(mdsc);
1548         flush_fs_workqueues(fsc);
1549
1550         /*
1551          * Though the kill_anon_super() will finally trigger the
1552          * sync_filesystem() anyway, we still need to do it here and
1553          * then bump the stage of shutdown. This will allow us to
1554          * drop any further message, which will increase the inodes'
1555          * i_count reference counters but makes no sense any more,
1556          * from MDSs.
1557          *
1558          * Without this when evicting the inodes it may fail in the
1559          * kill_anon_super(), which will trigger a warning when
1560          * destroying the fscrypt keyring and then possibly trigger
1561          * a further crash in ceph module when the iput() tries to
1562          * evict the inodes later.
1563          */
1564         sync_filesystem(s);
1565
1566         spin_lock(&mdsc->stopping_lock);
1567         mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
1568         wait = !!atomic_read(&mdsc->stopping_blockers);
1569         spin_unlock(&mdsc->stopping_lock);
1570
1571         if (wait && atomic_read(&mdsc->stopping_blockers)) {
1572                 long timeleft = wait_for_completion_killable_timeout(
1573                                         &mdsc->stopping_waiter,
1574                                         fsc->client->options->mount_timeout);
1575                 if (!timeleft) /* timed out */
1576                         pr_warn_client(cl, "umount timed out, %ld\n", timeleft);
1577                 else if (timeleft < 0) /* killed */
1578                         pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
1579         }
1580
1581         mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
1582         kill_anon_super(s);
1583
1584         fsc->client->extra_mon_dispatch = NULL;
1585         ceph_fs_debugfs_cleanup(fsc);
1586
1587         ceph_fscache_unregister_fs(fsc);
1588
1589         destroy_fs_client(fsc);
1590 }
1591
1592 static struct file_system_type ceph_fs_type = {
1593         .owner          = THIS_MODULE,
1594         .name           = "ceph",
1595         .init_fs_context = ceph_init_fs_context,
1596         .kill_sb        = ceph_kill_sb,
1597         .fs_flags       = FS_RENAME_DOES_D_MOVE | FS_ALLOW_IDMAP,
1598 };
1599 MODULE_ALIAS_FS("ceph");
1600
1601 int ceph_force_reconnect(struct super_block *sb)
1602 {
1603         struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1604         int err = 0;
1605
1606         fsc->mount_state = CEPH_MOUNT_RECOVER;
1607         __ceph_umount_begin(fsc);
1608
1609         /* Make sure all page caches get invalidated.
1610          * see remove_session_caps_cb() */
1611         flush_workqueue(fsc->inode_wq);
1612
1613         /* In case that we were blocklisted. This also reset
1614          * all mon/osd connections */
1615         ceph_reset_client_addr(fsc->client);
1616
1617         ceph_osdc_clear_abort_err(&fsc->client->osdc);
1618
1619         fsc->blocklisted = false;
1620         fsc->mount_state = CEPH_MOUNT_MOUNTED;
1621
1622         if (sb->s_root) {
1623                 err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
1624                                         CEPH_STAT_CAP_INODE, true);
1625         }
1626         return err;
1627 }
1628
1629 static int __init init_ceph(void)
1630 {
1631         int ret = init_caches();
1632         if (ret)
1633                 goto out;
1634
1635         ceph_flock_init();
1636         ret = register_filesystem(&ceph_fs_type);
1637         if (ret)
1638                 goto out_caches;
1639
1640         pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
1641
1642         return 0;
1643
1644 out_caches:
1645         destroy_caches();
1646 out:
1647         return ret;
1648 }
1649
1650 static void __exit exit_ceph(void)
1651 {
1652         dout("exit_ceph\n");
1653         unregister_filesystem(&ceph_fs_type);
1654         destroy_caches();
1655 }
1656
1657 static int param_set_metrics(const char *val, const struct kernel_param *kp)
1658 {
1659         struct ceph_fs_client *fsc;
1660         int ret;
1661
1662         ret = param_set_bool(val, kp);
1663         if (ret) {
1664                 pr_err("Failed to parse sending metrics switch value '%s'\n",
1665                        val);
1666                 return ret;
1667         } else if (!disable_send_metrics) {
1668                 // wake up all the mds clients
1669                 spin_lock(&ceph_fsc_lock);
1670                 list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) {
1671                         metric_schedule_delayed(&fsc->mdsc->metric);
1672                 }
1673                 spin_unlock(&ceph_fsc_lock);
1674         }
1675
1676         return 0;
1677 }
1678
1679 static const struct kernel_param_ops param_ops_metrics = {
1680         .set = param_set_metrics,
1681         .get = param_get_bool,
1682 };
1683
1684 bool disable_send_metrics = false;
1685 module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
1686 MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
1687
1688 /* for both v1 and v2 syntax */
1689 static bool mount_support = true;
1690 static const struct kernel_param_ops param_ops_mount_syntax = {
1691         .get = param_get_bool,
1692 };
1693 module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
1694 module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
1695
1696 bool enable_unsafe_idmap = false;
1697 module_param(enable_unsafe_idmap, bool, 0644);
1698 MODULE_PARM_DESC(enable_unsafe_idmap,
1699                  "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID");
1700
1701 module_init(init_ceph);
1702 module_exit(exit_ceph);
1703
1704 MODULE_AUTHOR("Sage Weil <[email protected]>");
1705 MODULE_AUTHOR("Yehuda Sadeh <[email protected]>");
1706 MODULE_AUTHOR("Patience Warnick <[email protected]>");
1707 MODULE_DESCRIPTION("Ceph filesystem for Linux");
1708 MODULE_LICENSE("GPL");
This page took 0.125232 seconds and 4 git commands to generate.