]> Git Repo - linux.git/blob - fs/overlayfs/file.c
Linux 6.14-rc3
[linux.git] / fs / overlayfs / file.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/security.h>
13 #include <linux/fs.h>
14 #include <linux/backing-file.h>
15 #include "overlayfs.h"
16
/*
 * Return a one-character tag describing @realinode relative to the overlay
 * @inode, used by the pr_debug() in ovl_open_realfile():
 *   'l' - @realinode is not the overlay inode's upper inode,
 *   'u' - it is the upper inode and ovl_has_upperdata() is true,
 *   'm' - it is the upper inode and ovl_has_upperdata() is false.
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
        if (realinode == ovl_inode_upper(inode))
                return ovl_has_upperdata(inode) ? 'u' : 'm';

        return 'l';
}
26
27 static struct file *ovl_open_realfile(const struct file *file,
28                                       const struct path *realpath)
29 {
30         struct inode *realinode = d_inode(realpath->dentry);
31         struct inode *inode = file_inode(file);
32         struct mnt_idmap *real_idmap;
33         struct file *realfile;
34         const struct cred *old_cred;
35         int flags = file->f_flags | OVL_OPEN_FLAGS;
36         int acc_mode = ACC_MODE(flags);
37         int err;
38
39         if (flags & O_APPEND)
40                 acc_mode |= MAY_APPEND;
41
42         old_cred = ovl_override_creds(inode->i_sb);
43         real_idmap = mnt_idmap(realpath->mnt);
44         err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45         if (err) {
46                 realfile = ERR_PTR(err);
47         } else {
48                 if (!inode_owner_or_capable(real_idmap, realinode))
49                         flags &= ~O_NOATIME;
50
51                 realfile = backing_file_open(&file->f_path, flags, realpath,
52                                              current_cred());
53         }
54         ovl_revert_creds(old_cred);
55
56         pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57                  file, file, ovl_whatisit(inode, realinode), file->f_flags,
58                  realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59
60         return realfile;
61 }
62
63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64
65 static int ovl_change_flags(struct file *file, unsigned int flags)
66 {
67         struct inode *inode = file_inode(file);
68         int err;
69
70         flags &= OVL_SETFL_MASK;
71
72         if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73                 return -EPERM;
74
75         if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76                 return -EINVAL;
77
78         if (file->f_op->check_flags) {
79                 err = file->f_op->check_flags(flags);
80                 if (err)
81                         return err;
82         }
83
84         spin_lock(&file->f_lock);
85         file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86         file->f_iocb_flags = iocb_flags(file);
87         spin_unlock(&file->f_lock);
88
89         return 0;
90 }
91
/*
 * Per-open state kept in an overlay file's ->private_data.
 *
 * Both members, when set, are released with fput() by ovl_file_free().
 */
struct ovl_file {
        /* real file supplied to ovl_file_alloc() */
        struct file *realfile;
        /*
         * NULL until ovl_real_file_path() needs a real file whose inode
         * differs from @realfile's; then opened once and cached here.
         */
        struct file *upperfile;
};
96
97 struct ovl_file *ovl_file_alloc(struct file *realfile)
98 {
99         struct ovl_file *of = kzalloc(sizeof(struct ovl_file), GFP_KERNEL);
100
101         if (unlikely(!of))
102                 return NULL;
103
104         of->realfile = realfile;
105         return of;
106 }
107
108 void ovl_file_free(struct ovl_file *of)
109 {
110         fput(of->realfile);
111         if (of->upperfile)
112                 fput(of->upperfile);
113         kfree(of);
114 }
115
116 static bool ovl_is_real_file(const struct file *realfile,
117                              const struct path *realpath)
118 {
119         return file_inode(realfile) == d_inode(realpath->dentry);
120 }
121
122 static struct file *ovl_real_file_path(const struct file *file,
123                                        struct path *realpath)
124 {
125         struct ovl_file *of = file->private_data;
126         struct file *realfile = of->realfile;
127
128         if (WARN_ON_ONCE(!realpath->dentry))
129                 return ERR_PTR(-EIO);
130
131         /*
132          * If the realfile that we want is not where the data used to be at
133          * open time, either we'd been copied up, or it's an fsync of a
134          * metacopied file.  We need the upperfile either way, so see if it
135          * is already opened and if it is not then open and store it.
136          */
137         if (unlikely(!ovl_is_real_file(realfile, realpath))) {
138                 struct file *upperfile = READ_ONCE(of->upperfile);
139                 struct file *old;
140
141                 if (!upperfile) { /* Nobody opened upperfile yet */
142                         upperfile = ovl_open_realfile(file, realpath);
143                         if (IS_ERR(upperfile))
144                                 return upperfile;
145
146                         /* Store the upperfile for later */
147                         old = cmpxchg_release(&of->upperfile, NULL, upperfile);
148                         if (old) { /* Someone opened upperfile before us */
149                                 fput(upperfile);
150                                 upperfile = old;
151                         }
152                 }
153                 /*
154                  * Stored file must be from the right inode, unless someone's
155                  * been corrupting the upper layer.
156                  */
157                 if (WARN_ON_ONCE(!ovl_is_real_file(upperfile, realpath)))
158                         return ERR_PTR(-EIO);
159
160                 realfile = upperfile;
161         }
162
163         /* Did the flags change since open? */
164         if (unlikely((file->f_flags ^ realfile->f_flags) & ~OVL_OPEN_FLAGS)) {
165                 int err = ovl_change_flags(realfile, file->f_flags);
166
167                 if (err)
168                         return ERR_PTR(err);
169         }
170
171         return realfile;
172 }
173
174 static struct file *ovl_real_file(const struct file *file)
175 {
176         struct dentry *dentry = file_dentry(file);
177         struct path realpath;
178         int err;
179
180         if (d_is_dir(dentry)) {
181                 struct file *f = ovl_dir_real_file(file, false);
182
183                 if (WARN_ON_ONCE(!f))
184                         return ERR_PTR(-EIO);
185                 return f;
186         }
187
188         /* lazy lookup and verify of lowerdata */
189         err = ovl_verify_lowerdata(dentry);
190         if (err)
191                 return ERR_PTR(err);
192
193         ovl_path_realdata(dentry, &realpath);
194
195         return ovl_real_file_path(file, &realpath);
196 }
197
198 static int ovl_open(struct inode *inode, struct file *file)
199 {
200         struct dentry *dentry = file_dentry(file);
201         struct file *realfile;
202         struct path realpath;
203         struct ovl_file *of;
204         int err;
205
206         /* lazy lookup and verify lowerdata */
207         err = ovl_verify_lowerdata(dentry);
208         if (err)
209                 return err;
210
211         err = ovl_maybe_copy_up(dentry, file->f_flags);
212         if (err)
213                 return err;
214
215         /* No longer need these flags, so don't pass them on to underlying fs */
216         file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
217
218         ovl_path_realdata(dentry, &realpath);
219         if (!realpath.dentry)
220                 return -EIO;
221
222         realfile = ovl_open_realfile(file, &realpath);
223         if (IS_ERR(realfile))
224                 return PTR_ERR(realfile);
225
226         of = ovl_file_alloc(realfile);
227         if (!of) {
228                 fput(realfile);
229                 return -ENOMEM;
230         }
231
232         file->private_data = of;
233
234         return 0;
235 }
236
237 static int ovl_release(struct inode *inode, struct file *file)
238 {
239         ovl_file_free(file->private_data);
240         return 0;
241 }
242
243 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
244 {
245         struct inode *inode = file_inode(file);
246         struct file *realfile;
247         const struct cred *old_cred;
248         loff_t ret;
249
250         /*
251          * The two special cases below do not need to involve real fs,
252          * so we can optimizing concurrent callers.
253          */
254         if (offset == 0) {
255                 if (whence == SEEK_CUR)
256                         return file->f_pos;
257
258                 if (whence == SEEK_SET)
259                         return vfs_setpos(file, 0, 0);
260         }
261
262         realfile = ovl_real_file(file);
263         if (IS_ERR(realfile))
264                 return PTR_ERR(realfile);
265
266         /*
267          * Overlay file f_pos is the master copy that is preserved
268          * through copy up and modified on read/write, but only real
269          * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
270          * limitations that are more strict than ->s_maxbytes for specific
271          * files, so we use the real file to perform seeks.
272          */
273         ovl_inode_lock(inode);
274         realfile->f_pos = file->f_pos;
275
276         old_cred = ovl_override_creds(inode->i_sb);
277         ret = vfs_llseek(realfile, offset, whence);
278         ovl_revert_creds(old_cred);
279
280         file->f_pos = realfile->f_pos;
281         ovl_inode_unlock(inode);
282
283         return ret;
284 }
285
/* Refresh the overlay inode's attributes (size/mtime) after a modification */
static void ovl_file_modified(struct file *file)
{
        struct inode *inode = file_inode(file);

        ovl_copyattr(inode);
}
291
292 static void ovl_file_end_write(struct kiocb *iocb, ssize_t ret)
293 {
294         ovl_file_modified(iocb->ki_filp);
295 }
296
297 static void ovl_file_accessed(struct file *file)
298 {
299         struct inode *inode, *upperinode;
300         struct timespec64 ctime, uctime;
301         struct timespec64 mtime, umtime;
302
303         if (file->f_flags & O_NOATIME)
304                 return;
305
306         inode = file_inode(file);
307         upperinode = ovl_inode_upper(inode);
308
309         if (!upperinode)
310                 return;
311
312         ctime = inode_get_ctime(inode);
313         uctime = inode_get_ctime(upperinode);
314         mtime = inode_get_mtime(inode);
315         umtime = inode_get_mtime(upperinode);
316         if ((!timespec64_equal(&mtime, &umtime)) ||
317              !timespec64_equal(&ctime, &uctime)) {
318                 inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
319                 inode_set_ctime_to_ts(inode, uctime);
320         }
321
322         touch_atime(&file->f_path);
323 }
324
325 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
326 {
327         struct file *file = iocb->ki_filp;
328         struct file *realfile;
329         struct backing_file_ctx ctx = {
330                 .cred = ovl_creds(file_inode(file)->i_sb),
331                 .accessed = ovl_file_accessed,
332         };
333
334         if (!iov_iter_count(iter))
335                 return 0;
336
337         realfile = ovl_real_file(file);
338         if (IS_ERR(realfile))
339                 return PTR_ERR(realfile);
340
341         return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
342                                       &ctx);
343 }
344
345 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
346 {
347         struct file *file = iocb->ki_filp;
348         struct inode *inode = file_inode(file);
349         struct file *realfile;
350         ssize_t ret;
351         int ifl = iocb->ki_flags;
352         struct backing_file_ctx ctx = {
353                 .cred = ovl_creds(inode->i_sb),
354                 .end_write = ovl_file_end_write,
355         };
356
357         if (!iov_iter_count(iter))
358                 return 0;
359
360         inode_lock(inode);
361         /* Update mode */
362         ovl_copyattr(inode);
363
364         realfile = ovl_real_file(file);
365         ret = PTR_ERR(realfile);
366         if (IS_ERR(realfile))
367                 goto out_unlock;
368
369         if (!ovl_should_sync(OVL_FS(inode->i_sb)))
370                 ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
371
372         /*
373          * Overlayfs doesn't support deferred completions, don't copy
374          * this property in case it is set by the issuer.
375          */
376         ifl &= ~IOCB_DIO_CALLER_COMP;
377         ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);
378
379 out_unlock:
380         inode_unlock(inode);
381
382         return ret;
383 }
384
385 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
386                                struct pipe_inode_info *pipe, size_t len,
387                                unsigned int flags)
388 {
389         struct file *realfile;
390         ssize_t ret;
391         struct backing_file_ctx ctx = {
392                 .cred = ovl_creds(file_inode(in)->i_sb),
393                 .accessed = ovl_file_accessed,
394         };
395         struct kiocb iocb;
396
397         realfile = ovl_real_file(in);
398         if (IS_ERR(realfile))
399                 return PTR_ERR(realfile);
400
401         init_sync_kiocb(&iocb, in);
402         iocb.ki_pos = *ppos;
403         ret = backing_file_splice_read(realfile, &iocb, pipe, len, flags, &ctx);
404         *ppos = iocb.ki_pos;
405
406         return ret;
407 }
408
409 /*
410  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
411  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
412  * and file_start_write(realfile) in ovl_write_iter().
413  *
414  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
415  * the real file.
416  */
417 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
418                                 loff_t *ppos, size_t len, unsigned int flags)
419 {
420         struct file *realfile;
421         struct inode *inode = file_inode(out);
422         ssize_t ret;
423         struct backing_file_ctx ctx = {
424                 .cred = ovl_creds(inode->i_sb),
425                 .end_write = ovl_file_end_write,
426         };
427         struct kiocb iocb;
428
429         inode_lock(inode);
430         /* Update mode */
431         ovl_copyattr(inode);
432
433         realfile = ovl_real_file(out);
434         ret = PTR_ERR(realfile);
435         if (IS_ERR(realfile))
436                 goto out_unlock;
437
438         init_sync_kiocb(&iocb, out);
439         iocb.ki_pos = *ppos;
440         ret = backing_file_splice_write(pipe, realfile, &iocb, len, flags, &ctx);
441         *ppos = iocb.ki_pos;
442
443 out_unlock:
444         inode_unlock(inode);
445
446         return ret;
447 }
448
449 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
450 {
451         struct dentry *dentry = file_dentry(file);
452         enum ovl_path_type type;
453         struct path upperpath;
454         struct file *upperfile;
455         const struct cred *old_cred;
456         int ret;
457
458         ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
459         if (ret <= 0)
460                 return ret;
461
462         /* Don't sync lower file for fear of receiving EROFS error */
463         type = ovl_path_type(dentry);
464         if (!OVL_TYPE_UPPER(type) || (datasync && OVL_TYPE_MERGE(type)))
465                 return 0;
466
467         ovl_path_upper(dentry, &upperpath);
468         upperfile = ovl_real_file_path(file, &upperpath);
469         if (IS_ERR(upperfile))
470                 return PTR_ERR(upperfile);
471
472         old_cred = ovl_override_creds(file_inode(file)->i_sb);
473         ret = vfs_fsync_range(upperfile, start, end, datasync);
474         ovl_revert_creds(old_cred);
475
476         return ret;
477 }
478
479 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
480 {
481         struct ovl_file *of = file->private_data;
482         struct backing_file_ctx ctx = {
483                 .cred = ovl_creds(file_inode(file)->i_sb),
484                 .accessed = ovl_file_accessed,
485         };
486
487         return backing_file_mmap(of->realfile, vma, &ctx);
488 }
489
490 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
491 {
492         struct inode *inode = file_inode(file);
493         struct file *realfile;
494         const struct cred *old_cred;
495         int ret;
496
497         inode_lock(inode);
498         /* Update mode */
499         ovl_copyattr(inode);
500         ret = file_remove_privs(file);
501         if (ret)
502                 goto out_unlock;
503
504         realfile = ovl_real_file(file);
505         ret = PTR_ERR(realfile);
506         if (IS_ERR(realfile))
507                 goto out_unlock;
508
509         old_cred = ovl_override_creds(file_inode(file)->i_sb);
510         ret = vfs_fallocate(realfile, mode, offset, len);
511         ovl_revert_creds(old_cred);
512
513         /* Update size */
514         ovl_file_modified(file);
515
516 out_unlock:
517         inode_unlock(inode);
518
519         return ret;
520 }
521
522 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
523 {
524         struct file *realfile;
525         const struct cred *old_cred;
526         int ret;
527
528         realfile = ovl_real_file(file);
529         if (IS_ERR(realfile))
530                 return PTR_ERR(realfile);
531
532         old_cred = ovl_override_creds(file_inode(file)->i_sb);
533         ret = vfs_fadvise(realfile, offset, len, advice);
534         ovl_revert_creds(old_cred);
535
536         return ret;
537 }
538
/* Selects which VFS range operation ovl_copyfile() performs */
enum ovl_copyop {
        OVL_COPY,       /* vfs_copy_file_range() */
        OVL_CLONE,      /* vfs_clone_file_range() */
        OVL_DEDUPE,     /* vfs_dedupe_file_range_one() */
};
544
545 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
546                             struct file *file_out, loff_t pos_out,
547                             loff_t len, unsigned int flags, enum ovl_copyop op)
548 {
549         struct inode *inode_out = file_inode(file_out);
550         struct file *realfile_in, *realfile_out;
551         const struct cred *old_cred;
552         loff_t ret;
553
554         inode_lock(inode_out);
555         if (op != OVL_DEDUPE) {
556                 /* Update mode */
557                 ovl_copyattr(inode_out);
558                 ret = file_remove_privs(file_out);
559                 if (ret)
560                         goto out_unlock;
561         }
562
563         realfile_out = ovl_real_file(file_out);
564         ret = PTR_ERR(realfile_out);
565         if (IS_ERR(realfile_out))
566                 goto out_unlock;
567
568         realfile_in = ovl_real_file(file_in);
569         ret = PTR_ERR(realfile_in);
570         if (IS_ERR(realfile_in))
571                 goto out_unlock;
572
573         old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
574         switch (op) {
575         case OVL_COPY:
576                 ret = vfs_copy_file_range(realfile_in, pos_in,
577                                           realfile_out, pos_out, len, flags);
578                 break;
579
580         case OVL_CLONE:
581                 ret = vfs_clone_file_range(realfile_in, pos_in,
582                                            realfile_out, pos_out, len, flags);
583                 break;
584
585         case OVL_DEDUPE:
586                 ret = vfs_dedupe_file_range_one(realfile_in, pos_in,
587                                                 realfile_out, pos_out, len,
588                                                 flags);
589                 break;
590         }
591         ovl_revert_creds(old_cred);
592
593         /* Update size */
594         ovl_file_modified(file_out);
595
596 out_unlock:
597         inode_unlock(inode_out);
598
599         return ret;
600 }
601
602 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
603                                    struct file *file_out, loff_t pos_out,
604                                    size_t len, unsigned int flags)
605 {
606         return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
607                             OVL_COPY);
608 }
609
610 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
611                                    struct file *file_out, loff_t pos_out,
612                                    loff_t len, unsigned int remap_flags)
613 {
614         enum ovl_copyop op;
615
616         if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
617                 return -EINVAL;
618
619         if (remap_flags & REMAP_FILE_DEDUP)
620                 op = OVL_DEDUPE;
621         else
622                 op = OVL_CLONE;
623
624         /*
625          * Don't copy up because of a dedupe request, this wouldn't make sense
626          * most of the time (data would be duplicated instead of deduplicated).
627          */
628         if (op == OVL_DEDUPE &&
629             (!ovl_inode_upper(file_inode(file_in)) ||
630              !ovl_inode_upper(file_inode(file_out))))
631                 return -EPERM;
632
633         return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
634                             remap_flags, op);
635 }
636
637 static int ovl_flush(struct file *file, fl_owner_t id)
638 {
639         struct file *realfile;
640         const struct cred *old_cred;
641         int err = 0;
642
643         realfile = ovl_real_file(file);
644         if (IS_ERR(realfile))
645                 return PTR_ERR(realfile);
646
647         if (realfile->f_op->flush) {
648                 old_cred = ovl_override_creds(file_inode(file)->i_sb);
649                 err = realfile->f_op->flush(realfile, id);
650                 ovl_revert_creds(old_cred);
651         }
652
653         return err;
654 }
655
656 const struct file_operations ovl_file_operations = {
657         .open           = ovl_open,
658         .release        = ovl_release,
659         .llseek         = ovl_llseek,
660         .read_iter      = ovl_read_iter,
661         .write_iter     = ovl_write_iter,
662         .fsync          = ovl_fsync,
663         .mmap           = ovl_mmap,
664         .fallocate      = ovl_fallocate,
665         .fadvise        = ovl_fadvise,
666         .flush          = ovl_flush,
667         .splice_read    = ovl_splice_read,
668         .splice_write   = ovl_splice_write,
669
670         .copy_file_range        = ovl_copy_file_range,
671         .remap_file_range       = ovl_remap_file_range,
672 };
This page took 0.069826 seconds and 4 git commands to generate.