Merge tag 'ceph-for-5.16-rc1' of git://github.com/ceph/ceph-client

author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Nov 2021 19:31:07 +0000 (11:31 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Nov 2021 19:31:07 +0000 (11:31 -0800)
diff --combined fs/ceph/addr.c

index 04bbe853bcb1a9b566f703d904d6474fec1d4139,b39aebc2ed9528c0ab5fbbf65e1f87195ede0cb5..e53c8541f5b234ffe3a0009707be119d60fc34f7
--- 1/fs/ceph/addr.c
--- 2/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@@ -63,7 -63,7 +63,7 @@@
          (CONGESTION_ON_THRESH(congestion_kb) >> 2))
   
   static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- -                                      struct page *page, void **_fsdata);
+ +                                      struct folio *folio, void **_fsdata);
   
   static inline struct ceph_snap_context *page_snap_context(struct page *page)
   {
@@@ -317,14 -317,13 +317,14 @@@ static const struct netfs_read_request_
   };
   
   /* read a single page, without unlocking it. */
- -static int ceph_readpage(struct file *file, struct page *page)
+ +static int ceph_readpage(struct file *file, struct page *subpage)
   {
+ +      struct folio *folio = page_folio(subpage);
         struct inode *inode = file_inode(file);
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_vino vino = ceph_vino(inode);
- -      u64 off = page_offset(page);
- -      u64 len = thp_size(page);
+ +      size_t len = folio_size(folio);
+ +      u64 off = folio_file_pos(folio);
   
         if (ci->i_inline_version != CEPH_INLINE_NONE) {
                 /*
@@@ -332,19 -331,19 +332,19 @@@
                  * into page cache while getting Fcr caps.
                  */
                 if (off == 0) {
- -                      unlock_page(page);
+ +                      folio_unlock(folio);
                         return -EINVAL;
                 }
- -              zero_user_segment(page, 0, thp_size(page));
- -              SetPageUptodate(page);
- -              unlock_page(page);
+ +              zero_user_segment(&folio->page, 0, folio_size(folio));
+ +              folio_mark_uptodate(folio);
+ +              folio_unlock(folio);
                 return 0;
         }
   
- -      dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
- -           vino.ino, vino.snap, file, off, len, page, page->index);
+ +      dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n",
+ +           vino.ino, vino.snap, file, off, len, folio, folio_index(folio));
   
- -      return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
+ +      return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL);
   }
   
   static void ceph_readahead(struct readahead_control *ractl)
@@@ -725,7 -724,7 +725,7 @@@ static int ceph_writepages_start(struc
              wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
              (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
   
-       if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
+       if (ceph_inode_is_shutdown(inode)) {
                 if (ci->i_wrbuffer_ref > 0) {
                         pr_warn_ratelimited(
                                 "writepage_start %p %lld forced umount\n",
@@@ -1146,12 -1145,12 +1146,12 @@@ static struct ceph_snap_context 
   ceph_find_incompatible(struct page *page)
   {
         struct inode *inode = page->mapping->host;
-       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
         struct ceph_inode_info *ci = ceph_inode(inode);
   
-       if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-               dout(" page %p forced umount\n", page);
-               return ERR_PTR(-EIO);
+       if (ceph_inode_is_shutdown(inode)) {
+               dout(" page %p %llx:%llx is shutdown\n", page,
+                    ceph_vinop(inode));
+               return ERR_PTR(-ESTALE);
         }
   
         for (;;) {
@@@ -1188,18 -1187,18 +1188,18 @@@
   }
   
   static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- -                                      struct page *page, void **_fsdata)
+ +                                      struct folio *folio, void **_fsdata)
   {
         struct inode *inode = file_inode(file);
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_snap_context *snapc;
   
- -      snapc = ceph_find_incompatible(page);
+ +      snapc = ceph_find_incompatible(folio_page(folio, 0));
         if (snapc) {
                 int r;
   
- -              unlock_page(page);
- -              put_page(page);
+ +              folio_unlock(folio);
+ +              folio_put(folio);
                 if (IS_ERR(snapc))
                         return PTR_ERR(snapc);
   
@@@ -1217,12 -1216,12 +1217,12 @@@
    * clean, or already dirty within the same snap context.
    */
   static int ceph_write_begin(struct file *file, struct address_space *mapping,
- -                          loff_t pos, unsigned len, unsigned flags,
+ +                          loff_t pos, unsigned len, unsigned aop_flags,
                             struct page **pagep, void **fsdata)
   {
         struct inode *inode = file_inode(file);
         struct ceph_inode_info *ci = ceph_inode(inode);
- -      struct page *page = NULL;
+ +      struct folio *folio = NULL;
         pgoff_t index = pos >> PAGE_SHIFT;
         int r;
   
@@@ -1231,43 -1230,39 +1231,43 @@@
          * for inline_version sent to the MDS.
          */
         if (ci->i_inline_version != CEPH_INLINE_NONE) {
- -              page = grab_cache_page_write_begin(mapping, index, flags);
- -              if (!page)
+ +              unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+ +              if (aop_flags & AOP_FLAG_NOFS)
+ +                      fgp_flags |= FGP_NOFS;
+ +              folio = __filemap_get_folio(mapping, index, fgp_flags,
+ +                                          mapping_gfp_mask(mapping));
+ +              if (!folio)
                         return -ENOMEM;
   
                 /*
                  * The inline_version on a new inode is set to 1. If that's the
- -               * case, then the page is brand new and isn't yet Uptodate.
+ +               * case, then the folio is brand new and isn't yet Uptodate.
                  */
                 r = 0;
                 if (index == 0 && ci->i_inline_version != 1) {
- -                      if (!PageUptodate(page)) {
+ +                      if (!folio_test_uptodate(folio)) {
                                 WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
                                           ci->i_inline_version);
                                 r = -EINVAL;
                         }
                         goto out;
                 }
- -              zero_user_segment(page, 0, thp_size(page));
- -              SetPageUptodate(page);
+ +              zero_user_segment(&folio->page, 0, folio_size(folio));
+ +              folio_mark_uptodate(folio);
                 goto out;
         }
   
- -      r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
+ +      r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
                               &ceph_netfs_read_ops, NULL);
   out:
         if (r == 0)
- -              wait_on_page_fscache(page);
+ +              folio_wait_fscache(folio);
         if (r < 0) {
- -              if (page)
- -                      put_page(page);
+ +              if (folio)
+ +                      folio_put(folio);
         } else {
- -              WARN_ON_ONCE(!PageLocked(page));
- -              *pagep = page;
+ +              WARN_ON_ONCE(!folio_test_locked(folio));
+ +              *pagep = &folio->page;
         }
         return r;
   }
@@@ -1278,33 -1273,32 +1278,33 @@@
    */
   static int ceph_write_end(struct file *file, struct address_space *mapping,
                           loff_t pos, unsigned len, unsigned copied,
- -                        struct page *page, void *fsdata)
+ +                        struct page *subpage, void *fsdata)
   {
+ +      struct folio *folio = page_folio(subpage);
         struct inode *inode = file_inode(file);
         bool check_cap = false;
   
- -      dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
- -           inode, page, (int)pos, (int)copied, (int)len);
+ +      dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file,
+ +           inode, folio, (int)pos, (int)copied, (int)len);
   
- -      if (!PageUptodate(page)) {
+ +      if (!folio_test_uptodate(folio)) {
                 /* just return that nothing was copied on a short copy */
                 if (copied < len) {
                         copied = 0;
                         goto out;
                 }
- -              SetPageUptodate(page);
+ +              folio_mark_uptodate(folio);
         }
   
         /* did file size increase? */
         if (pos+copied > i_size_read(inode))
                 check_cap = ceph_inode_set_size(inode, pos+copied);
   
- -      set_page_dirty(page);
+ +      folio_mark_dirty(folio);
   
   out:
- -      unlock_page(page);
- -      put_page(page);
+ +      folio_unlock(folio);
+ +      folio_put(folio);
   
         if (check_cap)
                 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
@@@ -1312,17 -1306,6 +1312,6 @@@
         return copied;
   }
   
- /*
-  * we set .direct_IO to indicate direct io is supported, but since we
-  * intercept O_DIRECT reads and writes early, this function should
-  * never get called.
-  */
- static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
- {
-       WARN_ON(1);
-       return -EINVAL;
- }
- 
   const struct address_space_operations ceph_aops = {
         .readpage = ceph_readpage,
         .readahead = ceph_readahead,
@@@ -1333,7 -1316,7 +1322,7 @@@
         .set_page_dirty = ceph_set_page_dirty,
         .invalidatepage = ceph_invalidatepage,
         .releasepage = ceph_releasepage,
-       .direct_IO = ceph_direct_io,
+       .direct_IO = noop_direct_IO,
   };
   
   static void ceph_block_sigs(sigset_t *oldset)
@@@ -1362,6 -1345,9 +1351,9 @@@ static vm_fault_t ceph_filemap_fault(st
         sigset_t oldset;
         vm_fault_t ret = VM_FAULT_SIGBUS;
   
+       if (ceph_inode_is_shutdown(inode))
+               return ret;
+ 
         ceph_block_sigs(&oldset);
   
         dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
@@@ -1453,6 -1439,9 +1445,9 @@@ static vm_fault_t ceph_page_mkwrite(str
         sigset_t oldset;
         vm_fault_t ret = VM_FAULT_SIGBUS;
   
+       if (ceph_inode_is_shutdown(inode))
+               return ret;
+ 
         prealloc_cf = ceph_alloc_cap_flush();
         if (!prealloc_cf)
                 return VM_FAULT_OOM;
diff --combined fs/ceph/file.c

index b129ea551378c222d657f286515563d1f13576e6,220a41831b46f935b8f3d6f1d797a9db0d4fab9f..02a0a0fd9ccd51c7f4d11b60c875eaa081d0e2a6
--- 1/fs/ceph/file.c
--- 2/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@@ -525,6 -525,7 +525,7 @@@ static void ceph_async_create_cb(struc
   
         if (result) {
                 struct dentry *dentry = req->r_dentry;
+               struct inode *inode = d_inode(dentry);
                 int pathlen = 0;
                 u64 base = 0;
                 char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
@@@ -534,7 -535,8 +535,8 @@@
                 if (!d_unhashed(dentry))
                         d_drop(dentry);
   
-               /* FIXME: start returning I/O errors on all accesses? */
+               ceph_inode_shutdown(inode);
+ 
                 pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n",
                         base, IS_ERR(path) ? "<<bad>>" : path, result);
                 ceph_mdsc_free_path(path, pathlen);
@@@ -556,7 -558,7 +558,7 @@@
                 }
                 ceph_kick_flushing_inode_caps(req->r_session, ci);
                 spin_unlock(&ci->i_ceph_lock);
-       } else {
+       } else if (!result) {
                 pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__,
                         req->r_deleg_ino);
         }
@@@ -845,6 -847,7 +847,7 @@@ static ssize_t ceph_sync_read(struct ki
         ssize_t ret;
         u64 off = iocb->ki_pos;
         u64 len = iov_iter_count(to);
+       u64 i_size;
   
         dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
              (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
@@@ -868,7 -871,6 +871,6 @@@
                 struct page **pages;
                 int num_pages;
                 size_t page_off;
-               u64 i_size;
                 bool more;
                 int idx;
                 size_t left;
@@@ -951,11 -953,14 +953,14 @@@
         }
   
         if (off > iocb->ki_pos) {
-               if (ret >= 0 &&
-                   iov_iter_count(to) > 0 && off >= i_size_read(inode))
+               if (off >= i_size) {
                         *retry_op = CHECK_EOF;
-               ret = off - iocb->ki_pos;
-               iocb->ki_pos = off;
+                       ret = i_size - iocb->ki_pos;
+                       iocb->ki_pos = i_size;
+               } else {
+                       ret = off - iocb->ki_pos;
+                       iocb->ki_pos = off;
+               }
         }
   
         dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
@@@ -1022,7 -1027,7 +1027,7 @@@ static void ceph_aio_complete(struct in
         ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
                                                 CEPH_CAP_FILE_RD));
   
- -      aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+ +      aio_req->iocb->ki_complete(aio_req->iocb, ret);
   
         ceph_free_cap_flush(aio_req->prealloc_cf);
         kfree(aio_req);
@@@ -1526,6 -1531,9 +1531,9 @@@ again
         dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
              inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
   
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+ 
         if (direct_lock)
                 ceph_start_io_direct(inode);
         else
@@@ -1678,6 -1686,9 +1686,9 @@@ static ssize_t ceph_write_iter(struct k
         loff_t pos;
         loff_t limit = max(i_size_read(inode), fsc->max_file_size);
   
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+ 
         if (ceph_snap(inode) != CEPH_NOSNAP)
                 return -EROFS;
   
@@@ -2200,6 -2211,54 +2211,54 @@@ static int is_file_size_ok(struct inod
         return 0;
   }
   
+ static struct ceph_osd_request *
+ ceph_alloc_copyfrom_request(struct ceph_osd_client *osdc,
+                           u64 src_snapid,
+                           struct ceph_object_id *src_oid,
+                           struct ceph_object_locator *src_oloc,
+                           struct ceph_object_id *dst_oid,
+                           struct ceph_object_locator *dst_oloc,
+                           u32 truncate_seq, u64 truncate_size)
+ {
+       struct ceph_osd_request *req;
+       int ret;
+       u32 src_fadvise_flags =
+               CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+               CEPH_OSD_OP_FLAG_FADVISE_NOCACHE;
+       u32 dst_fadvise_flags =
+               CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+               CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
+ 
+       req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+       if (!req)
+               return ERR_PTR(-ENOMEM);
+ 
+       req->r_flags = CEPH_OSD_FLAG_WRITE;
+ 
+       ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+       ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+ 
+       ret = osd_req_op_copy_from_init(req, src_snapid, 0,
+                                       src_oid, src_oloc,
+                                       src_fadvise_flags,
+                                       dst_fadvise_flags,
+                                       truncate_seq,
+                                       truncate_size,
+                                       CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+       if (ret)
+               goto out;
+ 
+       ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+       if (ret)
+               goto out;
+ 
+       return req;
+ 
+ out:
+       ceph_osdc_put_request(req);
+       return ERR_PTR(ret);
+ }
+ 
   static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off,
                                     struct ceph_inode_info *dst_ci, u64 *dst_off,
                                     struct ceph_fs_client *fsc,
@@@ -2207,6 -2266,8 +2266,8 @@@
   {
         struct ceph_object_locator src_oloc, dst_oloc;
         struct ceph_object_id src_oid, dst_oid;
+       struct ceph_osd_client *osdc;
+       struct ceph_osd_request *req;
         size_t bytes = 0;
         u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
         u32 src_objlen, dst_objlen;
@@@ -2217,6 -2278,7 +2278,7 @@@
         src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
         dst_oloc.pool = dst_ci->i_layout.pool_id;
         dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
+       osdc = &fsc->client->osdc;
   
         while (len >= object_size) {
                 ceph_calc_file_object_mapping(&src_ci->i_layout, *src_off,
@@@ -2232,17 -2294,22 +2294,22 @@@
                 ceph_oid_printf(&dst_oid, "%llx.%08llx",
                                 dst_ci->i_vino.ino, dst_objnum);
                 /* Do an object remote copy */
-               ret = ceph_osdc_copy_from(&fsc->client->osdc,
-                                         src_ci->i_vino.snap, 0,
-                                         &src_oid, &src_oloc,
-                                         CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-                                         CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
-                                         &dst_oid, &dst_oloc,
-                                         CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-                                         CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
-                                         dst_ci->i_truncate_seq,
-                                         dst_ci->i_truncate_size,
-                                         CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+               req = ceph_alloc_copyfrom_request(osdc, src_ci->i_vino.snap,
+                                                 &src_oid, &src_oloc,
+                                                 &dst_oid, &dst_oloc,
+                                                 dst_ci->i_truncate_seq,
+                                                 dst_ci->i_truncate_size);
+               if (IS_ERR(req))
+                       ret = PTR_ERR(req);
+               else {
+                       ceph_osdc_start_request(osdc, req, false);
+                       ret = ceph_osdc_wait_request(osdc, req);
+                       ceph_update_copyfrom_metrics(&fsc->mdsc->metric,
+                                                    req->r_start_latency,
+                                                    req->r_end_latency,
+                                                    object_size, ret);
+                       ceph_osdc_put_request(req);
+               }
                 if (ret) {
                         if (ret == -EOPNOTSUPP) {
                                 fsc->have_copy_from2 = false;
diff --combined fs/ceph/locks.c

index d8c31069fbf2b9eed2ec3e8825f7f4dae9ac4856,74c227d9abf56cf7b5c9e42bbf4c63d9900f5a63..d1f154aec249bf3d38fb8d10dd66c311065fdb68
--- 1/fs/ceph/locks.c
--- 2/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@@ -241,6 -241,9 +241,9 @@@ int ceph_lock(struct file *file, int cm
         if (!(fl->fl_flags & FL_POSIX))
                 return -ENOLCK;
   
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+ 
         dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
   
         /* set wait bit as appropriate, then make command as Ceph expects it*/
@@@ -302,7 -305,13 +305,10 @@@ int ceph_flock(struct file *file, int c
   
         if (!(fl->fl_flags & FL_FLOCK))
                 return -ENOLCK;
- -      /* No mandatory locks */
- -      if (fl->fl_type & LOCK_MAND)
- -              return -EOPNOTSUPP;
   
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+ 
         dout("ceph_flock, fl_file: %p\n", fl->fl_file);
   
         spin_lock(&ci->i_ceph_lock);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 13 Nov 2021 19:31:07 +0000 (11:31 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 13 Nov 2021 19:31:07 +0000 (11:31 -0800)
		1	2
fs/ceph/addr.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ceph/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ceph/locks.c	patch \|	diff1 \|	diff2 \|	blob \| history