(CONGESTION_ON_THRESH(congestion_kb) >> 2))
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- struct page *page, void **_fsdata);
+ struct folio *folio, void **_fsdata);
static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
};
/* read a single page, without unlocking it. */
-static int ceph_readpage(struct file *file, struct page *page)
+static int ceph_readpage(struct file *file, struct page *subpage)
{
+ struct folio *folio = page_folio(subpage);
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_vino vino = ceph_vino(inode);
- u64 off = page_offset(page);
- u64 len = thp_size(page);
+ size_t len = folio_size(folio);
+ u64 off = folio_file_pos(folio);
if (ci->i_inline_version != CEPH_INLINE_NONE) {
/*
* into page cache while getting Fcr caps.
*/
if (off == 0) {
- unlock_page(page);
+ folio_unlock(folio);
return -EINVAL;
}
- zero_user_segment(page, 0, thp_size(page));
- SetPageUptodate(page);
- unlock_page(page);
+ zero_user_segment(&folio->page, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
return 0;
}
- dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
- vino.ino, vino.snap, file, off, len, page, page->index);
+ dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n",
+ vino.ino, vino.snap, file, off, len, folio, folio_index(folio));
- return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
+ return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL);
}
static void ceph_readahead(struct readahead_control *ractl)
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
- if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
+ if (ceph_inode_is_shutdown(inode)) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
ceph_find_incompatible(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
- if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
- dout(" page %p forced umount\n", page);
- return ERR_PTR(-EIO);
+ if (ceph_inode_is_shutdown(inode)) {
+ dout(" page %p %llx:%llx is shutdown\n", page,
+ ceph_vinop(inode));
+ return ERR_PTR(-ESTALE);
}
for (;;) {
}
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- struct page *page, void **_fsdata)
+ struct folio *folio, void **_fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc;
- snapc = ceph_find_incompatible(page);
+ snapc = ceph_find_incompatible(folio_page(folio, 0));
if (snapc) {
int r;
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (IS_ERR(snapc))
return PTR_ERR(snapc);
* clean, or already dirty within the same snap context.
*/
static int ceph_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+ loff_t pos, unsigned len, unsigned aop_flags,
struct page **pagep, void **fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct page *page = NULL;
+ struct folio *folio = NULL;
pgoff_t index = pos >> PAGE_SHIFT;
int r;
* for inline_version sent to the MDS.
*/
if (ci->i_inline_version != CEPH_INLINE_NONE) {
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
+ unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+ if (aop_flags & AOP_FLAG_NOFS)
+ fgp_flags |= FGP_NOFS;
+ folio = __filemap_get_folio(mapping, index, fgp_flags,
+ mapping_gfp_mask(mapping));
+ if (!folio)
return -ENOMEM;
/*
* The inline_version on a new inode is set to 1. If that's the
- * case, then the page is brand new and isn't yet Uptodate.
+ * case, then the folio is brand new and isn't yet Uptodate.
*/
r = 0;
if (index == 0 && ci->i_inline_version != 1) {
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
ci->i_inline_version);
r = -EINVAL;
}
goto out;
}
- zero_user_segment(page, 0, thp_size(page));
- SetPageUptodate(page);
+ zero_user_segment(&folio->page, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
goto out;
}
- r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
+ r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
&ceph_netfs_read_ops, NULL);
out:
if (r == 0)
- wait_on_page_fscache(page);
+ folio_wait_fscache(folio);
if (r < 0) {
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
} else {
- WARN_ON_ONCE(!PageLocked(page));
- *pagep = page;
+ WARN_ON_ONCE(!folio_test_locked(folio));
+ *pagep = &folio->page;
}
return r;
}
*/
static int ceph_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct page *subpage, void *fsdata)
{
+ struct folio *folio = page_folio(subpage);
struct inode *inode = file_inode(file);
bool check_cap = false;
- dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
- inode, page, (int)pos, (int)copied, (int)len);
+ dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file,
+ inode, folio, (int)pos, (int)copied, (int)len);
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
/* just return that nothing was copied on a short copy */
if (copied < len) {
copied = 0;
goto out;
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
/* did file size increase? */
if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
- set_page_dirty(page);
+ folio_mark_dirty(folio);
out:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (check_cap)
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
return copied;
}
- /*
- * we set .direct_IO to indicate direct io is supported, but since we
- * intercept O_DIRECT reads and writes early, this function should
- * never get called.
- */
- static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
- {
- WARN_ON(1);
- return -EINVAL;
- }
-
const struct address_space_operations ceph_aops = {
.readpage = ceph_readpage,
.readahead = ceph_readahead,
.set_page_dirty = ceph_set_page_dirty,
.invalidatepage = ceph_invalidatepage,
.releasepage = ceph_releasepage,
- .direct_IO = ceph_direct_io,
+ .direct_IO = noop_direct_IO,
};
static void ceph_block_sigs(sigset_t *oldset)
sigset_t oldset;
vm_fault_t ret = VM_FAULT_SIGBUS;
+ if (ceph_inode_is_shutdown(inode))
+ return ret;
+
ceph_block_sigs(&oldset);
dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
sigset_t oldset;
vm_fault_t ret = VM_FAULT_SIGBUS;
+ if (ceph_inode_is_shutdown(inode))
+ return ret;
+
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return VM_FAULT_OOM;
if (result) {
struct dentry *dentry = req->r_dentry;
+ struct inode *inode = d_inode(dentry);
int pathlen = 0;
u64 base = 0;
char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
if (!d_unhashed(dentry))
d_drop(dentry);
- /* FIXME: start returning I/O errors on all accesses? */
+ ceph_inode_shutdown(inode);
+
pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n",
base, IS_ERR(path) ? "<<bad>>" : path, result);
ceph_mdsc_free_path(path, pathlen);
}
ceph_kick_flushing_inode_caps(req->r_session, ci);
spin_unlock(&ci->i_ceph_lock);
- } else {
+ } else if (!result) {
pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__,
req->r_deleg_ino);
}
ssize_t ret;
u64 off = iocb->ki_pos;
u64 len = iov_iter_count(to);
+ u64 i_size;
dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
struct page **pages;
int num_pages;
size_t page_off;
- u64 i_size;
bool more;
int idx;
size_t left;
}
if (off > iocb->ki_pos) {
- if (ret >= 0 &&
- iov_iter_count(to) > 0 && off >= i_size_read(inode))
+ if (off >= i_size) {
*retry_op = CHECK_EOF;
- ret = off - iocb->ki_pos;
- iocb->ki_pos = off;
+ ret = i_size - iocb->ki_pos;
+ iocb->ki_pos = i_size;
+ } else {
+ ret = off - iocb->ki_pos;
+ iocb->ki_pos = off;
+ }
}
dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
CEPH_CAP_FILE_RD));
- aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+ aio_req->iocb->ki_complete(aio_req->iocb, ret);
ceph_free_cap_flush(aio_req->prealloc_cf);
kfree(aio_req);
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
+ if (ceph_inode_is_shutdown(inode))
+ return -ESTALE;
+
if (direct_lock)
ceph_start_io_direct(inode);
else
loff_t pos;
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
+ if (ceph_inode_is_shutdown(inode))
+ return -ESTALE;
+
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
return 0;
}
+ static struct ceph_osd_request *
+ ceph_alloc_copyfrom_request(struct ceph_osd_client *osdc,
+ u64 src_snapid,
+ struct ceph_object_id *src_oid,
+ struct ceph_object_locator *src_oloc,
+ struct ceph_object_id *dst_oid,
+ struct ceph_object_locator *dst_oloc,
+ u32 truncate_seq, u64 truncate_size)
+ {
+ struct ceph_osd_request *req;
+ int ret;
+ u32 src_fadvise_flags =
+ CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+ CEPH_OSD_OP_FLAG_FADVISE_NOCACHE;
+ u32 dst_fadvise_flags =
+ CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+ CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+ if (!req)
+ return ERR_PTR(-ENOMEM);
+
+ req->r_flags = CEPH_OSD_FLAG_WRITE;
+
+ ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+ ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+
+ ret = osd_req_op_copy_from_init(req, src_snapid, 0,
+ src_oid, src_oloc,
+ src_fadvise_flags,
+ dst_fadvise_flags,
+ truncate_seq,
+ truncate_size,
+ CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+ if (ret)
+ goto out;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ return req;
+
+ out:
+ ceph_osdc_put_request(req);
+ return ERR_PTR(ret);
+ }
+
static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off,
struct ceph_inode_info *dst_ci, u64 *dst_off,
struct ceph_fs_client *fsc,
{
struct ceph_object_locator src_oloc, dst_oloc;
struct ceph_object_id src_oid, dst_oid;
+ struct ceph_osd_client *osdc;
+ struct ceph_osd_request *req;
size_t bytes = 0;
u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
u32 src_objlen, dst_objlen;
src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
dst_oloc.pool = dst_ci->i_layout.pool_id;
dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
+ osdc = &fsc->client->osdc;
while (len >= object_size) {
ceph_calc_file_object_mapping(&src_ci->i_layout, *src_off,
ceph_oid_printf(&dst_oid, "%llx.%08llx",
dst_ci->i_vino.ino, dst_objnum);
/* Do an object remote copy */
- ret = ceph_osdc_copy_from(&fsc->client->osdc,
- src_ci->i_vino.snap, 0,
- &src_oid, &src_oloc,
- CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
- CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
- &dst_oid, &dst_oloc,
- CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
- CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
- dst_ci->i_truncate_seq,
- dst_ci->i_truncate_size,
- CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+ req = ceph_alloc_copyfrom_request(osdc, src_ci->i_vino.snap,
+ &src_oid, &src_oloc,
+ &dst_oid, &dst_oloc,
+ dst_ci->i_truncate_seq,
+ dst_ci->i_truncate_size);
+ if (IS_ERR(req))
+ ret = PTR_ERR(req);
+ else {
+ ceph_osdc_start_request(osdc, req, false);
+ ret = ceph_osdc_wait_request(osdc, req);
+ ceph_update_copyfrom_metrics(&fsc->mdsc->metric,
+ req->r_start_latency,
+ req->r_end_latency,
+ object_size, ret);
+ ceph_osdc_put_request(req);
+ }
if (ret) {
if (ret == -EOPNOTSUPP) {
fsc->have_copy_from2 = false;