Git Repo - linux.git/commitdiff
Merge branch 'iomap-4.19-merge' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
author Linus Torvalds <[email protected]>
Tue, 14 Aug 2018 05:29:03 +0000 (22:29 -0700)
committer Linus Torvalds <[email protected]>
Tue, 14 Aug 2018 05:29:03 +0000 (22:29 -0700)
Pull fs iomap refactoring from Darrick Wong:
 "This is the first part of the XFS changes for 4.19.

  Christoph and Andreas coordinated some refactoring work on the iomap
  code in preparation for removing buffer heads from XFS and porting
  gfs2 to iomap. I'm sending this small pull request ahead of the main
  XFS merge to avoid holding up gfs2 unnecessarily"

* 'iomap-4.19-merge' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  iomap: add inline data support to iomap_readpage_actor
  iomap: support direct I/O to inline data
  iomap: refactor iomap_dio_actor
  iomap: add initial support for writes without buffer heads
  iomap: add an iomap-based readpage and readpages implementation
  iomap: add private pointer to struct iomap
  iomap: add a page_done callback
  iomap: generic inline data handling
  iomap: complete partial direct I/O writes synchronously
  iomap: mark newly allocated buffer heads as new
  fs: factor out a __generic_write_end helper
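
As a rough sketch of what the buffer-head-free read path added in this series looks like from a filesystem's point of view, the filesystem supplies an iomap_begin callback and points its address_space operations at the new helpers. Everything named "examplefs" below is hypothetical and not part of the series; the stub mapping simply reports a hole.

  #include <linux/fs.h>
  #include <linux/iomap.h>

  /*
   * Stub ->iomap_begin: report the whole range as a hole so the generic
   * code zero-fills.  A real filesystem would consult its extent map here.
   */
  static int examplefs_iomap_begin(struct inode *inode, loff_t pos,
  		loff_t length, unsigned flags, struct iomap *iomap)
  {
  	iomap->addr = IOMAP_NULL_ADDR;
  	iomap->type = IOMAP_HOLE;
  	iomap->offset = pos;
  	iomap->length = length;
  	iomap->bdev = inode->i_sb->s_bdev;
  	return 0;
  }

  static const struct iomap_ops examplefs_iomap_ops = {
  	.iomap_begin	= examplefs_iomap_begin,
  };

  static int examplefs_readpage(struct file *unused, struct page *page)
  {
  	return iomap_readpage(page, &examplefs_iomap_ops);
  }

  static int examplefs_readpages(struct file *unused,
  		struct address_space *mapping, struct list_head *pages,
  		unsigned nr_pages)
  {
  	return iomap_readpages(mapping, pages, nr_pages, &examplefs_iomap_ops);
  }

  static const struct address_space_operations examplefs_aops = {
  	.readpage	= examplefs_readpage,
  	.readpages	= examplefs_readpages,
  	.set_page_dirty	= iomap_set_page_dirty,
  };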

fs/internal.h
fs/iomap.c
fs/xfs/xfs_iomap.c

diff --combined fs/internal.h
index 52a346903748b6a104a0dcc8e86548a1d9963eb1,4a18bdbd22142135dcf311e5f34ac64b71ee1357..50a28fc71300667838c692f601f9787e2acd8759
@@@ -43,6 -43,8 +43,8 @@@ static inline int __sync_blockdev(struc
  extern void guard_bio_eod(int rw, struct bio *bio);
  extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
                get_block_t *get_block, struct iomap *iomap);
+ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+               struct page *page);
  
  /*
   * char_dev.c
@@@ -93,7 -95,7 +95,7 @@@ extern void chroot_fs_refs(const struc
  /*
   * file_table.c
   */
 -extern struct file *get_empty_filp(void);
 +extern struct file *alloc_empty_file(int, const struct cred *);
  
  /*
   * super.c
@@@ -125,7 -127,9 +127,7 @@@ int do_fchmodat(int dfd, const char __u
  int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
                int flag);
  
 -extern int open_check_o_direct(struct file *f);
 -extern int vfs_open(const struct path *, struct file *, const struct cred *);
 -extern struct file *filp_clone_open(struct file *);
 +extern int vfs_open(const struct path *, struct file *);
  
  /*
   * inode.c
diff --combined fs/iomap.c
index 0d0bd88455867f9dacd20421e278a4121851c0cb,13cdcf33e6c022b3c52b29ad56016b60b4e7bb7d..530670608fc85c7598304469214f2dd97e7c7c52
@@@ -1,6 -1,6 +1,6 @@@
  /*
   * Copyright (C) 2010 Red Hat, Inc.
-  * Copyright (c) 2016 Christoph Hellwig.
+  * Copyright (c) 2016-2018 Christoph Hellwig.
   *
   * This program is free software; you can redistribute it and/or modify it
   * under the terms and conditions of the GNU General Public License,
@@@ -18,6 -18,7 +18,7 @@@
  #include <linux/uaccess.h>
  #include <linux/gfp.h>
  #include <linux/mm.h>
+ #include <linux/mm_inline.h>
  #include <linux/swap.h>
  #include <linux/pagemap.h>
  #include <linux/pagevec.h>
@@@ -103,6 -104,243 +104,243 @@@ iomap_sector(struct iomap *iomap, loff_
        return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
  }
  
+ static void
+ iomap_read_inline_data(struct inode *inode, struct page *page,
+               struct iomap *iomap)
+ {
+       size_t size = i_size_read(inode);
+       void *addr;
+       if (PageUptodate(page))
+               return;
+       BUG_ON(page->index);
+       BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+       addr = kmap_atomic(page);
+       memcpy(addr, iomap->inline_data, size);
+       memset(addr + size, 0, PAGE_SIZE - size);
+       kunmap_atomic(addr);
+       SetPageUptodate(page);
+ }
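
As an aside, the IOMAP_INLINE case handled above is reached when a filesystem's ->iomap_begin returns an inline mapping, as gfs2 will for files stuffed into the inode. A hedged sketch, reusing the hypothetical examplefs names from the note after the shortlog (the struct layout and field names are made up):

  /*
   * Hypothetical inode with an in-core copy of an on-disk inline area.
   * iomap_read_inline_data() copies i_size bytes from ->inline_data into
   * the page and zero-fills the rest, so the data must fit within one
   * page starting at offset_in_page(inline_data).
   */
  struct examplefs_inode {
  	struct inode	vfs_inode;
  	char		inline_area[128];
  };

  static int examplefs_iomap_begin_inline(struct inode *inode, loff_t pos,
  		loff_t length, unsigned flags, struct iomap *iomap)
  {
  	struct examplefs_inode *ei =
  		container_of(inode, struct examplefs_inode, vfs_inode);

  	iomap->type = IOMAP_INLINE;
  	iomap->inline_data = ei->inline_area;
  	iomap->addr = IOMAP_NULL_ADDR;
  	iomap->offset = 0;
  	iomap->length = PAGE_SIZE;
  	return 0;
  }
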
+ static void
+ iomap_read_end_io(struct bio *bio)
+ {
+       int error = blk_status_to_errno(bio->bi_status);
+       struct bio_vec *bvec;
+       int i;
+       bio_for_each_segment_all(bvec, bio, i)
+               page_endio(bvec->bv_page, false, error);
+       bio_put(bio);
+ }
+ struct iomap_readpage_ctx {
+       struct page             *cur_page;
+       bool                    cur_page_in_bio;
+       bool                    is_readahead;
+       struct bio              *bio;
+       struct list_head        *pages;
+ };
+ static loff_t
+ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+               struct iomap *iomap)
+ {
+       struct iomap_readpage_ctx *ctx = data;
+       struct page *page = ctx->cur_page;
+       unsigned poff = pos & (PAGE_SIZE - 1);
+       unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+       bool is_contig = false;
+       sector_t sector;
+       if (iomap->type == IOMAP_INLINE) {
+               WARN_ON_ONCE(poff);
+               iomap_read_inline_data(inode, page, iomap);
+               return PAGE_SIZE;
+       }
+       /* we don't support blocksize < PAGE_SIZE quite yet. */
+       WARN_ON_ONCE(pos != page_offset(page));
+       WARN_ON_ONCE(plen != PAGE_SIZE);
+       if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+               zero_user(page, poff, plen);
+               SetPageUptodate(page);
+               goto done;
+       }
+       ctx->cur_page_in_bio = true;
+       /*
+        * Try to merge into a previous segment if we can.
+        */
+       sector = iomap_sector(iomap, pos);
+       if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+               if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+                       goto done;
+               is_contig = true;
+       }
+       if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+               gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+               int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+               if (ctx->bio)
+                       submit_bio(ctx->bio);
+               if (ctx->is_readahead) /* same as readahead_gfp_mask */
+                       gfp |= __GFP_NORETRY | __GFP_NOWARN;
+               ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+               ctx->bio->bi_opf = REQ_OP_READ;
+               if (ctx->is_readahead)
+                       ctx->bio->bi_opf |= REQ_RAHEAD;
+               ctx->bio->bi_iter.bi_sector = sector;
+               bio_set_dev(ctx->bio, iomap->bdev);
+               ctx->bio->bi_end_io = iomap_read_end_io;
+       }
+       __bio_add_page(ctx->bio, page, plen, poff);
+ done:
+       return plen;
+ }
+ int
+ iomap_readpage(struct page *page, const struct iomap_ops *ops)
+ {
+       struct iomap_readpage_ctx ctx = { .cur_page = page };
+       struct inode *inode = page->mapping->host;
+       unsigned poff;
+       loff_t ret;
+       WARN_ON_ONCE(page_has_buffers(page));
+       for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+               ret = iomap_apply(inode, page_offset(page) + poff,
+                               PAGE_SIZE - poff, 0, ops, &ctx,
+                               iomap_readpage_actor);
+               if (ret <= 0) {
+                       WARN_ON_ONCE(ret == 0);
+                       SetPageError(page);
+                       break;
+               }
+       }
+       if (ctx.bio) {
+               submit_bio(ctx.bio);
+               WARN_ON_ONCE(!ctx.cur_page_in_bio);
+       } else {
+               WARN_ON_ONCE(ctx.cur_page_in_bio);
+               unlock_page(page);
+       }
+       /*
+        * Just like mpage_readpages and block_read_full_page we always
+        * return 0 and just mark the page as PageError on errors.  This
+        * should be cleaned up all through the stack eventually.
+        */
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(iomap_readpage);
+ static struct page *
+ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+               loff_t length, loff_t *done)
+ {
+       while (!list_empty(pages)) {
+               struct page *page = lru_to_page(pages);
+               if (page_offset(page) >= (u64)pos + length)
+                       break;
+               list_del(&page->lru);
+               if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+                               GFP_NOFS))
+                       return page;
+               /*
+                * If we already have a page in the page cache at index we are
+                * done.  Upper layers don't care if it is uptodate after the
+                * readpages call itself as every page gets checked again once
+                * actually needed.
+                */
+               *done += PAGE_SIZE;
+               put_page(page);
+       }
+       return NULL;
+ }
+ static loff_t
+ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+               void *data, struct iomap *iomap)
+ {
+       struct iomap_readpage_ctx *ctx = data;
+       loff_t done, ret;
+       for (done = 0; done < length; done += ret) {
+               if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+                       if (!ctx->cur_page_in_bio)
+                               unlock_page(ctx->cur_page);
+                       put_page(ctx->cur_page);
+                       ctx->cur_page = NULL;
+               }
+               if (!ctx->cur_page) {
+                       ctx->cur_page = iomap_next_page(inode, ctx->pages,
+                                       pos, length, &done);
+                       if (!ctx->cur_page)
+                               break;
+                       ctx->cur_page_in_bio = false;
+               }
+               ret = iomap_readpage_actor(inode, pos + done, length - done,
+                               ctx, iomap);
+       }
+       return done;
+ }
+ int
+ iomap_readpages(struct address_space *mapping, struct list_head *pages,
+               unsigned nr_pages, const struct iomap_ops *ops)
+ {
+       struct iomap_readpage_ctx ctx = {
+               .pages          = pages,
+               .is_readahead   = true,
+       };
+       loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+       loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+       loff_t length = last - pos + PAGE_SIZE, ret = 0;
+       while (length > 0) {
+               ret = iomap_apply(mapping->host, pos, length, 0, ops,
+                               &ctx, iomap_readpages_actor);
+               if (ret <= 0) {
+                       WARN_ON_ONCE(ret == 0);
+                       goto done;
+               }
+               pos += ret;
+               length -= ret;
+       }
+       ret = 0;
+ done:
+       if (ctx.bio)
+               submit_bio(ctx.bio);
+       if (ctx.cur_page) {
+               if (!ctx.cur_page_in_bio)
+                       unlock_page(ctx.cur_page);
+               put_page(ctx.cur_page);
+       }
+       /*
+        * Check that we didn't lose a page due to the arcane calling
+        * conventions.
+        */
+       WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(iomap_readpages);
  static void
  iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
  {
                truncate_pagecache_range(inode, max(pos, i_size), pos + len);
  }
  
+ static int
+ iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+               unsigned poff, unsigned plen, unsigned from, unsigned to,
+               struct iomap *iomap)
+ {
+       struct bio_vec bvec;
+       struct bio bio;
+       if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+               zero_user_segments(page, poff, from, to, poff + plen);
+               return 0;
+       }
+       bio_init(&bio, &bvec, 1);
+       bio.bi_opf = REQ_OP_READ;
+       bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+       bio_set_dev(&bio, iomap->bdev);
+       __bio_add_page(&bio, page, plen, poff);
+       return submit_bio_wait(&bio);
+ }
+ static int
+ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+               struct page *page, struct iomap *iomap)
+ {
+       loff_t block_size = i_blocksize(inode);
+       loff_t block_start = pos & ~(block_size - 1);
+       loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+       unsigned poff = block_start & (PAGE_SIZE - 1);
+       unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+       unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+       WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+       if (PageUptodate(page))
+               return 0;
+       if (from <= poff && to >= poff + plen)
+               return 0;
+       return iomap_read_page_sync(inode, block_start, page,
+                       poff, plen, from, to, iomap);
+ }
  static int
  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
                struct page **pagep, struct iomap *iomap)
        if (!page)
                return -ENOMEM;
  
-       status = __block_write_begin_int(page, pos, len, NULL, iomap);
+       if (iomap->type == IOMAP_INLINE)
+               iomap_read_inline_data(inode, page, iomap);
+       else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
+               status = __block_write_begin_int(page, pos, len, NULL, iomap);
+       else
+               status = __iomap_write_begin(inode, pos, len, page, iomap);
        if (unlikely(status)) {
                unlock_page(page);
                put_page(page);
        return status;
  }
  
+ int
+ iomap_set_page_dirty(struct page *page)
+ {
+       struct address_space *mapping = page_mapping(page);
+       int newly_dirty;
+       if (unlikely(!mapping))
+               return !TestSetPageDirty(page);
+       /*
+        * Lock out page->mem_cgroup migration to keep PageDirty
+        * synchronized with per-memcg dirty page counters.
+        */
+       lock_page_memcg(page);
+       newly_dirty = !TestSetPageDirty(page);
+       if (newly_dirty)
+               __set_page_dirty(page, mapping, 0);
+       unlock_page_memcg(page);
+       if (newly_dirty)
+               __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+       return newly_dirty;
+ }
+ EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
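
Filesystems that skip IOMAP_F_BUFFER_HEAD drive this buffered write path from ->write_iter through the existing iomap_file_buffered_write() helper, and their pages get dirtied via iomap_set_page_dirty() above. A minimal sketch under the same hypothetical examplefs assumptions (simplistic locking, buffered I/O only):

  static ssize_t examplefs_file_write_iter(struct kiocb *iocb,
  		struct iov_iter *from)
  {
  	struct inode *inode = file_inode(iocb->ki_filp);
  	ssize_t ret;

  	inode_lock(inode);
  	ret = generic_write_checks(iocb, from);
  	if (ret > 0)
  		ret = iomap_file_buffered_write(iocb, from,
  				&examplefs_iomap_ops);
  	inode_unlock(inode);

  	if (ret > 0)
  		ret = generic_write_sync(iocb, ret);
  	return ret;
  }
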
+ static int
+ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+               unsigned copied, struct page *page, struct iomap *iomap)
+ {
+       flush_dcache_page(page);
+       /*
+        * The blocks that were entirely written will now be uptodate, so we
+        * don't have to worry about a readpage reading them and overwriting a
+        * partial write.  However if we have encountered a short write and only
+        * partially written into a block, it will not be marked uptodate, so a
+        * readpage might come in and destroy our partial write.
+        *
+        * Do the simplest thing, and just treat any short write to a non
+        * uptodate page as a zero-length write, and force the caller to redo
+        * the whole thing.
+        */
+       if (unlikely(copied < len && !PageUptodate(page))) {
+               copied = 0;
+       } else {
+               SetPageUptodate(page);
+               iomap_set_page_dirty(page);
+       }
+       return __generic_write_end(inode, pos, copied, page);
+ }
+ static int
+ iomap_write_end_inline(struct inode *inode, struct page *page,
+               struct iomap *iomap, loff_t pos, unsigned copied)
+ {
+       void *addr;
+       WARN_ON_ONCE(!PageUptodate(page));
+       BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+       addr = kmap_atomic(page);
+       memcpy(iomap->inline_data + pos, addr + pos, copied);
+       kunmap_atomic(addr);
+       mark_inode_dirty(inode);
+       __generic_write_end(inode, pos, copied, page);
+       return copied;
+ }
  static int
  iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-               unsigned copied, struct page *page)
+               unsigned copied, struct page *page, struct iomap *iomap)
  {
        int ret;
  
-       ret = generic_write_end(NULL, inode->i_mapping, pos, len,
-                       copied, page, NULL);
+       if (iomap->type == IOMAP_INLINE) {
+               ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+       } else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+               ret = generic_write_end(NULL, inode->i_mapping, pos, len,
+                               copied, page, NULL);
+       } else {
+               ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
+       }
+       if (iomap->page_done)
+               iomap->page_done(inode, pos, copied, page, iomap);
        if (ret < len)
                iomap_write_failed(inode, pos, len);
        return ret;
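
The ->page_done hook invoked just above, together with the new private pointer in struct iomap, lets a filesystem pass per-mapping state from ->iomap_begin to the end of each page-sized write. A hedged sketch; the transaction helpers and names are hypothetical:

  struct examplefs_trans;					/* hypothetical */
  struct examplefs_trans *examplefs_trans_begin(struct inode *inode);
  void examplefs_trans_end(struct examplefs_trans *tr);

  static void examplefs_page_done(struct inode *inode, loff_t pos,
  		unsigned copied, struct page *page, struct iomap *iomap)
  {
  	/* balance the examplefs_trans_begin() done in ->iomap_begin */
  	examplefs_trans_end(iomap->private);
  }

  static int examplefs_iomap_begin_write(struct inode *inode, loff_t pos,
  		loff_t length, unsigned flags, struct iomap *iomap)
  {
  	/* fill in the block mapping as in the earlier sketches, then: */
  	iomap->private = examplefs_trans_begin(inode);
  	iomap->page_done = examplefs_page_done;
  	return 0;
  }
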
@@@ -208,7 -572,8 +572,8 @@@ again
  
                flush_dcache_page(page);
  
-               status = iomap_write_end(inode, pos, bytes, copied, page);
+               status = iomap_write_end(inode, pos, bytes, copied, page,
+                               iomap);
                if (unlikely(status < 0))
                        break;
                copied = status;
@@@ -302,7 -667,7 +667,7 @@@ iomap_dirty_actor(struct inode *inode, 
  
                WARN_ON_ONCE(!PageUptodate(page));
  
-               status = iomap_write_end(inode, pos, bytes, bytes, page);
+               status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
                if (unlikely(status <= 0)) {
                        if (WARN_ON_ONCE(status == 0))
                                return -EIO;
@@@ -354,7 -719,7 +719,7 @@@ static int iomap_zero(struct inode *ino
        zero_user(page, offset, bytes);
        mark_page_accessed(page);
  
-       return iomap_write_end(inode, pos, bytes, bytes, page);
+       return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
  }
  
  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
@@@ -440,11 -805,16 +805,16 @@@ iomap_page_mkwrite_actor(struct inode *
        struct page *page = data;
        int ret;
  
-       ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-       if (ret)
-               return ret;
+       if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+               ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+               if (ret)
+                       return ret;
+               block_commit_write(page, 0, length);
+       } else {
+               WARN_ON_ONCE(!PageUptodate(page));
+               WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+       }
  
-       block_commit_write(page, 0, length);
        return length;
  }
  
@@@ -811,6 -1181,7 +1181,7 @@@ struct iomap_dio 
        atomic_t                ref;
        unsigned                flags;
        int                     error;
+       bool                    wait_for_completion;
  
        union {
                /* used during submission and for synchronous completion: */
@@@ -914,9 -1285,8 +1285,8 @@@ static void iomap_dio_bio_end_io(struc
                iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
  
        if (atomic_dec_and_test(&dio->ref)) {
-               if (is_sync_kiocb(dio->iocb)) {
+               if (dio->wait_for_completion) {
                        struct task_struct *waiter = dio->submit.waiter;
                        WRITE_ONCE(dio->submit.waiter, NULL);
                        wake_up_process(waiter);
                } else if (dio->flags & IOMAP_DIO_WRITE) {
@@@ -963,10 -1333,9 +1333,9 @@@ iomap_dio_zero(struct iomap_dio *dio, s
  }
  
  static loff_t
- iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
-               void *data, struct iomap *iomap)
+ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
+               struct iomap_dio *dio, struct iomap *iomap)
  {
-       struct iomap_dio *dio = data;
        unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
        unsigned int fs_block_size = i_blocksize(inode), pad;
        unsigned int align = iov_iter_alignment(dio->submit.iter);
        if ((pos | length | align) & ((1 << blkbits) - 1))
                return -EINVAL;
  
-       switch (iomap->type) {
-       case IOMAP_HOLE:
-               if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
-                       return -EIO;
-               /*FALLTHRU*/
-       case IOMAP_UNWRITTEN:
-               if (!(dio->flags & IOMAP_DIO_WRITE)) {
-                       length = iov_iter_zero(length, dio->submit.iter);
-                       dio->size += length;
-                       return length;
-               }
+       if (iomap->type == IOMAP_UNWRITTEN) {
                dio->flags |= IOMAP_DIO_UNWRITTEN;
                need_zeroout = true;
-               break;
-       case IOMAP_MAPPED:
-               if (iomap->flags & IOMAP_F_SHARED)
-                       dio->flags |= IOMAP_DIO_COW;
-               if (iomap->flags & IOMAP_F_NEW) {
-                       need_zeroout = true;
-               } else {
-                       /*
-                        * Use a FUA write if we need datasync semantics, this
-                        * is a pure data IO that doesn't require any metadata
-                        * updates and the underlying device supports FUA. This
-                        * allows us to avoid cache flushes on IO completion.
-                        */
-                       if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-                           (dio->flags & IOMAP_DIO_WRITE_FUA) &&
-                           blk_queue_fua(bdev_get_queue(iomap->bdev)))
-                               use_fua = true;
-               }
-               break;
-       default:
-               WARN_ON_ONCE(1);
-               return -EIO;
+       }
+       if (iomap->flags & IOMAP_F_SHARED)
+               dio->flags |= IOMAP_DIO_COW;
+       if (iomap->flags & IOMAP_F_NEW) {
+               need_zeroout = true;
+       } else {
+               /*
+                * Use a FUA write if we need datasync semantics, this
+                * is a pure data IO that doesn't require any metadata
+                * updates and the underlying device supports FUA. This
+                * allows us to avoid cache flushes on IO completion.
+                */
+               if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
+                   (dio->flags & IOMAP_DIO_WRITE_FUA) &&
+                   blk_queue_fua(bdev_get_queue(iomap->bdev)))
+                       use_fua = true;
        }
  
        /*
        return copied;
  }
  
+ static loff_t
+ iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio)
+ {
+       length = iov_iter_zero(length, dio->submit.iter);
+       dio->size += length;
+       return length;
+ }
+ static loff_t
+ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
+               struct iomap_dio *dio, struct iomap *iomap)
+ {
+       struct iov_iter *iter = dio->submit.iter;
+       size_t copied;
+       BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+       if (dio->flags & IOMAP_DIO_WRITE) {
+               loff_t size = inode->i_size;
+               if (pos > size)
+                       memset(iomap->inline_data + size, 0, pos - size);
+               copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+               if (copied) {
+                       if (pos + copied > size)
+                               i_size_write(inode, pos + copied);
+                       mark_inode_dirty(inode);
+               }
+       } else {
+               copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+       }
+       dio->size += copied;
+       return copied;
+ }
+ static loff_t
+ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+               void *data, struct iomap *iomap)
+ {
+       struct iomap_dio *dio = data;
+       switch (iomap->type) {
+       case IOMAP_HOLE:
+               if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+                       return -EIO;
+               return iomap_dio_hole_actor(length, dio);
+       case IOMAP_UNWRITTEN:
+               if (!(dio->flags & IOMAP_DIO_WRITE))
+                       return iomap_dio_hole_actor(length, dio);
+               return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+       case IOMAP_MAPPED:
+               return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
+       case IOMAP_INLINE:
+               return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
+       default:
+               WARN_ON_ONCE(1);
+               return -EIO;
+       }
+ }
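
Callers reach these actors through iomap_dio_rw(), which applies iomap_dio_actor over the requested range; roughly, a filesystem's ->read_iter might hand direct I/O off like this (hypothetical examplefs again, shared locking only, no ->end_io callback):

  static ssize_t examplefs_file_read_iter(struct kiocb *iocb,
  		struct iov_iter *to)
  {
  	struct inode *inode = file_inode(iocb->ki_filp);
  	ssize_t ret;

  	if (!(iocb->ki_flags & IOCB_DIRECT))
  		return generic_file_read_iter(iocb, to);

  	inode_lock_shared(inode);
  	ret = iomap_dio_rw(iocb, to, &examplefs_iomap_ops, NULL);
  	inode_unlock_shared(inode);
  	return ret;
  }
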
  /*
   * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
   * is being issued as AIO or not.  This allows us to optimise pure data writes
@@@ -1131,13 -1546,12 +1546,12 @@@ iomap_dio_rw(struct kiocb *iocb, struc
        dio->end_io = end_io;
        dio->error = 0;
        dio->flags = 0;
+       dio->wait_for_completion = is_sync_kiocb(iocb);
  
        dio->submit.iter = iter;
-       if (is_sync_kiocb(iocb)) {
-               dio->submit.waiter = current;
-               dio->submit.cookie = BLK_QC_T_NONE;
-               dio->submit.last_queue = NULL;
-       }
+       dio->submit.waiter = current;
+       dio->submit.cookie = BLK_QC_T_NONE;
+       dio->submit.last_queue = NULL;
  
        if (iov_iter_rw(iter) == READ) {
                if (pos >= dio->i_size)
                dio_warn_stale_pagecache(iocb->ki_filp);
        ret = 0;
  
-       if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+       if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
            !inode->i_sb->s_dio_done_wq) {
                ret = sb_init_dio_done_wq(inode->i_sb);
                if (ret < 0)
                                iomap_dio_actor);
                if (ret <= 0) {
                        /* magic error code to fall back to buffered I/O */
-                       if (ret == -ENOTBLK)
+                       if (ret == -ENOTBLK) {
+                               dio->wait_for_completion = true;
                                ret = 0;
+                       }
                        break;
                }
                pos += ret;
                dio->flags &= ~IOMAP_DIO_NEED_SYNC;
  
        if (!atomic_dec_and_test(&dio->ref)) {
-               if (!is_sync_kiocb(iocb))
+               if (!dio->wait_for_completion)
                        return -EIOCBQUEUED;
  
                for (;;) {
@@@ -1443,7 -1859,7 +1859,7 @@@ iomap_bmap(struct address_space *mappin
                const struct iomap_ops *ops)
  {
        struct inode *inode = mapping->host;
 -      loff_t pos = bno >> inode->i_blkbits;
 +      loff_t pos = bno << inode->i_blkbits;
        unsigned blocksize = i_blocksize(inode);
  
        if (filemap_write_and_wait(mapping))
diff --combined fs/xfs/xfs_iomap.c
index 55876dd02f0c8c75fa5653eeab82881bd3741928,8a3613d576afdab0e1d6b6cbb7b498e2536333ea..e08a84d9ee72d2874ef446ce5abadeaabb43cabe
@@@ -626,7 -626,7 +626,7 @@@ retry
         * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
         * them out if the write happens to fail.
         */
-       iomap->flags = IOMAP_F_NEW;
+       iomap->flags |= IOMAP_F_NEW;
        trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
  done:
        if (isnullstartblock(got.br_startblock))
@@@ -963,13 -963,12 +963,13 @@@ xfs_ilock_for_iomap
        unsigned                *lockmode)
  {
        unsigned                mode = XFS_ILOCK_SHARED;
 +      bool                    is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);
  
        /*
         * COW writes may allocate delalloc space or convert unwritten COW
         * extents, so we need to make sure to take the lock exclusively here.
         */
 -      if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) {
 +      if (xfs_is_reflink_inode(ip) && is_write) {
                /*
                 * FIXME: It could still overwrite on unshared extents and not
                 * need allocation.
                mode = XFS_ILOCK_EXCL;
        }
  
 +relock:
        if (flags & IOMAP_NOWAIT) {
                if (!xfs_ilock_nowait(ip, mode))
                        return -EAGAIN;
                xfs_ilock(ip, mode);
        }
  
 +      /*
 +       * The reflink iflag could have changed since the earlier unlocked
 +       * check, so if we got ILOCK_SHARED for a write but we're now a
 +       * reflink inode we have to switch to ILOCK_EXCL and relock.
 +       */
 +      if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
 +              xfs_iunlock(ip, mode);
 +              mode = XFS_ILOCK_EXCL;
 +              goto relock;
 +      }
 +
        *lockmode = mode;
        return 0;
  }
@@@ -1032,6 -1019,8 +1032,8 @@@ xfs_file_iomap_begin
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
  
+       iomap->flags |= IOMAP_F_BUFFER_HEAD;
        if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
                        !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
                /* Reserve delalloc blocks for regular writeback. */
        if (error)
                return error;
  
-       iomap->flags = IOMAP_F_NEW;
+       iomap->flags |= IOMAP_F_NEW;
        trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
  
  out_finish: