Merge tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block
author Linus Torvalds <[email protected]>
Sat, 24 Oct 2020 19:40:18 +0000 (12:40 -0700)
committer Linus Torvalds <[email protected]>
Sat, 24 Oct 2020 19:40:18 +0000 (12:40 -0700)
Pull io_uring fixes from Jens Axboe:

 - fsize was missed in the previous unification of work flags (see the
   sketch after this list)

 - A few fixes cleaning up the creds cases from the flags unification
   (Pavel)

 - Fix io-wq NUMA affinities when a node is completely unplugged and
   then replugged

 - Two fallout fixes from the set_fs changes. One local to io_uring, one
   for the splice entry point that io_uring uses (the splice side is
   sketched after the io_uring.c diff below).

 - Linked timeout fixes (Pavel)

 - Removal of the ->flush()/->files work-around that is no longer needed
   now that files are referenced (Pavel)

 - Various cleanups (Pavel)

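To make the flags unification concrete, here is a minimal userspace model of
the pattern the io_uring.c hunks below adopt: per-op booleans such as
needs_fsize collapse into a single work_flags word, and the grab path tests
bits. The IO_WQ_WORK_* names and the io_grab_identity() shape mirror the
diff; the program itself is a standalone sketch, not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define IO_WQ_WORK_MM    (1u << 0)
#define IO_WQ_WORK_BLKCG (1u << 1)
#define IO_WQ_WORK_FSIZE (1u << 2)

struct io_op_def {
        unsigned work_flags;    /* replaces per-property bitfields like needs_fsize */
};

static const struct io_op_def op_defs[] = {
        /* a WRITEV-like op: mm, blkcg and an RLIMIT_FSIZE snapshot */
        { .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE },
        /* an FSYNC-like op: block cgroup only */
        { .work_flags = IO_WQ_WORK_BLKCG },
};

/*
 * Mirrors the io_grab_identity() hunk: the fsize check now keys off
 * def->work_flags, and a successful grab records the bit on the request.
 */
static bool grab_identity(const struct io_op_def *def, unsigned *req_work_flags,
                          unsigned long id_fsize, unsigned long current_fsize)
{
        if (def->work_flags & IO_WQ_WORK_FSIZE) {
                if (id_fsize != current_fsize)
                        return false;   /* identity went stale, caller re-clones */
                *req_work_flags |= IO_WQ_WORK_FSIZE;
        }
        return true;
}

int main(void)
{
        unsigned req_flags = 0;

        if (grab_identity(&op_defs[0], &req_flags, 1024, 1024))
                printf("grabbed, req work flags = %#x\n", req_flags);
        return 0;
}

The win is that a new per-op requirement costs one bit in an existing word
rather than another bitfield plus a dedicated check.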
* tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block:
  splice: change exported internal do_splice() helper to take kernel offset
  io_uring: make loop_rw_iter() use original user supplied pointers
  io_uring: remove req cancel in ->flush()
  io-wq: re-set NUMA node affinities if CPUs come online
  io_uring: don't reuse linked_timeout
  io_uring: unify fsize with def->work_flags
  io_uring: fix racy REQ_F_LINK_TIMEOUT clearing
  io_uring: do poll's hash_node init in common code
  io_uring: inline io_poll_task_handler()
  io_uring: remove extra ->file check in poll prep
  io_uring: make cached_cq_overflow non atomic_t
  io_uring: inline io_fail_links()
  io_uring: kill ref get/drop in personality init
  io_uring: flags-based creds init in queue

fs/io_uring.c
fs/splice.c

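A note on the cached_cq_overflow hunks below ("io_uring: make
cached_cq_overflow non atomic_t"): atomic_t was overkill because every writer
already holds ctx->completion_lock; only the lockless reader in
req_need_defer() needs a torn-free load, which READ_ONCE()/WRITE_ONCE()
provide. Here is a small pthread model of that locked-writer/lockless-reader
pattern, using C11 relaxed atomics as a stand-in for READ_ONCE/WRITE_ONCE; an
illustration, not kernel code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t completion_lock = PTHREAD_MUTEX_INITIALIZER;

/* plain "unsigned" in the kernel; _Atomic here only models {READ,WRITE}_ONCE */
static _Atomic unsigned cached_cq_overflow;

static void account_cq_overflow(void)
{
        pthread_mutex_lock(&completion_lock);   /* all writers serialize here */
        unsigned v = atomic_load_explicit(&cached_cq_overflow,
                                          memory_order_relaxed);
        /* analogue of ctx->cached_cq_overflow++ + WRITE_ONCE() publication */
        atomic_store_explicit(&cached_cq_overflow, v + 1,
                              memory_order_relaxed);
        pthread_mutex_unlock(&completion_lock);
}

static unsigned peek_cq_overflow(void)
{
        /* READ_ONCE() analogue from req_need_defer(): torn-free, lockless */
        return atomic_load_explicit(&cached_cq_overflow, memory_order_relaxed);
}

int main(void)
{
        account_cq_overflow();
        account_cq_overflow();
        printf("cq_overflow = %u\n", peek_cq_overflow());
        return 0;
}

Dropping the atomic read-modify-write removes a needless locked instruction
from a path that is already serialized.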
diff --combined fs/io_uring.c
index 626a9d111744f7542d696e9903b9184d9536b7d0,d40717f8647b182e3ba02e07ce86c4814034b95a..b42dfa0243bfd5ecf83caf903044a724a3919e76
@@@ -277,7 -277,7 +277,7 @@@ struct io_ring_ctx 
                unsigned                sq_mask;
                unsigned                sq_thread_idle;
                unsigned                cached_sq_dropped;
-               atomic_t                cached_cq_overflow;
+               unsigned                cached_cq_overflow;
                unsigned long           sq_check_overflow;
  
                struct list_head        defer_list;
@@@ -585,6 -585,7 +585,7 @@@ enum 
        REQ_F_BUFFER_SELECTED_BIT,
        REQ_F_NO_FILE_TABLE_BIT,
        REQ_F_WORK_INITIALIZED_BIT,
+       REQ_F_LTIMEOUT_ACTIVE_BIT,
  
        /* not a real bit, just to check we're not overflowing the space */
        __REQ_F_LAST_BIT,
@@@ -614,7 -615,7 +615,7 @@@ enum 
        REQ_F_CUR_POS           = BIT(REQ_F_CUR_POS_BIT),
        /* must not punt to workers */
        REQ_F_NOWAIT            = BIT(REQ_F_NOWAIT_BIT),
-       /* has linked timeout */
+       /* has or had linked timeout */
        REQ_F_LINK_TIMEOUT      = BIT(REQ_F_LINK_TIMEOUT_BIT),
        /* regular file */
        REQ_F_ISREG             = BIT(REQ_F_ISREG_BIT),
        REQ_F_NO_FILE_TABLE     = BIT(REQ_F_NO_FILE_TABLE_BIT),
        /* io_wq_work is initialized */
        REQ_F_WORK_INITIALIZED  = BIT(REQ_F_WORK_INITIALIZED_BIT),
+       /* linked timeout is active, i.e. prepared by link's head */
+       REQ_F_LTIMEOUT_ACTIVE   = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
  };
  
  struct async_poll {
@@@ -750,8 -753,6 +753,6 @@@ struct io_op_def 
        unsigned                pollout : 1;
        /* op supports buffer selection */
        unsigned                buffer_select : 1;
-       /* needs rlimit(RLIMIT_FSIZE) assigned */
-       unsigned                needs_fsize : 1;
        /* must always have async data allocated */
        unsigned                needs_async_data : 1;
        /* size of async data needed, if any */
@@@ -775,10 -776,10 +776,10 @@@ static const struct io_op_def io_op_def
                .hash_reg_file          = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
-               .needs_fsize            = 1,
                .needs_async_data       = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+                                               IO_WQ_WORK_FSIZE,
        },
        [IORING_OP_FSYNC] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
                .pollin                 = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_BLKCG,
+               .work_flags             = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
        },
        [IORING_OP_WRITE_FIXED] = {
                .needs_file             = 1,
                .hash_reg_file          = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
-               .needs_fsize            = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_BLKCG,
+               .work_flags             = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
+                                               IO_WQ_WORK_MM,
        },
        [IORING_OP_POLL_ADD] = {
                .needs_file             = 1,
        },
        [IORING_OP_FALLOCATE] = {
                .needs_file             = 1,
-               .needs_fsize            = 1,
-               .work_flags             = IO_WQ_WORK_BLKCG,
+               .work_flags             = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
        },
        [IORING_OP_OPENAT] = {
                .work_flags             = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
-               .needs_fsize            = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
+               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
+                                               IO_WQ_WORK_FSIZE,
        },
        [IORING_OP_FADVISE] = {
                .needs_file             = 1,
@@@ -1070,6 -1070,12 +1070,12 @@@ static void io_init_identity(struct io_
        refcount_set(&id->count, 1);
  }
  
+ static inline void __io_req_init_async(struct io_kiocb *req)
+ {
+       memset(&req->work, 0, sizeof(req->work));
+       req->flags |= REQ_F_WORK_INITIALIZED;
+ }
  /*
   * Note: must call io_req_init_async() for the first time you
   * touch any members of io_wq_work.
@@@ -1081,8 -1087,7 +1087,7 @@@ static inline void io_req_init_async(st
        if (req->flags & REQ_F_WORK_INITIALIZED)
                return;
  
-       memset(&req->work, 0, sizeof(req->work));
-       req->flags |= REQ_F_WORK_INITIALIZED;
+       __io_req_init_async(req);
  
        /* Grab a ref if this isn't our static identity */
        req->work.identity = tctx->identity;
@@@ -1174,7 -1179,7 +1179,7 @@@ static bool req_need_defer(struct io_ki
                struct io_ring_ctx *ctx = req->ctx;
  
                return seq != ctx->cached_cq_tail
-                               + atomic_read(&ctx->cached_cq_overflow);
+                               + READ_ONCE(ctx->cached_cq_overflow);
        }
  
        return false;
@@@ -1285,8 -1290,11 +1290,11 @@@ static bool io_grab_identity(struct io_
        struct io_identity *id = req->work.identity;
        struct io_ring_ctx *ctx = req->ctx;
  
-       if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE))
-               return false;
+       if (def->work_flags & IO_WQ_WORK_FSIZE) {
+               if (id->fsize != rlimit(RLIMIT_FSIZE))
+                       return false;
+               req->work.flags |= IO_WQ_WORK_FSIZE;
+       }
  
        if (!(req->work.flags & IO_WQ_WORK_FILES) &&
            (def->work_flags & IO_WQ_WORK_FILES) &&
@@@ -1619,8 -1627,9 +1627,9 @@@ static bool io_cqring_overflow_flush(st
                        WRITE_ONCE(cqe->res, req->result);
                        WRITE_ONCE(cqe->flags, req->compl.cflags);
                } else {
+                       ctx->cached_cq_overflow++;
                        WRITE_ONCE(ctx->rings->cq_overflow,
-                               atomic_inc_return(&ctx->cached_cq_overflow));
+                                  ctx->cached_cq_overflow);
                }
        }
  
@@@ -1662,8 -1671,8 +1671,8 @@@ static void __io_cqring_fill_event(stru
                 * then we cannot store the request for later flushing, we need
                 * to drop it on the floor.
                 */
-               WRITE_ONCE(ctx->rings->cq_overflow,
-                               atomic_inc_return(&ctx->cached_cq_overflow));
+               ctx->cached_cq_overflow++;
+               WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
        } else {
                if (list_empty(&ctx->cq_overflow_list)) {
                        set_bit(0, &ctx->sq_check_overflow);
@@@ -1865,6 -1874,12 +1874,12 @@@ static bool __io_kill_linked_timeout(st
        link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
        if (link->opcode != IORING_OP_LINK_TIMEOUT)
                return false;
+       /*
+        * Can happen if a linked timeout fired and link had been like
+        * req -> link t-out -> link t-out [-> ...]
+        */
+       if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE))
+               return false;
  
        list_del_init(&link->link_list);
        wake_ev = io_link_cancel_timeout(link);
@@@ -1908,10 -1923,12 +1923,12 @@@ static struct io_kiocb *io_req_link_nex
  /*
   * Called if REQ_F_LINK_HEAD is set, and we fail the head request
   */
- static void __io_fail_links(struct io_kiocb *req)
+ static void io_fail_links(struct io_kiocb *req)
  {
        struct io_ring_ctx *ctx = req->ctx;
+       unsigned long flags;
  
+       spin_lock_irqsave(&ctx->completion_lock, flags);
        while (!list_empty(&req->link_list)) {
                struct io_kiocb *link = list_first_entry(&req->link_list,
                                                struct io_kiocb, link_list);
        }
  
        io_commit_cqring(ctx);
- }
- static void io_fail_links(struct io_kiocb *req)
- {
-       struct io_ring_ctx *ctx = req->ctx;
-       unsigned long flags;
-       spin_lock_irqsave(&ctx->completion_lock, flags);
-       __io_fail_links(req);
        spin_unlock_irqrestore(&ctx->completion_lock, flags);
  
        io_cqring_ev_posted(ctx);
@@@ -1976,8 -1984,7 +1984,8 @@@ static int io_req_task_work_add(struct 
  {
        struct task_struct *tsk = req->task;
        struct io_ring_ctx *ctx = req->ctx;
 -      int ret, notify;
 +      enum task_work_notify_mode notify;
 +      int ret;
  
        if (tsk->flags & PF_EXITING)
                return -ESRCH;
         * processing task_work. There's no reliable way to tell if TWA_RESUME
         * will do the job.
         */
 -      notify = 0;
 +      notify = TWA_NONE;
        if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
                notify = TWA_SIGNAL;
  
@@@ -2057,7 -2064,7 +2065,7 @@@ static void io_req_task_queue(struct io
  
                init_task_work(&req->task_work, io_req_task_cancel);
                tsk = io_wq_get_task(req->ctx->io_wq);
 -              task_work_add(tsk, &req->task_work, 0);
 +              task_work_add(tsk, &req->task_work, TWA_NONE);
                wake_up_process(tsk);
        }
  }
@@@ -2178,7 -2185,7 +2186,7 @@@ static void io_free_req_deferred(struc
                struct task_struct *tsk;
  
                tsk = io_wq_get_task(req->ctx->io_wq);
 -              task_work_add(tsk, &req->task_work, 0);
 +              task_work_add(tsk, &req->task_work, TWA_NONE);
                wake_up_process(tsk);
        }
  }
@@@ -3109,9 -3116,10 +3117,10 @@@ static inline loff_t *io_kiocb_ppos(str
   * For files that don't have ->read_iter() and ->write_iter(), handle them
   * by looping over ->read() or ->write() manually.
   */
- static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
-                          struct iov_iter *iter)
+ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
  {
+       struct kiocb *kiocb = &req->rw.kiocb;
+       struct file *file = req->file;
        ssize_t ret = 0;
  
        /*
                if (!iov_iter_is_bvec(iter)) {
                        iovec = iov_iter_iovec(iter);
                } else {
-                       /* fixed buffers import bvec */
-                       iovec.iov_base = kmap(iter->bvec->bv_page)
-                                               + iter->iov_offset;
-                       iovec.iov_len = min(iter->count,
-                                       iter->bvec->bv_len - iter->iov_offset);
+                       iovec.iov_base = u64_to_user_ptr(req->rw.addr);
+                       iovec.iov_len = req->rw.len;
                }
  
                if (rw == READ) {
                                               iovec.iov_len, io_kiocb_ppos(kiocb));
                }
  
-               if (iov_iter_is_bvec(iter))
-                       kunmap(iter->bvec->bv_page);
                if (nr < 0) {
                        if (!ret)
                                ret = nr;
                ret += nr;
                if (nr != iovec.iov_len)
                        break;
+               req->rw.len -= nr;
+               req->rw.addr += nr;
                iov_iter_advance(iter, nr);
        }
  
@@@ -3292,7 -3296,7 +3297,7 @@@ static int io_async_buf_func(struct wai
                /* queue just for cancelation */
                init_task_work(&req->task_work, io_req_task_cancel);
                tsk = io_wq_get_task(req->ctx->io_wq);
 -              task_work_add(tsk, &req->task_work, 0);
 +              task_work_add(tsk, &req->task_work, TWA_NONE);
                wake_up_process(tsk);
        }
        return 1;
@@@ -3346,7 -3350,7 +3351,7 @@@ static int io_iter_do_read(struct io_ki
        if (req->file->f_op->read_iter)
                return call_read_iter(req->file, &req->rw.kiocb, iter);
        else if (req->file->f_op->read)
-               return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
+               return loop_rw_iter(READ, req, iter);
        else
                return -EINVAL;
  }
@@@ -3537,7 -3541,7 +3542,7 @@@ static int io_write(struct io_kiocb *re
        if (req->file->f_op->write_iter)
                ret2 = call_write_iter(req->file, kiocb, iter);
        else if (req->file->f_op->write)
-               ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter);
+               ret2 = loop_rw_iter(WRITE, req, iter);
        else
                ret2 = -EINVAL;
  
@@@ -4082,7 -4086,7 +4087,7 @@@ static int io_madvise(struct io_kiocb *
        if (force_nonblock)
                return -EAGAIN;
  
 -      ret = do_madvise(ma->addr, ma->len, ma->advice);
 +      ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
        if (ret < 0)
                req_set_fail_links(req);
        io_req_complete(req, ret);
@@@ -4858,7 -4862,7 +4863,7 @@@ static int __io_async_wake(struct io_ki
  
                WRITE_ONCE(poll->canceled, true);
                tsk = io_wq_get_task(req->ctx->io_wq);
 -              task_work_add(tsk, &req->task_work, 0);
 +              task_work_add(tsk, &req->task_work, TWA_NONE);
                wake_up_process(tsk);
        }
        return 1;
@@@ -4927,32 -4931,25 +4932,25 @@@ static void io_poll_complete(struct io_
        io_commit_cqring(ctx);
  }
  
- static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
+ static void io_poll_task_func(struct callback_head *cb)
  {
+       struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
        struct io_ring_ctx *ctx = req->ctx;
+       struct io_kiocb *nxt;
  
        if (io_poll_rewait(req, &req->poll)) {
                spin_unlock_irq(&ctx->completion_lock);
-               return;
-       }
-       hash_del(&req->hash_node);
-       io_poll_complete(req, req->result, 0);
-       spin_unlock_irq(&ctx->completion_lock);
-       *nxt = io_put_req_find_next(req);
-       io_cqring_ev_posted(ctx);
- }
+       } else {
+               hash_del(&req->hash_node);
+               io_poll_complete(req, req->result, 0);
+               spin_unlock_irq(&ctx->completion_lock);
  
- static void io_poll_task_func(struct callback_head *cb)
- {
-       struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
-       struct io_ring_ctx *ctx = req->ctx;
-       struct io_kiocb *nxt = NULL;
+               nxt = io_put_req_find_next(req);
+               io_cqring_ev_posted(ctx);
+               if (nxt)
+                       __io_req_task_submit(nxt);
+       }
  
-       io_poll_task_handler(req, &nxt);
-       if (nxt)
-               __io_req_task_submit(nxt);
        percpu_ref_put(&ctx->refs);
  }
  
@@@ -5106,6 -5103,7 +5104,7 @@@ static __poll_t __io_arm_poll_handler(s
        struct io_ring_ctx *ctx = req->ctx;
        bool cancel = false;
  
+       INIT_HLIST_NODE(&req->hash_node);
        io_init_poll_iocb(poll, mask, wake_func);
        poll->file = req->file;
        poll->wait.private = req;
@@@ -5167,7 -5165,6 +5166,6 @@@ static bool io_arm_poll_handler(struct 
  
        req->flags |= REQ_F_POLLED;
        req->apoll = apoll;
-       INIT_HLIST_NODE(&req->hash_node);
  
        mask = 0;
        if (def->pollin)
@@@ -5349,8 -5346,6 +5347,6 @@@ static int io_poll_add_prep(struct io_k
                return -EINVAL;
        if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
                return -EINVAL;
-       if (!poll->file)
-               return -EBADF;
  
        events = READ_ONCE(sqe->poll32_events);
  #ifdef __BIG_ENDIAN
@@@ -5368,7 -5363,6 +5364,6 @@@ static int io_poll_add(struct io_kiocb 
        struct io_poll_table ipt;
        __poll_t mask;
  
-       INIT_HLIST_NODE(&req->hash_node);
        ipt.pt._qproc = io_poll_queue_proc;
  
        mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
@@@ -6118,10 -6112,9 +6113,9 @@@ static enum hrtimer_restart io_link_tim
        if (!list_empty(&req->link_list)) {
                prev = list_entry(req->link_list.prev, struct io_kiocb,
                                  link_list);
-               if (refcount_inc_not_zero(&prev->refs)) {
+               if (refcount_inc_not_zero(&prev->refs))
                        list_del_init(&req->link_list);
-                       prev->flags &= ~REQ_F_LINK_TIMEOUT;
-               } else
+               else
                        prev = NULL;
        }
  
@@@ -6178,6 -6171,7 +6172,7 @@@ static struct io_kiocb *io_prep_linked_
        if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT)
                return NULL;
  
+       nxt->flags |= REQ_F_LTIMEOUT_ACTIVE;
        req->flags |= REQ_F_LINK_TIMEOUT;
        return nxt;
  }
@@@ -6192,7 -6186,8 +6187,8 @@@ static void __io_queue_sqe(struct io_ki
  again:
        linked_timeout = io_prep_linked_timeout(req);
  
-       if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.identity->creds &&
+       if ((req->flags & REQ_F_WORK_INITIALIZED) &&
+           (req->work.flags & IO_WQ_WORK_CREDS) &&
            req->work.identity->creds != current_cred()) {
                if (old_creds)
                        revert_creds(old_creds);
                        old_creds = NULL; /* restored original creds */
                else
                        old_creds = override_creds(req->work.identity->creds);
-               req->work.flags |= IO_WQ_WORK_CREDS;
        }
  
        ret = io_issue_sqe(req, true, cs);
@@@ -6241,8 -6235,10 +6236,10 @@@ punt
        if (nxt) {
                req = nxt;
  
-               if (req->flags & REQ_F_FORCE_ASYNC)
+               if (req->flags & REQ_F_FORCE_ASYNC) {
+                       linked_timeout = NULL;
                        goto punt;
+               }
                goto again;
        }
  exit:
@@@ -6505,12 -6501,12 +6502,12 @@@ static int io_init_req(struct io_ring_c
        if (id) {
                struct io_identity *iod;
  
-               io_req_init_async(req);
                iod = idr_find(&ctx->personality_idr, id);
                if (unlikely(!iod))
                        return -EINVAL;
                refcount_inc(&iod->count);
-               io_put_identity(current->io_uring, req);
+               __io_req_init_async(req);
                get_cred(iod->creds);
                req->work.identity = iod;
                req->work.flags |= IO_WQ_WORK_CREDS;
@@@ -8686,19 -8682,11 +8683,11 @@@ static void io_uring_del_task_file(stru
                fput(file);
  }
  
- static void __io_uring_attempt_task_drop(struct file *file)
- {
-       struct file *old = xa_load(&current->io_uring->xa, (unsigned long)file);
-       if (old == file)
-               io_uring_del_task_file(file);
- }
  /*
   * Drop task note for this file if we're the only ones that hold it after
   * pending fput()
   */
- static void io_uring_attempt_task_drop(struct file *file, bool exiting)
+ static void io_uring_attempt_task_drop(struct file *file)
  {
        if (!current->io_uring)
                return;
         * fput() is pending, will be 2 if the only other ref is our potential
         * task file note. If the task is exiting, drop regardless of count.
         */
-       if (!exiting && atomic_long_read(&file->f_count) != 2)
-               return;
-       __io_uring_attempt_task_drop(file);
+       if (fatal_signal_pending(current) || (current->flags & PF_EXITING) ||
+           atomic_long_read(&file->f_count) == 2)
+               io_uring_del_task_file(file);
  }
  
  void __io_uring_files_cancel(struct files_struct *files)
@@@ -8767,16 -8754,7 +8755,7 @@@ void __io_uring_task_cancel(void
  
  static int io_uring_flush(struct file *file, void *data)
  {
-       struct io_ring_ctx *ctx = file->private_data;
-       /*
-        * If the task is going away, cancel work it may have pending
-        */
-       if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
-               data = NULL;
-       io_uring_cancel_task_requests(ctx, data);
-       io_uring_attempt_task_drop(file, !data);
+       io_uring_attempt_task_drop(file);
        return 0;
  }
  
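The splice half of the set_fs fallout follows. The exported do_splice() now
takes kernel loff_t pointers, and a new __do_splice() keeps the
copy_from_user()/copy_to_user() handling at the syscall boundary, so
in-kernel callers like io_uring no longer need set_fs() tricks. A compact
userspace model of that split, with memcpy() standing in for the user-copy
helpers (a sketch under those names, not the kernel code):

#include <stdio.h>
#include <string.h>

/* in-kernel helper: offsets are kernel pointers now, NULL = use f_pos */
static long do_splice_model(long long *off_in, long long *off_out,
                            unsigned long len)
{
        if (off_in)
                *off_in += (long long)len;
        if (off_out)
                *off_out += (long long)len;
        return (long)len;
}

/* syscall-boundary wrapper: owns the user-copy dance, like __do_splice() */
static long do_splice_user_model(long long *uoff_in, long long *uoff_out,
                                 unsigned long len)
{
        long long kin, kout;
        long long *pin = NULL, *pout = NULL;
        long ret;

        if (uoff_in) {                  /* copy_from_user() stand-in */
                memcpy(&kin, uoff_in, sizeof(kin));
                pin = &kin;
        }
        if (uoff_out) {
                memcpy(&kout, uoff_out, sizeof(kout));
                pout = &kout;
        }

        ret = do_splice_model(pin, pout, len);
        if (ret < 0)
                return ret;

        if (pin)                        /* copy_to_user() stand-in */
                memcpy(uoff_in, pin, sizeof(kin));
        if (pout)
                memcpy(uoff_out, pout, sizeof(kout));
        return ret;
}

int main(void)
{
        long long off_out = 4096;

        printf("ret=%ld off_out=%lld\n",
               do_splice_user_model(NULL, &off_out, 512), off_out);
        return 0;
}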
diff --combined fs/splice.c
index 599b740f1098faecc5408eb6d76d6fa7cc426a9c,d9305af930d878b6ca1b710974798173fbd2bfcf..866d5c2367b233091e98e372b2ab25c04c93fef3
@@@ -341,6 -341,89 +341,6 @@@ const struct pipe_buf_operations nostea
  };
  EXPORT_SYMBOL(nosteal_pipe_buf_ops);
  
 -static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
 -                          unsigned long vlen, loff_t offset)
 -{
 -      mm_segment_t old_fs;
 -      loff_t pos = offset;
 -      ssize_t res;
 -
 -      old_fs = get_fs();
 -      set_fs(KERNEL_DS);
 -      /* The cast to a user pointer is valid due to the set_fs() */
 -      res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
 -      set_fs(old_fs);
 -
 -      return res;
 -}
 -
 -static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 -                               struct pipe_inode_info *pipe, size_t len,
 -                               unsigned int flags)
 -{
 -      struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
 -      struct iov_iter to;
 -      struct page **pages;
 -      unsigned int nr_pages;
 -      unsigned int mask;
 -      size_t offset, base, copied = 0;
 -      ssize_t res;
 -      int i;
 -
 -      if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
 -              return -EAGAIN;
 -
 -      /*
 -       * Try to keep page boundaries matching to source pagecache ones -
 -       * it probably won't be much help, but...
 -       */
 -      offset = *ppos & ~PAGE_MASK;
 -
 -      iov_iter_pipe(&to, READ, pipe, len + offset);
 -
 -      res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
 -      if (res <= 0)
 -              return -ENOMEM;
 -
 -      nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
 -
 -      vec = __vec;
 -      if (nr_pages > PIPE_DEF_BUFFERS) {
 -              vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL);
 -              if (unlikely(!vec)) {
 -                      res = -ENOMEM;
 -                      goto out;
 -              }
 -      }
 -
 -      mask = pipe->ring_size - 1;
 -      pipe->bufs[to.head & mask].offset = offset;
 -      pipe->bufs[to.head & mask].len -= offset;
 -
 -      for (i = 0; i < nr_pages; i++) {
 -              size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
 -              vec[i].iov_base = page_address(pages[i]) + offset;
 -              vec[i].iov_len = this_len;
 -              len -= this_len;
 -              offset = 0;
 -      }
 -
 -      res = kernel_readv(in, vec, nr_pages, *ppos);
 -      if (res > 0) {
 -              copied = res;
 -              *ppos += res;
 -      }
 -
 -      if (vec != __vec)
 -              kfree(vec);
 -out:
 -      for (i = 0; i < nr_pages; i++)
 -              put_page(pages[i]);
 -      kvfree(pages);
 -      iov_iter_advance(&to, copied);  /* truncates and discards */
 -      return res;
 -}
 -
  /*
   * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
   * using sendpage(). Return the number of bytes sent.
@@@ -724,6 -807,33 +724,6 @@@ done
  
  EXPORT_SYMBOL(iter_file_splice_write);
  
 -static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 -                        struct splice_desc *sd)
 -{
 -      int ret;
 -      void *data;
 -      loff_t tmp = sd->pos;
 -
 -      data = kmap(buf->page);
 -      ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
 -      kunmap(buf->page);
 -
 -      return ret;
 -}
 -
 -static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
 -                                       struct file *out, loff_t *ppos,
 -                                       size_t len, unsigned int flags)
 -{
 -      ssize_t ret;
 -
 -      ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
 -      if (ret > 0)
 -              *ppos += ret;
 -
 -      return ret;
 -}
 -
  /**
   * generic_splice_sendpage - splice data from a pipe to a socket
   * @pipe:     pipe to splice from
@@@ -745,23 -855,15 +745,23 @@@ ssize_t generic_splice_sendpage(struct 
  
  EXPORT_SYMBOL(generic_splice_sendpage);
  
 +static int warn_unsupported(struct file *file, const char *op)
 +{
 +      pr_debug_ratelimited(
 +              "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
 +              op, file, current->pid, current->comm);
 +      return -EINVAL;
 +}
 +
  /*
   * Attempt to initiate a splice from pipe to file.
   */
  static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
                           loff_t *ppos, size_t len, unsigned int flags)
  {
 -      if (out->f_op->splice_write)
 -              return out->f_op->splice_write(pipe, out, ppos, len, flags);
 -      return default_file_splice_write(pipe, out, ppos, len, flags);
 +      if (unlikely(!out->f_op->splice_write))
 +              return warn_unsupported(out, "write");
 +      return out->f_op->splice_write(pipe, out, ppos, len, flags);
  }
  
  /*
@@@ -783,9 -885,9 +783,9 @@@ static long do_splice_to(struct file *i
        if (unlikely(len > MAX_RW_COUNT))
                len = MAX_RW_COUNT;
  
 -      if (in->f_op->splice_read)
 -              return in->f_op->splice_read(in, ppos, pipe, len, flags);
 -      return default_file_splice_read(in, ppos, pipe, len, flags);
 +      if (unlikely(!in->f_op->splice_read))
 +              return warn_unsupported(in, "read");
 +      return in->f_op->splice_read(in, ppos, pipe, len, flags);
  }
  
  /**
@@@ -1005,9 -1107,8 +1005,8 @@@ static int splice_pipe_to_pipe(struct p
  /*
   * Determine where to splice to/from.
   */
- long do_splice(struct file *in, loff_t __user *off_in,
-               struct file *out, loff_t __user *off_out,
-               size_t len, unsigned int flags)
+ long do_splice(struct file *in, loff_t *off_in, struct file *out,
+              loff_t *off_out, size_t len, unsigned int flags)
  {
        struct pipe_inode_info *ipipe;
        struct pipe_inode_info *opipe;
                if (off_out) {
                        if (!(out->f_mode & FMODE_PWRITE))
                                return -EINVAL;
-                       if (copy_from_user(&offset, off_out, sizeof(loff_t)))
-                               return -EFAULT;
+                       offset = *off_out;
                } else {
                        offset = out->f_pos;
                }
  
                if (!off_out)
                        out->f_pos = offset;
-               else if (copy_to_user(off_out, &offset, sizeof(loff_t)))
-                       ret = -EFAULT;
+               else
+                       *off_out = offset;
  
                return ret;
        }
                if (off_in) {
                        if (!(in->f_mode & FMODE_PREAD))
                                return -EINVAL;
-                       if (copy_from_user(&offset, off_in, sizeof(loff_t)))
-                               return -EFAULT;
+                       offset = *off_in;
                } else {
                        offset = in->f_pos;
                }
                        wakeup_pipe_readers(opipe);
                if (!off_in)
                        in->f_pos = offset;
-               else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
-                       ret = -EFAULT;
+               else
+                       *off_in = offset;
  
                return ret;
        }
        return -EINVAL;
  }
  
+ static long __do_splice(struct file *in, loff_t __user *off_in,
+                       struct file *out, loff_t __user *off_out,
+                       size_t len, unsigned int flags)
+ {
+       struct pipe_inode_info *ipipe;
+       struct pipe_inode_info *opipe;
+       loff_t offset, *__off_in = NULL, *__off_out = NULL;
+       long ret;
+       ipipe = get_pipe_info(in, true);
+       opipe = get_pipe_info(out, true);
+       if (ipipe && off_in)
+               return -ESPIPE;
+       if (opipe && off_out)
+               return -ESPIPE;
+       if (off_out) {
+               if (copy_from_user(&offset, off_out, sizeof(loff_t)))
+                       return -EFAULT;
+               __off_out = &offset;
+       }
+       if (off_in) {
+               if (copy_from_user(&offset, off_in, sizeof(loff_t)))
+                       return -EFAULT;
+               __off_in = &offset;
+       }
+       ret = do_splice(in, __off_in, out, __off_out, len, flags);
+       if (ret < 0)
+               return ret;
+       if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
+               return -EFAULT;
+       if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
+               return -EFAULT;
+       return ret;
+ }
  static int iter_to_pipe(struct iov_iter *from,
                        struct pipe_inode_info *pipe,
                        unsigned flags)
@@@ -1303,8 -1442,8 +1340,8 @@@ SYSCALL_DEFINE6(splice, int, fd_in, lof
        if (in.file) {
                out = fdget(fd_out);
                if (out.file) {
-                       error = do_splice(in.file, off_in, out.file, off_out,
-                                         len, flags);
+                       error = __do_splice(in.file, off_in, out.file, off_out,
+                                               len, flags);
                        fdput(out);
                }
                fdput(in);
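For completeness, this is roughly how a kernel-internal caller such as
io_uring's splice op benefits from the new signature: offsets live in kernel
memory and a NULL pointer means "use the file position", with no set_fs()
games and no user copies. The io_uring caller is not part of this diff, so
the sentinel convention below (-1 selects the file position) is an assumption
modeled on the io_uring splice op, not code from this commit.

#include <stdio.h>

/* stand-in for the now kernel-pointer-based do_splice() */
static long do_splice_stub(long long *off_in, long long *off_out,
                           unsigned long len)
{
        if (off_in)
                *off_in += (long long)len;
        if (off_out)
                *off_out += (long long)len;
        return (long)len;
}

/* an io_uring-style caller: -1 selects the file position (NULL pointer) */
static long splice_like_op(long long off_in, long long off_out,
                           unsigned long len)
{
        long long *poff_in  = (off_in  == -1) ? NULL : &off_in;
        long long *poff_out = (off_out == -1) ? NULL : &off_out;

        return do_splice_stub(poff_in, poff_out, len);
}

int main(void)
{
        printf("%ld\n", splice_like_op(-1, 0, 4096));
        return 0;
}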