From: Linus Torvalds
Date: Sat, 5 Jan 2019 17:16:18 +0000 (-0800)
Subject: Merge branch 'akpm' (patches from Andrew)
X-Git-Tag: v5.0-rc1~38
X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/a65981109f294ba7e64b33ad3b4575a4636fce66?hp=-c

Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - procfs updates
 - various misc bits
 - lib/ updates
 - epoll updates
 - autofs
 - fatfs
 - a few more MM bits

* emailed patches from Andrew Morton : (58 commits)
  mm/page_io.c: fix polled swap page in
  checkpatch: add Co-developed-by to signature tags
  docs: fix Co-Developed-by docs
  drivers/base/platform.c: kmemleak ignore a known leak
  fs: don't open code lru_to_page()
  fs/: remove caller signal_pending branch predictions
  mm/: remove caller signal_pending branch predictions
  arch/arc/mm/fault.c: remove caller signal_pending_branch predictions
  kernel/sched/: remove caller signal_pending branch predictions
  kernel/locking/mutex.c: remove caller signal_pending branch predictions
  mm: select HAVE_MOVE_PMD on x86 for faster mremap
  mm: speed up mremap by 20x on large regions
  mm: treewide: remove unused address argument from pte_alloc functions
  initramfs: cleanup incomplete rootfs
  scripts/gdb: fix lx-version string output
  kernel/kcov.c: mark write_comp_data() as notrace
  kernel/sysctl: add panic_print into sysctl
  panic: add options to print system info when panic happens
  bfs: extra sanity checking and static inode bitmap
  exec: separate MM_ANONPAGES and RLIMIT_STACK accounting
  ...
---

a65981109f294ba7e64b33ad3b4575a4636fce66
diff --combined fs/eventpoll.c
index 7ebae39fbcb3,2329f96469e2..a5d219d920e7
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@@ -381,7 -381,8 +381,8 @@@ static void ep_nested_calls_init(struc
   */
  static inline int ep_events_available(struct eventpoll *ep)
  {
- 	return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
+ 	return !list_empty_careful(&ep->rdllist) ||
+ 		READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
  }
  
  #ifdef CONFIG_NET_RX_BUSY_POLL
@@@ -471,7 -472,6 +472,6 @@@ static inline void ep_set_busy_poll_nap
   * no re-entered.
   *
   * @ncalls: Pointer to the nested_calls structure to be used for this call.
-  * @max_nests: Maximum number of allowed nesting calls.
   * @nproc: Nested call core function pointer.
   * @priv: Opaque data to be passed to the @nproc callback.
   * @cookie: Cookie to be used to identify this nested call.
@@@ -480,7 -480,7 +480,7 @@@
   * Returns: Returns the code returned by the @nproc callback, or -1 if
   * the maximum recursion limit has been exceeded.
   */
- static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+ static int ep_call_nested(struct nested_calls *ncalls,
  			  int (*nproc)(void *, void *, int), void *priv,
  			  void *cookie, void *ctx)
  {
@@@ -499,7 -499,7 +499,7 @@@
  	 */
  	list_for_each_entry(tncur, lsthead, llink) {
  		if (tncur->ctx == ctx &&
- 		    (tncur->cookie == cookie || ++call_nests > max_nests)) {
+ 		    (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) {
  			/*
  			 * Ops ... loop detected or maximum nest level reached.
  			 * We abort this wake by breaking the cycle itself.
@@@ -573,7 -573,7 +573,7 @@@ static void ep_poll_safewake(wait_queue
  {
  	int this_cpu = get_cpu();
  
- 	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+ 	ep_call_nested(&poll_safewake_ncalls,
  		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  
  	put_cpu();
@@@ -699,7 -699,7 +699,7 @@@ static __poll_t ep_scan_ready_list(stru
  	 */
  	spin_lock_irq(&ep->wq.lock);
  	list_splice_init(&ep->rdllist, &txlist);
- 	ep->ovflist = NULL;
+ 	WRITE_ONCE(ep->ovflist, NULL);
  	spin_unlock_irq(&ep->wq.lock);
  
  	/*
@@@ -713,7 -713,7 +713,7 @@@
  	 * other events might have been queued by the poll callback.
  	 * We re-insert them inside the main ready-list here.
  	 */
- 	for (nepi = ep->ovflist; (epi = nepi) != NULL;
+ 	for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
  	     nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
  		/*
  		 * We need to check if the item is already in the list.
@@@ -731,7 -731,7 +731,7 @@@
  	 * releasing the lock, events will be queued in the normal way inside
  	 * ep->rdllist.
  	 */
- 	ep->ovflist = EP_UNACTIVE_PTR;
+ 	WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
  
  	/*
  	 * Quickly re-inject items left on "txlist".
@@@ -1154,10 -1154,10 +1154,10 @@@ static int ep_poll_callback(wait_queue_
  	 * semantics). All the events that happen during that period of time are
  	 * chained in ep->ovflist and requeued later on.
  	 */
- 	if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ 	if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
  		if (epi->next == EP_UNACTIVE_PTR) {
- 			epi->next = ep->ovflist;
- 			ep->ovflist = epi;
+ 			epi->next = READ_ONCE(ep->ovflist);
+ 			WRITE_ONCE(ep->ovflist, epi);
  			if (epi->ws) {
  				/*
  				 * Activate ep->ws since epi->ws may get
@@@ -1333,7 -1333,6 +1333,6 @@@ static int reverse_path_check_proc(voi
  			}
  		} else {
  			error = ep_call_nested(&poll_loop_ncalls,
- 					       EP_MAX_NESTS,
  					       reverse_path_check_proc,
  					       child_file, child_file,
  					       current);
@@@ -1367,7 -1366,7 +1366,7 @@@ static int reverse_path_check(void
  	/* let's call this for all tfiles */
  	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
  		path_count_init();
- 		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ 		error = ep_call_nested(&poll_loop_ncalls,
  					reverse_path_check_proc, current_file,
  					current_file, current);
  		if (error)
@@@ -1626,21 -1625,24 +1625,24 @@@ static __poll_t ep_send_events_proc(str
  {
  	struct ep_send_events_data *esed = priv;
  	__poll_t revents;
- 	struct epitem *epi;
- 	struct epoll_event __user *uevent;
+ 	struct epitem *epi, *tmp;
+ 	struct epoll_event __user *uevent = esed->events;
  	struct wakeup_source *ws;
  	poll_table pt;
  
  	init_poll_funcptr(&pt, NULL);
+ 	esed->res = 0;
  
  	/*
  	 * We can loop without lock because we are passed a task private list.
  	 * Items cannot vanish during the loop because ep_scan_ready_list() is
  	 * holding "mtx" during this call.
  	 */
- 	for (esed->res = 0, uevent = esed->events;
- 	     !list_empty(head) && esed->res < esed->maxevents;) {
- 		epi = list_first_entry(head, struct epitem, rdllink);
+ 	lockdep_assert_held(&ep->mtx);
+ 
+ 	list_for_each_entry_safe(epi, tmp, head, rdllink) {
+ 		if (esed->res >= esed->maxevents)
+ 			break;
  
  		/*
  		 * Activate ep->ws before deactivating epi->ws to prevent
@@@ -1660,42 -1662,42 +1662,42 @@@
  
  		list_del_init(&epi->rdllink);
  
- 		revents = ep_item_poll(epi, &pt, 1);
- 
  		/*
  		 * If the event mask intersect the caller-requested one,
  		 * deliver the event to userspace. Again, ep_scan_ready_list()
- 		 * is holding "mtx", so no operations coming from userspace
+ 		 * is holding ep->mtx, so no operations coming from userspace
  		 * can change the item.
  		 */
- 		if (revents) {
- 			if (__put_user(revents, &uevent->events) ||
- 			    __put_user(epi->event.data, &uevent->data)) {
- 				list_add(&epi->rdllink, head);
- 				ep_pm_stay_awake(epi);
- 				if (!esed->res)
- 					esed->res = -EFAULT;
- 				return 0;
- 			}
- 			esed->res++;
- 			uevent++;
- 			if (epi->event.events & EPOLLONESHOT)
- 				epi->event.events &= EP_PRIVATE_BITS;
- 			else if (!(epi->event.events & EPOLLET)) {
- 				/*
- 				 * If this file has been added with Level
- 				 * Trigger mode, we need to insert back inside
- 				 * the ready list, so that the next call to
- 				 * epoll_wait() will check again the events
- 				 * availability. At this point, no one can insert
- 				 * into ep->rdllist besides us. The epoll_ctl()
- 				 * callers are locked out by
- 				 * ep_scan_ready_list() holding "mtx" and the
- 				 * poll callback will queue them in ep->ovflist.
- 				 */
- 				list_add_tail(&epi->rdllink, &ep->rdllist);
- 				ep_pm_stay_awake(epi);
- 			}
+ 		revents = ep_item_poll(epi, &pt, 1);
+ 		if (!revents)
+ 			continue;
+ 
+ 		if (__put_user(revents, &uevent->events) ||
+ 		    __put_user(epi->event.data, &uevent->data)) {
+ 			list_add(&epi->rdllink, head);
+ 			ep_pm_stay_awake(epi);
+ 			if (!esed->res)
+ 				esed->res = -EFAULT;
+ 			return 0;
+ 		}
+ 		esed->res++;
+ 		uevent++;
+ 		if (epi->event.events & EPOLLONESHOT)
+ 			epi->event.events &= EP_PRIVATE_BITS;
+ 		else if (!(epi->event.events & EPOLLET)) {
+ 			/*
+ 			 * If this file has been added with Level
+ 			 * Trigger mode, we need to insert back inside
+ 			 * the ready list, so that the next call to
+ 			 * epoll_wait() will check again the events
+ 			 * availability. At this point, no one can insert
+ 			 * into ep->rdllist besides us. The epoll_ctl()
+ 			 * callers are locked out by
+ 			 * ep_scan_ready_list() holding "mtx" and the
+ 			 * poll callback will queue them in ep->ovflist.
+ 			 */
+ 			list_add_tail(&epi->rdllink, &ep->rdllist);
+ 			ep_pm_stay_awake(epi);
  		}
  	}
  
@@@ -1747,6 -1749,7 +1749,7 @@@ static int ep_poll(struct eventpoll *ep
  {
  	int res = 0, eavail, timed_out = 0;
  	u64 slack = 0;
+ 	bool waiter = false;
  	wait_queue_entry_t wait;
  	ktime_t expires, *to = NULL;
  
@@@ -1761,11 -1764,18 +1764,18 @@@
  	} else if (timeout == 0) {
  		/*
  		 * Avoid the unnecessary trip to the wait queue loop, if the
- 		 * caller specified a non blocking operation.
+ 		 * caller specified a non blocking operation. We still need
+ 		 * lock because we could race and not see an epi being added
+ 		 * to the ready list while in irq callback. Thus incorrectly
+ 		 * returning 0 back to userspace.
  		 */
  		timed_out = 1;
  
+ 		spin_lock_irq(&ep->wq.lock);
- 		goto check_events;
+ 		eavail = ep_events_available(ep);
+ 		spin_unlock_irq(&ep->wq.lock);
+ 
+ 		goto send_events;
  	}
  
  fetch_events:
@@@ -1773,64 -1783,66 +1783,66 @@@
  	if (!ep_events_available(ep))
  		ep_busy_loop(ep, timed_out);
  
- 	spin_lock_irq(&ep->wq.lock);
+ 	eavail = ep_events_available(ep);
+ 	if (eavail)
+ 		goto send_events;
  
- 	if (!ep_events_available(ep)) {
- 		/*
- 		 * Busy poll timed out. Drop NAPI ID for now, we can add
- 		 * it back in when we have moved a socket with a valid NAPI
- 		 * ID onto the ready list.
- 		 */
- 		ep_reset_busy_poll_napi_id(ep);
+ 	/*
+ 	 * Busy poll timed out. Drop NAPI ID for now, we can add
+ 	 * it back in when we have moved a socket with a valid NAPI
+ 	 * ID onto the ready list.
+ 	 */
+ 	ep_reset_busy_poll_napi_id(ep);
  
- 		/*
- 		 * We don't have any available event to return to the caller.
- 		 * We need to sleep here, and we will be wake up by
- 		 * ep_poll_callback() when events will become available.
- 		 */
+ 	/*
+ 	 * We don't have any available event to return to the caller. We need
+ 	 * to sleep here, and we will be woken by ep_poll_callback() when events
+ 	 * become available.
+ 	 */
+ 	if (!waiter) {
+ 		waiter = true;
  		init_waitqueue_entry(&wait, current);
- 		__add_wait_queue_exclusive(&ep->wq, &wait);
  
- 		for (;;) {
- 			/*
- 			 * We don't want to sleep if the ep_poll_callback() sends us
- 			 * a wakeup in between. That's why we set the task state
- 			 * to TASK_INTERRUPTIBLE before doing the checks.
- 			 */
- 			set_current_state(TASK_INTERRUPTIBLE);
- 			/*
- 			 * Always short-circuit for fatal signals to allow
- 			 * threads to make a timely exit without the chance of
- 			 * finding more events available and fetching
- 			 * repeatedly.
- 			 */
- 			if (fatal_signal_pending(current)) {
- 				res = -EINTR;
- 				break;
- 			}
- 			if (ep_events_available(ep) || timed_out)
- 				break;
- 			if (signal_pending(current)) {
- 				res = -EINTR;
- 				break;
- 			}
+ 		spin_lock_irq(&ep->wq.lock);
+ 		__add_wait_queue_exclusive(&ep->wq, &wait);
+ 		spin_unlock_irq(&ep->wq.lock);
+ 	}
  
- 			spin_unlock_irq(&ep->wq.lock);
- 			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
- 				timed_out = 1;
+ 	for (;;) {
+ 		/*
+ 		 * We don't want to sleep if the ep_poll_callback() sends us
+ 		 * a wakeup in between. That's why we set the task state
+ 		 * to TASK_INTERRUPTIBLE before doing the checks.
+ 		 */
+ 		set_current_state(TASK_INTERRUPTIBLE);
+ 		/*
+ 		 * Always short-circuit for fatal signals to allow
+ 		 * threads to make a timely exit without the chance of
+ 		 * finding more events available and fetching
+ 		 * repeatedly.
+ 		 */
+ 		if (fatal_signal_pending(current)) {
+ 			res = -EINTR;
+ 			break;
+ 		}
  
- 			spin_lock_irq(&ep->wq.lock);
+ 		eavail = ep_events_available(ep);
+ 		if (eavail)
+ 			break;
+ 		if (signal_pending(current)) {
+ 			res = -EINTR;
+ 			break;
  		}
- 		__remove_wait_queue(&ep->wq, &wait);
- 		__set_current_state(TASK_RUNNING);
+ 
+ 		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
+ 			timed_out = 1;
+ 			break;
+ 		}
  	}
  
- check_events:
- 	/* Is it worth to try to dig for events ? */
- 	eavail = ep_events_available(ep);
- 
- 	spin_unlock_irq(&ep->wq.lock);
+ 	__set_current_state(TASK_RUNNING);
  
+ send_events:
  	/*
  	 * Try to transfer events to user space. In case we get 0 events and
  	 * there's still timeout left over, we go trying again in search of
@@@ -1840,6 -1852,12 +1852,12 @@@
  	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
  		goto fetch_events;
  
+ 	if (waiter) {
+ 		spin_lock_irq(&ep->wq.lock);
+ 		__remove_wait_queue(&ep->wq, &wait);
+ 		spin_unlock_irq(&ep->wq.lock);
+ 	}
+ 
  	return res;
  }
  
@@@ -1876,7 -1894,7 +1894,7 @@@ static int ep_loop_check_proc(void *pri
  			ep_tovisit = epi->ffd.file->private_data;
  			if (ep_tovisit->visited)
  				continue;
- 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ 			error = ep_call_nested(&poll_loop_ncalls,
  					ep_loop_check_proc, epi->ffd.file,
  					ep_tovisit, current);
  			if (error != 0)
@@@ -1916,7 -1934,7 +1934,7 @@@ static int ep_loop_check(struct eventpo
  	int ret;
  	struct eventpoll *ep_cur, *ep_next;
  
- 	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ 	ret = ep_call_nested(&poll_loop_ncalls,
  			      ep_loop_check_proc, file, ep, current);
  	/* clear visited list */
  	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
@@@ -2172,7 -2190,7 +2190,7 @@@ static int do_epoll_wait(int epfd, stru
  		return -EINVAL;
  
  	/* Verify that the area passed by the user is writeable */
- 	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
+ 	if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
  		return -EFAULT;
  
  	/* Get the "struct file *" for the eventpoll file */
diff --combined fs/fat/dir.c
index 0295a095b920,20acaea8a7e6..9d01db37183f
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@@ -57,7 -57,7 +57,7 @@@ static inline void fat_dir_readahead(st
  	if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
  		return;
  	/* root dir of FAT12/FAT16 */
- 	if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
+ 	if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
  		return;
  
  	bh = sb_find_get_block(sb, phys);
@@@ -805,7 -805,7 +805,7 @@@ static long fat_dir_ioctl(struct file *
  		return fat_generic_ioctl(filp, cmd, arg);
  	}
  
- 	if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
+ 	if (!access_ok(d1, sizeof(struct __fat_dirent[2])))
  		return -EFAULT;
  	/*
  	 * Yes, we don't need this put_user() absolutely. However old
@@@ -845,7 -845,7 +845,7 @@@ static long fat_compat_dir_ioctl(struc
  		return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
  	}
  
- 	if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
+ 	if (!access_ok(d1, sizeof(struct compat_dirent[2])))
  		return -EFAULT;
  	/*
  	 * Yes, we don't need this put_user() absolutely. However old
@@@ -1313,7 -1313,7 +1313,7 @@@ int fat_add_entries(struct inode *dir, 
  		}
  	}
  	if (dir->i_ino == MSDOS_ROOT_INO) {
- 		if (sbi->fat_bits != 32)
+ 		if (!is_fat32(sbi))
  			goto error;
  	} else if (MSDOS_I(dir)->i_start == 0) {
  		fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
diff --combined kernel/sched/core.c
index 1f3e19fd6dc6,17a954c9e153..223f78d5c111
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -3416,7 -3416,7 +3416,7 @@@ static void __sched notrace __schedule(
  
  	switch_count = &prev->nivcsw;
  	if (!preempt && prev->state) {
- 		if (unlikely(signal_pending_state(prev->state, prev))) {
+ 		if (signal_pending_state(prev->state, prev)) {
  			prev->state = TASK_RUNNING;
  		} else {
  			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
@@@ -4450,7 -4450,7 +4450,7 @@@ static int sched_copy_attr(struct sched
  	u32 size;
  	int ret;
  
- 	if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
+ 	if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
  		return -EFAULT;
  
  	/* Zero the full structure, so that a short copy will be nice: */
@@@ -4650,7 -4650,7 +4650,7 @@@ static int sched_read_attr(struct sched
  {
  	int ret;
  
- 	if (!access_ok(VERIFY_WRITE, uattr, usize))
+ 	if (!access_ok(uattr, usize))
  		return -EFAULT;
  
  	/*
diff --combined mm/gup.c
index 6f591ccb8eca,6dd33e16a806..05acd7e2eb22
--- a/mm/gup.c
+++ b/mm/gup.c
@@@ -727,7 -727,7 +727,7 @@@ retry
  	 * If we have a pending SIGKILL, don't keep faulting pages and
  	 * potentially allocating memory.
  	 */
- 	if (unlikely(fatal_signal_pending(current))) {
+ 	if (fatal_signal_pending(current)) {
  		ret = -ERESTARTSYS;
  		goto out;
  	}
@@@ -1813,7 -1813,8 +1813,7 @@@ int __get_user_pages_fast(unsigned lon
  	len = (unsigned long) nr_pages << PAGE_SHIFT;
  	end = start + len;
  
 -	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -					(void __user *)start, len)))
 +	if (unlikely(!access_ok((void __user *)start, len)))
  		return 0;
  
  	/*
@@@ -1867,7 -1868,8 +1867,7 @@@ int get_user_pages_fast(unsigned long s
  	if (nr_pages <= 0)
  		return 0;
  
 -	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -					(void __user *)start, len)))
 +	if (unlikely(!access_ok((void __user *)start, len)))
  		return -EFAULT;
  
  	if (gup_fast_permitted(start, nr_pages, write)) {