From: Linus Torvalds
Date: Sat, 5 Jan 2019 17:16:18 +0000 (-0800)
Subject: Merge branch 'akpm' (patches from Andrew)
X-Git-Tag: v5.0-rc1~38
X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/a65981109f294ba7e64b33ad3b4575a4636fce66?hp=-c

Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - procfs updates
 - various misc bits
 - lib/ updates
 - epoll updates
 - autofs
 - fatfs
 - a few more MM bits

* emailed patches from Andrew Morton : (58 commits)
  mm/page_io.c: fix polled swap page in
  checkpatch: add Co-developed-by to signature tags
  docs: fix Co-Developed-by docs
  drivers/base/platform.c: kmemleak ignore a known leak
  fs: don't open code lru_to_page()
  fs/: remove caller signal_pending branch predictions
  mm/: remove caller signal_pending branch predictions
  arch/arc/mm/fault.c: remove caller signal_pending_branch predictions
  kernel/sched/: remove caller signal_pending branch predictions
  kernel/locking/mutex.c: remove caller signal_pending branch predictions
  mm: select HAVE_MOVE_PMD on x86 for faster mremap
  mm: speed up mremap by 20x on large regions
  mm: treewide: remove unused address argument from pte_alloc functions
  initramfs: cleanup incomplete rootfs
  scripts/gdb: fix lx-version string output
  kernel/kcov.c: mark write_comp_data() as notrace
  kernel/sysctl: add panic_print into sysctl
  panic: add options to print system info when panic happens
  bfs: extra sanity checking and static inode bitmap
  exec: separate MM_ANONPAGES and RLIMIT_STACK accounting
  ...
---

a65981109f294ba7e64b33ad3b4575a4636fce66
diff --combined fs/eventpoll.c
index 7ebae39fbcb3,2329f96469e2..a5d219d920e7
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@@ -381,7 -381,8 +381,8 @@@ static void ep_nested_calls_init(struc
   */
  static inline int ep_events_available(struct eventpoll *ep)
  {
- 	return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
+ 	return !list_empty_careful(&ep->rdllist) ||
+ 		READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
  }
  
  #ifdef CONFIG_NET_RX_BUSY_POLL
@@@ -471,7 -472,6 +472,6 @@@ static inline void ep_set_busy_poll_nap
   * no re-entered.
   *
   * @ncalls: Pointer to the nested_calls structure to be used for this call.
-  * @max_nests: Maximum number of allowed nesting calls.
   * @nproc: Nested call core function pointer.
   * @priv: Opaque data to be passed to the @nproc callback.
   * @cookie: Cookie to be used to identify this nested call.
@@@ -480,7 -480,7 +480,7 @@@
   * Returns: Returns the code returned by the @nproc callback, or -1 if
   * the maximum recursion limit has been exceeded.
   */
- static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+ static int ep_call_nested(struct nested_calls *ncalls,
  			  int (*nproc)(void *, void *, int), void *priv,
  			  void *cookie, void *ctx)
  {
@@@ -499,7 -499,7 +499,7 @@@
  	 */
  	list_for_each_entry(tncur, lsthead, llink) {
  		if (tncur->ctx == ctx &&
- 		    (tncur->cookie == cookie || ++call_nests > max_nests)) {
+ 		    (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) {
  			/*
  			 * Ops ... loop detected or maximum nest level reached.
  			 * We abort this wake by breaking the cycle itself.
@@@ -573,7 -573,7 +573,7 @@@ static void ep_poll_safewake(wait_queue
  {
  	int this_cpu = get_cpu();
  
- 	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+ 	ep_call_nested(&poll_safewake_ncalls,
  		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  
  	put_cpu();
@@@ -699,7 -699,7 +699,7 @@@ static __poll_t ep_scan_ready_list(stru
  	 */
  	spin_lock_irq(&ep->wq.lock);
  	list_splice_init(&ep->rdllist, &txlist);
- 	ep->ovflist = NULL;
+ 	WRITE_ONCE(ep->ovflist, NULL);
  	spin_unlock_irq(&ep->wq.lock);
  
  	/*
@@@ -713,7 -713,7 +713,7 @@@
  	 * other events might have been queued by the poll callback.
  	 * We re-insert them inside the main ready-list here.
  	 */
- 	for (nepi = ep->ovflist; (epi = nepi) != NULL;
+ 	for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
  	     nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
  		/*
  		 * We need to check if the item is already in the list.
@@@ -731,7 -731,7 +731,7 @@@
  	 * releasing the lock, events will be queued in the normal way inside
  	 * ep->rdllist.
  	 */
- 	ep->ovflist = EP_UNACTIVE_PTR;
+ 	WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
  
  	/*
  	 * Quickly re-inject items left on "txlist".
@@@ -1154,10 -1154,10 +1154,10 @@@ static int ep_poll_callback(wait_queue_
  	 * semantics). All the events that happen during that period of time are
  	 * chained in ep->ovflist and requeued later on.
  	 */
- 	if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ 	if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
  		if (epi->next == EP_UNACTIVE_PTR) {
- 			epi->next = ep->ovflist;
- 			ep->ovflist = epi;
+ 			epi->next = READ_ONCE(ep->ovflist);
+ 			WRITE_ONCE(ep->ovflist, epi);
  			if (epi->ws) {
  				/*
  				 * Activate ep->ws since epi->ws may get
@@@ -1333,7 -1333,6 +1333,6 @@@ static int reverse_path_check_proc(voi
  			}
  		} else {
  			error = ep_call_nested(&poll_loop_ncalls,
- 					       EP_MAX_NESTS,
  					       reverse_path_check_proc,
  					       child_file, child_file,
  					       current);
@@@ -1367,7 -1366,7 +1366,7 @@@ static int reverse_path_check(void
  	/* let's call this for all tfiles */
  	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
  		path_count_init();
- 		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ 		error = ep_call_nested(&poll_loop_ncalls,
  					reverse_path_check_proc, current_file,
  					current_file, current);
  		if (error)
@@@ -1626,21 -1625,24 +1625,24 @@@ static __poll_t ep_send_events_proc(str
  {
  	struct ep_send_events_data *esed = priv;
  	__poll_t revents;
- 	struct epitem *epi;
- 	struct epoll_event __user *uevent;
+ 	struct epitem *epi, *tmp;
+ 	struct epoll_event __user *uevent = esed->events;
  	struct wakeup_source *ws;
  	poll_table pt;
  
  	init_poll_funcptr(&pt, NULL);
+ 	esed->res = 0;
  
  	/*
  	 * We can loop without lock because we are passed a task private list.
  	 * Items cannot vanish during the loop because ep_scan_ready_list() is
  	 * holding "mtx" during this call.
  	 */
- 	for (esed->res = 0, uevent = esed->events;
- 	     !list_empty(head) && esed->res < esed->maxevents;) {
- 		epi = list_first_entry(head, struct epitem, rdllink);
+ 	lockdep_assert_held(&ep->mtx);
+ 
+ 	list_for_each_entry_safe(epi, tmp, head, rdllink) {
+ 		if (esed->res >= esed->maxevents)
+ 			break;
  
  		/*
  		 * Activate ep->ws before deactivating epi->ws to prevent
@@@ -1660,42 -1662,42 +1662,42 @@@
  
  		list_del_init(&epi->rdllink);
  
- 		revents = ep_item_poll(epi, &pt, 1);
- 
  		/*
  		 * If the event mask intersect the caller-requested one,
  		 * deliver the event to userspace. Again, ep_scan_ready_list()
- 		 * is holding "mtx", so no operations coming from userspace
+ 		 * is holding ep->mtx, so no operations coming from userspace
  		 * can change the item.
  		 */
- 		if (revents) {
- 			if (__put_user(revents, &uevent->events) ||
- 			    __put_user(epi->event.data, &uevent->data)) {
- 				list_add(&epi->rdllink, head);
- 				ep_pm_stay_awake(epi);
- 				if (!esed->res)
- 					esed->res = -EFAULT;
- 				return 0;
- 			}
- 			esed->res++;
- 			uevent++;
- 			if (epi->event.events & EPOLLONESHOT)
- 				epi->event.events &= EP_PRIVATE_BITS;
- 			else if (!(epi->event.events & EPOLLET)) {
- 				/*
- 				 * If this file has been added with Level
- 				 * Trigger mode, we need to insert back inside
- 				 * the ready list, so that the next call to
- 				 * epoll_wait() will check again the events
- 				 * availability. At this point, no one can insert
- 				 * into ep->rdllist besides us. The epoll_ctl()
- 				 * callers are locked out by
- 				 * ep_scan_ready_list() holding "mtx" and the
- 				 * poll callback will queue them in ep->ovflist.
- 				 */
- 				list_add_tail(&epi->rdllink, &ep->rdllist);
- 				ep_pm_stay_awake(epi);
- 			}
+ 		revents = ep_item_poll(epi, &pt, 1);
+ 		if (!revents)
+ 			continue;
+ 
+ 		if (__put_user(revents, &uevent->events) ||
+ 		    __put_user(epi->event.data, &uevent->data)) {
+ 			list_add(&epi->rdllink, head);
+ 			ep_pm_stay_awake(epi);
+ 			if (!esed->res)
+ 				esed->res = -EFAULT;
+ 			return 0;
+ 		}
+ 		esed->res++;
+ 		uevent++;
+ 		if (epi->event.events & EPOLLONESHOT)
+ 			epi->event.events &= EP_PRIVATE_BITS;
+ 		else if (!(epi->event.events & EPOLLET)) {
+ 			/*
+ 			 * If this file has been added with Level
+ 			 * Trigger mode, we need to insert back inside
+ 			 * the ready list, so that the next call to
+ 			 * epoll_wait() will check again the events
+ 			 * availability. At this point, no one can insert
+ 			 * into ep->rdllist besides us. The epoll_ctl()
+ 			 * callers are locked out by
+ 			 * ep_scan_ready_list() holding "mtx" and the
+ 			 * poll callback will queue them in ep->ovflist.
+ 			 */
+ 			list_add_tail(&epi->rdllink, &ep->rdllist);
+ 			ep_pm_stay_awake(epi);
  		}
  	}
  
@@@ -1747,6 -1749,7 +1749,7 @@@ static int ep_poll(struct eventpoll *ep
  {
  	int res = 0, eavail, timed_out = 0;
  	u64 slack = 0;
+ 	bool waiter = false;
  	wait_queue_entry_t wait;
  	ktime_t expires, *to = NULL;
  
@@@ -1761,11 -1764,18 +1764,18 @@@
  	} else if (timeout == 0) {
  		/*
  		 * Avoid the unnecessary trip to the wait queue loop, if the
- 		 * caller specified a non blocking operation.
+ 		 * caller specified a non blocking operation. We still need
+ 		 * lock because we could race and not see an epi being added
+ 		 * to the ready list while in irq callback. Thus incorrectly
+ 		 * returning 0 back to userspace.
  		 */
  		timed_out = 1;
  
+ 		spin_lock_irq(&ep->wq.lock);
- 		goto check_events;
+ 		eavail = ep_events_available(ep);
+ 		spin_unlock_irq(&ep->wq.lock);
+ 
+ 		goto send_events;
  	}
  
  fetch_events:
@@@ -1773,64 -1783,66 +1783,66 @@@
  	if (!ep_events_available(ep))
  		ep_busy_loop(ep, timed_out);
  
- 	spin_lock_irq(&ep->wq.lock);
+ 	eavail = ep_events_available(ep);
+ 	if (eavail)
+ 		goto send_events;
  
- 	if (!ep_events_available(ep)) {
- 		/*
- 		 * Busy poll timed out. Drop NAPI ID for now, we can add
- 		 * it back in when we have moved a socket with a valid NAPI
- 		 * ID onto the ready list.
- 		 */
- 		ep_reset_busy_poll_napi_id(ep);
+ 	/*
+ 	 * Busy poll timed out. Drop NAPI ID for now, we can add
+ 	 * it back in when we have moved a socket with a valid NAPI
+ 	 * ID onto the ready list.
+ 	 */
+ 	ep_reset_busy_poll_napi_id(ep);
  
- 		/*
- 		 * We don't have any available event to return to the caller.
- 		 * We need to sleep here, and we will be wake up by
- 		 * ep_poll_callback() when events will become available.
- 		 */
+ 	/*
+ 	 * We don't have any available event to return to the caller. We need
+ 	 * to sleep here, and we will be woken by ep_poll_callback() when events
+ 	 * become available.
+ 	 */
+ 	if (!waiter) {
+ 		waiter = true;
  		init_waitqueue_entry(&wait, current);
- 		__add_wait_queue_exclusive(&ep->wq, &wait);
  
- 		for (;;) {
- 			/*
- 			 * We don't want to sleep if the ep_poll_callback() sends us
- 			 * a wakeup in between. That's why we set the task state
- 			 * to TASK_INTERRUPTIBLE before doing the checks.
- 			 */
- 			set_current_state(TASK_INTERRUPTIBLE);
- 			/*
- 			 * Always short-circuit for fatal signals to allow
- 			 * threads to make a timely exit without the chance of
- 			 * finding more events available and fetching
- 			 * repeatedly.
- 			 */
- 			if (fatal_signal_pending(current)) {
- 				res = -EINTR;
- 				break;
- 			}
- 			if (ep_events_available(ep) || timed_out)
- 				break;
- 			if (signal_pending(current)) {
- 				res = -EINTR;
- 				break;
- 			}
+ 		spin_lock_irq(&ep->wq.lock);
+ 		__add_wait_queue_exclusive(&ep->wq, &wait);
+ 		spin_unlock_irq(&ep->wq.lock);
+ 	}
  
- 			spin_unlock_irq(&ep->wq.lock);
- 			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
- 				timed_out = 1;
+ 	for (;;) {
+ 		/*
+ 		 * We don't want to sleep if the ep_poll_callback() sends us
+ 		 * a wakeup in between. That's why we set the task state
+ 		 * to TASK_INTERRUPTIBLE before doing the checks.
+ 		 */
+ 		set_current_state(TASK_INTERRUPTIBLE);
+ 		/*
+ 		 * Always short-circuit for fatal signals to allow
+ 		 * threads to make a timely exit without the chance of
+ 		 * finding more events available and fetching
+ 		 * repeatedly.
+ 		 */
+ 		if (fatal_signal_pending(current)) {
+ 			res = -EINTR;
+ 			break;
+ 		}
  
- 			spin_lock_irq(&ep->wq.lock);
+ 		eavail = ep_events_available(ep);
+ 		if (eavail)
+ 			break;
+ 		if (signal_pending(current)) {
+ 			res = -EINTR;
+ 			break;
  		}
- 		__remove_wait_queue(&ep->wq, &wait);
- 		__set_current_state(TASK_RUNNING);
+ 
+ 		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
+ 			timed_out = 1;
+ 			break;
+ 		}
  	}
  
- check_events:
- 	/* Is it worth to try to dig for events ? */
- 	eavail = ep_events_available(ep);
- 
- 	spin_unlock_irq(&ep->wq.lock);
+ 	__set_current_state(TASK_RUNNING);
  
+ send_events:
  	/*
  	 * Try to transfer events to user space. In case we get 0 events and
  	 * there's still timeout left over, we go trying again in search of
@@@ -1840,6 -1852,12 +1852,12 @@@
  	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
  		goto fetch_events;
  
+ 	if (waiter) {
+ 		spin_lock_irq(&ep->wq.lock);
+ 		__remove_wait_queue(&ep->wq, &wait);
+ 		spin_unlock_irq(&ep->wq.lock);
+ 	}
+ 
  	return res;
  }
  
@@@ -1876,7 -1894,7 +1894,7 @@@ static int ep_loop_check_proc(void *pri
  			ep_tovisit = epi->ffd.file->private_data;
  			if (ep_tovisit->visited)
  				continue;
- 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ 			error = ep_call_nested(&poll_loop_ncalls,
  					ep_loop_check_proc, epi->ffd.file,
  					ep_tovisit, current);
  			if (error != 0)
@@@ -1916,7 -1934,7 +1934,7 @@@ static int ep_loop_check(struct eventpo
  	int ret;
  	struct eventpoll *ep_cur, *ep_next;
  
- 	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ 	ret = ep_call_nested(&poll_loop_ncalls,
  			      ep_loop_check_proc, file, ep, current);
  	/* clear visited list */
  	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
@@@ -2172,7 -2190,7 +2190,7 @@@ static int do_epoll_wait(int epfd, stru
  		return -EINVAL;
  
  	/* Verify that the area passed by the user is writeable */
- 	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
+ 	if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
  		return -EFAULT;
  
  	/* Get the "struct file *" for the eventpoll file */
diff --combined fs/fat/dir.c
index 0295a095b920,20acaea8a7e6..9d01db37183f
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@@ -57,7 -57,7 +57,7 @@@ static inline void fat_dir_readahead(st
  	if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
  		return;
  	/* root dir of FAT12/FAT16 */
- 	if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
+ 	if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
  		return;
  
  	bh = sb_find_get_block(sb, phys);
@@@ -805,7 -805,7 +805,7 @@@ static long fat_dir_ioctl(struct file *
  		return fat_generic_ioctl(filp, cmd, arg);
  	}
  
- 	if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
+ 	if (!access_ok(d1, sizeof(struct __fat_dirent[2])))
  		return -EFAULT;
  	/*
  	 * Yes, we don't need this put_user() absolutely. However old
@@@ -845,7 -845,7 +845,7 @@@ static long fat_compat_dir_ioctl(struc
  		return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
  	}
  
- 	if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
+ 	if (!access_ok(d1, sizeof(struct compat_dirent[2])))
  		return -EFAULT;
  	/*
  	 * Yes, we don't need this put_user() absolutely. However old
@@@ -1313,7 -1313,7 +1313,7 @@@ int fat_add_entries(struct inode *dir, 
  		}
  	}
  	if (dir->i_ino == MSDOS_ROOT_INO) {
- 		if (sbi->fat_bits != 32)
+ 		if (!is_fat32(sbi))
  			goto error;
  	} else if (MSDOS_I(dir)->i_start == 0) {
  		fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
diff --combined kernel/sched/core.c
index 1f3e19fd6dc6,17a954c9e153..223f78d5c111
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -3416,7 -3416,7 +3416,7 @@@ static void __sched notrace __schedule(
  
  	switch_count = &prev->nivcsw;
  	if (!preempt && prev->state) {
- 		if (unlikely(signal_pending_state(prev->state, prev))) {
+ 		if (signal_pending_state(prev->state, prev)) {
  			prev->state = TASK_RUNNING;
  		} else {
  			deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
@@@ -4450,7 -4450,7 +4450,7 @@@ static int sched_copy_attr(struct sched
  	u32 size;
  	int ret;
  
- 	if (!access_ok(VERIFY_WRITE, uattr, SCHED_ATTR_SIZE_VER0))
+ 	if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0))
  		return -EFAULT;
  
  	/* Zero the full structure, so that a short copy will be nice: */
@@@ -4650,7 -4650,7 +4650,7 @@@ static int sched_read_attr(struct sched
  {
  	int ret;
  
- 	if (!access_ok(VERIFY_WRITE, uattr, usize))
+ 	if (!access_ok(uattr, usize))
  		return -EFAULT;
  
  	/*
diff --combined mm/gup.c
index 6f591ccb8eca,6dd33e16a806..05acd7e2eb22
--- a/mm/gup.c
+++ b/mm/gup.c
@@@ -727,7 -727,7 +727,7 @@@ retry
  	 * If we have a pending SIGKILL, don't keep faulting pages and
  	 * potentially allocating memory.
  	 */
- 	if (unlikely(fatal_signal_pending(current))) {
+ 	if (fatal_signal_pending(current)) {
  		ret = -ERESTARTSYS;
  		goto out;
  	}
@@@ -1813,7 -1813,8 +1813,7 @@@ int __get_user_pages_fast(unsigned lon
  	len = (unsigned long) nr_pages << PAGE_SHIFT;
  	end = start + len;
  
 -	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -					(void __user *)start, len)))
 +	if (unlikely(!access_ok((void __user *)start, len)))
  		return 0;
  
  	/*
@@@ -1867,7 -1868,8 +1867,7 @@@ int get_user_pages_fast(unsigned long s
  	if (nr_pages <= 0)
  		return 0;
  
 -	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 -					(void __user *)start, len)))
 +	if (unlikely(!access_ok((void __user *)start, len)))
  		return -EFAULT;
  
  	if (gup_fast_permitted(start, nr_pages, write)) {