Git Repo - linux.git/blame_incremental

... / ...

Commit	Line	Data
	1	// SPDX-License-Identifier: GPL-2.0
	2	/*
	3	* linux/ipc/sem.c
	4	* Copyright (C) 1992 Krishna Balasubramanian
	5	* Copyright (C) 1995 Eric Schenk, Bruno Haible
	6	*
	7	* /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <[email protected]>
	8	*
	9	* SMP-threaded, sysctl's added
	10	* (c) 1999 Manfred Spraul <[email protected]>
	11	* Enforced range limit on SEM_UNDO
	12	* (c) 2001 Red Hat Inc
	13	* Lockless wakeup
	14	* (c) 2003 Manfred Spraul <[email protected]>
	15	* (c) 2016 Davidlohr Bueso <[email protected]>
	16	* Further wakeup optimizations, documentation
	17	* (c) 2010 Manfred Spraul <[email protected]>
	18	*
	19	* support for audit of ipc object properties and permission changes
	20	* Dustin Kirkland <[email protected]>
	21	*
	22	* namespaces support
	23	* OpenVZ, SWsoft Inc.
	24	* Pavel Emelianov <[email protected]>
	25	*
	26	* Implementation notes: (May 2010)
	27	* This file implements System V semaphores.
	28	*
	29	* User space visible behavior:
	30	* - FIFO ordering for semop() operations (just FIFO, not starvation
	31	* protection)
	32	* - multiple semaphore operations that alter the same semaphore in
	33	* one semop() are handled.
	34	* - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
	35	* SETALL calls.
	36	* - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
	37	* - undo adjustments at process exit are limited to 0..SEMVMX.
	38	* - namespaces are supported.
	39	* - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
	40	* to /proc/sys/kernel/sem.
	41	* - statistics about the usage are reported in /proc/sysvipc/sem.
	42	*
	43	* Internals:
	44	* - scalability:
	45	* - all global variables are read-mostly.
	46	* - semop() calls and semctl(RMID) are synchronized by RCU.
	47	* - most operations do write operations (actually: spin_lock calls) to
	48	* the per-semaphore array structure.
	49	* Thus: Perfect SMP scaling between independent semaphore arrays.
	50	* If multiple semaphores in one array are used, then cache line
	51	* thrashing on the semaphore array spinlock will limit the scaling.
	52	* - semncnt and semzcnt are calculated on demand in count_semcnt()
	53	* - the task that performs a successful semop() scans the list of all
	54	* sleeping tasks and completes any pending operations that can be fulfilled.
	55	* Semaphores are actively given to waiting tasks (necessary for FIFO).
	56	* (see update_queue())
	57	* - To improve the scalability, the actual wake-up calls are performed after
	58	* dropping all locks. (see wake_up_sem_queue_prepare())
	59	* - All work is done by the waker, the woken up task does not have to do
	60	* anything - not even acquiring a lock or dropping a refcount.
	61	* - A woken up task may not even touch the semaphore array anymore, it may
	62	* have been destroyed already by a semctl(RMID).
	63	* - UNDO values are stored in an array (one per process and per
	64	* semaphore array, lazily allocated). For backwards compatibility, multiple
	65	* modes for the UNDO variables are supported (per process, per thread)
	66	* (see copy_semundo, CLONE_SYSVSEM)
	67	* - There are two lists of the pending operations: a per-array list
	68	* and per-semaphore list (stored in the array). This allows to achieve FIFO
	69	* ordering without always scanning all pending operations.
	70	* The worst-case behavior is nevertheless O(N^2) for N wakeups.
	71	*/
	72
	73	#include <linux/compat.h>
	74	#include <linux/slab.h>
	75	#include <linux/spinlock.h>
	76	#include <linux/init.h>
	77	#include <linux/proc_fs.h>
	78	#include <linux/time.h>
	79	#include <linux/security.h>
	80	#include <linux/syscalls.h>
	81	#include <linux/audit.h>
	82	#include <linux/capability.h>
	83	#include <linux/seq_file.h>
	84	#include <linux/rwsem.h>
	85	#include <linux/nsproxy.h>
	86	#include <linux/ipc_namespace.h>
	87	#include <linux/sched/wake_q.h>
	88	#include <linux/nospec.h>
	89	#include <linux/rhashtable.h>
	90
	91	#include <linux/uaccess.h>
	92	#include "util.h"
	93
	94	/* One semaphore structure for each semaphore in the system. */
	95	struct sem {
	96	int semval; /* current value */
	97	/*
	98	* PID of the process that last modified the semaphore. For
	99	* Linux, specifically these are:
	100	* - semop
	101	* - semctl, via SETVAL and SETALL.
	102	* - at task exit when performing undo adjustments (see exit_sem).
	103	*/
	104	struct pid *sempid;
	105	spinlock_t lock; /* per-semaphore lock; sem_lock() takes it for single-sop calls */
	106	struct list_head pending_alter; /* pending single-sop operations */
	107	/* that alter the semaphore */
	108	struct list_head pending_const; /* pending single-sop operations */
	109	/* that do not alter the semaphore */
	110	time64_t sem_otime; /* per-semaphore copy of the last-operation time, written by set_semotime() */
	111	} ____cacheline_aligned_in_smp;
	112
	113	/* One sem_array data structure for each set of semaphores in the system. */
	114	struct sem_array {
	115	struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */
	116	time64_t sem_ctime; /* create/last semctl() time */
	117	struct list_head pending_alter; /* pending operations */
	118	/* that alter the array */
	119	struct list_head pending_const; /* pending complex operations */
	120	/* that do not alter semvals */
	121	struct list_head list_id; /* undo requests on this array */
	122	int sem_nsems; /* no. of semaphores in array */
	123	int complex_count; /* pending complex operations */
	124	unsigned int use_global_lock;/* >0: global lock required; counted down as hysteresis by complexmode_tryleave() */
	125
	126	struct sem sems[]; /* flexible array, sem_nsems entries (sized in sem_alloc()) */
	127	} __randomize_layout;
	128
	129	/* One queue for each sleeping process in the system. */
	130	struct sem_queue {
	131		struct list_head list; /* queue of pending operations */
	132		struct task_struct *sleeper; /* this process */
	133		struct sem_undo *undo; /* undo structure */
	134		struct pid *pid; /* process id of requesting process */
	135		int status; /* completion status of operation */
	136		struct sembuf *sops; /* array of pending operations */
	137		struct sembuf *blocking; /* the operation that blocked */
	138		int nsops; /* number of operations */
	139		bool alter; /* does sops alter the array? */
	140		bool dupsop; /* sops on more than one sem_num */
	141	};
	142
	143	/* Each task has a list of undo requests. They are executed automatically
	144	* when the process exits.
	145	*/
	146	struct sem_undo {
	147		struct list_head list_proc; /* per-process list: *
	148					     * all undos from one process
	149					     * rcu protected */
	150		struct rcu_head rcu; /* rcu struct for sem_undo */
	151		struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
	152		struct list_head list_id; /* per semaphore array list:
	153					   * all undos for one array */
	154		int semid; /* semaphore set identifier */
	155		short *semadj; /* array of adjustments */
	156		/* one per semaphore */
	157	};
	158
	159	/* sem_undo_list controls shared access to the list of sem_undo structures
	160	* that may be shared among all a CLONE_SYSVSEM task group.
	161	*/
	162	struct sem_undo_list {
	163	refcount_t refcnt; /* reference count of this list */
	164	spinlock_t lock; /* taken for writes to list_proc; readers use RCU */
	165	struct list_head list_proc; /* the sem_undo structures of this task group */
	166	};
	167
	168
	169	#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) /* the semaphore ids entry of namespace @ns */
	170
	171	static int newary(struct ipc_namespace *, struct ipc_params *);
	172	static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
	173	#ifdef CONFIG_PROC_FS
	174	static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
	175	#endif
	176
	177	#define SEMMSL_FAST 256 /* 512 bytes on stack */
	178	#define SEMOPM_FAST 64 /* ~ 372 bytes on stack */
	179
	180	/*
	181	 * Switching from the mode suitable for simple ops
	182	 * to the mode for complex ops is costly. Therefore:
	183	 * use some hysteresis
	184	 */
	185	#define USE_GLOBAL_LOCK_HYSTERESIS 10
	186
	187	/*
	188	* Locking:
	189	* a) global sem_lock() for read/write
	190	* sem_undo.id_next,
	191	* sem_array.complex_count,
	192	* sem_array.pending{_alter,_const},
	193	* sem_array.sem_undo
	194	*
	195	* b) global or semaphore sem_lock() for read/write:
	196	* sem_array.sems[i].pending_{const,alter}:
	197	*
	198	* c) special:
	199	* sem_undo_list.list_proc:
	200	* * undo_list->lock for write
	201	* * rcu for read
	202	* use_global_lock:
	203	* * global sem_lock() for write
	204	* * either local or global sem_lock() for read.
	205	*
	206	* Memory ordering:
	207	* Most ordering is enforced by using spin_lock() and spin_unlock().
	208	* The special case is use_global_lock:
	209	* Setting it from non-zero to 0 is a RELEASE, this is ensured by
	210	* using smp_store_release().
	211	* Testing if it is non-zero is an ACQUIRE, this is ensured by using
	212	* smp_load_acquire().
	213	* Setting it from 0 to non-zero must be ordered with regards to
	214	* this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
	215	* is inside a spin_lock() and after a write from 0 to non-zero a
	216	* spin_lock()+spin_unlock() is done.
	217	*/
	218
	219	#define sc_semmsl sem_ctls[0] /* upper bound on nsems per set, checked in ksys_semget() */
	220	#define sc_semmns sem_ctls[1] /* upper bound on used_sems in the namespace, checked in newary() */
	221	#define sc_semopm sem_ctls[2] /* SEMOPM setting; not consumed in this part of the file */
	222	#define sc_semmni sem_ctls[3] /* limit handed to ipc_addid() when a set is created */
	223
	224	int sem_init_ns(struct ipc_namespace *ns)
	225	{ /* Give @ns the default semaphore limits and an empty id table. */
	226		ns->used_sems = 0; /* no semaphores exist in the namespace yet */
	227		ns->sc_semmni = SEMMNI;
	228		ns->sc_semopm = SEMOPM;
	229		ns->sc_semmns = SEMMNS;
	230		ns->sc_semmsl = SEMMSL;
	231		return ipc_init_ids(&sem_ids(ns)); /* result is handed straight back to the caller */
	232	}
	233
	234	#ifdef CONFIG_IPC_NS
	235	void sem_exit_ns(struct ipc_namespace *ns)
	236	{ /* Tear down the semaphore state of @ns. */
	237	free_ipcs(ns, &sem_ids(ns), freeary); /* freeary is passed as the per-object callback */
	238	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	239	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
	240	}
	241	#endif
	242
	243	int __init sem_init(void)
	244	{ /* Initialise the semaphore state of init_ipc_ns and register /proc "sysvipc/sem". */
	245	const int err = sem_init_ns(&init_ipc_ns);
	246
	247	ipc_init_proc_interface("sysvipc/sem",
	248	" key semid perms nsems uid gid cuid cgid otime ctime\n", /* NOTE(review): column padding may have been collapsed in this listing - verify against the row format */
	249	IPC_SEM_IDS, sysvipc_sem_proc_show);
	250	return err; /* the proc interface is registered even if sem_init_ns() failed */
	251	}
	252
	253	/**
	254	* unmerge_queues - unmerge queues, if possible.
	255	* @sma: semaphore array
	256	*
	257	* The function unmerges the wait queues if complex_count is 0.
	258	* It must be called prior to dropping the global semaphore array lock.
	259	*/
	260	static void unmerge_queues(struct sem_array *sma)
	261	{
	262		struct sem_queue *q, *tq;
	263
	264		/* complex operations still around? */
	265		if (sma->complex_count)
	266			return;
	267		/*
	268		 * We will switch back to simple mode.
	269		 * Move all pending operation back into the per-semaphore
	270		 * queues.
	271		 */
	272		list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { /* _safe: q->list is re-linked below */
	273			struct sem *curr;
	274			curr = &sma->sems[q->sops[0].sem_num]; /* queue of the first (only inspected) sop */
	275
	276			list_add_tail(&q->list, &curr->pending_alter);
	277		}
	278		INIT_LIST_HEAD(&sma->pending_alter); /* entries were re-linked, not deleted: reset the head */
	279	}
	280
	281	/**
	282	* merge_queues - merge single semop queues into global queue
	283	* @sma: semaphore array
	284	*
	285	* This function merges all per-semaphore queues into the global queue.
	286	* It is necessary to achieve FIFO ordering for the pending single-sop
	287	* operations when a multi-semop operation must sleep.
	288	* Only the alter operations must be moved, the const operations can stay.
	289	*/
	290	static void merge_queues(struct sem_array *sma)
	291	{
	292		struct sem *cursor;
	293		struct sem *const stop = sma->sems + sma->sem_nsems;
	294
	295		/* Visit the semaphores in index order, emptying each alter queue into the array-wide one. */
	296		for (cursor = sma->sems; cursor < stop; cursor++)
	297			list_splice_init(&cursor->pending_alter, &sma->pending_alter);
	298	}
	299
	300	static void sem_rcu_free(struct rcu_head *head)
	301	{ /* RCU callback: release the security data and the memory of the sem_array embedding @head. */
	302		struct kern_ipc_perm *perm = container_of(head, struct kern_ipc_perm, rcu);
	303		struct sem_array *array = container_of(perm, struct sem_array, sem_perm);
	304
	305		security_sem_free(perm); /* perm is &array->sem_perm */
	306		kvfree(array);
	307	}
	308
	309	/*
	310	* Enter the mode suitable for non-simple operations:
	311	* Caller must own sem_perm.lock.
	312	*/
	313	static void complexmode_enter(struct sem_array *sma)
	314	{
	315	int i;
	316	struct sem *sem;
	317
	318	if (sma->use_global_lock > 0) {
	319	/*
	320	* We are already in global lock mode.
	321	* Nothing to do, just reset the
	322	* counter until we return to simple mode.
	323	*/
	324	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
	325	return;
	326	}
	327	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; /* 0 -> non-zero: simple ops must use the global lock from now on */
	328
	329	for (i = 0; i < sma->sem_nsems; i++) {
	330	sem = &sma->sems[i];
	331	spin_lock(&sem->lock); /* lock+unlock of every per-semaphore lock orders the store above */
	332	spin_unlock(&sem->lock); /* against the smp_load_acquire() that sem_lock() performs under sem->lock */
	333	}
	334	}
	335
	336	/*
	337	* Try to leave the mode that disallows simple operations:
	338	* Caller must own sem_perm.lock.
	339	*/
	340	static void complexmode_tryleave(struct sem_array *sma)
	341	{
	342	if (sma->complex_count) {
	343	/* Complex ops are sleeping.
	344	* We must stay in complex mode
	345	*/
	346	return;
	347	}
	348	if (sma->use_global_lock == 1) { /* the hysteresis counter has run down */
	349	/*
	350	* Immediately after setting use_global_lock to 0,
	351	* a simple op can start. Thus: all memory writes
	352	* performed by the current operation must be visible
	353	* before we set use_global_lock to 0.
	354	*/
	355	smp_store_release(&sma->use_global_lock, 0); /* pairs with smp_load_acquire() in sem_lock() */
	356	} else {
	357	sma->use_global_lock--; /* hysteresis: stay in global mode for a few more calls */
	358	}
	359	}
	360
	361	#define SEM_GLOBAL_LOCK (-1) /* sem_lock() result: the whole array is locked */
	362	/*
	363	 * If the request contains only one semaphore operation, and there are
	364	 * no complex transactions pending, lock only the semaphore involved.
	365	 * Otherwise, lock the entire semaphore array, since we either have
	366	 * multiple semaphores in our own semops, or we need to look at
	367	 * semaphores from other pending complex operations.
	368	 */
	369	static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
	370				      int nsops)
	371	{ /* Returns SEM_GLOBAL_LOCK or the locked semaphore's number; hand it to sem_unlock(). */
	372		struct sem *sem;
	373		int idx;
	374
	375		if (nsops != 1) { /* @sops is not read on this path and may be NULL */
	376			/* Complex operation - acquire a full lock */
	377			ipc_lock_object(&sma->sem_perm);
	378
	379			/* Prevent parallel simple ops */
	380			complexmode_enter(sma);
	381			return SEM_GLOBAL_LOCK;
	382		}
	383
	384		/*
	385		 * Only one semaphore affected - try to optimize locking.
	386		 * Optimized locking is possible if no complex operation
	387		 * is either enqueued or processed right now.
	388		 *
	389		 * Both facts are tracked by use_global_lock.
	390		 */
	391		idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
	392		sem = &sma->sems[idx];
	393
	394		/*
	395		 * Initial check for use_global_lock. Just an optimization,
	396		 * no locking, no memory barrier.
	397		 */
	398		if (!sma->use_global_lock) {
	399			/*
	400			 * It appears that no complex operation is around.
	401			 * Acquire the per-semaphore lock.
	402			 */
	403			spin_lock(&sem->lock);
	404
	405			/* pairs with smp_store_release() */
	406			if (!smp_load_acquire(&sma->use_global_lock)) {
	407				/* fast path successful! */
	408				return sops->sem_num;
	409			}
	410			spin_unlock(&sem->lock);
	411		}
	412
	413		/* slow path: acquire the full lock */
	414		ipc_lock_object(&sma->sem_perm);
	415
	416		if (sma->use_global_lock == 0) {
	417			/*
	418			 * The use_global_lock mode ended while we waited for
	419			 * sma->sem_perm.lock. Thus we must switch to locking
	420			 * with sem->lock.
	421			 * Unlike in the fast path, there is no need to recheck
	422			 * sma->use_global_lock after we have acquired sem->lock:
	423			 * We own sma->sem_perm.lock, thus use_global_lock cannot
	424			 * change.
	425			 */
	426			spin_lock(&sem->lock);
	427
	428			ipc_unlock_object(&sma->sem_perm);
	429			return sops->sem_num;
	430		} else {
	431			/*
	432			 * Not a false alarm, thus continue to use the global lock
	433			 * mode. No need for complexmode_enter(), this was done by
	434			 * the caller that has set use_global_lock to non-zero.
	435			 */
	436			return SEM_GLOBAL_LOCK;
	437		}
	438	}
	439
	440	static inline void sem_unlock(struct sem_array *sma, int locknum)
	441	{ /* Release what sem_lock() acquired; @locknum is the value sem_lock() returned. */
	442	if (locknum == SEM_GLOBAL_LOCK) {
	443	unmerge_queues(sma); /* has to run before the global lock is dropped */
	444	complexmode_tryleave(sma); /* needs sem_perm.lock, which is still held here */
	445	ipc_unlock_object(&sma->sem_perm);
	446	} else {
	447	struct sem *sem = &sma->sems[locknum]; /* locknum is the number of the locked semaphore */
	448	spin_unlock(&sem->lock);
	449	}
	450	}
	451
	452	/*
	453	* sem_lock_(check_) routines are called in the paths where the rwsem
	454	* is not held.
	455	*
	456	* The caller holds the RCU read lock.
	457	*/
	458	static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
	459	{ /* Map @id to its sem_array via ipc_obtain_object_idr(); an error is passed on as ERR_PTR. */
	460		struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
	461
	462		if (IS_ERR(ipcp))
	463			return ERR_CAST(ipcp);
	464
	465		return container_of(ipcp, struct sem_array, sem_perm);
	466	}
	467
	468	static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
	469								int id)
	470	{ /* Same as sem_obtain_object(), but the lookup goes through ipc_obtain_object_check(). */
	471		struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
	472
	473		if (IS_ERR(ipcp))
	474			return ERR_CAST(ipcp);
	475
	476		return container_of(ipcp, struct sem_array, sem_perm);
	477	}
	478
	479	static inline void sem_lock_and_putref(struct sem_array *sma)
	480	{
	481	sem_lock(sma, NULL, -1); /* nsops != 1, so the global lock is taken and sops is never read */
	482	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); /* sem_rcu_free is passed as the release callback */
	483	}
	484
	485	static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
	486	{ /* Thin wrapper: hand the array's kern_ipc_perm to ipc_rmid() for namespace @ns. */
	487		ipc_rmid(&sem_ids(ns), &s->sem_perm);
	488	}
	489
	490	static struct sem_array *sem_alloc(size_t nsems)
	491	{ /* Allocate a zero-filled sem_array with room for @nsems semaphores; NULL on failure. */
	492		struct sem_array *sma;
	493		size_t size;
	494
	495		if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) /* total size would exceed INT_MAX */
	496			return NULL;
	497
	498		size = sizeof(*sma) + nsems * sizeof(sma->sems[0]); /* header plus the flexible sems[] array */
	499		sma = kvmalloc(size, GFP_KERNEL);
	500		if (unlikely(!sma))
	501			return NULL;
	502
	503		memset(sma, 0, size);
	504
	505		return sma;
	506	}
	507
	508	/**
	509	* newary - Create a new semaphore set
	510	* @ns: namespace
	511	* @params: ptr to the structure that contains key, semflg and nsems
	512	*
	513	* Called with sem_ids.rwsem held (as a writer)
	514	*/
	515	static int newary(struct ipc_namespace *ns, struct ipc_params *params)
	516	{ /* Returns the new set's id, or a negative errno. */
	517		int retval;
	518		struct sem_array *sma;
	519		key_t key = params->key;
	520		int nsems = params->u.nsems;
	521		int semflg = params->flg;
	522		int i;
	523
	524		if (!nsems)
	525			return -EINVAL;
	526		if (ns->used_sems + nsems > ns->sc_semmns) /* namespace-wide semaphore budget */
	527			return -ENOSPC;
	528
	529		sma = sem_alloc(nsems);
	530		if (!sma)
	531			return -ENOMEM;
	532
	533		sma->sem_perm.mode = (semflg & S_IRWXUGO);
	534		sma->sem_perm.key = key;
	535
	536		sma->sem_perm.security = NULL;
	537		retval = security_sem_alloc(&sma->sem_perm);
	538		if (retval) {
	539			kvfree(sma); /* ipc_addid() has not run yet, so free directly */
	540			return retval;
	541		}
	542
	543		for (i = 0; i < nsems; i++) {
	544			INIT_LIST_HEAD(&sma->sems[i].pending_alter);
	545			INIT_LIST_HEAD(&sma->sems[i].pending_const);
	546			spin_lock_init(&sma->sems[i].lock);
	547		}
	548
	549		sma->complex_count = 0;
	550		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; /* a new array starts in global-lock mode */
	551		INIT_LIST_HEAD(&sma->pending_alter);
	552		INIT_LIST_HEAD(&sma->pending_const);
	553		INIT_LIST_HEAD(&sma->list_id);
	554		sma->sem_nsems = nsems;
	555		sma->sem_ctime = ktime_get_real_seconds();
	556
	557		/* ipc_addid() locks sma upon success. */
	558		retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	559		if (retval < 0) {
	560			call_rcu(&sma->sem_perm.rcu, sem_rcu_free); /* sem_rcu_free also drops the security data */
	561			return retval;
	562		}
	563		ns->used_sems += nsems;
	564
	565		sem_unlock(sma, -1); /* -1 is SEM_GLOBAL_LOCK */
	566		rcu_read_unlock();
	567
	568		return sma->sem_perm.id;
	569	}
	570
	571
	572	/*
	573	* Called with sem_ids.rwsem and ipcp locked.
	574	*/
	575	static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
	576					  struct ipc_params *params)
	577	{
	578		const struct sem_array *array =
	579			container_of(ipcp, struct sem_array, sem_perm);
	580
	581		/*
	582		 * Asking for more semaphores than the existing set holds is invalid.
	583		 */
	584		return params->u.nsems > array->sem_nsems ? -EINVAL : 0;
	585	}
	586
	587	long ksys_semget(key_t key, int nsems, int semflg)
	588	{ /* Validate @nsems against the namespace limit, then let ipcget() find or create the set. */
	589		struct ipc_namespace *ns;
	590		static const struct ipc_ops sem_ops = {
	591			.getnew = newary,
	592			.associate = security_sem_associate,
	593			.more_checks = sem_more_checks,
	594		};
	595		struct ipc_params sem_params;
	596
	597		ns = current->nsproxy->ipc_ns;
	598
	599		if (nsems < 0 || nsems > ns->sc_semmsl)
	600			return -EINVAL;
	601
	602		sem_params.key = key;
	603		sem_params.flg = semflg;
	604		sem_params.u.nsems = nsems;
	605
	606		return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
	607	}
	608
	609	SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
	610	{ /* semget() system call entry point: forwards unchanged to ksys_semget(). */
	611	return ksys_semget(key, nsems, semflg);
	612	}
	613
	614	/**
	615	* perform_atomic_semop[_slow] - Attempt to perform semaphore
	616	* operations on a given array.
	617	* @sma: semaphore array
	618	* @q: struct sem_queue that describes the operation
	619	*
	620	* Whether the caller blocks depends on the value
	621	* indicated by the semaphore operation (sem_op):
	622	*
	623	* (1) >0 never blocks.
	624	* (2) 0 (wait-for-zero operation): semval is non-zero.
	625	* (3) <0 attempting to decrement semval to a value smaller than zero.
	626	*
	627	* Returns 0 if the operation was possible.
	628	* Returns 1 if the operation is impossible, the caller must sleep.
	629	* Returns <0 for error codes.
	630	*/
	631	static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
	632	{ /* Apply the sops one by one; on failure roll back the ones already applied. */
	633		int result, sem_op, nsops;
	634		struct pid *pid;
	635		struct sembuf *sop;
	636		struct sem *curr;
	637		struct sembuf *sops;
	638		struct sem_undo *un;
	639
	640		sops = q->sops;
	641		nsops = q->nsops;
	642		un = q->undo;
	643
	644		for (sop = sops; sop < sops + nsops; sop++) {
	645			int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
	646			curr = &sma->sems[idx];
	647			sem_op = sop->sem_op;
	648			result = curr->semval;
	649
	650			if (!sem_op && result) /* wait-for-zero on a non-zero semaphore */
	651				goto would_block;
	652
	653			result += sem_op;
	654			if (result < 0)
	655				goto would_block;
	656			if (result > SEMVMX)
	657				goto out_of_range;
	658
	659			if (sop->sem_flg & SEM_UNDO) {
	660				int undo = un->semadj[sop->sem_num] - sem_op;
	661				/* Exceeding the undo range is an error. */
	662				if (undo < (-SEMAEM - 1) || undo > SEMAEM)
	663					goto out_of_range;
	664				un->semadj[sop->sem_num] = undo;
	665			}
	666
	667			curr->semval = result; /* written immediately: a later sop may hit the same semaphore */
	668		}
	669
	670		sop--; /* step back to the last sop: all of them succeeded */
	671		pid = q->pid;
	672		while (sop >= sops) {
	673			ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid);
	674			sop--;
	675		}
	676
	677		return 0;
	678
	679	out_of_range:
	680		result = -ERANGE;
	681		goto undo;
	682
	683	would_block:
	684		q->blocking = sop; /* remember which sop could not proceed */
	685
	686		if (sop->sem_flg & IPC_NOWAIT)
	687			result = -EAGAIN;
	688		else
	689			result = 1;
	690
	691	undo:
	692		sop--; /* the failing sop itself changed nothing; revert the ones before it */
	693		while (sop >= sops) {
	694			sem_op = sop->sem_op;
	695			sma->sems[sop->sem_num].semval -= sem_op;
	696			if (sop->sem_flg & SEM_UNDO)
	697				un->semadj[sop->sem_num] += sem_op;
	698			sop--;
	699		}
	700
	701		return result;
	702	}
	703
	704	static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
	705	{ /* Returns 0 on success, 1 if the caller must sleep, or a negative errno. */
	706		int result, sem_op, nsops;
	707		struct sembuf *sop;
	708		struct sem *curr;
	709		struct sembuf *sops;
	710		struct sem_undo *un;
	711
	712		sops = q->sops;
	713		nsops = q->nsops;
	714		un = q->undo;
	715
	716		if (unlikely(q->dupsop)) /* duplicate sem_num: use the apply-and-roll-back variant */
	717			return perform_atomic_semop_slow(sma, q);
	718
	719		/*
	720		 * We scan the semaphore set twice, first to ensure that the entire
	721		 * operation can succeed, therefore avoiding any pointless writes
	722		 * to shared memory and having to undo such changes in order to block
	723		 * until the operations can go through.
	724		 */
	725		for (sop = sops; sop < sops + nsops; sop++) {
	726			int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
	727
	728			curr = &sma->sems[idx];
	729			sem_op = sop->sem_op;
	730			result = curr->semval;
	731
	732			if (!sem_op && result)
	733				goto would_block; /* wait-for-zero */
	734
	735			result += sem_op;
	736			if (result < 0)
	737				goto would_block;
	738
	739			if (result > SEMVMX)
	740				return -ERANGE;
	741
	742			if (sop->sem_flg & SEM_UNDO) {
	743				int undo = un->semadj[sop->sem_num] - sem_op;
	744
	745				/* Exceeding the undo range is an error. */
	746				if (undo < (-SEMAEM - 1) || undo > SEMAEM)
	747					return -ERANGE;
	748			}
	749		}
	750
	751		for (sop = sops; sop < sops + nsops; sop++) {
	752			curr = &sma->sems[sop->sem_num];
	753			sem_op = sop->sem_op;
	754			/* The first pass validated every sop, so just apply them. */
	755
	756			if (sop->sem_flg & SEM_UNDO) {
	757				int undo = un->semadj[sop->sem_num] - sem_op;
	758
	759				un->semadj[sop->sem_num] = undo;
	760			}
	761			curr->semval += sem_op;
	762			ipc_update_pid(&curr->sempid, q->pid);
	763		}
	764
	765		return 0;
	766
	767	would_block:
	768		q->blocking = sop; /* remember which sop could not proceed */
	769		return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
	770	}
	771
	772	static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
	773	struct wake_q_head *wake_q)
	774	{ /* Queue q->sleeper on @wake_q and publish @error as the operation's status. */
	775	wake_q_add(wake_q, q->sleeper); /* must come before the status store, see below */
	776	/*
	777	* Rely on the above implicit barrier, such that we can
	778	* ensure that we hold reference to the task before setting
	779	* q->status. Otherwise we could race with do_exit if the
	780	* task is awoken by an external event before calling
	781	* wake_up_process().
	782	*/
	783	WRITE_ONCE(q->status, error);
	784	}
	785
	786	static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
	787	{ /* Take @q off its pending list and keep sma->complex_count in step. */
	788		list_del(&q->list);
	789		if (q->nsops > 1) /* multi-sop entries are the ones counted as complex */
	790			sma->complex_count--;
	791	}
	792
	793	/** check_restart(sma, q)
	794	* @sma: semaphore array
	795	* @q: the operation that just completed
	796	*
	797	* update_queue is O(N^2) when it restarts scanning the whole queue of
	798	* waiting operations. Therefore this function checks if the restart is
	799	* really necessary. It is called after a previously waiting operation
	800	* modified the array.
	801	* Note that wait-for-zero operations are handled without restart.
	802	*/
	803	static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
	804	{ /* Returns 1 if update_queue() has to rescan from the start, 0 otherwise. */
	805		/* pending complex alter operations are too difficult to analyse */
	806		if (!list_empty(&sma->pending_alter))
	807			return 1;
	808
	809		/* we were a sleeping complex operation. Too difficult */
	810		if (q->nsops > 1)
	811			return 1;
	812
	813		/* It is impossible that someone waits for the new value:
	814		 * - complex operations always restart.
	815		 * - wait-for-zero are handled separately.
	816		 * - q is a previously sleeping simple operation that
	817		 * altered the array. It must be a decrement, because
	818		 * simple increments never sleep.
	819		 * - If there are older (higher priority) decrements
	820		 * in the queue, then they have observed the original
	821		 * semval value and couldn't proceed. The operation
	822		 * decremented to value - thus they won't proceed either.
	823		 */
	824		return 0;
	825	}
	826
	827	/**
	828	* wake_const_ops - wake up non-alter tasks
	829	* @sma: semaphore array.
	830	* @semnum: semaphore that was modified.
	831	* @wake_q: lockless wake-queue head.
	832	*
	833	* wake_const_ops must be called after a semaphore in a semaphore array
	834	* was set to 0. If complex const operations are pending, wake_const_ops must
	835	* be called with semnum = -1, as well as with the number of each modified
	836	* semaphore.
	837	* The tasks that must be woken up are added to @wake_q. The return code
	838	* is stored in q->status.
	839	* The function returns 1 if at least one operation was completed successfully.
	840	*/
	841	static int wake_const_ops(struct sem_array *sma, int semnum,
	842				  struct wake_q_head *wake_q)
	843	{
	844		struct sem_queue *q, *tmp;
	845		struct list_head *pending_list;
	846		int semop_completed = 0;
	847
	848		if (semnum == -1) /* -1 selects the array-wide queue of complex const ops */
	849			pending_list = &sma->pending_const;
	850		else
	851			pending_list = &sma->sems[semnum].pending_const;
	852
	853		list_for_each_entry_safe(q, tmp, pending_list, list) {
	854			int error = perform_atomic_semop(sma, q);
	855
	856			if (error > 0) /* still has to sleep */
	857				continue;
	858			/* operation completed, remove from queue & wakeup */
	859			unlink_queue(sma, q);
	860
	861			wake_up_sem_queue_prepare(q, error, wake_q); /* error (0 or -errno) ends up in q->status */
	862			if (error == 0)
	863				semop_completed = 1;
	864		}
	865
	866		return semop_completed;
	867	}
	868
	869	/**
	870	* do_smart_wakeup_zero - wakeup all wait for zero tasks
	871	* @sma: semaphore array
	872	* @sops: operations that were performed
	873	* @nsops: number of operations
	874	* @wake_q: lockless wake-queue head
	875	*
	876	* Checks all required queue for wait-for-zero operations, based
	877	* on the actual changes that were performed on the semaphore array.
	878	* The function returns 1 if at least one operation was completed successfully.
	879	*/
	880	static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
	881					int nsops, struct wake_q_head *wake_q)
	882	{
	883		int i;
	884		int semop_completed = 0;
	885		int got_zero = 0;
	886
	887		/* first: the per-semaphore queues, if known */
	888		if (sops) {
	889			for (i = 0; i < nsops; i++) {
	890				int num = sops[i].sem_num;
	891
	892				if (sma->sems[num].semval == 0) {
	893					got_zero = 1;
	894					semop_completed |= wake_const_ops(sma, num, wake_q);
	895				}
	896			}
	897		} else {
	898			/*
	899			 * No sops means modified semaphores not known.
	900			 * Assume all were changed.
	901			 */
	902			for (i = 0; i < sma->sem_nsems; i++) {
	903				if (sma->sems[i].semval == 0) {
	904					got_zero = 1;
	905					semop_completed |= wake_const_ops(sma, i, wake_q);
	906				}
	907			}
	908		}
	909		/*
	910		 * If one of the modified semaphores got 0,
	911		 * then check the global queue, too.
	912		 */
	913		if (got_zero)
	914			semop_completed |= wake_const_ops(sma, -1, wake_q);
	915
	916		return semop_completed;
	917	}
	918
	919
	920	/**
	921	* update_queue - look for tasks that can be completed.
	922	* @sma: semaphore array.
	923	* @semnum: semaphore that was modified.
	924	* @wake_q: lockless wake-queue head.
	925	*
	926	* update_queue must be called after a semaphore in a semaphore array
	927	* was modified. If multiple semaphores were modified, update_queue must
	928	* be called with semnum = -1, as well as with the number of each modified
	929	* semaphore.
	930	* The tasks that must be woken up are added to @wake_q. The return code
	931	* is stored in q->status.
	932	* The function internally checks if const operations can now succeed.
	933	*
	934	* The function returns 1 if at least one semop was completed successfully.
	935	*/
	936	static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
	937	{
	938		struct sem_queue *q, *tmp;
	939		struct list_head *pending_list;
	940		int semop_completed = 0;
	941
	942		if (semnum == -1) /* -1 selects the array-wide alter queue */
	943			pending_list = &sma->pending_alter;
	944		else
	945			pending_list = &sma->sems[semnum].pending_alter;
	946
	947	again:
	948		list_for_each_entry_safe(q, tmp, pending_list, list) {
	949			int error, restart;
	950
	951			/* If we are scanning the single sop, per-semaphore list of
	952			 * one semaphore and that semaphore is 0, then it is not
	953			 * necessary to scan further: simple increments
	954			 * that affect only one entry succeed immediately and cannot
	955			 * be in the per semaphore pending queue, and decrements
	956			 * cannot be successful if the value is already 0.
	957			 */
	958			if (semnum != -1 && sma->sems[semnum].semval == 0)
	959				break;
	960
	961			error = perform_atomic_semop(sma, q);
	962
	963			/* Does q->sleeper still need to sleep? */
	964			if (error > 0)
	965				continue;
	966
	967			unlink_queue(sma, q);
	968
	969			if (error) {
	970				restart = 0; /* a failed op changed nothing, so no rescan is needed */
	971			} else {
	972				semop_completed = 1;
	973				do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
	974				restart = check_restart(sma, q);
	975			}
	976
	977			wake_up_sem_queue_prepare(q, error, wake_q);
	978			if (restart)
	979				goto again;
	980		}
	981		return semop_completed;
	982	}
	983
	984	/**
	985	* set_semotime - set sem_otime
	986	* @sma: semaphore array
	987	* @sops: operations that modified the array, may be NULL
	988	*
	989	* sem_otime is replicated to avoid cache line thrashing.
	990	* This function sets one instance to the current time.
	991	*/
	992	static void set_semotime(struct sem_array *sma, struct sembuf *sops)
	993	{
	994		if (sops == NULL) { /* modified semaphores unknown: use the copy in sems[0] */
	995			sma->sems[0].sem_otime = ktime_get_real_seconds();
	996		} else { /* use the copy of the first semaphore that was operated on */
	997			sma->sems[sops[0].sem_num].sem_otime =
	998						ktime_get_real_seconds();
	999		}
	1000	}
	1001
	1002	/**
	1003	* do_smart_update - optimized update_queue
	1004	* @sma: semaphore array
	1005	* @sops: operations that were performed
	1006	* @nsops: number of operations
	1007	* @otime: force setting otime
	1008	* @wake_q: lockless wake-queue head
	1009	*
	1010	* do_smart_update() does the required calls to update_queue and wakeup_zero,
	1011	* based on the actual changes that were performed on the semaphore array.
	1012	* Note that the function does not do the actual wake-up: the caller is
	1013	* responsible for calling wake_up_q().
	1014	* It is safe to perform this call after dropping all locks.
	1015	*/
	1016	static void do_smart_update(struct sem_array sma, struct sembuf sops, int nsops,
	1017	int otime, struct wake_q_head *wake_q)
	1018	{
	1019	int i;
	1020
	1021	otime \|= do_smart_wakeup_zero(sma, sops, nsops, wake_q);
	1022
	1023	if (!list_empty(&sma->pending_alter)) {
	1024	/* semaphore array uses the global queue - just process it. */
	1025	otime \|= update_queue(sma, -1, wake_q);
	1026	} else {
	1027	if (!sops) {
	1028	/*
	1029	* No sops, thus the modified semaphores are not
	1030	* known. Check all.
	1031	*/
	1032	for (i = 0; i < sma->sem_nsems; i++)
	1033	otime \|= update_queue(sma, i, wake_q);
	1034	} else {
	1035	/*
	1036	* Check the semaphores that were increased:
	1037	* - No complex ops, thus all sleeping ops are
	1038	* decrease.
	1039	* - if we decreased the value, then any sleeping
	1040	* semaphore ops wont be able to run: If the
	1041	* previous value was too small, then the new
	1042	* value will be too small, too.
	1043	*/
	1044	for (i = 0; i < nsops; i++) {
	1045	if (sops[i].sem_op > 0) {
	1046	otime \|= update_queue(sma,
	1047	sops[i].sem_num, wake_q);
	1048	}
	1049	}
	1050	}
	1051	}
	1052	if (otime)
	1053	set_semotime(sma, sops);
	1054	}
	1055
	1056	/*
	1057	* check_qop: Test if a queued operation sleeps on the semaphore semnum
	1058	*/
	1059	static int check_qop(struct sem_array sma, int semnum, struct sem_queue q,
	1060	bool count_zero)
	1061	{
	1062	struct sembuf *sop = q->blocking;
	1063
	1064	/*
	1065	* Linux always (since 0.99.10) reported a task as sleeping on all
	1066	* semaphores. This violates SUS, therefore it was changed to the
	1067	* standard compliant behavior.
	1068	* Give the administrators a chance to notice that an application
	1069	* might misbehave because it relies on the Linux behavior.
	1070	*/
	1071	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
	1072	"The task %s (%d) triggered the difference, watch for misbehavior.\n",
	1073	current->comm, task_pid_nr(current));
	1074
	1075	if (sop->sem_num != semnum)
	1076	return 0;
	1077
	1078	if (count_zero && sop->sem_op == 0)
	1079	return 1;
	1080	if (!count_zero && sop->sem_op < 0)
	1081	return 1;
	1082
	1083	return 0;
	1084	}
	1085
	1086	/* The following counts are associated to each semaphore:
	1087	* semncnt number of tasks waiting on semval being nonzero
	1088	* semzcnt number of tasks waiting on semval being zero
	1089	*
	1090	* Per definition, a task waits only on the semaphore of the first semop
	1091	* that cannot proceed, even if additional operation would block, too.
	1092	*/
	1093	static int count_semcnt(struct sem_array *sma, ushort semnum,
	1094	bool count_zero)
	1095	{
	1096	struct list_head *l;
	1097	struct sem_queue *q;
	1098	int semcnt;
	1099
	1100	semcnt = 0;
	1101	/* First: check the simple operations. They are easy to evaluate */
	1102	if (count_zero)
	1103	l = &sma->sems[semnum].pending_const;
	1104	else
	1105	l = &sma->sems[semnum].pending_alter;
	1106
	1107	list_for_each_entry(q, l, list) {
	1108	/* all task on a per-semaphore list sleep on exactly
	1109	* that semaphore
	1110	*/
	1111	semcnt++;
	1112	}
	1113
	1114	/* Then: check the complex operations. */
	1115	list_for_each_entry(q, &sma->pending_alter, list) {
	1116	semcnt += check_qop(sma, semnum, q, count_zero);
	1117	}
	1118	if (count_zero) {
	1119	list_for_each_entry(q, &sma->pending_const, list) {
	1120	semcnt += check_qop(sma, semnum, q, count_zero);
	1121	}
	1122	}
	1123	return semcnt;
	1124	}
	1125
	1126	/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
	1127	* as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
	1128	* remains locked on exit.
	1129	*/
	1130	static void freeary(struct ipc_namespace ns, struct kern_ipc_perm ipcp)
	1131	{
	1132	struct sem_undo un, tu;
	1133	struct sem_queue q, tq;
	1134	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	1135	int i;
	1136	DEFINE_WAKE_Q(wake_q);
	1137
	1138	/* Free the existing undo structures for this semaphore set. */
	1139	ipc_assert_locked_object(&sma->sem_perm);
	1140	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
	1141	list_del(&un->list_id);
	1142	spin_lock(&un->ulp->lock);
	1143	un->semid = -1;
	1144	list_del_rcu(&un->list_proc);
	1145	spin_unlock(&un->ulp->lock);
	1146	kfree_rcu(un, rcu);
	1147	}
	1148
	1149	/* Wake up all pending processes and let them fail with EIDRM. */
	1150	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
	1151	unlink_queue(sma, q);
	1152	wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	1153	}
	1154
	1155	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
	1156	unlink_queue(sma, q);
	1157	wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	1158	}
	1159	for (i = 0; i < sma->sem_nsems; i++) {
	1160	struct sem *sem = &sma->sems[i];
	1161	list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
	1162	unlink_queue(sma, q);
	1163	wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	1164	}
	1165	list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
	1166	unlink_queue(sma, q);
	1167	wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
	1168	}
	1169	ipc_update_pid(&sem->sempid, NULL);
	1170	}
	1171
	1172	/* Remove the semaphore set from the IDR */
	1173	sem_rmid(ns, sma);
	1174	sem_unlock(sma, -1);
	1175	rcu_read_unlock();
	1176
	1177	wake_up_q(&wake_q);
	1178	ns->used_sems -= sma->sem_nsems;
	1179	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
	1180	}
	1181
	1182	static unsigned long copy_semid_to_user(void __user buf, struct semid64_ds in, int version)
	1183	{
	1184	switch (version) {
	1185	case IPC_64:
	1186	return copy_to_user(buf, in, sizeof(*in));
	1187	case IPC_OLD:
	1188	{
	1189	struct semid_ds out;
	1190
	1191	memset(&out, 0, sizeof(out));
	1192
	1193	ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
	1194
	1195	out.sem_otime = in->sem_otime;
	1196	out.sem_ctime = in->sem_ctime;
	1197	out.sem_nsems = in->sem_nsems;
	1198
	1199	return copy_to_user(buf, &out, sizeof(out));
	1200	}
	1201	default:
	1202	return -EINVAL;
	1203	}
	1204	}
	1205
	1206	static time64_t get_semotime(struct sem_array *sma)
	1207	{
	1208	int i;
	1209	time64_t res;
	1210
	1211	res = sma->sems[0].sem_otime;
	1212	for (i = 1; i < sma->sem_nsems; i++) {
	1213	time64_t to = sma->sems[i].sem_otime;
	1214
	1215	if (to > res)
	1216	res = to;
	1217	}
	1218	return res;
	1219	}
	1220
	1221	static int semctl_stat(struct ipc_namespace *ns, int semid,
	1222	int cmd, struct semid64_ds *semid64)
	1223	{
	1224	struct sem_array *sma;
	1225	time64_t semotime;
	1226	int id = 0;
	1227	int err;
	1228
	1229	memset(semid64, 0, sizeof(*semid64));
	1230
	1231	rcu_read_lock();
	1232	if (cmd == SEM_STAT \|\| cmd == SEM_STAT_ANY) {
	1233	sma = sem_obtain_object(ns, semid);
	1234	if (IS_ERR(sma)) {
	1235	err = PTR_ERR(sma);
	1236	goto out_unlock;
	1237	}
	1238	id = sma->sem_perm.id;
	1239	} else { /* IPC_STAT */
	1240	sma = sem_obtain_object_check(ns, semid);
	1241	if (IS_ERR(sma)) {
	1242	err = PTR_ERR(sma);
	1243	goto out_unlock;
	1244	}
	1245	}
	1246
	1247	/* see comment for SHM_STAT_ANY */
	1248	if (cmd == SEM_STAT_ANY)
	1249	audit_ipc_obj(&sma->sem_perm);
	1250	else {
	1251	err = -EACCES;
	1252	if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
	1253	goto out_unlock;
	1254	}
	1255
	1256	err = security_sem_semctl(&sma->sem_perm, cmd);
	1257	if (err)
	1258	goto out_unlock;
	1259
	1260	ipc_lock_object(&sma->sem_perm);
	1261
	1262	if (!ipc_valid_object(&sma->sem_perm)) {
	1263	ipc_unlock_object(&sma->sem_perm);
	1264	err = -EIDRM;
	1265	goto out_unlock;
	1266	}
	1267
	1268	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
	1269	semotime = get_semotime(sma);
	1270	semid64->sem_otime = semotime;
	1271	semid64->sem_ctime = sma->sem_ctime;
	1272	#ifndef CONFIG_64BIT
	1273	semid64->sem_otime_high = semotime >> 32;
	1274	semid64->sem_ctime_high = sma->sem_ctime >> 32;
	1275	#endif
	1276	semid64->sem_nsems = sma->sem_nsems;
	1277
	1278	ipc_unlock_object(&sma->sem_perm);
	1279	rcu_read_unlock();
	1280	return id;
	1281
	1282	out_unlock:
	1283	rcu_read_unlock();
	1284	return err;
	1285	}
	1286
	1287	static int semctl_info(struct ipc_namespace *ns, int semid,
	1288	int cmd, void __user *p)
	1289	{
	1290	struct seminfo seminfo;
	1291	int max_id;
	1292	int err;
	1293
	1294	err = security_sem_semctl(NULL, cmd);
	1295	if (err)
	1296	return err;
	1297
	1298	memset(&seminfo, 0, sizeof(seminfo));
	1299	seminfo.semmni = ns->sc_semmni;
	1300	seminfo.semmns = ns->sc_semmns;
	1301	seminfo.semmsl = ns->sc_semmsl;
	1302	seminfo.semopm = ns->sc_semopm;
	1303	seminfo.semvmx = SEMVMX;
	1304	seminfo.semmnu = SEMMNU;
	1305	seminfo.semmap = SEMMAP;
	1306	seminfo.semume = SEMUME;
	1307	down_read(&sem_ids(ns).rwsem);
	1308	if (cmd == SEM_INFO) {
	1309	seminfo.semusz = sem_ids(ns).in_use;
	1310	seminfo.semaem = ns->used_sems;
	1311	} else {
	1312	seminfo.semusz = SEMUSZ;
	1313	seminfo.semaem = SEMAEM;
	1314	}
	1315	max_id = ipc_get_maxid(&sem_ids(ns));
	1316	up_read(&sem_ids(ns).rwsem);
	1317	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
	1318	return -EFAULT;
	1319	return (max_id < 0) ? 0 : max_id;
	1320	}
	1321
	1322	static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
	1323	int val)
	1324	{
	1325	struct sem_undo *un;
	1326	struct sem_array *sma;
	1327	struct sem *curr;
	1328	int err;
	1329	DEFINE_WAKE_Q(wake_q);
	1330
	1331	if (val > SEMVMX \|\| val < 0)
	1332	return -ERANGE;
	1333
	1334	rcu_read_lock();
	1335	sma = sem_obtain_object_check(ns, semid);
	1336	if (IS_ERR(sma)) {
	1337	rcu_read_unlock();
	1338	return PTR_ERR(sma);
	1339	}
	1340
	1341	if (semnum < 0 \|\| semnum >= sma->sem_nsems) {
	1342	rcu_read_unlock();
	1343	return -EINVAL;
	1344	}
	1345
	1346
	1347	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
	1348	rcu_read_unlock();
	1349	return -EACCES;
	1350	}
	1351
	1352	err = security_sem_semctl(&sma->sem_perm, SETVAL);
	1353	if (err) {
	1354	rcu_read_unlock();
	1355	return -EACCES;
	1356	}
	1357
	1358	sem_lock(sma, NULL, -1);
	1359
	1360	if (!ipc_valid_object(&sma->sem_perm)) {
	1361	sem_unlock(sma, -1);
	1362	rcu_read_unlock();
	1363	return -EIDRM;
	1364	}
	1365
	1366	semnum = array_index_nospec(semnum, sma->sem_nsems);
	1367	curr = &sma->sems[semnum];
	1368
	1369	ipc_assert_locked_object(&sma->sem_perm);
	1370	list_for_each_entry(un, &sma->list_id, list_id)
	1371	un->semadj[semnum] = 0;
	1372
	1373	curr->semval = val;
	1374	ipc_update_pid(&curr->sempid, task_tgid(current));
	1375	sma->sem_ctime = ktime_get_real_seconds();
	1376	/* maybe some queued-up processes were waiting for this */
	1377	do_smart_update(sma, NULL, 0, 0, &wake_q);
	1378	sem_unlock(sma, -1);
	1379	rcu_read_unlock();
	1380	wake_up_q(&wake_q);
	1381	return 0;
	1382	}
	1383
	1384	static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
	1385	int cmd, void __user *p)
	1386	{
	1387	struct sem_array *sma;
	1388	struct sem *curr;
	1389	int err, nsems;
	1390	ushort fast_sem_io[SEMMSL_FAST];
	1391	ushort *sem_io = fast_sem_io;
	1392	DEFINE_WAKE_Q(wake_q);
	1393
	1394	rcu_read_lock();
	1395	sma = sem_obtain_object_check(ns, semid);
	1396	if (IS_ERR(sma)) {
	1397	rcu_read_unlock();
	1398	return PTR_ERR(sma);
	1399	}
	1400
	1401	nsems = sma->sem_nsems;
	1402
	1403	err = -EACCES;
	1404	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
	1405	goto out_rcu_wakeup;
	1406
	1407	err = security_sem_semctl(&sma->sem_perm, cmd);
	1408	if (err)
	1409	goto out_rcu_wakeup;
	1410
	1411	err = -EACCES;
	1412	switch (cmd) {
	1413	case GETALL:
	1414	{
	1415	ushort __user *array = p;
	1416	int i;
	1417
	1418	sem_lock(sma, NULL, -1);
	1419	if (!ipc_valid_object(&sma->sem_perm)) {
	1420	err = -EIDRM;
	1421	goto out_unlock;
	1422	}
	1423	if (nsems > SEMMSL_FAST) {
	1424	if (!ipc_rcu_getref(&sma->sem_perm)) {
	1425	err = -EIDRM;
	1426	goto out_unlock;
	1427	}
	1428	sem_unlock(sma, -1);
	1429	rcu_read_unlock();
	1430	sem_io = kvmalloc_array(nsems, sizeof(ushort),
	1431	GFP_KERNEL);
	1432	if (sem_io == NULL) {
	1433	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
	1434	return -ENOMEM;
	1435	}
	1436
	1437	rcu_read_lock();
	1438	sem_lock_and_putref(sma);
	1439	if (!ipc_valid_object(&sma->sem_perm)) {
	1440	err = -EIDRM;
	1441	goto out_unlock;
	1442	}
	1443	}
	1444	for (i = 0; i < sma->sem_nsems; i++)
	1445	sem_io[i] = sma->sems[i].semval;
	1446	sem_unlock(sma, -1);
	1447	rcu_read_unlock();
	1448	err = 0;
	1449	if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
	1450	err = -EFAULT;
	1451	goto out_free;
	1452	}
	1453	case SETALL:
	1454	{
	1455	int i;
	1456	struct sem_undo *un;
	1457
	1458	if (!ipc_rcu_getref(&sma->sem_perm)) {
	1459	err = -EIDRM;
	1460	goto out_rcu_wakeup;
	1461	}
	1462	rcu_read_unlock();
	1463
	1464	if (nsems > SEMMSL_FAST) {
	1465	sem_io = kvmalloc_array(nsems, sizeof(ushort),
	1466	GFP_KERNEL);
	1467	if (sem_io == NULL) {
	1468	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
	1469	return -ENOMEM;
	1470	}
	1471	}
	1472
	1473	if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
	1474	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
	1475	err = -EFAULT;
	1476	goto out_free;
	1477	}
	1478
	1479	for (i = 0; i < nsems; i++) {
	1480	if (sem_io[i] > SEMVMX) {
	1481	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
	1482	err = -ERANGE;
	1483	goto out_free;
	1484	}
	1485	}
	1486	rcu_read_lock();
	1487	sem_lock_and_putref(sma);
	1488	if (!ipc_valid_object(&sma->sem_perm)) {
	1489	err = -EIDRM;
	1490	goto out_unlock;
	1491	}
	1492
	1493	for (i = 0; i < nsems; i++) {
	1494	sma->sems[i].semval = sem_io[i];
	1495	ipc_update_pid(&sma->sems[i].sempid, task_tgid(current));
	1496	}
	1497
	1498	ipc_assert_locked_object(&sma->sem_perm);
	1499	list_for_each_entry(un, &sma->list_id, list_id) {
	1500	for (i = 0; i < nsems; i++)
	1501	un->semadj[i] = 0;
	1502	}
	1503	sma->sem_ctime = ktime_get_real_seconds();
	1504	/* maybe some queued-up processes were waiting for this */
	1505	do_smart_update(sma, NULL, 0, 0, &wake_q);
	1506	err = 0;
	1507	goto out_unlock;
	1508	}
	1509	/* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */
	1510	}
	1511	err = -EINVAL;
	1512	if (semnum < 0 \|\| semnum >= nsems)
	1513	goto out_rcu_wakeup;
	1514
	1515	sem_lock(sma, NULL, -1);
	1516	if (!ipc_valid_object(&sma->sem_perm)) {
	1517	err = -EIDRM;
	1518	goto out_unlock;
	1519	}
	1520
	1521	semnum = array_index_nospec(semnum, nsems);
	1522	curr = &sma->sems[semnum];
	1523
	1524	switch (cmd) {
	1525	case GETVAL:
	1526	err = curr->semval;
	1527	goto out_unlock;
	1528	case GETPID:
	1529	err = pid_vnr(curr->sempid);
	1530	goto out_unlock;
	1531	case GETNCNT:
	1532	err = count_semcnt(sma, semnum, 0);
	1533	goto out_unlock;
	1534	case GETZCNT:
	1535	err = count_semcnt(sma, semnum, 1);
	1536	goto out_unlock;
	1537	}
	1538
	1539	out_unlock:
	1540	sem_unlock(sma, -1);
	1541	out_rcu_wakeup:
	1542	rcu_read_unlock();
	1543	wake_up_q(&wake_q);
	1544	out_free:
	1545	if (sem_io != fast_sem_io)
	1546	kvfree(sem_io);
	1547	return err;
	1548	}
	1549
	1550	static inline unsigned long
	1551	copy_semid_from_user(struct semid64_ds out, void __user buf, int version)
	1552	{
	1553	switch (version) {
	1554	case IPC_64:
	1555	if (copy_from_user(out, buf, sizeof(*out)))
	1556	return -EFAULT;
	1557	return 0;
	1558	case IPC_OLD:
	1559	{
	1560	struct semid_ds tbuf_old;
	1561
	1562	if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
	1563	return -EFAULT;
	1564
	1565	out->sem_perm.uid = tbuf_old.sem_perm.uid;
	1566	out->sem_perm.gid = tbuf_old.sem_perm.gid;
	1567	out->sem_perm.mode = tbuf_old.sem_perm.mode;
	1568
	1569	return 0;
	1570	}
	1571	default:
	1572	return -EINVAL;
	1573	}
	1574	}
	1575
	1576	/*
	1577	* This function handles some semctl commands which require the rwsem
	1578	* to be held in write mode.
	1579	* NOTE: no locks must be held, the rwsem is taken inside this function.
	1580	*/
	1581	static int semctl_down(struct ipc_namespace *ns, int semid,
	1582	int cmd, struct semid64_ds *semid64)
	1583	{
	1584	struct sem_array *sma;
	1585	int err;
	1586	struct kern_ipc_perm *ipcp;
	1587
	1588	down_write(&sem_ids(ns).rwsem);
	1589	rcu_read_lock();
	1590
	1591	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
	1592	&semid64->sem_perm, 0);
	1593	if (IS_ERR(ipcp)) {
	1594	err = PTR_ERR(ipcp);
	1595	goto out_unlock1;
	1596	}
	1597
	1598	sma = container_of(ipcp, struct sem_array, sem_perm);
	1599
	1600	err = security_sem_semctl(&sma->sem_perm, cmd);
	1601	if (err)
	1602	goto out_unlock1;
	1603
	1604	switch (cmd) {
	1605	case IPC_RMID:
	1606	sem_lock(sma, NULL, -1);
	1607	/* freeary unlocks the ipc object and rcu */
	1608	freeary(ns, ipcp);
	1609	goto out_up;
	1610	case IPC_SET:
	1611	sem_lock(sma, NULL, -1);
	1612	err = ipc_update_perm(&semid64->sem_perm, ipcp);
	1613	if (err)
	1614	goto out_unlock0;
	1615	sma->sem_ctime = ktime_get_real_seconds();
	1616	break;
	1617	default:
	1618	err = -EINVAL;
	1619	goto out_unlock1;
	1620	}
	1621
	1622	out_unlock0:
	1623	sem_unlock(sma, -1);
	1624	out_unlock1:
	1625	rcu_read_unlock();
	1626	out_up:
	1627	up_write(&sem_ids(ns).rwsem);
	1628	return err;
	1629	}
	1630
	1631	long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
	1632	{
	1633	int version;
	1634	struct ipc_namespace *ns;
	1635	void __user p = (void __user )arg;
	1636	struct semid64_ds semid64;
	1637	int err;
	1638
	1639	if (semid < 0)
	1640	return -EINVAL;
	1641
	1642	version = ipc_parse_version(&cmd);
	1643	ns = current->nsproxy->ipc_ns;
	1644
	1645	switch (cmd) {
	1646	case IPC_INFO:
	1647	case SEM_INFO:
	1648	return semctl_info(ns, semid, cmd, p);
	1649	case IPC_STAT:
	1650	case SEM_STAT:
	1651	case SEM_STAT_ANY:
	1652	err = semctl_stat(ns, semid, cmd, &semid64);
	1653	if (err < 0)
	1654	return err;
	1655	if (copy_semid_to_user(p, &semid64, version))
	1656	err = -EFAULT;
	1657	return err;
	1658	case GETALL:
	1659	case GETVAL:
	1660	case GETPID:
	1661	case GETNCNT:
	1662	case GETZCNT:
	1663	case SETALL:
	1664	return semctl_main(ns, semid, semnum, cmd, p);
	1665	case SETVAL: {
	1666	int val;
	1667	#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
	1668	/* big-endian 64bit */
	1669	val = arg >> 32;
	1670	#else
	1671	/* 32bit or little-endian 64bit */
	1672	val = arg;
	1673	#endif
	1674	return semctl_setval(ns, semid, semnum, val);
	1675	}
	1676	case IPC_SET:
	1677	if (copy_semid_from_user(&semid64, p, version))
	1678	return -EFAULT;
	1679	case IPC_RMID:
	1680	return semctl_down(ns, semid, cmd, &semid64);
	1681	default:
	1682	return -EINVAL;
	1683	}
	1684	}
	1685
	/*
	 * semctl system call entry: passes semid, semnum, cmd and arg through
	 * unchanged to ksys_semctl() and returns its result.
	 */
	1686	SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
	1687	{
	1688	return ksys_semctl(semid, semnum, cmd, arg);
	1689	}
	1690
	1691	#ifdef CONFIG_COMPAT
	1692
	/*
	 * Compat layout used by copy_compat_semid_from_user() and
	 * copy_compat_semid_to_user() when the version is not IPC_64.
	 * In this part of the file only sem_perm, sem_otime, sem_ctime and
	 * sem_nsems are read or written; the compat_uptr_t members are never
	 * accessed here.
	 */
	1693	struct compat_semid_ds {
	1694	struct compat_ipc_perm sem_perm;
	1695	compat_time_t sem_otime;
	1696	compat_time_t sem_ctime;
	1697	compat_uptr_t sem_base;
	1698	compat_uptr_t sem_pending;
	1699	compat_uptr_t sem_pending_last;
	1700	compat_uptr_t undo;
	1701	unsigned short sem_nsems;
	1702	};
	1703
	1704	static int copy_compat_semid_from_user(struct semid64_ds out, void __user buf,
	1705	int version)
	1706	{
	1707	memset(out, 0, sizeof(*out));
	1708	if (version == IPC_64) {
	1709	struct compat_semid64_ds __user *p = buf;
	1710	return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
	1711	} else {
	1712	struct compat_semid_ds __user *p = buf;
	1713	return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
	1714	}
	1715	}
	1716
	1717	static int copy_compat_semid_to_user(void __user buf, struct semid64_ds in,
	1718	int version)
	1719	{
	1720	if (version == IPC_64) {
	1721	struct compat_semid64_ds v;
	1722	memset(&v, 0, sizeof(v));
	1723	to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
	1724	v.sem_otime = lower_32_bits(in->sem_otime);
	1725	v.sem_otime_high = upper_32_bits(in->sem_otime);
	1726	v.sem_ctime = lower_32_bits(in->sem_ctime);
	1727	v.sem_ctime_high = upper_32_bits(in->sem_ctime);
	1728	v.sem_nsems = in->sem_nsems;
	1729	return copy_to_user(buf, &v, sizeof(v));
	1730	} else {
	1731	struct compat_semid_ds v;
	1732	memset(&v, 0, sizeof(v));
	1733	to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
	1734	v.sem_otime = in->sem_otime;
	1735	v.sem_ctime = in->sem_ctime;
	1736	v.sem_nsems = in->sem_nsems;
	1737	return copy_to_user(buf, &v, sizeof(v));
	1738	}
	1739	}
	1740
	1741	long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
	1742	{
	1743	void __user *p = compat_ptr(arg);
	1744	struct ipc_namespace *ns;
	1745	struct semid64_ds semid64;
	1746	int version = compat_ipc_parse_version(&cmd);
	1747	int err;
	1748
	1749	ns = current->nsproxy->ipc_ns;
	1750
	1751	if (semid < 0)
	1752	return -EINVAL;
	1753
	1754	switch (cmd & (~IPC_64)) {
	1755	case IPC_INFO:
	1756	case SEM_INFO:
	1757	return semctl_info(ns, semid, cmd, p);
	1758	case IPC_STAT:
	1759	case SEM_STAT:
	1760	case SEM_STAT_ANY:
	1761	err = semctl_stat(ns, semid, cmd, &semid64);
	1762	if (err < 0)
	1763	return err;
	1764	if (copy_compat_semid_to_user(p, &semid64, version))
	1765	err = -EFAULT;
	1766	return err;
	1767	case GETVAL:
	1768	case GETPID:
	1769	case GETNCNT:
	1770	case GETZCNT:
	1771	case GETALL:
	1772	case SETALL:
	1773	return semctl_main(ns, semid, semnum, cmd, p);
	1774	case SETVAL:
	1775	return semctl_setval(ns, semid, semnum, arg);
	1776	case IPC_SET:
	1777	if (copy_compat_semid_from_user(&semid64, p, version))
	1778	return -EFAULT;
	1779	/* fallthru */
	1780	case IPC_RMID:
	1781	return semctl_down(ns, semid, cmd, &semid64);
	1782	default:
	1783	return -EINVAL;
	1784	}
	1785	}
	1786
	/*
	 * Compat semctl system call entry: passes semid, semnum, cmd and arg
	 * through unchanged to compat_ksys_semctl() and returns its result.
	 */
	1787	COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
	1788	{
	1789	return compat_ksys_semctl(semid, semnum, cmd, arg);
	1790	}
	1791	#endif
	1792
	1793	/* If the task doesn't already have a undo_list, then allocate one
	1794	* here. We guarantee there is only one thread using this undo list,
	1795	* and current is THE ONE
	1796	*
	1797	* If this allocation and assignment succeeds, but later
	1798	* portions of this code fail, there is no need to free the sem_undo_list.
	1799	* Just let it stay associated with the task, and it'll be freed later
	1800	* at exit time.
	1801	*
	1802	* This can block, so callers must hold no locks.
	1803	*/
	1804	static inline int get_undo_list(struct sem_undo_list **undo_listp)
	1805	{
	1806	struct sem_undo_list *undo_list;
	1807
	1808	undo_list = current->sysvsem.undo_list;
	1809	if (!undo_list) {
	1810	undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
	1811	if (undo_list == NULL)
	1812	return -ENOMEM;
	1813	spin_lock_init(&undo_list->lock);
	1814	refcount_set(&undo_list->refcnt, 1);
	1815	INIT_LIST_HEAD(&undo_list->list_proc);
	1816
	1817	current->sysvsem.undo_list = undo_list;
	1818	}
	1819	*undo_listp = undo_list;
	1820	return 0;
	1821	}
	1822
	1823	static struct sem_undo __lookup_undo(struct sem_undo_list ulp, int semid)
	1824	{
	1825	struct sem_undo *un;
	1826
	1827	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
	1828	if (un->semid == semid)
	1829	return un;
	1830	}
	1831	return NULL;
	1832	}
	1833
	1834	static struct sem_undo lookup_undo(struct sem_undo_list ulp, int semid)
	1835	{
	1836	struct sem_undo *un;
	1837
	1838	assert_spin_locked(&ulp->lock);
	1839
	1840	un = __lookup_undo(ulp, semid);
	1841	if (un) {
	1842	list_del_rcu(&un->list_proc);
	1843	list_add_rcu(&un->list_proc, &ulp->list_proc);
	1844	}
	1845	return un;
	1846	}
	1847
	1848	/**
	1849	* find_alloc_undo - lookup (and if not present create) undo array
	1850	* @ns: namespace
	1851	* @semid: semaphore array id
	1852	*
	1853	* The function looks up (and if not present creates) the undo structure.
	1854	* The size of the undo structure depends on the size of the semaphore
	1855	* array, thus the alloc path is not that straightforward.
	1856	* Lifetime-rules: sem_undo is rcu-protected, on success, the function
	1857	* performs a rcu_read_lock().
	1858	*/
	1859	static struct sem_undo find_alloc_undo(struct ipc_namespace ns, int semid)
	1860	{
	1861	struct sem_array *sma;
	1862	struct sem_undo_list *ulp;
	1863	struct sem_undo un, new;
	1864	int nsems, error;
	1865
	1866	error = get_undo_list(&ulp);
	1867	if (error)
	1868	return ERR_PTR(error);
	1869
	1870	rcu_read_lock();
	1871	spin_lock(&ulp->lock);
	1872	un = lookup_undo(ulp, semid);
	1873	spin_unlock(&ulp->lock);
	1874	if (likely(un != NULL))
	1875	goto out;
	1876
	1877	/* no undo structure around - allocate one. */
	1878	/* step 1: figure out the size of the semaphore array */
	1879	sma = sem_obtain_object_check(ns, semid);
	1880	if (IS_ERR(sma)) {
	1881	rcu_read_unlock();
	1882	return ERR_CAST(sma);
	1883	}
	1884
	1885	nsems = sma->sem_nsems;
	1886	if (!ipc_rcu_getref(&sma->sem_perm)) {
	1887	rcu_read_unlock();
	1888	un = ERR_PTR(-EIDRM);
	1889	goto out;
	1890	}
	1891	rcu_read_unlock();
	1892
	1893	/* step 2: allocate new undo structure */
	1894	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	1895	if (!new) {
	1896	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
	1897	return ERR_PTR(-ENOMEM);
	1898	}
	1899
	1900	/* step 3: Acquire the lock on semaphore array */
	1901	rcu_read_lock();
	1902	sem_lock_and_putref(sma);
	1903	if (!ipc_valid_object(&sma->sem_perm)) {
	1904	sem_unlock(sma, -1);
	1905	rcu_read_unlock();
	1906	kfree(new);
	1907	un = ERR_PTR(-EIDRM);
	1908	goto out;
	1909	}
	1910	spin_lock(&ulp->lock);
	1911
	1912	/*
	1913	* step 4: check for races: did someone else allocate the undo struct?
	1914	*/
	1915	un = lookup_undo(ulp, semid);
	1916	if (un) {
	1917	kfree(new);
	1918	goto success;
	1919	}
	1920	/* step 5: initialize & link new undo structure */
	1921	new->semadj = (short *) &new[1];
	1922	new->ulp = ulp;
	1923	new->semid = semid;
	1924	assert_spin_locked(&ulp->lock);
	1925	list_add_rcu(&new->list_proc, &ulp->list_proc);
	1926	ipc_assert_locked_object(&sma->sem_perm);
	1927	list_add(&new->list_id, &sma->list_id);
	1928	un = new;
	1929
	1930	success:
	1931	spin_unlock(&ulp->lock);
	1932	sem_unlock(sma, -1);
	1933	out:
	1934	return un;
	1935	}
	1936
	1937	static long do_semtimedop(int semid, struct sembuf __user *tsops,
	1938	unsigned nsops, const struct timespec64 *timeout)
	1939	{
	1940	int error = -EINVAL;
	1941	struct sem_array *sma;
	1942	struct sembuf fast_sops[SEMOPM_FAST];
	1943	struct sembuf sops = fast_sops, sop;
	1944	struct sem_undo *un;
	1945	int max, locknum;
	1946	bool undos = false, alter = false, dupsop = false;
	1947	struct sem_queue queue;
	1948	unsigned long dup = 0, jiffies_left = 0;
	1949	struct ipc_namespace *ns;
	1950
	1951	ns = current->nsproxy->ipc_ns;
	1952
	1953	if (nsops < 1 \|\| semid < 0)
	1954	return -EINVAL;
	1955	if (nsops > ns->sc_semopm)
	1956	return -E2BIG;
	1957	if (nsops > SEMOPM_FAST) {
	1958	sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
	1959	if (sops == NULL)
	1960	return -ENOMEM;
	1961	}
	1962
	1963	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
	1964	error = -EFAULT;
	1965	goto out_free;
	1966	}
	1967
	1968	if (timeout) {
	1969	if (timeout->tv_sec < 0 \|\| timeout->tv_nsec < 0 \|\|
	1970	timeout->tv_nsec >= 1000000000L) {
	1971	error = -EINVAL;
	1972	goto out_free;
	1973	}
	1974	jiffies_left = timespec64_to_jiffies(timeout);
	1975	}
	1976
	1977	max = 0;
	1978	for (sop = sops; sop < sops + nsops; sop++) {
	1979	unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);
	1980
	1981	if (sop->sem_num >= max)
	1982	max = sop->sem_num;
	1983	if (sop->sem_flg & SEM_UNDO)
	1984	undos = true;
	1985	if (dup & mask) {
	1986	/*
	1987	* There was a previous alter access that appears
	1988	* to have accessed the same semaphore, thus use
	1989	* the dupsop logic. "appears", because the detection
	1990	* can only check % BITS_PER_LONG.
	1991	*/
	1992	dupsop = true;
	1993	}
	1994	if (sop->sem_op != 0) {
	1995	alter = true;
	1996	dup \|= mask;
	1997	}
	1998	}
	1999
	2000	if (undos) {
	2001	/* On success, find_alloc_undo takes the rcu_read_lock */
	2002	un = find_alloc_undo(ns, semid);
	2003	if (IS_ERR(un)) {
	2004	error = PTR_ERR(un);
	2005	goto out_free;
	2006	}
	2007	} else {
	2008	un = NULL;
	2009	rcu_read_lock();
	2010	}
	2011
	2012	sma = sem_obtain_object_check(ns, semid);
	2013	if (IS_ERR(sma)) {
	2014	rcu_read_unlock();
	2015	error = PTR_ERR(sma);
	2016	goto out_free;
	2017	}
	2018
	2019	error = -EFBIG;
	2020	if (max >= sma->sem_nsems) {
	2021	rcu_read_unlock();
	2022	goto out_free;
	2023	}
	2024
	2025	error = -EACCES;
	2026	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
	2027	rcu_read_unlock();
	2028	goto out_free;
	2029	}
	2030
	2031	error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
	2032	if (error) {
	2033	rcu_read_unlock();
	2034	goto out_free;
	2035	}
	2036
	2037	error = -EIDRM;
	2038	locknum = sem_lock(sma, sops, nsops);
	2039	/*
	2040	* We eventually might perform the following check in a lockless
	2041	* fashion, considering ipc_valid_object() locking constraints.
	2042	* If nsops == 1 and there is no contention for sem_perm.lock, then
	2043	* only a per-semaphore lock is held and it's OK to proceed with the
	2044	* check below. More details on the fine grained locking scheme
	2045	* entangled here and why it's RMID race safe on comments at sem_lock()
	2046	*/
	2047	if (!ipc_valid_object(&sma->sem_perm))
	2048	goto out_unlock_free;
	2049	/*
	2050	* semid identifiers are not unique - find_alloc_undo may have
	2051	* allocated an undo structure, it was invalidated by an RMID
	2052	* and now a new array with received the same id. Check and fail.
	2053	* This case can be detected checking un->semid. The existence of
	2054	* "un" itself is guaranteed by rcu.
	2055	*/
	2056	if (un && un->semid == -1)
	2057	goto out_unlock_free;
	2058
	2059	queue.sops = sops;
	2060	queue.nsops = nsops;
	2061	queue.undo = un;
	2062	queue.pid = task_tgid(current);
	2063	queue.alter = alter;
	2064	queue.dupsop = dupsop;
	2065
	2066	error = perform_atomic_semop(sma, &queue);
	2067	if (error == 0) { /* non-blocking succesfull path */
	2068	DEFINE_WAKE_Q(wake_q);
	2069
	2070	/*
	2071	* If the operation was successful, then do
	2072	* the required updates.
	2073	*/
	2074	if (alter)
	2075	do_smart_update(sma, sops, nsops, 1, &wake_q);
	2076	else
	2077	set_semotime(sma, sops);
	2078
	2079	sem_unlock(sma, locknum);
	2080	rcu_read_unlock();
	2081	wake_up_q(&wake_q);
	2082
	2083	goto out_free;
	2084	}
	2085	if (error < 0) /* non-blocking error path */
	2086	goto out_unlock_free;
	2087
	2088	/*
	2089	* We need to sleep on this operation, so we put the current
	2090	* task into the pending queue and go to sleep.
	2091	*/
	2092	if (nsops == 1) {
	2093	struct sem *curr;
	2094	int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
	2095	curr = &sma->sems[idx];
	2096
	2097	if (alter) {
	2098	if (sma->complex_count) {
	2099	list_add_tail(&queue.list,
	2100	&sma->pending_alter);
	2101	} else {
	2102
	2103	list_add_tail(&queue.list,
	2104	&curr->pending_alter);
	2105	}
	2106	} else {
	2107	list_add_tail(&queue.list, &curr->pending_const);
	2108	}
	2109	} else {
	2110	if (!sma->complex_count)
	2111	merge_queues(sma);
	2112
	2113	if (alter)
	2114	list_add_tail(&queue.list, &sma->pending_alter);
	2115	else
	2116	list_add_tail(&queue.list, &sma->pending_const);
	2117
	2118	sma->complex_count++;
	2119	}
	2120
	2121	do {
	2122	WRITE_ONCE(queue.status, -EINTR);
	2123	queue.sleeper = current;
	2124
	2125	__set_current_state(TASK_INTERRUPTIBLE);
	2126	sem_unlock(sma, locknum);
	2127	rcu_read_unlock();
	2128
	2129	if (timeout)
	2130	jiffies_left = schedule_timeout(jiffies_left);
	2131	else
	2132	schedule();
	2133
	2134	/*
	2135	* fastpath: the semop has completed, either successfully or
	2136	* not, from the syscall pov, is quite irrelevant to us at this
	2137	* point; we're done.
	2138	*
	2139	* We _do_ care, nonetheless, about being awoken by a signal or
	2140	* spuriously. The queue.status is checked again in the
	2141	* slowpath (aka after taking sem_lock), such that we can detect
	2142	* scenarios where we were awakened externally, during the
	2143	* window between wake_q_add() and wake_up_q().
	2144	*/
	2145	error = READ_ONCE(queue.status);
	2146	if (error != -EINTR) {
	2147	/*
	2148	* User space could assume that semop() is a memory
	2149	* barrier: Without the mb(), the cpu could
	2150	* speculatively read in userspace stale data that was
	2151	* overwritten by the previous owner of the semaphore.
	2152	*/
	2153	smp_mb();
	2154	goto out_free;
	2155	}
	2156
	2157	rcu_read_lock();
	2158	locknum = sem_lock(sma, sops, nsops);
	2159
	2160	if (!ipc_valid_object(&sma->sem_perm))
	2161	goto out_unlock_free;
	2162
	2163	error = READ_ONCE(queue.status);
	2164
	2165	/*
	2166	* If queue.status != -EINTR we are woken up by another process.
	2167	* Leave without unlink_queue(), but with sem_unlock().
	2168	*/
	2169	if (error != -EINTR)
	2170	goto out_unlock_free;
	2171
	2172	/*
	2173	* If an interrupt occurred we have to clean up the queue.
	2174	*/
	2175	if (timeout && jiffies_left == 0)
	2176	error = -EAGAIN;
	2177	} while (error == -EINTR && !signal_pending(current)); /* spurious */
	2178
	2179	unlink_queue(sma, &queue);
	2180
	2181	out_unlock_free:
	2182	sem_unlock(sma, locknum);
	2183	rcu_read_unlock();
	2184	out_free:
	2185	if (sops != fast_sops)
	2186	kvfree(sops);
	2187	return error;
	2188	}
	2189
	2190	long ksys_semtimedop(int semid, struct sembuf __user *tsops,
	2191	unsigned int nsops, const struct __kernel_timespec __user *timeout)
	2192	{
	2193	if (timeout) {
	2194	struct timespec64 ts;
	2195	if (get_timespec64(&ts, timeout))
	2196	return -EFAULT;
	2197	return do_semtimedop(semid, tsops, nsops, &ts);
	2198	}
	2199	return do_semtimedop(semid, tsops, nsops, NULL);
	2200	}
	2201
	2202	SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
	2203	unsigned int, nsops, const struct __kernel_timespec __user *, timeout)
	2204	{
	2205	return ksys_semtimedop(semid, tsops, nsops, timeout);
	2206	}
	2207
	2208	#ifdef CONFIG_COMPAT_32BIT_TIME
	2209	long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
	2210	unsigned int nsops,
	2211	const struct compat_timespec __user *timeout)
	2212	{
	2213	if (timeout) {
	2214	struct timespec64 ts;
	2215	if (compat_get_timespec64(&ts, timeout))
	2216	return -EFAULT;
	2217	return do_semtimedop(semid, tsems, nsops, &ts);
	2218	}
	2219	return do_semtimedop(semid, tsems, nsops, NULL);
	2220	}
	2221
	2222	COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
	2223	unsigned int, nsops,
	2224	const struct compat_timespec __user *, timeout)
	2225	{
	2226	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
	2227	}
	2228	#endif
	2229
	2230	SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
	2231	unsigned, nsops)
	2232	{
	2233	return do_semtimedop(semid, tsops, nsops, NULL);
	2234	}
	2235
	2236	/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
	2237	* parent and child tasks.
	2238	*/
	2239
	2240	int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
	2241	{
	2242	struct sem_undo_list *undo_list;
	2243	int error;
	2244
	2245	if (clone_flags & CLONE_SYSVSEM) {
	2246	error = get_undo_list(&undo_list);
	2247	if (error)
	2248	return error;
	2249	refcount_inc(&undo_list->refcnt);
	2250	tsk->sysvsem.undo_list = undo_list;
	2251	} else
	2252	tsk->sysvsem.undo_list = NULL;
	2253
	2254	return 0;
	2255	}
	2256
	2257	/*
	2258	* add semadj values to semaphores, free undo structures.
	2259	* undo structures are not freed when semaphore arrays are destroyed
	2260	* so some of them may be out of date.
	2261	* IMPLEMENTATION NOTE: There is some confusion over whether the
	2262	* set of adjustments that needs to be done should be done in an atomic
	2263	* manner or not. That is, if we are attempting to decrement the semval
	2264	* should we queue up and wait until we can do so legally?
	2265	* The original implementation attempted to do this (queue and wait).
	2266	* The current implementation does not do so. The POSIX standard
	2267	* and SVID should be consulted to determine what behavior is mandated.
	2268	*/
	2269	void exit_sem(struct task_struct *tsk)
	2270	{
	2271	struct sem_undo_list *ulp;
	2272
	2273	ulp = tsk->sysvsem.undo_list;
	2274	if (!ulp)
	2275	return;
	2276	tsk->sysvsem.undo_list = NULL;
	2277
	2278	if (!refcount_dec_and_test(&ulp->refcnt))
	2279	return;
	2280
	2281	for (;;) {
	2282	struct sem_array *sma;
	2283	struct sem_undo *un;
	2284	int semid, i;
	2285	DEFINE_WAKE_Q(wake_q);
	2286
	2287	cond_resched();
	2288
	2289	rcu_read_lock();
	2290	un = list_entry_rcu(ulp->list_proc.next,
	2291	struct sem_undo, list_proc);
	2292	if (&un->list_proc == &ulp->list_proc) {
	2293	/*
	2294	* We must wait for freeary() before freeing this ulp,
	2295	* in case we raced with last sem_undo. There is a small
	2296	* possibility where we exit while freeary() didn't
	2297	* finish unlocking sem_undo_list.
	2298	*/
	2299	spin_lock(&ulp->lock);
	2300	spin_unlock(&ulp->lock);
	2301	rcu_read_unlock();
	2302	break;
	2303	}
	2304	spin_lock(&ulp->lock);
	2305	semid = un->semid;
	2306	spin_unlock(&ulp->lock);
	2307
	2308	/* exit_sem raced with IPC_RMID, nothing to do */
	2309	if (semid == -1) {
	2310	rcu_read_unlock();
	2311	continue;
	2312	}
	2313
	2314	sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
	2315	/* exit_sem raced with IPC_RMID, nothing to do */
	2316	if (IS_ERR(sma)) {
	2317	rcu_read_unlock();
	2318	continue;
	2319	}
	2320
	2321	sem_lock(sma, NULL, -1);
	2322	/* exit_sem raced with IPC_RMID, nothing to do */
	2323	if (!ipc_valid_object(&sma->sem_perm)) {
	2324	sem_unlock(sma, -1);
	2325	rcu_read_unlock();
	2326	continue;
	2327	}
	2328	un = __lookup_undo(ulp, semid);
	2329	if (un == NULL) {
	2330	/* exit_sem raced with IPC_RMID+semget() that created
	2331	* exactly the same semid. Nothing to do.
	2332	*/
	2333	sem_unlock(sma, -1);
	2334	rcu_read_unlock();
	2335	continue;
	2336	}
	2337
	2338	/* remove un from the linked lists */
	2339	ipc_assert_locked_object(&sma->sem_perm);
	2340	list_del(&un->list_id);
	2341
	2342	/* we are the last process using this ulp, acquiring ulp->lock
	2343	* isn't required. Besides that, we are also protected against
	2344	* IPC_RMID as we hold sma->sem_perm lock now
	2345	*/
	2346	list_del_rcu(&un->list_proc);
	2347
	2348	/* perform adjustments registered in un */
	2349	for (i = 0; i < sma->sem_nsems; i++) {
	2350	struct sem *semaphore = &sma->sems[i];
	2351	if (un->semadj[i]) {
	2352	semaphore->semval += un->semadj[i];
	2353	/*
	2354	* Range checks of the new semaphore value,
	2355	* not defined by sus:
	2356	* - Some unices ignore the undo entirely
	2357	* (e.g. HP UX 11i 11.22, Tru64 V5.1)
	2358	* - some cap the value (e.g. FreeBSD caps
	2359	* at 0, but doesn't enforce SEMVMX)
	2360	*
	2361	* Linux caps the semaphore value, both at 0
	2362	* and at SEMVMX.
	2363	*
	2364	* Manfred <[email protected]>
	2365	*/
	2366	if (semaphore->semval < 0)
	2367	semaphore->semval = 0;
	2368	if (semaphore->semval > SEMVMX)
	2369	semaphore->semval = SEMVMX;
	2370	ipc_update_pid(&semaphore->sempid, task_tgid(current));
	2371	}
	2372	}
	2373	/* maybe some queued-up processes were waiting for this */
	2374	do_smart_update(sma, NULL, 0, 1, &wake_q);
	2375	sem_unlock(sma, -1);
	2376	rcu_read_unlock();
	2377	wake_up_q(&wake_q);
	2378
	2379	kfree_rcu(un, rcu);
	2380	}
	2381	kfree(ulp);
	2382	}
	2383
	2384	#ifdef CONFIG_PROC_FS
	2385	static int sysvipc_sem_proc_show(struct seq_file s, void it)
	2386	{
	2387	struct user_namespace *user_ns = seq_user_ns(s);
	2388	struct kern_ipc_perm *ipcp = it;
	2389	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	2390	time64_t sem_otime;
	2391
	2392	/*
	2393	* The proc interface isn't aware of sem_lock(), it calls
	2394	* ipc_lock_object() directly (in sysvipc_find_ipc).
	2395	* In order to stay compatible with sem_lock(), we must
	2396	* enter / leave complex_mode.
	2397	*/
	2398	complexmode_enter(sma);
	2399
	2400	sem_otime = get_semotime(sma);
	2401
	2402	seq_printf(s,
	2403	"%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
	2404	sma->sem_perm.key,
	2405	sma->sem_perm.id,
	2406	sma->sem_perm.mode,
	2407	sma->sem_nsems,
	2408	from_kuid_munged(user_ns, sma->sem_perm.uid),
	2409	from_kgid_munged(user_ns, sma->sem_perm.gid),
	2410	from_kuid_munged(user_ns, sma->sem_perm.cuid),
	2411	from_kgid_munged(user_ns, sma->sem_perm.cgid),
	2412	sem_otime,
	2413	sma->sem_ctime);
	2414
	2415	complexmode_tryleave(sma);
	2416
	2417	return 0;
	2418	}
	2419	#endif