/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells ([email protected]).
 * Derived from arch/i386/kernel/semaphore.c
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        unsigned int flags;
#define RWSEM_WAITING_FOR_READ  0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};

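/*
 * The count word is split by the per-arch rwsem header into an 'active'
 * low part and a 'waiting' high part.  Illustrative values only (these
 * match the i386 header of this era; other architectures may differ):
 *
 *      RWSEM_ACTIVE_MASK       0x0000ffff      active readers + writers
 *      RWSEM_ACTIVE_BIAS       0x00000001      one active holder
 *      RWSEM_WAITING_BIAS      (-0x00010000)   one queued waiter
 *
 * A free semaphore is 0, a purely reader-held one is a small positive
 * number, and any queued waiter drives the count negative.
 */
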
#if RWSEM_DEBUG
#undef rwsemtrace
void rwsemtrace(struct rw_semaphore *sem, const char *str)
{
        printk("sem=%p\n", sem);
        printk("(sem)=%08lx\n", sem->count);
        if (sem->debug)
                printk("[%d] %s({%08lx})\n", current->pid, str, sem->count);
}
#endif

/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 *   - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
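/* Worked example (using the illustrative bias values noted above): coming
 * here from up_read() with no remaining holders and a single queued waiter,
 * the count is 0xffff0000 (active part zero, waiting part holding one
 * waiter bias); the try_again block below then attempts the 0 -> 1
 * transition of the active part before anything is taken off the queue.
 */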
static inline struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
        signed long oldcount, woken, loop;

        rwsemtrace(sem, "Entering __rwsem_do_wake");

        if (downgrading)
                goto dont_wake_writers;

        /* if we came through an up_xxxx() call, we only wake someone up
         * if we can transition the active part of the count from 0 -> 1
         */
 try_again:
        oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
                                                - RWSEM_ACTIVE_BIAS;
        if (oldcount & RWSEM_ACTIVE_MASK)
                goto undo;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);

        /* try to grant a single write lock if there's a writer at the front
         * of the queue - note we leave the 'active part' of the count
         * incremented by 1 and the waiting part incremented by 0x00010000
         */
        if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
                goto readers_only;

        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
         * It is allocated on the waiter's stack and may become invalid at
         * any time after that point (due to a wakeup from another source).
         */
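        /* The smp_mb() below makes sure the list_del() and the read of
         * waiter->task are complete before the NULL store becomes visible:
         * once the sleeping task in rwsem_down_failed_common() sees
         * waiter->task == NULL it may return and reuse the stack frame
         * holding 'waiter'.
         */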
        list_del(&waiter->list);
        tsk = waiter->task;
        smp_mb();
        waiter->task = NULL;
        wake_up_process(tsk);
        put_task_struct(tsk);
        goto out;

        /* don't want to wake any writers */
 dont_wake_writers:
        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
                goto out;

        /* grant an infinite number of read locks to the readers at the front
         * of the queue
         * - note we increment the 'active part' of the count by the number of
         *   readers before waking any processes up
         */
 readers_only:
        woken = 0;
        do {
                woken++;

                if (waiter->list.next == &sem->wait_list)
                        break;

                waiter = list_entry(waiter->list.next,
                                        struct rwsem_waiter, list);

        } while (waiter->flags & RWSEM_WAITING_FOR_READ);

        loop = woken;
        woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
        if (!downgrading)
                /* we'd already done one increment earlier */
                woken -= RWSEM_ACTIVE_BIAS;

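        /* Worked example with the illustrative 32-bit bias values: waking
         * three readers gives woken = 3 * (0x00000001 + 0x00010000)
         * = 0x00030003, i.e. three active biases gained and three waiter
         * biases cancelled; on the up_xxxx() path one RWSEM_ACTIVE_BIAS is
         * then subtracted because the try_again transition above already
         * claimed it.
         */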
        rwsem_atomic_add(woken, sem);

        next = sem->wait_list.next;
        for (; loop > 0; loop--) {
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
        }

        sem->wait_list.next = next;
        next->prev = &sem->wait_list;

 out:
        rwsemtrace(sem, "Leaving __rwsem_do_wake");
        return sem;

        /* undo the change to count, but check for a transition 1->0 */
 undo:
        if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0)
                goto out;
        goto try_again;
}

/*
 * wait for a lock to be granted
 */
static inline struct rw_semaphore *
rwsem_down_failed_common(struct rw_semaphore *sem,
                        struct rwsem_waiter *waiter, signed long adjustment)
{
        struct task_struct *tsk = current;
        signed long count;

        set_task_state(tsk, TASK_UNINTERRUPTIBLE);

        /* set up my own style of waitqueue */
        spin_lock_irq(&sem->wait_lock);
        waiter->task = tsk;
        get_task_struct(tsk);

        list_add_tail(&waiter->list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively read-locking */
        count = rwsem_atomic_update(adjustment, sem);

        /* if there are no active locks, wake the front queued process(es) up */
        if (!(count & RWSEM_ACTIVE_MASK))
                sem = __rwsem_do_wake(sem, 0);

        spin_unlock_irq(&sem->wait_lock);

        /* wait to be given the lock */
        for (;;) {
                if (!waiter->task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
        }

        tsk->state = TASK_RUNNING;

        return sem;
}

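/*
 * The functions below are the slow-path entry points reached from the
 * per-architecture fast paths in the rwsem headers.  As a rough sketch only
 * (the real fast paths are typically inline asm and vary by architecture),
 * a down_read() fast path behaves roughly like:
 *
 *      static inline void __down_read(struct rw_semaphore *sem)
 *      {
 *              if (rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) < 0)
 *                      rwsem_down_read_failed(sem);
 *      }
 *
 * i.e. only a count that goes negative (a writer active or waiters queued)
 * diverts into the code below.
 */
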
/*
 * wait for the read lock to be granted
 */
struct rw_semaphore fastcall __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
        struct rwsem_waiter waiter;

        rwsemtrace(sem, "Entering rwsem_down_read_failed");

        waiter.flags = RWSEM_WAITING_FOR_READ;
        rwsem_down_failed_common(sem, &waiter,
                                RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);

        rwsemtrace(sem, "Leaving rwsem_down_read_failed");
        return sem;
}

/*
 * wait for the write lock to be granted
 */
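/*
 * Note on the adjustments: the read fast path has (typically) already added
 * RWSEM_ACTIVE_BIAS before failing, so the read slow path above converts
 * that into a queued-waiter marker by applying RWSEM_WAITING_BIAS -
 * RWSEM_ACTIVE_BIAS.  The write fast path has (typically) already added
 * both biases at once, so the write slow path below only gives back the
 * active part and keeps the waiting bias in place while queued.  Exact
 * encodings live in the per-arch rwsem headers.
 */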
struct rw_semaphore fastcall __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
        struct rwsem_waiter waiter;

        rwsemtrace(sem, "Entering rwsem_down_write_failed");

        waiter.flags = RWSEM_WAITING_FOR_WRITE;
        rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);

        rwsemtrace(sem, "Leaving rwsem_down_write_failed");
        return sem;
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
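/*
 * Rough caller-side sketch (the per-arch fast paths vary; this only shows
 * how the active-part decrement leads here):
 *
 *      static inline void __up_write(struct rw_semaphore *sem)
 *      {
 *              if (rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem) < 0)
 *                      rwsem_wake(sem);
 *      }
 *
 * where a negative result means waiters are still queued.
 */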
struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        rwsemtrace(sem, "Entering rwsem_wake");

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, 0);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        rwsemtrace(sem, "Leaving rwsem_wake");

        return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        rwsemtrace(sem, "Entering rwsem_downgrade_wake");

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, 1);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        rwsemtrace(sem, "Leaving rwsem_downgrade_wake");
        return sem;
}
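
/*
 * Caller-side view of the downgrade path, for context (standard rwsem API;
 * sketch only):
 *
 *      down_write(sem);
 *      ... modify under exclusive access ...
 *      downgrade_write(sem);
 *      ... continue reading while other readers may enter ...
 *      up_read(sem);
 *
 * downgrade_write() turns the write hold into a read hold and, if waiters
 * remain queued, ends up in rwsem_downgrade_wake() above.
 */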

EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);
#if RWSEM_DEBUG
EXPORT_SYMBOL(rwsemtrace);
#endif