]>
Commit | Line | Data |
---|---|---|
fba2afaa DL |
1 | /* |
2 | * fs/signalfd.c | |
3 | * | |
4 | * Copyright (C) 2003 Linus Torvalds | |
5 | * | |
6 | * Mon Mar 5, 2007: Davide Libenzi <[email protected]> | |
7 | * Changed ->read() to return a siginfo structure instead of signal number. | |
8 | * Fixed locking in ->poll(). | |
9 | * Added sighand-detach notification. | |
10 | * Added fd re-use in sys_signalfd() syscall. | |
11 | * Now using anonymous inode source. | |
12 | * Thanks to Oleg Nesterov for useful code review and suggestions. | |
13 | * More comments and suggestions from Arnd Bergmann. | |
b3762bfc DA |
14 | * Sat May 19, 2007: Davi E. M. Arnaut <[email protected]> |
15 | * Retrieve multiple signals with one read() call | |
fba2afaa DL |
16 | */ |
17 | ||
18 | #include <linux/file.h> | |
19 | #include <linux/poll.h> | |
20 | #include <linux/init.h> | |
21 | #include <linux/fs.h> | |
22 | #include <linux/sched.h> | |
23 | #include <linux/kernel.h> | |
24 | #include <linux/signal.h> | |
25 | #include <linux/list.h> | |
26 | #include <linux/anon_inodes.h> | |
27 | #include <linux/signalfd.h> | |
28 | ||
29 | struct signalfd_ctx { | |
30 | struct list_head lnk; | |
31 | wait_queue_head_t wqh; | |
32 | sigset_t sigmask; | |
33 | struct task_struct *tsk; | |
34 | }; | |
35 | ||
/*
 * State carried between a successful signalfd_lock() and the matching
 * signalfd_unlock().
 */
struct signalfd_lockctx {
	/* Task whose sighand lock is currently held. */
	struct task_struct *tsk;
	/* Filled by lock_task_sighand(), consumed by unlock_task_sighand(). */
	unsigned long flags;
};
40 | ||
41 | /* | |
42 | * Tries to acquire the sighand lock. We do not increment the sighand | |
43 | * use count, and we do not even pin the task struct, so we need to | |
44 | * do it inside an RCU read lock, and we must be prepared for the | |
45 | * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand | |
46 | * being detached. We return 0 if the sighand has been detached, or | |
47 | * 1 if we were able to pin the sighand lock. | |
48 | */ | |
49 | static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) | |
50 | { | |
51 | struct sighand_struct *sighand = NULL; | |
52 | ||
53 | rcu_read_lock(); | |
54 | lk->tsk = rcu_dereference(ctx->tsk); | |
55 | if (likely(lk->tsk != NULL)) | |
56 | sighand = lock_task_sighand(lk->tsk, &lk->flags); | |
57 | rcu_read_unlock(); | |
58 | ||
f9ee228b ON |
59 | if (!sighand) |
60 | return 0; | |
61 | ||
62 | if (!ctx->tsk) { | |
fba2afaa | 63 | unlock_task_sighand(lk->tsk, &lk->flags); |
f9ee228b | 64 | return 0; |
fba2afaa DL |
65 | } |
66 | ||
f9ee228b ON |
67 | if (lk->tsk->tgid == current->tgid) |
68 | lk->tsk = current; | |
69 | ||
70 | return 1; | |
fba2afaa DL |
71 | } |
72 | ||
73 | static void signalfd_unlock(struct signalfd_lockctx *lk) | |
74 | { | |
75 | unlock_task_sighand(lk->tsk, &lk->flags); | |
76 | } | |
77 | ||
78 | /* | |
79 | * This must be called with the sighand lock held. | |
80 | */ | |
81 | void signalfd_deliver(struct task_struct *tsk, int sig) | |
82 | { | |
83 | struct sighand_struct *sighand = tsk->sighand; | |
84 | struct signalfd_ctx *ctx, *tmp; | |
85 | ||
86 | BUG_ON(!sig); | |
87 | list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { | |
88 | /* | |
89 | * We use a negative signal value as a way to broadcast that the | |
90 | * sighand has been orphaned, so that we can notify all the | |
91 | * listeners about this. Remember the ctx->sigmask is inverted, | |
92 | * so if the user is interested in a signal, that corresponding | |
93 | * bit will be zero. | |
94 | */ | |
95 | if (sig < 0) { | |
96 | if (ctx->tsk == tsk) { | |
97 | ctx->tsk = NULL; | |
98 | list_del_init(&ctx->lnk); | |
99 | wake_up(&ctx->wqh); | |
100 | } | |
101 | } else { | |
102 | if (!sigismember(&ctx->sigmask, sig)) | |
103 | wake_up(&ctx->wqh); | |
104 | } | |
105 | } | |
106 | } | |
107 | ||
108 | static void signalfd_cleanup(struct signalfd_ctx *ctx) | |
109 | { | |
110 | struct signalfd_lockctx lk; | |
111 | ||
112 | /* | |
113 | * This is tricky. If the sighand is gone, we do not need to remove | |
114 | * context from the list, the list itself won't be there anymore. | |
115 | */ | |
116 | if (signalfd_lock(ctx, &lk)) { | |
117 | list_del(&ctx->lnk); | |
118 | signalfd_unlock(&lk); | |
119 | } | |
120 | kfree(ctx); | |
121 | } | |
122 | ||
123 | static int signalfd_release(struct inode *inode, struct file *file) | |
124 | { | |
125 | signalfd_cleanup(file->private_data); | |
126 | return 0; | |
127 | } | |
128 | ||
129 | static unsigned int signalfd_poll(struct file *file, poll_table *wait) | |
130 | { | |
131 | struct signalfd_ctx *ctx = file->private_data; | |
132 | unsigned int events = 0; | |
133 | struct signalfd_lockctx lk; | |
134 | ||
135 | poll_wait(file, &ctx->wqh, wait); | |
136 | ||
137 | /* | |
138 | * Let the caller get a POLLIN in this case, ala socket recv() when | |
139 | * the peer disconnects. | |
140 | */ | |
141 | if (signalfd_lock(ctx, &lk)) { | |
f8738c5c DL |
142 | if ((lk.tsk == current && |
143 | next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || | |
fba2afaa DL |
144 | next_signal(&lk.tsk->signal->shared_pending, |
145 | &ctx->sigmask) > 0) | |
146 | events |= POLLIN; | |
147 | signalfd_unlock(&lk); | |
148 | } else | |
149 | events |= POLLIN; | |
150 | ||
151 | return events; | |
152 | } | |
153 | ||
154 | /* | |
155 | * Copied from copy_siginfo_to_user() in kernel/signal.c | |
156 | */ | |
157 | static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |
158 | siginfo_t const *kinfo) | |
159 | { | |
160 | long err; | |
161 | ||
162 | BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); | |
163 | ||
164 | /* | |
165 | * Unused memebers should be zero ... | |
166 | */ | |
167 | err = __clear_user(uinfo, sizeof(*uinfo)); | |
168 | ||
169 | /* | |
170 | * If you change siginfo_t structure, please be sure | |
171 | * this code is fixed accordingly. | |
172 | */ | |
173 | err |= __put_user(kinfo->si_signo, &uinfo->signo); | |
174 | err |= __put_user(kinfo->si_errno, &uinfo->err); | |
175 | err |= __put_user((short)kinfo->si_code, &uinfo->code); | |
176 | switch (kinfo->si_code & __SI_MASK) { | |
177 | case __SI_KILL: | |
178 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | |
179 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | |
180 | break; | |
181 | case __SI_TIMER: | |
182 | err |= __put_user(kinfo->si_tid, &uinfo->tid); | |
183 | err |= __put_user(kinfo->si_overrun, &uinfo->overrun); | |
184 | err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | |
185 | break; | |
186 | case __SI_POLL: | |
187 | err |= __put_user(kinfo->si_band, &uinfo->band); | |
188 | err |= __put_user(kinfo->si_fd, &uinfo->fd); | |
189 | break; | |
190 | case __SI_FAULT: | |
191 | err |= __put_user((long)kinfo->si_addr, &uinfo->addr); | |
192 | #ifdef __ARCH_SI_TRAPNO | |
193 | err |= __put_user(kinfo->si_trapno, &uinfo->trapno); | |
194 | #endif | |
195 | break; | |
196 | case __SI_CHLD: | |
197 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | |
198 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | |
199 | err |= __put_user(kinfo->si_status, &uinfo->status); | |
200 | err |= __put_user(kinfo->si_utime, &uinfo->utime); | |
201 | err |= __put_user(kinfo->si_stime, &uinfo->stime); | |
202 | break; | |
203 | case __SI_RT: /* This is not generated by the kernel as of now. */ | |
204 | case __SI_MESGQ: /* But this is */ | |
205 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | |
206 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | |
207 | err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr); | |
208 | break; | |
209 | default: /* this is just in case for now ... */ | |
210 | err |= __put_user(kinfo->si_pid, &uinfo->pid); | |
211 | err |= __put_user(kinfo->si_uid, &uinfo->uid); | |
212 | break; | |
213 | } | |
214 | ||
215 | return err ? -EFAULT: sizeof(*uinfo); | |
216 | } | |
217 | ||
b3762bfc DA |
218 | static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info, |
219 | int nonblock) | |
220 | { | |
221 | ssize_t ret; | |
222 | struct signalfd_lockctx lk; | |
223 | DECLARE_WAITQUEUE(wait, current); | |
224 | ||
225 | if (!signalfd_lock(ctx, &lk)) | |
226 | return 0; | |
227 | ||
228 | ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); | |
229 | switch (ret) { | |
230 | case 0: | |
231 | if (!nonblock) | |
232 | break; | |
233 | ret = -EAGAIN; | |
234 | default: | |
235 | signalfd_unlock(&lk); | |
236 | return ret; | |
237 | } | |
238 | ||
239 | add_wait_queue(&ctx->wqh, &wait); | |
240 | for (;;) { | |
241 | set_current_state(TASK_INTERRUPTIBLE); | |
242 | ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); | |
243 | signalfd_unlock(&lk); | |
244 | if (ret != 0) | |
245 | break; | |
246 | if (signal_pending(current)) { | |
247 | ret = -ERESTARTSYS; | |
248 | break; | |
249 | } | |
250 | schedule(); | |
251 | ret = signalfd_lock(ctx, &lk); | |
252 | if (unlikely(!ret)) { | |
253 | /* | |
254 | * Let the caller read zero byte, ala socket | |
255 | * recv() when the peer disconnect. This test | |
256 | * must be done before doing a dequeue_signal(), | |
257 | * because if the sighand has been orphaned, | |
258 | * the dequeue_signal() call is going to crash | |
259 | * because ->sighand will be long gone. | |
260 | */ | |
261 | break; | |
262 | } | |
263 | } | |
264 | ||
265 | remove_wait_queue(&ctx->wqh, &wait); | |
266 | __set_current_state(TASK_RUNNING); | |
267 | ||
268 | return ret; | |
269 | } | |
270 | ||
fba2afaa DL |
271 | /* |
272 | * Returns either the size of a "struct signalfd_siginfo", or zero if the | |
273 | * sighand we are attached to, has been orphaned. The "count" parameter | |
274 | * must be at least the size of a "struct signalfd_siginfo". | |
275 | */ | |
276 | static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, | |
277 | loff_t *ppos) | |
278 | { | |
279 | struct signalfd_ctx *ctx = file->private_data; | |
b3762bfc DA |
280 | struct signalfd_siginfo __user *siginfo; |
281 | int nonblock = file->f_flags & O_NONBLOCK; | |
282 | ssize_t ret, total = 0; | |
fba2afaa | 283 | siginfo_t info; |
fba2afaa | 284 | |
b3762bfc DA |
285 | count /= sizeof(struct signalfd_siginfo); |
286 | if (!count) | |
fba2afaa | 287 | return -EINVAL; |
fba2afaa | 288 | |
b3762bfc DA |
289 | siginfo = (struct signalfd_siginfo __user *) buf; |
290 | ||
291 | do { | |
292 | ret = signalfd_dequeue(ctx, &info, nonblock); | |
293 | if (unlikely(ret <= 0)) | |
294 | break; | |
295 | ret = signalfd_copyinfo(siginfo, &info); | |
296 | if (ret < 0) | |
297 | break; | |
298 | siginfo++; | |
299 | total += ret; | |
300 | nonblock = 1; | |
301 | } while (--count); | |
302 | ||
303 | return total ? total : ret; | |
fba2afaa DL |
304 | } |
305 | ||
306 | static const struct file_operations signalfd_fops = { | |
307 | .release = signalfd_release, | |
308 | .poll = signalfd_poll, | |
309 | .read = signalfd_read, | |
310 | }; | |
311 | ||
312 | /* | |
313 | * Create a file descriptor that is associated with our signal | |
314 | * state. We can pass it around to others if we want to, but | |
315 | * it will always be _our_ signal state. | |
316 | */ | |
317 | asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) | |
318 | { | |
319 | int error; | |
320 | sigset_t sigmask; | |
321 | struct signalfd_ctx *ctx; | |
322 | struct sighand_struct *sighand; | |
323 | struct file *file; | |
324 | struct inode *inode; | |
325 | struct signalfd_lockctx lk; | |
326 | ||
327 | if (sizemask != sizeof(sigset_t) || | |
328 | copy_from_user(&sigmask, user_mask, sizeof(sigmask))) | |
f50cadaa | 329 | return -EINVAL; |
fba2afaa DL |
330 | sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); |
331 | signotset(&sigmask); | |
332 | ||
333 | if (ufd == -1) { | |
334 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | |
335 | if (!ctx) | |
336 | return -ENOMEM; | |
337 | ||
338 | init_waitqueue_head(&ctx->wqh); | |
339 | ctx->sigmask = sigmask; | |
f9ee228b | 340 | ctx->tsk = current->group_leader; |
fba2afaa DL |
341 | |
342 | sighand = current->sighand; | |
343 | /* | |
344 | * Add this fd to the list of signal listeners. | |
345 | */ | |
346 | spin_lock_irq(&sighand->siglock); | |
347 | list_add_tail(&ctx->lnk, &sighand->signalfd_list); | |
348 | spin_unlock_irq(&sighand->siglock); | |
349 | ||
350 | /* | |
351 | * When we call this, the initialization must be complete, since | |
352 | * anon_inode_getfd() will install the fd. | |
353 | */ | |
354 | error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", | |
355 | &signalfd_fops, ctx); | |
356 | if (error) | |
357 | goto err_fdalloc; | |
358 | } else { | |
359 | file = fget(ufd); | |
360 | if (!file) | |
361 | return -EBADF; | |
362 | ctx = file->private_data; | |
363 | if (file->f_op != &signalfd_fops) { | |
364 | fput(file); | |
365 | return -EINVAL; | |
366 | } | |
367 | /* | |
368 | * We need to be prepared of the fact that the sighand this fd | |
369 | * is attached to, has been detched. In that case signalfd_lock() | |
370 | * will return 0, and we'll just skip setting the new mask. | |
371 | */ | |
372 | if (signalfd_lock(ctx, &lk)) { | |
373 | ctx->sigmask = sigmask; | |
374 | signalfd_unlock(&lk); | |
375 | } | |
376 | wake_up(&ctx->wqh); | |
377 | fput(file); | |
378 | } | |
379 | ||
380 | return ufd; | |
381 | ||
382 | err_fdalloc: | |
383 | signalfd_cleanup(ctx); | |
384 | return error; | |
385 | } | |
386 |