]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/kernel/softirq.c | |
3 | * | |
4 | * Copyright (C) 1992 Linus Torvalds | |
5 | * | |
6 | * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) | |
7 | */ | |
8 | ||
9 | #include <linux/module.h> | |
10 | #include <linux/kernel_stat.h> | |
11 | #include <linux/interrupt.h> | |
12 | #include <linux/init.h> | |
13 | #include <linux/mm.h> | |
14 | #include <linux/notifier.h> | |
15 | #include <linux/percpu.h> | |
16 | #include <linux/cpu.h> | |
17 | #include <linux/kthread.h> | |
18 | #include <linux/rcupdate.h> | |
19 | ||
20 | #include <asm/irq.h> | |
21 | /* | |
22 | - No shared variables, all the data are CPU local. | |
23 | - If a softirq needs serialization, let it serialize itself | |
24 | by its own spinlocks. | |
25 | - Even if softirq is serialized, only local cpu is marked for | |
26 | execution. Hence, we get something sort of weak cpu binding. | |
27 | Though it is still not clear, will it result in better locality | |
28 | or will not. | |
29 | ||
30 | Examples: | |
31 | - NET RX softirq. It is multithreaded and does not require | |
32 | any global serialization. | |
33 | - NET TX softirq. It kicks software netdevice queues, hence | |
34 | it is logically serialized per device, but this serialization | |
35 | is invisible to common code. | |
36 | - Tasklets: serialized wrt itself. | |
37 | */ | |
38 | ||
39 | #ifndef __ARCH_IRQ_STAT | |
40 | irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned; | |
41 | EXPORT_SYMBOL(irq_stat); | |
42 | #endif | |
43 | ||
44 | static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; | |
45 | ||
46 | static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |
47 | ||
48 | /* | |
49 | * we cannot loop indefinitely here to avoid userspace starvation, | |
50 | * but we also don't want to introduce a worst case 1/HZ latency | |
51 | * to the pending events, so lets the scheduler to balance | |
52 | * the softirq load for us. | |
53 | */ | |
54 | static inline void wakeup_softirqd(void) | |
55 | { | |
56 | /* Interrupts are disabled: no need to stop preemption */ | |
57 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); | |
58 | ||
59 | if (tsk && tsk->state != TASK_RUNNING) | |
60 | wake_up_process(tsk); | |
61 | } | |
62 | ||
/*
 * Softirq processing is restarted at most MAX_SOFTIRQ_RESTART times;
 * after that the remaining work is handed to ksoftirqd.
 *
 * The value was established via experimentation.  The two things to
 * balance are latency and fairness: softirqs should be handled as soon
 * as possible, but they must not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART	10
73 | ||
74 | asmlinkage void __do_softirq(void) | |
75 | { | |
76 | struct softirq_action *h; | |
77 | __u32 pending; | |
78 | int max_restart = MAX_SOFTIRQ_RESTART; | |
79 | int cpu; | |
80 | ||
81 | pending = local_softirq_pending(); | |
82 | ||
83 | local_bh_disable(); | |
84 | cpu = smp_processor_id(); | |
85 | restart: | |
86 | /* Reset the pending bitmask before enabling irqs */ | |
87 | local_softirq_pending() = 0; | |
88 | ||
89 | local_irq_enable(); | |
90 | ||
91 | h = softirq_vec; | |
92 | ||
93 | do { | |
94 | if (pending & 1) { | |
95 | h->action(h); | |
96 | rcu_bh_qsctr_inc(cpu); | |
97 | } | |
98 | h++; | |
99 | pending >>= 1; | |
100 | } while (pending); | |
101 | ||
102 | local_irq_disable(); | |
103 | ||
104 | pending = local_softirq_pending(); | |
105 | if (pending && --max_restart) | |
106 | goto restart; | |
107 | ||
108 | if (pending) | |
109 | wakeup_softirqd(); | |
110 | ||
111 | __local_bh_enable(); | |
112 | } | |
113 | ||
114 | #ifndef __ARCH_HAS_DO_SOFTIRQ | |
115 | ||
116 | asmlinkage void do_softirq(void) | |
117 | { | |
118 | __u32 pending; | |
119 | unsigned long flags; | |
120 | ||
121 | if (in_interrupt()) | |
122 | return; | |
123 | ||
124 | local_irq_save(flags); | |
125 | ||
126 | pending = local_softirq_pending(); | |
127 | ||
128 | if (pending) | |
129 | __do_softirq(); | |
130 | ||
131 | local_irq_restore(flags); | |
132 | } | |
133 | ||
134 | EXPORT_SYMBOL(do_softirq); | |
135 | ||
136 | #endif | |
137 | ||
138 | void local_bh_enable(void) | |
139 | { | |
140 | WARN_ON(irqs_disabled()); | |
141 | /* | |
142 | * Keep preemption disabled until we are done with | |
143 | * softirq processing: | |
144 | */ | |
145 | sub_preempt_count(SOFTIRQ_OFFSET - 1); | |
146 | ||
147 | if (unlikely(!in_interrupt() && local_softirq_pending())) | |
148 | do_softirq(); | |
149 | ||
150 | dec_preempt_count(); | |
151 | preempt_check_resched(); | |
152 | } | |
153 | EXPORT_SYMBOL(local_bh_enable); | |
154 | ||
/*
 * How irq_exit() runs pending softirqs: through do_softirq(), which
 * saves and disables interrupts itself, or directly through
 * __do_softirq() when the architecture defines
 * __ARCH_IRQ_EXIT_IRQS_DISABLED.
 */
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	do_softirq()
#else
# define invoke_softirq()	__do_softirq()
#endif
160 | ||
161 | /* | |
162 | * Exit an interrupt context. Process softirqs if needed and possible: | |
163 | */ | |
164 | void irq_exit(void) | |
165 | { | |
166 | account_system_vtime(current); | |
167 | sub_preempt_count(IRQ_EXIT_OFFSET); | |
168 | if (!in_interrupt() && local_softirq_pending()) | |
169 | invoke_softirq(); | |
170 | preempt_enable_no_resched(); | |
171 | } | |
172 | ||
173 | /* | |
174 | * This function must run with irqs disabled! | |
175 | */ | |
176 | inline fastcall void raise_softirq_irqoff(unsigned int nr) | |
177 | { | |
178 | __raise_softirq_irqoff(nr); | |
179 | ||
180 | /* | |
181 | * If we're in an interrupt or softirq, we're done | |
182 | * (this also catches softirq-disabled code). We will | |
183 | * actually run the softirq once we return from | |
184 | * the irq or softirq. | |
185 | * | |
186 | * Otherwise we wake up ksoftirqd to make sure we | |
187 | * schedule the softirq soon. | |
188 | */ | |
189 | if (!in_interrupt()) | |
190 | wakeup_softirqd(); | |
191 | } | |
192 | ||
193 | EXPORT_SYMBOL(raise_softirq_irqoff); | |
194 | ||
195 | void fastcall raise_softirq(unsigned int nr) | |
196 | { | |
197 | unsigned long flags; | |
198 | ||
199 | local_irq_save(flags); | |
200 | raise_softirq_irqoff(nr); | |
201 | local_irq_restore(flags); | |
202 | } | |
203 | ||
204 | void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) | |
205 | { | |
206 | softirq_vec[nr].data = data; | |
207 | softirq_vec[nr].action = action; | |
208 | } | |
209 | ||
210 | EXPORT_SYMBOL(open_softirq); | |
211 | ||
212 | /* Tasklets */ | |
213 | struct tasklet_head | |
214 | { | |
215 | struct tasklet_struct *list; | |
216 | }; | |
217 | ||
218 | /* Some compilers disobey section attribute on statics when not | |
219 | initialized -- RR */ | |
220 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL }; | |
221 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL }; | |
222 | ||
223 | void fastcall __tasklet_schedule(struct tasklet_struct *t) | |
224 | { | |
225 | unsigned long flags; | |
226 | ||
227 | local_irq_save(flags); | |
228 | t->next = __get_cpu_var(tasklet_vec).list; | |
229 | __get_cpu_var(tasklet_vec).list = t; | |
230 | raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
231 | local_irq_restore(flags); | |
232 | } | |
233 | ||
234 | EXPORT_SYMBOL(__tasklet_schedule); | |
235 | ||
236 | void fastcall __tasklet_hi_schedule(struct tasklet_struct *t) | |
237 | { | |
238 | unsigned long flags; | |
239 | ||
240 | local_irq_save(flags); | |
241 | t->next = __get_cpu_var(tasklet_hi_vec).list; | |
242 | __get_cpu_var(tasklet_hi_vec).list = t; | |
243 | raise_softirq_irqoff(HI_SOFTIRQ); | |
244 | local_irq_restore(flags); | |
245 | } | |
246 | ||
247 | EXPORT_SYMBOL(__tasklet_hi_schedule); | |
248 | ||
249 | static void tasklet_action(struct softirq_action *a) | |
250 | { | |
251 | struct tasklet_struct *list; | |
252 | ||
253 | local_irq_disable(); | |
254 | list = __get_cpu_var(tasklet_vec).list; | |
255 | __get_cpu_var(tasklet_vec).list = NULL; | |
256 | local_irq_enable(); | |
257 | ||
258 | while (list) { | |
259 | struct tasklet_struct *t = list; | |
260 | ||
261 | list = list->next; | |
262 | ||
263 | if (tasklet_trylock(t)) { | |
264 | if (!atomic_read(&t->count)) { | |
265 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
266 | BUG(); | |
267 | t->func(t->data); | |
268 | tasklet_unlock(t); | |
269 | continue; | |
270 | } | |
271 | tasklet_unlock(t); | |
272 | } | |
273 | ||
274 | local_irq_disable(); | |
275 | t->next = __get_cpu_var(tasklet_vec).list; | |
276 | __get_cpu_var(tasklet_vec).list = t; | |
277 | __raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
278 | local_irq_enable(); | |
279 | } | |
280 | } | |
281 | ||
282 | static void tasklet_hi_action(struct softirq_action *a) | |
283 | { | |
284 | struct tasklet_struct *list; | |
285 | ||
286 | local_irq_disable(); | |
287 | list = __get_cpu_var(tasklet_hi_vec).list; | |
288 | __get_cpu_var(tasklet_hi_vec).list = NULL; | |
289 | local_irq_enable(); | |
290 | ||
291 | while (list) { | |
292 | struct tasklet_struct *t = list; | |
293 | ||
294 | list = list->next; | |
295 | ||
296 | if (tasklet_trylock(t)) { | |
297 | if (!atomic_read(&t->count)) { | |
298 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
299 | BUG(); | |
300 | t->func(t->data); | |
301 | tasklet_unlock(t); | |
302 | continue; | |
303 | } | |
304 | tasklet_unlock(t); | |
305 | } | |
306 | ||
307 | local_irq_disable(); | |
308 | t->next = __get_cpu_var(tasklet_hi_vec).list; | |
309 | __get_cpu_var(tasklet_hi_vec).list = t; | |
310 | __raise_softirq_irqoff(HI_SOFTIRQ); | |
311 | local_irq_enable(); | |
312 | } | |
313 | } | |
314 | ||
315 | ||
316 | void tasklet_init(struct tasklet_struct *t, | |
317 | void (*func)(unsigned long), unsigned long data) | |
318 | { | |
319 | t->next = NULL; | |
320 | t->state = 0; | |
321 | atomic_set(&t->count, 0); | |
322 | t->func = func; | |
323 | t->data = data; | |
324 | } | |
325 | ||
326 | EXPORT_SYMBOL(tasklet_init); | |
327 | ||
328 | void tasklet_kill(struct tasklet_struct *t) | |
329 | { | |
330 | if (in_interrupt()) | |
331 | printk("Attempt to kill tasklet from interrupt\n"); | |
332 | ||
333 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | |
334 | do | |
335 | yield(); | |
336 | while (test_bit(TASKLET_STATE_SCHED, &t->state)); | |
337 | } | |
338 | tasklet_unlock_wait(t); | |
339 | clear_bit(TASKLET_STATE_SCHED, &t->state); | |
340 | } | |
341 | ||
342 | EXPORT_SYMBOL(tasklet_kill); | |
343 | ||
344 | void __init softirq_init(void) | |
345 | { | |
346 | open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); | |
347 | open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); | |
348 | } | |
349 | ||
350 | static int ksoftirqd(void * __bind_cpu) | |
351 | { | |
352 | set_user_nice(current, 19); | |
353 | current->flags |= PF_NOFREEZE; | |
354 | ||
355 | set_current_state(TASK_INTERRUPTIBLE); | |
356 | ||
357 | while (!kthread_should_stop()) { | |
358 | preempt_disable(); | |
359 | if (!local_softirq_pending()) { | |
360 | preempt_enable_no_resched(); | |
361 | schedule(); | |
362 | preempt_disable(); | |
363 | } | |
364 | ||
365 | __set_current_state(TASK_RUNNING); | |
366 | ||
367 | while (local_softirq_pending()) { | |
368 | /* Preempt disable stops cpu going offline. | |
369 | If already offline, we'll be on wrong CPU: | |
370 | don't process */ | |
371 | if (cpu_is_offline((long)__bind_cpu)) | |
372 | goto wait_to_die; | |
373 | do_softirq(); | |
374 | preempt_enable_no_resched(); | |
375 | cond_resched(); | |
376 | preempt_disable(); | |
377 | } | |
378 | preempt_enable(); | |
379 | set_current_state(TASK_INTERRUPTIBLE); | |
380 | } | |
381 | __set_current_state(TASK_RUNNING); | |
382 | return 0; | |
383 | ||
384 | wait_to_die: | |
385 | preempt_enable(); | |
386 | /* Wait for kthread_stop */ | |
387 | set_current_state(TASK_INTERRUPTIBLE); | |
388 | while (!kthread_should_stop()) { | |
389 | schedule(); | |
390 | set_current_state(TASK_INTERRUPTIBLE); | |
391 | } | |
392 | __set_current_state(TASK_RUNNING); | |
393 | return 0; | |
394 | } | |
395 | ||
396 | #ifdef CONFIG_HOTPLUG_CPU | |
397 | /* | |
398 | * tasklet_kill_immediate is called to remove a tasklet which can already be | |
399 | * scheduled for execution on @cpu. | |
400 | * | |
401 | * Unlike tasklet_kill, this function removes the tasklet | |
402 | * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state. | |
403 | * | |
404 | * When this function is called, @cpu must be in the CPU_DEAD state. | |
405 | */ | |
406 | void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) | |
407 | { | |
408 | struct tasklet_struct **i; | |
409 | ||
410 | BUG_ON(cpu_online(cpu)); | |
411 | BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state)); | |
412 | ||
413 | if (!test_bit(TASKLET_STATE_SCHED, &t->state)) | |
414 | return; | |
415 | ||
416 | /* CPU is dead, so no lock needed. */ | |
417 | for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) { | |
418 | if (*i == t) { | |
419 | *i = t->next; | |
420 | return; | |
421 | } | |
422 | } | |
423 | BUG(); | |
424 | } | |
425 | ||
426 | static void takeover_tasklets(unsigned int cpu) | |
427 | { | |
428 | struct tasklet_struct **i; | |
429 | ||
430 | /* CPU is dead, so no lock needed. */ | |
431 | local_irq_disable(); | |
432 | ||
433 | /* Find end, append list for that CPU. */ | |
434 | for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next); | |
435 | *i = per_cpu(tasklet_vec, cpu).list; | |
436 | per_cpu(tasklet_vec, cpu).list = NULL; | |
437 | raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
438 | ||
439 | for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next); | |
440 | *i = per_cpu(tasklet_hi_vec, cpu).list; | |
441 | per_cpu(tasklet_hi_vec, cpu).list = NULL; | |
442 | raise_softirq_irqoff(HI_SOFTIRQ); | |
443 | ||
444 | local_irq_enable(); | |
445 | } | |
446 | #endif /* CONFIG_HOTPLUG_CPU */ | |
447 | ||
448 | static int __devinit cpu_callback(struct notifier_block *nfb, | |
449 | unsigned long action, | |
450 | void *hcpu) | |
451 | { | |
452 | int hotcpu = (unsigned long)hcpu; | |
453 | struct task_struct *p; | |
454 | ||
455 | switch (action) { | |
456 | case CPU_UP_PREPARE: | |
457 | BUG_ON(per_cpu(tasklet_vec, hotcpu).list); | |
458 | BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list); | |
459 | p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); | |
460 | if (IS_ERR(p)) { | |
461 | printk("ksoftirqd for %i failed\n", hotcpu); | |
462 | return NOTIFY_BAD; | |
463 | } | |
464 | kthread_bind(p, hotcpu); | |
465 | per_cpu(ksoftirqd, hotcpu) = p; | |
466 | break; | |
467 | case CPU_ONLINE: | |
468 | wake_up_process(per_cpu(ksoftirqd, hotcpu)); | |
469 | break; | |
470 | #ifdef CONFIG_HOTPLUG_CPU | |
471 | case CPU_UP_CANCELED: | |
472 | /* Unbind so it can run. Fall thru. */ | |
473 | kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id()); | |
474 | case CPU_DEAD: | |
475 | p = per_cpu(ksoftirqd, hotcpu); | |
476 | per_cpu(ksoftirqd, hotcpu) = NULL; | |
477 | kthread_stop(p); | |
478 | takeover_tasklets(hotcpu); | |
479 | break; | |
480 | #endif /* CONFIG_HOTPLUG_CPU */ | |
481 | } | |
482 | return NOTIFY_OK; | |
483 | } | |
484 | ||
485 | static struct notifier_block __devinitdata cpu_nfb = { | |
486 | .notifier_call = cpu_callback | |
487 | }; | |
488 | ||
489 | __init int spawn_ksoftirqd(void) | |
490 | { | |
491 | void *cpu = (void *)(long)smp_processor_id(); | |
492 | cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | |
493 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | |
494 | register_cpu_notifier(&cpu_nfb); | |
495 | return 0; | |
496 | } |