]>
Commit | Line | Data |
---|---|---|
c39649c3 BH |
1 | /* |
2 | * cpu_rmap.c: CPU affinity reverse-map support | |
3 | * Copyright 2011 Solarflare Communications Inc. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU General Public License version 2 as published | |
7 | * by the Free Software Foundation, incorporated herein by reference. | |
8 | */ | |
9 | ||
10 | #include <linux/cpu_rmap.h> | |
11 | #ifdef CONFIG_GENERIC_HARDIRQS | |
12 | #include <linux/interrupt.h> | |
13 | #endif | |
8bc3bcc9 | 14 | #include <linux/export.h> |
c39649c3 BH |
15 | |
16 | /* | |
17 | * These functions maintain a mapping from CPUs to some ordered set of | |
18 | * objects with CPU affinities. This can be seen as a reverse-map of | |
19 | * CPU affinity. However, we do not assume that the object affinities | |
20 | * cover all CPUs in the system. For those CPUs not directly covered | |
21 | * by object affinities, we attempt to find a nearest object based on | |
22 | * CPU topology. | |
23 | */ | |
24 | ||
25 | /** | |
26 | * alloc_cpu_rmap - allocate CPU affinity reverse-map | |
27 | * @size: Number of objects to be mapped | |
28 | * @flags: Allocation flags e.g. %GFP_KERNEL | |
29 | */ | |
30 | struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags) | |
31 | { | |
32 | struct cpu_rmap *rmap; | |
33 | unsigned int cpu; | |
34 | size_t obj_offset; | |
35 | ||
36 | /* This is a silly number of objects, and we use u16 indices. */ | |
37 | if (size > 0xffff) | |
38 | return NULL; | |
39 | ||
40 | /* Offset of object pointer array from base structure */ | |
41 | obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]), | |
42 | sizeof(void *)); | |
43 | ||
44 | rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags); | |
45 | if (!rmap) | |
46 | return NULL; | |
47 | ||
896f97ea | 48 | kref_init(&rmap->refcount); |
c39649c3 BH |
49 | rmap->obj = (void **)((char *)rmap + obj_offset); |
50 | ||
51 | /* Initially assign CPUs to objects on a rota, since we have | |
52 | * no idea where the objects are. Use infinite distance, so | |
53 | * any object with known distance is preferable. Include the | |
54 | * CPUs that are not present/online, since we definitely want | |
55 | * any newly-hotplugged CPUs to have some object assigned. | |
56 | */ | |
57 | for_each_possible_cpu(cpu) { | |
58 | rmap->near[cpu].index = cpu % size; | |
59 | rmap->near[cpu].dist = CPU_RMAP_DIST_INF; | |
60 | } | |
61 | ||
62 | rmap->size = size; | |
63 | return rmap; | |
64 | } | |
65 | EXPORT_SYMBOL(alloc_cpu_rmap); | |
66 | ||
896f97ea DD |
67 | /** |
68 | * cpu_rmap_release - internal reclaiming helper called from kref_put | |
69 | * @ref: kref to struct cpu_rmap | |
70 | */ | |
71 | static void cpu_rmap_release(struct kref *ref) | |
72 | { | |
73 | struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount); | |
74 | kfree(rmap); | |
75 | } | |
76 | ||
77 | /** | |
78 | * cpu_rmap_get - internal helper to get new ref on a cpu_rmap | |
79 | * @rmap: reverse-map allocated with alloc_cpu_rmap() | |
80 | */ | |
81 | static inline void cpu_rmap_get(struct cpu_rmap *rmap) | |
82 | { | |
83 | kref_get(&rmap->refcount); | |
84 | } | |
85 | ||
86 | /** | |
87 | * cpu_rmap_put - release ref on a cpu_rmap | |
88 | * @rmap: reverse-map allocated with alloc_cpu_rmap() | |
89 | */ | |
90 | int cpu_rmap_put(struct cpu_rmap *rmap) | |
91 | { | |
92 | return kref_put(&rmap->refcount, cpu_rmap_release); | |
93 | } | |
94 | EXPORT_SYMBOL(cpu_rmap_put); | |
95 | ||
c39649c3 BH |
96 | /* Reevaluate nearest object for given CPU, comparing with the given |
97 | * neighbours at the given distance. | |
98 | */ | |
99 | static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu, | |
100 | const struct cpumask *mask, u16 dist) | |
101 | { | |
102 | int neigh; | |
103 | ||
104 | for_each_cpu(neigh, mask) { | |
105 | if (rmap->near[cpu].dist > dist && | |
106 | rmap->near[neigh].dist <= dist) { | |
107 | rmap->near[cpu].index = rmap->near[neigh].index; | |
108 | rmap->near[cpu].dist = dist; | |
109 | return true; | |
110 | } | |
111 | } | |
112 | return false; | |
113 | } | |
114 | ||
#ifdef DEBUG
/* Dump the nearest-object index and distance of every possible CPU,
 * tagged with @prefix so successive dumps can be told apart.
 * Only built when DEBUG is defined.
 */
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned int index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		/* cpu is unsigned, so print it with %u rather than %d */
		pr_info("cpu %u -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
/* No-op stand-in used when DEBUG is not defined. */
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif
135 | ||
136 | /** | |
137 | * cpu_rmap_add - add object to a rmap | |
138 | * @rmap: CPU rmap allocated with alloc_cpu_rmap() | |
139 | * @obj: Object to add to rmap | |
140 | * | |
141 | * Return index of object. | |
142 | */ | |
143 | int cpu_rmap_add(struct cpu_rmap *rmap, void *obj) | |
144 | { | |
145 | u16 index; | |
146 | ||
147 | BUG_ON(rmap->used >= rmap->size); | |
148 | index = rmap->used++; | |
149 | rmap->obj[index] = obj; | |
150 | return index; | |
151 | } | |
152 | EXPORT_SYMBOL(cpu_rmap_add); | |
153 | ||
154 | /** | |
155 | * cpu_rmap_update - update CPU rmap following a change of object affinity | |
156 | * @rmap: CPU rmap to update | |
157 | * @index: Index of object whose affinity changed | |
158 | * @affinity: New CPU affinity of object | |
159 | */ | |
160 | int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, | |
161 | const struct cpumask *affinity) | |
162 | { | |
163 | cpumask_var_t update_mask; | |
164 | unsigned int cpu; | |
165 | ||
166 | if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL))) | |
167 | return -ENOMEM; | |
168 | ||
169 | /* Invalidate distance for all CPUs for which this used to be | |
170 | * the nearest object. Mark those CPUs for update. | |
171 | */ | |
172 | for_each_online_cpu(cpu) { | |
173 | if (rmap->near[cpu].index == index) { | |
174 | rmap->near[cpu].dist = CPU_RMAP_DIST_INF; | |
175 | cpumask_set_cpu(cpu, update_mask); | |
176 | } | |
177 | } | |
178 | ||
179 | debug_print_rmap(rmap, "after invalidating old distances"); | |
180 | ||
181 | /* Set distance to 0 for all CPUs in the new affinity mask. | |
182 | * Mark all CPUs within their NUMA nodes for update. | |
183 | */ | |
184 | for_each_cpu(cpu, affinity) { | |
185 | rmap->near[cpu].index = index; | |
186 | rmap->near[cpu].dist = 0; | |
187 | cpumask_or(update_mask, update_mask, | |
188 | cpumask_of_node(cpu_to_node(cpu))); | |
189 | } | |
190 | ||
191 | debug_print_rmap(rmap, "after updating neighbours"); | |
192 | ||
193 | /* Update distances based on topology */ | |
194 | for_each_cpu(cpu, update_mask) { | |
195 | if (cpu_rmap_copy_neigh(rmap, cpu, | |
196 | topology_thread_cpumask(cpu), 1)) | |
197 | continue; | |
198 | if (cpu_rmap_copy_neigh(rmap, cpu, | |
199 | topology_core_cpumask(cpu), 2)) | |
200 | continue; | |
201 | if (cpu_rmap_copy_neigh(rmap, cpu, | |
202 | cpumask_of_node(cpu_to_node(cpu)), 3)) | |
203 | continue; | |
204 | /* We could continue into NUMA node distances, but for now | |
205 | * we give up. | |
206 | */ | |
207 | } | |
208 | ||
209 | debug_print_rmap(rmap, "after copying neighbours"); | |
210 | ||
211 | free_cpumask_var(update_mask); | |
212 | return 0; | |
213 | } | |
214 | EXPORT_SYMBOL(cpu_rmap_update); | |
215 | ||
216 | #ifdef CONFIG_GENERIC_HARDIRQS | |
217 | ||
218 | /* Glue between IRQ affinity notifiers and CPU rmaps */ | |
219 | ||
220 | struct irq_glue { | |
221 | struct irq_affinity_notify notify; | |
222 | struct cpu_rmap *rmap; | |
223 | u16 index; | |
224 | }; | |
225 | ||
226 | /** | |
227 | * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs | |
228 | * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL | |
229 | * | |
896f97ea | 230 | * Must be called in process context, before freeing the IRQs. |
c39649c3 BH |
231 | */ |
232 | void free_irq_cpu_rmap(struct cpu_rmap *rmap) | |
233 | { | |
234 | struct irq_glue *glue; | |
235 | u16 index; | |
236 | ||
237 | if (!rmap) | |
238 | return; | |
239 | ||
240 | for (index = 0; index < rmap->used; index++) { | |
241 | glue = rmap->obj[index]; | |
242 | irq_set_affinity_notifier(glue->notify.irq, NULL); | |
243 | } | |
c39649c3 | 244 | |
896f97ea | 245 | cpu_rmap_put(rmap); |
c39649c3 BH |
246 | } |
247 | EXPORT_SYMBOL(free_irq_cpu_rmap); | |
248 | ||
896f97ea DD |
249 | /** |
250 | * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated | |
251 | * @notify: struct irq_affinity_notify passed by irq/manage.c | |
252 | * @mask: cpu mask for new SMP affinity | |
253 | * | |
254 | * This is executed in workqueue context. | |
255 | */ | |
c39649c3 BH |
256 | static void |
257 | irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) | |
258 | { | |
259 | struct irq_glue *glue = | |
260 | container_of(notify, struct irq_glue, notify); | |
261 | int rc; | |
262 | ||
263 | rc = cpu_rmap_update(glue->rmap, glue->index, mask); | |
264 | if (rc) | |
265 | pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc); | |
266 | } | |
267 | ||
896f97ea DD |
268 | /** |
269 | * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem | |
270 | * @ref: kref to struct irq_affinity_notify passed by irq/manage.c | |
271 | */ | |
c39649c3 BH |
272 | static void irq_cpu_rmap_release(struct kref *ref) |
273 | { | |
274 | struct irq_glue *glue = | |
275 | container_of(ref, struct irq_glue, notify.kref); | |
896f97ea DD |
276 | |
277 | cpu_rmap_put(glue->rmap); | |
c39649c3 BH |
278 | kfree(glue); |
279 | } | |
280 | ||
281 | /** | |
282 | * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map | |
283 | * @rmap: The reverse-map | |
284 | * @irq: The IRQ number | |
285 | * | |
286 | * This adds an IRQ affinity notifier that will update the reverse-map | |
287 | * automatically. | |
288 | * | |
289 | * Must be called in process context, after the IRQ is allocated but | |
290 | * before it is bound with request_irq(). | |
291 | */ | |
292 | int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) | |
293 | { | |
294 | struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL); | |
295 | int rc; | |
296 | ||
297 | if (!glue) | |
298 | return -ENOMEM; | |
299 | glue->notify.notify = irq_cpu_rmap_notify; | |
300 | glue->notify.release = irq_cpu_rmap_release; | |
301 | glue->rmap = rmap; | |
896f97ea | 302 | cpu_rmap_get(rmap); |
c39649c3 BH |
303 | glue->index = cpu_rmap_add(rmap, glue); |
304 | rc = irq_set_affinity_notifier(irq, &glue->notify); | |
896f97ea DD |
305 | if (rc) { |
306 | cpu_rmap_put(glue->rmap); | |
c39649c3 | 307 | kfree(glue); |
896f97ea | 308 | } |
c39649c3 BH |
309 | return rc; |
310 | } | |
311 | EXPORT_SYMBOL(irq_cpu_rmap_add); | |
312 | ||
313 | #endif /* CONFIG_GENERIC_HARDIRQS */ |