]>
Commit | Line | Data |
---|---|---|
3b20eb23 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
3e7ee490 | 2 | /* |
3e7ee490 HJ |
3 | * Copyright (c) 2009, Microsoft Corporation. |
4 | * | |
3e7ee490 HJ |
5 | * Authors: |
6 | * Haiyang Zhang <[email protected]> | |
7 | * Hank Janssen <[email protected]> | |
b0069f43 | 8 | * K. Y. Srinivasan <[email protected]> |
3e7ee490 | 9 | */ |
0a46618d HJ |
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
11 | ||
3e7ee490 HJ |
12 | #include <linux/init.h> |
13 | #include <linux/module.h> | |
14 | #include <linux/device.h> | |
3e7ee490 HJ |
15 | #include <linux/interrupt.h> |
16 | #include <linux/sysctl.h> | |
5a0e3ad6 | 17 | #include <linux/slab.h> |
b0069f43 | 18 | #include <linux/acpi.h> |
8b5d6d3b | 19 | #include <linux/completion.h> |
46a97191 | 20 | #include <linux/hyperv.h> |
b0209501 | 21 | #include <linux/kernel_stat.h> |
4061ed9e | 22 | #include <linux/clockchips.h> |
e513229b | 23 | #include <linux/cpu.h> |
68db0cf1 IM |
24 | #include <linux/sched/task_stack.h> |
25 | ||
1f48dcf1 | 26 | #include <linux/delay.h> |
96c1d058 NM |
27 | #include <linux/notifier.h> |
28 | #include <linux/ptrace.h> | |
35464483 | 29 | #include <linux/screen_info.h> |
510f7aef | 30 | #include <linux/kdebug.h> |
6d146aef | 31 | #include <linux/efi.h> |
4b44f2d1 | 32 | #include <linux/random.h> |
f3a99e76 | 33 | #include <linux/kernel.h> |
63ecc6d2 | 34 | #include <linux/syscore_ops.h> |
fd1fea68 | 35 | #include <clocksource/hyperv_timer.h> |
0f2a6619 | 36 | #include "hyperv_vmbus.h" |
3e7ee490 | 37 | |
fc76936d SH |
38 | struct vmbus_dynid { |
39 | struct list_head node; | |
40 | struct hv_vmbus_device_id id; | |
41 | }; | |
42 | ||
607c1a11 | 43 | static struct acpi_device *hv_acpi_dev; |
1168ac22 | 44 | |
71a6655d | 45 | static struct completion probe_event; |
98db4335 | 46 | |
76d36ab7 | 47 | static int hyperv_cpuhp_online; |
96c1d058 | 48 | |
81b18bce SM |
49 | static void *hv_panic_page; |
50 | ||
626b901f MK |
51 | /* Values parsed from ACPI DSDT */ |
52 | static int vmbus_irq; | |
53 | int vmbus_interrupt; | |
54 | ||
040026df TL |
55 | /* |
56 | * Boolean to control whether to report panic messages over Hyper-V. | |
57 | * | |
b18e3589 | 58 | * It can be set via /proc/sys/kernel/hyperv_record_panic_msg |
040026df TL |
59 | */ |
60 | static int sysctl_record_panic_msg = 1; | |
61 | ||
62 | static int hyperv_report_reg(void) | |
63 | { | |
64 | return !sysctl_record_panic_msg || !hv_panic_page; | |
65 | } | |
66 | ||
510f7aef VK |
67 | static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, |
68 | void *args) | |
69 | { | |
70 | struct pt_regs *regs; | |
71 | ||
74347a99 | 72 | vmbus_initiate_unload(true); |
510f7aef | 73 | |
73f26e52 TL |
74 | /* |
75 | * Hyper-V should be notified only once about a panic. If we will be | |
76 | * doing hyperv_report_panic_msg() later with kmsg data, don't do | |
77 | * the notification here. | |
78 | */ | |
79 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE | |
040026df | 80 | && hyperv_report_reg()) { |
74347a99 | 81 | regs = current_pt_regs(); |
f3a99e76 | 82 | hyperv_report_panic(regs, val, false); |
74347a99 | 83 | } |
96c1d058 NM |
84 | return NOTIFY_DONE; |
85 | } | |
86 | ||
510f7aef VK |
87 | static int hyperv_die_event(struct notifier_block *nb, unsigned long val, |
88 | void *args) | |
89 | { | |
49971e6b | 90 | struct die_args *die = args; |
510f7aef VK |
91 | struct pt_regs *regs = die->regs; |
92 | ||
608a973b MK |
93 | /* Don't notify Hyper-V if the die event is other than oops */ |
94 | if (val != DIE_OOPS) | |
95 | return NOTIFY_DONE; | |
96 | ||
73f26e52 TL |
97 | /* |
98 | * Hyper-V should be notified only once about a panic. If we will be | |
99 | * doing hyperv_report_panic_msg() later with kmsg data, don't do | |
100 | * the notification here. | |
101 | */ | |
040026df | 102 | if (hyperv_report_reg()) |
f3a99e76 | 103 | hyperv_report_panic(regs, val, true); |
510f7aef VK |
104 | return NOTIFY_DONE; |
105 | } | |
106 | ||
107 | static struct notifier_block hyperv_die_block = { | |
108 | .notifier_call = hyperv_die_event, | |
109 | }; | |
96c1d058 NM |
110 | static struct notifier_block hyperv_panic_block = { |
111 | .notifier_call = hyperv_panic_event, | |
112 | }; | |
113 | ||
6d146aef JO |
114 | static const char *fb_mmio_name = "fb_range"; |
115 | static struct resource *fb_mmio; | |
e2e80841 | 116 | static struct resource *hyperv_mmio; |
8aea7f82 | 117 | static DEFINE_MUTEX(hyperv_mmio_lock); |
98db4335 | 118 | |
cf6a2eac S |
119 | static int vmbus_exists(void) |
120 | { | |
121 | if (hv_acpi_dev == NULL) | |
122 | return -ENODEV; | |
123 | ||
124 | return 0; | |
125 | } | |
126 | ||
c2e5df61 | 127 | static u8 channel_monitor_group(const struct vmbus_channel *channel) |
76c52bbe GKH |
128 | { |
129 | return (u8)channel->offermsg.monitorid / 32; | |
130 | } | |
131 | ||
c2e5df61 | 132 | static u8 channel_monitor_offset(const struct vmbus_channel *channel) |
76c52bbe GKH |
133 | { |
134 | return (u8)channel->offermsg.monitorid % 32; | |
135 | } | |
136 | ||
c2e5df61 SH |
137 | static u32 channel_pending(const struct vmbus_channel *channel, |
138 | const struct hv_monitor_page *monitor_page) | |
76c52bbe GKH |
139 | { |
140 | u8 monitor_group = channel_monitor_group(channel); | |
c2e5df61 | 141 | |
76c52bbe GKH |
142 | return monitor_page->trigger_group[monitor_group].pending; |
143 | } | |
144 | ||
c2e5df61 SH |
145 | static u32 channel_latency(const struct vmbus_channel *channel, |
146 | const struct hv_monitor_page *monitor_page) | |
1cee272b GKH |
147 | { |
148 | u8 monitor_group = channel_monitor_group(channel); | |
149 | u8 monitor_offset = channel_monitor_offset(channel); | |
c2e5df61 | 150 | |
1cee272b GKH |
151 | return monitor_page->latency[monitor_group][monitor_offset]; |
152 | } | |
153 | ||
4947c745 GKH |
154 | static u32 channel_conn_id(struct vmbus_channel *channel, |
155 | struct hv_monitor_page *monitor_page) | |
156 | { | |
157 | u8 monitor_group = channel_monitor_group(channel); | |
158 | u8 monitor_offset = channel_monitor_offset(channel); | |
e4f2212e | 159 | |
4947c745 GKH |
160 | return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; |
161 | } | |
162 | ||
03f3a910 GKH |
163 | static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, |
164 | char *buf) | |
165 | { | |
166 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
167 | ||
168 | if (!hv_dev->channel) | |
169 | return -ENODEV; | |
170 | return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid); | |
171 | } | |
172 | static DEVICE_ATTR_RO(id); | |
173 | ||
a8fb5f3d GKH |
174 | static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, |
175 | char *buf) | |
176 | { | |
177 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
178 | ||
179 | if (!hv_dev->channel) | |
180 | return -ENODEV; | |
181 | return sprintf(buf, "%d\n", hv_dev->channel->state); | |
182 | } | |
183 | static DEVICE_ATTR_RO(state); | |
184 | ||
5ffd00e2 GKH |
185 | static ssize_t monitor_id_show(struct device *dev, |
186 | struct device_attribute *dev_attr, char *buf) | |
187 | { | |
188 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
189 | ||
190 | if (!hv_dev->channel) | |
191 | return -ENODEV; | |
192 | return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid); | |
193 | } | |
194 | static DEVICE_ATTR_RO(monitor_id); | |
195 | ||
68234c04 GKH |
196 | static ssize_t class_id_show(struct device *dev, |
197 | struct device_attribute *dev_attr, char *buf) | |
198 | { | |
199 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
200 | ||
201 | if (!hv_dev->channel) | |
202 | return -ENODEV; | |
203 | return sprintf(buf, "{%pUl}\n", | |
458c4475 | 204 | &hv_dev->channel->offermsg.offer.if_type); |
68234c04 GKH |
205 | } |
206 | static DEVICE_ATTR_RO(class_id); | |
207 | ||
7c55e1d0 GKH |
208 | static ssize_t device_id_show(struct device *dev, |
209 | struct device_attribute *dev_attr, char *buf) | |
210 | { | |
211 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
212 | ||
213 | if (!hv_dev->channel) | |
214 | return -ENODEV; | |
215 | return sprintf(buf, "{%pUl}\n", | |
458c4475 | 216 | &hv_dev->channel->offermsg.offer.if_instance); |
7c55e1d0 GKH |
217 | } |
218 | static DEVICE_ATTR_RO(device_id); | |
219 | ||
647fa371 GKH |
220 | static ssize_t modalias_show(struct device *dev, |
221 | struct device_attribute *dev_attr, char *buf) | |
222 | { | |
223 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
647fa371 | 224 | |
0027e3fd | 225 | return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type); |
647fa371 GKH |
226 | } |
227 | static DEVICE_ATTR_RO(modalias); | |
228 | ||
7ceb1c37 SH |
#ifdef CONFIG_NUMA
/* sysfs "numa_node": NUMA node of the channel's target CPU, or -ENODEV. */
static ssize_t numa_node_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct vmbus_channel *chan = device_to_hv_device(dev)->channel;

	if (!chan)
		return -ENODEV;

	return sprintf(buf, "%d\n", cpu_to_node(chan->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
242 | ||
76c52bbe GKH |
243 | static ssize_t server_monitor_pending_show(struct device *dev, |
244 | struct device_attribute *dev_attr, | |
245 | char *buf) | |
246 | { | |
247 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
248 | ||
249 | if (!hv_dev->channel) | |
250 | return -ENODEV; | |
251 | return sprintf(buf, "%d\n", | |
252 | channel_pending(hv_dev->channel, | |
fd8e3c35 | 253 | vmbus_connection.monitor_pages[0])); |
76c52bbe GKH |
254 | } |
255 | static DEVICE_ATTR_RO(server_monitor_pending); | |
256 | ||
257 | static ssize_t client_monitor_pending_show(struct device *dev, | |
258 | struct device_attribute *dev_attr, | |
259 | char *buf) | |
260 | { | |
261 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
262 | ||
263 | if (!hv_dev->channel) | |
264 | return -ENODEV; | |
265 | return sprintf(buf, "%d\n", | |
266 | channel_pending(hv_dev->channel, | |
267 | vmbus_connection.monitor_pages[1])); | |
268 | } | |
269 | static DEVICE_ATTR_RO(client_monitor_pending); | |
68234c04 | 270 | |
1cee272b GKH |
271 | static ssize_t server_monitor_latency_show(struct device *dev, |
272 | struct device_attribute *dev_attr, | |
273 | char *buf) | |
274 | { | |
275 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
276 | ||
277 | if (!hv_dev->channel) | |
278 | return -ENODEV; | |
279 | return sprintf(buf, "%d\n", | |
280 | channel_latency(hv_dev->channel, | |
281 | vmbus_connection.monitor_pages[0])); | |
282 | } | |
283 | static DEVICE_ATTR_RO(server_monitor_latency); | |
284 | ||
285 | static ssize_t client_monitor_latency_show(struct device *dev, | |
286 | struct device_attribute *dev_attr, | |
287 | char *buf) | |
288 | { | |
289 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
290 | ||
291 | if (!hv_dev->channel) | |
292 | return -ENODEV; | |
293 | return sprintf(buf, "%d\n", | |
294 | channel_latency(hv_dev->channel, | |
295 | vmbus_connection.monitor_pages[1])); | |
296 | } | |
297 | static DEVICE_ATTR_RO(client_monitor_latency); | |
298 | ||
4947c745 GKH |
299 | static ssize_t server_monitor_conn_id_show(struct device *dev, |
300 | struct device_attribute *dev_attr, | |
301 | char *buf) | |
302 | { | |
303 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
304 | ||
305 | if (!hv_dev->channel) | |
306 | return -ENODEV; | |
307 | return sprintf(buf, "%d\n", | |
308 | channel_conn_id(hv_dev->channel, | |
309 | vmbus_connection.monitor_pages[0])); | |
310 | } | |
311 | static DEVICE_ATTR_RO(server_monitor_conn_id); | |
312 | ||
313 | static ssize_t client_monitor_conn_id_show(struct device *dev, | |
314 | struct device_attribute *dev_attr, | |
315 | char *buf) | |
316 | { | |
317 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
318 | ||
319 | if (!hv_dev->channel) | |
320 | return -ENODEV; | |
321 | return sprintf(buf, "%d\n", | |
322 | channel_conn_id(hv_dev->channel, | |
323 | vmbus_connection.monitor_pages[1])); | |
324 | } | |
325 | static DEVICE_ATTR_RO(client_monitor_conn_id); | |
326 | ||
98f4c651 GKH |
327 | static ssize_t out_intr_mask_show(struct device *dev, |
328 | struct device_attribute *dev_attr, char *buf) | |
329 | { | |
330 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
331 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 332 | int ret; |
98f4c651 GKH |
333 | |
334 | if (!hv_dev->channel) | |
335 | return -ENODEV; | |
ba50bf1c DC |
336 | |
337 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
338 | &outbound); | |
339 | if (ret < 0) | |
340 | return ret; | |
341 | ||
98f4c651 GKH |
342 | return sprintf(buf, "%d\n", outbound.current_interrupt_mask); |
343 | } | |
344 | static DEVICE_ATTR_RO(out_intr_mask); | |
345 | ||
346 | static ssize_t out_read_index_show(struct device *dev, | |
347 | struct device_attribute *dev_attr, char *buf) | |
348 | { | |
349 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
350 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 351 | int ret; |
98f4c651 GKH |
352 | |
353 | if (!hv_dev->channel) | |
354 | return -ENODEV; | |
ba50bf1c DC |
355 | |
356 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
357 | &outbound); | |
358 | if (ret < 0) | |
359 | return ret; | |
98f4c651 GKH |
360 | return sprintf(buf, "%d\n", outbound.current_read_index); |
361 | } | |
362 | static DEVICE_ATTR_RO(out_read_index); | |
363 | ||
364 | static ssize_t out_write_index_show(struct device *dev, | |
365 | struct device_attribute *dev_attr, | |
366 | char *buf) | |
367 | { | |
368 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
369 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 370 | int ret; |
98f4c651 GKH |
371 | |
372 | if (!hv_dev->channel) | |
373 | return -ENODEV; | |
ba50bf1c DC |
374 | |
375 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
376 | &outbound); | |
377 | if (ret < 0) | |
378 | return ret; | |
98f4c651 GKH |
379 | return sprintf(buf, "%d\n", outbound.current_write_index); |
380 | } | |
381 | static DEVICE_ATTR_RO(out_write_index); | |
382 | ||
383 | static ssize_t out_read_bytes_avail_show(struct device *dev, | |
384 | struct device_attribute *dev_attr, | |
385 | char *buf) | |
386 | { | |
387 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
388 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 389 | int ret; |
98f4c651 GKH |
390 | |
391 | if (!hv_dev->channel) | |
392 | return -ENODEV; | |
ba50bf1c DC |
393 | |
394 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
395 | &outbound); | |
396 | if (ret < 0) | |
397 | return ret; | |
98f4c651 GKH |
398 | return sprintf(buf, "%d\n", outbound.bytes_avail_toread); |
399 | } | |
400 | static DEVICE_ATTR_RO(out_read_bytes_avail); | |
401 | ||
402 | static ssize_t out_write_bytes_avail_show(struct device *dev, | |
403 | struct device_attribute *dev_attr, | |
404 | char *buf) | |
405 | { | |
406 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
407 | struct hv_ring_buffer_debug_info outbound; | |
ba50bf1c | 408 | int ret; |
98f4c651 GKH |
409 | |
410 | if (!hv_dev->channel) | |
411 | return -ENODEV; | |
ba50bf1c DC |
412 | |
413 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, | |
414 | &outbound); | |
415 | if (ret < 0) | |
416 | return ret; | |
98f4c651 GKH |
417 | return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); |
418 | } | |
419 | static DEVICE_ATTR_RO(out_write_bytes_avail); | |
420 | ||
421 | static ssize_t in_intr_mask_show(struct device *dev, | |
422 | struct device_attribute *dev_attr, char *buf) | |
423 | { | |
424 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
425 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 426 | int ret; |
98f4c651 GKH |
427 | |
428 | if (!hv_dev->channel) | |
429 | return -ENODEV; | |
ba50bf1c DC |
430 | |
431 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
432 | if (ret < 0) | |
433 | return ret; | |
434 | ||
98f4c651 GKH |
435 | return sprintf(buf, "%d\n", inbound.current_interrupt_mask); |
436 | } | |
437 | static DEVICE_ATTR_RO(in_intr_mask); | |
438 | ||
439 | static ssize_t in_read_index_show(struct device *dev, | |
440 | struct device_attribute *dev_attr, char *buf) | |
441 | { | |
442 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
443 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 444 | int ret; |
98f4c651 GKH |
445 | |
446 | if (!hv_dev->channel) | |
447 | return -ENODEV; | |
ba50bf1c DC |
448 | |
449 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
450 | if (ret < 0) | |
451 | return ret; | |
452 | ||
98f4c651 GKH |
453 | return sprintf(buf, "%d\n", inbound.current_read_index); |
454 | } | |
455 | static DEVICE_ATTR_RO(in_read_index); | |
456 | ||
457 | static ssize_t in_write_index_show(struct device *dev, | |
458 | struct device_attribute *dev_attr, char *buf) | |
459 | { | |
460 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
461 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 462 | int ret; |
98f4c651 GKH |
463 | |
464 | if (!hv_dev->channel) | |
465 | return -ENODEV; | |
ba50bf1c DC |
466 | |
467 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
468 | if (ret < 0) | |
469 | return ret; | |
470 | ||
98f4c651 GKH |
471 | return sprintf(buf, "%d\n", inbound.current_write_index); |
472 | } | |
473 | static DEVICE_ATTR_RO(in_write_index); | |
474 | ||
475 | static ssize_t in_read_bytes_avail_show(struct device *dev, | |
476 | struct device_attribute *dev_attr, | |
477 | char *buf) | |
478 | { | |
479 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
480 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 481 | int ret; |
98f4c651 GKH |
482 | |
483 | if (!hv_dev->channel) | |
484 | return -ENODEV; | |
ba50bf1c DC |
485 | |
486 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
487 | if (ret < 0) | |
488 | return ret; | |
489 | ||
98f4c651 GKH |
490 | return sprintf(buf, "%d\n", inbound.bytes_avail_toread); |
491 | } | |
492 | static DEVICE_ATTR_RO(in_read_bytes_avail); | |
493 | ||
494 | static ssize_t in_write_bytes_avail_show(struct device *dev, | |
495 | struct device_attribute *dev_attr, | |
496 | char *buf) | |
497 | { | |
498 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
499 | struct hv_ring_buffer_debug_info inbound; | |
ba50bf1c | 500 | int ret; |
98f4c651 GKH |
501 | |
502 | if (!hv_dev->channel) | |
503 | return -ENODEV; | |
ba50bf1c DC |
504 | |
505 | ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); | |
506 | if (ret < 0) | |
507 | return ret; | |
508 | ||
98f4c651 GKH |
509 | return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); |
510 | } | |
511 | static DEVICE_ATTR_RO(in_write_bytes_avail); | |
512 | ||
042ab031 DC |
513 | static ssize_t channel_vp_mapping_show(struct device *dev, |
514 | struct device_attribute *dev_attr, | |
515 | char *buf) | |
516 | { | |
517 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
518 | struct vmbus_channel *channel = hv_dev->channel, *cur_sc; | |
042ab031 DC |
519 | int buf_size = PAGE_SIZE, n_written, tot_written; |
520 | struct list_head *cur; | |
521 | ||
522 | if (!channel) | |
523 | return -ENODEV; | |
524 | ||
3eb0ac86 APM |
525 | mutex_lock(&vmbus_connection.channel_mutex); |
526 | ||
042ab031 DC |
527 | tot_written = snprintf(buf, buf_size, "%u:%u\n", |
528 | channel->offermsg.child_relid, channel->target_cpu); | |
529 | ||
042ab031 DC |
530 | list_for_each(cur, &channel->sc_list) { |
531 | if (tot_written >= buf_size - 1) | |
532 | break; | |
533 | ||
534 | cur_sc = list_entry(cur, struct vmbus_channel, sc_list); | |
535 | n_written = scnprintf(buf + tot_written, | |
536 | buf_size - tot_written, | |
537 | "%u:%u\n", | |
538 | cur_sc->offermsg.child_relid, | |
539 | cur_sc->target_cpu); | |
540 | tot_written += n_written; | |
541 | } | |
542 | ||
3eb0ac86 | 543 | mutex_unlock(&vmbus_connection.channel_mutex); |
042ab031 DC |
544 | |
545 | return tot_written; | |
546 | } | |
547 | static DEVICE_ATTR_RO(channel_vp_mapping); | |
548 | ||
7047f17d S |
549 | static ssize_t vendor_show(struct device *dev, |
550 | struct device_attribute *dev_attr, | |
551 | char *buf) | |
552 | { | |
553 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
e4f2212e | 554 | |
7047f17d S |
555 | return sprintf(buf, "0x%x\n", hv_dev->vendor_id); |
556 | } | |
557 | static DEVICE_ATTR_RO(vendor); | |
558 | ||
559 | static ssize_t device_show(struct device *dev, | |
560 | struct device_attribute *dev_attr, | |
561 | char *buf) | |
562 | { | |
563 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
e4f2212e | 564 | |
7047f17d S |
565 | return sprintf(buf, "0x%x\n", hv_dev->device_id); |
566 | } | |
567 | static DEVICE_ATTR_RO(device); | |
568 | ||
d765edbb SH |
569 | static ssize_t driver_override_store(struct device *dev, |
570 | struct device_attribute *attr, | |
571 | const char *buf, size_t count) | |
572 | { | |
573 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
574 | char *driver_override, *old, *cp; | |
575 | ||
576 | /* We need to keep extra room for a newline */ | |
577 | if (count >= (PAGE_SIZE - 1)) | |
578 | return -EINVAL; | |
579 | ||
580 | driver_override = kstrndup(buf, count, GFP_KERNEL); | |
581 | if (!driver_override) | |
582 | return -ENOMEM; | |
583 | ||
584 | cp = strchr(driver_override, '\n'); | |
585 | if (cp) | |
586 | *cp = '\0'; | |
587 | ||
588 | device_lock(dev); | |
589 | old = hv_dev->driver_override; | |
590 | if (strlen(driver_override)) { | |
591 | hv_dev->driver_override = driver_override; | |
592 | } else { | |
593 | kfree(driver_override); | |
594 | hv_dev->driver_override = NULL; | |
595 | } | |
596 | device_unlock(dev); | |
597 | ||
598 | kfree(old); | |
599 | ||
600 | return count; | |
601 | } | |
602 | ||
603 | static ssize_t driver_override_show(struct device *dev, | |
604 | struct device_attribute *attr, char *buf) | |
605 | { | |
606 | struct hv_device *hv_dev = device_to_hv_device(dev); | |
607 | ssize_t len; | |
608 | ||
609 | device_lock(dev); | |
610 | len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override); | |
611 | device_unlock(dev); | |
612 | ||
613 | return len; | |
614 | } | |
615 | static DEVICE_ATTR_RW(driver_override); | |
616 | ||
98f4c651 | 617 | /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ |
fc76936d | 618 | static struct attribute *vmbus_dev_attrs[] = { |
03f3a910 | 619 | &dev_attr_id.attr, |
a8fb5f3d | 620 | &dev_attr_state.attr, |
5ffd00e2 | 621 | &dev_attr_monitor_id.attr, |
68234c04 | 622 | &dev_attr_class_id.attr, |
7c55e1d0 | 623 | &dev_attr_device_id.attr, |
647fa371 | 624 | &dev_attr_modalias.attr, |
7ceb1c37 SH |
625 | #ifdef CONFIG_NUMA |
626 | &dev_attr_numa_node.attr, | |
627 | #endif | |
76c52bbe GKH |
628 | &dev_attr_server_monitor_pending.attr, |
629 | &dev_attr_client_monitor_pending.attr, | |
1cee272b GKH |
630 | &dev_attr_server_monitor_latency.attr, |
631 | &dev_attr_client_monitor_latency.attr, | |
4947c745 GKH |
632 | &dev_attr_server_monitor_conn_id.attr, |
633 | &dev_attr_client_monitor_conn_id.attr, | |
98f4c651 GKH |
634 | &dev_attr_out_intr_mask.attr, |
635 | &dev_attr_out_read_index.attr, | |
636 | &dev_attr_out_write_index.attr, | |
637 | &dev_attr_out_read_bytes_avail.attr, | |
638 | &dev_attr_out_write_bytes_avail.attr, | |
639 | &dev_attr_in_intr_mask.attr, | |
640 | &dev_attr_in_read_index.attr, | |
641 | &dev_attr_in_write_index.attr, | |
642 | &dev_attr_in_read_bytes_avail.attr, | |
643 | &dev_attr_in_write_bytes_avail.attr, | |
042ab031 | 644 | &dev_attr_channel_vp_mapping.attr, |
7047f17d S |
645 | &dev_attr_vendor.attr, |
646 | &dev_attr_device.attr, | |
d765edbb | 647 | &dev_attr_driver_override.attr, |
03f3a910 GKH |
648 | NULL, |
649 | }; | |
46fc1548 KB |
650 | |
651 | /* | |
652 | * Device-level attribute_group callback function. Returns the permission for | |
653 | * each attribute, and returns 0 if an attribute is not visible. | |
654 | */ | |
655 | static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj, | |
656 | struct attribute *attr, int idx) | |
657 | { | |
658 | struct device *dev = kobj_to_dev(kobj); | |
659 | const struct hv_device *hv_dev = device_to_hv_device(dev); | |
660 | ||
661 | /* Hide the monitor attributes if the monitor mechanism is not used. */ | |
662 | if (!hv_dev->channel->offermsg.monitor_allocated && | |
663 | (attr == &dev_attr_monitor_id.attr || | |
664 | attr == &dev_attr_server_monitor_pending.attr || | |
665 | attr == &dev_attr_client_monitor_pending.attr || | |
666 | attr == &dev_attr_server_monitor_latency.attr || | |
667 | attr == &dev_attr_client_monitor_latency.attr || | |
668 | attr == &dev_attr_server_monitor_conn_id.attr || | |
669 | attr == &dev_attr_client_monitor_conn_id.attr)) | |
670 | return 0; | |
671 | ||
672 | return attr->mode; | |
673 | } | |
674 | ||
675 | static const struct attribute_group vmbus_dev_group = { | |
676 | .attrs = vmbus_dev_attrs, | |
677 | .is_visible = vmbus_dev_attr_is_visible | |
678 | }; | |
679 | __ATTRIBUTE_GROUPS(vmbus_dev); | |
03f3a910 | 680 | |
adde2487 S |
681 | /* |
682 | * vmbus_uevent - add uevent for our device | |
683 | * | |
684 | * This routine is invoked when a device is added or removed on the vmbus to | |
685 | * generate a uevent to udev in the userspace. The udev will then look at its | |
686 | * rule and the uevent generated here to load the appropriate driver | |
0ddda660 S |
687 | * |
688 | * The alias string will be of the form vmbus:guid where guid is the string | |
689 | * representation of the device guid (each byte of the guid will be | |
690 | * represented with two hex characters. | |
adde2487 S |
691 | */ |
692 | static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) | |
693 | { | |
694 | struct hv_device *dev = device_to_hv_device(device); | |
0027e3fd | 695 | const char *format = "MODALIAS=vmbus:%*phN"; |
0ddda660 | 696 | |
0027e3fd | 697 | return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type); |
adde2487 S |
698 | } |
699 | ||
d765edbb | 700 | static const struct hv_vmbus_device_id * |
593db803 | 701 | hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid) |
d765edbb SH |
702 | { |
703 | if (id == NULL) | |
704 | return NULL; /* empty device table */ | |
705 | ||
593db803 AS |
706 | for (; !guid_is_null(&id->guid); id++) |
707 | if (guid_equal(&id->guid, guid)) | |
d765edbb SH |
708 | return id; |
709 | ||
710 | return NULL; | |
711 | } | |
712 | ||
713 | static const struct hv_vmbus_device_id * | |
593db803 | 714 | hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid) |
3037a7b6 | 715 | { |
fc76936d SH |
716 | const struct hv_vmbus_device_id *id = NULL; |
717 | struct vmbus_dynid *dynid; | |
718 | ||
fc76936d SH |
719 | spin_lock(&drv->dynids.lock); |
720 | list_for_each_entry(dynid, &drv->dynids.list, node) { | |
593db803 | 721 | if (guid_equal(&dynid->id.guid, guid)) { |
fc76936d SH |
722 | id = &dynid->id; |
723 | break; | |
724 | } | |
725 | } | |
726 | spin_unlock(&drv->dynids.lock); | |
727 | ||
d765edbb SH |
728 | return id; |
729 | } | |
fc76936d | 730 | |
593db803 | 731 | static const struct hv_vmbus_device_id vmbus_device_null; |
fc76936d | 732 | |
d765edbb SH |
733 | /* |
734 | * Return a matching hv_vmbus_device_id pointer. | |
735 | * If there is no match, return NULL. | |
736 | */ | |
737 | static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv, | |
738 | struct hv_device *dev) | |
739 | { | |
593db803 | 740 | const guid_t *guid = &dev->dev_type; |
d765edbb | 741 | const struct hv_vmbus_device_id *id; |
3037a7b6 | 742 | |
d765edbb SH |
743 | /* When driver_override is set, only bind to the matching driver */ |
744 | if (dev->driver_override && strcmp(dev->driver_override, drv->name)) | |
745 | return NULL; | |
746 | ||
747 | /* Look at the dynamic ids first, before the static ones */ | |
748 | id = hv_vmbus_dynid_match(drv, guid); | |
749 | if (!id) | |
750 | id = hv_vmbus_dev_match(drv->id_table, guid); | |
751 | ||
752 | /* driver_override will always match, send a dummy id */ | |
753 | if (!id && dev->driver_override) | |
754 | id = &vmbus_device_null; | |
755 | ||
756 | return id; | |
3037a7b6 S |
757 | } |
758 | ||
fc76936d | 759 | /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ |
593db803 | 760 | static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) |
fc76936d SH |
761 | { |
762 | struct vmbus_dynid *dynid; | |
763 | ||
764 | dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); | |
765 | if (!dynid) | |
766 | return -ENOMEM; | |
767 | ||
768 | dynid->id.guid = *guid; | |
769 | ||
770 | spin_lock(&drv->dynids.lock); | |
771 | list_add_tail(&dynid->node, &drv->dynids.list); | |
772 | spin_unlock(&drv->dynids.lock); | |
773 | ||
774 | return driver_attach(&drv->driver); | |
775 | } | |
776 | ||
777 | static void vmbus_free_dynids(struct hv_driver *drv) | |
778 | { | |
779 | struct vmbus_dynid *dynid, *n; | |
780 | ||
781 | spin_lock(&drv->dynids.lock); | |
782 | list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { | |
783 | list_del(&dynid->node); | |
784 | kfree(dynid); | |
785 | } | |
786 | spin_unlock(&drv->dynids.lock); | |
787 | } | |
788 | ||
fc76936d SH |
789 | /* |
790 | * store_new_id - sysfs frontend to vmbus_add_dynid() | |
791 | * | |
792 | * Allow GUIDs to be added to an existing driver via sysfs. | |
793 | */ | |
794 | static ssize_t new_id_store(struct device_driver *driver, const char *buf, | |
795 | size_t count) | |
796 | { | |
797 | struct hv_driver *drv = drv_to_hv_drv(driver); | |
593db803 | 798 | guid_t guid; |
fc76936d SH |
799 | ssize_t retval; |
800 | ||
593db803 | 801 | retval = guid_parse(buf, &guid); |
31100108 AS |
802 | if (retval) |
803 | return retval; | |
fc76936d | 804 | |
d765edbb | 805 | if (hv_vmbus_dynid_match(drv, &guid)) |
fc76936d SH |
806 | return -EEXIST; |
807 | ||
808 | retval = vmbus_add_dynid(drv, &guid); | |
809 | if (retval) | |
810 | return retval; | |
811 | return count; | |
812 | } | |
813 | static DRIVER_ATTR_WO(new_id); | |
814 | ||
815 | /* | |
816 | * store_remove_id - remove a PCI device ID from this driver | |
817 | * | |
818 | * Removes a dynamic pci device ID to this driver. | |
819 | */ | |
820 | static ssize_t remove_id_store(struct device_driver *driver, const char *buf, | |
821 | size_t count) | |
822 | { | |
823 | struct hv_driver *drv = drv_to_hv_drv(driver); | |
824 | struct vmbus_dynid *dynid, *n; | |
593db803 | 825 | guid_t guid; |
31100108 | 826 | ssize_t retval; |
fc76936d | 827 | |
593db803 | 828 | retval = guid_parse(buf, &guid); |
31100108 AS |
829 | if (retval) |
830 | return retval; | |
fc76936d | 831 | |
31100108 | 832 | retval = -ENODEV; |
fc76936d SH |
833 | spin_lock(&drv->dynids.lock); |
834 | list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { | |
835 | struct hv_vmbus_device_id *id = &dynid->id; | |
836 | ||
593db803 | 837 | if (guid_equal(&id->guid, &guid)) { |
fc76936d SH |
838 | list_del(&dynid->node); |
839 | kfree(dynid); | |
840 | retval = count; | |
841 | break; | |
842 | } | |
843 | } | |
844 | spin_unlock(&drv->dynids.lock); | |
845 | ||
846 | return retval; | |
847 | } | |
848 | static DRIVER_ATTR_WO(remove_id); | |
849 | ||
850 | static struct attribute *vmbus_drv_attrs[] = { | |
851 | &driver_attr_new_id.attr, | |
852 | &driver_attr_remove_id.attr, | |
853 | NULL, | |
854 | }; | |
855 | ATTRIBUTE_GROUPS(vmbus_drv); | |
3037a7b6 | 856 | |
b7fc147b S |
857 | |
858 | /* | |
859 | * vmbus_match - Attempt to match the specified device to the specified driver | |
860 | */ | |
861 | static int vmbus_match(struct device *device, struct device_driver *driver) | |
862 | { | |
b7fc147b | 863 | struct hv_driver *drv = drv_to_hv_drv(driver); |
e8e27047 | 864 | struct hv_device *hv_dev = device_to_hv_device(device); |
b7fc147b | 865 | |
8981da32 DC |
866 | /* The hv_sock driver handles all hv_sock offers. */ |
867 | if (is_hvsock_channel(hv_dev->channel)) | |
868 | return drv->hvsock; | |
869 | ||
d765edbb | 870 | if (hv_vmbus_get_id(drv, hv_dev)) |
3037a7b6 | 871 | return 1; |
de632a2b | 872 | |
5841a829 | 873 | return 0; |
b7fc147b S |
874 | } |
875 | ||
f1f0d67b S |
876 | /* |
877 | * vmbus_probe - Add the new vmbus's child device | |
878 | */ | |
879 | static int vmbus_probe(struct device *child_device) | |
880 | { | |
881 | int ret = 0; | |
882 | struct hv_driver *drv = | |
883 | drv_to_hv_drv(child_device->driver); | |
9efd21e1 | 884 | struct hv_device *dev = device_to_hv_device(child_device); |
84946899 | 885 | const struct hv_vmbus_device_id *dev_id; |
f1f0d67b | 886 | |
d765edbb | 887 | dev_id = hv_vmbus_get_id(drv, dev); |
9efd21e1 | 888 | if (drv->probe) { |
84946899 | 889 | ret = drv->probe(dev, dev_id); |
b14a7b30 | 890 | if (ret != 0) |
0a46618d HJ |
891 | pr_err("probe failed for device %s (%d)\n", |
892 | dev_name(child_device), ret); | |
f1f0d67b | 893 | |
f1f0d67b | 894 | } else { |
0a46618d HJ |
895 | pr_err("probe not set for driver %s\n", |
896 | dev_name(child_device)); | |
6de925b1 | 897 | ret = -ENODEV; |
f1f0d67b S |
898 | } |
899 | return ret; | |
900 | } | |
901 | ||
c5dce3db S |
902 | /* |
903 | * vmbus_remove - Remove a vmbus device | |
904 | */ | |
905 | static int vmbus_remove(struct device *child_device) | |
906 | { | |
d15a0301 | 907 | struct hv_driver *drv; |
415b023a | 908 | struct hv_device *dev = device_to_hv_device(child_device); |
c5dce3db | 909 | |
d15a0301 S |
910 | if (child_device->driver) { |
911 | drv = drv_to_hv_drv(child_device->driver); | |
912 | if (drv->remove) | |
913 | drv->remove(dev); | |
d15a0301 | 914 | } |
c5dce3db S |
915 | |
916 | return 0; | |
917 | } | |
918 | ||
eb1bb259 S |
919 | |
920 | /* | |
921 | * vmbus_shutdown - Shutdown a vmbus device | |
922 | */ | |
923 | static void vmbus_shutdown(struct device *child_device) | |
924 | { | |
925 | struct hv_driver *drv; | |
ca6887fb | 926 | struct hv_device *dev = device_to_hv_device(child_device); |
eb1bb259 S |
927 | |
928 | ||
929 | /* The device may not be attached yet */ | |
930 | if (!child_device->driver) | |
931 | return; | |
932 | ||
933 | drv = drv_to_hv_drv(child_device->driver); | |
934 | ||
ca6887fb S |
935 | if (drv->shutdown) |
936 | drv->shutdown(dev); | |
eb1bb259 S |
937 | } |
938 | ||
83b50f83 | 939 | #ifdef CONFIG_PM_SLEEP |
271b2224 DC |
940 | /* |
941 | * vmbus_suspend - Suspend a vmbus device | |
942 | */ | |
943 | static int vmbus_suspend(struct device *child_device) | |
944 | { | |
945 | struct hv_driver *drv; | |
946 | struct hv_device *dev = device_to_hv_device(child_device); | |
947 | ||
948 | /* The device may not be attached yet */ | |
949 | if (!child_device->driver) | |
950 | return 0; | |
951 | ||
952 | drv = drv_to_hv_drv(child_device->driver); | |
953 | if (!drv->suspend) | |
954 | return -EOPNOTSUPP; | |
955 | ||
956 | return drv->suspend(dev); | |
957 | } | |
958 | ||
959 | /* | |
960 | * vmbus_resume - Resume a vmbus device | |
961 | */ | |
962 | static int vmbus_resume(struct device *child_device) | |
963 | { | |
964 | struct hv_driver *drv; | |
965 | struct hv_device *dev = device_to_hv_device(child_device); | |
966 | ||
967 | /* The device may not be attached yet */ | |
968 | if (!child_device->driver) | |
969 | return 0; | |
970 | ||
971 | drv = drv_to_hv_drv(child_device->driver); | |
972 | if (!drv->resume) | |
973 | return -EOPNOTSUPP; | |
974 | ||
975 | return drv->resume(dev); | |
976 | } | |
1a06d017 DC |
977 | #else |
978 | #define vmbus_suspend NULL | |
979 | #define vmbus_resume NULL | |
83b50f83 | 980 | #endif /* CONFIG_PM_SLEEP */ |
086e7a56 S |
981 | |
/*
 * vmbus_device_release - Final callback release of the vmbus child device
 *
 * Removes the device's debug directory, processes the removal of its
 * channel while holding vmbus_connection.channel_mutex, and finally frees
 * the hv_device itself.
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);
	/* Fetch the channel pointer up front; hv_dev is freed at the end. */
	struct vmbus_channel *channel = hv_dev->channel;

	hv_debug_rm_dev_dir(hv_dev);

	mutex_lock(&vmbus_connection.channel_mutex);
	hv_process_channel_removal(channel);
	mutex_unlock(&vmbus_connection.channel_mutex);
	kfree(hv_dev);
}
997 | ||
/*
 * Power-management callbacks installed on hv_bus through its .pm member.
 *
 * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
 *
 * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
 * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
 * is no way to wake up a Generation-2 VM.
 *
 * The other 4 ops are for hibernation.
 */

static const struct dev_pm_ops vmbus_pm = {
	.suspend_noirq	= NULL,
	.resume_noirq	= NULL,
	.freeze_noirq	= vmbus_suspend,
	.thaw_noirq	= vmbus_resume,
	.poweroff_noirq	= vmbus_suspend,
	.restore_noirq	= vmbus_resume,
};
1016 | ||
/*
 * The one and only one VMBus bus type.  It wires the match/probe/remove/
 * shutdown/uevent callbacks, the device and driver sysfs attribute groups,
 * and the PM ops defined in this file into the driver core.
 */
static struct bus_type  hv_bus = {
	.name =		"vmbus",
	.match =		vmbus_match,
	.shutdown =		vmbus_shutdown,
	.remove =		vmbus_remove,
	.probe =		vmbus_probe,
	.uevent =		vmbus_uevent,
	.dev_groups =		vmbus_dev_groups,
	.drv_groups =		vmbus_drv_groups,
	.pm =			&vmbus_pm,
};
1029 | ||
bf6506f6 TT |
/*
 * Work item carrying a private copy of one VMBus message so that it can be
 * handled later by vmbus_onmessage_work().
 *
 * @work: the work_struct queued on a workqueue.
 * @msg:  the message header followed by a variable-length payload; the
 *        allocation must be sized as sizeof(*ctx) plus the payload length.
 */
struct onmessage_work_context {
	struct work_struct work;
	struct {
		struct hv_message_header header;
		u8 payload[];
	} msg;
};
1037 | ||
1038 | static void vmbus_onmessage_work(struct work_struct *work) | |
1039 | { | |
1040 | struct onmessage_work_context *ctx; | |
1041 | ||
09a19628 VK |
1042 | /* Do not process messages if we're in DISCONNECTED state */ |
1043 | if (vmbus_connection.conn_state == DISCONNECTED) | |
1044 | return; | |
1045 | ||
bf6506f6 TT |
1046 | ctx = container_of(work, struct onmessage_work_context, |
1047 | work); | |
5cc41500 VK |
1048 | vmbus_onmessage((struct vmbus_channel_message_header *) |
1049 | &ctx->msg.payload); | |
bf6506f6 TT |
1050 | kfree(ctx); |
1051 | } | |
1052 | ||
d81274aa | 1053 | void vmbus_on_msg_dpc(unsigned long data) |
36199a99 | 1054 | { |
37cdd991 SH |
1055 | struct hv_per_cpu_context *hv_cpu = (void *)data; |
1056 | void *page_addr = hv_cpu->synic_message_page; | |
36199a99 GKH |
1057 | struct hv_message *msg = (struct hv_message *)page_addr + |
1058 | VMBUS_MESSAGE_SINT; | |
652594c7 | 1059 | struct vmbus_channel_message_header *hdr; |
e6242fa0 | 1060 | const struct vmbus_channel_message_table_entry *entry; |
bf6506f6 | 1061 | struct onmessage_work_context *ctx; |
cd95aad5 | 1062 | u32 message_type = msg->header.message_type; |
36199a99 | 1063 | |
b0a284dc VK |
1064 | /* |
1065 | * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as | |
1066 | * it is being used in 'struct vmbus_channel_message_header' definition | |
1067 | * which is supposed to match hypervisor ABI. | |
1068 | */ | |
1069 | BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32)); | |
1070 | ||
cd95aad5 | 1071 | if (message_type == HVMSG_NONE) |
7be3e169 VK |
1072 | /* no msg */ |
1073 | return; | |
652594c7 | 1074 | |
7be3e169 | 1075 | hdr = (struct vmbus_channel_message_header *)msg->u.payload; |
652594c7 | 1076 | |
c9fe0f8f VK |
1077 | trace_vmbus_on_msg_dpc(hdr); |
1078 | ||
7be3e169 VK |
1079 | if (hdr->msgtype >= CHANNELMSG_COUNT) { |
1080 | WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); | |
1081 | goto msg_handled; | |
1082 | } | |
652594c7 | 1083 | |
ac0f7d42 VK |
1084 | if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) { |
1085 | WARN_ONCE(1, "payload size is too large (%d)\n", | |
1086 | msg->header.payload_size); | |
1087 | goto msg_handled; | |
1088 | } | |
1089 | ||
7be3e169 | 1090 | entry = &channel_message_table[hdr->msgtype]; |
ddc9d357 DC |
1091 | |
1092 | if (!entry->message_handler) | |
1093 | goto msg_handled; | |
1094 | ||
52c7803f VK |
1095 | if (msg->header.payload_size < entry->min_payload_len) { |
1096 | WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", | |
1097 | hdr->msgtype, msg->header.payload_size); | |
1098 | goto msg_handled; | |
1099 | } | |
1100 | ||
7be3e169 | 1101 | if (entry->handler_type == VMHT_BLOCKING) { |
a276463b VK |
1102 | ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size, |
1103 | GFP_ATOMIC); | |
7be3e169 VK |
1104 | if (ctx == NULL) |
1105 | return; | |
652594c7 | 1106 | |
7be3e169 | 1107 | INIT_WORK(&ctx->work, vmbus_onmessage_work); |
ac0f7d42 VK |
1108 | memcpy(&ctx->msg, msg, sizeof(msg->header) + |
1109 | msg->header.payload_size); | |
652594c7 | 1110 | |
54a66265 S |
1111 | /* |
1112 | * The host can generate a rescind message while we | |
1113 | * may still be handling the original offer. We deal with | |
b9fa1b87 APM |
1114 | * this condition by relying on the synchronization provided |
1115 | * by offer_in_progress and by channel_mutex. See also the | |
1116 | * inline comments in vmbus_onoffer_rescind(). | |
54a66265 S |
1117 | */ |
1118 | switch (hdr->msgtype) { | |
1119 | case CHANNELMSG_RESCIND_CHANNELOFFER: | |
1120 | /* | |
1121 | * If we are handling the rescind message; | |
1122 | * schedule the work on the global work queue. | |
8a857c55 APM |
1123 | * |
1124 | * The OFFER message and the RESCIND message should | |
1125 | * not be handled by the same serialized work queue, | |
1126 | * because the OFFER handler may call vmbus_open(), | |
1127 | * which tries to open the channel by sending an | |
1128 | * OPEN_CHANNEL message to the host and waits for | |
1129 | * the host's response; however, if the host has | |
1130 | * rescinded the channel before it receives the | |
1131 | * OPEN_CHANNEL message, the host just silently | |
1132 | * ignores the OPEN_CHANNEL message; as a result, | |
1133 | * the guest's OFFER handler hangs for ever, if we | |
1134 | * handle the RESCIND message in the same serialized | |
1135 | * work queue: the RESCIND handler can not start to | |
1136 | * run before the OFFER handler finishes. | |
54a66265 | 1137 | */ |
b9fa1b87 | 1138 | schedule_work(&ctx->work); |
54a66265 S |
1139 | break; |
1140 | ||
1141 | case CHANNELMSG_OFFERCHANNEL: | |
b9fa1b87 APM |
1142 | /* |
1143 | * The host sends the offer message of a given channel | |
1144 | * before sending the rescind message of the same | |
1145 | * channel. These messages are sent to the guest's | |
1146 | * connect CPU; the guest then starts processing them | |
1147 | * in the tasklet handler on this CPU: | |
1148 | * | |
1149 | * VMBUS_CONNECT_CPU | |
1150 | * | |
1151 | * [vmbus_on_msg_dpc()] | |
1152 | * atomic_inc() // CHANNELMSG_OFFERCHANNEL | |
1153 | * queue_work() | |
1154 | * ... | |
1155 | * [vmbus_on_msg_dpc()] | |
1156 | * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER | |
1157 | * | |
1158 | * We rely on the memory-ordering properties of the | |
1159 | * queue_work() and schedule_work() primitives, which | |
1160 | * guarantee that the atomic increment will be visible | |
1161 | * to the CPUs which will execute the offer & rescind | |
1162 | * works by the time these works will start execution. | |
1163 | */ | |
54a66265 | 1164 | atomic_inc(&vmbus_connection.offer_in_progress); |
b9fa1b87 | 1165 | fallthrough; |
54a66265 S |
1166 | |
1167 | default: | |
1168 | queue_work(vmbus_connection.work_queue, &ctx->work); | |
1169 | } | |
7be3e169 VK |
1170 | } else |
1171 | entry->message_handler(hdr); | |
36199a99 | 1172 | |
652594c7 | 1173 | msg_handled: |
cd95aad5 | 1174 | vmbus_signal_eom(msg, message_type); |
36199a99 GKH |
1175 | } |
1176 | ||
83b50f83 | 1177 | #ifdef CONFIG_PM_SLEEP |
1f48dcf1 DC |
1178 | /* |
1179 | * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for | |
1180 | * hibernation, because hv_sock connections can not persist across hibernation. | |
1181 | */ | |
1182 | static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) | |
1183 | { | |
1184 | struct onmessage_work_context *ctx; | |
1185 | struct vmbus_channel_rescind_offer *rescind; | |
1186 | ||
1187 | WARN_ON(!is_hvsock_channel(channel)); | |
1188 | ||
1189 | /* | |
a276463b | 1190 | * Allocation size is small and the allocation should really not fail, |
1f48dcf1 DC |
1191 | * otherwise the state of the hv_sock connections ends up in limbo. |
1192 | */ | |
a276463b VK |
1193 | ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind), |
1194 | GFP_KERNEL | __GFP_NOFAIL); | |
1f48dcf1 DC |
1195 | |
1196 | /* | |
1197 | * So far, these are not really used by Linux. Just set them to the | |
1198 | * reasonable values conforming to the definitions of the fields. | |
1199 | */ | |
1200 | ctx->msg.header.message_type = 1; | |
1201 | ctx->msg.header.payload_size = sizeof(*rescind); | |
1202 | ||
1203 | /* These values are actually used by Linux. */ | |
a276463b | 1204 | rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload; |
1f48dcf1 DC |
1205 | rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; |
1206 | rescind->child_relid = channel->offermsg.child_relid; | |
1207 | ||
1208 | INIT_WORK(&ctx->work, vmbus_onmessage_work); | |
1209 | ||
b9fa1b87 | 1210 | queue_work(vmbus_connection.work_queue, &ctx->work); |
1f48dcf1 | 1211 | } |
83b50f83 | 1212 | #endif /* CONFIG_PM_SLEEP */ |
631e63a9 SH |
1213 | |
/*
 * Schedule all channels with events pending
 *
 * @hv_cpu: per-CPU context whose SynIC event page is scanned on hosts
 *          running VERSION_WIN8 or later.
 *
 * Walks the set bits of the interrupt/event bitmap; each bit index is the
 * relid of a channel with a pending event.  For every such channel the
 * callback is either invoked directly or deferred to the channel's
 * tasklet, depending on channel->callback_mode.
 */
static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
{
	unsigned long *recv_int_page;
	u32 maxbits, relid;

	if (vmbus_proto_version < VERSION_WIN8) {
		/* Older hosts: use the connection-wide interrupt page. */
		maxbits = MAX_NUM_CHANNELS_SUPPORTED;
		recv_int_page = vmbus_connection.recv_int_page;
	} else {
		/*
		 * When the host is win8 and beyond, the event page
		 * can be directly checked to get the id of the channel
		 * that has the interrupt pending.
		 */
		void *page_addr = hv_cpu->synic_event_page;
		union hv_synic_event_flags *event
			= (union hv_synic_event_flags *)page_addr +
						 VMBUS_MESSAGE_SINT;

		maxbits = HV_EVENT_FLAGS_COUNT;
		recv_int_page = event->flags;
	}

	if (unlikely(!recv_int_page))
		return;

	for_each_set_bit(relid, recv_int_page, maxbits) {
		void (*callback_fn)(void *context);
		struct vmbus_channel *channel;

		/* Claim the bit atomically; skip it if it was already cleared. */
		if (!sync_test_and_clear_bit(relid, recv_int_page))
			continue;

		/* Special case - vmbus channel protocol msg */
		if (relid == 0)
			continue;

		/*
		 * Pairs with the kfree_rcu() in vmbus_chan_release().
		 * Guarantees that the channel data structure doesn't
		 * get freed while the channel pointer below is being
		 * dereferenced.
		 */
		rcu_read_lock();

		/* Find channel based on relid */
		channel = relid2channel(relid);
		if (channel == NULL)
			goto sched_unlock_rcu;

		/* Rescinded channels are not scheduled. */
		if (channel->rescind)
			goto sched_unlock_rcu;

		/*
		 * Make sure that the ring buffer data structure doesn't get
		 * freed while we dereference the ring buffer pointer.  Test
		 * for the channel's onchannel_callback being NULL within a
		 * sched_lock critical section.  See also the inline comments
		 * in vmbus_reset_channel_cb().
		 */
		spin_lock(&channel->sched_lock);

		callback_fn = channel->onchannel_callback;
		if (unlikely(callback_fn == NULL))
			goto sched_unlock;

		trace_vmbus_chan_sched(channel);

		++channel->interrupts;

		switch (channel->callback_mode) {
		case HV_CALL_ISR:
			/* Run the callback right here, under sched_lock. */
			(*callback_fn)(channel->channel_callback_context);
			break;

		case HV_CALL_BATCHED:
			hv_begin_read(&channel->inbound);
			fallthrough;
		case HV_CALL_DIRECT:
			/* Defer the callback to the channel's tasklet. */
			tasklet_schedule(&channel->callback_event);
		}

sched_unlock:
		spin_unlock(&channel->sched_lock);
sched_unlock_rcu:
		rcu_read_unlock();
	}
}
1305 | ||
76d388cd | 1306 | static void vmbus_isr(void) |
36199a99 | 1307 | { |
37cdd991 SH |
1308 | struct hv_per_cpu_context *hv_cpu |
1309 | = this_cpu_ptr(hv_context.cpu_context); | |
1310 | void *page_addr = hv_cpu->synic_event_page; | |
36199a99 GKH |
1311 | struct hv_message *msg; |
1312 | union hv_synic_event_flags *event; | |
ae4636e6 | 1313 | bool handled = false; |
36199a99 | 1314 | |
37cdd991 | 1315 | if (unlikely(page_addr == NULL)) |
76d388cd | 1316 | return; |
5ab05951 S |
1317 | |
1318 | event = (union hv_synic_event_flags *)page_addr + | |
1319 | VMBUS_MESSAGE_SINT; | |
7341d908 S |
1320 | /* |
1321 | * Check for events before checking for messages. This is the order | |
1322 | * in which events and messages are checked in Windows guests on | |
1323 | * Hyper-V, and the Windows team suggested we do the same. | |
1324 | */ | |
36199a99 | 1325 | |
6552ecd7 S |
1326 | if ((vmbus_proto_version == VERSION_WS2008) || |
1327 | (vmbus_proto_version == VERSION_WIN7)) { | |
36199a99 | 1328 | |
6552ecd7 | 1329 | /* Since we are a child, we only need to check bit 0 */ |
5c1bec61 | 1330 | if (sync_test_and_clear_bit(0, event->flags)) |
6552ecd7 | 1331 | handled = true; |
6552ecd7 S |
1332 | } else { |
1333 | /* | |
1334 | * Our host is win8 or above. The signaling mechanism | |
1335 | * has changed and we can directly look at the event page. | |
1336 | * If bit n is set then we have an interrup on the channel | |
1337 | * whose id is n. | |
1338 | */ | |
ae4636e6 | 1339 | handled = true; |
ae4636e6 | 1340 | } |
793be9c7 | 1341 | |
6552ecd7 | 1342 | if (handled) |
631e63a9 | 1343 | vmbus_chan_sched(hv_cpu); |
6552ecd7 | 1344 | |
37cdd991 | 1345 | page_addr = hv_cpu->synic_message_page; |
7341d908 S |
1346 | msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; |
1347 | ||
1348 | /* Check if there are actual msgs to be processed */ | |
4061ed9e | 1349 | if (msg->header.message_type != HVMSG_NONE) { |
fd1fea68 MK |
1350 | if (msg->header.message_type == HVMSG_TIMER_EXPIRED) { |
1351 | hv_stimer0_isr(); | |
1352 | vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED); | |
1353 | } else | |
37cdd991 | 1354 | tasklet_schedule(&hv_cpu->msg_dpc); |
4061ed9e | 1355 | } |
4b44f2d1 | 1356 | |
626b901f | 1357 | add_interrupt_randomness(hv_get_vector(), 0); |
793be9c7 S |
1358 | } |
1359 | ||
81b18bce SM |
1360 | /* |
1361 | * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg | |
1362 | * buffer and call into Hyper-V to transfer the data. | |
1363 | */ | |
1364 | static void hv_kmsg_dump(struct kmsg_dumper *dumper, | |
1365 | enum kmsg_dump_reason reason) | |
1366 | { | |
1367 | size_t bytes_written; | |
1368 | phys_addr_t panic_pa; | |
1369 | ||
1370 | /* We are only interested in panics. */ | |
1371 | if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) | |
1372 | return; | |
1373 | ||
1374 | panic_pa = virt_to_phys(hv_panic_page); | |
1375 | ||
1376 | /* | |
1377 | * Write dump contents to the page. No need to synchronize; panic should | |
1378 | * be single-threaded. | |
1379 | */ | |
77b48bea | 1380 | kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, |
ddcaf3ca SM |
1381 | &bytes_written); |
1382 | if (bytes_written) | |
1383 | hyperv_report_panic_msg(panic_pa, bytes_written); | |
81b18bce SM |
1384 | } |
1385 | ||
/* kmsg dumper whose dump callback is hv_kmsg_dump(). */
static struct kmsg_dumper hv_kmsg_dumper = {
	.dump = hv_kmsg_dump,
};
1389 | ||
b0c03eff MC |
1390 | static void hv_kmsg_dump_register(void) |
1391 | { | |
1392 | int ret; | |
1393 | ||
1394 | hv_panic_page = hv_alloc_hyperv_zeroed_page(); | |
1395 | if (!hv_panic_page) { | |
1396 | pr_err("Hyper-V: panic message page memory allocation failed\n"); | |
1397 | return; | |
1398 | } | |
1399 | ||
1400 | ret = kmsg_dump_register(&hv_kmsg_dumper); | |
1401 | if (ret) { | |
1402 | pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); | |
1403 | hv_free_hyperv_page((unsigned long)hv_panic_page); | |
1404 | hv_panic_page = NULL; | |
1405 | } | |
1406 | } | |
1407 | ||
/* Handle returned by register_sysctl_table(); NULL when not registered. */
static struct ctl_table_header *hv_ctl_table_hdr;

/*
 * sysctl option to allow the user to control whether kmsg data should be
 * reported to Hyper-V on panic.
 *
 * The value is an int clamped to the range [0, 1] by proc_dointvec_minmax.
 */
static struct ctl_table hv_ctl_table[] = {
	{
		.procname       = "hyperv_record_panic_msg",
		.data           = &sysctl_record_panic_msg,
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
	{}
};
1426 | ||
/* Root sysctl table: places hv_ctl_table under the "kernel" directory. */
static struct ctl_table hv_root_table[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= hv_ctl_table
	},
	{}
};
e513229b | 1435 | |
3e189519 | 1436 | /* |
90c9960e GKH |
1437 | * vmbus_bus_init -Main vmbus driver initialization routine. |
1438 | * | |
1439 | * Here, we | |
0686e4f4 | 1440 | * - initialize the vmbus driver context |
0686e4f4 | 1441 | * - invoke the vmbus hv main init routine |
0686e4f4 | 1442 | * - retrieve the channel offers |
90c9960e | 1443 | */ |
efc26722 | 1444 | static int vmbus_bus_init(void) |
3e7ee490 | 1445 | { |
90c9960e | 1446 | int ret; |
3e7ee490 | 1447 | |
6d26e38f | 1448 | ret = hv_init(); |
90c9960e | 1449 | if (ret != 0) { |
0a46618d | 1450 | pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); |
d6c1c5de | 1451 | return ret; |
3e7ee490 HJ |
1452 | } |
1453 | ||
9adcac5c | 1454 | ret = bus_register(&hv_bus); |
d6c1c5de | 1455 | if (ret) |
d6f3609d | 1456 | return ret; |
3e7ee490 | 1457 | |
626b901f MK |
1458 | ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr); |
1459 | if (ret) | |
1460 | goto err_setup; | |
3e7ee490 | 1461 | |
2608fb65 JW |
1462 | ret = hv_synic_alloc(); |
1463 | if (ret) | |
1464 | goto err_alloc; | |
fd1fea68 | 1465 | |
800b6902 | 1466 | /* |
fd1fea68 MK |
1467 | * Initialize the per-cpu interrupt state and stimer state. |
1468 | * Then connect to the host. | |
800b6902 | 1469 | */ |
4a5f3cde | 1470 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", |
76d36ab7 VK |
1471 | hv_synic_init, hv_synic_cleanup); |
1472 | if (ret < 0) | |
fd1fea68 | 1473 | goto err_cpuhp; |
76d36ab7 VK |
1474 | hyperv_cpuhp_online = ret; |
1475 | ||
800b6902 | 1476 | ret = vmbus_connect(); |
8b9987e9 | 1477 | if (ret) |
17efbee8 | 1478 | goto err_connect; |
800b6902 | 1479 | |
96c1d058 NM |
1480 | /* |
1481 | * Only register if the crash MSRs are available | |
1482 | */ | |
cc2dd402 | 1483 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { |
81b18bce SM |
1484 | u64 hyperv_crash_ctl; |
1485 | /* | |
1486 | * Sysctl registration is not fatal, since by default | |
1487 | * reporting is enabled. | |
1488 | */ | |
1489 | hv_ctl_table_hdr = register_sysctl_table(hv_root_table); | |
1490 | if (!hv_ctl_table_hdr) | |
1491 | pr_err("Hyper-V: sysctl table register error"); | |
1492 | ||
1493 | /* | |
1494 | * Register for panic kmsg callback only if the right | |
1495 | * capability is supported by the hypervisor. | |
1496 | */ | |
9d9c9656 | 1497 | hv_get_crash_ctl(hyperv_crash_ctl); |
b0c03eff MC |
1498 | if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) |
1499 | hv_kmsg_dump_register(); | |
81b18bce | 1500 | |
510f7aef | 1501 | register_die_notifier(&hyperv_die_block); |
96c1d058 NM |
1502 | } |
1503 | ||
74347a99 TL |
1504 | /* |
1505 | * Always register the panic notifier because we need to unload | |
1506 | * the VMbus channel connection to prevent any VMbus | |
1507 | * activity after the VM panics. | |
1508 | */ | |
1509 | atomic_notifier_chain_register(&panic_notifier_list, | |
1510 | &hyperv_panic_block); | |
1511 | ||
2d6e882b | 1512 | vmbus_request_offers(); |
8b5d6d3b | 1513 | |
d6c1c5de | 1514 | return 0; |
8b9987e9 | 1515 | |
17efbee8 | 1516 | err_connect: |
76d36ab7 | 1517 | cpuhp_remove_state(hyperv_cpuhp_online); |
fd1fea68 | 1518 | err_cpuhp: |
2608fb65 | 1519 | hv_synic_free(); |
4df4cb9e | 1520 | err_alloc: |
76d388cd | 1521 | hv_remove_vmbus_irq(); |
626b901f | 1522 | err_setup: |
8b9987e9 | 1523 | bus_unregister(&hv_bus); |
8afc06dd SM |
1524 | unregister_sysctl_table(hv_ctl_table_hdr); |
1525 | hv_ctl_table_hdr = NULL; | |
8b9987e9 | 1526 | return ret; |
3e7ee490 HJ |
1527 | } |
1528 | ||
90c9960e | 1529 | /** |
35464483 JO |
1530 | * __vmbus_child_driver_register() - Register a vmbus's driver |
1531 | * @hv_driver: Pointer to driver structure you want to register | |
768fa219 GKH |
1532 | * @owner: owner module of the drv |
1533 | * @mod_name: module name string | |
3e189519 HJ |
1534 | * |
1535 | * Registers the given driver with Linux through the 'driver_register()' call | |
768fa219 | 1536 | * and sets up the hyper-v vmbus handling for this driver. |
3e189519 HJ |
1537 | * It will return the state of the 'driver_register()' call. |
1538 | * | |
90c9960e | 1539 | */ |
768fa219 | 1540 | int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) |
3e7ee490 | 1541 | { |
5d48a1c2 | 1542 | int ret; |
3e7ee490 | 1543 | |
768fa219 | 1544 | pr_info("registering driver %s\n", hv_driver->name); |
3e7ee490 | 1545 | |
cf6a2eac S |
1546 | ret = vmbus_exists(); |
1547 | if (ret < 0) | |
1548 | return ret; | |
1549 | ||
768fa219 GKH |
1550 | hv_driver->driver.name = hv_driver->name; |
1551 | hv_driver->driver.owner = owner; | |
1552 | hv_driver->driver.mod_name = mod_name; | |
1553 | hv_driver->driver.bus = &hv_bus; | |
3e7ee490 | 1554 | |
fc76936d SH |
1555 | spin_lock_init(&hv_driver->dynids.lock); |
1556 | INIT_LIST_HEAD(&hv_driver->dynids.list); | |
1557 | ||
768fa219 | 1558 | ret = driver_register(&hv_driver->driver); |
3e7ee490 | 1559 | |
5d48a1c2 | 1560 | return ret; |
3e7ee490 | 1561 | } |
768fa219 | 1562 | EXPORT_SYMBOL_GPL(__vmbus_driver_register); |
3e7ee490 | 1563 | |
90c9960e | 1564 | /** |
768fa219 | 1565 | * vmbus_driver_unregister() - Unregister a vmbus's driver |
35464483 JO |
1566 | * @hv_driver: Pointer to driver structure you want to |
1567 | * un-register | |
3e189519 | 1568 | * |
768fa219 GKH |
1569 | * Un-register the given driver that was previous registered with a call to |
1570 | * vmbus_driver_register() | |
90c9960e | 1571 | */ |
768fa219 | 1572 | void vmbus_driver_unregister(struct hv_driver *hv_driver) |
3e7ee490 | 1573 | { |
768fa219 | 1574 | pr_info("unregistering driver %s\n", hv_driver->name); |
3e7ee490 | 1575 | |
fc76936d | 1576 | if (!vmbus_exists()) { |
8f257a14 | 1577 | driver_unregister(&hv_driver->driver); |
fc76936d SH |
1578 | vmbus_free_dynids(hv_driver); |
1579 | } | |
3e7ee490 | 1580 | } |
768fa219 | 1581 | EXPORT_SYMBOL_GPL(vmbus_driver_unregister); |
3e7ee490 | 1582 | |
c2e5df61 SH |
1583 | |
1584 | /* | |
1585 | * Called when last reference to channel is gone. | |
1586 | */ | |
1587 | static void vmbus_chan_release(struct kobject *kobj) | |
1588 | { | |
1589 | struct vmbus_channel *channel | |
1590 | = container_of(kobj, struct vmbus_channel, kobj); | |
1591 | ||
1592 | kfree_rcu(channel, rcu); | |
1593 | } | |
1594 | ||
/*
 * A sysfs attribute of a VMBus channel kobject.
 *
 * @attr:  the generic sysfs attribute.
 * @show:  formats the attribute value for @chan into @buf; may be NULL.
 * @store: parses @count bytes from @buf for @chan; may be NULL.
 */
struct vmbus_chan_attribute {
	struct attribute attr;
	ssize_t (*show)(struct vmbus_channel *chan, char *buf);
	ssize_t (*store)(struct vmbus_channel *chan,
			 const char *buf, size_t count);
};
/*
 * Helpers that define a 'struct vmbus_chan_attribute chan_attr_<name>'
 * using the generic __ATTR*() initializers.
 */
#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \
	struct vmbus_chan_attribute chan_attr_##_name \
		= __ATTR(_name, _mode, _show, _store)
#define VMBUS_CHAN_ATTR_RW(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name)
#define VMBUS_CHAN_ATTR_RO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name)
#define VMBUS_CHAN_ATTR_WO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name)
1610 | ||
1611 | static ssize_t vmbus_chan_attr_show(struct kobject *kobj, | |
1612 | struct attribute *attr, char *buf) | |
1613 | { | |
1614 | const struct vmbus_chan_attribute *attribute | |
1615 | = container_of(attr, struct vmbus_chan_attribute, attr); | |
14948e39 | 1616 | struct vmbus_channel *chan |
c2e5df61 SH |
1617 | = container_of(kobj, struct vmbus_channel, kobj); |
1618 | ||
1619 | if (!attribute->show) | |
1620 | return -EIO; | |
1621 | ||
1622 | return attribute->show(chan, buf); | |
1623 | } | |
1624 | ||
75278105 APM |
1625 | static ssize_t vmbus_chan_attr_store(struct kobject *kobj, |
1626 | struct attribute *attr, const char *buf, | |
1627 | size_t count) | |
1628 | { | |
1629 | const struct vmbus_chan_attribute *attribute | |
1630 | = container_of(attr, struct vmbus_chan_attribute, attr); | |
1631 | struct vmbus_channel *chan | |
1632 | = container_of(kobj, struct vmbus_channel, kobj); | |
1633 | ||
1634 | if (!attribute->store) | |
1635 | return -EIO; | |
1636 | ||
1637 | return attribute->store(chan, buf, count); | |
1638 | } | |
1639 | ||
c2e5df61 SH |
/* sysfs operations for channel kobjects: route show/store to the dispatchers. */
static const struct sysfs_ops vmbus_chan_sysfs_ops = {
	.show = vmbus_chan_attr_show,
	.store = vmbus_chan_attr_store,
};
1644 | ||
14948e39 | 1645 | static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1646 | { |
14948e39 KB |
1647 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1648 | ssize_t ret; | |
c2e5df61 | 1649 | |
14948e39 KB |
1650 | mutex_lock(&rbi->ring_buffer_mutex); |
1651 | if (!rbi->ring_buffer) { | |
1652 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1653 | return -EINVAL; |
14948e39 | 1654 | } |
fcedbb29 | 1655 | |
14948e39 KB |
1656 | ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); |
1657 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1658 | return ret; | |
c2e5df61 | 1659 | } |
875c362b | 1660 | static VMBUS_CHAN_ATTR_RO(out_mask); |
c2e5df61 | 1661 | |
14948e39 | 1662 | static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1663 | { |
14948e39 KB |
1664 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1665 | ssize_t ret; | |
c2e5df61 | 1666 | |
14948e39 KB |
1667 | mutex_lock(&rbi->ring_buffer_mutex); |
1668 | if (!rbi->ring_buffer) { | |
1669 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1670 | return -EINVAL; |
14948e39 | 1671 | } |
fcedbb29 | 1672 | |
14948e39 KB |
1673 | ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); |
1674 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1675 | return ret; | |
c2e5df61 | 1676 | } |
875c362b | 1677 | static VMBUS_CHAN_ATTR_RO(in_mask); |
c2e5df61 | 1678 | |
14948e39 | 1679 | static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1680 | { |
14948e39 KB |
1681 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1682 | ssize_t ret; | |
c2e5df61 | 1683 | |
14948e39 KB |
1684 | mutex_lock(&rbi->ring_buffer_mutex); |
1685 | if (!rbi->ring_buffer) { | |
1686 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1687 | return -EINVAL; |
14948e39 | 1688 | } |
fcedbb29 | 1689 | |
14948e39 KB |
1690 | ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi)); |
1691 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1692 | return ret; | |
c2e5df61 | 1693 | } |
875c362b | 1694 | static VMBUS_CHAN_ATTR_RO(read_avail); |
c2e5df61 | 1695 | |
14948e39 | 1696 | static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 | 1697 | { |
14948e39 KB |
1698 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1699 | ssize_t ret; | |
c2e5df61 | 1700 | |
14948e39 KB |
1701 | mutex_lock(&rbi->ring_buffer_mutex); |
1702 | if (!rbi->ring_buffer) { | |
1703 | mutex_unlock(&rbi->ring_buffer_mutex); | |
fcedbb29 | 1704 | return -EINVAL; |
14948e39 | 1705 | } |
fcedbb29 | 1706 | |
14948e39 KB |
1707 | ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi)); |
1708 | mutex_unlock(&rbi->ring_buffer_mutex); | |
1709 | return ret; | |
c2e5df61 | 1710 | } |
875c362b | 1711 | static VMBUS_CHAN_ATTR_RO(write_avail); |
c2e5df61 | 1712 | |
75278105 | 1713 | static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) |
c2e5df61 SH |
1714 | { |
1715 | return sprintf(buf, "%u\n", channel->target_cpu); | |
1716 | } | |
75278105 APM |
1717 | static ssize_t target_cpu_store(struct vmbus_channel *channel, |
1718 | const char *buf, size_t count) | |
1719 | { | |
afaa33da | 1720 | u32 target_cpu, origin_cpu; |
75278105 | 1721 | ssize_t ret = count; |
75278105 APM |
1722 | |
1723 | if (vmbus_proto_version < VERSION_WIN10_V4_1) | |
1724 | return -EIO; | |
1725 | ||
1726 | if (sscanf(buf, "%uu", &target_cpu) != 1) | |
1727 | return -EIO; | |
1728 | ||
1729 | /* Validate target_cpu for the cpumask_test_cpu() operation below. */ | |
1730 | if (target_cpu >= nr_cpumask_bits) | |
1731 | return -EINVAL; | |
1732 | ||
1733 | /* No CPUs should come up or down during this. */ | |
1734 | cpus_read_lock(); | |
1735 | ||
0a968209 | 1736 | if (!cpu_online(target_cpu)) { |
75278105 APM |
1737 | cpus_read_unlock(); |
1738 | return -EINVAL; | |
1739 | } | |
1740 | ||
1741 | /* | |
1742 | * Synchronizes target_cpu_store() and channel closure: | |
1743 | * | |
1744 | * { Initially: state = CHANNEL_OPENED } | |
1745 | * | |
1746 | * CPU1 CPU2 | |
1747 | * | |
1748 | * [target_cpu_store()] [vmbus_disconnect_ring()] | |
1749 | * | |
1750 | * LOCK channel_mutex LOCK channel_mutex | |
1751 | * LOAD r1 = state LOAD r2 = state | |
1752 | * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED) | |
1753 | * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN | |
1754 | * [...] SEND CLOSECHANNEL | |
1755 | * UNLOCK channel_mutex UNLOCK channel_mutex | |
1756 | * | |
1757 | * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes | |
1758 | * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND | |
1759 | * | |
1760 | * Note. The host processes the channel messages "sequentially", in | |
1761 | * the order in which they are received on a per-partition basis. | |
1762 | */ | |
1763 | mutex_lock(&vmbus_connection.channel_mutex); | |
1764 | ||
1765 | /* | |
1766 | * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; | |
1767 | * avoid sending the message and fail here for such channels. | |
1768 | */ | |
1769 | if (channel->state != CHANNEL_OPENED_STATE) { | |
1770 | ret = -EIO; | |
1771 | goto cpu_store_unlock; | |
1772 | } | |
1773 | ||
afaa33da APM |
1774 | origin_cpu = channel->target_cpu; |
1775 | if (target_cpu == origin_cpu) | |
75278105 APM |
1776 | goto cpu_store_unlock; |
1777 | ||
1778 | if (vmbus_send_modifychannel(channel->offermsg.child_relid, | |
1779 | hv_cpu_number_to_vp_number(target_cpu))) { | |
1780 | ret = -EIO; | |
1781 | goto cpu_store_unlock; | |
1782 | } | |
1783 | ||
1784 | /* | |
1785 | * Warning. At this point, there is *no* guarantee that the host will | |
1786 | * have successfully processed the vmbus_send_modifychannel() request. | |
1787 | * See the header comment of vmbus_send_modifychannel() for more info. | |
1788 | * | |
1789 | * Lags in the processing of the above vmbus_send_modifychannel() can | |
1790 | * result in missed interrupts if the "old" target CPU is taken offline | |
1791 | * before Hyper-V starts sending interrupts to the "new" target CPU. | |
1792 | * But apart from this offlining scenario, the code tolerates such | |
1793 | * lags. It will function correctly even if a channel interrupt comes | |
1794 | * in on a CPU that is different from the channel target_cpu value. | |
1795 | */ | |
1796 | ||
1797 | channel->target_cpu = target_cpu; | |
75278105 | 1798 | |
afaa33da APM |
1799 | /* See init_vp_index(). */ |
1800 | if (hv_is_perf_channel(channel)) | |
1801 | hv_update_alloced_cpus(origin_cpu, target_cpu); | |
1802 | ||
1803 | /* Currently set only for storvsc channels. */ | |
1804 | if (channel->change_target_cpu_callback) { | |
1805 | (*channel->change_target_cpu_callback)(channel, | |
1806 | origin_cpu, target_cpu); | |
1807 | } | |
1808 | ||
75278105 APM |
1809 | cpu_store_unlock: |
1810 | mutex_unlock(&vmbus_connection.channel_mutex); | |
1811 | cpus_read_unlock(); | |
1812 | return ret; | |
1813 | } | |
1814 | static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); | |
c2e5df61 | 1815 | |
14948e39 | 1816 | static ssize_t channel_pending_show(struct vmbus_channel *channel, |
c2e5df61 SH |
1817 | char *buf) |
1818 | { | |
1819 | return sprintf(buf, "%d\n", | |
1820 | channel_pending(channel, | |
1821 | vmbus_connection.monitor_pages[1])); | |
1822 | } | |
f0434de4 | 1823 | static VMBUS_CHAN_ATTR(pending, 0444, channel_pending_show, NULL); |
c2e5df61 | 1824 | |
14948e39 | 1825 | static ssize_t channel_latency_show(struct vmbus_channel *channel, |
c2e5df61 SH |
1826 | char *buf) |
1827 | { | |
1828 | return sprintf(buf, "%d\n", | |
1829 | channel_latency(channel, | |
1830 | vmbus_connection.monitor_pages[1])); | |
1831 | } | |
f0434de4 | 1832 | static VMBUS_CHAN_ATTR(latency, 0444, channel_latency_show, NULL); |
c2e5df61 | 1833 | |
14948e39 | 1834 | static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) |
6981fbf3 SH |
1835 | { |
1836 | return sprintf(buf, "%llu\n", channel->interrupts); | |
1837 | } | |
f0434de4 | 1838 | static VMBUS_CHAN_ATTR(interrupts, 0444, channel_interrupts_show, NULL); |
6981fbf3 | 1839 | |
14948e39 | 1840 | static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) |
6981fbf3 SH |
1841 | { |
1842 | return sprintf(buf, "%llu\n", channel->sig_events); | |
1843 | } | |
f0434de4 | 1844 | static VMBUS_CHAN_ATTR(events, 0444, channel_events_show, NULL); |
6981fbf3 | 1845 | |
14948e39 | 1846 | static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, |
396ae57e KB |
1847 | char *buf) |
1848 | { | |
1849 | return sprintf(buf, "%llu\n", | |
1850 | (unsigned long long)channel->intr_in_full); | |
1851 | } | |
1852 | static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL); | |
1853 | ||
14948e39 | 1854 | static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel, |
396ae57e KB |
1855 | char *buf) |
1856 | { | |
1857 | return sprintf(buf, "%llu\n", | |
1858 | (unsigned long long)channel->intr_out_empty); | |
1859 | } | |
1860 | static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL); | |
1861 | ||
14948e39 | 1862 | static ssize_t channel_out_full_first_show(struct vmbus_channel *channel, |
396ae57e KB |
1863 | char *buf) |
1864 | { | |
1865 | return sprintf(buf, "%llu\n", | |
1866 | (unsigned long long)channel->out_full_first); | |
1867 | } | |
1868 | static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL); | |
1869 | ||
14948e39 | 1870 | static ssize_t channel_out_full_total_show(struct vmbus_channel *channel, |
396ae57e KB |
1871 | char *buf) |
1872 | { | |
1873 | return sprintf(buf, "%llu\n", | |
1874 | (unsigned long long)channel->out_full_total); | |
1875 | } | |
1876 | static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL); | |
1877 | ||
14948e39 | 1878 | static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel, |
f0fa2974 SH |
1879 | char *buf) |
1880 | { | |
1881 | return sprintf(buf, "%u\n", channel->offermsg.monitorid); | |
1882 | } | |
f0434de4 | 1883 | static VMBUS_CHAN_ATTR(monitor_id, 0444, subchannel_monitor_id_show, NULL); |
f0fa2974 | 1884 | |
14948e39 | 1885 | static ssize_t subchannel_id_show(struct vmbus_channel *channel, |
f0fa2974 SH |
1886 | char *buf) |
1887 | { | |
1888 | return sprintf(buf, "%u\n", | |
1889 | channel->offermsg.offer.sub_channel_index); | |
1890 | } | |
1891 | static VMBUS_CHAN_ATTR_RO(subchannel_id); | |
1892 | ||
c2e5df61 SH |
1893 | static struct attribute *vmbus_chan_attrs[] = { |
1894 | &chan_attr_out_mask.attr, | |
1895 | &chan_attr_in_mask.attr, | |
1896 | &chan_attr_read_avail.attr, | |
1897 | &chan_attr_write_avail.attr, | |
1898 | &chan_attr_cpu.attr, | |
1899 | &chan_attr_pending.attr, | |
1900 | &chan_attr_latency.attr, | |
6981fbf3 SH |
1901 | &chan_attr_interrupts.attr, |
1902 | &chan_attr_events.attr, | |
396ae57e KB |
1903 | &chan_attr_intr_in_full.attr, |
1904 | &chan_attr_intr_out_empty.attr, | |
1905 | &chan_attr_out_full_first.attr, | |
1906 | &chan_attr_out_full_total.attr, | |
f0fa2974 SH |
1907 | &chan_attr_monitor_id.attr, |
1908 | &chan_attr_subchannel_id.attr, | |
c2e5df61 SH |
1909 | NULL |
1910 | }; | |
1911 | ||
46fc1548 KB |
1912 | /* |
1913 | * Channel-level attribute_group callback function. Returns the permission for | |
1914 | * each attribute, and returns 0 if an attribute is not visible. | |
1915 | */ | |
1916 | static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, | |
1917 | struct attribute *attr, int idx) | |
1918 | { | |
1919 | const struct vmbus_channel *channel = | |
1920 | container_of(kobj, struct vmbus_channel, kobj); | |
1921 | ||
1922 | /* Hide the monitor attributes if the monitor mechanism is not used. */ | |
1923 | if (!channel->offermsg.monitor_allocated && | |
1924 | (attr == &chan_attr_pending.attr || | |
1925 | attr == &chan_attr_latency.attr || | |
1926 | attr == &chan_attr_monitor_id.attr)) | |
1927 | return 0; | |
1928 | ||
1929 | return attr->mode; | |
1930 | } | |
1931 | ||
1932 | static struct attribute_group vmbus_chan_group = { | |
1933 | .attrs = vmbus_chan_attrs, | |
1934 | .is_visible = vmbus_chan_attr_is_visible | |
1935 | }; | |
1936 | ||
c2e5df61 SH |
1937 | static struct kobj_type vmbus_chan_ktype = { |
1938 | .sysfs_ops = &vmbus_chan_sysfs_ops, | |
1939 | .release = vmbus_chan_release, | |
c2e5df61 SH |
1940 | }; |
1941 | ||
1942 | /* | |
1943 | * vmbus_add_channel_kobj - setup a sub-directory under device/channels | |
1944 | */ | |
1945 | int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) | |
1946 | { | |
46fc1548 | 1947 | const struct device *device = &dev->device; |
c2e5df61 SH |
1948 | struct kobject *kobj = &channel->kobj; |
1949 | u32 relid = channel->offermsg.child_relid; | |
1950 | int ret; | |
1951 | ||
1952 | kobj->kset = dev->channels_kset; | |
1953 | ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, | |
1954 | "%u", relid); | |
1955 | if (ret) | |
1956 | return ret; | |
1957 | ||
46fc1548 KB |
1958 | ret = sysfs_create_group(kobj, &vmbus_chan_group); |
1959 | ||
1960 | if (ret) { | |
1961 | /* | |
1962 | * The calling functions' error handling paths will cleanup the | |
1963 | * empty channel directory. | |
1964 | */ | |
1965 | dev_err(device, "Unable to set up channel sysfs files\n"); | |
1966 | return ret; | |
1967 | } | |
1968 | ||
c2e5df61 SH |
1969 | kobject_uevent(kobj, KOBJ_ADD); |
1970 | ||
1971 | return 0; | |
1972 | } | |
1973 | ||
46fc1548 KB |
1974 | /* |
1975 | * vmbus_remove_channel_attr_group - remove the channel's attribute group | |
1976 | */ | |
1977 | void vmbus_remove_channel_attr_group(struct vmbus_channel *channel) | |
1978 | { | |
1979 | sysfs_remove_group(&channel->kobj, &vmbus_chan_group); | |
1980 | } | |
1981 | ||
3e189519 | 1982 | /* |
f2c73011 | 1983 | * vmbus_device_create - Creates and registers a new child device |
3e189519 | 1984 | * on the vmbus. |
90c9960e | 1985 | */ |
593db803 AS |
1986 | struct hv_device *vmbus_device_create(const guid_t *type, |
1987 | const guid_t *instance, | |
1b9d48f2 | 1988 | struct vmbus_channel *channel) |
3e7ee490 | 1989 | { |
3d3b5518 | 1990 | struct hv_device *child_device_obj; |
3e7ee490 | 1991 | |
6bad88da S |
1992 | child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); |
1993 | if (!child_device_obj) { | |
0a46618d | 1994 | pr_err("Unable to allocate device object for child device\n"); |
3e7ee490 HJ |
1995 | return NULL; |
1996 | } | |
1997 | ||
cae5b843 | 1998 | child_device_obj->channel = channel; |
593db803 AS |
1999 | guid_copy(&child_device_obj->dev_type, type); |
2000 | guid_copy(&child_device_obj->dev_instance, instance); | |
7047f17d | 2001 | child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ |
3e7ee490 | 2002 | |
3e7ee490 HJ |
2003 | return child_device_obj; |
2004 | } | |
2005 | ||
3e189519 | 2006 | /* |
22794281 | 2007 | * vmbus_device_register - Register the child device |
90c9960e | 2008 | */ |
22794281 | 2009 | int vmbus_device_register(struct hv_device *child_device_obj) |
3e7ee490 | 2010 | { |
c2e5df61 SH |
2011 | struct kobject *kobj = &child_device_obj->device.kobj; |
2012 | int ret; | |
6bad88da | 2013 | |
f6b2db08 | 2014 | dev_set_name(&child_device_obj->device, "%pUl", |
458c4475 | 2015 | &child_device_obj->channel->offermsg.offer.if_instance); |
3e7ee490 | 2016 | |
0bce28b6 | 2017 | child_device_obj->device.bus = &hv_bus; |
607c1a11 | 2018 | child_device_obj->device.parent = &hv_acpi_dev->dev; |
6bad88da | 2019 | child_device_obj->device.release = vmbus_device_release; |
3e7ee490 | 2020 | |
90c9960e GKH |
2021 | /* |
2022 | * Register with the LDM. This will kick off the driver/device | |
2023 | * binding...which will eventually call vmbus_match() and vmbus_probe() | |
2024 | */ | |
6bad88da | 2025 | ret = device_register(&child_device_obj->device); |
c2e5df61 | 2026 | if (ret) { |
0a46618d | 2027 | pr_err("Unable to register child device\n"); |
c2e5df61 SH |
2028 | return ret; |
2029 | } | |
2030 | ||
2031 | child_device_obj->channels_kset = kset_create_and_add("channels", | |
2032 | NULL, kobj); | |
2033 | if (!child_device_obj->channels_kset) { | |
2034 | ret = -ENOMEM; | |
2035 | goto err_dev_unregister; | |
2036 | } | |
2037 | ||
2038 | ret = vmbus_add_channel_kobj(child_device_obj, | |
2039 | child_device_obj->channel); | |
2040 | if (ret) { | |
2041 | pr_err("Unable to register primary channeln"); | |
2042 | goto err_kset_unregister; | |
2043 | } | |
af9ca6f9 | 2044 | hv_debug_add_dev_dir(child_device_obj); |
c2e5df61 SH |
2045 | |
2046 | return 0; | |
2047 | ||
2048 | err_kset_unregister: | |
2049 | kset_unregister(child_device_obj->channels_kset); | |
3e7ee490 | 2050 | |
c2e5df61 SH |
2051 | err_dev_unregister: |
2052 | device_unregister(&child_device_obj->device); | |
3e7ee490 HJ |
2053 | return ret; |
2054 | } | |
2055 | ||
3e189519 | 2056 | /* |
696453ba | 2057 | * vmbus_device_unregister - Remove the specified child device |
3e189519 | 2058 | * from the vmbus. |
90c9960e | 2059 | */ |
696453ba | 2060 | void vmbus_device_unregister(struct hv_device *device_obj) |
3e7ee490 | 2061 | { |
84672369 FS |
2062 | pr_debug("child device %s unregistered\n", |
2063 | dev_name(&device_obj->device)); | |
2064 | ||
869b5567 DC |
2065 | kset_unregister(device_obj->channels_kset); |
2066 | ||
90c9960e GKH |
2067 | /* |
2068 | * Kick off the process of unregistering the device. | |
2069 | * This will call vmbus_remove() and eventually vmbus_device_release() | |
2070 | */ | |
6bad88da | 2071 | device_unregister(&device_obj->device); |
3e7ee490 HJ |
2072 | } |
2073 | ||
3e7ee490 | 2074 | |
b0069f43 | 2075 | /* |
7f163a6f | 2076 | * VMBUS is an acpi enumerated device. Get the information we |
90f34535 | 2077 | * need from DSDT. |
b0069f43 | 2078 | */ |
7f163a6f | 2079 | #define VTPM_BASE_ADDRESS 0xfed40000 |
90f34535 | 2080 | static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) |
b0069f43 | 2081 | { |
7f163a6f JO |
2082 | resource_size_t start = 0; |
2083 | resource_size_t end = 0; | |
2084 | struct resource *new_res; | |
2085 | struct resource **old_res = &hyperv_mmio; | |
2086 | struct resource **prev_res = NULL; | |
626b901f | 2087 | struct resource r; |
7f163a6f | 2088 | |
90f34535 | 2089 | switch (res->type) { |
7f163a6f JO |
2090 | |
2091 | /* | |
2092 | * "Address" descriptors are for bus windows. Ignore | |
2093 | * "memory" descriptors, which are for registers on | |
2094 | * devices. | |
2095 | */ | |
2096 | case ACPI_RESOURCE_TYPE_ADDRESS32: | |
2097 | start = res->data.address32.address.minimum; | |
2098 | end = res->data.address32.address.maximum; | |
4eb923f8 | 2099 | break; |
b0069f43 | 2100 | |
90f34535 | 2101 | case ACPI_RESOURCE_TYPE_ADDRESS64: |
7f163a6f JO |
2102 | start = res->data.address64.address.minimum; |
2103 | end = res->data.address64.address.maximum; | |
4eb923f8 | 2104 | break; |
7f163a6f | 2105 | |
626b901f MK |
2106 | /* |
2107 | * The IRQ information is needed only on ARM64, which Hyper-V | |
2108 | * sets up in the extended format. IRQ information is present | |
2109 | * on x86/x64 in the non-extended format but it is not used by | |
2110 | * Linux. So don't bother checking for the non-extended format. | |
2111 | */ | |
2112 | case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: | |
2113 | if (!acpi_dev_resource_interrupt(res, 0, &r)) { | |
2114 | pr_err("Unable to parse Hyper-V ACPI interrupt\n"); | |
2115 | return AE_ERROR; | |
2116 | } | |
2117 | /* ARM64 INTID for VMbus */ | |
2118 | vmbus_interrupt = res->data.extended_irq.interrupts[0]; | |
2119 | /* Linux IRQ number */ | |
2120 | vmbus_irq = r.start; | |
2121 | return AE_OK; | |
2122 | ||
7f163a6f JO |
2123 | default: |
2124 | /* Unused resource type */ | |
2125 | return AE_OK; | |
2126 | ||
b0069f43 | 2127 | } |
7f163a6f JO |
2128 | /* |
2129 | * Ignore ranges that are below 1MB, as they're not | |
2130 | * necessary or useful here. | |
2131 | */ | |
2132 | if (end < 0x100000) | |
2133 | return AE_OK; | |
2134 | ||
2135 | new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); | |
2136 | if (!new_res) | |
2137 | return AE_NO_MEMORY; | |
2138 | ||
2139 | /* If this range overlaps the virtual TPM, truncate it. */ | |
2140 | if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) | |
2141 | end = VTPM_BASE_ADDRESS; | |
2142 | ||
2143 | new_res->name = "hyperv mmio"; | |
2144 | new_res->flags = IORESOURCE_MEM; | |
2145 | new_res->start = start; | |
2146 | new_res->end = end; | |
2147 | ||
40f26f31 | 2148 | /* |
40f26f31 JO |
2149 | * If two ranges are adjacent, merge them. |
2150 | */ | |
7f163a6f JO |
2151 | do { |
2152 | if (!*old_res) { | |
2153 | *old_res = new_res; | |
2154 | break; | |
2155 | } | |
2156 | ||
40f26f31 JO |
2157 | if (((*old_res)->end + 1) == new_res->start) { |
2158 | (*old_res)->end = new_res->end; | |
2159 | kfree(new_res); | |
2160 | break; | |
2161 | } | |
2162 | ||
2163 | if ((*old_res)->start == new_res->end + 1) { | |
2164 | (*old_res)->start = new_res->start; | |
2165 | kfree(new_res); | |
2166 | break; | |
2167 | } | |
2168 | ||
23a06831 | 2169 | if ((*old_res)->start > new_res->end) { |
7f163a6f JO |
2170 | new_res->sibling = *old_res; |
2171 | if (prev_res) | |
2172 | (*prev_res)->sibling = new_res; | |
2173 | *old_res = new_res; | |
2174 | break; | |
2175 | } | |
2176 | ||
2177 | prev_res = old_res; | |
2178 | old_res = &(*old_res)->sibling; | |
2179 | ||
2180 | } while (1); | |
b0069f43 S |
2181 | |
2182 | return AE_OK; | |
2183 | } | |
2184 | ||
7f163a6f JO |
2185 | static int vmbus_acpi_remove(struct acpi_device *device) |
2186 | { | |
2187 | struct resource *cur_res; | |
2188 | struct resource *next_res; | |
2189 | ||
2190 | if (hyperv_mmio) { | |
6d146aef JO |
2191 | if (fb_mmio) { |
2192 | __release_region(hyperv_mmio, fb_mmio->start, | |
2193 | resource_size(fb_mmio)); | |
2194 | fb_mmio = NULL; | |
2195 | } | |
2196 | ||
7f163a6f JO |
2197 | for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { |
2198 | next_res = cur_res->sibling; | |
2199 | kfree(cur_res); | |
2200 | } | |
2201 | } | |
2202 | ||
2203 | return 0; | |
2204 | } | |
2205 | ||
6d146aef JO |
2206 | static void vmbus_reserve_fb(void) |
2207 | { | |
2208 | int size; | |
2209 | /* | |
2210 | * Make a claim for the frame buffer in the resource tree under the | |
2211 | * first node, which will be the one below 4GB. The length seems to | |
2212 | * be underreported, particularly in a Generation 1 VM. So start out | |
2213 | * reserving a larger area and make it smaller until it succeeds. | |
2214 | */ | |
2215 | ||
2216 | if (screen_info.lfb_base) { | |
2217 | if (efi_enabled(EFI_BOOT)) | |
2218 | size = max_t(__u32, screen_info.lfb_size, 0x800000); | |
2219 | else | |
2220 | size = max_t(__u32, screen_info.lfb_size, 0x4000000); | |
2221 | ||
2222 | for (; !fb_mmio && (size >= 0x100000); size >>= 1) { | |
2223 | fb_mmio = __request_region(hyperv_mmio, | |
2224 | screen_info.lfb_base, size, | |
2225 | fb_mmio_name, 0); | |
2226 | } | |
2227 | } | |
2228 | } | |
2229 | ||
35464483 JO |
2230 | /** |
2231 | * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. | |
2232 | * @new: If successful, supplied a pointer to the | |
2233 | * allocated MMIO space. | |
2234 | * @device_obj: Identifies the caller | |
2235 | * @min: Minimum guest physical address of the | |
2236 | * allocation | |
2237 | * @max: Maximum guest physical address | |
2238 | * @size: Size of the range to be allocated | |
2239 | * @align: Alignment of the range to be allocated | |
2240 | * @fb_overlap_ok: Whether this allocation can be allowed | |
2241 | * to overlap the video frame buffer. | |
2242 | * | |
2243 | * This function walks the resources granted to VMBus by the | |
2244 | * _CRS object in the ACPI namespace underneath the parent | |
2245 | * "bridge" whether that's a root PCI bus in the Generation 1 | |
2246 | * case or a Module Device in the Generation 2 case. It then | |
2247 | * attempts to allocate from the global MMIO pool in a way that | |
2248 | * matches the constraints supplied in these parameters and by | |
2249 | * that _CRS. | |
2250 | * | |
2251 | * Return: 0 on success, -errno on failure | |
2252 | */ | |
2253 | int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, | |
2254 | resource_size_t min, resource_size_t max, | |
2255 | resource_size_t size, resource_size_t align, | |
2256 | bool fb_overlap_ok) | |
2257 | { | |
be000f93 | 2258 | struct resource *iter, *shadow; |
ea37a6b8 | 2259 | resource_size_t range_min, range_max, start; |
35464483 | 2260 | const char *dev_n = dev_name(&device_obj->device); |
ea37a6b8 | 2261 | int retval; |
e16dad6b JO |
2262 | |
2263 | retval = -ENXIO; | |
8aea7f82 | 2264 | mutex_lock(&hyperv_mmio_lock); |
35464483 | 2265 | |
ea37a6b8 JO |
2266 | /* |
2267 | * If overlaps with frame buffers are allowed, then first attempt to | |
2268 | * make the allocation from within the reserved region. Because it | |
2269 | * is already reserved, no shadow allocation is necessary. | |
2270 | */ | |
2271 | if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) && | |
2272 | !(max < fb_mmio->start)) { | |
2273 | ||
2274 | range_min = fb_mmio->start; | |
2275 | range_max = fb_mmio->end; | |
2276 | start = (range_min + align - 1) & ~(align - 1); | |
2277 | for (; start + size - 1 <= range_max; start += align) { | |
2278 | *new = request_mem_region_exclusive(start, size, dev_n); | |
2279 | if (*new) { | |
2280 | retval = 0; | |
2281 | goto exit; | |
2282 | } | |
2283 | } | |
2284 | } | |
2285 | ||
35464483 JO |
2286 | for (iter = hyperv_mmio; iter; iter = iter->sibling) { |
2287 | if ((iter->start >= max) || (iter->end <= min)) | |
2288 | continue; | |
2289 | ||
2290 | range_min = iter->start; | |
2291 | range_max = iter->end; | |
ea37a6b8 JO |
2292 | start = (range_min + align - 1) & ~(align - 1); |
2293 | for (; start + size - 1 <= range_max; start += align) { | |
2294 | shadow = __request_region(iter, start, size, NULL, | |
2295 | IORESOURCE_BUSY); | |
2296 | if (!shadow) | |
2297 | continue; | |
2298 | ||
2299 | *new = request_mem_region_exclusive(start, size, dev_n); | |
2300 | if (*new) { | |
2301 | shadow->name = (char *)*new; | |
2302 | retval = 0; | |
2303 | goto exit; | |
35464483 JO |
2304 | } |
2305 | ||
ea37a6b8 | 2306 | __release_region(iter, start, size); |
35464483 JO |
2307 | } |
2308 | } | |
2309 | ||
e16dad6b | 2310 | exit: |
8aea7f82 | 2311 | mutex_unlock(&hyperv_mmio_lock); |
e16dad6b | 2312 | return retval; |
35464483 JO |
2313 | } |
2314 | EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); | |
2315 | ||
97fb77dc JO |
2316 | /** |
2317 | * vmbus_free_mmio() - Free a memory-mapped I/O range. | |
2318 | * @start: Base address of region to release. | |
2319 | * @size: Size of the range to be allocated | |
2320 | * | |
2321 | * This function releases anything requested by | |
2322 | * vmbus_mmio_allocate(). | |
2323 | */ | |
2324 | void vmbus_free_mmio(resource_size_t start, resource_size_t size) | |
2325 | { | |
be000f93 JO |
2326 | struct resource *iter; |
2327 | ||
8aea7f82 | 2328 | mutex_lock(&hyperv_mmio_lock); |
be000f93 JO |
2329 | for (iter = hyperv_mmio; iter; iter = iter->sibling) { |
2330 | if ((iter->start >= start + size) || (iter->end <= start)) | |
2331 | continue; | |
2332 | ||
2333 | __release_region(iter, start, size); | |
2334 | } | |
97fb77dc | 2335 | release_mem_region(start, size); |
8aea7f82 | 2336 | mutex_unlock(&hyperv_mmio_lock); |
97fb77dc JO |
2337 | |
2338 | } | |
2339 | EXPORT_SYMBOL_GPL(vmbus_free_mmio); | |
2340 | ||
b0069f43 S |
2341 | static int vmbus_acpi_add(struct acpi_device *device) |
2342 | { | |
2343 | acpi_status result; | |
90f34535 | 2344 | int ret_val = -ENODEV; |
7f163a6f | 2345 | struct acpi_device *ancestor; |
b0069f43 | 2346 | |
607c1a11 S |
2347 | hv_acpi_dev = device; |
2348 | ||
0a4425b6 | 2349 | result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, |
90f34535 | 2350 | vmbus_walk_resources, NULL); |
b0069f43 | 2351 | |
90f34535 S |
2352 | if (ACPI_FAILURE(result)) |
2353 | goto acpi_walk_err; | |
2354 | /* | |
7f163a6f JO |
2355 | * Some ancestor of the vmbus acpi device (Gen1 or Gen2 |
2356 | * firmware) is the VMOD that has the mmio ranges. Get that. | |
90f34535 | 2357 | */ |
7f163a6f JO |
2358 | for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { |
2359 | result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, | |
2360 | vmbus_walk_resources, NULL); | |
90f34535 S |
2361 | |
2362 | if (ACPI_FAILURE(result)) | |
7f163a6f | 2363 | continue; |
6d146aef JO |
2364 | if (hyperv_mmio) { |
2365 | vmbus_reserve_fb(); | |
7f163a6f | 2366 | break; |
6d146aef | 2367 | } |
b0069f43 | 2368 | } |
90f34535 S |
2369 | ret_val = 0; |
2370 | ||
2371 | acpi_walk_err: | |
b0069f43 | 2372 | complete(&probe_event); |
7f163a6f JO |
2373 | if (ret_val) |
2374 | vmbus_acpi_remove(device); | |
90f34535 | 2375 | return ret_val; |
b0069f43 S |
2376 | } |
2377 | ||
83b50f83 | 2378 | #ifdef CONFIG_PM_SLEEP |
f53335e3 DC |
2379 | static int vmbus_bus_suspend(struct device *dev) |
2380 | { | |
b307b389 | 2381 | struct vmbus_channel *channel, *sc; |
1f48dcf1 DC |
2382 | |
2383 | while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { | |
2384 | /* | |
2385 | * We wait here until the completion of any channel | |
2386 | * offers that are currently in progress. | |
2387 | */ | |
14c685d9 | 2388 | usleep_range(1000, 2000); |
1f48dcf1 DC |
2389 | } |
2390 | ||
2391 | mutex_lock(&vmbus_connection.channel_mutex); | |
2392 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { | |
2393 | if (!is_hvsock_channel(channel)) | |
2394 | continue; | |
2395 | ||
2396 | vmbus_force_channel_rescinded(channel); | |
2397 | } | |
2398 | mutex_unlock(&vmbus_connection.channel_mutex); | |
2399 | ||
b307b389 DC |
2400 | /* |
2401 | * Wait until all the sub-channels and hv_sock channels have been | |
2402 | * cleaned up. Sub-channels should be destroyed upon suspend, otherwise | |
2403 | * they would conflict with the new sub-channels that will be created | |
2404 | * in the resume path. hv_sock channels should also be destroyed, but | |
2405 | * a hv_sock channel of an established hv_sock connection can not be | |
2406 | * really destroyed since it may still be referenced by the userspace | |
2407 | * application, so we just force the hv_sock channel to be rescinded | |
2408 | * by vmbus_force_channel_rescinded(), and the userspace application | |
2409 | * will thoroughly destroy the channel after hibernation. | |
2410 | * | |
2411 | * Note: the counter nr_chan_close_on_suspend may never go above 0 if | |
2412 | * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. | |
2413 | */ | |
2414 | if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) | |
2415 | wait_for_completion(&vmbus_connection.ready_for_suspend_event); | |
2416 | ||
19873eec DC |
2417 | if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { |
2418 | pr_err("Can not suspend due to a previous failed resuming\n"); | |
2419 | return -EBUSY; | |
2420 | } | |
d8bd2d44 | 2421 | |
b307b389 DC |
2422 | mutex_lock(&vmbus_connection.channel_mutex); |
2423 | ||
2424 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { | |
d8bd2d44 | 2425 | /* |
8b6a877c APM |
2426 | * Remove the channel from the array of channels and invalidate |
2427 | * the channel's relid. Upon resume, vmbus_onoffer() will fix | |
2428 | * up the relid (and other fields, if necessary) and add the | |
2429 | * channel back to the array. | |
d8bd2d44 | 2430 | */ |
8b6a877c | 2431 | vmbus_channel_unmap_relid(channel); |
d8bd2d44 DC |
2432 | channel->offermsg.child_relid = INVALID_RELID; |
2433 | ||
b307b389 DC |
2434 | if (is_hvsock_channel(channel)) { |
2435 | if (!channel->rescind) { | |
2436 | pr_err("hv_sock channel not rescinded!\n"); | |
2437 | WARN_ON_ONCE(1); | |
2438 | } | |
2439 | continue; | |
2440 | } | |
2441 | ||
b307b389 DC |
2442 | list_for_each_entry(sc, &channel->sc_list, sc_list) { |
2443 | pr_err("Sub-channel not deleted!\n"); | |
2444 | WARN_ON_ONCE(1); | |
2445 | } | |
d8bd2d44 DC |
2446 | |
2447 | atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); | |
b307b389 DC |
2448 | } |
2449 | ||
2450 | mutex_unlock(&vmbus_connection.channel_mutex); | |
2451 | ||
f53335e3 DC |
2452 | vmbus_initiate_unload(false); |
2453 | ||
d8bd2d44 DC |
2454 | /* Reset the event for the next resume. */ |
2455 | reinit_completion(&vmbus_connection.ready_for_resume_event); | |
2456 | ||
f53335e3 DC |
2457 | return 0; |
2458 | } | |
2459 | ||
2460 | static int vmbus_bus_resume(struct device *dev) | |
2461 | { | |
2462 | struct vmbus_channel_msginfo *msginfo; | |
2463 | size_t msgsize; | |
2464 | int ret; | |
2465 | ||
2466 | /* | |
2467 | * We only use the 'vmbus_proto_version', which was in use before | |
2468 | * hibernation, to re-negotiate with the host. | |
2469 | */ | |
bedc61a9 | 2470 | if (!vmbus_proto_version) { |
f53335e3 DC |
2471 | pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); |
2472 | return -EINVAL; | |
2473 | } | |
2474 | ||
2475 | msgsize = sizeof(*msginfo) + | |
2476 | sizeof(struct vmbus_channel_initiate_contact); | |
2477 | ||
2478 | msginfo = kzalloc(msgsize, GFP_KERNEL); | |
2479 | ||
2480 | if (msginfo == NULL) | |
2481 | return -ENOMEM; | |
2482 | ||
2483 | ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); | |
2484 | ||
2485 | kfree(msginfo); | |
2486 | ||
2487 | if (ret != 0) | |
2488 | return ret; | |
2489 | ||
d8bd2d44 DC |
2490 | WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); |
2491 | ||
f53335e3 DC |
2492 | vmbus_request_offers(); |
2493 | ||
19873eec DC |
2494 | if (wait_for_completion_timeout( |
2495 | &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0) | |
2496 | pr_err("Some vmbus device is missing after suspending?\n"); | |
d8bd2d44 | 2497 | |
b307b389 DC |
2498 | /* Reset the event for the next suspend. */ |
2499 | reinit_completion(&vmbus_connection.ready_for_suspend_event); | |
2500 | ||
f53335e3 DC |
2501 | return 0; |
2502 | } | |
1a06d017 DC |
2503 | #else |
2504 | #define vmbus_bus_suspend NULL | |
2505 | #define vmbus_bus_resume NULL | |
83b50f83 | 2506 | #endif /* CONFIG_PM_SLEEP */ |
f53335e3 | 2507 | |
b0069f43 S |
/*
 * ACPI IDs this driver binds to.  Both the upper-case and the mixed-case
 * spelling are listed; the entry with an empty ID string ends the table.
 */
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
	{"VMBUS", 0},
	{"VMBus", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
2514 | ||
/*
 * Note: we must use the "noirq" ops, otherwise hibernation cannot work with
 * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in
 * the resume path, the pci "noirq" restore op runs before the "non-noirq"
 * op (see resume_target_kernel() -> dpm_resume_start(), and
 * hibernation_restore() -> dpm_resume_end()). This means vmbus_bus_resume()
 * and the pci-hyperv's resume callback must also run via the "noirq" ops.
 *
 * suspend_noirq/resume_noirq are set to NULL for Suspend-to-Idle: see the
 * comment earlier in this file before vmbus_pm.
 *
 * The freeze/poweroff phases both use vmbus_bus_suspend() and the
 * thaw/restore phases both use vmbus_bus_resume().
 */

static const struct dev_pm_ops vmbus_bus_pm = {
	.suspend_noirq	= NULL,
	.resume_noirq	= NULL,
	.freeze_noirq	= vmbus_bus_suspend,
	.thaw_noirq	= vmbus_bus_resume,
	.poweroff_noirq	= vmbus_bus_suspend,
	.restore_noirq	= vmbus_bus_resume
};
2535 | ||
b0069f43 S |
2536 | static struct acpi_driver vmbus_acpi_driver = { |
2537 | .name = "vmbus", | |
2538 | .ids = vmbus_acpi_device_ids, | |
2539 | .ops = { | |
2540 | .add = vmbus_acpi_add, | |
e4ecb41c | 2541 | .remove = vmbus_acpi_remove, |
b0069f43 | 2542 | }, |
f53335e3 | 2543 | .drv.pm = &vmbus_bus_pm, |
b0069f43 S |
2544 | }; |
2545 | ||
2517281d VK |
2546 | static void hv_kexec_handler(void) |
2547 | { | |
fd1fea68 | 2548 | hv_stimer_global_cleanup(); |
75ff3a8a | 2549 | vmbus_initiate_unload(false); |
523b9408 VK |
2550 | /* Make sure conn_state is set as hv_synic_cleanup checks for it */ |
2551 | mb(); | |
76d36ab7 | 2552 | cpuhp_remove_state(hyperv_cpuhp_online); |
d6f3609d | 2553 | hyperv_cleanup(); |
2517281d VK |
2554 | }; |
2555 | ||
b4370df2 VK |
2556 | static void hv_crash_handler(struct pt_regs *regs) |
2557 | { | |
fd1fea68 MK |
2558 | int cpu; |
2559 | ||
75ff3a8a | 2560 | vmbus_initiate_unload(true); |
b4370df2 VK |
2561 | /* |
2562 | * In crash handler we can't schedule synic cleanup for all CPUs, | |
2563 | * doing the cleanup for current CPU only. This should be sufficient | |
2564 | * for kdump. | |
2565 | */ | |
fd1fea68 MK |
2566 | cpu = smp_processor_id(); |
2567 | hv_stimer_cleanup(cpu); | |
7a1323b5 | 2568 | hv_synic_disable_regs(cpu); |
d6f3609d | 2569 | hyperv_cleanup(); |
b4370df2 VK |
2570 | }; |
2571 | ||
63ecc6d2 DC |
/*
 * hv_synic_suspend - syscore suspend callback: disable the synic registers
 * on CPU0.
 *
 * Return: always 0.
 */
static int hv_synic_suspend(void)
{
	/*
	 * By the time we get here every non-boot CPU has been offlined.
	 * In a legacy configuration, where stimer Direct Mode is not
	 * enabled, the stimers of those CPUs were already unbound via
	 * hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
	 * hv_stimer_cleanup() -> clockevents_unbind_device().
	 *
	 * This function runs only on CPU0, with interrupts disabled.
	 * hv_stimer_legacy_cleanup() is deliberately not called for CPU0:
	 *
	 * 1) it is unnecessary, since interrupts stay disabled between
	 *    syscore_suspend() and syscore_resume(): see create_image() and
	 *    resume_target_kernel();
	 * 2) the stimer on CPU0 is disabled automatically later on by
	 *    syscore_suspend() -> timekeeping_suspend() -> tick_suspend()
	 *    -> ... -> clockevents_shutdown() -> ... -> hv_ce_shutdown();
	 * 3) clockevents_unbind_device() may sleep, so calling it in this
	 *    interrupts-disabled context would trigger a warning.
	 */
	hv_synic_disable_regs(0);

	return 0;
}
2598 | ||
/*
 * hv_synic_resume - syscore resume callback: re-enable the synic registers
 * on CPU0.
 */
static void hv_synic_resume(void)
{
	/*
	 * hv_stimer_init(0) is not needed here: hv_synic_suspend() does not
	 * unbind the timer on CPU0, and timekeeping_resume() re-enables that
	 * timer automatically.
	 */
	hv_synic_enable_regs(0);
}
2609 | ||
/*
 * Syscore hooks that disable/re-enable the synic registers of CPU0 around
 * suspend.  The callbacks run only on CPU0, with irqs_disabled.
 */
static struct syscore_ops hv_synic_syscore_ops = {
	.suspend = hv_synic_suspend,
	.resume = hv_synic_resume,
};
2615 | ||
607c1a11 | 2616 | static int __init hv_acpi_init(void) |
1168ac22 | 2617 | { |
2dda95f8 | 2618 | int ret, t; |
b0069f43 | 2619 | |
4a5f3cde | 2620 | if (!hv_is_hyperv_initialized()) |
0592969e JW |
2621 | return -ENODEV; |
2622 | ||
b0069f43 S |
2623 | init_completion(&probe_event); |
2624 | ||
2625 | /* | |
efc26722 | 2626 | * Get ACPI resources first. |
b0069f43 | 2627 | */ |
0246604c S |
2628 | ret = acpi_bus_register_driver(&vmbus_acpi_driver); |
2629 | ||
b0069f43 S |
2630 | if (ret) |
2631 | return ret; | |
2632 | ||
2dda95f8 S |
2633 | t = wait_for_completion_timeout(&probe_event, 5*HZ); |
2634 | if (t == 0) { | |
2635 | ret = -ETIMEDOUT; | |
2636 | goto cleanup; | |
2637 | } | |
af9ca6f9 | 2638 | hv_debug_init(); |
b0069f43 | 2639 | |
efc26722 | 2640 | ret = vmbus_bus_init(); |
91fd799e | 2641 | if (ret) |
2dda95f8 S |
2642 | goto cleanup; |
2643 | ||
2517281d | 2644 | hv_setup_kexec_handler(hv_kexec_handler); |
b4370df2 | 2645 | hv_setup_crash_handler(hv_crash_handler); |
2517281d | 2646 | |
63ecc6d2 DC |
2647 | register_syscore_ops(&hv_synic_syscore_ops); |
2648 | ||
2dda95f8 S |
2649 | return 0; |
2650 | ||
2651 | cleanup: | |
2652 | acpi_bus_unregister_driver(&vmbus_acpi_driver); | |
cf6a2eac | 2653 | hv_acpi_dev = NULL; |
91fd799e | 2654 | return ret; |
1168ac22 S |
2655 | } |
2656 | ||
93e5bd06 S |
2657 | static void __exit vmbus_exit(void) |
2658 | { | |
e72e7ac5 VK |
2659 | int cpu; |
2660 | ||
63ecc6d2 DC |
2661 | unregister_syscore_ops(&hv_synic_syscore_ops); |
2662 | ||
2517281d | 2663 | hv_remove_kexec_handler(); |
b4370df2 | 2664 | hv_remove_crash_handler(); |
09a19628 | 2665 | vmbus_connection.conn_state = DISCONNECTED; |
fd1fea68 | 2666 | hv_stimer_global_cleanup(); |
2db84eff | 2667 | vmbus_disconnect(); |
76d388cd | 2668 | hv_remove_vmbus_irq(); |
37cdd991 SH |
2669 | for_each_online_cpu(cpu) { |
2670 | struct hv_per_cpu_context *hv_cpu | |
2671 | = per_cpu_ptr(hv_context.cpu_context, cpu); | |
2672 | ||
2673 | tasklet_kill(&hv_cpu->msg_dpc); | |
2674 | } | |
af9ca6f9 BB |
2675 | hv_debug_rm_all_dir(); |
2676 | ||
93e5bd06 | 2677 | vmbus_free_channels(); |
8b6a877c | 2678 | kfree(vmbus_connection.channels); |
37cdd991 | 2679 | |
cc2dd402 | 2680 | if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { |
81b18bce | 2681 | kmsg_dump_unregister(&hv_kmsg_dumper); |
510f7aef | 2682 | unregister_die_notifier(&hyperv_die_block); |
096c605f VK |
2683 | atomic_notifier_chain_unregister(&panic_notifier_list, |
2684 | &hyperv_panic_block); | |
2685 | } | |
81b18bce SM |
2686 | |
2687 | free_page((unsigned long)hv_panic_page); | |
8afc06dd SM |
2688 | unregister_sysctl_table(hv_ctl_table_hdr); |
2689 | hv_ctl_table_hdr = NULL; | |
93e5bd06 | 2690 | bus_unregister(&hv_bus); |
37cdd991 | 2691 | |
76d36ab7 | 2692 | cpuhp_remove_state(hyperv_cpuhp_online); |
06210b42 | 2693 | hv_synic_free(); |
93e5bd06 S |
2694 | acpi_bus_unregister_driver(&vmbus_acpi_driver); |
2695 | } | |
2696 | ||
1168ac22 | 2697 | |
/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");

/*
 * hv_acpi_init() is registered at the subsys initcall level;
 * vmbus_exit() is the module exit handler.
 */
subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);