]>
Commit | Line | Data |
---|---|---|
02eb84d0 MT |
1 | /* |
2 | * MSI-X device support | |
3 | * | |
4 | * This module includes support for MSI-X in pci devices. | |
5 | * | |
6 | * Author: Michael S. Tsirkin <[email protected]> | |
7 | * | |
8 | * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin ([email protected]) | |
9 | * | |
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
11 | * the COPYING file in the top-level directory. | |
6b620ca3 PB |
12 | * |
13 | * Contributions after 2012-01-13 are licensed under the terms of the | |
14 | * GNU GPL, version 2 or (at your option) any later version. | |
02eb84d0 MT |
15 | */ |
16 | ||
17 | #include "hw.h" | |
60ba3cc2 | 18 | #include "msi.h" |
02eb84d0 MT |
19 | #include "msix.h" |
20 | #include "pci.h" | |
bf1b0071 | 21 | #include "range.h" |
02eb84d0 | 22 | |
02eb84d0 MT |
23 | #define MSIX_CAP_LENGTH 12 |
24 | ||
2760952b MT |
25 | /* MSI-X enable bit and maskall bit are in byte 1 in FLAGS register */ |
26 | #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1) | |
02eb84d0 | 27 | #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) |
5b5cb086 | 28 | #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) |
02eb84d0 | 29 | |
5a1fc5e8 MT |
30 | /* How much space does an MSIX table need. */ |
31 | /* The spec requires giving the table structure | |
32 | * a 4K aligned region all by itself. */ | |
33 | #define MSIX_PAGE_SIZE 0x1000 | |
34 | /* Reserve second half of the page for pending bits */ | |
35 | #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) | |
02eb84d0 MT |
36 | #define MSIX_MAX_ENTRIES 32 |
37 | ||
38 | ||
02eb84d0 MT |
39 | /* Add MSI-X capability to the config space for the device. */ |
40 | /* Given a bar and its size, add MSI-X table on top of it | |
41 | * and fill MSI-X capability in the config space. | |
42 | * Original bar size must be a power of 2 or 0. | |
43 | * New bar size is returned. */ | |
44 | static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, | |
45 | unsigned bar_nr, unsigned bar_size) | |
46 | { | |
47 | int config_offset; | |
48 | uint8_t *config; | |
49 | uint32_t new_size; | |
50 | ||
51 | if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) | |
52 | return -EINVAL; | |
53 | if (bar_size > 0x80000000) | |
54 | return -ENOSPC; | |
55 | ||
56 | /* Add space for MSI-X structures */ | |
5e520a7d | 57 | if (!bar_size) { |
5a1fc5e8 MT |
58 | new_size = MSIX_PAGE_SIZE; |
59 | } else if (bar_size < MSIX_PAGE_SIZE) { | |
60 | bar_size = MSIX_PAGE_SIZE; | |
61 | new_size = MSIX_PAGE_SIZE * 2; | |
62 | } else { | |
02eb84d0 | 63 | new_size = bar_size * 2; |
5a1fc5e8 | 64 | } |
02eb84d0 MT |
65 | |
66 | pdev->msix_bar_size = new_size; | |
ca77089d IY |
67 | config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, |
68 | 0, MSIX_CAP_LENGTH); | |
02eb84d0 MT |
69 | if (config_offset < 0) |
70 | return config_offset; | |
71 | config = pdev->config + config_offset; | |
72 | ||
73 | pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); | |
74 | /* Table on top of BAR */ | |
01731cfb | 75 | pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr); |
02eb84d0 | 76 | /* Pending bits on top of that */ |
01731cfb | 77 | pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) | |
5a1fc5e8 | 78 | bar_nr); |
02eb84d0 | 79 | pdev->msix_cap = config_offset; |
ebabb67a | 80 | /* Make flags bit writable. */ |
5b5cb086 MT |
81 | pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK | |
82 | MSIX_MASKALL_MASK; | |
50322249 | 83 | pdev->msix_function_masked = true; |
02eb84d0 MT |
84 | return 0; |
85 | } | |
86 | ||
95524ae8 AK |
87 | static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr, |
88 | unsigned size) | |
02eb84d0 MT |
89 | { |
90 | PCIDevice *dev = opaque; | |
76f5159d | 91 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; |
02eb84d0 | 92 | void *page = dev->msix_table_page; |
02eb84d0 | 93 | |
76f5159d | 94 | return pci_get_long(page + offset); |
02eb84d0 MT |
95 | } |
96 | ||
02eb84d0 MT |
/* Return the bit mask that selects @vector's bit inside its pending byte
 * (bit number is the vector number modulo 8). */
static uint8_t msix_pending_mask(int vector)
{
    /* Reduce in unsigned arithmetic: with a signed operand a negative
     * vector would yield a negative shift count, which is undefined
     * behaviour.  Results for non-negative vectors are unchanged. */
    return 1u << ((unsigned)vector & 7);
}
101 | ||
102 | static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) | |
103 | { | |
5a1fc5e8 | 104 | return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8; |
02eb84d0 MT |
105 | } |
106 | ||
107 | static int msix_is_pending(PCIDevice *dev, int vector) | |
108 | { | |
109 | return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); | |
110 | } | |
111 | ||
112 | static void msix_set_pending(PCIDevice *dev, int vector) | |
113 | { | |
114 | *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); | |
115 | } | |
116 | ||
117 | static void msix_clr_pending(PCIDevice *dev, int vector) | |
118 | { | |
119 | *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); | |
120 | } | |
121 | ||
ae392c41 | 122 | static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask) |
02eb84d0 | 123 | { |
ae392c41 MT |
124 | unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; |
125 | return fmask || dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT; | |
5b5cb086 MT |
126 | } |
127 | ||
ae392c41 | 128 | static bool msix_is_masked(PCIDevice *dev, int vector) |
5b5cb086 | 129 | { |
ae392c41 MT |
130 | return msix_vector_masked(dev, vector, dev->msix_function_masked); |
131 | } | |
132 | ||
133 | static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) | |
134 | { | |
135 | bool is_masked = msix_is_masked(dev, vector); | |
136 | if (is_masked == was_masked) { | |
137 | return; | |
138 | } | |
139 | ||
140 | if (!is_masked && msix_is_pending(dev, vector)) { | |
5b5cb086 MT |
141 | msix_clr_pending(dev, vector); |
142 | msix_notify(dev, vector); | |
143 | } | |
144 | } | |
145 | ||
50322249 MT |
146 | static void msix_update_function_masked(PCIDevice *dev) |
147 | { | |
148 | dev->msix_function_masked = !msix_enabled(dev) || | |
149 | (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK); | |
150 | } | |
151 | ||
5b5cb086 MT |
152 | /* Handle MSI-X capability config write. */ |
153 | void msix_write_config(PCIDevice *dev, uint32_t addr, | |
154 | uint32_t val, int len) | |
155 | { | |
156 | unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET; | |
157 | int vector; | |
50322249 | 158 | bool was_masked; |
5b5cb086 | 159 | |
98a3cb02 | 160 | if (!range_covers_byte(addr, len, enable_pos)) { |
5b5cb086 MT |
161 | return; |
162 | } | |
163 | ||
50322249 MT |
164 | was_masked = dev->msix_function_masked; |
165 | msix_update_function_masked(dev); | |
166 | ||
5b5cb086 MT |
167 | if (!msix_enabled(dev)) { |
168 | return; | |
169 | } | |
170 | ||
e407bf13 | 171 | pci_device_deassert_intx(dev); |
5b5cb086 | 172 | |
50322249 | 173 | if (dev->msix_function_masked == was_masked) { |
5b5cb086 MT |
174 | return; |
175 | } | |
176 | ||
177 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
ae392c41 MT |
178 | msix_handle_mask_update(dev, vector, |
179 | msix_vector_masked(dev, vector, was_masked)); | |
5b5cb086 | 180 | } |
02eb84d0 MT |
181 | } |
182 | ||
95524ae8 AK |
183 | static void msix_mmio_write(void *opaque, target_phys_addr_t addr, |
184 | uint64_t val, unsigned size) | |
02eb84d0 MT |
185 | { |
186 | PCIDevice *dev = opaque; | |
76f5159d | 187 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; |
01731cfb | 188 | int vector = offset / PCI_MSIX_ENTRY_SIZE; |
ae392c41 | 189 | bool was_masked; |
9a93b617 MT |
190 | |
191 | /* MSI-X page includes a read-only PBA and a writeable Vector Control. */ | |
192 | if (vector >= dev->msix_entries_nr) { | |
193 | return; | |
194 | } | |
195 | ||
ae392c41 | 196 | was_masked = msix_is_masked(dev, vector); |
76f5159d | 197 | pci_set_long(dev->msix_table_page + offset, val); |
ae392c41 | 198 | msix_handle_mask_update(dev, vector, was_masked); |
02eb84d0 MT |
199 | } |
200 | ||
95524ae8 AK |
201 | static const MemoryRegionOps msix_mmio_ops = { |
202 | .read = msix_mmio_read, | |
203 | .write = msix_mmio_write, | |
204 | .endianness = DEVICE_NATIVE_ENDIAN, | |
205 | .valid = { | |
206 | .min_access_size = 4, | |
207 | .max_access_size = 4, | |
208 | }, | |
02eb84d0 MT |
209 | }; |
210 | ||
95524ae8 | 211 | static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) |
02eb84d0 MT |
212 | { |
213 | uint8_t *config = d->config + d->msix_cap; | |
01731cfb | 214 | uint32_t table = pci_get_long(config + PCI_MSIX_TABLE); |
5a1fc5e8 | 215 | uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1); |
02eb84d0 MT |
216 | /* TODO: for assigned devices, we'll want to make it possible to map |
217 | * pending bits separately in case they are in a separate bar. */ | |
02eb84d0 | 218 | |
95524ae8 | 219 | memory_region_add_subregion(bar, offset, &d->msix_mmio); |
02eb84d0 MT |
220 | } |
221 | ||
ae1be0bb MT |
222 | static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) |
223 | { | |
224 | int vector; | |
225 | for (vector = 0; vector < nentries; ++vector) { | |
01731cfb JK |
226 | unsigned offset = |
227 | vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; | |
228 | dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; | |
ae1be0bb MT |
229 | } |
230 | } | |
231 | ||
02eb84d0 MT |
232 | /* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is |
233 | * modified, it should be retrieved with msix_bar_size. */ | |
234 | int msix_init(struct PCIDevice *dev, unsigned short nentries, | |
95524ae8 | 235 | MemoryRegion *bar, |
5a1fc5e8 | 236 | unsigned bar_nr, unsigned bar_size) |
02eb84d0 MT |
237 | { |
238 | int ret; | |
60ba3cc2 | 239 | |
02eb84d0 | 240 | /* Nothing to do if MSI is not supported by interrupt controller */ |
60ba3cc2 | 241 | if (!msi_supported) { |
02eb84d0 | 242 | return -ENOTSUP; |
60ba3cc2 | 243 | } |
02eb84d0 MT |
244 | if (nentries > MSIX_MAX_ENTRIES) |
245 | return -EINVAL; | |
246 | ||
7267c094 | 247 | dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES * |
02eb84d0 MT |
248 | sizeof *dev->msix_entry_used); |
249 | ||
7267c094 | 250 | dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE); |
ae1be0bb | 251 | msix_mask_all(dev, nentries); |
02eb84d0 | 252 | |
95524ae8 AK |
253 | memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev, |
254 | "msix", MSIX_PAGE_SIZE); | |
02eb84d0 MT |
255 | |
256 | dev->msix_entries_nr = nentries; | |
257 | ret = msix_add_config(dev, nentries, bar_nr, bar_size); | |
258 | if (ret) | |
259 | goto err_config; | |
260 | ||
261 | dev->cap_present |= QEMU_PCI_CAP_MSIX; | |
95524ae8 | 262 | msix_mmio_setup(dev, bar); |
02eb84d0 MT |
263 | return 0; |
264 | ||
265 | err_config: | |
3174ecd1 | 266 | dev->msix_entries_nr = 0; |
95524ae8 | 267 | memory_region_destroy(&dev->msix_mmio); |
7267c094 | 268 | g_free(dev->msix_table_page); |
02eb84d0 | 269 | dev->msix_table_page = NULL; |
7267c094 | 270 | g_free(dev->msix_entry_used); |
02eb84d0 MT |
271 | dev->msix_entry_used = NULL; |
272 | return ret; | |
273 | } | |
274 | ||
98304c84 MT |
275 | static void msix_free_irq_entries(PCIDevice *dev) |
276 | { | |
277 | int vector; | |
278 | ||
279 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
280 | dev->msix_entry_used[vector] = 0; | |
281 | msix_clr_pending(dev, vector); | |
282 | } | |
283 | } | |
284 | ||
02eb84d0 | 285 | /* Clean up resources for the device. */ |
95524ae8 | 286 | int msix_uninit(PCIDevice *dev, MemoryRegion *bar) |
02eb84d0 MT |
287 | { |
288 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
289 | return 0; | |
290 | pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); | |
291 | dev->msix_cap = 0; | |
292 | msix_free_irq_entries(dev); | |
293 | dev->msix_entries_nr = 0; | |
95524ae8 AK |
294 | memory_region_del_subregion(bar, &dev->msix_mmio); |
295 | memory_region_destroy(&dev->msix_mmio); | |
7267c094 | 296 | g_free(dev->msix_table_page); |
02eb84d0 | 297 | dev->msix_table_page = NULL; |
7267c094 | 298 | g_free(dev->msix_entry_used); |
02eb84d0 MT |
299 | dev->msix_entry_used = NULL; |
300 | dev->cap_present &= ~QEMU_PCI_CAP_MSIX; | |
301 | return 0; | |
302 | } | |
303 | ||
304 | void msix_save(PCIDevice *dev, QEMUFile *f) | |
305 | { | |
9a3e12c8 MT |
306 | unsigned n = dev->msix_entries_nr; |
307 | ||
72755a70 | 308 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
9a3e12c8 | 309 | return; |
72755a70 | 310 | } |
9a3e12c8 | 311 | |
01731cfb | 312 | qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); |
5a1fc5e8 | 313 | qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); |
02eb84d0 MT |
314 | } |
315 | ||
316 | /* Should be called after restoring the config space. */ | |
317 | void msix_load(PCIDevice *dev, QEMUFile *f) | |
318 | { | |
319 | unsigned n = dev->msix_entries_nr; | |
320 | ||
98846d73 | 321 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
02eb84d0 | 322 | return; |
98846d73 | 323 | } |
02eb84d0 | 324 | |
4bfd1712 | 325 | msix_free_irq_entries(dev); |
01731cfb | 326 | qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); |
5a1fc5e8 | 327 | qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); |
50322249 | 328 | msix_update_function_masked(dev); |
02eb84d0 MT |
329 | } |
330 | ||
331 | /* Does device support MSI-X? */ | |
332 | int msix_present(PCIDevice *dev) | |
333 | { | |
334 | return dev->cap_present & QEMU_PCI_CAP_MSIX; | |
335 | } | |
336 | ||
337 | /* Is MSI-X enabled? */ | |
338 | int msix_enabled(PCIDevice *dev) | |
339 | { | |
340 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) && | |
2760952b | 341 | (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & |
02eb84d0 MT |
342 | MSIX_ENABLE_MASK); |
343 | } | |
344 | ||
345 | /* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */ | |
346 | uint32_t msix_bar_size(PCIDevice *dev) | |
347 | { | |
348 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) ? | |
349 | dev->msix_bar_size : 0; | |
350 | } | |
351 | ||
352 | /* Send an MSI-X message */ | |
353 | void msix_notify(PCIDevice *dev, unsigned vector) | |
354 | { | |
01731cfb | 355 | uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; |
02eb84d0 MT |
356 | uint64_t address; |
357 | uint32_t data; | |
358 | ||
359 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) | |
360 | return; | |
361 | if (msix_is_masked(dev, vector)) { | |
362 | msix_set_pending(dev, vector); | |
363 | return; | |
364 | } | |
365 | ||
01731cfb JK |
366 | address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); |
367 | data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); | |
ae5d3eb4 | 368 | stl_le_phys(address, data); |
02eb84d0 MT |
369 | } |
370 | ||
371 | void msix_reset(PCIDevice *dev) | |
372 | { | |
373 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
374 | return; | |
375 | msix_free_irq_entries(dev); | |
2760952b MT |
376 | dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= |
377 | ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; | |
5a1fc5e8 | 378 | memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE); |
ae1be0bb | 379 | msix_mask_all(dev, dev->msix_entries_nr); |
02eb84d0 MT |
380 | } |
381 | ||
382 | /* PCI spec suggests that devices make it possible for software to configure | |
383 | * fewer vectors than supported by the device, but does not specify a standard | |
384 | * mechanism for devices to do so. | |
385 | * | |
386 | * We support this by asking devices to declare vectors software is going to | |
387 | * actually use, and checking this on the notification path. Devices that | |
388 | * don't want to follow the spec suggestion can declare all vectors as used. */ | |
389 | ||
390 | /* Mark vector as used. */ | |
391 | int msix_vector_use(PCIDevice *dev, unsigned vector) | |
392 | { | |
393 | if (vector >= dev->msix_entries_nr) | |
394 | return -EINVAL; | |
395 | dev->msix_entry_used[vector]++; | |
396 | return 0; | |
397 | } | |
398 | ||
399 | /* Mark vector as unused. */ | |
400 | void msix_vector_unuse(PCIDevice *dev, unsigned vector) | |
401 | { | |
98304c84 MT |
402 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { |
403 | return; | |
404 | } | |
405 | if (--dev->msix_entry_used[vector]) { | |
406 | return; | |
407 | } | |
408 | msix_clr_pending(dev, vector); | |
02eb84d0 | 409 | } |
b5f28bca MT |
410 | |
411 | void msix_unuse_all_vectors(PCIDevice *dev) | |
412 | { | |
413 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
414 | return; | |
415 | msix_free_irq_entries(dev); | |
416 | } |