/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"

struct AioHandler
{
    GPollFD pfd;                   /* fd and event mask handed to poll() */
    IOHandler *io_read;            /* callback for read readiness, or NULL */
    IOHandler *io_write;           /* callback for write readiness, or NULL */
    int deleted;                   /* removal deferred while list is walked */
    void *opaque;                  /* argument passed to the callbacks */
    QLIST_ENTRY(AioHandler) node;
};

static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd && !node->deleted) {
            return node;
        }
    }

    return NULL;
}

void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque)
{
    AioHandler *node;

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

            /* If the lock is held, just mark the node as deleted */
            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real.  We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
            }
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
        node->opaque = opaque;

        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
    }

    aio_notify(ctx);
}

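/*
 * Example (illustrative sketch, not part of the original file): a caller
 * that wants read notifications on a non-blocking pipe could register a
 * handler and later remove it by passing NULL callbacks.  "MyState",
 * "pipe_read_cb" and the fields used below are hypothetical names:
 *
 *     static void pipe_read_cb(void *opaque)
 *     {
 *         MyState *s = opaque;
 *         char buf[64];
 *
 *         while (read(s->pipe_fd, buf, sizeof(buf)) > 0) {
 *             continue;       hypothetical: drain the non-blocking fd
 *         }
 *     }
 *
 *     aio_set_fd_handler(ctx, s->pipe_fd, pipe_read_cb, NULL, s);
 *     ...
 *     aio_set_fd_handler(ctx, s->pipe_fd, NULL, NULL, NULL);
 */
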
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            EventNotifierHandler *io_read)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
                       (IOHandler *)io_read, NULL, notifier);
}

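/*
 * Example (hypothetical sketch): wiring up an EventNotifier as a wakeup
 * source, in the style of the AioContext's own notifier.  The handler
 * receives the EventNotifier itself as its argument and should clear it:
 *
 *     static void wakeup_cb(EventNotifier *e)
 *     {
 *         event_notifier_test_and_clear(e);
 *     }
 *
 *     EventNotifier e;
 *     event_notifier_init(&e, 0);
 *     aio_set_event_notifier(ctx, &e, wakeup_cb);
 */
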
bool aio_prepare(AioContext *ctx)
{
    /* No pre-poll work is needed on POSIX hosts */
    return false;
}

bool aio_pending(AioContext *ctx)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
            return true;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
            return true;
        }
    }

    return false;
}

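/*
 * Note: aio_prepare() and aio_pending() back the prepare and check phases
 * of the GSource that embeds an AioContext into a glib main loop (see the
 * aio_ctx_* callbacks in async.c); aio_pending() reports whether the
 * previous poll left events that still need dispatching.
 */
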
bool aio_dispatch(AioContext *ctx)
{
    AioHandler *node;
    bool progress = false;

    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call select in this case, because it is possible that the caller
     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        progress = true;
    }

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
    while (node) {
        AioHandler *tmp;
        int revents;

        ctx->walking_handlers++;

        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;

        if (!node->deleted &&
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
            node->io_read) {
            node->io_read(node->opaque);

            /* aio_notify() does not count as progress */
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
            node->io_write) {
            node->io_write(node->opaque);
            progress = true;
        }

        tmp = node;
        node = QLIST_NEXT(node, node);

        ctx->walking_handlers--;

        if (!ctx->walking_handlers && tmp->deleted) {
            QLIST_REMOVE(tmp, node);
            g_free(tmp);
        }
    }

    /* Run our timers */
    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

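/*
 * The walking_handlers counter above is what makes it safe for a callback
 * to modify the handler list.  For instance (hypothetical sketch, with
 * made-up names), a one-shot handler can unregister itself from inside
 * its own callback; the node is merely marked deleted and is freed only
 * after the walk completes:
 *
 *     static void one_shot_cb(void *opaque)
 *     {
 *         MyState *s = opaque;
 *
 *         aio_set_fd_handler(s->ctx, s->fd, NULL, NULL, NULL);
 *         handle_single_event(s);
 *     }
 */
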
/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
static __thread GPollFD *pollfds;
static __thread AioHandler **nodes;
static __thread unsigned npfd, nalloc;
static __thread Notifier pollfds_cleanup_notifier;

static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);
    g_free(pollfds);
    g_free(nodes);
    nalloc = 0;
}

/* Append one handler to the thread-local pollfds/nodes arrays, growing
 * them geometrically on demand and registering an at-exit cleanup on
 * first use.
 */
static void add_pollfd(AioHandler *node)
{
    if (npfd == nalloc) {
        if (nalloc == 0) {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        } else {
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }
    nodes[npfd] = node;
    pollfds[npfd] = (GPollFD) {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };
    npfd++;
}

bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    bool was_dispatching;
    int i, ret;
    bool progress;
    int64_t timeout;

    aio_context_acquire(ctx);
    was_dispatching = ctx->dispatching;
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns.
     *
     * If we're in a nested event loop, ctx->dispatching might be true.
     * In that case we can restore it just before returning, but we
     * have to clear it now.
     */
    aio_set_dispatching(ctx, !blocking);

    ctx->walking_handlers++;

    assert(npfd == 0);

    /* fill pollfds */
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->pfd.events) {
            add_pollfd(node);
        }
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;

    /* wait until next event */
    if (timeout) {
        aio_context_release(ctx);
    }
    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
    if (timeout) {
        aio_context_acquire(ctx);
    }

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        for (i = 0; i < npfd; i++) {
            nodes[i]->pfd.revents = pollfds[i].revents;
        }
    }

    npfd = 0;
    ctx->walking_handlers--;

    /* Run dispatch even if there were no readable fds to run timers */
    aio_set_dispatching(ctx, true);
    if (aio_dispatch(ctx)) {
        progress = true;
    }

    aio_set_dispatching(ctx, was_dispatching);
    aio_context_release(ctx);

    return progress;
}
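
/*
 * Example (illustrative sketch): a caller that must wait for a condition,
 * such as completion of in-flight I/O, typically loops over a blocking
 * aio_poll(); "s->done" is a hypothetical flag set by one of the
 * registered callbacks:
 *
 *     while (!s->done) {
 *         aio_poll(ctx, true);
 *     }
 *
 * A non-blocking aio_poll(ctx, false) instead dispatches whatever is
 * already ready and returns immediately.
 */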