]>
Commit | Line | Data |
---|---|---|
a76bab49 AL |
1 | /* |
2 | * QEMU aio implementation | |
3 | * | |
4 | * Copyright IBM, Corp. 2008 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <[email protected]> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
6b620ca3 PB |
12 | * Contributions after 2012-01-13 are licensed under the terms of the |
13 | * GNU GPL, version 2 or (at your option) any later version. | |
a76bab49 AL |
14 | */ |
15 | ||
16 | #include "qemu-common.h" | |
737e150e | 17 | #include "block/block.h" |
1de7afc9 PB |
18 | #include "qemu/queue.h" |
19 | #include "qemu/sockets.h" | |
a76bab49 | 20 | |
a76bab49 AL |
21 | struct AioHandler |
22 | { | |
cd9ba1eb | 23 | GPollFD pfd; |
a76bab49 AL |
24 | IOHandler *io_read; |
25 | IOHandler *io_write; | |
a76bab49 AL |
26 | int deleted; |
27 | void *opaque; | |
dca21ef2 | 28 | bool is_external; |
72cf2d4f | 29 | QLIST_ENTRY(AioHandler) node; |
a76bab49 AL |
30 | }; |
31 | ||
a915f4bc | 32 | static AioHandler *find_aio_handler(AioContext *ctx, int fd) |
a76bab49 AL |
33 | { |
34 | AioHandler *node; | |
35 | ||
a915f4bc | 36 | QLIST_FOREACH(node, &ctx->aio_handlers, node) { |
cd9ba1eb | 37 | if (node->pfd.fd == fd) |
79d5ca56 AG |
38 | if (!node->deleted) |
39 | return node; | |
a76bab49 AL |
40 | } |
41 | ||
42 | return NULL; | |
43 | } | |
44 | ||
a915f4bc PB |
45 | void aio_set_fd_handler(AioContext *ctx, |
46 | int fd, | |
dca21ef2 | 47 | bool is_external, |
a915f4bc PB |
48 | IOHandler *io_read, |
49 | IOHandler *io_write, | |
a915f4bc | 50 | void *opaque) |
a76bab49 AL |
51 | { |
52 | AioHandler *node; | |
53 | ||
a915f4bc | 54 | node = find_aio_handler(ctx, fd); |
a76bab49 AL |
55 | |
56 | /* Are we deleting the fd handler? */ | |
57 | if (!io_read && !io_write) { | |
58 | if (node) { | |
e3713e00 PB |
59 | g_source_remove_poll(&ctx->source, &node->pfd); |
60 | ||
a76bab49 | 61 | /* If the lock is held, just mark the node as deleted */ |
cd9ba1eb | 62 | if (ctx->walking_handlers) { |
a76bab49 | 63 | node->deleted = 1; |
cd9ba1eb PB |
64 | node->pfd.revents = 0; |
65 | } else { | |
a76bab49 AL |
66 | /* Otherwise, delete it for real. We can't just mark it as |
67 | * deleted because deleted nodes are only cleaned up after | |
68 | * releasing the walking_handlers lock. | |
69 | */ | |
72cf2d4f | 70 | QLIST_REMOVE(node, node); |
7267c094 | 71 | g_free(node); |
a76bab49 AL |
72 | } |
73 | } | |
74 | } else { | |
75 | if (node == NULL) { | |
76 | /* Alloc and insert if it's not already there */ | |
3ba235a0 | 77 | node = g_new0(AioHandler, 1); |
cd9ba1eb | 78 | node->pfd.fd = fd; |
a915f4bc | 79 | QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node); |
e3713e00 PB |
80 | |
81 | g_source_add_poll(&ctx->source, &node->pfd); | |
a76bab49 AL |
82 | } |
83 | /* Update handler with latest information */ | |
84 | node->io_read = io_read; | |
85 | node->io_write = io_write; | |
a76bab49 | 86 | node->opaque = opaque; |
dca21ef2 | 87 | node->is_external = is_external; |
cd9ba1eb | 88 | |
b5a01a70 SH |
89 | node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0); |
90 | node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0); | |
a76bab49 | 91 | } |
7ed2b24c PB |
92 | |
93 | aio_notify(ctx); | |
9958c351 PB |
94 | } |
95 | ||
a915f4bc PB |
96 | void aio_set_event_notifier(AioContext *ctx, |
97 | EventNotifier *notifier, | |
dca21ef2 | 98 | bool is_external, |
f2e5dca4 | 99 | EventNotifierHandler *io_read) |
a76bab49 | 100 | { |
a915f4bc | 101 | aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), |
dca21ef2 | 102 | is_external, (IOHandler *)io_read, NULL, notifier); |
a76bab49 AL |
103 | } |
104 | ||
a3462c65 PB |
105 | bool aio_prepare(AioContext *ctx) |
106 | { | |
107 | return false; | |
108 | } | |
109 | ||
cd9ba1eb PB |
110 | bool aio_pending(AioContext *ctx) |
111 | { | |
112 | AioHandler *node; | |
113 | ||
114 | QLIST_FOREACH(node, &ctx->aio_handlers, node) { | |
115 | int revents; | |
116 | ||
cd9ba1eb PB |
117 | revents = node->pfd.revents & node->pfd.events; |
118 | if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) { | |
119 | return true; | |
120 | } | |
121 | if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) { | |
122 | return true; | |
123 | } | |
124 | } | |
125 | ||
126 | return false; | |
127 | } | |
128 | ||
e4c7e2d1 | 129 | bool aio_dispatch(AioContext *ctx) |
a76bab49 | 130 | { |
9eb0bfca | 131 | AioHandler *node; |
d0c8d2c0 | 132 | bool progress = false; |
7c0628b2 | 133 | |
e4c7e2d1 PB |
134 | /* |
135 | * If there are callbacks left that have been queued, we need to call them. | |
136 | * Do not call select in this case, because it is possible that the caller | |
137 | * does not need a complete flush (as is the case for aio_poll loops). | |
138 | */ | |
139 | if (aio_bh_poll(ctx)) { | |
140 | progress = true; | |
141 | } | |
142 | ||
cd9ba1eb | 143 | /* |
87f68d31 | 144 | * We have to walk very carefully in case aio_set_fd_handler is |
cd9ba1eb PB |
145 | * called while we're walking. |
146 | */ | |
147 | node = QLIST_FIRST(&ctx->aio_handlers); | |
148 | while (node) { | |
149 | AioHandler *tmp; | |
150 | int revents; | |
151 | ||
152 | ctx->walking_handlers++; | |
153 | ||
154 | revents = node->pfd.revents & node->pfd.events; | |
155 | node->pfd.revents = 0; | |
156 | ||
d0c8d2c0 SH |
157 | if (!node->deleted && |
158 | (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) && | |
159 | node->io_read) { | |
cd9ba1eb | 160 | node->io_read(node->opaque); |
164a101f SH |
161 | |
162 | /* aio_notify() does not count as progress */ | |
163 | if (node->opaque != &ctx->notifier) { | |
164 | progress = true; | |
165 | } | |
cd9ba1eb | 166 | } |
d0c8d2c0 SH |
167 | if (!node->deleted && |
168 | (revents & (G_IO_OUT | G_IO_ERR)) && | |
169 | node->io_write) { | |
cd9ba1eb PB |
170 | node->io_write(node->opaque); |
171 | progress = true; | |
172 | } | |
173 | ||
174 | tmp = node; | |
175 | node = QLIST_NEXT(node, node); | |
176 | ||
177 | ctx->walking_handlers--; | |
178 | ||
179 | if (!ctx->walking_handlers && tmp->deleted) { | |
180 | QLIST_REMOVE(tmp, node); | |
181 | g_free(tmp); | |
182 | } | |
183 | } | |
438e1f47 AB |
184 | |
185 | /* Run our timers */ | |
186 | progress |= timerlistgroup_run_timers(&ctx->tlg); | |
187 | ||
d0c8d2c0 SH |
188 | return progress; |
189 | } | |
190 | ||
e98ab097 PB |
191 | /* These thread-local variables are used only in a small part of aio_poll |
192 | * around the call to the poll() system call. In particular they are not | |
193 | * used while aio_poll is performing callbacks, which makes it much easier | |
194 | * to think about reentrancy! | |
195 | * | |
196 | * Stack-allocated arrays would be perfect but they have size limitations; | |
197 | * heap allocation is expensive enough that we want to reuse arrays across | |
198 | * calls to aio_poll(). And because poll() has to be called without holding | |
199 | * any lock, the arrays cannot be stored in AioContext. Thread-local data | |
200 | * has none of the disadvantages of these three options. | |
201 | */ | |
202 | static __thread GPollFD *pollfds; | |
203 | static __thread AioHandler **nodes; | |
204 | static __thread unsigned npfd, nalloc; | |
205 | static __thread Notifier pollfds_cleanup_notifier; | |
206 | ||
207 | static void pollfds_cleanup(Notifier *n, void *unused) | |
208 | { | |
209 | g_assert(npfd == 0); | |
210 | g_free(pollfds); | |
211 | g_free(nodes); | |
212 | nalloc = 0; | |
213 | } | |
214 | ||
215 | static void add_pollfd(AioHandler *node) | |
216 | { | |
217 | if (npfd == nalloc) { | |
218 | if (nalloc == 0) { | |
219 | pollfds_cleanup_notifier.notify = pollfds_cleanup; | |
220 | qemu_thread_atexit_add(&pollfds_cleanup_notifier); | |
221 | nalloc = 8; | |
222 | } else { | |
223 | g_assert(nalloc <= INT_MAX); | |
224 | nalloc *= 2; | |
225 | } | |
226 | pollfds = g_renew(GPollFD, pollfds, nalloc); | |
227 | nodes = g_renew(AioHandler *, nodes, nalloc); | |
228 | } | |
229 | nodes[npfd] = node; | |
230 | pollfds[npfd] = (GPollFD) { | |
231 | .fd = node->pfd.fd, | |
232 | .events = node->pfd.events, | |
233 | }; | |
234 | npfd++; | |
235 | } | |
236 | ||
d0c8d2c0 SH |
237 | bool aio_poll(AioContext *ctx, bool blocking) |
238 | { | |
d0c8d2c0 | 239 | AioHandler *node; |
e98ab097 | 240 | int i, ret; |
164a101f | 241 | bool progress; |
e98ab097 | 242 | int64_t timeout; |
d0c8d2c0 | 243 | |
49110174 | 244 | aio_context_acquire(ctx); |
d0c8d2c0 SH |
245 | progress = false; |
246 | ||
0ceb849b PB |
247 | /* aio_notify can avoid the expensive event_notifier_set if |
248 | * everything (file descriptors, bottom halves, timers) will | |
e4c7e2d1 PB |
249 | * be re-evaluated before the next blocking poll(). This is |
250 | * already true when aio_poll is called with blocking == false; | |
eabc9779 PB |
251 | * if blocking == true, it is only true after poll() returns, |
252 | * so disable the optimization now. | |
0ceb849b | 253 | */ |
eabc9779 PB |
254 | if (blocking) { |
255 | atomic_add(&ctx->notify_me, 2); | |
256 | } | |
0ceb849b | 257 | |
a915f4bc | 258 | ctx->walking_handlers++; |
a76bab49 | 259 | |
e98ab097 | 260 | assert(npfd == 0); |
a76bab49 | 261 | |
6b5f8762 | 262 | /* fill pollfds */ |
a915f4bc | 263 | QLIST_FOREACH(node, &ctx->aio_handlers, node) { |
c1e1e5fa FZ |
264 | if (!node->deleted && node->pfd.events |
265 | && aio_node_check(ctx, node->is_external)) { | |
e98ab097 | 266 | add_pollfd(node); |
9eb0bfca PB |
267 | } |
268 | } | |
a76bab49 | 269 | |
e98ab097 | 270 | timeout = blocking ? aio_compute_timeout(ctx) : 0; |
a76bab49 | 271 | |
9eb0bfca | 272 | /* wait until next event */ |
49110174 PB |
273 | if (timeout) { |
274 | aio_context_release(ctx); | |
275 | } | |
e98ab097 | 276 | ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout); |
eabc9779 PB |
277 | if (blocking) { |
278 | atomic_sub(&ctx->notify_me, 2); | |
279 | } | |
49110174 PB |
280 | if (timeout) { |
281 | aio_context_acquire(ctx); | |
282 | } | |
9eb0bfca | 283 | |
05e514b1 | 284 | aio_notify_accept(ctx); |
21a03d17 | 285 | |
9eb0bfca PB |
286 | /* if we have any readable fds, dispatch event */ |
287 | if (ret > 0) { | |
e98ab097 PB |
288 | for (i = 0; i < npfd; i++) { |
289 | nodes[i]->pfd.revents = pollfds[i].revents; | |
a76bab49 | 290 | } |
438e1f47 AB |
291 | } |
292 | ||
e98ab097 PB |
293 | npfd = 0; |
294 | ctx->walking_handlers--; | |
295 | ||
438e1f47 AB |
296 | /* Run dispatch even if there were no readable fds to run timers */ |
297 | if (aio_dispatch(ctx)) { | |
298 | progress = true; | |
9eb0bfca | 299 | } |
bcdc1857 | 300 | |
49110174 PB |
301 | aio_context_release(ctx); |
302 | ||
164a101f | 303 | return progress; |
a76bab49 | 304 | } |