/*
 * QEMU posix-aio emulation
 *
 * Copyright IBM, Corp. 2008
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <sys/ioctl.h>
#include <sys/types.h>
#include <pthread.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#include "qemu-common.h"

#include "posix-aio-compat.h"
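
/*
 * Shared state of the emulation: a FIFO of pending requests (request_list)
 * protected by 'lock', a condition variable used to wake idle workers, and
 * counters used to spawn worker threads on demand up to max_threads.
 */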
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static pthread_t thread_id;
static pthread_attr_t attr;
static int max_threads = 64;
static int cur_threads = 0;
static int idle_threads = 0;
static TAILQ_HEAD(, qemu_paiocb) request_list;

#ifdef HAVE_PREADV
static int preadv_present = 1;
#else
static int preadv_present = 0;
#endif
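
/*
 * Error helpers and pthread wrappers: any unexpected failure from the
 * pthread primitives is treated as fatal.
 */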
static void die2(int err, const char *what)
{
    fprintf(stderr, "%s failed: %s\n", what, strerror(err));
    abort();
}

static void die(const char *what)
{
    die2(errno, what);
}

static void mutex_lock(pthread_mutex_t *mutex)
{
    int ret = pthread_mutex_lock(mutex);
    if (ret) die2(ret, "pthread_mutex_lock");
}

static void mutex_unlock(pthread_mutex_t *mutex)
{
    int ret = pthread_mutex_unlock(mutex);
    if (ret) die2(ret, "pthread_mutex_unlock");
}
static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
                          struct timespec *ts)
{
    int ret = pthread_cond_timedwait(cond, mutex, ts);
    if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait");
    return ret;
}

static void cond_signal(pthread_cond_t *cond)
{
    int ret = pthread_cond_signal(cond);
    if (ret) die2(ret, "pthread_cond_signal");
}
static void thread_create(pthread_t *thread, pthread_attr_t *attr,
                          void *(*start_routine)(void*), void *arg)
{
    int ret = pthread_create(thread, attr, start_routine, arg);
    if (ret) die2(ret, "pthread_create");
}
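
/*
 * Worker-side handler for QEMU_PAIO_IOCTL requests: the ioctl is issued
 * synchronously on the request's file descriptor.
 */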
static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
{
    int ret;

    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
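
/*
 * Thin wrappers around preadv/pwritev.  Whether the host provides them is
 * decided at build time (HAVE_PREADV here); preadv_present additionally lets
 * the code fall back to the linear path at runtime if a call turns out not
 * to be supported.
 */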
#ifdef HAVE_PREADV

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return preadv(fd, iov, nr_iov, offset);
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return pwritev(fd, iov, nr_iov, offset);
}

#else

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

#endif
/*
 * Check if we need to copy the data in the aiocb into a new
 * properly aligned buffer.
 */
static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
{
    if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
        int i;

        for (i = 0; i < aiocb->aio_niov; i++)
            if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
                return 1;
    }

    return 0;
}
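
/*
 * Fast path for vectored requests: hand the whole iovec to a single
 * preadv/pwritev call, retrying if the syscall is interrupted by a signal.
 */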
static size_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
{
    size_t offset = 0;
    ssize_t len;

    do {
        if (aiocb->aio_type == QEMU_PAIO_WRITE)
            len = qemu_pwritev(aiocb->aio_fildes,
                               aiocb->aio_iov,
                               aiocb->aio_niov,
                               aiocb->aio_offset + offset);
        else
            len = qemu_preadv(aiocb->aio_fildes,
                              aiocb->aio_iov,
                              aiocb->aio_niov,
                              aiocb->aio_offset + offset);
    } while (len == -1 && errno == EINTR);
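
/*
 * Slow path: transfer the request with plain pread/pwrite on one linear
 * buffer, looping until everything is done or an error occurs.
 */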
static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
{
    size_t offset = 0;
    size_t len;

    while (offset < aiocb->aio_nbytes) {
        if (aiocb->aio_type == QEMU_PAIO_WRITE)
            len = pwrite(aiocb->aio_fildes,
                         (const char *)buf + offset,
                         aiocb->aio_nbytes - offset,
                         aiocb->aio_offset + offset);
        else
            len = pread(aiocb->aio_fildes,
                        buf + offset,
                        aiocb->aio_nbytes - offset,
                        aiocb->aio_offset + offset);

        if (len == -1 && errno == EINTR)
            continue;
        else if (len == -1) {
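
/*
 * Pick a strategy for a read/write request: use the caller's buffers
 * directly when they are properly aligned (preferring preadv/pwritev for
 * multi-iovec requests), otherwise bounce the data through a single
 * 512-byte-aligned buffer.
 */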
static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
{
    size_t nbytes;
    char *buf;

    if (!aiocb_needs_copy(aiocb)) {
        /*
         * If there is just a single buffer, and it is properly aligned
         * we can just use plain pread/pwrite without any problems.
         */
        if (aiocb->aio_niov == 1)
            return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);

        /*
         * We have more than one iovec, and all are properly aligned.
         *
         * Try preadv/pwritev first and fall back to linearizing the
         * buffer if it's not supported.
         */
        if (preadv_present) {
            nbytes = handle_aiocb_rw_vector(aiocb);
            if (nbytes == aiocb->aio_nbytes)
                return nbytes;
            if (nbytes < 0 && nbytes != -ENOSYS)
                return nbytes;
            preadv_present = 0;
        }

        /*
         * XXX(hch): short read/write.  no easy way to handle the remainder
         * using these interfaces.  For now retry using plain
         * pread/pwrite.
         */
    }

    /*
     * Ok, we have to do it the hard way, copy all segments into
     * a single aligned buffer.
     */
    buf = qemu_memalign(512, aiocb->aio_nbytes);
    if (aiocb->aio_type == QEMU_PAIO_WRITE) {
        char *p = buf;
        int i;

        for (i = 0; i < aiocb->aio_niov; ++i) {
            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
            p += aiocb->aio_iov[i].iov_len;
        }
    }

    nbytes = handle_aiocb_rw_linear(aiocb, buf);
    if (aiocb->aio_type != QEMU_PAIO_WRITE) {
        char *p = buf;
        size_t count = aiocb->aio_nbytes, copy;
        int i;

        for (i = 0; i < aiocb->aio_niov && count; ++i) {
            copy = count;
            if (copy > aiocb->aio_iov[i].iov_len)
                copy = aiocb->aio_iov[i].iov_len;
            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
            p += copy;
            count -= copy;
        }
    }
    qemu_vfree(buf);

    return nbytes;
}
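
/*
 * Body of each worker thread: block all signals, then loop pulling requests
 * off request_list (waiting up to ten seconds on the condition variable for
 * new work), dispatch them by type, record the result in aiocb->ret and
 * notify the submitter by sending it aiocb->ev_signo.  A worker that stays
 * idle past the timeout exits.
 */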
static void *aio_thread(void *unused)
{
    pid_t pid = getpid();
    sigset_t set;

    /* block all signals */
    if (sigfillset(&set)) die("sigfillset");
    if (sigprocmask(SIG_BLOCK, &set, NULL)) die("sigprocmask");

    while (1) {
        struct qemu_paiocb *aiocb;
        size_t ret = 0;
        qemu_timeval tv;
        struct timespec ts;

        qemu_gettimeofday(&tv);
        ts.tv_sec = tv.tv_sec + 10;
        ts.tv_nsec = 0;

        mutex_lock(&lock);

        while (TAILQ_EMPTY(&request_list) && ret != ETIMEDOUT) {
            ret = cond_timedwait(&cond, &lock, &ts);
        }

        if (TAILQ_EMPTY(&request_list))
            break;

        aiocb = TAILQ_FIRST(&request_list);
        TAILQ_REMOVE(&request_list, aiocb, node);
        switch (aiocb->aio_type) {
        case QEMU_PAIO_READ:
        case QEMU_PAIO_WRITE:
            ret = handle_aiocb_rw(aiocb);
            break;
        case QEMU_PAIO_IOCTL:
            ret = handle_aiocb_ioctl(aiocb);
            break;
        default:
            fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
            ret = -EINVAL;
            break;
        }

        if (kill(pid, aiocb->ev_signo)) die("kill failed");
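
/*
 * Start one more detached worker thread and account for it in the thread
 * counters.
 */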
static void spawn_thread(void)
{
    cur_threads++;
    idle_threads++;
    thread_create(&thread_id, &attr, aio_thread, NULL);
}
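
/*
 * One-time initialization: create the detached thread attribute used for
 * the workers and set up the (initially empty) request queue.
 */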
int qemu_paio_init(struct qemu_paioinit *aioinit)
{
    int ret;

    ret = pthread_attr_init(&attr);
    if (ret) die2(ret, "pthread_attr_init");

    ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    if (ret) die2(ret, "pthread_attr_setdetachstate");

    TAILQ_INIT(&request_list);

    return 0;
}
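
/*
 * Common submission path: mark the request as in progress, spawn another
 * worker if all existing ones are busy, queue the request and wake a worker.
 */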
static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type)
{
    aiocb->aio_type = type;
    aiocb->ret = -EINPROGRESS;
    aiocb->active = 0;
    mutex_lock(&lock);
    if (idle_threads == 0 && cur_threads < max_threads)
        spawn_thread();
    TAILQ_INSERT_TAIL(&request_list, aiocb, node);
    mutex_unlock(&lock);
    cond_signal(&cond);

    return 0;
}
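
/*
 * Public entry points: tag the request with its type and hand it to
 * qemu_paio_submit().
 */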
int qemu_paio_read(struct qemu_paiocb *aiocb)
{
    return qemu_paio_submit(aiocb, QEMU_PAIO_READ);
}

int qemu_paio_write(struct qemu_paiocb *aiocb)
{
    return qemu_paio_submit(aiocb, QEMU_PAIO_WRITE);
}

int qemu_paio_ioctl(struct qemu_paiocb *aiocb)
{
    return qemu_paio_submit(aiocb, QEMU_PAIO_IOCTL);
}
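
/*
 * Completion/status accessors.  aiocb->ret holds -EINPROGRESS while the
 * request is queued or running, -ECANCELED after a successful cancellation,
 * a negative errno value on failure and, for read/write requests, the
 * transferred byte count on success.  A minimal usage sketch for a caller
 * (for example from the handler of the ev_signo signal chosen at
 * submission time):
 *
 *     qemu_paio_read(acb);
 *     ...
 *     ssize_t ret = qemu_paio_return(acb);
 *     if (ret != -EINPROGRESS)
 *         ...finished: ret is the byte count, or -errno on failure...
 */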
ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)

int qemu_paio_error(struct qemu_paiocb *aiocb)
{
    ssize_t ret = qemu_paio_return(aiocb);
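
/*
 * Best-effort cancellation: a request still sitting in the queue is removed
 * and marked -ECANCELED; once a worker has picked it up it is either still
 * in flight (NOTCANCELED) or already finished (ALLDONE).
 */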
int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb)
{
    int ret;

    mutex_lock(&lock);
    if (!aiocb->active) {
        TAILQ_REMOVE(&request_list, aiocb, node);
        aiocb->ret = -ECANCELED;
        ret = QEMU_PAIO_CANCELED;
    } else if (aiocb->ret == -EINPROGRESS)
        ret = QEMU_PAIO_NOTCANCELED;
    else
        ret = QEMU_PAIO_ALLDONE;
    mutex_unlock(&lock);

    return ret;
}