4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2010 Red Hat, Inc.
7 * QEMU library functions on POSIX which are shared between QEMU and
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "qemu/osdep.h"
32 #include <glib/gprintf.h>
34 #include "qemu-common.h"
35 #include "sysemu/sysemu.h"
37 #include "qapi/error.h"
38 #include "qemu/sockets.h"
39 #include "qemu/thread.h"
41 #include "qemu/cutils.h"
44 #include <sys/syscall.h>
48 #include <sys/sysctl.h>
55 #include <sys/sysctl.h>
60 #include <mach-o/dyld.h>
64 #include <kernel/image.h>
67 #include "qemu/mmap-alloc.h"
69 #ifdef CONFIG_DEBUG_STACK_USAGE
70 #include "qemu/error-report.h"
73 #define MAX_MEM_PREALLOC_THREAD_COUNT 16
82 typedef struct MemsetThread MemsetThread;
84 static MemsetThread *memset_thread;
85 static int memset_num_threads;
86 static bool memset_thread_failed;
88 static QemuMutex page_mutex;
89 static QemuCond page_cond;
90 static bool threads_created_flag;
92 int qemu_get_thread_id(void)
94 #if defined(__linux__)
95 return syscall(SYS_gettid);
96 #elif defined(__FreeBSD__)
97 /* thread id is up to INT_MAX */
101 #elif defined(__NetBSD__)
103 #elif defined(__OpenBSD__)
/*
 * Thin wrapper around daemon(3).
 *
 * @nochdir and @noclose are forwarded unchanged, and the value returned
 * by daemon() is handed straight back to the caller.
 */
int qemu_daemon(int nochdir, int noclose)
{
    return daemon(nochdir, noclose);
}
115 bool qemu_write_pidfile(const char *path, Error **errp)
122 struct flock lock = {
124 .l_whence = SEEK_SET,
128 fd = qemu_open_old(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
130 error_setg_errno(errp, errno, "Cannot open pid file");
134 if (fstat(fd, &b) < 0) {
135 error_setg_errno(errp, errno, "Cannot stat file");
139 if (fcntl(fd, F_SETLK, &lock)) {
140 error_setg_errno(errp, errno, "Cannot lock pid file");
145 * Now make sure the path we locked is the same one that now
146 * exists on the filesystem.
148 if (stat(path, &a) < 0) {
150 * PID file disappeared, someone else must be racing with
157 if (a.st_ino == b.st_ino) {
162 * PID file was recreated, someone else must be racing with
168 if (ftruncate(fd, 0) < 0) {
169 error_setg_errno(errp, errno, "Failed to truncate pid file");
173 snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
174 if (write(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
175 error_setg(errp, "Failed to write pid file");
/*
 * Check the result of an allocation.
 *
 * Returns @ptr unchanged when it is non-NULL, so the call can wrap an
 * allocation expression directly.  When @ptr is NULL, the reason taken
 * from errno is printed on stderr and the process is aborted.
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr == NULL) {
        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
        abort();
    }
    return ptr;
}
197 void *qemu_try_memalign(size_t alignment, size_t size)
201 if (alignment < sizeof(void*)) {
202 alignment = sizeof(void*);
205 #if defined(CONFIG_POSIX_MEMALIGN)
207 ret = posix_memalign(&ptr, alignment, size);
212 #elif defined(CONFIG_BSD)
215 ptr = memalign(alignment, size);
217 trace_qemu_memalign(alignment, size, ptr);
/*
 * Allocate @size bytes aligned to @alignment.
 *
 * The request is served by qemu_try_memalign() and the result is passed
 * through qemu_oom_check() before being returned.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    return qemu_oom_check(qemu_try_memalign(alignment, size));
}
226 /* alloc shared memory pages */
227 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
229 size_t align = QEMU_VMALLOC_ALIGN;
230 void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
232 if (ptr == MAP_FAILED) {
240 trace_qemu_anon_ram_alloc(size, ptr);
/*
 * Release memory obtained from qemu_try_memalign()/qemu_memalign().
 *
 * The pointer is reported to the trace point first and then handed to
 * free(), which pairs with the posix_memalign()/memalign() allocation
 * paths used by qemu_try_memalign().
 */
void qemu_vfree(void *ptr)
{
    trace_qemu_vfree(ptr);
    free(ptr);
}
/*
 * Release an anonymous RAM region of @size bytes starting at @ptr.
 *
 * The region is reported to the trace point and then unmapped with
 * qemu_ram_munmap(), passing -1 as the file descriptor argument.
 */
void qemu_anon_ram_free(void *ptr, size_t size)
{
    trace_qemu_anon_ram_free(ptr, size);
    qemu_ram_munmap(-1, ptr, size);
}
256 void qemu_set_block(int fd)
259 f = fcntl(fd, F_GETFL);
261 f = fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
265 int qemu_try_set_nonblock(int fd)
268 f = fcntl(fd, F_GETFL);
272 if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) {
275 * Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on
276 * memory devices and sets errno to ENODEV.
277 * It's OK if we fail to set O_NONBLOCK on devices like /dev/null,
278 * because they will never block anyway.
280 if (errno == ENODEV) {
289 void qemu_set_nonblock(int fd)
292 f = qemu_try_set_nonblock(fd);
296 int socket_set_fast_reuse(int fd)
300 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
301 (const char *)&val, sizeof(val));
308 void qemu_set_cloexec(int fd)
311 f = fcntl(fd, F_GETFD);
313 f = fcntl(fd, F_SETFD, f | FD_CLOEXEC);
318 * Creates a pipe with FD_CLOEXEC set on both file descriptors
320 int qemu_pipe(int pipefd[2])
325 ret = pipe2(pipefd, O_CLOEXEC);
326 if (ret != -1 || errno != ENOSYS) {
332 qemu_set_cloexec(pipefd[0]);
333 qemu_set_cloexec(pipefd[1]);
340 qemu_get_local_state_pathname(const char *relative_pathname)
342 g_autofree char *dir = g_strdup_printf("%s/%s",
343 CONFIG_QEMU_LOCALSTATEDIR,
345 return get_relocated_path(dir);
348 void qemu_set_tty_echo(int fd, bool echo)
355 tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
357 tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
360 tcsetattr(fd, TCSANOW, &tty);
363 static const char *exec_dir;
365 void qemu_init_exec_dir(const char *argv0)
374 #if defined(__linux__)
377 len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
383 #elif defined(__FreeBSD__) \
384 || (defined(__NetBSD__) && defined(KERN_PROC_PATHNAME))
386 #if defined(__FreeBSD__)
387 static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
389 static int mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME};
391 size_t len = sizeof(buf) - 1;
394 if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
396 buf[sizeof(buf) - 1] = '\0';
400 #elif defined(__APPLE__)
402 char fpath[PATH_MAX];
403 uint32_t len = sizeof(fpath);
404 if (_NSGetExecutablePath(fpath, &len) == 0) {
405 p = realpath(fpath, buf);
411 #elif defined(__HAIKU__)
417 while (get_next_image_info(0, &c, &ii) == B_OK) {
418 if (ii.type == B_APP_IMAGE) {
419 strncpy(buf, ii.name, sizeof(buf));
420 buf[sizeof(buf) - 1] = 0;
427 /* If we don't have any way of figuring out the actual executable
428 location then try argv[0]. */
430 p = realpath(argv0, buf);
433 exec_dir = g_path_get_dirname(p);
435 exec_dir = CONFIG_BINDIR;
439 const char *qemu_get_exec_dir(void)
444 static void sigbus_handler(int signal)
448 for (i = 0; i < memset_num_threads; i++) {
449 if (qemu_thread_is_self(&memset_thread[i].pgthread)) {
450 siglongjmp(memset_thread[i].env, 1);
456 static void *do_touch_pages(void *arg)
458 MemsetThread *memset_args = (MemsetThread *)arg;
459 sigset_t set, oldset;
462 * On Linux, the page faults from the loop below can cause mmap_sem
463 * contention with allocation of the thread stacks. Do not start
464 * clearing until all threads have been created.
466 qemu_mutex_lock(&page_mutex);
467 while(!threads_created_flag){
468 qemu_cond_wait(&page_cond, &page_mutex);
470 qemu_mutex_unlock(&page_mutex);
474 sigaddset(&set, SIGBUS);
475 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
477 if (sigsetjmp(memset_args->env, 1)) {
478 memset_thread_failed = true;
480 char *addr = memset_args->addr;
481 size_t numpages = memset_args->numpages;
482 size_t hpagesize = memset_args->hpagesize;
484 for (i = 0; i < numpages; i++) {
486 * Read & write back the same value, so we don't
487 * corrupt existing user/app data that might be
490 * 'volatile' to stop compiler optimizing this away
493 * TODO: get a better solution from kernel so we
494 * don't need to write at all so we don't cause
495 * wear on the storage backing the region...
497 *(volatile char *)addr = *addr;
501 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
/*
 * Pick the number of worker threads used to touch pages.
 *
 * The result is the smallest of the number of online host processors,
 * MAX_MEM_PREALLOC_THREAD_COUNT and @smp_cpus, but never less than 1:
 * the caller divides the page count by this value, so a zero or
 * negative @smp_cpus must not propagate.
 *
 * Returns 1 when sysconf() cannot report the processor count.
 */
static inline int get_memset_num_threads(int smp_cpus)
{
    long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
    long count;

    if (host_procs <= 0) {
        /* In case sysconf() fails, we fall back to single threaded */
        return 1;
    }

    count = host_procs;
    if (count > MAX_MEM_PREALLOC_THREAD_COUNT) {
        count = MAX_MEM_PREALLOC_THREAD_COUNT;
    }
    if (count > smp_cpus) {
        count = smp_cpus;
    }

    /* Guard against a non-positive @smp_cpus. */
    return count > 0 ? (int)count : 1;
}
517 static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
520 static gsize initialized = 0;
521 size_t numpages_per_thread, leftover;
525 if (g_once_init_enter(&initialized)) {
526 qemu_mutex_init(&page_mutex);
527 qemu_cond_init(&page_cond);
528 g_once_init_leave(&initialized, 1);
531 memset_thread_failed = false;
532 threads_created_flag = false;
533 memset_num_threads = get_memset_num_threads(smp_cpus);
534 memset_thread = g_new0(MemsetThread, memset_num_threads);
535 numpages_per_thread = numpages / memset_num_threads;
536 leftover = numpages % memset_num_threads;
537 for (i = 0; i < memset_num_threads; i++) {
538 memset_thread[i].addr = addr;
539 memset_thread[i].numpages = numpages_per_thread + (i < leftover);
540 memset_thread[i].hpagesize = hpagesize;
541 qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
542 do_touch_pages, &memset_thread[i],
543 QEMU_THREAD_JOINABLE);
544 addr += memset_thread[i].numpages * hpagesize;
547 qemu_mutex_lock(&page_mutex);
548 threads_created_flag = true;
549 qemu_cond_broadcast(&page_cond);
550 qemu_mutex_unlock(&page_mutex);
552 for (i = 0; i < memset_num_threads; i++) {
553 qemu_thread_join(&memset_thread[i].pgthread);
555 g_free(memset_thread);
556 memset_thread = NULL;
558 return memset_thread_failed;
561 void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
565 struct sigaction act, oldact;
566 size_t hpagesize = qemu_fd_getpagesize(fd);
567 size_t numpages = DIV_ROUND_UP(memory, hpagesize);
569 memset(&act, 0, sizeof(act));
570 act.sa_handler = &sigbus_handler;
573 ret = sigaction(SIGBUS, &act, &oldact);
575 error_setg_errno(errp, errno,
576 "os_mem_prealloc: failed to install signal handler");
580 /* touch pages simultaneously */
581 if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
582 error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
583 "pages available to allocate guest RAM");
586 ret = sigaction(SIGBUS, &oldact, NULL);
588 /* Terminate QEMU since it can't recover from error */
589 perror("os_mem_prealloc: failed to reinstall signal handler");
594 char *qemu_get_pid_name(pid_t pid)
598 #if defined(__FreeBSD__)
599 /* BSDs don't have /proc, but they provide a nice substitute */
600 struct kinfo_proc *proc = kinfo_getproc(pid);
603 name = g_strdup(proc->ki_comm);
607 /* Assume a system with reasonable procfs */
611 pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
612 g_file_get_contents(pid_path, &name, &len, NULL);
620 pid_t qemu_fork(Error **errp)
622 sigset_t oldmask, newmask;
623 struct sigaction sig_action;
628 * Need to block signals now, so that child process can safely
629 * kill off caller's signal handlers without a race.
631 sigfillset(&newmask);
632 if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
633 error_setg_errno(errp, errno,
634 "cannot block signals");
642 /* attempt to restore signal mask, but ignore failure, to
643 * avoid obscuring the fork failure */
644 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
645 error_setg_errno(errp, saved_errno,
646 "cannot fork child process");
652 /* Restore our original signal mask now that the child is
653 * safely running. Only documented failures are EFAULT (not
654 * possible, since we are using just-grabbed mask) or EINVAL
655 * (not possible, since we are using correct arguments). */
656 (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
661 /* Clear out all signal handlers from parent so nothing
662 * unexpected can happen in our child once we unblock
664 sig_action.sa_handler = SIG_DFL;
665 sig_action.sa_flags = 0;
666 sigemptyset(&sig_action.sa_mask);
668 for (i = 1; i < NSIG; i++) {
669 /* Only possible errors are EFAULT or EINVAL The former
670 * won't happen, the latter we expect, so no need to check
672 (void)sigaction(i, &sig_action, NULL);
675 /* Unmask all signals in child, since we've no idea what the
676 * caller's done with their signal mask and don't want to
677 * propagate that to children */
678 sigemptyset(&newmask);
679 if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
680 Error *local_err = NULL;
681 error_setg_errno(&local_err, errno,
682 "cannot unblock signals");
683 error_report_err(local_err);
690 void *qemu_alloc_stack(size_t *sz)
692 void *ptr, *guardpage;
694 #ifdef CONFIG_DEBUG_STACK_USAGE
697 size_t pagesz = qemu_real_host_page_size;
698 #ifdef _SC_THREAD_STACK_MIN
699 /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
700 long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
701 *sz = MAX(MAX(min_stack_sz, 0), *sz);
703 /* adjust stack size to a multiple of the page size */
704 *sz = ROUND_UP(*sz, pagesz);
705 /* allocate one extra page for the guard page */
708 flags = MAP_PRIVATE | MAP_ANONYMOUS;
709 #if defined(MAP_STACK) && defined(__OpenBSD__)
710 /* Only enable MAP_STACK on OpenBSD. Other OS's such as
711 * Linux/FreeBSD/NetBSD have a flag with the same name
712 * but have differing functionality. OpenBSD will SEGV
713 * if it spots execution with a stack pointer pointing
714 * at memory that was not allocated with MAP_STACK.
719 ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
720 if (ptr == MAP_FAILED) {
721 perror("failed to allocate memory for stack");
725 #if defined(HOST_IA64)
726 /* separate register stack */
727 guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz);
728 #elif defined(HOST_HPPA)
730 guardpage = ptr + *sz - pagesz;
732 /* stack grows down */
735 if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
736 perror("failed to set up stack guard page");
740 #ifdef CONFIG_DEBUG_STACK_USAGE
741 for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
742 *(uint32_t *)ptr2 = 0xdeadbeaf;
749 #ifdef CONFIG_DEBUG_STACK_USAGE
750 static __thread unsigned int max_stack_usage;
753 void qemu_free_stack(void *stack, size_t sz)
755 #ifdef CONFIG_DEBUG_STACK_USAGE
759 for (ptr = stack + qemu_real_host_page_size; ptr < stack + sz;
760 ptr += sizeof(uint32_t)) {
761 if (*(uint32_t *)ptr != 0xdeadbeaf) {
765 usage = sz - (uintptr_t) (ptr - stack);
766 if (usage > max_stack_usage) {
767 error_report("thread %d max stack usage increased from %u to %u",
768 qemu_get_thread_id(), max_stack_usage, usage);
769 max_stack_usage = usage;
776 void sigaction_invoke(struct sigaction *action,
777 struct qemu_signalfd_siginfo *info)
780 si.si_signo = info->ssi_signo;
781 si.si_errno = info->ssi_errno;
782 si.si_code = info->ssi_code;
784 /* Convert the minimal set of fields defined by POSIX.
785 * Positive si_code values are reserved for kernel-generated
786 * signals, where the valid siginfo fields are determined by
787 * the signal number. But according to POSIX, it is unspecified
788 * whether SI_USER and SI_QUEUE have values less than or equal to
791 if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
792 info->ssi_code <= 0) {
794 si.si_pid = info->ssi_pid;
795 si.si_uid = info->ssi_uid;
796 } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
797 info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
798 si.si_addr = (void *)(uintptr_t)info->ssi_addr;
799 } else if (info->ssi_signo == SIGCHLD) {
800 si.si_pid = info->ssi_pid;
801 si.si_status = info->ssi_status;
802 si.si_uid = info->ssi_uid;
804 action->sa_sigaction(info->ssi_signo, &si, NULL);
807 #ifndef HOST_NAME_MAX
808 # ifdef _POSIX_HOST_NAME_MAX
809 # define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
811 # define HOST_NAME_MAX 255
815 char *qemu_get_host_name(Error **errp)
818 g_autofree char *hostname = NULL;
820 #ifdef _SC_HOST_NAME_MAX
821 len = sysconf(_SC_HOST_NAME_MAX);
822 #endif /* _SC_HOST_NAME_MAX */
828 /* Unfortunately, gethostname() below does not guarantee a
829 * NULL terminated string. Therefore, allocate one byte more
831 hostname = g_new0(char, len + 1);
833 if (gethostname(hostname, len) < 0) {
834 error_setg_errno(errp, errno,
835 "cannot get hostname");
839 return g_steal_pointer(&hostname);
842 size_t qemu_get_host_physmem(void)
844 #ifdef _SC_PHYS_PAGES
845 long pages = sysconf(_SC_PHYS_PAGES);
847 if (pages > SIZE_MAX / qemu_real_host_page_size) {
850 return pages * qemu_real_host_page_size;