#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
-#include "exec/cputlb.h"
+#include "qemu/main-loop.h"
#include "translate-all.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "qemu/range.h"
+#ifndef _WIN32
+#include "qemu/mmap-alloc.h"
+#endif
//#define DEBUG_SUBPAGE
*/
#define RAM_RESIZEABLE (1 << 2)
+/* RAM is backed by an mmapped file. */
+#define RAM_FILE (1 << 3)
#endif
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
cpu_exec() */
-DEFINE_TLS(CPUState *, current_cpu);
+__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
1 = Precise instruction counting.
2 = Adaptive rate instruction counting. */
static void tcg_commit(MemoryListener *listener);
static MemoryRegion io_mem_watch;
+
+/**
+ * CPUAddressSpace: all the information a CPU needs about an AddressSpace
+ * @cpu: the CPU whose AddressSpace this is
+ * @as: the AddressSpace itself
+ * @memory_dispatch: its dispatch pointer (cached, RCU protected)
+ * @tcg_as_listener: listener for tracking changes to the AddressSpace
+ */
+struct CPUAddressSpace {
+ CPUState *cpu;
+ AddressSpace *as;
+ struct AddressSpaceDispatch *memory_dispatch;
+ MemoryListener tcg_as_listener;
+};
+
#endif
#if !defined(CONFIG_USER_ONLY)
hwaddr *plen, bool resolve_subpage)
{
MemoryRegionSection *section;
+ MemoryRegion *mr;
Int128 diff;
section = address_space_lookup_region(d, addr, resolve_subpage);
/* Compute offset within MemoryRegion */
*xlat = addr + section->offset_within_region;
- diff = int128_sub(section->mr->size, int128_make64(addr));
- *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
+ mr = section->mr;
+
+ /* MMIO registers can be expected to perform full-width accesses based only
+ * on their address, without considering adjacent registers that could
+ * decode to completely different MemoryRegions. When such registers
+ * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
+ * regions overlap wildly. For this reason we cannot clamp the accesses
+ * here.
+ *
+ * If the length is small (as is the case for address_space_ldl/stl),
+ * everything works fine. If the incoming length is large, however,
+ * the caller really has to do the clamping through memory_access_size.
+ */
+ if (memory_region_is_ram(mr)) {
+ diff = int128_sub(section->size, int128_make64(addr));
+ *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
+ }
return section;
}
hwaddr *xlat, hwaddr *plen)
{
MemoryRegionSection *section;
- section = address_space_translate_internal(cpu->memory_dispatch,
+ section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
addr, xlat, plen, false);
assert(!section->mr->iommu_ops);
.name = "cpu_common/exception_index",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = cpu_common_exception_index_needed,
.fields = (VMStateField[]) {
VMSTATE_INT32(exception_index, CPUState),
VMSTATE_END_OF_LIST()
}
};
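+/* A .needed predicate gates the subsection: it is only put on the wire
+ * when the predicate returns true, so streams sent to older destinations
+ * that lack the subsection stay compatible.
+ */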
+static bool cpu_common_crash_occurred_needed(void *opaque)
+{
+ CPUState *cpu = opaque;
+
+ return cpu->crash_occurred;
+}
+
+static const VMStateDescription vmstate_cpu_common_crash_occurred = {
+ .name = "cpu_common/crash_occurred",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = cpu_common_crash_occurred_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(crash_occurred, CPUState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
const VMStateDescription vmstate_cpu_common = {
.name = "cpu_common",
.version_id = 1,
VMSTATE_UINT32(interrupt_request, CPUState),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &vmstate_cpu_common_exception_index,
- .needed = cpu_common_exception_index_needed,
- } , {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_cpu_common_exception_index,
+ &vmstate_cpu_common_crash_occurred,
+ NULL
}
};
/* We only support one address space per cpu at the moment. */
assert(cpu->as == as);
- if (cpu->tcg_as_listener) {
- memory_listener_unregister(cpu->tcg_as_listener);
- } else {
- cpu->tcg_as_listener = g_new0(MemoryListener, 1);
+ if (cpu->cpu_ases) {
+ /* We've already registered the listener for our only AS */
+ return;
}
- cpu->tcg_as_listener->commit = tcg_commit;
- memory_listener_register(cpu->tcg_as_listener, as);
+
+ cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
+ cpu->cpu_ases[0].cpu = cpu;
+ cpu->cpu_ases[0].as = as;
+ cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
+ memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
}
#endif
-void cpu_exec_init(CPUArchState *env)
+#ifndef CONFIG_USER_ONLY
+static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
+
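+/* Pick the lowest cpu_index not yet set in cpu_index_map. */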
+static int cpu_get_free_index(Error **errp)
+{
+ int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
+
+ if (cpu >= MAX_CPUMASK_BITS) {
+ error_setg(errp, "Trying to use more CPUs than max of %d",
+ MAX_CPUMASK_BITS);
+ return -1;
+ }
+
+ bitmap_set(cpu_index_map, cpu, 1);
+ return cpu;
+}
+
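+/* Return the CPU's index to the bitmap so it can be reused after teardown. */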
+void cpu_exec_exit(CPUState *cpu)
+{
+ if (cpu->cpu_index == -1) {
+ /* cpu_index was never allocated by this @cpu or was already freed. */
+ return;
+ }
+
+ bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
+ cpu->cpu_index = -1;
+}
+#else
+
+static int cpu_get_free_index(Error **errp)
{
- CPUState *cpu = ENV_GET_CPU(env);
- CPUClass *cc = CPU_GET_CLASS(cpu);
CPUState *some_cpu;
- int cpu_index;
+ int cpu_index = 0;
-#if defined(CONFIG_USER_ONLY)
- cpu_list_lock();
-#endif
- cpu_index = 0;
CPU_FOREACH(some_cpu) {
cpu_index++;
}
- cpu->cpu_index = cpu_index;
- cpu->numa_node = 0;
- QTAILQ_INIT(&cpu->breakpoints);
- QTAILQ_INIT(&cpu->watchpoints);
+ return cpu_index;
+}
+
+void cpu_exec_exit(CPUState *cpu)
+{
+}
+#endif
+
+void cpu_exec_init(CPUState *cpu, Error **errp)
+{
+ CPUClass *cc = CPU_GET_CLASS(cpu);
+ int cpu_index;
+ Error *local_err = NULL;
+
#ifndef CONFIG_USER_ONLY
cpu->as = &address_space_memory;
cpu->thread_id = qemu_get_thread_id();
- cpu_reload_memory_map(cpu);
#endif
+
+#if defined(CONFIG_USER_ONLY)
+ cpu_list_lock();
+#endif
+ cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+#if defined(CONFIG_USER_ONLY)
+ cpu_list_unlock();
+#endif
+ return;
+ }
QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
cpu_list_unlock();
}
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
- cpu_save, cpu_load, env);
+ cpu_save, cpu_load, cpu->env_ptr);
assert(cc->vmsd == NULL);
assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
} else {
/* must flush all the translated code to avoid inconsistencies */
/* XXX: only flush what is necessary */
- CPUArchState *env = cpu->env_ptr;
- tb_flush(env);
+ tb_flush(cpu);
}
}
}
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
+ CPUState *cpu;
ram_addr_t start1;
RAMBlock *block;
ram_addr_t end;
block = qemu_get_ram_block(start);
assert(block == qemu_get_ram_block(end - 1));
start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
- cpu_tlb_reset_dirty_all(start1, length);
+ CPU_FOREACH(cpu) {
+ tlb_reset_dirty(cpu, start1, length);
+ }
rcu_read_unlock();
}
iotlb |= PHYS_SECTION_ROM;
}
} else {
- iotlb = section - section->address_space->dispatch->map.sections;
+ AddressSpaceDispatch *d;
+
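+        /* Read the dispatch pointer under RCU; the map may be rebuilt
+         * concurrently. */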
+        d = atomic_rcu_read(&section->address_space->dispatch);
+ iotlb = section - d->map.sections;
iotlb += xlat;
}
char *filename;
char *sanitized_name;
char *c;
- void *area = NULL;
+ void *area;
int fd;
uint64_t hpagesize;
Error *local_err = NULL;
unlink(filename);
g_free(filename);
- memory = (memory+hpagesize-1) & ~(hpagesize-1);
+ memory = ROUND_UP(memory, hpagesize);
/*
* ftruncate is not supported by hugetlbfs in older
perror("ftruncate");
}
- area = mmap(0, memory, PROT_READ | PROT_WRITE,
- (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
- fd, 0);
+ area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
if (area == MAP_FAILED) {
error_setg_errno(errp, errno,
"unable to map backing store for hugepages");
return area;
error:
- if (mem_prealloc) {
- error_report("%s", error_get_pretty(*errp));
- exit(1);
- }
return NULL;
}
#endif
}
}
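+    /* If the new block extends RAM past its old size, grow the migration
+     * dirty bitmap to cover it. */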
+ new_ram_size = MAX(old_ram_size,
+ (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
+ if (new_ram_size > old_ram_size) {
+ migration_bitmap_extend(old_ram_size, new_ram_size);
+ }
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = share ? RAM_SHARED : 0;
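+    /* Mark the block so it is torn down with qemu_ram_munmap() rather
+     * than plain munmap(). */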
+ new_block->flags |= RAM_FILE;
new_block->host = file_ram_alloc(new_block, size,
mem_path, errp);
if (!new_block->host) {
xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
} else if (block->fd >= 0) {
- munmap(block->host, block->max_length);
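+        /* blocks mapped via qemu_ram_mmap() need the matching unmap */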
+ if (block->flags & RAM_FILE) {
+ qemu_ram_munmap(block->host, block->max_length);
+ } else {
+ munmap(block->host, block->max_length);
+ }
close(block->fd);
#endif
} else {
/* we remove the notdirty callback only if the code has been
flushed */
if (!cpu_physical_memory_is_clean(ram_addr)) {
- CPUArchState *env = current_cpu->env_ptr;
- tlb_set_dirty(env, current_cpu->mem_io_vaddr);
+ tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
}
}
MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
{
- AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
+ CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
+ AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
MemoryRegionSection *sections = d->map.sections;
return sections[index & ~TARGET_PAGE_MASK].mr;
static void tcg_commit(MemoryListener *listener)
{
- CPUState *cpu;
+ CPUAddressSpace *cpuas;
+ AddressSpaceDispatch *d;
/* since each CPU stores ram addresses in its TLB cache, we must
reset the modified entries */
- /* XXX: slow ! */
- CPU_FOREACH(cpu) {
- /* FIXME: Disentangle the cpu.h circular files deps so we can
- directly get the right CPU from listener. */
- if (cpu->tcg_as_listener != listener) {
- continue;
- }
- cpu_reload_memory_map(cpu);
- }
+ cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
+ cpu_reloading_memory_map();
+ /* The CPU and TLB are protected by the iothread lock.
+ * We reload the dispatch pointer now because cpu_reloading_memory_map()
+ * may have split the RCU critical section.
+ */
+ d = atomic_rcu_read(&cpuas->as->dispatch);
+ cpuas->memory_dispatch = d;
+ tlb_flush(cpuas->cpu, 1);
}
void address_space_init_dispatch(AddressSpace *as)
if (l > access_size_max) {
l = access_size_max;
}
- if (l & (l - 1)) {
- l = 1 << (qemu_fls(l) - 1);
- }
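+    /* Round non-power-of-two lengths down to a power of two. */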
+ l = pow2floor(l);
return l;
}
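+/* Take the BQL when the region's MMIO handlers rely on the global lock
+ * (returning true so the caller drops it again after the access), and
+ * flush any pending coalesced MMIO under the BQL before dispatching.
+ */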
+static bool prepare_mmio_access(MemoryRegion *mr)
+{
+ bool unlocked = !qemu_mutex_iothread_locked();
+ bool release_lock = false;
+
+ if (unlocked && mr->global_locking) {
+ qemu_mutex_lock_iothread();
+ unlocked = false;
+ release_lock = true;
+ }
+ if (mr->flush_coalesced_mmio) {
+ if (unlocked) {
+ qemu_mutex_lock_iothread();
+ }
+ qemu_flush_coalesced_mmio_buffer();
+ if (unlocked) {
+ qemu_mutex_unlock_iothread();
+ }
+ }
+
+ return release_lock;
+}
+
MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
uint8_t *buf, int len, bool is_write)
{
hwaddr addr1;
MemoryRegion *mr;
MemTxResult result = MEMTX_OK;
+ bool release_lock = false;
rcu_read_lock();
while (len > 0) {
if (is_write) {
if (!memory_access_is_direct(mr, is_write)) {
+ release_lock |= prepare_mmio_access(mr);
l = memory_access_size(mr, l, addr1);
/* XXX: could force current_cpu to NULL to avoid
potential bugs */
} else {
if (!memory_access_is_direct(mr, is_write)) {
/* I/O case */
+ release_lock |= prepare_mmio_access(mr);
l = memory_access_size(mr, l, addr1);
switch (l) {
case 8:
memcpy(buf, ptr, l);
}
}
+
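+        /* Drop the BQL taken for this chunk's MMIO access before the
+         * next iteration. */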
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ release_lock = false;
+ }
+
len -= l;
buf += l;
addr += l;
if (!(memory_region_is_ram(mr) ||
memory_region_is_romd(mr))) {
- /* do nothing */
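+            /* MMIO region: skip the write, but advance by a size the
+             * region accepts */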
+ l = memory_access_size(mr, l, addr1);
} else {
addr1 += memory_region_get_ram_addr(mr);
/* ROM/RAM case */
hwaddr l = 4;
hwaddr addr1;
MemTxResult r;
+ bool release_lock = false;
rcu_read_lock();
mr = address_space_translate(as, addr, &addr1, &l, false);
if (l < 4 || !memory_access_is_direct(mr, false)) {
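+        /* MMIO path: may need to take the BQL around the device dispatch */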
+ release_lock |= prepare_mmio_access(mr);
+
/* I/O case */
r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
if (result) {
*result = r;
}
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
rcu_read_unlock();
return val;
}
hwaddr l = 8;
hwaddr addr1;
MemTxResult r;
+ bool release_lock = false;
rcu_read_lock();
mr = address_space_translate(as, addr, &addr1, &l,
false);
if (l < 8 || !memory_access_is_direct(mr, false)) {
+ release_lock |= prepare_mmio_access(mr);
+
/* I/O case */
r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
if (result) {
*result = r;
}
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
rcu_read_unlock();
return val;
}
hwaddr l = 2;
hwaddr addr1;
MemTxResult r;
+ bool release_lock = false;
rcu_read_lock();
mr = address_space_translate(as, addr, &addr1, &l,
false);
if (l < 2 || !memory_access_is_direct(mr, false)) {
+ release_lock |= prepare_mmio_access(mr);
+
/* I/O case */
r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
if (result) {
*result = r;
}
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
rcu_read_unlock();
return val;
}
hwaddr addr1;
MemTxResult r;
uint8_t dirty_log_mask;
+ bool release_lock = false;
rcu_read_lock();
mr = address_space_translate(as, addr, &addr1, &l,
true);
if (l < 4 || !memory_access_is_direct(mr, true)) {
+ release_lock |= prepare_mmio_access(mr);
+
r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
} else {
addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
if (result) {
*result = r;
}
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
rcu_read_unlock();
}
hwaddr l = 4;
hwaddr addr1;
MemTxResult r;
+ bool release_lock = false;
rcu_read_lock();
mr = address_space_translate(as, addr, &addr1, &l,
true);
if (l < 4 || !memory_access_is_direct(mr, true)) {
+ release_lock |= prepare_mmio_access(mr);
+
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
val = bswap32(val);
if (result) {
*result = r;
}
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
rcu_read_unlock();
}
hwaddr l = 2;
hwaddr addr1;
MemTxResult r;
+ bool release_lock = false;
rcu_read_lock();
mr = address_space_translate(as, addr, &addr1, &l, true);
if (l < 2 || !memory_access_is_direct(mr, true)) {
+ release_lock |= prepare_mmio_access(mr);
+
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
val = bswap16(val);
if (result) {
*result = r;
}
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ }
rcu_read_unlock();
}
return res;
}
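+/* Invoke func on every RAM block, stopping at the first non-zero return
+ * value, which is passed back to the caller; returns 0 otherwise.
+ */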
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
RAMBlock *block;
+ int ret = 0;
rcu_read_lock();
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- func(block->host, block->offset, block->used_length, opaque);
+ ret = func(block->idstr, block->host, block->offset,
+ block->used_length, opaque);
+ if (ret) {
+ break;
+ }
}
rcu_read_unlock();
+ return ret;
}
#endif