Git Repo - qemu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Virtual page mapping
	3	*
	4	* Copyright (c) 2003 Fabrice Bellard
	5	*
	6	* This library is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU Lesser General Public
	8	* License as published by the Free Software Foundation; either
	9	* version 2 of the License, or (at your option) any later version.
	10	*
	11	* This library is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	* Lesser General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU Lesser General Public
	17	* License along with this library; if not, see <http://www.gnu.org/licenses/>.
	18	*/
	19	#include "qemu/osdep.h"
	20	#include "qapi/error.h"
	21	#ifndef _WIN32
	22	#endif
	23
	24	#include "qemu/cutils.h"
	25	#include "cpu.h"
	26	#include "exec/exec-all.h"
	27	#include "tcg.h"
	28	#include "hw/qdev-core.h"
	29	#if !defined(CONFIG_USER_ONLY)
	30	#include "hw/boards.h"
	31	#include "hw/xen/xen.h"
	32	#endif
	33	#include "sysemu/kvm.h"
	34	#include "sysemu/sysemu.h"
	35	#include "qemu/timer.h"
	36	#include "qemu/config-file.h"
	37	#include "qemu/error-report.h"
	38	#if defined(CONFIG_USER_ONLY)
	39	#include "qemu.h"
	40	#else /* !CONFIG_USER_ONLY */
	41	#include "hw/hw.h"
	42	#include "exec/memory.h"
	43	#include "exec/ioport.h"
	44	#include "sysemu/dma.h"
	45	#include "exec/address-spaces.h"
	46	#include "sysemu/xen-mapcache.h"
	47	#include "trace-root.h"
	48	#endif
	49	#include "exec/cpu-all.h"
	50	#include "qemu/rcu_queue.h"
	51	#include "qemu/main-loop.h"
	52	#include "translate-all.h"
	53	#include "sysemu/replay.h"
	54
	55	#include "exec/memory-internal.h"
	56	#include "exec/ram_addr.h"
	57	#include "exec/log.h"
	58
	59	#include "migration/vmstate.h"
	60
	61	#include "qemu/range.h"
	62	#ifndef _WIN32
	63	#include "qemu/mmap-alloc.h"
	64	#endif
	65
	66	//#define DEBUG_SUBPAGE
	67
	68	#if !defined(CONFIG_USER_ONLY)
	69	/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
	70	* are protected by the ramlist lock.
	71	*/
	72	RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
	73
	74	static MemoryRegion *system_memory;
	75	static MemoryRegion *system_io;
	76
	77	AddressSpace address_space_io;
	78	AddressSpace address_space_memory;
	79
	80	MemoryRegion io_mem_rom, io_mem_notdirty;
	81	static MemoryRegion io_mem_unassigned;
	82
	83	/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
	84	#define RAM_PREALLOC (1 << 0)
	85
	86	/* RAM is mmap-ed with MAP_SHARED */
	87	#define RAM_SHARED (1 << 1)
	88
	89	/* Only a portion of RAM (used_length) is actually used, and migrated.
	90	* This used_length size can change across reboots.
	91	*/
	92	#define RAM_RESIZEABLE (1 << 2)
	93
	94	#endif
	95
	96	#ifdef TARGET_PAGE_BITS_VARY
	97	int target_page_bits;
	98	bool target_page_bits_decided;
	99	#endif
	100
	101	struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
	102	/* current CPU in the current thread. It is only valid inside
	103	cpu_exec() */
	104	__thread CPUState *current_cpu;
	105	/* 0 = Do not count executed instructions.
	106	1 = Precise instruction counting.
	107	2 = Adaptive rate instruction counting. */
	108	int use_icount;
	109
	/**
	 * set_preferred_target_page_bits:
	 * @bits: requested target page size, expressed as a number of bits
	 *
	 * With TARGET_PAGE_BITS_VARY defined, lowers target_page_bits to @bits when
	 * it is still unset (0) or currently larger than @bits.  Without
	 * TARGET_PAGE_BITS_VARY this does nothing.
	 *
	 * Returns: false if a smaller size is requested after
	 * target_page_bits_decided has been set; true otherwise.
	 */
	bool set_preferred_target_page_bits(int bits)
	{
	    /* The target page size is the lowest common denominator for all
	     * the CPUs in the system, so we can only make it smaller, never
	     * larger. And we can't make it smaller once we've committed to
	     * a particular size.
	     */
	#ifdef TARGET_PAGE_BITS_VARY
	    assert(bits >= TARGET_PAGE_BITS_MIN);
	    if (target_page_bits == 0 || target_page_bits > bits) {
	        if (target_page_bits_decided) {
	            return false;
	        }
	        target_page_bits = bits;
	    }
	#endif
	    return true;
	}
	128
	129	#if !defined(CONFIG_USER_ONLY)
	130
	/* Commit the target page size.  With TARGET_PAGE_BITS_VARY defined, a
	 * still-unset target_page_bits falls back to TARGET_PAGE_BITS_MIN and
	 * target_page_bits_decided is set; otherwise this is a no-op.
	 */
	static void finalize_target_page_bits(void)
	{
	#ifdef TARGET_PAGE_BITS_VARY
	    if (!target_page_bits) {
	        target_page_bits = TARGET_PAGE_BITS_MIN;
	    }
	    target_page_bits_decided = true;
	#endif
	}
	140
	/* One entry of the multi-level physical page map: two bit-fields that
	 * together occupy 32 bits.
	 */
	typedef struct PhysPageEntry {
	    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
	    uint32_t skip : 6;
	    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
	    uint32_t ptr : 26;
	} PhysPageEntry;

	/* All-ones value of the 26-bit ptr field; used as the "no node" marker. */
	#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
	151
	152	/* Size of the L2 (and L3, etc) page tables. */
	153	#define ADDR_SPACE_BITS 64
	154
	155	#define P_L2_BITS 9
	156	#define P_L2_SIZE (1 << P_L2_BITS)
	157
	158	#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
	159
	160	typedef PhysPageEntry Node[P_L2_SIZE];
	161
	162	typedef struct PhysPageMap {
	163	struct rcu_head rcu;
	164
	165	unsigned sections_nb;
	166	unsigned sections_nb_alloc;
	167	unsigned nodes_nb;
	168	unsigned nodes_nb_alloc;
	169	Node *nodes;
	170	MemoryRegionSection *sections;
	171	} PhysPageMap;
	172
	173	struct AddressSpaceDispatch {
	174	struct rcu_head rcu;
	175
	176	MemoryRegionSection *mru_section;
	177	/* This is a multi-level map on the physical address space.
	178	* The bottom level has pointers to MemoryRegionSections.
	179	*/
	180	PhysPageEntry phys_map;
	181	PhysPageMap map;
	182	AddressSpace *as;
	183	};
	184
	185	#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
	186	typedef struct subpage_t {
	187	MemoryRegion iomem;
	188	AddressSpace *as;
	189	hwaddr base;
	190	uint16_t sub_section[];
	191	} subpage_t;
	192
	193	#define PHYS_SECTION_UNASSIGNED 0
	194	#define PHYS_SECTION_NOTDIRTY 1
	195	#define PHYS_SECTION_ROM 2
	196	#define PHYS_SECTION_WATCH 3
	197
	198	static void io_mem_init(void);
	199	static void memory_map_init(void);
	200	static void tcg_commit(MemoryListener *listener);
	201
	202	static MemoryRegion io_mem_watch;
	203
	204	/**
	205	* CPUAddressSpace: all the information a CPU needs about an AddressSpace
	206	* @cpu: the CPU whose AddressSpace this is
	207	* @as: the AddressSpace itself
	208	* @memory_dispatch: its dispatch pointer (cached, RCU protected)
	209	* @tcg_as_listener: listener for tracking changes to the AddressSpace
	210	*/
	211	struct CPUAddressSpace {
	212	CPUState *cpu;
	213	AddressSpace *as;
	214	struct AddressSpaceDispatch *memory_dispatch;
	215	MemoryListener tcg_as_listener;
	216	};
	217
	218	#endif
	219
	220	#if !defined(CONFIG_USER_ONLY)
	221
	222	static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
	223	{
	224	static unsigned alloc_hint = 16;
	225	if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
	226	map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
	227	map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
	228	map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
	229	alloc_hint = map->nodes_nb_alloc;
	230	}
	231	}
	232
	233	static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
	234	{
	235	unsigned i;
	236	uint32_t ret;
	237	PhysPageEntry e;
	238	PhysPageEntry *p;
	239
	240	ret = map->nodes_nb++;
	241	p = map->nodes[ret];
	242	assert(ret != PHYS_MAP_NODE_NIL);
	243	assert(ret != map->nodes_nb_alloc);
	244
	245	e.skip = leaf ? 0 : 1;
	246	e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
	247	for (i = 0; i < P_L2_SIZE; ++i) {
	248	memcpy(&p[i], &e, sizeof(e));
	249	}
	250	return ret;
	251	}
	252
	253	static void phys_page_set_level(PhysPageMap map, PhysPageEntry lp,
	254	hwaddr index, hwaddr nb, uint16_t leaf,
	255	int level)
	256	{
	257	PhysPageEntry *p;
	258	hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
	259
	260	if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
	261	lp->ptr = phys_map_node_alloc(map, level == 0);
	262	}
	263	p = map->nodes[lp->ptr];
	264	lp = &p[(index >> (level P_L2_BITS)) & (P_L2_SIZE - 1)];
	265
	266	while (*nb && lp < &p[P_L2_SIZE]) {
	267	if ((index & (step - 1)) == 0 && nb >= step) {
	268	lp->skip = 0;
	269	lp->ptr = leaf;
	270	*index += step;
	271	*nb -= step;
	272	} else {
	273	phys_page_set_level(map, lp, index, nb, leaf, level - 1);
	274	}
	275	++lp;
	276	}
	277	}
	278
	279	static void phys_page_set(AddressSpaceDispatch *d,
	280	hwaddr index, hwaddr nb,
	281	uint16_t leaf)
	282	{
	283	/* Wildly overreserve - it doesn't matter much. */
	284	phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
	285
	286	phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
	287	}
	288
	289	/* Compact a non leaf page entry. Simply detect that the entry has a single child,
	290	* and update our entry so we can skip it and go directly to the destination.
	291	*/
	292	static void phys_page_compact(PhysPageEntry lp, Node nodes)
	293	{
	294	unsigned valid_ptr = P_L2_SIZE;
	295	int valid = 0;
	296	PhysPageEntry *p;
	297	int i;
	298
	299	if (lp->ptr == PHYS_MAP_NODE_NIL) {
	300	return;
	301	}
	302
	303	p = nodes[lp->ptr];
	304	for (i = 0; i < P_L2_SIZE; i++) {
	305	if (p[i].ptr == PHYS_MAP_NODE_NIL) {
	306	continue;
	307	}
	308
	309	valid_ptr = i;
	310	valid++;
	311	if (p[i].skip) {
	312	phys_page_compact(&p[i], nodes);
	313	}
	314	}
	315
	316	/* We can only compress if there's only one child. */
	317	if (valid != 1) {
	318	return;
	319	}
	320
	321	assert(valid_ptr < P_L2_SIZE);
	322
	323	/* Don't compress if it won't fit in the # of bits we have. */
	324	if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
	325	return;
	326	}
	327
	328	lp->ptr = p[valid_ptr].ptr;
	329	if (!p[valid_ptr].skip) {
	330	/* If our only child is a leaf, make this a leaf. */
	331	/* By design, we should have made this node a leaf to begin with so we
	332	* should never reach here.
	333	* But since it's so simple to handle this, let's do it just in case we
	334	* change this rule.
	335	*/
	336	lp->skip = 0;
	337	} else {
	338	lp->skip += p[valid_ptr].skip;
	339	}
	340	}
	341
	342	static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
	343	{
	344	if (d->phys_map.skip) {
	345	phys_page_compact(&d->phys_map, d->map.nodes);
	346	}
	347	}
	348
	349	static inline bool section_covers_addr(const MemoryRegionSection *section,
	350	hwaddr addr)
	351	{
	352	/* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
	353	* the section must cover the entire address space.
	354	*/
	355	return int128_gethi(section->size) \|\|
	356	range_covers_byte(section->offset_within_address_space,
	357	int128_getlo(section->size), addr);
	358	}
	359
	360	static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
	361	Node nodes, MemoryRegionSection sections)
	362	{
	363	PhysPageEntry *p;
	364	hwaddr index = addr >> TARGET_PAGE_BITS;
	365	int i;
	366
	367	for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
	368	if (lp.ptr == PHYS_MAP_NODE_NIL) {
	369	return &sections[PHYS_SECTION_UNASSIGNED];
	370	}
	371	p = nodes[lp.ptr];
	372	lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
	373	}
	374
	375	if (section_covers_addr(&sections[lp.ptr], addr)) {
	376	return &sections[lp.ptr];
	377	} else {
	378	return &sections[PHYS_SECTION_UNASSIGNED];
	379	}
	380	}
	381
	382	bool memory_region_is_unassigned(MemoryRegion *mr)
	383	{
	384	return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
	385	&& mr != &io_mem_watch;
	386	}
	387
	388	/* Called from RCU critical section */
	389	static MemoryRegionSection address_space_lookup_region(AddressSpaceDispatch d,
	390	hwaddr addr,
	391	bool resolve_subpage)
	392	{
	393	MemoryRegionSection *section = atomic_read(&d->mru_section);
	394	subpage_t *subpage;
	395	bool update;
	396
	397	if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
	398	section_covers_addr(section, addr)) {
	399	update = false;
	400	} else {
	401	section = phys_page_find(d->phys_map, addr, d->map.nodes,
	402	d->map.sections);
	403	update = true;
	404	}
	405	if (resolve_subpage && section->mr->subpage) {
	406	subpage = container_of(section->mr, subpage_t, iomem);
	407	section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
	408	}
	409	if (update) {
	410	atomic_set(&d->mru_section, section);
	411	}
	412	return section;
	413	}
	414
	415	/* Called from RCU critical section */
	416	static MemoryRegionSection *
	417	address_space_translate_internal(AddressSpaceDispatch d, hwaddr addr, hwaddr xlat,
	418	hwaddr *plen, bool resolve_subpage)
	419	{
	420	MemoryRegionSection *section;
	421	MemoryRegion *mr;
	422	Int128 diff;
	423
	424	section = address_space_lookup_region(d, addr, resolve_subpage);
	425	/* Compute offset within MemoryRegionSection */
	426	addr -= section->offset_within_address_space;
	427
	428	/* Compute offset within MemoryRegion */
	429	*xlat = addr + section->offset_within_region;
	430
	431	mr = section->mr;
	432
	433	/* MMIO registers can be expected to perform full-width accesses based only
	434	* on their address, without considering adjacent registers that could
	435	* decode to completely different MemoryRegions. When such registers
	436	* exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
	437	* regions overlap wildly. For this reason we cannot clamp the accesses
	438	* here.
	439	*
	440	* If the length is small (as is the case for address_space_ldl/stl),
	441	* everything works fine. If the incoming length is large, however,
	442	* the caller really has to do the clamping through memory_access_size.
	443	*/
	444	if (memory_region_is_ram(mr)) {
	445	diff = int128_sub(section->size, int128_make64(addr));
	446	plen = int128_get64(int128_min(diff, int128_make64(plen)));
	447	}
	448	return section;
	449	}
	450
	451	/* Called from RCU critical section */
	452	IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
	453	bool is_write)
	454	{
	455	IOMMUTLBEntry iotlb = {0};
	456	MemoryRegionSection *section;
	457	MemoryRegion *mr;
	458
	459	for (;;) {
	460	AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
	461	section = address_space_lookup_region(d, addr, false);
	462	addr = addr - section->offset_within_address_space
	463	+ section->offset_within_region;
	464	mr = section->mr;
	465
	466	if (!mr->iommu_ops) {
	467	break;
	468	}
	469
	470	iotlb = mr->iommu_ops->translate(mr, addr, is_write);
	471	if (!(iotlb.perm & (1 << is_write))) {
	472	iotlb.target_as = NULL;
	473	break;
	474	}
	475
	476	addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
	477	\| (addr & iotlb.addr_mask));
	478	as = iotlb.target_as;
	479	}
	480
	481	return iotlb;
	482	}
	483
	484	/* Called from RCU critical section */
	485	MemoryRegion address_space_translate(AddressSpace as, hwaddr addr,
	486	hwaddr xlat, hwaddr plen,
	487	bool is_write)
	488	{
	489	IOMMUTLBEntry iotlb;
	490	MemoryRegionSection *section;
	491	MemoryRegion *mr;
	492
	493	for (;;) {
	494	AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
	495	section = address_space_translate_internal(d, addr, &addr, plen, true);
	496	mr = section->mr;
	497
	498	if (!mr->iommu_ops) {
	499	break;
	500	}
	501
	502	iotlb = mr->iommu_ops->translate(mr, addr, is_write);
	503	addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
	504	\| (addr & iotlb.addr_mask));
	505	plen = MIN(plen, (addr \| iotlb.addr_mask) - addr + 1);
	506	if (!(iotlb.perm & (1 << is_write))) {
	507	mr = &io_mem_unassigned;
	508	break;
	509	}
	510
	511	as = iotlb.target_as;
	512	}
	513
	514	if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
	515	hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
	516	plen = MIN(page, plen);
	517	}
	518
	519	*xlat = addr;
	520	return mr;
	521	}
	522
	523	/* Called from RCU critical section */
	524	MemoryRegionSection *
	525	address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
	526	hwaddr xlat, hwaddr plen)
	527	{
	528	MemoryRegionSection *section;
	529	AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
	530
	531	section = address_space_translate_internal(d, addr, xlat, plen, false);
	532
	533	assert(!section->mr->iommu_ops);
	534	return section;
	535	}
	536	#endif
	537
	538	#if !defined(CONFIG_USER_ONLY)
	539
	540	static int cpu_common_post_load(void *opaque, int version_id)
	541	{
	542	CPUState *cpu = opaque;
	543
	544	/* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
	545	version_id is increased. */
	546	cpu->interrupt_request &= ~0x01;
	547	tlb_flush(cpu);
	548
	549	return 0;
	550	}
	551
	552	static int cpu_common_pre_load(void *opaque)
	553	{
	554	CPUState *cpu = opaque;
	555
	556	cpu->exception_index = -1;
	557
	558	return 0;
	559	}
	560
	561	static bool cpu_common_exception_index_needed(void *opaque)
	562	{
	563	CPUState *cpu = opaque;
	564
	565	return tcg_enabled() && cpu->exception_index != -1;
	566	}
	567
	568	static const VMStateDescription vmstate_cpu_common_exception_index = {
	569	.name = "cpu_common/exception_index",
	570	.version_id = 1,
	571	.minimum_version_id = 1,
	572	.needed = cpu_common_exception_index_needed,
	573	.fields = (VMStateField[]) {
	574	VMSTATE_INT32(exception_index, CPUState),
	575	VMSTATE_END_OF_LIST()
	576	}
	577	};
	578
	579	static bool cpu_common_crash_occurred_needed(void *opaque)
	580	{
	581	CPUState *cpu = opaque;
	582
	583	return cpu->crash_occurred;
	584	}
	585
	586	static const VMStateDescription vmstate_cpu_common_crash_occurred = {
	587	.name = "cpu_common/crash_occurred",
	588	.version_id = 1,
	589	.minimum_version_id = 1,
	590	.needed = cpu_common_crash_occurred_needed,
	591	.fields = (VMStateField[]) {
	592	VMSTATE_BOOL(crash_occurred, CPUState),
	593	VMSTATE_END_OF_LIST()
	594	}
	595	};
	596
	597	const VMStateDescription vmstate_cpu_common = {
	598	.name = "cpu_common",
	599	.version_id = 1,
	600	.minimum_version_id = 1,
	601	.pre_load = cpu_common_pre_load,
	602	.post_load = cpu_common_post_load,
	603	.fields = (VMStateField[]) {
	604	VMSTATE_UINT32(halted, CPUState),
	605	VMSTATE_UINT32(interrupt_request, CPUState),
	606	VMSTATE_END_OF_LIST()
	607	},
	608	.subsections = (const VMStateDescription*[]) {
	609	&vmstate_cpu_common_exception_index,
	610	&vmstate_cpu_common_crash_occurred,
	611	NULL
	612	}
	613	};
	614
	615	#endif
	616
	617	CPUState *qemu_get_cpu(int index)
	618	{
	619	CPUState *cpu;
	620
	621	CPU_FOREACH(cpu) {
	622	if (cpu->cpu_index == index) {
	623	return cpu;
	624	}
	625	}
	626
	627	return NULL;
	628	}
	629
	630	#if !defined(CONFIG_USER_ONLY)
	631	void cpu_address_space_init(CPUState cpu, AddressSpace as, int asidx)
	632	{
	633	CPUAddressSpace *newas;
	634
	635	/* Target code should have set num_ases before calling us */
	636	assert(asidx < cpu->num_ases);
	637
	638	if (asidx == 0) {
	639	/* address space 0 gets the convenience alias */
	640	cpu->as = as;
	641	}
	642
	643	/* KVM cannot currently support multiple address spaces. */
	644	assert(asidx == 0 \|\| !kvm_enabled());
	645
	646	if (!cpu->cpu_ases) {
	647	cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
	648	}
	649
	650	newas = &cpu->cpu_ases[asidx];
	651	newas->cpu = cpu;
	652	newas->as = as;
	653	if (tcg_enabled()) {
	654	newas->tcg_as_listener.commit = tcg_commit;
	655	memory_listener_register(&newas->tcg_as_listener, as);
	656	}
	657	}
	658
	659	AddressSpace cpu_get_address_space(CPUState cpu, int asidx)
	660	{
	661	/* Return the AddressSpace corresponding to the specified index */
	662	return cpu->cpu_ases[asidx].as;
	663	}
	664	#endif
	665
	666	void cpu_exec_unrealizefn(CPUState *cpu)
	667	{
	668	CPUClass *cc = CPU_GET_CLASS(cpu);
	669
	670	cpu_list_remove(cpu);
	671
	672	if (cc->vmsd != NULL) {
	673	vmstate_unregister(NULL, cc->vmsd, cpu);
	674	}
	675	if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
	676	vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
	677	}
	678	}
	679
	680	void cpu_exec_initfn(CPUState *cpu)
	681	{
	682	cpu->as = NULL;
	683	cpu->num_ases = 0;
	684
	685	#ifndef CONFIG_USER_ONLY
	686	cpu->thread_id = qemu_get_thread_id();
	687
	688	/* This is a softmmu CPU object, so create a property for it
	689	* so users can wire up its memory. (This can't go in qom/cpu.c
	690	* because that file is compiled only once for both user-mode
	691	* and system builds.) The default if no link is set up is to use
	692	* the system address space.
	693	*/
	694	object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
	695	(Object **)&cpu->memory,
	696	qdev_prop_allow_set_link_before_realize,
	697	OBJ_PROP_LINK_UNREF_ON_RELEASE,
	698	&error_abort);
	699	cpu->memory = system_memory;
	700	object_ref(OBJECT(cpu->memory));
	701	#endif
	702	}
	703
	704	void cpu_exec_realizefn(CPUState cpu, Error *errp)
	705	{
	706	CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
	707
	708	cpu_list_add(cpu);
	709
	710	#ifndef CONFIG_USER_ONLY
	711	if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
	712	vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
	713	}
	714	if (cc->vmsd != NULL) {
	715	vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
	716	}
	717	#endif
	718	}
	719
	720	static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
	721	{
	722	/* Flush the whole TB as this will not have race conditions
	723	* even if we don't have proper locking yet.
	724	* Ideally we would just invalidate the TBs for the
	725	* specified PC.
	726	*/
	727	tb_flush(cpu);
	728	}
	729
	730	#if defined(CONFIG_USER_ONLY)
	731	void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
	732
	733	{
	734	}
	735
	736	int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
	737	int flags)
	738	{
	739	return -ENOSYS;
	740	}
	741
	742	void cpu_watchpoint_remove_by_ref(CPUState cpu, CPUWatchpoint watchpoint)
	743	{
	744	}
	745
	746	int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
	747	int flags, CPUWatchpoint **watchpoint)
	748	{
	749	return -ENOSYS;
	750	}
	751	#else
	752	/* Add a watchpoint. */
	753	int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
	754	int flags, CPUWatchpoint **watchpoint)
	755	{
	756	CPUWatchpoint *wp;
	757
	758	/* forbid ranges which are empty or run off the end of the address space */
	759	if (len == 0 \|\| (addr + len - 1) < addr) {
	760	error_report("tried to set invalid watchpoint at %"
	761	VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
	762	return -EINVAL;
	763	}
	764	wp = g_malloc(sizeof(*wp));
	765
	766	wp->vaddr = addr;
	767	wp->len = len;
	768	wp->flags = flags;
	769
	770	/* keep all GDB-injected watchpoints in front */
	771	if (flags & BP_GDB) {
	772	QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
	773	} else {
	774	QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
	775	}
	776
	777	tlb_flush_page(cpu, addr);
	778
	779	if (watchpoint)
	780	*watchpoint = wp;
	781	return 0;
	782	}
	783
	784	/* Remove a specific watchpoint. */
	785	int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
	786	int flags)
	787	{
	788	CPUWatchpoint *wp;
	789
	790	QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
	791	if (addr == wp->vaddr && len == wp->len
	792	&& flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
	793	cpu_watchpoint_remove_by_ref(cpu, wp);
	794	return 0;
	795	}
	796	}
	797	return -ENOENT;
	798	}
	799
	800	/* Remove a specific watchpoint by reference. */
	801	void cpu_watchpoint_remove_by_ref(CPUState cpu, CPUWatchpoint watchpoint)
	802	{
	803	QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
	804
	805	tlb_flush_page(cpu, watchpoint->vaddr);
	806
	807	g_free(watchpoint);
	808	}
	809
	810	/* Remove all matching watchpoints. */
	811	void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
	812	{
	813	CPUWatchpoint wp, next;
	814
	815	QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
	816	if (wp->flags & mask) {
	817	cpu_watchpoint_remove_by_ref(cpu, wp);
	818	}
	819	}
	820	}
	821
	822	/* Return true if this watchpoint address matches the specified
	823	* access (ie the address range covered by the watchpoint overlaps
	824	* partially or completely with the address range covered by the
	825	* access).
	826	*/
	827	static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
	828	vaddr addr,
	829	vaddr len)
	830	{
	831	/* We know the lengths are non-zero, but a little caution is
	832	* required to avoid errors in the case where the range ends
	833	* exactly at the top of the address space and so addr + len
	834	* wraps round to zero.
	835	*/
	836	vaddr wpend = wp->vaddr + wp->len - 1;
	837	vaddr addrend = addr + len - 1;
	838
	839	return !(addr > wpend \|\| wp->vaddr > addrend);
	840	}
	841
	842	#endif
	843
	844	/* Add a breakpoint. */
	845	int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
	846	CPUBreakpoint **breakpoint)
	847	{
	848	CPUBreakpoint *bp;
	849
	850	bp = g_malloc(sizeof(*bp));
	851
	852	bp->pc = pc;
	853	bp->flags = flags;
	854
	855	/* keep all GDB-injected breakpoints in front */
	856	if (flags & BP_GDB) {
	857	QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
	858	} else {
	859	QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
	860	}
	861
	862	breakpoint_invalidate(cpu, pc);
	863
	864	if (breakpoint) {
	865	*breakpoint = bp;
	866	}
	867	return 0;
	868	}
	869
	870	/* Remove a specific breakpoint. */
	871	int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
	872	{
	873	CPUBreakpoint *bp;
	874
	875	QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
	876	if (bp->pc == pc && bp->flags == flags) {
	877	cpu_breakpoint_remove_by_ref(cpu, bp);
	878	return 0;
	879	}
	880	}
	881	return -ENOENT;
	882	}
	883
	884	/* Remove a specific breakpoint by reference. */
	885	void cpu_breakpoint_remove_by_ref(CPUState cpu, CPUBreakpoint breakpoint)
	886	{
	887	QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
	888
	889	breakpoint_invalidate(cpu, breakpoint->pc);
	890
	891	g_free(breakpoint);
	892	}
	893
	894	/* Remove all matching breakpoints. */
	895	void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
	896	{
	897	CPUBreakpoint bp, next;
	898
	899	QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
	900	if (bp->flags & mask) {
	901	cpu_breakpoint_remove_by_ref(cpu, bp);
	902	}
	903	}
	904	}
	905
	906	/* enable or disable single step mode. EXCP_DEBUG is returned by the
	907	CPU loop after each instruction */
	908	void cpu_single_step(CPUState *cpu, int enabled)
	909	{
	910	if (cpu->singlestep_enabled != enabled) {
	911	cpu->singlestep_enabled = enabled;
	912	if (kvm_enabled()) {
	913	kvm_update_guest_debug(cpu, 0);
	914	} else {
	915	/* must flush all the translated code to avoid inconsistencies */
	916	/* XXX: only flush what is necessary */
	917	tb_flush(cpu);
	918	}
	919	}
	920	}
	921
	922	void cpu_abort(CPUState cpu, const char fmt, ...)
	923	{
	924	va_list ap;
	925	va_list ap2;
	926
	927	va_start(ap, fmt);
	928	va_copy(ap2, ap);
	929	fprintf(stderr, "qemu: fatal: ");
	930	vfprintf(stderr, fmt, ap);
	931	fprintf(stderr, "\n");
	932	cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU \| CPU_DUMP_CCOP);
	933	if (qemu_log_separate()) {
	934	qemu_log_lock();
	935	qemu_log("qemu: fatal: ");
	936	qemu_log_vprintf(fmt, ap2);
	937	qemu_log("\n");
	938	log_cpu_state(cpu, CPU_DUMP_FPU \| CPU_DUMP_CCOP);
	939	qemu_log_flush();
	940	qemu_log_unlock();
	941	qemu_log_close();
	942	}
	943	va_end(ap2);
	944	va_end(ap);
	945	replay_finish();
	946	#if defined(CONFIG_USER_ONLY)
	947	{
	948	struct sigaction act;
	949	sigfillset(&act.sa_mask);
	950	act.sa_handler = SIG_DFL;
	951	sigaction(SIGABRT, &act, NULL);
	952	}
	953	#endif
	954	abort();
	955	}
	956
	957	#if !defined(CONFIG_USER_ONLY)
	958	/* Called from RCU critical section */
	959	static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
	960	{
	961	RAMBlock *block;
	962
	963	block = atomic_rcu_read(&ram_list.mru_block);
	964	if (block && addr - block->offset < block->max_length) {
	965	return block;
	966	}
	967	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	968	if (addr - block->offset < block->max_length) {
	969	goto found;
	970	}
	971	}
	972
	973	fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
	974	abort();
	975
	976	found:
	977	/* It is safe to write mru_block outside the iothread lock. This
	978	* is what happens:
	979	*
	980	* mru_block = xxx
	981	* rcu_read_unlock()
	982	* xxx removed from list
	983	* rcu_read_lock()
	984	* read mru_block
	985	* mru_block = NULL;
	986	* call_rcu(reclaim_ramblock, xxx);
	987	* rcu_read_unlock()
	988	*
	989	* atomic_rcu_set is not needed here. The block was already published
	990	* when it was placed into the list. Here we're just making an extra
	991	* copy of the pointer.
	992	*/
	993	ram_list.mru_block = block;
	994	return block;
	995	}
	996
	997	static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
	998	{
	999	CPUState *cpu;
	1000	ram_addr_t start1;
	1001	RAMBlock *block;
	1002	ram_addr_t end;
	1003
	1004	end = TARGET_PAGE_ALIGN(start + length);
	1005	start &= TARGET_PAGE_MASK;
	1006
	1007	rcu_read_lock();
	1008	block = qemu_get_ram_block(start);
	1009	assert(block == qemu_get_ram_block(end - 1));
	1010	start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
	1011	CPU_FOREACH(cpu) {
	1012	tlb_reset_dirty(cpu, start1, length);
	1013	}
	1014	rcu_read_unlock();
	1015	}
	1016
	1017	/* Note: start and end must be within the same ram block. */
	1018	bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
	1019	ram_addr_t length,
	1020	unsigned client)
	1021	{
	1022	DirtyMemoryBlocks *blocks;
	1023	unsigned long end, page;
	1024	bool dirty = false;
	1025
	1026	if (length == 0) {
	1027	return false;
	1028	}
	1029
	1030	end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
	1031	page = start >> TARGET_PAGE_BITS;
	1032
	1033	rcu_read_lock();
	1034
	1035	blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
	1036
	1037	while (page < end) {
	1038	unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
	1039	unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
	1040	unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
	1041
	1042	dirty \|= bitmap_test_and_clear_atomic(blocks->blocks[idx],
	1043	offset, num);
	1044	page += num;
	1045	}
	1046
	1047	rcu_read_unlock();
	1048
	1049	if (dirty && tcg_enabled()) {
	1050	tlb_reset_dirty_range_all(start, length);
	1051	}
	1052
	1053	return dirty;
	1054	}
	1055
	1056	/* Called from RCU critical section */
	1057	hwaddr memory_region_section_get_iotlb(CPUState *cpu,
	1058	MemoryRegionSection *section,
	1059	target_ulong vaddr,
	1060	hwaddr paddr, hwaddr xlat,
	1061	int prot,
	1062	target_ulong *address)
	1063	{
	1064	hwaddr iotlb;
	1065	CPUWatchpoint *wp;
	1066
	1067	if (memory_region_is_ram(section->mr)) {
	1068	/* Normal RAM. */
	1069	iotlb = memory_region_get_ram_addr(section->mr) + xlat;
	1070	if (!section->readonly) {
	1071	iotlb \|= PHYS_SECTION_NOTDIRTY;
	1072	} else {
	1073	iotlb \|= PHYS_SECTION_ROM;
	1074	}
	1075	} else {
	1076	AddressSpaceDispatch *d;
	1077
	1078	d = atomic_rcu_read(&section->address_space->dispatch);
	1079	iotlb = section - d->map.sections;
	1080	iotlb += xlat;
	1081	}
	1082
	1083	/* Make accesses to pages with watchpoints go via the
	1084	watchpoint trap routines. */
	1085	QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
	1086	if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
	1087	/* Avoid trapping reads of pages with a write breakpoint. */
	1088	if ((prot & PAGE_WRITE) \|\| (wp->flags & BP_MEM_READ)) {
	1089	iotlb = PHYS_SECTION_WATCH + paddr;
	1090	*address \|= TLB_MMIO;
	1091	break;
	1092	}
	1093	}
	1094	}
	1095
	1096	return iotlb;
	1097	}
	1098	#endif /* !defined(CONFIG_USER_ONLY) */
	1099
	1100	#if !defined(CONFIG_USER_ONLY)
	1101
	1102	static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
	1103	uint16_t section);
	1104	static subpage_t subpage_init(AddressSpace as, hwaddr base);
	1105
	1106	static void (phys_mem_alloc)(size_t size, uint64_t *align) =
	1107	qemu_anon_ram_alloc;
	1108
	1109	/*
	1110	* Set a custom physical guest memory alloator.
	1111	* Accelerators with unusual needs may need this. Hopefully, we can
	1112	* get rid of it eventually.
	1113	*/
	1114	void phys_mem_set_alloc(void (alloc)(size_t, uint64_t *align))
	1115	{
	1116	phys_mem_alloc = alloc;
	1117	}
	1118
	1119	static uint16_t phys_section_add(PhysPageMap *map,
	1120	MemoryRegionSection *section)
	1121	{
	1122	/* The physical section number is ORed with a page-aligned
	1123	* pointer to produce the iotlb entries. Thus it should
	1124	* never overflow into the page-aligned value.
	1125	*/
	1126	assert(map->sections_nb < TARGET_PAGE_SIZE);
	1127
	1128	if (map->sections_nb == map->sections_nb_alloc) {
	1129	map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
	1130	map->sections = g_renew(MemoryRegionSection, map->sections,
	1131	map->sections_nb_alloc);
	1132	}
	1133	map->sections[map->sections_nb] = *section;
	1134	memory_region_ref(section->mr);
	1135	return map->sections_nb++;
	1136	}
	1137
	1138	static void phys_section_destroy(MemoryRegion *mr)
	1139	{
	1140	bool have_sub_page = mr->subpage;
	1141
	1142	memory_region_unref(mr);
	1143
	1144	if (have_sub_page) {
	1145	subpage_t *subpage = container_of(mr, subpage_t, iomem);
	1146	object_unref(OBJECT(&subpage->iomem));
	1147	g_free(subpage);
	1148	}
	1149	}
	1150
	1151	static void phys_sections_free(PhysPageMap *map)
	1152	{
	1153	while (map->sections_nb > 0) {
	1154	MemoryRegionSection *section = &map->sections[--map->sections_nb];
	1155	phys_section_destroy(section->mr);
	1156	}
	1157	g_free(map->sections);
	1158	g_free(map->nodes);
	1159	}
	1160
	1161	static void register_subpage(AddressSpaceDispatch d, MemoryRegionSection section)
	1162	{
	1163	subpage_t *subpage;
	1164	hwaddr base = section->offset_within_address_space
	1165	& TARGET_PAGE_MASK;
	1166	MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
	1167	d->map.nodes, d->map.sections);
	1168	MemoryRegionSection subsection = {
	1169	.offset_within_address_space = base,
	1170	.size = int128_make64(TARGET_PAGE_SIZE),
	1171	};
	1172	hwaddr start, end;
	1173
	1174	assert(existing->mr->subpage \|\| existing->mr == &io_mem_unassigned);
	1175
	1176	if (!(existing->mr->subpage)) {
	1177	subpage = subpage_init(d->as, base);
	1178	subsection.address_space = d->as;
	1179	subsection.mr = &subpage->iomem;
	1180	phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
	1181	phys_section_add(&d->map, &subsection));
	1182	} else {
	1183	subpage = container_of(existing->mr, subpage_t, iomem);
	1184	}
	1185	start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
	1186	end = start + int128_get64(section->size) - 1;
	1187	subpage_register(subpage, start, end,
	1188	phys_section_add(&d->map, section));
	1189	}
	1190
	1191
	1192	static void register_multipage(AddressSpaceDispatch *d,
	1193	MemoryRegionSection *section)
	1194	{
	1195	hwaddr start_addr = section->offset_within_address_space;
	1196	uint16_t section_index = phys_section_add(&d->map, section);
	1197	uint64_t num_pages = int128_get64(int128_rshift(section->size,
	1198	TARGET_PAGE_BITS));
	1199
	1200	assert(num_pages);
	1201	phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
	1202	}
	1203
	1204	static void mem_add(MemoryListener listener, MemoryRegionSection section)
	1205	{
	1206	AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
	1207	AddressSpaceDispatch *d = as->next_dispatch;
	1208	MemoryRegionSection now = section, remain = section;
	1209	Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
	1210
	1211	if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
	1212	uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
	1213	- now.offset_within_address_space;
	1214
	1215	now.size = int128_min(int128_make64(left), now.size);
	1216	register_subpage(d, &now);
	1217	} else {
	1218	now.size = int128_zero();
	1219	}
	1220	while (int128_ne(remain.size, now.size)) {
	1221	remain.size = int128_sub(remain.size, now.size);
	1222	remain.offset_within_address_space += int128_get64(now.size);
	1223	remain.offset_within_region += int128_get64(now.size);
	1224	now = remain;
	1225	if (int128_lt(remain.size, page_size)) {
	1226	register_subpage(d, &now);
	1227	} else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
	1228	now.size = page_size;
	1229	register_subpage(d, &now);
	1230	} else {
	1231	now.size = int128_and(now.size, int128_neg(page_size));
	1232	register_multipage(d, &now);
	1233	}
	1234	}
	1235	}
	1236
	/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
	void qemu_flush_coalesced_mmio_buffer(void)
	{
	    if (!kvm_enabled()) {
	        return;
	    }
	    kvm_flush_coalesced_mmio_buffer();
	}
	1242
	1243	void qemu_mutex_lock_ramlist(void)
	1244	{
	1245	qemu_mutex_lock(&ram_list.mutex);
	1246	}
	1247
	1248	void qemu_mutex_unlock_ramlist(void)
	1249	{
	1250	qemu_mutex_unlock(&ram_list.mutex);
	1251	}
	1252
	1253	#ifdef __linux__
	/*
	 * Return the size of the file open on @fd, found by seeking to its end,
	 * or -errno if the seek fails.
	 */
	static int64_t get_file_size(int fd)
	{
	    int64_t end_pos = lseek(fd, 0, SEEK_END);

	    return end_pos < 0 ? -errno : end_pos;
	}
	1262
	1263	static void file_ram_alloc(RAMBlock block,
	1264	ram_addr_t memory,
	1265	const char *path,
	1266	Error **errp)
	1267	{
	1268	bool unlink_on_error = false;
	1269	char *filename;
	1270	char *sanitized_name;
	1271	char *c;
	1272	void *area = MAP_FAILED;
	1273	int fd = -1;
	1274	int64_t file_size;
	1275
	1276	if (kvm_enabled() && !kvm_has_sync_mmu()) {
	1277	error_setg(errp,
	1278	"host lacks kvm mmu notifiers, -mem-path unsupported");
	1279	return NULL;
	1280	}
	1281
	1282	for (;;) {
	1283	fd = open(path, O_RDWR);
	1284	if (fd >= 0) {
	1285	/* @path names an existing file, use it */
	1286	break;
	1287	}
	1288	if (errno == ENOENT) {
	1289	/* @path names a file that doesn't exist, create it */
	1290	fd = open(path, O_RDWR \| O_CREAT \| O_EXCL, 0644);
	1291	if (fd >= 0) {
	1292	unlink_on_error = true;
	1293	break;
	1294	}
	1295	} else if (errno == EISDIR) {
	1296	/* @path names a directory, create a file there */
	1297	/* Make name safe to use with mkstemp by replacing '/' with '_'. */
	1298	sanitized_name = g_strdup(memory_region_name(block->mr));
	1299	for (c = sanitized_name; *c != '\0'; c++) {
	1300	if (*c == '/') {
	1301	*c = '_';
	1302	}
	1303	}
	1304
	1305	filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
	1306	sanitized_name);
	1307	g_free(sanitized_name);
	1308
	1309	fd = mkstemp(filename);
	1310	if (fd >= 0) {
	1311	unlink(filename);
	1312	g_free(filename);
	1313	break;
	1314	}
	1315	g_free(filename);
	1316	}
	1317	if (errno != EEXIST && errno != EINTR) {
	1318	error_setg_errno(errp, errno,
	1319	"can't open backing store %s for guest RAM",
	1320	path);
	1321	goto error;
	1322	}
	1323	/*
	1324	* Try again on EINTR and EEXIST. The latter happens when
	1325	* something else creates the file between our two open().
	1326	*/
	1327	}
	1328
	1329	block->page_size = qemu_fd_getpagesize(fd);
	1330	block->mr->align = block->page_size;
	1331	#if defined(__s390x__)
	1332	if (kvm_enabled()) {
	1333	block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
	1334	}
	1335	#endif
	1336
	1337	file_size = get_file_size(fd);
	1338
	1339	if (memory < block->page_size) {
	1340	error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
	1341	"or larger than page size 0x%zx",
	1342	memory, block->page_size);
	1343	goto error;
	1344	}
	1345
	1346	if (file_size > 0 && file_size < memory) {
	1347	error_setg(errp, "backing store %s size 0x%" PRIx64
	1348	" does not match 'size' option 0x" RAM_ADDR_FMT,
	1349	path, file_size, memory);
	1350	goto error;
	1351	}
	1352
	1353	memory = ROUND_UP(memory, block->page_size);
	1354
	1355	/*
	1356	* ftruncate is not supported by hugetlbfs in older
	1357	* hosts, so don't bother bailing out on errors.
	1358	* If anything goes wrong with it under other filesystems,
	1359	* mmap will fail.
	1360	*
	1361	* Do not truncate the non-empty backend file to avoid corrupting
	1362	* the existing data in the file. Disabling shrinking is not
	1363	* enough. For example, the current vNVDIMM implementation stores
	1364	* the guest NVDIMM labels at the end of the backend file. If the
	1365	* backend file is later extended, QEMU will not be able to find
	1366	* those labels. Therefore, extending the non-empty backend file
	1367	* is disabled as well.
	1368	*/
	1369	if (!file_size && ftruncate(fd, memory)) {
	1370	perror("ftruncate");
	1371	}
	1372
	1373	area = qemu_ram_mmap(fd, memory, block->mr->align,
	1374	block->flags & RAM_SHARED);
	1375	if (area == MAP_FAILED) {
	1376	error_setg_errno(errp, errno,
	1377	"unable to map backing store for guest RAM");
	1378	goto error;
	1379	}
	1380
	1381	if (mem_prealloc) {
	1382	os_mem_prealloc(fd, area, memory, errp);
	1383	if (errp && *errp) {
	1384	goto error;
	1385	}
	1386	}
	1387
	1388	block->fd = fd;
	1389	return area;
	1390
	1391	error:
	1392	if (area != MAP_FAILED) {
	1393	qemu_ram_munmap(area, memory);
	1394	}
	1395	if (unlink_on_error) {
	1396	unlink(path);
	1397	}
	1398	if (fd != -1) {
	1399	close(fd);
	1400	}
	1401	return NULL;
	1402	}
	1403	#endif
	1404
	1405	/* Called with the ramlist lock held. */
	1406	static ram_addr_t find_ram_offset(ram_addr_t size)
	1407	{
	1408	RAMBlock block, next_block;
	1409	ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
	1410
	1411	assert(size != 0); /* it would hand out same offset multiple times */
	1412
	1413	if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
	1414	return 0;
	1415	}
	1416
	1417	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	1418	ram_addr_t end, next = RAM_ADDR_MAX;
	1419
	1420	end = block->offset + block->max_length;
	1421
	1422	QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
	1423	if (next_block->offset >= end) {
	1424	next = MIN(next, next_block->offset);
	1425	}
	1426	}
	1427	if (next - end >= size && next - end < mingap) {
	1428	offset = end;
	1429	mingap = next - end;
	1430	}
	1431	}
	1432
	1433	if (offset == RAM_ADDR_MAX) {
	1434	fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
	1435	(uint64_t)size);
	1436	abort();
	1437	}
	1438
	1439	return offset;
	1440	}
	1441
	1442	ram_addr_t last_ram_offset(void)
	1443	{
	1444	RAMBlock *block;
	1445	ram_addr_t last = 0;
	1446
	1447	rcu_read_lock();
	1448	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	1449	last = MAX(last, block->offset + block->max_length);
	1450	}
	1451	rcu_read_unlock();
	1452	return last;
	1453	}
	1454
	1455	static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
	1456	{
	1457	int ret;
	1458
	1459	/* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
	1460	if (!machine_dump_guest_core(current_machine)) {
	1461	ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
	1462	if (ret) {
	1463	perror("qemu_madvise");
	1464	fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
	1465	"but dump_guest_core=off specified\n");
	1466	}
	1467	}
	1468	}
	1469
	1470	const char qemu_ram_get_idstr(RAMBlock rb)
	1471	{
	1472	return rb->idstr;
	1473	}
	1474
	1475	/* Called with iothread lock held. */
	1476	void qemu_ram_set_idstr(RAMBlock new_block, const char name, DeviceState *dev)
	1477	{
	1478	RAMBlock *block;
	1479
	1480	assert(new_block);
	1481	assert(!new_block->idstr[0]);
	1482
	1483	if (dev) {
	1484	char *id = qdev_get_dev_path(dev);
	1485	if (id) {
	1486	snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
	1487	g_free(id);
	1488	}
	1489	}
	1490	pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
	1491
	1492	rcu_read_lock();
	1493	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	1494	if (block != new_block &&
	1495	!strcmp(block->idstr, new_block->idstr)) {
	1496	fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
	1497	new_block->idstr);
	1498	abort();
	1499	}
	1500	}
	1501	rcu_read_unlock();
	1502	}
	1503
	1504	/* Called with iothread lock held. */
	1505	void qemu_ram_unset_idstr(RAMBlock *block)
	1506	{
	1507	/* FIXME: arch_init.c assumes that this is not called throughout
	1508	* migration. Ignore the problem since hot-unplug during migration
	1509	* does not work anyway.
	1510	*/
	1511	if (block) {
	1512	memset(block->idstr, 0, sizeof(block->idstr));
	1513	}
	1514	}
	1515
	1516	size_t qemu_ram_pagesize(RAMBlock *rb)
	1517	{
	1518	return rb->page_size;
	1519	}
	1520
	1521	static int memory_try_enable_merging(void *addr, size_t len)
	1522	{
	1523	if (!machine_mem_merge(current_machine)) {
	1524	/* disabled by the user */
	1525	return 0;
	1526	}
	1527
	1528	return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
	1529	}
	1530
	1531	/* Only legal before guest might have detected the memory size: e.g. on
	1532	* incoming migration, or right after reset.
	1533	*
	1534	* As memory core doesn't know how is memory accessed, it is up to
	1535	* resize callback to update device state and/or add assertions to detect
	1536	* misuse, if necessary.
	1537	*/
	1538	int qemu_ram_resize(RAMBlock block, ram_addr_t newsize, Error *errp)
	1539	{
	1540	assert(block);
	1541
	1542	newsize = HOST_PAGE_ALIGN(newsize);
	1543
	1544	if (block->used_length == newsize) {
	1545	return 0;
	1546	}
	1547
	1548	if (!(block->flags & RAM_RESIZEABLE)) {
	1549	error_setg_errno(errp, EINVAL,
	1550	"Length mismatch: %s: 0x" RAM_ADDR_FMT
	1551	" in != 0x" RAM_ADDR_FMT, block->idstr,
	1552	newsize, block->used_length);
	1553	return -EINVAL;
	1554	}
	1555
	1556	if (block->max_length < newsize) {
	1557	error_setg_errno(errp, EINVAL,
	1558	"Length too large: %s: 0x" RAM_ADDR_FMT
	1559	" > 0x" RAM_ADDR_FMT, block->idstr,
	1560	newsize, block->max_length);
	1561	return -EINVAL;
	1562	}
	1563
	1564	cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
	1565	block->used_length = newsize;
	1566	cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
	1567	DIRTY_CLIENTS_ALL);
	1568	memory_region_set_size(block->mr, newsize);
	1569	if (block->resized) {
	1570	block->resized(block->idstr, newsize, block->host);
	1571	}
	1572	return 0;
	1573	}
	1574
	1575	/* Called with ram_list.mutex held */
	1576	static void dirty_memory_extend(ram_addr_t old_ram_size,
	1577	ram_addr_t new_ram_size)
	1578	{
	1579	ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
	1580	DIRTY_MEMORY_BLOCK_SIZE);
	1581	ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
	1582	DIRTY_MEMORY_BLOCK_SIZE);
	1583	int i;
	1584
	1585	/* Only need to extend if block count increased */
	1586	if (new_num_blocks <= old_num_blocks) {
	1587	return;
	1588	}
	1589
	1590	for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
	1591	DirtyMemoryBlocks *old_blocks;
	1592	DirtyMemoryBlocks *new_blocks;
	1593	int j;
	1594
	1595	old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
	1596	new_blocks = g_malloc(sizeof(*new_blocks) +
	1597	sizeof(new_blocks->blocks[0]) * new_num_blocks);
	1598
	1599	if (old_num_blocks) {
	1600	memcpy(new_blocks->blocks, old_blocks->blocks,
	1601	old_num_blocks * sizeof(old_blocks->blocks[0]));
	1602	}
	1603
	1604	for (j = old_num_blocks; j < new_num_blocks; j++) {
	1605	new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
	1606	}
	1607
	1608	atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
	1609
	1610	if (old_blocks) {
	1611	g_free_rcu(old_blocks, rcu);
	1612	}
	1613	}
	1614	}
	1615
	1616	static void ram_block_add(RAMBlock new_block, Error *errp)
	1617	{
	1618	RAMBlock *block;
	1619	RAMBlock *last_block = NULL;
	1620	ram_addr_t old_ram_size, new_ram_size;
	1621	Error *err = NULL;
	1622
	1623	old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
	1624
	1625	qemu_mutex_lock_ramlist();
	1626	new_block->offset = find_ram_offset(new_block->max_length);
	1627
	1628	if (!new_block->host) {
	1629	if (xen_enabled()) {
	1630	xen_ram_alloc(new_block->offset, new_block->max_length,
	1631	new_block->mr, &err);
	1632	if (err) {
	1633	error_propagate(errp, err);
	1634	qemu_mutex_unlock_ramlist();
	1635	return;
	1636	}
	1637	} else {
	1638	new_block->host = phys_mem_alloc(new_block->max_length,
	1639	&new_block->mr->align);
	1640	if (!new_block->host) {
	1641	error_setg_errno(errp, errno,
	1642	"cannot set up guest memory '%s'",
	1643	memory_region_name(new_block->mr));
	1644	qemu_mutex_unlock_ramlist();
	1645	return;
	1646	}
	1647	memory_try_enable_merging(new_block->host, new_block->max_length);
	1648	}
	1649	}
	1650
	1651	new_ram_size = MAX(old_ram_size,
	1652	(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
	1653	if (new_ram_size > old_ram_size) {
	1654	migration_bitmap_extend(old_ram_size, new_ram_size);
	1655	dirty_memory_extend(old_ram_size, new_ram_size);
	1656	}
	1657	/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
	1658	* QLIST (which has an RCU-friendly variant) does not have insertion at
	1659	* tail, so save the last element in last_block.
	1660	*/
	1661	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	1662	last_block = block;
	1663	if (block->max_length < new_block->max_length) {
	1664	break;
	1665	}
	1666	}
	1667	if (block) {
	1668	QLIST_INSERT_BEFORE_RCU(block, new_block, next);
	1669	} else if (last_block) {
	1670	QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
	1671	} else { /* list is empty */
	1672	QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
	1673	}
	1674	ram_list.mru_block = NULL;
	1675
	1676	/* Write list before version */
	1677	smp_wmb();
	1678	ram_list.version++;
	1679	qemu_mutex_unlock_ramlist();
	1680
	1681	cpu_physical_memory_set_dirty_range(new_block->offset,
	1682	new_block->used_length,
	1683	DIRTY_CLIENTS_ALL);
	1684
	1685	if (new_block->host) {
	1686	qemu_ram_setup_dump(new_block->host, new_block->max_length);
	1687	qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
	1688	/* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
	1689	qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
	1690	ram_block_notify_add(new_block->host, new_block->max_length);
	1691	}
	1692	}
	1693
	1694	#ifdef __linux__
	1695	RAMBlock qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion mr,
	1696	bool share, const char *mem_path,
	1697	Error **errp)
	1698	{
	1699	RAMBlock *new_block;
	1700	Error *local_err = NULL;
	1701
	1702	if (xen_enabled()) {
	1703	error_setg(errp, "-mem-path not supported with Xen");
	1704	return NULL;
	1705	}
	1706
	1707	if (phys_mem_alloc != qemu_anon_ram_alloc) {
	1708	/*
	1709	* file_ram_alloc() needs to allocate just like
	1710	* phys_mem_alloc, but we haven't bothered to provide
	1711	* a hook there.
	1712	*/
	1713	error_setg(errp,
	1714	"-mem-path not supported with this accelerator");
	1715	return NULL;
	1716	}
	1717
	1718	size = HOST_PAGE_ALIGN(size);
	1719	new_block = g_malloc0(sizeof(*new_block));
	1720	new_block->mr = mr;
	1721	new_block->used_length = size;
	1722	new_block->max_length = size;
	1723	new_block->flags = share ? RAM_SHARED : 0;
	1724	new_block->host = file_ram_alloc(new_block, size,
	1725	mem_path, errp);
	1726	if (!new_block->host) {
	1727	g_free(new_block);
	1728	return NULL;
	1729	}
	1730
	1731	ram_block_add(new_block, &local_err);
	1732	if (local_err) {
	1733	g_free(new_block);
	1734	error_propagate(errp, local_err);
	1735	return NULL;
	1736	}
	1737	return new_block;
	1738	}
	1739	#endif
	1740
	1741	static
	1742	RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
	1743	void (resized)(const char,
	1744	uint64_t length,
	1745	void *host),
	1746	void *host, bool resizeable,
	1747	MemoryRegion mr, Error *errp)
	1748	{
	1749	RAMBlock *new_block;
	1750	Error *local_err = NULL;
	1751
	1752	size = HOST_PAGE_ALIGN(size);
	1753	max_size = HOST_PAGE_ALIGN(max_size);
	1754	new_block = g_malloc0(sizeof(*new_block));
	1755	new_block->mr = mr;
	1756	new_block->resized = resized;
	1757	new_block->used_length = size;
	1758	new_block->max_length = max_size;
	1759	assert(max_size >= size);
	1760	new_block->fd = -1;
	1761	new_block->page_size = getpagesize();
	1762	new_block->host = host;
	1763	if (host) {
	1764	new_block->flags \|= RAM_PREALLOC;
	1765	}
	1766	if (resizeable) {
	1767	new_block->flags \|= RAM_RESIZEABLE;
	1768	}
	1769	ram_block_add(new_block, &local_err);
	1770	if (local_err) {
	1771	g_free(new_block);
	1772	error_propagate(errp, local_err);
	1773	return NULL;
	1774	}
	1775	return new_block;
	1776	}
	1777
	1778	RAMBlock qemu_ram_alloc_from_ptr(ram_addr_t size, void host,
	1779	MemoryRegion mr, Error *errp)
	1780	{
	1781	return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
	1782	}
	1783
	1784	RAMBlock qemu_ram_alloc(ram_addr_t size, MemoryRegion mr, Error **errp)
	1785	{
	1786	return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
	1787	}
	1788
	1789	RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
	1790	void (resized)(const char,
	1791	uint64_t length,
	1792	void *host),
	1793	MemoryRegion mr, Error *errp)
	1794	{
	1795	return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
	1796	}
	1797
	1798	static void reclaim_ramblock(RAMBlock *block)
	1799	{
	1800	if (block->flags & RAM_PREALLOC) {
	1801	;
	1802	} else if (xen_enabled()) {
	1803	xen_invalidate_map_cache_entry(block->host);
	1804	#ifndef _WIN32
	1805	} else if (block->fd >= 0) {
	1806	qemu_ram_munmap(block->host, block->max_length);
	1807	close(block->fd);
	1808	#endif
	1809	} else {
	1810	qemu_anon_ram_free(block->host, block->max_length);
	1811	}
	1812	g_free(block);
	1813	}
	1814
	1815	void qemu_ram_free(RAMBlock *block)
	1816	{
	1817	if (!block) {
	1818	return;
	1819	}
	1820
	1821	if (block->host) {
	1822	ram_block_notify_remove(block->host, block->max_length);
	1823	}
	1824
	1825	qemu_mutex_lock_ramlist();
	1826	QLIST_REMOVE_RCU(block, next);
	1827	ram_list.mru_block = NULL;
	1828	/* Write list before version */
	1829	smp_wmb();
	1830	ram_list.version++;
	1831	call_rcu(block, reclaim_ramblock, rcu);
	1832	qemu_mutex_unlock_ramlist();
	1833	}
	1834
	1835	#ifndef _WIN32
	1836	void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
	1837	{
	1838	RAMBlock *block;
	1839	ram_addr_t offset;
	1840	int flags;
	1841	void area, vaddr;
	1842
	1843	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	1844	offset = addr - block->offset;
	1845	if (offset < block->max_length) {
	1846	vaddr = ramblock_ptr(block, offset);
	1847	if (block->flags & RAM_PREALLOC) {
	1848	;
	1849	} else if (xen_enabled()) {
	1850	abort();
	1851	} else {
	1852	flags = MAP_FIXED;
	1853	if (block->fd >= 0) {
	1854	flags \|= (block->flags & RAM_SHARED ?
	1855	MAP_SHARED : MAP_PRIVATE);
	1856	area = mmap(vaddr, length, PROT_READ \| PROT_WRITE,
	1857	flags, block->fd, offset);
	1858	} else {
	1859	/*
	1860	* Remap needs to match alloc. Accelerators that
	1861	* set phys_mem_alloc never remap. If they did,
	1862	* we'd need a remap hook here.
	1863	*/
	1864	assert(phys_mem_alloc == qemu_anon_ram_alloc);
	1865
	1866	flags \|= MAP_PRIVATE \| MAP_ANONYMOUS;
	1867	area = mmap(vaddr, length, PROT_READ \| PROT_WRITE,
	1868	flags, -1, 0);
	1869	}
	1870	if (area != vaddr) {
	1871	fprintf(stderr, "Could not remap addr: "
	1872	RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
	1873	length, addr);
	1874	exit(1);
	1875	}
	1876	memory_try_enable_merging(vaddr, length);
	1877	qemu_ram_setup_dump(vaddr, length);
	1878	}
	1879	}
	1880	}
	1881	}
	1882	#endif /* !_WIN32 */
	1883
	1884	/* Return a host pointer to ram allocated with qemu_ram_alloc.
	1885	* This should not be used for general purpose DMA. Use address_space_map
	1886	* or address_space_rw instead. For local memory (e.g. video ram) that the
	1887	* device owns, use memory_region_get_ram_ptr.
	1888	*
	1889	* Called within RCU critical section.
	1890	*/
	1891	void qemu_map_ram_ptr(RAMBlock ram_block, ram_addr_t addr)
	1892	{
	1893	RAMBlock *block = ram_block;
	1894
	1895	if (block == NULL) {
	1896	block = qemu_get_ram_block(addr);
	1897	addr -= block->offset;
	1898	}
	1899
	1900	if (xen_enabled() && block->host == NULL) {
	1901	/* We need to check if the requested address is in the RAM
	1902	* because we don't want to map the entire memory in QEMU.
	1903	* In that case just map until the end of the page.
	1904	*/
	1905	if (block->offset == 0) {
	1906	return xen_map_cache(addr, 0, 0);
	1907	}
	1908
	1909	block->host = xen_map_cache(block->offset, block->max_length, 1);
	1910	}
	1911	return ramblock_ptr(block, addr);
	1912	}
	1913
	1914	/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
	1915	* but takes a size argument.
	1916	*
	1917	* Called within RCU critical section.
	1918	*/
	1919	static void qemu_ram_ptr_length(RAMBlock ram_block, ram_addr_t addr,
	1920	hwaddr *size)
	1921	{
	1922	RAMBlock *block = ram_block;
	1923	if (*size == 0) {
	1924	return NULL;
	1925	}
	1926
	1927	if (block == NULL) {
	1928	block = qemu_get_ram_block(addr);
	1929	addr -= block->offset;
	1930	}
	1931	size = MIN(size, block->max_length - addr);
	1932
	1933	if (xen_enabled() && block->host == NULL) {
	1934	/* We need to check if the requested address is in the RAM
	1935	* because we don't want to map the entire memory in QEMU.
	1936	* In that case just map the requested area.
	1937	*/
	1938	if (block->offset == 0) {
	1939	return xen_map_cache(addr, *size, 1);
	1940	}
	1941
	1942	block->host = xen_map_cache(block->offset, block->max_length, 1);
	1943	}
	1944
	1945	return ramblock_ptr(block, addr);
	1946	}
	1947
	1948	/*
	1949	* Translates a host ptr back to a RAMBlock, a ram_addr and an offset
	1950	* in that RAMBlock.
	1951	*
	1952	* ptr: Host pointer to look up
	1953	* round_offset: If true round the result offset down to a page boundary
	1954	* *ram_addr: set to result ram_addr
	1955	* *offset: set to result offset within the RAMBlock
	1956	*
	1957	* Returns: RAMBlock (or NULL if not found)
	1958	*
	1959	* By the time this function returns, the returned pointer is not protected
	1960	* by RCU anymore. If the caller is not within an RCU critical section and
	1961	* does not hold the iothread lock, it must have other means of protecting the
	1962	* pointer, such as a reference to the region that includes the incoming
	1963	* ram_addr_t.
	1964	*/
	1965	RAMBlock qemu_ram_block_from_host(void ptr, bool round_offset,
	1966	ram_addr_t *offset)
	1967	{
	1968	RAMBlock *block;
	1969	uint8_t *host = ptr;
	1970
	1971	if (xen_enabled()) {
	1972	ram_addr_t ram_addr;
	1973	rcu_read_lock();
	1974	ram_addr = xen_ram_addr_from_mapcache(ptr);
	1975	block = qemu_get_ram_block(ram_addr);
	1976	if (block) {
	1977	*offset = ram_addr - block->offset;
	1978	}
	1979	rcu_read_unlock();
	1980	return block;
	1981	}
	1982
	1983	rcu_read_lock();
	1984	block = atomic_rcu_read(&ram_list.mru_block);
	1985	if (block && block->host && host - block->host < block->max_length) {
	1986	goto found;
	1987	}
	1988
	1989	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	1990	/* This case append when the block is not mapped. */
	1991	if (block->host == NULL) {
	1992	continue;
	1993	}
	1994	if (host - block->host < block->max_length) {
	1995	goto found;
	1996	}
	1997	}
	1998
	1999	rcu_read_unlock();
	2000	return NULL;
	2001
	2002	found:
	2003	*offset = (host - block->host);
	2004	if (round_offset) {
	2005	*offset &= TARGET_PAGE_MASK;
	2006	}
	2007	rcu_read_unlock();
	2008	return block;
	2009	}
	2010
	2011	/*
	2012	* Finds the named RAMBlock
	2013	*
	2014	* name: The name of RAMBlock to find
	2015	*
	2016	* Returns: RAMBlock (or NULL if not found)
	2017	*/
	2018	RAMBlock qemu_ram_block_by_name(const char name)
	2019	{
	2020	RAMBlock *block;
	2021
	2022	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	2023	if (!strcmp(name, block->idstr)) {
	2024	return block;
	2025	}
	2026	}
	2027
	2028	return NULL;
	2029	}
	2030
	2031	/* Some of the softmmu routines need to translate from a host pointer
	2032	(typically a TLB entry) back to a ram offset. */
	2033	ram_addr_t qemu_ram_addr_from_host(void *ptr)
	2034	{
	2035	RAMBlock *block;
	2036	ram_addr_t offset;
	2037
	2038	block = qemu_ram_block_from_host(ptr, false, &offset);
	2039	if (!block) {
	2040	return RAM_ADDR_INVALID;
	2041	}
	2042
	2043	return block->offset + offset;
	2044	}
	2045
	2046	/* Called within RCU critical section. */
	2047	static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
	2048	uint64_t val, unsigned size)
	2049	{
	2050	bool locked = false;
	2051
	2052	if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
	2053	locked = true;
	2054	tb_lock();
	2055	tb_invalidate_phys_page_fast(ram_addr, size);
	2056	}
	2057	switch (size) {
	2058	case 1:
	2059	stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
	2060	break;
	2061	case 2:
	2062	stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
	2063	break;
	2064	case 4:
	2065	stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
	2066	break;
	2067	default:
	2068	abort();
	2069	}
	2070
	2071	if (locked) {
	2072	tb_unlock();
	2073	}
	2074
	2075	/* Set both VGA and migration bits for simplicity and to remove
	2076	* the notdirty callback faster.
	2077	*/
	2078	cpu_physical_memory_set_dirty_range(ram_addr, size,
	2079	DIRTY_CLIENTS_NOCODE);
	2080	/* we remove the notdirty callback only if the code has been
	2081	flushed */
	2082	if (!cpu_physical_memory_is_clean(ram_addr)) {
	2083	tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
	2084	}
	2085	}
	2086
	2087	static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
	2088	unsigned size, bool is_write)
	2089	{
	2090	return is_write;
	2091	}
	2092
	2093	static const MemoryRegionOps notdirty_mem_ops = {
	2094	.write = notdirty_mem_write,
	2095	.valid.accepts = notdirty_mem_accepts,
	2096	.endianness = DEVICE_NATIVE_ENDIAN,
	2097	};
	2098
	2099	/* Generate a debug exception if a watchpoint has been hit. */
	2100	static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
	2101	{
	2102	CPUState *cpu = current_cpu;
	2103	CPUClass *cc = CPU_GET_CLASS(cpu);
	2104	CPUArchState *env = cpu->env_ptr;
	2105	target_ulong pc, cs_base;
	2106	target_ulong vaddr;
	2107	CPUWatchpoint *wp;
	2108	uint32_t cpu_flags;
	2109
	2110	if (cpu->watchpoint_hit) {
	2111	/* We re-entered the check after replacing the TB. Now raise
	2112	* the debug interrupt so that is will trigger after the
	2113	* current instruction. */
	2114	cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
	2115	return;
	2116	}
	2117	vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
	2118	vaddr = cc->adjust_watchpoint_address(cpu, vaddr, len);
	2119	QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
	2120	if (cpu_watchpoint_address_matches(wp, vaddr, len)
	2121	&& (wp->flags & flags)) {
	2122	if (flags == BP_MEM_READ) {
	2123	wp->flags \|= BP_WATCHPOINT_HIT_READ;
	2124	} else {
	2125	wp->flags \|= BP_WATCHPOINT_HIT_WRITE;
	2126	}
	2127	wp->hitaddr = vaddr;
	2128	wp->hitattrs = attrs;
	2129	if (!cpu->watchpoint_hit) {
	2130	if (wp->flags & BP_CPU &&
	2131	!cc->debug_check_watchpoint(cpu, wp)) {
	2132	wp->flags &= ~BP_WATCHPOINT_HIT;
	2133	continue;
	2134	}
	2135	cpu->watchpoint_hit = wp;
	2136
	2137	/* Both tb_lock and iothread_mutex will be reset when
	2138	* cpu_loop_exit or cpu_loop_exit_noexc longjmp
	2139	* back into the cpu_exec main loop.
	2140	*/
	2141	tb_lock();
	2142	tb_check_watchpoint(cpu);
	2143	if (wp->flags & BP_STOP_BEFORE_ACCESS) {
	2144	cpu->exception_index = EXCP_DEBUG;
	2145	cpu_loop_exit(cpu);
	2146	} else {
	2147	cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
	2148	tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
	2149	cpu_loop_exit_noexc(cpu);
	2150	}
	2151	}
	2152	} else {
	2153	wp->flags &= ~BP_WATCHPOINT_HIT;
	2154	}
	2155	}
	2156	}
	2157
	2158	/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
	2159	so these check for a hit then pass through to the normal out-of-line
	2160	phys routines. */
	2161	static MemTxResult watch_mem_read(void opaque, hwaddr addr, uint64_t pdata,
	2162	unsigned size, MemTxAttrs attrs)
	2163	{
	2164	MemTxResult res;
	2165	uint64_t data;
	2166	int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
	2167	AddressSpace *as = current_cpu->cpu_ases[asidx].as;
	2168
	2169	check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
	2170	switch (size) {
	2171	case 1:
	2172	data = address_space_ldub(as, addr, attrs, &res);
	2173	break;
	2174	case 2:
	2175	data = address_space_lduw(as, addr, attrs, &res);
	2176	break;
	2177	case 4:
	2178	data = address_space_ldl(as, addr, attrs, &res);
	2179	break;
	2180	default: abort();
	2181	}
	2182	*pdata = data;
	2183	return res;
	2184	}
	2185
	2186	static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
	2187	uint64_t val, unsigned size,
	2188	MemTxAttrs attrs)
	2189	{
	2190	MemTxResult res;
	2191	int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
	2192	AddressSpace *as = current_cpu->cpu_ases[asidx].as;
	2193
	2194	check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
	2195	switch (size) {
	2196	case 1:
	2197	address_space_stb(as, addr, val, attrs, &res);
	2198	break;
	2199	case 2:
	2200	address_space_stw(as, addr, val, attrs, &res);
	2201	break;
	2202	case 4:
	2203	address_space_stl(as, addr, val, attrs, &res);
	2204	break;
	2205	default: abort();
	2206	}
	2207	return res;
	2208	}
	2209
	2210	static const MemoryRegionOps watch_mem_ops = {
	2211	.read_with_attrs = watch_mem_read,
	2212	.write_with_attrs = watch_mem_write,
	2213	.endianness = DEVICE_NATIVE_ENDIAN,
	2214	};
	2215
	2216	static MemTxResult subpage_read(void opaque, hwaddr addr, uint64_t data,
	2217	unsigned len, MemTxAttrs attrs)
	2218	{
	2219	subpage_t *subpage = opaque;
	2220	uint8_t buf[8];
	2221	MemTxResult res;
	2222
	2223	#if defined(DEBUG_SUBPAGE)
	2224	printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
	2225	subpage, len, addr);
	2226	#endif
	2227	res = address_space_read(subpage->as, addr + subpage->base,
	2228	attrs, buf, len);
	2229	if (res) {
	2230	return res;
	2231	}
	2232	switch (len) {
	2233	case 1:
	2234	*data = ldub_p(buf);
	2235	return MEMTX_OK;
	2236	case 2:
	2237	*data = lduw_p(buf);
	2238	return MEMTX_OK;
	2239	case 4:
	2240	*data = ldl_p(buf);
	2241	return MEMTX_OK;
	2242	case 8:
	2243	*data = ldq_p(buf);
	2244	return MEMTX_OK;
	2245	default:
	2246	abort();
	2247	}
	2248	}
	2249
	2250	static MemTxResult subpage_write(void *opaque, hwaddr addr,
	2251	uint64_t value, unsigned len, MemTxAttrs attrs)
	2252	{
	2253	subpage_t *subpage = opaque;
	2254	uint8_t buf[8];
	2255
	2256	#if defined(DEBUG_SUBPAGE)
	2257	printf("%s: subpage %p len %u addr " TARGET_FMT_plx
	2258	" value %"PRIx64"\n",
	2259	__func__, subpage, len, addr, value);
	2260	#endif
	2261	switch (len) {
	2262	case 1:
	2263	stb_p(buf, value);
	2264	break;
	2265	case 2:
	2266	stw_p(buf, value);
	2267	break;
	2268	case 4:
	2269	stl_p(buf, value);
	2270	break;
	2271	case 8:
	2272	stq_p(buf, value);
	2273	break;
	2274	default:
	2275	abort();
	2276	}
	2277	return address_space_write(subpage->as, addr + subpage->base,
	2278	attrs, buf, len);
	2279	}
	2280
	2281	static bool subpage_accepts(void *opaque, hwaddr addr,
	2282	unsigned len, bool is_write)
	2283	{
	2284	subpage_t *subpage = opaque;
	2285	#if defined(DEBUG_SUBPAGE)
	2286	printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
	2287	__func__, subpage, is_write ? 'w' : 'r', len, addr);
	2288	#endif
	2289
	2290	return address_space_access_valid(subpage->as, addr + subpage->base,
	2291	len, is_write);
	2292	}
	2293
	2294	static const MemoryRegionOps subpage_ops = {
	2295	.read_with_attrs = subpage_read,
	2296	.write_with_attrs = subpage_write,
	2297	.impl.min_access_size = 1,
	2298	.impl.max_access_size = 8,
	2299	.valid.min_access_size = 1,
	2300	.valid.max_access_size = 8,
	2301	.valid.accepts = subpage_accepts,
	2302	.endianness = DEVICE_NATIVE_ENDIAN,
	2303	};
	2304
	2305	static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
	2306	uint16_t section)
	2307	{
	2308	int idx, eidx;
	2309
	2310	if (start >= TARGET_PAGE_SIZE \|\| end >= TARGET_PAGE_SIZE)
	2311	return -1;
	2312	idx = SUBPAGE_IDX(start);
	2313	eidx = SUBPAGE_IDX(end);
	2314	#if defined(DEBUG_SUBPAGE)
	2315	printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
	2316	__func__, mmio, start, end, idx, eidx, section);
	2317	#endif
	2318	for (; idx <= eidx; idx++) {
	2319	mmio->sub_section[idx] = section;
	2320	}
	2321
	2322	return 0;
	2323	}
	2324
	2325	static subpage_t subpage_init(AddressSpace as, hwaddr base)
	2326	{
	2327	subpage_t *mmio;
	2328
	2329	mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
	2330	mmio->as = as;
	2331	mmio->base = base;
	2332	memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
	2333	NULL, TARGET_PAGE_SIZE);
	2334	mmio->iomem.subpage = true;
	2335	#if defined(DEBUG_SUBPAGE)
	2336	printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
	2337	mmio, base, TARGET_PAGE_SIZE);
	2338	#endif
	2339	subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
	2340
	2341	return mmio;
	2342	}
	2343
	2344	static uint16_t dummy_section(PhysPageMap map, AddressSpace as,
	2345	MemoryRegion *mr)
	2346	{
	2347	assert(as);
	2348	MemoryRegionSection section = {
	2349	.address_space = as,
	2350	.mr = mr,
	2351	.offset_within_address_space = 0,
	2352	.offset_within_region = 0,
	2353	.size = int128_2_64(),
	2354	};
	2355
	2356	return phys_section_add(map, &section);
	2357	}
	2358
	2359	MemoryRegion iotlb_to_region(CPUState cpu, hwaddr index, MemTxAttrs attrs)
	2360	{
	2361	int asidx = cpu_asidx_from_attrs(cpu, attrs);
	2362	CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
	2363	AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
	2364	MemoryRegionSection *sections = d->map.sections;
	2365
	2366	return sections[index & ~TARGET_PAGE_MASK].mr;
	2367	}
	2368
	2369	static void io_mem_init(void)
	2370	{
	2371	memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
	2372	memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
	2373	NULL, UINT64_MAX);
	2374
	2375	/* io_mem_notdirty calls tb_invalidate_phys_page_fast,
	2376	* which can be called without the iothread mutex.
	2377	*/
	2378	memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
	2379	NULL, UINT64_MAX);
	2380	memory_region_clear_global_locking(&io_mem_notdirty);
	2381
	2382	memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
	2383	NULL, UINT64_MAX);
	2384	}
	2385
	2386	static void mem_begin(MemoryListener *listener)
	2387	{
	2388	AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
	2389	AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
	2390	uint16_t n;
	2391
	2392	n = dummy_section(&d->map, as, &io_mem_unassigned);
	2393	assert(n == PHYS_SECTION_UNASSIGNED);
	2394	n = dummy_section(&d->map, as, &io_mem_notdirty);
	2395	assert(n == PHYS_SECTION_NOTDIRTY);
	2396	n = dummy_section(&d->map, as, &io_mem_rom);
	2397	assert(n == PHYS_SECTION_ROM);
	2398	n = dummy_section(&d->map, as, &io_mem_watch);
	2399	assert(n == PHYS_SECTION_WATCH);
	2400
	2401	d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
	2402	d->as = as;
	2403	as->next_dispatch = d;
	2404	}
	2405
	2406	static void address_space_dispatch_free(AddressSpaceDispatch *d)
	2407	{
	2408	phys_sections_free(&d->map);
	2409	g_free(d);
	2410	}
	2411
	2412	static void mem_commit(MemoryListener *listener)
	2413	{
	2414	AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
	2415	AddressSpaceDispatch *cur = as->dispatch;
	2416	AddressSpaceDispatch *next = as->next_dispatch;
	2417
	2418	phys_page_compact_all(next, next->map.nodes_nb);
	2419
	2420	atomic_rcu_set(&as->dispatch, next);
	2421	if (cur) {
	2422	call_rcu(cur, address_space_dispatch_free, rcu);
	2423	}
	2424	}
	2425
	2426	static void tcg_commit(MemoryListener *listener)
	2427	{
	2428	CPUAddressSpace *cpuas;
	2429	AddressSpaceDispatch *d;
	2430
	2431	/* since each CPU stores ram addresses in its TLB cache, we must
	2432	reset the modified entries */
	2433	cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
	2434	cpu_reloading_memory_map();
	2435	/* The CPU and TLB are protected by the iothread lock.
	2436	* We reload the dispatch pointer now because cpu_reloading_memory_map()
	2437	* may have split the RCU critical section.
	2438	*/
	2439	d = atomic_rcu_read(&cpuas->as->dispatch);
	2440	atomic_rcu_set(&cpuas->memory_dispatch, d);
	2441	tlb_flush(cpuas->cpu);
	2442	}
	2443
	2444	void address_space_init_dispatch(AddressSpace *as)
	2445	{
	2446	as->dispatch = NULL;
	2447	as->dispatch_listener = (MemoryListener) {
	2448	.begin = mem_begin,
	2449	.commit = mem_commit,
	2450	.region_add = mem_add,
	2451	.region_nop = mem_add,
	2452	.priority = 0,
	2453	};
	2454	memory_listener_register(&as->dispatch_listener, as);
	2455	}
	2456
	2457	void address_space_unregister(AddressSpace *as)
	2458	{
	2459	memory_listener_unregister(&as->dispatch_listener);
	2460	}
	2461
	2462	void address_space_destroy_dispatch(AddressSpace *as)
	2463	{
	2464	AddressSpaceDispatch *d = as->dispatch;
	2465
	2466	atomic_rcu_set(&as->dispatch, NULL);
	2467	if (d) {
	2468	call_rcu(d, address_space_dispatch_free, rcu);
	2469	}
	2470	}
	2471
	2472	static void memory_map_init(void)
	2473	{
	2474	system_memory = g_malloc(sizeof(*system_memory));
	2475
	2476	memory_region_init(system_memory, NULL, "system", UINT64_MAX);
	2477	address_space_init(&address_space_memory, system_memory, "memory");
	2478
	2479	system_io = g_malloc(sizeof(*system_io));
	2480	memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
	2481	65536);
	2482	address_space_init(&address_space_io, system_io, "I/O");
	2483	}
	2484
	2485	MemoryRegion *get_system_memory(void)
	2486	{
	2487	return system_memory;
	2488	}
	2489
	2490	MemoryRegion *get_system_io(void)
	2491	{
	2492	return system_io;
	2493	}
	2494
	2495	#endif /* !defined(CONFIG_USER_ONLY) */
	2496
	2497	/* physical memory access (slow version, mainly for debug) */
	2498	#if defined(CONFIG_USER_ONLY)
	2499	int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
	2500	uint8_t *buf, int len, int is_write)
	2501	{
	2502	int l, flags;
	2503	target_ulong page;
	2504	void * p;
	2505
	2506	while (len > 0) {
	2507	page = addr & TARGET_PAGE_MASK;
	2508	l = (page + TARGET_PAGE_SIZE) - addr;
	2509	if (l > len)
	2510	l = len;
	2511	flags = page_get_flags(page);
	2512	if (!(flags & PAGE_VALID))
	2513	return -1;
	2514	if (is_write) {
	2515	if (!(flags & PAGE_WRITE))
	2516	return -1;
	2517	/* XXX: this code should not depend on lock_user */
	2518	if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
	2519	return -1;
	2520	memcpy(p, buf, l);
	2521	unlock_user(p, addr, l);
	2522	} else {
	2523	if (!(flags & PAGE_READ))
	2524	return -1;
	2525	/* XXX: this code should not depend on lock_user */
	2526	if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
	2527	return -1;
	2528	memcpy(buf, p, l);
	2529	unlock_user(p, addr, 0);
	2530	}
	2531	len -= l;
	2532	buf += l;
	2533	addr += l;
	2534	}
	2535	return 0;
	2536	}
	2537
	2538	#else
	2539
	2540	static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
	2541	hwaddr length)
	2542	{
	2543	uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
	2544	addr += memory_region_get_ram_addr(mr);
	2545
	2546	/* No early return if dirty_log_mask is or becomes 0, because
	2547	* cpu_physical_memory_set_dirty_range will still call
	2548	* xen_modified_memory.
	2549	*/
	2550	if (dirty_log_mask) {
	2551	dirty_log_mask =
	2552	cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
	2553	}
	2554	if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
	2555	tb_lock();
	2556	tb_invalidate_phys_range(addr, addr + length);
	2557	tb_unlock();
	2558	dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
	2559	}
	2560	cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
	2561	}
	2562
	2563	static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
	2564	{
	2565	unsigned access_size_max = mr->ops->valid.max_access_size;
	2566
	2567	/* Regions are assumed to support 1-4 byte accesses unless
	2568	otherwise specified. */
	2569	if (access_size_max == 0) {
	2570	access_size_max = 4;
	2571	}
	2572
	2573	/* Bound the maximum access by the alignment of the address. */
	2574	if (!mr->ops->impl.unaligned) {
	2575	unsigned align_size_max = addr & -addr;
	2576	if (align_size_max != 0 && align_size_max < access_size_max) {
	2577	access_size_max = align_size_max;
	2578	}
	2579	}
	2580
	2581	/* Don't attempt accesses larger than the maximum. */
	2582	if (l > access_size_max) {
	2583	l = access_size_max;
	2584	}
	2585	l = pow2floor(l);
	2586
	2587	return l;
	2588	}
	2589
	2590	static bool prepare_mmio_access(MemoryRegion *mr)
	2591	{
	2592	bool unlocked = !qemu_mutex_iothread_locked();
	2593	bool release_lock = false;
	2594
	2595	if (unlocked && mr->global_locking) {
	2596	qemu_mutex_lock_iothread();
	2597	unlocked = false;
	2598	release_lock = true;
	2599	}
	2600	if (mr->flush_coalesced_mmio) {
	2601	if (unlocked) {
	2602	qemu_mutex_lock_iothread();
	2603	}
	2604	qemu_flush_coalesced_mmio_buffer();
	2605	if (unlocked) {
	2606	qemu_mutex_unlock_iothread();
	2607	}
	2608	}
	2609
	2610	return release_lock;
	2611	}
	2612
	2613	/* Called within RCU critical section. */
	2614	static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
	2615	MemTxAttrs attrs,
	2616	const uint8_t *buf,
	2617	int len, hwaddr addr1,
	2618	hwaddr l, MemoryRegion *mr)
	2619	{
	2620	uint8_t *ptr;
	2621	uint64_t val;
	2622	MemTxResult result = MEMTX_OK;
	2623	bool release_lock = false;
	2624
	2625	for (;;) {
	2626	if (!memory_access_is_direct(mr, true)) {
	2627	release_lock \|= prepare_mmio_access(mr);
	2628	l = memory_access_size(mr, l, addr1);
	2629	/* XXX: could force current_cpu to NULL to avoid
	2630	potential bugs */
	2631	switch (l) {
	2632	case 8:
	2633	/* 64 bit write access */
	2634	val = ldq_p(buf);
	2635	result \|= memory_region_dispatch_write(mr, addr1, val, 8,
	2636	attrs);
	2637	break;
	2638	case 4:
	2639	/* 32 bit write access */
	2640	val = (uint32_t)ldl_p(buf);
	2641	result \|= memory_region_dispatch_write(mr, addr1, val, 4,
	2642	attrs);
	2643	break;
	2644	case 2:
	2645	/* 16 bit write access */
	2646	val = lduw_p(buf);
	2647	result \|= memory_region_dispatch_write(mr, addr1, val, 2,
	2648	attrs);
	2649	break;
	2650	case 1:
	2651	/* 8 bit write access */
	2652	val = ldub_p(buf);
	2653	result \|= memory_region_dispatch_write(mr, addr1, val, 1,
	2654	attrs);
	2655	break;
	2656	default:
	2657	abort();
	2658	}
	2659	} else {
	2660	/* RAM case */
	2661	ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
	2662	memcpy(ptr, buf, l);
	2663	invalidate_and_set_dirty(mr, addr1, l);
	2664	}
	2665
	2666	if (release_lock) {
	2667	qemu_mutex_unlock_iothread();
	2668	release_lock = false;
	2669	}
	2670
	2671	len -= l;
	2672	buf += l;
	2673	addr += l;
	2674
	2675	if (!len) {
	2676	break;
	2677	}
	2678
	2679	l = len;
	2680	mr = address_space_translate(as, addr, &addr1, &l, true);
	2681	}
	2682
	2683	return result;
	2684	}
	2685
	2686	MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
	2687	const uint8_t *buf, int len)
	2688	{
	2689	hwaddr l;
	2690	hwaddr addr1;
	2691	MemoryRegion *mr;
	2692	MemTxResult result = MEMTX_OK;
	2693
	2694	if (len > 0) {
	2695	rcu_read_lock();
	2696	l = len;
	2697	mr = address_space_translate(as, addr, &addr1, &l, true);
	2698	result = address_space_write_continue(as, addr, attrs, buf, len,
	2699	addr1, l, mr);
	2700	rcu_read_unlock();
	2701	}
	2702
	2703	return result;
	2704	}
	2705
	2706	/* Called within RCU critical section. */
	2707	MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
	2708	MemTxAttrs attrs, uint8_t *buf,
	2709	int len, hwaddr addr1, hwaddr l,
	2710	MemoryRegion *mr)
	2711	{
	2712	uint8_t *ptr;
	2713	uint64_t val;
	2714	MemTxResult result = MEMTX_OK;
	2715	bool release_lock = false;
	2716
	2717	for (;;) {
	2718	if (!memory_access_is_direct(mr, false)) {
	2719	/* I/O case */
	2720	release_lock \|= prepare_mmio_access(mr);
	2721	l = memory_access_size(mr, l, addr1);
	2722	switch (l) {
	2723	case 8:
	2724	/* 64 bit read access */
	2725	result \|= memory_region_dispatch_read(mr, addr1, &val, 8,
	2726	attrs);
	2727	stq_p(buf, val);
	2728	break;
	2729	case 4:
	2730	/* 32 bit read access */
	2731	result \|= memory_region_dispatch_read(mr, addr1, &val, 4,
	2732	attrs);
	2733	stl_p(buf, val);
	2734	break;
	2735	case 2:
	2736	/* 16 bit read access */
	2737	result \|= memory_region_dispatch_read(mr, addr1, &val, 2,
	2738	attrs);
	2739	stw_p(buf, val);
	2740	break;
	2741	case 1:
	2742	/* 8 bit read access */
	2743	result \|= memory_region_dispatch_read(mr, addr1, &val, 1,
	2744	attrs);
	2745	stb_p(buf, val);
	2746	break;
	2747	default:
	2748	abort();
	2749	}
	2750	} else {
	2751	/* RAM case */
	2752	ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
	2753	memcpy(buf, ptr, l);
	2754	}
	2755
	2756	if (release_lock) {
	2757	qemu_mutex_unlock_iothread();
	2758	release_lock = false;
	2759	}
	2760
	2761	len -= l;
	2762	buf += l;
	2763	addr += l;
	2764
	2765	if (!len) {
	2766	break;
	2767	}
	2768
	2769	l = len;
	2770	mr = address_space_translate(as, addr, &addr1, &l, false);
	2771	}
	2772
	2773	return result;
	2774	}
	2775
	2776	MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
	2777	MemTxAttrs attrs, uint8_t *buf, int len)
	2778	{
	2779	hwaddr l;
	2780	hwaddr addr1;
	2781	MemoryRegion *mr;
	2782	MemTxResult result = MEMTX_OK;
	2783
	2784	if (len > 0) {
	2785	rcu_read_lock();
	2786	l = len;
	2787	mr = address_space_translate(as, addr, &addr1, &l, false);
	2788	result = address_space_read_continue(as, addr, attrs, buf, len,
	2789	addr1, l, mr);
	2790	rcu_read_unlock();
	2791	}
	2792
	2793	return result;
	2794	}
	2795
	2796	MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
	2797	uint8_t *buf, int len, bool is_write)
	2798	{
	2799	if (is_write) {
	2800	return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
	2801	} else {
	2802	return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
	2803	}
	2804	}
	2805
	2806	void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
	2807	int len, int is_write)
	2808	{
	2809	address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
	2810	buf, len, is_write);
	2811	}
	2812
	2813	enum write_rom_type {
	2814	WRITE_DATA,
	2815	FLUSH_CACHE,
	2816	};
	2817
	2818	static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
	2819	hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
	2820	{
	2821	hwaddr l;
	2822	uint8_t *ptr;
	2823	hwaddr addr1;
	2824	MemoryRegion *mr;
	2825
	2826	rcu_read_lock();
	2827	while (len > 0) {
	2828	l = len;
	2829	mr = address_space_translate(as, addr, &addr1, &l, true);
	2830
	2831	if (!(memory_region_is_ram(mr) \|\|
	2832	memory_region_is_romd(mr))) {
	2833	l = memory_access_size(mr, l, addr1);
	2834	} else {
	2835	/* ROM/RAM case */
	2836	ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
	2837	switch (type) {
	2838	case WRITE_DATA:
	2839	memcpy(ptr, buf, l);
	2840	invalidate_and_set_dirty(mr, addr1, l);
	2841	break;
	2842	case FLUSH_CACHE:
	2843	flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
	2844	break;
	2845	}
	2846	}
	2847	len -= l;
	2848	buf += l;
	2849	addr += l;
	2850	}
	2851	rcu_read_unlock();
	2852	}
	2853
	2854	/* used for ROM loading : can write in RAM and ROM */
	2855	void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
	2856	const uint8_t *buf, int len)
	2857	{
	2858	cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
	2859	}
	2860
	2861	void cpu_flush_icache_range(hwaddr start, int len)
	2862	{
	2863	/*
	2864	* This function should do the same thing as an icache flush that was
	2865	* triggered from within the guest. For TCG we are always cache coherent,
	2866	* so there is no need to flush anything. For KVM / Xen we need to flush
	2867	* the host's instruction cache at least.
	2868	*/
	2869	if (tcg_enabled()) {
	2870	return;
	2871	}
	2872
	2873	cpu_physical_memory_write_rom_internal(&address_space_memory,
	2874	start, NULL, len, FLUSH_CACHE);
	2875	}
	2876
	2877	typedef struct {
	2878	MemoryRegion *mr;
	2879	void *buffer;
	2880	hwaddr addr;
	2881	hwaddr len;
	2882	bool in_use;
	2883	} BounceBuffer;
	2884
	2885	static BounceBuffer bounce;
	2886
	2887	typedef struct MapClient {
	2888	QEMUBH *bh;
	2889	QLIST_ENTRY(MapClient) link;
	2890	} MapClient;
	2891
	2892	QemuMutex map_client_list_lock;
	2893	static QLIST_HEAD(map_client_list, MapClient) map_client_list
	2894	= QLIST_HEAD_INITIALIZER(map_client_list);
	2895
	2896	static void cpu_unregister_map_client_do(MapClient *client)
	2897	{
	2898	QLIST_REMOVE(client, link);
	2899	g_free(client);
	2900	}
	2901
	2902	static void cpu_notify_map_clients_locked(void)
	2903	{
	2904	MapClient *client;
	2905
	2906	while (!QLIST_EMPTY(&map_client_list)) {
	2907	client = QLIST_FIRST(&map_client_list);
	2908	qemu_bh_schedule(client->bh);
	2909	cpu_unregister_map_client_do(client);
	2910	}
	2911	}
	2912
	2913	void cpu_register_map_client(QEMUBH *bh)
	2914	{
	2915	MapClient client = g_malloc(sizeof(client));
	2916
	2917	qemu_mutex_lock(&map_client_list_lock);
	2918	client->bh = bh;
	2919	QLIST_INSERT_HEAD(&map_client_list, client, link);
	2920	if (!atomic_read(&bounce.in_use)) {
	2921	cpu_notify_map_clients_locked();
	2922	}
	2923	qemu_mutex_unlock(&map_client_list_lock);
	2924	}
	2925
	2926	void cpu_exec_init_all(void)
	2927	{
	2928	qemu_mutex_init(&ram_list.mutex);
	2929	/* The data structures we set up here depend on knowing the page size,
	2930	* so no more changes can be made after this point.
	2931	* In an ideal world, nothing we did before we had finished the
	2932	* machine setup would care about the target page size, and we could
	2933	* do this much later, rather than requiring board models to state
	2934	* up front what their requirements are.
	2935	*/
	2936	finalize_target_page_bits();
	2937	io_mem_init();
	2938	memory_map_init();
	2939	qemu_mutex_init(&map_client_list_lock);
	2940	}
	2941
	2942	void cpu_unregister_map_client(QEMUBH *bh)
	2943	{
	2944	MapClient *client;
	2945
	2946	qemu_mutex_lock(&map_client_list_lock);
	2947	QLIST_FOREACH(client, &map_client_list, link) {
	2948	if (client->bh == bh) {
	2949	cpu_unregister_map_client_do(client);
	2950	break;
	2951	}
	2952	}
	2953	qemu_mutex_unlock(&map_client_list_lock);
	2954	}
	2955
	2956	static void cpu_notify_map_clients(void)
	2957	{
	2958	qemu_mutex_lock(&map_client_list_lock);
	2959	cpu_notify_map_clients_locked();
	2960	qemu_mutex_unlock(&map_client_list_lock);
	2961	}
	2962
	2963	bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
	2964	{
	2965	MemoryRegion *mr;
	2966	hwaddr l, xlat;
	2967
	2968	rcu_read_lock();
	2969	while (len > 0) {
	2970	l = len;
	2971	mr = address_space_translate(as, addr, &xlat, &l, is_write);
	2972	if (!memory_access_is_direct(mr, is_write)) {
	2973	l = memory_access_size(mr, l, addr);
	2974	if (!memory_region_access_valid(mr, xlat, l, is_write)) {
	2975	rcu_read_unlock();
	2976	return false;
	2977	}
	2978	}
	2979
	2980	len -= l;
	2981	addr += l;
	2982	}
	2983	rcu_read_unlock();
	2984	return true;
	2985	}
	2986
	2987	static hwaddr
	2988	address_space_extend_translation(AddressSpace *as, hwaddr addr, hwaddr target_len,
	2989	MemoryRegion *mr, hwaddr base, hwaddr len,
	2990	bool is_write)
	2991	{
	2992	hwaddr done = 0;
	2993	hwaddr xlat;
	2994	MemoryRegion *this_mr;
	2995
	2996	for (;;) {
	2997	target_len -= len;
	2998	addr += len;
	2999	done += len;
	3000	if (target_len == 0) {
	3001	return done;
	3002	}
	3003
	3004	len = target_len;
	3005	this_mr = address_space_translate(as, addr, &xlat, &len, is_write);
	3006	if (this_mr != mr \|\| xlat != base + done) {
	3007	return done;
	3008	}
	3009	}
	3010	}
	3011
	3012	/* Map a physical memory region into a host virtual address.
	3013	* May map a subset of the requested range, given by and returned in *plen.
	3014	* May return NULL if resources needed to perform the mapping are exhausted.
	3015	* Use only for reads OR writes - not for read-modify-write operations.
	3016	* Use cpu_register_map_client() to know when retrying the map operation is
	3017	* likely to succeed.
	3018	*/
	3019	void address_space_map(AddressSpace as,
	3020	hwaddr addr,
	3021	hwaddr *plen,
	3022	bool is_write)
	3023	{
	3024	hwaddr len = *plen;
	3025	hwaddr l, xlat;
	3026	MemoryRegion *mr;
	3027	void *ptr;
	3028
	3029	if (len == 0) {
	3030	return NULL;
	3031	}
	3032
	3033	l = len;
	3034	rcu_read_lock();
	3035	mr = address_space_translate(as, addr, &xlat, &l, is_write);
	3036
	3037	if (!memory_access_is_direct(mr, is_write)) {
	3038	if (atomic_xchg(&bounce.in_use, true)) {
	3039	rcu_read_unlock();
	3040	return NULL;
	3041	}
	3042	/* Avoid unbounded allocations */
	3043	l = MIN(l, TARGET_PAGE_SIZE);
	3044	bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
	3045	bounce.addr = addr;
	3046	bounce.len = l;
	3047
	3048	memory_region_ref(mr);
	3049	bounce.mr = mr;
	3050	if (!is_write) {
	3051	address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
	3052	bounce.buffer, l);
	3053	}
	3054
	3055	rcu_read_unlock();
	3056	*plen = l;
	3057	return bounce.buffer;
	3058	}
	3059
	3060
	3061	memory_region_ref(mr);
	3062	*plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
	3063	ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen);
	3064	rcu_read_unlock();
	3065
	3066	return ptr;
	3067	}
	3068
	3069	/* Unmaps a memory region previously mapped by address_space_map().
	3070	* Will also mark the memory as dirty if is_write == 1. access_len gives
	3071	* the amount of memory that was actually read or written by the caller.
	3072	*/
	3073	void address_space_unmap(AddressSpace as, void buffer, hwaddr len,
	3074	int is_write, hwaddr access_len)
	3075	{
	3076	if (buffer != bounce.buffer) {
	3077	MemoryRegion *mr;
	3078	ram_addr_t addr1;
	3079
	3080	mr = memory_region_from_host(buffer, &addr1);
	3081	assert(mr != NULL);
	3082	if (is_write) {
	3083	invalidate_and_set_dirty(mr, addr1, access_len);
	3084	}
	3085	if (xen_enabled()) {
	3086	xen_invalidate_map_cache_entry(buffer);
	3087	}
	3088	memory_region_unref(mr);
	3089	return;
	3090	}
	3091	if (is_write) {
	3092	address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
	3093	bounce.buffer, access_len);
	3094	}
	3095	qemu_vfree(bounce.buffer);
	3096	bounce.buffer = NULL;
	3097	memory_region_unref(bounce.mr);
	3098	atomic_mb_set(&bounce.in_use, false);
	3099	cpu_notify_map_clients();
	3100	}
	3101
	3102	void *cpu_physical_memory_map(hwaddr addr,
	3103	hwaddr *plen,
	3104	int is_write)
	3105	{
	3106	return address_space_map(&address_space_memory, addr, plen, is_write);
	3107	}
	3108
	3109	void cpu_physical_memory_unmap(void *buffer, hwaddr len,
	3110	int is_write, hwaddr access_len)
	3111	{
	3112	return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
	3113	}
	3114
	3115	#define ARG1_DECL AddressSpace *as
	3116	#define ARG1 as
	3117	#define SUFFIX
	3118	#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__)
	3119	#define IS_DIRECT(mr, is_write) memory_access_is_direct(mr, is_write)
	3120	#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs)
	3121	#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
	3122	#define RCU_READ_LOCK(...) rcu_read_lock()
	3123	#define RCU_READ_UNLOCK(...) rcu_read_unlock()
	3124	#include "memory_ldst.inc.c"
	3125
	3126	int64_t address_space_cache_init(MemoryRegionCache *cache,
	3127	AddressSpace *as,
	3128	hwaddr addr,
	3129	hwaddr len,
	3130	bool is_write)
	3131	{
	3132	hwaddr l, xlat;
	3133	MemoryRegion *mr;
	3134	void *ptr;
	3135
	3136	assert(len > 0);
	3137
	3138	l = len;
	3139	mr = address_space_translate(as, addr, &xlat, &l, is_write);
	3140	if (!memory_access_is_direct(mr, is_write)) {
	3141	return -EINVAL;
	3142	}
	3143
	3144	l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
	3145	ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l);
	3146
	3147	cache->xlat = xlat;
	3148	cache->is_write = is_write;
	3149	cache->mr = mr;
	3150	cache->ptr = ptr;
	3151	cache->len = l;
	3152	memory_region_ref(cache->mr);
	3153
	3154	return l;
	3155	}
	3156
	3157	void address_space_cache_invalidate(MemoryRegionCache *cache,
	3158	hwaddr addr,
	3159	hwaddr access_len)
	3160	{
	3161	assert(cache->is_write);
	3162	invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len);
	3163	}
	3164
	3165	void address_space_cache_destroy(MemoryRegionCache *cache)
	3166	{
	3167	if (!cache->mr) {
	3168	return;
	3169	}
	3170
	3171	if (xen_enabled()) {
	3172	xen_invalidate_map_cache_entry(cache->ptr);
	3173	}
	3174	memory_region_unref(cache->mr);
	3175	cache->mr = NULL;
	3176	}
	3177
	3178	/* Called from RCU critical section. This function has the same
	3179	* semantics as address_space_translate, but it only works on a
	3180	* predefined range of a MemoryRegion that was mapped with
	3181	* address_space_cache_init.
	3182	*/
	3183	static inline MemoryRegion *address_space_translate_cached(
	3184	MemoryRegionCache cache, hwaddr addr, hwaddr xlat,
	3185	hwaddr *plen, bool is_write)
	3186	{
	3187	assert(addr < cache->len && *plen <= cache->len - addr);
	3188	*xlat = addr + cache->xlat;
	3189	return cache->mr;
	3190	}
	3191
	3192	#define ARG1_DECL MemoryRegionCache *cache
	3193	#define ARG1 cache
	3194	#define SUFFIX _cached
	3195	#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
	3196	#define IS_DIRECT(mr, is_write) true
	3197	#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat))
	3198	#define INVALIDATE(mr, ofs, len) ((void)0)
	3199	#define RCU_READ_LOCK() ((void)0)
	3200	#define RCU_READ_UNLOCK() ((void)0)
	3201	#include "memory_ldst.inc.c"
	3202
	3203	/* virtual memory access for debug (includes writing to ROM) */
	3204	int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
	3205	uint8_t *buf, int len, int is_write)
	3206	{
	3207	int l;
	3208	hwaddr phys_addr;
	3209	target_ulong page;
	3210
	3211	while (len > 0) {
	3212	int asidx;
	3213	MemTxAttrs attrs;
	3214
	3215	page = addr & TARGET_PAGE_MASK;
	3216	phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
	3217	asidx = cpu_asidx_from_attrs(cpu, attrs);
	3218	/* if no physical page mapped, return an error */
	3219	if (phys_addr == -1)
	3220	return -1;
	3221	l = (page + TARGET_PAGE_SIZE) - addr;
	3222	if (l > len)
	3223	l = len;
	3224	phys_addr += (addr & ~TARGET_PAGE_MASK);
	3225	if (is_write) {
	3226	cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
	3227	phys_addr, buf, l);
	3228	} else {
	3229	address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
	3230	MEMTXATTRS_UNSPECIFIED,
	3231	buf, l, 0);
	3232	}
	3233	len -= l;
	3234	buf += l;
	3235	addr += l;
	3236	}
	3237	return 0;
	3238	}
	3239
	3240	/*
	3241	* Allows code that needs to deal with migration bitmaps etc to still be built
	3242	* target independent.
	3243	*/
	3244	size_t qemu_target_page_bits(void)
	3245	{
	3246	return TARGET_PAGE_BITS;
	3247	}
	3248
	3249	#endif
	3250
	/*
	 * A helper function for the _utterly broken_ virtio device model to find
	 * out if it's running on a big endian machine. Don't do this at home kids!
	 *
	 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
	 * false otherwise.
	 */
	bool target_words_bigendian(void);
	bool target_words_bigendian(void)
	{
	#if defined(TARGET_WORDS_BIGENDIAN)
	    const bool big_endian = true;
	#else
	    const bool big_endian = false;
	#endif

	    return big_endian;
	}
	3264
	3265	#ifndef CONFIG_USER_ONLY
	3266	bool cpu_physical_memory_is_io(hwaddr phys_addr)
	3267	{
	3268	MemoryRegion*mr;
	3269	hwaddr l = 1;
	3270	bool res;
	3271
	3272	rcu_read_lock();
	3273	mr = address_space_translate(&address_space_memory,
	3274	phys_addr, &phys_addr, &l, false);
	3275
	3276	res = !(memory_region_is_ram(mr) \|\| memory_region_is_romd(mr));
	3277	rcu_read_unlock();
	3278	return res;
	3279	}
	3280
	3281	int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
	3282	{
	3283	RAMBlock *block;
	3284	int ret = 0;
	3285
	3286	rcu_read_lock();
	3287	QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
	3288	ret = func(block->idstr, block->host, block->offset,
	3289	block->used_length, opaque);
	3290	if (ret) {
	3291	break;
	3292	}
	3293	}
	3294	rcu_read_unlock();
	3295	return ret;
	3296	}
	3297	#endif