Git Repo - linux.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* linux/fs/super.c
	3	*
	4	* Copyright (C) 1991, 1992 Linus Torvalds
	5	*
	6	* super.c contains code to handle: - mount structures
	7	* - super-block tables
	8	* - filesystem drivers list
	9	* - mount system call
	10	* - umount system call
	11	* - ustat system call
	12	*
	13	* GK 2/5/95 - Changed to support mounting the root fs via NFS
	14	*
	15	* Added kerneld support: Jacques Gelinas and Bjorn Ekwall
	16	* Added change_root: Werner Almesberger & Hans Lermen, Feb '96
	17	* Added options to /proc/mounts:
	18	* Torbjörn Lindh ([email protected]), April 14, 1996.
	19	* Added devfs support: Richard Gooch <[email protected]>, 13-JAN-1998
	20	* Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
	21	*/
	22
	23	#include <linux/export.h>
	24	#include <linux/slab.h>
	25	#include <linux/blkdev.h>
	26	#include <linux/mount.h>
	27	#include <linux/security.h>
	28	#include <linux/writeback.h> /* for the emergency remount stuff */
	29	#include <linux/idr.h>
	30	#include <linux/mutex.h>
	31	#include <linux/backing-dev.h>
	32	#include <linux/rculist_bl.h>
	33	#include <linux/cleancache.h>
	34	#include <linux/fsnotify.h>
	35	#include <linux/lockdep.h>
	36	#include <linux/user_namespace.h>
	37	#include "internal.h"
	38
	39
	40	static LIST_HEAD(super_blocks);
	41	static DEFINE_SPINLOCK(sb_lock);
	42
	43	static char *sb_writers_name[SB_FREEZE_LEVELS] = {
	44	"sb_writers",
	45	"sb_pagefaults",
	46	"sb_internal",
	47	};
	48
	49	/*
	50	* One thing we have to be careful of with a per-sb shrinker is that we don't
	51	* drop the last active reference to the superblock from within the shrinker.
	52	* If that happens we could trigger unregistering the shrinker from within the
	53	* shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
	54	* take a passive reference to the superblock to avoid this from occurring.
	55	*/
	56	static unsigned long super_cache_scan(struct shrinker *shrink,
	57	struct shrink_control *sc)
	58	{
	59	struct super_block *sb;
	60	long fs_objects = 0;
	61	long total_objects;
	62	long freed = 0;
	63	long dentries;
	64	long inodes;
	65
	66	sb = container_of(shrink, struct super_block, s_shrink);
	67
	68	/*
	69	* Deadlock avoidance. We may hold various FS locks, and we don't want
	70	* to recurse into the FS that called us in clear_inode() and friends..
	71	*/
	72	if (!(sc->gfp_mask & __GFP_FS))
	73	return SHRINK_STOP;
	74
	75	if (!trylock_super(sb))
	76	return SHRINK_STOP;
	77
	78	if (sb->s_op->nr_cached_objects)
	79	fs_objects = sb->s_op->nr_cached_objects(sb, sc);
	80
	81	inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
	82	dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
	83	total_objects = dentries + inodes + fs_objects + 1;
	84	if (!total_objects)
	85	total_objects = 1;
	86
	87	/* proportion the scan between the caches */
	88	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
	89	inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
	90	fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
	91
	92	/*
	93	* prune the dcache first as the icache is pinned by it, then
	94	* prune the icache, followed by the filesystem specific caches
	95	*
	96	* Ensure that we always scan at least one object - memcg kmem
	97	* accounting uses this to fully empty the caches.
	98	*/
	99	sc->nr_to_scan = dentries + 1;
	100	freed = prune_dcache_sb(sb, sc);
	101	sc->nr_to_scan = inodes + 1;
	102	freed += prune_icache_sb(sb, sc);
	103
	104	if (fs_objects) {
	105	sc->nr_to_scan = fs_objects + 1;
	106	freed += sb->s_op->free_cached_objects(sb, sc);
	107	}
	108
	109	up_read(&sb->s_umount);
	110	return freed;
	111	}
	112
	113	static unsigned long super_cache_count(struct shrinker *shrink,
	114	struct shrink_control *sc)
	115	{
	116	struct super_block *sb;
	117	long total_objects = 0;
	118
	119	sb = container_of(shrink, struct super_block, s_shrink);
	120
	121	/*
	122	* Don't call trylock_super as it is a potential
	123	* scalability bottleneck. The counts could get updated
	124	* between super_cache_count and super_cache_scan anyway.
	125	* Call to super_cache_count with shrinker_rwsem held
	126	* ensures the safety of call to list_lru_shrink_count() and
	127	* s_op->nr_cached_objects().
	128	*/
	129	if (sb->s_op && sb->s_op->nr_cached_objects)
	130	total_objects = sb->s_op->nr_cached_objects(sb, sc);
	131
	132	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
	133	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
	134
	135	total_objects = vfs_pressure_ratio(total_objects);
	136	return total_objects;
	137	}
	138
	139	static void destroy_super_work(struct work_struct *work)
	140	{
	141	struct super_block *s = container_of(work, struct super_block,
	142	destroy_work);
	143	int i;
	144
	145	for (i = 0; i < SB_FREEZE_LEVELS; i++)
	146	percpu_free_rwsem(&s->s_writers.rw_sem[i]);
	147	kfree(s);
	148	}
	149
	150	static void destroy_super_rcu(struct rcu_head *head)
	151	{
	152	struct super_block *s = container_of(head, struct super_block, rcu);
	153	INIT_WORK(&s->destroy_work, destroy_super_work);
	154	schedule_work(&s->destroy_work);
	155	}
	156
	157	/**
	158	* destroy_super - frees a superblock
	159	* @s: superblock to free
	160	*
	161	* Frees a superblock.
	162	*/
	163	static void destroy_super(struct super_block *s)
	164	{
	165	list_lru_destroy(&s->s_dentry_lru);
	166	list_lru_destroy(&s->s_inode_lru);
	167	security_sb_free(s);
	168	WARN_ON(!list_empty(&s->s_mounts));
	169	put_user_ns(s->s_user_ns);
	170	kfree(s->s_subtype);
	171	kfree(s->s_options);
	172	call_rcu(&s->rcu, destroy_super_rcu);
	173	}
	174
	175	/**
	176	* alloc_super - create new superblock
	177	* @type: filesystem type superblock should belong to
	178	* @flags: the mount flags
	179	* @user_ns: User namespace for the super_block
	180	*
	181	* Allocates and initializes a new &struct super_block. alloc_super()
	182	* returns a pointer new superblock or %NULL if allocation had failed.
	183	*/
	184	static struct super_block alloc_super(struct file_system_type type, int flags,
	185	struct user_namespace *user_ns)
	186	{
	187	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
	188	static const struct super_operations default_op;
	189	int i;
	190
	191	if (!s)
	192	return NULL;
	193
	194	INIT_LIST_HEAD(&s->s_mounts);
	195	s->s_user_ns = get_user_ns(user_ns);
	196
	197	if (security_sb_alloc(s))
	198	goto fail;
	199
	200	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
	201	if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
	202	sb_writers_name[i],
	203	&type->s_writers_key[i]))
	204	goto fail;
	205	}
	206	init_waitqueue_head(&s->s_writers.wait_unfrozen);
	207	s->s_bdi = &noop_backing_dev_info;
	208	s->s_flags = flags;
	209	if (s->s_user_ns != &init_user_ns)
	210	s->s_iflags \|= SB_I_NODEV;
	211	INIT_HLIST_NODE(&s->s_instances);
	212	INIT_HLIST_BL_HEAD(&s->s_anon);
	213	mutex_init(&s->s_sync_lock);
	214	INIT_LIST_HEAD(&s->s_inodes);
	215	spin_lock_init(&s->s_inode_list_lock);
	216	INIT_LIST_HEAD(&s->s_inodes_wb);
	217	spin_lock_init(&s->s_inode_wblist_lock);
	218
	219	if (list_lru_init_memcg(&s->s_dentry_lru))
	220	goto fail;
	221	if (list_lru_init_memcg(&s->s_inode_lru))
	222	goto fail;
	223
	224	init_rwsem(&s->s_umount);
	225	lockdep_set_class(&s->s_umount, &type->s_umount_key);
	226	/*
	227	* sget() can have s_umount recursion.
	228	*
	229	* When it cannot find a suitable sb, it allocates a new
	230	* one (this one), and tries again to find a suitable old
	231	* one.
	232	*
	233	* In case that succeeds, it will acquire the s_umount
	234	* lock of the old one. Since these are clearly distrinct
	235	* locks, and this object isn't exposed yet, there's no
	236	* risk of deadlocks.
	237	*
	238	* Annotate this by putting this lock in a different
	239	* subclass.
	240	*/
	241	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
	242	s->s_count = 1;
	243	atomic_set(&s->s_active, 1);
	244	mutex_init(&s->s_vfs_rename_mutex);
	245	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	246	mutex_init(&s->s_dquot.dqio_mutex);
	247	mutex_init(&s->s_dquot.dqonoff_mutex);
	248	s->s_maxbytes = MAX_NON_LFS;
	249	s->s_op = &default_op;
	250	s->s_time_gran = 1000000000;
	251	s->cleancache_poolid = CLEANCACHE_NO_POOL;
	252
	253	s->s_shrink.seeks = DEFAULT_SEEKS;
	254	s->s_shrink.scan_objects = super_cache_scan;
	255	s->s_shrink.count_objects = super_cache_count;
	256	s->s_shrink.batch = 1024;
	257	s->s_shrink.flags = SHRINKER_NUMA_AWARE \| SHRINKER_MEMCG_AWARE;
	258	return s;
	259
	260	fail:
	261	destroy_super(s);
	262	return NULL;
	263	}
	264
	265	/* Superblock refcounting */
	266
	267	/*
	268	* Drop a superblock's refcount. The caller must hold sb_lock.
	269	*/
	270	static void __put_super(struct super_block *sb)
	271	{
	272	if (!--sb->s_count) {
	273	list_del_init(&sb->s_list);
	274	destroy_super(sb);
	275	}
	276	}
	277
	278	/**
	279	* put_super - drop a temporary reference to superblock
	280	* @sb: superblock in question
	281	*
	282	* Drops a temporary reference, frees superblock if there's no
	283	* references left.
	284	*/
	285	static void put_super(struct super_block *sb)
	286	{
	287	spin_lock(&sb_lock);
	288	__put_super(sb);
	289	spin_unlock(&sb_lock);
	290	}
	291
	292
	293	/**
	294	* deactivate_locked_super - drop an active reference to superblock
	295	* @s: superblock to deactivate
	296	*
	297	* Drops an active reference to superblock, converting it into a temporary
	298	* one if there is no other active references left. In that case we
	299	* tell fs driver to shut it down and drop the temporary reference we
	300	* had just acquired.
	301	*
	302	* Caller holds exclusive lock on superblock; that lock is released.
	303	*/
	304	void deactivate_locked_super(struct super_block *s)
	305	{
	306	struct file_system_type *fs = s->s_type;
	307	if (atomic_dec_and_test(&s->s_active)) {
	308	cleancache_invalidate_fs(s);
	309	unregister_shrinker(&s->s_shrink);
	310	fs->kill_sb(s);
	311
	312	/*
	313	* Since list_lru_destroy() may sleep, we cannot call it from
	314	* put_super(), where we hold the sb_lock. Therefore we destroy
	315	* the lru lists right now.
	316	*/
	317	list_lru_destroy(&s->s_dentry_lru);
	318	list_lru_destroy(&s->s_inode_lru);
	319
	320	put_filesystem(fs);
	321	put_super(s);
	322	} else {
	323	up_write(&s->s_umount);
	324	}
	325	}
	326
	327	EXPORT_SYMBOL(deactivate_locked_super);
	328
	329	/**
	330	* deactivate_super - drop an active reference to superblock
	331	* @s: superblock to deactivate
	332	*
	333	* Variant of deactivate_locked_super(), except that superblock is not
	334	* locked by caller. If we are going to drop the final active reference,
	335	* lock will be acquired prior to that.
	336	*/
	337	void deactivate_super(struct super_block *s)
	338	{
	339	if (!atomic_add_unless(&s->s_active, -1, 1)) {
	340	down_write(&s->s_umount);
	341	deactivate_locked_super(s);
	342	}
	343	}
	344
	345	EXPORT_SYMBOL(deactivate_super);
	346
	347	/**
	348	* grab_super - acquire an active reference
	349	* @s: reference we are trying to make active
	350	*
	351	* Tries to acquire an active reference. grab_super() is used when we
	352	* had just found a superblock in super_blocks or fs_type->fs_supers
	353	* and want to turn it into a full-blown active reference. grab_super()
	354	* is called with sb_lock held and drops it. Returns 1 in case of
	355	* success, 0 if we had failed (superblock contents was already dead or
	356	* dying when grab_super() had been called). Note that this is only
	357	* called for superblocks not in rundown mode (== ones still on ->fs_supers
	358	* of their type), so increment of ->s_count is OK here.
	359	*/
	360	static int grab_super(struct super_block *s) __releases(sb_lock)
	361	{
	362	s->s_count++;
	363	spin_unlock(&sb_lock);
	364	down_write(&s->s_umount);
	365	if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
	366	put_super(s);
	367	return 1;
	368	}
	369	up_write(&s->s_umount);
	370	put_super(s);
	371	return 0;
	372	}
	373
	374	/*
	375	* trylock_super - try to grab ->s_umount shared
	376	* @sb: reference we are trying to grab
	377	*
	378	* Try to prevent fs shutdown. This is used in places where we
	379	* cannot take an active reference but we need to ensure that the
	380	* filesystem is not shut down while we are working on it. It returns
	381	* false if we cannot acquire s_umount or if we lose the race and
	382	* filesystem already got into shutdown, and returns true with the s_umount
	383	* lock held in read mode in case of success. On successful return,
	384	* the caller must drop the s_umount lock when done.
	385	*
	386	* Note that unlike get_super() et.al. this one does not bump ->s_count.
	387	* The reason why it's safe is that we are OK with doing trylock instead
	388	* of down_read(). There's a couple of places that are OK with that, but
	389	* it's very much not a general-purpose interface.
	390	*/
	391	bool trylock_super(struct super_block *sb)
	392	{
	393	if (down_read_trylock(&sb->s_umount)) {
	394	if (!hlist_unhashed(&sb->s_instances) &&
	395	sb->s_root && (sb->s_flags & MS_BORN))
	396	return true;
	397	up_read(&sb->s_umount);
	398	}
	399
	400	return false;
	401	}
	402
	403	/**
	404	* generic_shutdown_super - common helper for ->kill_sb()
	405	* @sb: superblock to kill
	406	*
	407	* generic_shutdown_super() does all fs-independent work on superblock
	408	* shutdown. Typical ->kill_sb() should pick all fs-specific objects
	409	* that need destruction out of superblock, call generic_shutdown_super()
	410	* and release aforementioned objects. Note: dentries and inodes _are_
	411	* taken care of and do not need specific handling.
	412	*
	413	* Upon calling this function, the filesystem may no longer alter or
	414	* rearrange the set of dentries belonging to this super_block, nor may it
	415	* change the attachments of dentries to inodes.
	416	*/
	417	void generic_shutdown_super(struct super_block *sb)
	418	{
	419	const struct super_operations *sop = sb->s_op;
	420
	421	if (sb->s_root) {
	422	shrink_dcache_for_umount(sb);
	423	sync_filesystem(sb);
	424	sb->s_flags &= ~MS_ACTIVE;
	425
	426	fsnotify_unmount_inodes(sb);
	427	cgroup_writeback_umount();
	428
	429	evict_inodes(sb);
	430
	431	if (sb->s_dio_done_wq) {
	432	destroy_workqueue(sb->s_dio_done_wq);
	433	sb->s_dio_done_wq = NULL;
	434	}
	435
	436	if (sop->put_super)
	437	sop->put_super(sb);
	438
	439	if (!list_empty(&sb->s_inodes)) {
	440	printk("VFS: Busy inodes after unmount of %s. "
	441	"Self-destruct in 5 seconds. Have a nice day...\n",
	442	sb->s_id);
	443	}
	444	}
	445	spin_lock(&sb_lock);
	446	/* should be initialized for __put_super_and_need_restart() */
	447	hlist_del_init(&sb->s_instances);
	448	spin_unlock(&sb_lock);
	449	up_write(&sb->s_umount);
	450	}
	451
	452	EXPORT_SYMBOL(generic_shutdown_super);
	453
	454	/**
	455	* sget_userns - find or create a superblock
	456	* @type: filesystem type superblock should belong to
	457	* @test: comparison callback
	458	* @set: setup callback
	459	* @flags: mount flags
	460	* @user_ns: User namespace for the super_block
	461	* @data: argument to each of them
	462	*/
	463	struct super_block sget_userns(struct file_system_type type,
	464	int (test)(struct super_block ,void *),
	465	int (set)(struct super_block ,void *),
	466	int flags, struct user_namespace *user_ns,
	467	void *data)
	468	{
	469	struct super_block *s = NULL;
	470	struct super_block *old;
	471	int err;
	472
	473	if (!(flags & MS_KERNMOUNT) &&
	474	!(type->fs_flags & FS_USERNS_MOUNT) &&
	475	!capable(CAP_SYS_ADMIN))
	476	return ERR_PTR(-EPERM);
	477	retry:
	478	spin_lock(&sb_lock);
	479	if (test) {
	480	hlist_for_each_entry(old, &type->fs_supers, s_instances) {
	481	if (!test(old, data))
	482	continue;
	483	if (user_ns != old->s_user_ns) {
	484	spin_unlock(&sb_lock);
	485	if (s) {
	486	up_write(&s->s_umount);
	487	destroy_super(s);
	488	}
	489	return ERR_PTR(-EBUSY);
	490	}
	491	if (!grab_super(old))
	492	goto retry;
	493	if (s) {
	494	up_write(&s->s_umount);
	495	destroy_super(s);
	496	s = NULL;
	497	}
	498	return old;
	499	}
	500	}
	501	if (!s) {
	502	spin_unlock(&sb_lock);
	503	s = alloc_super(type, flags, user_ns);
	504	if (!s)
	505	return ERR_PTR(-ENOMEM);
	506	goto retry;
	507	}
	508
	509	err = set(s, data);
	510	if (err) {
	511	spin_unlock(&sb_lock);
	512	up_write(&s->s_umount);
	513	destroy_super(s);
	514	return ERR_PTR(err);
	515	}
	516	s->s_type = type;
	517	strlcpy(s->s_id, type->name, sizeof(s->s_id));
	518	list_add_tail(&s->s_list, &super_blocks);
	519	hlist_add_head(&s->s_instances, &type->fs_supers);
	520	spin_unlock(&sb_lock);
	521	get_filesystem(type);
	522	register_shrinker(&s->s_shrink);
	523	return s;
	524	}
	525
	526	EXPORT_SYMBOL(sget_userns);
	527
	528	/**
	529	* sget - find or create a superblock
	530	* @type: filesystem type superblock should belong to
	531	* @test: comparison callback
	532	* @set: setup callback
	533	* @flags: mount flags
	534	* @data: argument to each of them
	535	*/
	536	struct super_block sget(struct file_system_type type,
	537	int (test)(struct super_block ,void *),
	538	int (set)(struct super_block ,void *),
	539	int flags,
	540	void *data)
	541	{
	542	struct user_namespace *user_ns = current_user_ns();
	543
	544	/* Ensure the requestor has permissions over the target filesystem */
	545	if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
	546	return ERR_PTR(-EPERM);
	547
	548	return sget_userns(type, test, set, flags, user_ns, data);
	549	}
	550
	551	EXPORT_SYMBOL(sget);
	552
	553	void drop_super(struct super_block *sb)
	554	{
	555	up_read(&sb->s_umount);
	556	put_super(sb);
	557	}
	558
	559	EXPORT_SYMBOL(drop_super);
	560
	561	/**
	562	* iterate_supers - call function for all active superblocks
	563	* @f: function to call
	564	* @arg: argument to pass to it
	565	*
	566	* Scans the superblock list and calls given function, passing it
	567	* locked superblock and given argument.
	568	*/
	569	void iterate_supers(void (f)(struct super_block , void ), void arg)
	570	{
	571	struct super_block sb, p = NULL;
	572
	573	spin_lock(&sb_lock);
	574	list_for_each_entry(sb, &super_blocks, s_list) {
	575	if (hlist_unhashed(&sb->s_instances))
	576	continue;
	577	sb->s_count++;
	578	spin_unlock(&sb_lock);
	579
	580	down_read(&sb->s_umount);
	581	if (sb->s_root && (sb->s_flags & MS_BORN))
	582	f(sb, arg);
	583	up_read(&sb->s_umount);
	584
	585	spin_lock(&sb_lock);
	586	if (p)
	587	__put_super(p);
	588	p = sb;
	589	}
	590	if (p)
	591	__put_super(p);
	592	spin_unlock(&sb_lock);
	593	}
	594
	595	/**
	596	* iterate_supers_type - call function for superblocks of given type
	597	* @type: fs type
	598	* @f: function to call
	599	* @arg: argument to pass to it
	600	*
	601	* Scans the superblock list and calls given function, passing it
	602	* locked superblock and given argument.
	603	*/
	604	void iterate_supers_type(struct file_system_type *type,
	605	void (f)(struct super_block , void ), void arg)
	606	{
	607	struct super_block sb, p = NULL;
	608
	609	spin_lock(&sb_lock);
	610	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
	611	sb->s_count++;
	612	spin_unlock(&sb_lock);
	613
	614	down_read(&sb->s_umount);
	615	if (sb->s_root && (sb->s_flags & MS_BORN))
	616	f(sb, arg);
	617	up_read(&sb->s_umount);
	618
	619	spin_lock(&sb_lock);
	620	if (p)
	621	__put_super(p);
	622	p = sb;
	623	}
	624	if (p)
	625	__put_super(p);
	626	spin_unlock(&sb_lock);
	627	}
	628
	629	EXPORT_SYMBOL(iterate_supers_type);
	630
	631	/**
	632	* get_super - get the superblock of a device
	633	* @bdev: device to get the superblock for
	634	*
	635	* Scans the superblock list and finds the superblock of the file system
	636	* mounted on the device given. %NULL is returned if no match is found.
	637	*/
	638
	639	struct super_block get_super(struct block_device bdev)
	640	{
	641	struct super_block *sb;
	642
	643	if (!bdev)
	644	return NULL;
	645
	646	spin_lock(&sb_lock);
	647	rescan:
	648	list_for_each_entry(sb, &super_blocks, s_list) {
	649	if (hlist_unhashed(&sb->s_instances))
	650	continue;
	651	if (sb->s_bdev == bdev) {
	652	sb->s_count++;
	653	spin_unlock(&sb_lock);
	654	down_read(&sb->s_umount);
	655	/* still alive? */
	656	if (sb->s_root && (sb->s_flags & MS_BORN))
	657	return sb;
	658	up_read(&sb->s_umount);
	659	/* nope, got unmounted */
	660	spin_lock(&sb_lock);
	661	__put_super(sb);
	662	goto rescan;
	663	}
	664	}
	665	spin_unlock(&sb_lock);
	666	return NULL;
	667	}
	668
	669	EXPORT_SYMBOL(get_super);
	670
	671	/**
	672	* get_super_thawed - get thawed superblock of a device
	673	* @bdev: device to get the superblock for
	674	*
	675	* Scans the superblock list and finds the superblock of the file system
	676	* mounted on the device. The superblock is returned once it is thawed
	677	* (or immediately if it was not frozen). %NULL is returned if no match
	678	* is found.
	679	*/
	680	struct super_block get_super_thawed(struct block_device bdev)
	681	{
	682	while (1) {
	683	struct super_block *s = get_super(bdev);
	684	if (!s \|\| s->s_writers.frozen == SB_UNFROZEN)
	685	return s;
	686	up_read(&s->s_umount);
	687	wait_event(s->s_writers.wait_unfrozen,
	688	s->s_writers.frozen == SB_UNFROZEN);
	689	put_super(s);
	690	}
	691	}
	692	EXPORT_SYMBOL(get_super_thawed);
	693
	694	/**
	695	* get_active_super - get an active reference to the superblock of a device
	696	* @bdev: device to get the superblock for
	697	*
	698	* Scans the superblock list and finds the superblock of the file system
	699	* mounted on the device given. Returns the superblock with an active
	700	* reference or %NULL if none was found.
	701	*/
	702	struct super_block get_active_super(struct block_device bdev)
	703	{
	704	struct super_block *sb;
	705
	706	if (!bdev)
	707	return NULL;
	708
	709	restart:
	710	spin_lock(&sb_lock);
	711	list_for_each_entry(sb, &super_blocks, s_list) {
	712	if (hlist_unhashed(&sb->s_instances))
	713	continue;
	714	if (sb->s_bdev == bdev) {
	715	if (!grab_super(sb))
	716	goto restart;
	717	up_write(&sb->s_umount);
	718	return sb;
	719	}
	720	}
	721	spin_unlock(&sb_lock);
	722	return NULL;
	723	}
	724
	725	struct super_block *user_get_super(dev_t dev)
	726	{
	727	struct super_block *sb;
	728
	729	spin_lock(&sb_lock);
	730	rescan:
	731	list_for_each_entry(sb, &super_blocks, s_list) {
	732	if (hlist_unhashed(&sb->s_instances))
	733	continue;
	734	if (sb->s_dev == dev) {
	735	sb->s_count++;
	736	spin_unlock(&sb_lock);
	737	down_read(&sb->s_umount);
	738	/* still alive? */
	739	if (sb->s_root && (sb->s_flags & MS_BORN))
	740	return sb;
	741	up_read(&sb->s_umount);
	742	/* nope, got unmounted */
	743	spin_lock(&sb_lock);
	744	__put_super(sb);
	745	goto rescan;
	746	}
	747	}
	748	spin_unlock(&sb_lock);
	749	return NULL;
	750	}
	751
	752	/**
	753	* do_remount_sb - asks filesystem to change mount options.
	754	* @sb: superblock in question
	755	* @flags: numeric part of options
	756	* @data: the rest of options
	757	* @force: whether or not to force the change
	758	*
	759	* Alters the mount options of a mounted file system.
	760	*/
	761	int do_remount_sb(struct super_block sb, int flags, void data, int force)
	762	{
	763	int retval;
	764	int remount_ro;
	765
	766	if (sb->s_writers.frozen != SB_UNFROZEN)
	767	return -EBUSY;
	768
	769	#ifdef CONFIG_BLOCK
	770	if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
	771	return -EACCES;
	772	#endif
	773
	774	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
	775
	776	if (remount_ro) {
	777	if (!hlist_empty(&sb->s_pins)) {
	778	up_write(&sb->s_umount);
	779	group_pin_kill(&sb->s_pins);
	780	down_write(&sb->s_umount);
	781	if (!sb->s_root)
	782	return 0;
	783	if (sb->s_writers.frozen != SB_UNFROZEN)
	784	return -EBUSY;
	785	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
	786	}
	787	}
	788	shrink_dcache_sb(sb);
	789
	790	/* If we are remounting RDONLY and current sb is read/write,
	791	make sure there are no rw files opened */
	792	if (remount_ro) {
	793	if (force) {
	794	sb->s_readonly_remount = 1;
	795	smp_wmb();
	796	} else {
	797	retval = sb_prepare_remount_readonly(sb);
	798	if (retval)
	799	return retval;
	800	}
	801	}
	802
	803	if (sb->s_op->remount_fs) {
	804	retval = sb->s_op->remount_fs(sb, &flags, data);
	805	if (retval) {
	806	if (!force)
	807	goto cancel_readonly;
	808	/* If forced remount, go ahead despite any errors */
	809	WARN(1, "forced remount of a %s fs returned %i\n",
	810	sb->s_type->name, retval);
	811	}
	812	}
	813	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) \| (flags & MS_RMT_MASK);
	814	/* Needs to be ordered wrt mnt_is_readonly() */
	815	smp_wmb();
	816	sb->s_readonly_remount = 0;
	817
	818	/*
	819	* Some filesystems modify their metadata via some other path than the
	820	* bdev buffer cache (eg. use a private mapping, or directories in
	821	* pagecache, etc). Also file data modifications go via their own
	822	* mappings. So If we try to mount readonly then copy the filesystem
	823	* from bdev, we could get stale data, so invalidate it to give a best
	824	* effort at coherency.
	825	*/
	826	if (remount_ro && sb->s_bdev)
	827	invalidate_bdev(sb->s_bdev);
	828	return 0;
	829
	830	cancel_readonly:
	831	sb->s_readonly_remount = 0;
	832	return retval;
	833	}
	834
	835	static void do_emergency_remount(struct work_struct *work)
	836	{
	837	struct super_block sb, p = NULL;
	838
	839	spin_lock(&sb_lock);
	840	list_for_each_entry(sb, &super_blocks, s_list) {
	841	if (hlist_unhashed(&sb->s_instances))
	842	continue;
	843	sb->s_count++;
	844	spin_unlock(&sb_lock);
	845	down_write(&sb->s_umount);
	846	if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
	847	!(sb->s_flags & MS_RDONLY)) {
	848	/*
	849	* What lock protects sb->s_flags??
	850	*/
	851	do_remount_sb(sb, MS_RDONLY, NULL, 1);
	852	}
	853	up_write(&sb->s_umount);
	854	spin_lock(&sb_lock);
	855	if (p)
	856	__put_super(p);
	857	p = sb;
	858	}
	859	if (p)
	860	__put_super(p);
	861	spin_unlock(&sb_lock);
	862	kfree(work);
	863	printk("Emergency Remount complete\n");
	864	}
	865
	866	void emergency_remount(void)
	867	{
	868	struct work_struct *work;
	869
	870	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	871	if (work) {
	872	INIT_WORK(work, do_emergency_remount);
	873	schedule_work(work);
	874	}
	875	}
	876
	/*
	 * Unnamed block devices are dummy devices used by virtual
	 * filesystems which don't use real block-devices.  -- jrs
	 */

	static DEFINE_IDA(unnamed_dev_ida);
	static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
	/* Many userspace utilities consider an FSID of 0 invalid.
	 * Always return at least 1 from get_anon_bdev.
	 */
	static int unnamed_dev_start = 1;
	888
	889	int get_anon_bdev(dev_t *p)
	890	{
	891	int dev;
	892	int error;
	893
	894	retry:
	895	if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
	896	return -ENOMEM;
	897	spin_lock(&unnamed_dev_lock);
	898	error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
	899	if (!error)
	900	unnamed_dev_start = dev + 1;
	901	spin_unlock(&unnamed_dev_lock);
	902	if (error == -EAGAIN)
	903	/* We raced and lost with another CPU. */
	904	goto retry;
	905	else if (error)
	906	return -EAGAIN;
	907
	908	if (dev >= (1 << MINORBITS)) {
	909	spin_lock(&unnamed_dev_lock);
	910	ida_remove(&unnamed_dev_ida, dev);
	911	if (unnamed_dev_start > dev)
	912	unnamed_dev_start = dev;
	913	spin_unlock(&unnamed_dev_lock);
	914	return -EMFILE;
	915	}
	916	*p = MKDEV(0, dev & MINORMASK);
	917	return 0;
	918	}
	919	EXPORT_SYMBOL(get_anon_bdev);
	920
	921	void free_anon_bdev(dev_t dev)
	922	{
	923	int slot = MINOR(dev);
	924	spin_lock(&unnamed_dev_lock);
	925	ida_remove(&unnamed_dev_ida, slot);
	926	if (slot < unnamed_dev_start)
	927	unnamed_dev_start = slot;
	928	spin_unlock(&unnamed_dev_lock);
	929	}
	930	EXPORT_SYMBOL(free_anon_bdev);
	931
	932	int set_anon_super(struct super_block s, void data)
	933	{
	934	return get_anon_bdev(&s->s_dev);
	935	}
	936
	937	EXPORT_SYMBOL(set_anon_super);
	938
	939	void kill_anon_super(struct super_block *sb)
	940	{
	941	dev_t dev = sb->s_dev;
	942	generic_shutdown_super(sb);
	943	free_anon_bdev(dev);
	944	}
	945
	946	EXPORT_SYMBOL(kill_anon_super);
	947
	948	void kill_litter_super(struct super_block *sb)
	949	{
	950	if (sb->s_root)
	951	d_genocide(sb->s_root);
	952	kill_anon_super(sb);
	953	}
	954
	955	EXPORT_SYMBOL(kill_litter_super);
	956
	957	static int ns_test_super(struct super_block sb, void data)
	958	{
	959	return sb->s_fs_info == data;
	960	}
	961
	962	static int ns_set_super(struct super_block sb, void data)
	963	{
	964	sb->s_fs_info = data;
	965	return set_anon_super(sb, NULL);
	966	}
	967
	968	struct dentry mount_ns(struct file_system_type fs_type,
	969	int flags, void data, void ns, struct user_namespace *user_ns,
	970	int (fill_super)(struct super_block , void *, int))
	971	{
	972	struct super_block *sb;
	973
	974	/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
	975	* over the namespace.
	976	*/
	977	if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
	978	return ERR_PTR(-EPERM);
	979
	980	sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
	981	user_ns, ns);
	982	if (IS_ERR(sb))
	983	return ERR_CAST(sb);
	984
	985	if (!sb->s_root) {
	986	int err;
	987	err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
	988	if (err) {
	989	deactivate_locked_super(sb);
	990	return ERR_PTR(err);
	991	}
	992
	993	sb->s_flags \|= MS_ACTIVE;
	994	}
	995
	996	return dget(sb->s_root);
	997	}
	998
	999	EXPORT_SYMBOL(mount_ns);
	1000
	1001	#ifdef CONFIG_BLOCK
	1002	static int set_bdev_super(struct super_block s, void data)
	1003	{
	1004	s->s_bdev = data;
	1005	s->s_dev = s->s_bdev->bd_dev;
	1006
	1007	/*
	1008	* We set the bdi here to the queue backing, file systems can
	1009	* overwrite this in ->fill_super()
	1010	*/
	1011	s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
	1012	return 0;
	1013	}
	1014
	1015	static int test_bdev_super(struct super_block s, void data)
	1016	{
	1017	return (void *)s->s_bdev == data;
	1018	}
	1019
	1020	struct dentry mount_bdev(struct file_system_type fs_type,
	1021	int flags, const char dev_name, void data,
	1022	int (fill_super)(struct super_block , void *, int))
	1023	{
	1024	struct block_device *bdev;
	1025	struct super_block *s;
	1026	fmode_t mode = FMODE_READ \| FMODE_EXCL;
	1027	int error = 0;
	1028
	1029	if (!(flags & MS_RDONLY))
	1030	mode \|= FMODE_WRITE;
	1031
	1032	bdev = blkdev_get_by_path(dev_name, mode, fs_type);
	1033	if (IS_ERR(bdev))
	1034	return ERR_CAST(bdev);
	1035
	1036	/*
	1037	* once the super is inserted into the list by sget, s_umount
	1038	* will protect the lockfs code from trying to start a snapshot
	1039	* while we are mounting
	1040	*/
	1041	mutex_lock(&bdev->bd_fsfreeze_mutex);
	1042	if (bdev->bd_fsfreeze_count > 0) {
	1043	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	1044	error = -EBUSY;
	1045	goto error_bdev;
	1046	}
	1047	s = sget(fs_type, test_bdev_super, set_bdev_super, flags \| MS_NOSEC,
	1048	bdev);
	1049	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	1050	if (IS_ERR(s))
	1051	goto error_s;
	1052
	1053	if (s->s_root) {
	1054	if ((flags ^ s->s_flags) & MS_RDONLY) {
	1055	deactivate_locked_super(s);
	1056	error = -EBUSY;
	1057	goto error_bdev;
	1058	}
	1059
	1060	/*
	1061	* s_umount nests inside bd_mutex during
	1062	* __invalidate_device(). blkdev_put() acquires
	1063	* bd_mutex and can't be called under s_umount. Drop
	1064	* s_umount temporarily. This is safe as we're
	1065	* holding an active reference.
	1066	*/
	1067	up_write(&s->s_umount);
	1068	blkdev_put(bdev, mode);
	1069	down_write(&s->s_umount);
	1070	} else {
	1071	s->s_mode = mode;
	1072	snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
	1073	sb_set_blocksize(s, block_size(bdev));
	1074	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
	1075	if (error) {
	1076	deactivate_locked_super(s);
	1077	goto error;
	1078	}
	1079
	1080	s->s_flags \|= MS_ACTIVE;
	1081	bdev->bd_super = s;
	1082	}
	1083
	1084	return dget(s->s_root);
	1085
	1086	error_s:
	1087	error = PTR_ERR(s);
	1088	error_bdev:
	1089	blkdev_put(bdev, mode);
	1090	error:
	1091	return ERR_PTR(error);
	1092	}
	1093	EXPORT_SYMBOL(mount_bdev);
	1094
	1095	void kill_block_super(struct super_block *sb)
	1096	{
	1097	struct block_device *bdev = sb->s_bdev;
	1098	fmode_t mode = sb->s_mode;
	1099
	1100	bdev->bd_super = NULL;
	1101	generic_shutdown_super(sb);
	1102	sync_blockdev(bdev);
	1103	WARN_ON_ONCE(!(mode & FMODE_EXCL));
	1104	blkdev_put(bdev, mode \| FMODE_EXCL);
	1105	}
	1106
	1107	EXPORT_SYMBOL(kill_block_super);
	1108	#endif
	1109
	1110	struct dentry mount_nodev(struct file_system_type fs_type,
	1111	int flags, void *data,
	1112	int (fill_super)(struct super_block , void *, int))
	1113	{
	1114	int error;
	1115	struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
	1116
	1117	if (IS_ERR(s))
	1118	return ERR_CAST(s);
	1119
	1120	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
	1121	if (error) {
	1122	deactivate_locked_super(s);
	1123	return ERR_PTR(error);
	1124	}
	1125	s->s_flags \|= MS_ACTIVE;
	1126	return dget(s->s_root);
	1127	}
	1128	EXPORT_SYMBOL(mount_nodev);
	1129
	1130	static int compare_single(struct super_block s, void p)
	1131	{
	1132	return 1;
	1133	}
	1134
	1135	struct dentry mount_single(struct file_system_type fs_type,
	1136	int flags, void *data,
	1137	int (fill_super)(struct super_block , void *, int))
	1138	{
	1139	struct super_block *s;
	1140	int error;
	1141
	1142	s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
	1143	if (IS_ERR(s))
	1144	return ERR_CAST(s);
	1145	if (!s->s_root) {
	1146	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
	1147	if (error) {
	1148	deactivate_locked_super(s);
	1149	return ERR_PTR(error);
	1150	}
	1151	s->s_flags \|= MS_ACTIVE;
	1152	} else {
	1153	do_remount_sb(s, flags, data, 0);
	1154	}
	1155	return dget(s->s_root);
	1156	}
	1157	EXPORT_SYMBOL(mount_single);
	1158
	1159	struct dentry *
	1160	mount_fs(struct file_system_type type, int flags, const char name, void *data)
	1161	{
	1162	struct dentry *root;
	1163	struct super_block *sb;
	1164	char *secdata = NULL;
	1165	int error = -ENOMEM;
	1166
	1167	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
	1168	secdata = alloc_secdata();
	1169	if (!secdata)
	1170	goto out;
	1171
	1172	error = security_sb_copy_data(data, secdata);
	1173	if (error)
	1174	goto out_free_secdata;
	1175	}
	1176
	1177	root = type->mount(type, flags, name, data);
	1178	if (IS_ERR(root)) {
	1179	error = PTR_ERR(root);
	1180	goto out_free_secdata;
	1181	}
	1182	sb = root->d_sb;
	1183	BUG_ON(!sb);
	1184	WARN_ON(!sb->s_bdi);
	1185	sb->s_flags \|= MS_BORN;
	1186
	1187	error = security_sb_kern_mount(sb, flags, secdata);
	1188	if (error)
	1189	goto out_sb;
	1190
	1191	/*
	1192	* filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
	1193	* but s_maxbytes was an unsigned long long for many releases. Throw
	1194	* this warning for a little while to try and catch filesystems that
	1195	* violate this rule.
	1196	*/
	1197	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
	1198	"negative value (%lld)\n", type->name, sb->s_maxbytes);
	1199
	1200	up_write(&sb->s_umount);
	1201	free_secdata(secdata);
	1202	return root;
	1203	out_sb:
	1204	dput(root);
	1205	deactivate_locked_super(sb);
	1206	out_free_secdata:
	1207	free_secdata(secdata);
	1208	out:
	1209	return ERR_PTR(error);
	1210	}
	1211
	1212	/*
	1213	* This is an internal function, please use sb_end_{write,pagefault,intwrite}
	1214	* instead.
	1215	*/
	1216	void __sb_end_write(struct super_block *sb, int level)
	1217	{
	1218	percpu_up_read(sb->s_writers.rw_sem + level-1);
	1219	}
	1220	EXPORT_SYMBOL(__sb_end_write);
	1221
	1222	/*
	1223	* This is an internal function, please use sb_start_{write,pagefault,intwrite}
	1224	* instead.
	1225	*/
	1226	int __sb_start_write(struct super_block *sb, int level, bool wait)
	1227	{
	1228	bool force_trylock = false;
	1229	int ret = 1;
	1230
	1231	#ifdef CONFIG_LOCKDEP
	1232	/*
	1233	* We want lockdep to tell us about possible deadlocks with freezing
	1234	* but it's it bit tricky to properly instrument it. Getting a freeze
	1235	* protection works as getting a read lock but there are subtle
	1236	* problems. XFS for example gets freeze protection on internal level
	1237	* twice in some cases, which is OK only because we already hold a
	1238	* freeze protection also on higher level. Due to these cases we have
	1239	* to use wait == F (trylock mode) which must not fail.
	1240	*/
	1241	if (wait) {
	1242	int i;
	1243
	1244	for (i = 0; i < level - 1; i++)
	1245	if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
	1246	force_trylock = true;
	1247	break;
	1248	}
	1249	}
	1250	#endif
	1251	if (wait && !force_trylock)
	1252	percpu_down_read(sb->s_writers.rw_sem + level-1);
	1253	else
	1254	ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
	1255
	1256	WARN_ON(force_trylock && !ret);
	1257	return ret;
	1258	}
	1259	EXPORT_SYMBOL(__sb_start_write);
	1260
	1261	/**
	1262	* sb_wait_write - wait until all writers to given file system finish
	1263	* @sb: the super for which we wait
	1264	* @level: type of writers we wait for (normal vs page fault)
	1265	*
	1266	* This function waits until there are no writers of given type to given file
	1267	* system.
	1268	*/
	1269	static void sb_wait_write(struct super_block *sb, int level)
	1270	{
	1271	percpu_down_write(sb->s_writers.rw_sem + level-1);
	1272	/*
	1273	* We are going to return to userspace and forget about this lock, the
	1274	* ownership goes to the caller of thaw_super() which does unlock.
	1275	*
	1276	* FIXME: we should do this before return from freeze_super() after we
	1277	* called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
	1278	* should re-acquire these locks before s_op->unfreeze_fs(sb). However
	1279	* this leads to lockdep false-positives, so currently we do the early
	1280	* release right after acquire.
	1281	*/
	1282	percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
	1283	}
	1284
	1285	static void sb_freeze_unlock(struct super_block *sb)
	1286	{
	1287	int level;
	1288
	1289	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
	1290	percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
	1291
	1292	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
	1293	percpu_up_write(sb->s_writers.rw_sem + level);
	1294	}
	1295
	1296	/**
	1297	* freeze_super - lock the filesystem and force it into a consistent state
	1298	* @sb: the super to lock
	1299	*
	1300	* Syncs the super to make sure the filesystem is consistent and calls the fs's
	1301	* freeze_fs. Subsequent calls to this without first thawing the fs will return
	1302	* -EBUSY.
	1303	*
	1304	* During this function, sb->s_writers.frozen goes through these values:
	1305	*
	1306	* SB_UNFROZEN: File system is normal, all writes progress as usual.
	1307	*
	1308	* SB_FREEZE_WRITE: The file system is in the process of being frozen. New
	1309	* writes should be blocked, though page faults are still allowed. We wait for
	1310	* all writes to complete and then proceed to the next stage.
	1311	*
	1312	* SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
	1313	* but internal fs threads can still modify the filesystem (although they
	1314	* should not dirty new pages or inodes), writeback can run etc. After waiting
	1315	* for all running page faults we sync the filesystem which will clean all
	1316	* dirty pages and inodes (no new dirty pages or inodes can be created when
	1317	* sync is running).
	1318	*
	1319	* SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
	1320	* modification are blocked (e.g. XFS preallocation truncation on inode
	1321	* reclaim). This is usually implemented by blocking new transactions for
	1322	* filesystems that have them and need this additional guard. After all
	1323	* internal writers are finished we call ->freeze_fs() to finish filesystem
	1324	* freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
	1325	* mostly auxiliary for filesystems to verify they do not modify frozen fs.
	1326	*
	1327	* sb->s_writers.frozen is protected by sb->s_umount.
	1328	*/
	1329	int freeze_super(struct super_block *sb)
	1330	{
	1331	int ret;
	1332
	1333	atomic_inc(&sb->s_active);
	1334	down_write(&sb->s_umount);
	1335	if (sb->s_writers.frozen != SB_UNFROZEN) {
	1336	deactivate_locked_super(sb);
	1337	return -EBUSY;
	1338	}
	1339
	1340	if (!(sb->s_flags & MS_BORN)) {
	1341	up_write(&sb->s_umount);
	1342	return 0; /* sic - it's "nothing to do" */
	1343	}
	1344
	1345	if (sb->s_flags & MS_RDONLY) {
	1346	/* Nothing to do really... */
	1347	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	1348	up_write(&sb->s_umount);
	1349	return 0;
	1350	}
	1351
	1352	sb->s_writers.frozen = SB_FREEZE_WRITE;
	1353	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	1354	up_write(&sb->s_umount);
	1355	sb_wait_write(sb, SB_FREEZE_WRITE);
	1356	down_write(&sb->s_umount);
	1357
	1358	/* Now we go and block page faults... */
	1359	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	1360	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
	1361
	1362	/* All writers are done so after syncing there won't be dirty data */
	1363	sync_filesystem(sb);
	1364
	1365	/* Now wait for internal filesystem counter */
	1366	sb->s_writers.frozen = SB_FREEZE_FS;
	1367	sb_wait_write(sb, SB_FREEZE_FS);
	1368
	1369	if (sb->s_op->freeze_fs) {
	1370	ret = sb->s_op->freeze_fs(sb);
	1371	if (ret) {
	1372	printk(KERN_ERR
	1373	"VFS:Filesystem freeze failed\n");
	1374	sb->s_writers.frozen = SB_UNFROZEN;
	1375	sb_freeze_unlock(sb);
	1376	wake_up(&sb->s_writers.wait_unfrozen);
	1377	deactivate_locked_super(sb);
	1378	return ret;
	1379	}
	1380	}
	1381	/*
	1382	* This is just for debugging purposes so that fs can warn if it
	1383	* sees write activity when frozen is set to SB_FREEZE_COMPLETE.
	1384	*/
	1385	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
	1386	up_write(&sb->s_umount);
	1387	return 0;
	1388	}
	1389	EXPORT_SYMBOL(freeze_super);
	1390
	1391	/**
	1392	* thaw_super -- unlock filesystem
	1393	* @sb: the super to thaw
	1394	*
	1395	* Unlocks the filesystem and marks it writeable again after freeze_super().
	1396	*/
	1397	int thaw_super(struct super_block *sb)
	1398	{
	1399	int error;
	1400
	1401	down_write(&sb->s_umount);
	1402	if (sb->s_writers.frozen == SB_UNFROZEN) {
	1403	up_write(&sb->s_umount);
	1404	return -EINVAL;
	1405	}
	1406
	1407	if (sb->s_flags & MS_RDONLY) {
	1408	sb->s_writers.frozen = SB_UNFROZEN;
	1409	goto out;
	1410	}
	1411
	1412	if (sb->s_op->unfreeze_fs) {
	1413	error = sb->s_op->unfreeze_fs(sb);
	1414	if (error) {
	1415	printk(KERN_ERR
	1416	"VFS:Filesystem thaw failed\n");
	1417	up_write(&sb->s_umount);
	1418	return error;
	1419	}
	1420	}
	1421
	1422	sb->s_writers.frozen = SB_UNFROZEN;
	1423	sb_freeze_unlock(sb);
	1424	out:
	1425	wake_up(&sb->s_writers.wait_unfrozen);
	1426	deactivate_locked_super(sb);
	1427	return 0;
	1428	}
	1429	EXPORT_SYMBOL(thaw_super);