Git Repo - linux.git/blame - fs/ocfs2/file.c

Commit	Line	Data
328970de	1	// SPDX-License-Identifier: GPL-2.0-or-later
fa60ce2c	2	/*
ccd979bd MF	3	* file.c
	4	*
	5	* File open, close, extend, truncate
	6	*
	7	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
ccd979bd MF	8	*/
ccd979bd MF	9
16f7e0fe	10	#include <linux/capability.h>
ccd979bd MF	11	#include <linux/fs.h>
	12	#include <linux/types.h>
	13	#include <linux/slab.h>
	14	#include <linux/highmem.h>
	15	#include <linux/pagemap.h>
	16	#include <linux/uio.h>
e2057c5a	17	#include <linux/sched.h>
d6b29d7c	18	#include <linux/splice.h>
7f1a37e3	19	#include <linux/mount.h>
9517bac6	20	#include <linux/writeback.h>
385820a3	21	#include <linux/falloc.h>
a90714c1	22	#include <linux/quotaops.h>
04eda1a1	23	#include <linux/blkdev.h>
66114cad	24	#include <linux/backing-dev.h>
ccd979bd	25
ccd979bd MF	26	#include <cluster/masklog.h>
	27
	28	#include "ocfs2.h"
	29
	30	#include "alloc.h"
	31	#include "aops.h"
	32	#include "dir.h"
	33	#include "dlmglue.h"
	34	#include "extent_map.h"
	35	#include "file.h"
	36	#include "sysfile.h"
	37	#include "inode.h"
ca4d147e	38	#include "ioctl.h"
ccd979bd	39	#include "journal.h"
53fc622b	40	#include "locks.h"
ccd979bd MF	41	#include "mmap.h"
	42	#include "suballoc.h"
	43	#include "super.h"
cf1d6c76	44	#include "xattr.h"
23fc2702	45	#include "acl.h"
a90714c1	46	#include "quota.h"
293b2f70	47	#include "refcounttree.h"
468eedde	48	#include "ocfs2_trace.h"
ccd979bd MF	49
	50	#include "buffer_head_io.h"
	51
53fc622b MF	52	static int ocfs2_init_file_private(struct inode inode, struct file file)
	53	{
	54	struct ocfs2_file_private *fp;
	55
	56	fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL);
	57	if (!fp)
	58	return -ENOMEM;
	59
	60	fp->fp_file = file;
	61	mutex_init(&fp->fp_mutex);
	62	ocfs2_file_lock_res_init(&fp->fp_flock, fp);
	63	file->private_data = fp;
	64
	65	return 0;
	66	}
	67
	68	static void ocfs2_free_file_private(struct inode inode, struct file file)
	69	{
	70	struct ocfs2_file_private *fp = file->private_data;
	71	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	72
	73	if (fp) {
	74	ocfs2_simple_drop_lockres(osb, &fp->fp_flock);
	75	ocfs2_lock_res_free(&fp->fp_flock);
	76	kfree(fp);
	77	file->private_data = NULL;
	78	}
	79	}
	80
ccd979bd MF	81	static int ocfs2_file_open(struct inode inode, struct file file)
	82	{
	83	int status;
	84	int mode = file->f_flags;
	85	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	86
468eedde	87	trace_ocfs2_file_open(inode, file, file->f_path.dentry,
d324cd4c	88	(unsigned long long)oi->ip_blkno,
468eedde TM	89	file->f_path.dentry->d_name.len,
468eedde TM	90	file->f_path.dentry->d_name.name, mode);
ccd979bd	91
9c89fe0a JK	92	if (file->f_mode & FMODE_WRITE) {
	93	status = dquot_initialize(inode);
	94	if (status)
	95	goto leave;
	96	}
907f4554	97
ccd979bd MF	98	spin_lock(&oi->ip_lock);
	99
	100	/* Check that the inode hasn't been wiped from disk by another
	101	* node. If it hasn't then we're safe as long as we hold the
	102	* spin lock until our increment of open count. */
d324cd4c	103	if (oi->ip_flags & OCFS2_INODE_DELETED) {
ccd979bd MF	104	spin_unlock(&oi->ip_lock);
	105
	106	status = -ENOENT;
	107	goto leave;
	108	}
	109
	110	if (mode & O_DIRECT)
	111	oi->ip_flags \|= OCFS2_INODE_OPEN_DIRECT;
	112
	113	oi->ip_open_count++;
	114	spin_unlock(&oi->ip_lock);
53fc622b MF	115
	116	status = ocfs2_init_file_private(inode, file);
	117	if (status) {
	118	/*
	119	* We want to set open count back if we're failing the
	120	* open.
	121	*/
	122	spin_lock(&oi->ip_lock);
	123	oi->ip_open_count--;
	124	spin_unlock(&oi->ip_lock);
	125	}
	126
c4c2416a GH	127	file->f_mode \|= FMODE_NOWAIT;
c4c2416a GH	128
ccd979bd	129	leave:
ccd979bd MF	130	return status;
	131	}
	132
	133	static int ocfs2_file_release(struct inode inode, struct file file)
	134	{
	135	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	136
ccd979bd MF	137	spin_lock(&oi->ip_lock);
	138	if (!--oi->ip_open_count)
	139	oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
468eedde TM	140
	141	trace_ocfs2_file_release(inode, file, file->f_path.dentry,
	142	oi->ip_blkno,
	143	file->f_path.dentry->d_name.len,
	144	file->f_path.dentry->d_name.name,
	145	oi->ip_open_count);
ccd979bd MF	146	spin_unlock(&oi->ip_lock);
ccd979bd MF	147
53fc622b MF	148	ocfs2_free_file_private(inode, file);
53fc622b MF	149
ccd979bd MF	150	return 0;
	151	}
	152
/* ->open() for directories: only the per-open private data is set up. */
static int ocfs2_dir_open(struct inode *inode, struct file *file)
{
	return ocfs2_init_file_private(inode, file);
}
	157
	158	static int ocfs2_dir_release(struct inode inode, struct file file)
	159	{
	160	ocfs2_free_file_private(inode, file);
	161	return 0;
	162	}
	163
02c24a82 JB	164	static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
02c24a82 JB	165	int datasync)
ccd979bd MF	166	{
ccd979bd MF	167	int err = 0;
7ea80859	168	struct inode *inode = file->f_mapping->host;
ccd979bd	169	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2931cdcb DW	170	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	171	journal_t *journal = osb->journal->j_journal;
	172	int ret;
	173	tid_t commit_tid;
	174	bool needs_barrier = false;
ccd979bd	175
468eedde	176	trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
d324cd4c	177	oi->ip_blkno,
468eedde TM	178	file->f_path.dentry->d_name.len,
	179	file->f_path.dentry->d_name.name,
	180	(unsigned long long)datasync);
ccd979bd	181
a987c7ca YL	182	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
	183	return -EROFS;
	184
3b49c9a1	185	err = file_write_and_wait_range(file, start, end);
02c24a82 JB	186	if (err)
	187	return err;
	188
2931cdcb DW	189	commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
	190	if (journal->j_flags & JBD2_BARRIER &&
	191	!jbd2_trans_will_send_data_barrier(journal, commit_tid))
	192	needs_barrier = true;
	193	err = jbd2_complete_transaction(journal, commit_tid);
	194	if (needs_barrier) {
c6bf3f0e	195	ret = blkdev_issue_flush(inode->i_sb->s_bdev);
2931cdcb DW	196	if (!err)
2931cdcb DW	197	err = ret;
04eda1a1	198	}
e04cc15f	199
c1e8d35e TM	200	if (err)
c1e8d35e TM	201	mlog_errno(err);
ccd979bd MF	202
	203	return (err < 0) ? -EIO : 0;
	204	}
	205
7f1a37e3 TY	206	int ocfs2_should_update_atime(struct inode *inode,
	207	struct vfsmount *vfsmnt)
	208	{
95582b00	209	struct timespec64 now;
7f1a37e3 TY	210	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	211
	212	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
	213	return 0;
	214
	215	if ((inode->i_flags & S_NOATIME) \|\|
1751e8a6	216	((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)))
7f1a37e3 TY	217	return 0;
7f1a37e3 TY	218
6c2aad05 MF	219	/*
	220	* We can be called with no vfsmnt structure - NFSD will
	221	* sometimes do this.
	222	*
	223	* Note that our action here is different than touch_atime() -
	224	* if we can't tell whether this is a noatime mount, then we
	225	* don't know whether to trust the value of s_atime_quantum.
	226	*/
	227	if (vfsmnt == NULL)
	228	return 0;
	229
7f1a37e3 TY	230	if ((vfsmnt->mnt_flags & MNT_NOATIME) \|\|
	231	((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
	232	return 0;
	233
7e913c53	234	if (vfsmnt->mnt_flags & MNT_RELATIME) {
6861de97	235	struct timespec64 ctime = inode_get_ctime(inode);
fd6acbbc JL	236	struct timespec64 atime = inode_get_atime(inode);
fd6acbbc JL	237	struct timespec64 mtime = inode_get_mtime(inode);
6861de97	238
fd6acbbc JL	239	if ((timespec64_compare(&atime, &mtime) <= 0) \|\|
fd6acbbc JL	240	(timespec64_compare(&atime, &ctime) <= 0))
7e913c53 MF	241	return 1;
	242
	243	return 0;
	244	}
	245
078cd827	246	now = current_time(inode);
fd6acbbc	247	if ((now.tv_sec - inode_get_atime_sec(inode) <= osb->s_atime_quantum))
7f1a37e3 TY	248	return 0;
	249	else
	250	return 1;
	251	}
	252
	253	int ocfs2_update_inode_atime(struct inode *inode,
	254	struct buffer_head *bh)
	255	{
	256	int ret;
	257	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	258	handle_t *handle;
c11e9faf	259	struct ocfs2_dinode di = (struct ocfs2_dinode ) bh->b_data;
7f1a37e3	260
7f1a37e3	261	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
fa38e92c JK	262	if (IS_ERR(handle)) {
fa38e92c JK	263	ret = PTR_ERR(handle);
7f1a37e3 TY	264	mlog_errno(ret);
	265	goto out;
	266	}
	267
0cf2f763	268	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
13723d00	269	OCFS2_JOURNAL_ACCESS_WRITE);
c11e9faf MF	270	if (ret) {
	271	mlog_errno(ret);
	272	goto out_commit;
	273	}
	274
	275	/*
	276	* Don't use ocfs2_mark_inode_dirty() here as we don't always
137cebf9	277	* have i_rwsem to guard against concurrent changes to other
c11e9faf MF	278	* inode fields.
c11e9faf MF	279	*/
fd6acbbc JL	280	inode_set_atime_to_ts(inode, current_time(inode));
	281	di->i_atime = cpu_to_le64(inode_get_atime_sec(inode));
	282	di->i_atime_nsec = cpu_to_le32(inode_get_atime_nsec(inode));
6fdb702d	283	ocfs2_update_inode_fsync_trans(handle, inode, 0);
ec20cec7	284	ocfs2_journal_dirty(handle, bh);
7f1a37e3	285
c11e9faf	286	out_commit:
1119d3c0	287	ocfs2_commit_trans(osb, handle);
7f1a37e3	288	out:
7f1a37e3 TY	289	return ret;
	290	}
	291
026749a8	292	int ocfs2_set_inode_size(handle_t *handle,
6cb129f5 AB	293	struct inode *inode,
	294	struct buffer_head *fe_bh,
	295	u64 new_i_size)
ccd979bd MF	296	{
	297	int status;
	298
ccd979bd	299	i_size_write(inode, new_i_size);
8110b073	300	inode->i_blocks = ocfs2_inode_sector_count(inode);
fd6acbbc	301	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
ccd979bd MF	302
	303	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
	304	if (status < 0) {
	305	mlog_errno(status);
	306	goto bail;
	307	}
	308
	309	bail:
ccd979bd MF	310	return status;
	311	}
	312
9e33d69f JK	313	int ocfs2_simple_size_update(struct inode *inode,
	314	struct buffer_head *di_bh,
	315	u64 new_i_size)
ccd979bd MF	316	{
	317	int ret;
	318	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1fabe148	319	handle_t *handle = NULL;
ccd979bd	320
65eff9cc	321	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
fa38e92c JK	322	if (IS_ERR(handle)) {
fa38e92c JK	323	ret = PTR_ERR(handle);
ccd979bd MF	324	mlog_errno(ret);
	325	goto out;
	326	}
	327
	328	ret = ocfs2_set_inode_size(handle, inode, di_bh,
	329	new_i_size);
	330	if (ret < 0)
	331	mlog_errno(ret);
	332
6fdb702d	333	ocfs2_update_inode_fsync_trans(handle, inode, 0);
02dc1af4	334	ocfs2_commit_trans(osb, handle);
ccd979bd MF	335	out:
	336	return ret;
	337	}
	338
37f8a2bf TM	339	static int ocfs2_cow_file_pos(struct inode *inode,
	340	struct buffer_head *fe_bh,
	341	u64 offset)
	342	{
	343	int status;
	344	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	345	unsigned int num_clusters = 0;
	346	unsigned int ext_flags = 0;
	347
	348	/*
	349	* If the new offset is aligned to the range of the cluster, there is
	350	* no space for ocfs2_zero_range_for_truncate to fill, so no need to
	351	* CoW either.
	352	*/
	353	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
	354	return 0;
	355
	356	status = ocfs2_get_clusters(inode, cpos, &phys,
	357	&num_clusters, &ext_flags);
	358	if (status) {
	359	mlog_errno(status);
	360	goto out;
	361	}
	362
	363	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
	364	goto out;
	365
c7dd3392	366	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
37f8a2bf TM	367
	368	out:
	369	return status;
	370	}
	371
ccd979bd MF	372	static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
	373	struct inode *inode,
	374	struct buffer_head *fe_bh,
	375	u64 new_i_size)
	376	{
	377	int status;
1fabe148	378	handle_t *handle;
60b11392	379	struct ocfs2_dinode *di;
35edec1d	380	u64 cluster_bytes;
ccd979bd	381
37f8a2bf TM	382	/*
	383	* We need to CoW the cluster contains the offset if it is reflinked
	384	* since we will call ocfs2_zero_range_for_truncate later which will
	385	* write "0" from offset to the end of the cluster.
	386	*/
	387	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
	388	if (status) {
	389	mlog_errno(status);
	390	return status;
	391	}
	392
ccd979bd MF	393	/* TODO: This needs to actually orphan the inode in this
	394	* transaction. */
	395
65eff9cc	396	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
ccd979bd MF	397	if (IS_ERR(handle)) {
	398	status = PTR_ERR(handle);
	399	mlog_errno(status);
	400	goto out;
	401	}
	402
0cf2f763	403	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
13723d00	404	OCFS2_JOURNAL_ACCESS_WRITE);
60b11392 MF	405	if (status < 0) {
	406	mlog_errno(status);
	407	goto out_commit;
	408	}
	409
	410	/*
	411	* Do this before setting i_size.
	412	*/
35edec1d MF	413	cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
	414	status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
	415	cluster_bytes);
60b11392 MF	416	if (status) {
	417	mlog_errno(status);
	418	goto out_commit;
	419	}
	420
	421	i_size_write(inode, new_i_size);
fd6acbbc	422	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
60b11392 MF	423
	424	di = (struct ocfs2_dinode *) fe_bh->b_data;
	425	di->i_size = cpu_to_le64(new_i_size);
fd6acbbc JL	426	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime_sec(inode));
fd6acbbc JL	427	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
6fdb702d	428	ocfs2_update_inode_fsync_trans(handle, inode, 0);
60b11392	429
ec20cec7	430	ocfs2_journal_dirty(handle, fe_bh);
ccd979bd	431
60b11392	432	out_commit:
02dc1af4	433	ocfs2_commit_trans(osb, handle);
ccd979bd	434	out:
ccd979bd MF	435	return status;
	436	}
	437
026749a8	438	int ocfs2_truncate_file(struct inode *inode,
ccd979bd MF	439	struct buffer_head *di_bh,
	440	u64 new_i_size)
	441	{
	442	int status = 0;
	443	struct ocfs2_dinode *fe = NULL;
	444	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
ccd979bd	445
b657c95c JB	446	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
b657c95c JB	447	* already validated it */
ccd979bd	448	fe = (struct ocfs2_dinode *) di_bh->b_data;
ccd979bd	449
468eedde TM	450	trace_ocfs2_truncate_file((unsigned long long)OCFS2_I(inode)->ip_blkno,
	451	(unsigned long long)le64_to_cpu(fe->i_size),
	452	(unsigned long long)new_i_size);
	453
ccd979bd	454	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
b0697053 MF	455	"Inode %llu, inode i_size = %lld != di "
	456	"i_size = %llu, i_flags = 0x%x\n",
	457	(unsigned long long)OCFS2_I(inode)->ip_blkno,
ccd979bd	458	i_size_read(inode),
b0697053 MF	459	(unsigned long long)le64_to_cpu(fe->i_size),
b0697053 MF	460	le32_to_cpu(fe->i_flags));
ccd979bd MF	461
ccd979bd MF	462	if (new_i_size > le64_to_cpu(fe->i_size)) {
468eedde TM	463	trace_ocfs2_truncate_file_error(
	464	(unsigned long long)le64_to_cpu(fe->i_size),
	465	(unsigned long long)new_i_size);
ccd979bd MF	466	status = -EINVAL;
	467	mlog_errno(status);
	468	goto bail;
	469	}
	470
2e89b2e4 MF	471	down_write(&OCFS2_I(inode)->ip_alloc_sem);
2e89b2e4 MF	472
4fe370af MF	473	ocfs2_resv_discard(&osb->osb_la_resmap,
	474	&OCFS2_I(inode)->ip_la_data_resv);
	475
c934a92d MF	476	/*
	477	* The inode lock forced other nodes to sync and drop their
	478	* pages, which (correctly) happens even if we have a truncate
	479	* without allocation change - ocfs2 cluster sizes can be much
	480	* greater than page size, so we have to truncate them
	481	* anyway.
	482	*/
2e89b2e4	483
1afc32b9	484	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
839b6386 JK	485	unmap_mapping_range(inode->i_mapping,
	486	new_i_size + PAGE_SIZE - 1, 0, 1);
	487	truncate_inode_pages(inode->i_mapping, new_i_size);
1afc32b9	488	status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
b1967d0e	489	i_size_read(inode), 1);
1afc32b9 MF	490	if (status)
	491	mlog_errno(status);
	492
c934a92d	493	goto bail_unlock_sem;
1afc32b9 MF	494	}
1afc32b9 MF	495
ccd979bd MF	496	/* alright, we're going to need to do a full blown alloc size
	497	* change. Orphan the inode so that recovery can complete the
	498	* truncate if necessary. This does the task of marking
	499	* i_size. */
	500	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
	501	if (status < 0) {
	502	mlog_errno(status);
c934a92d	503	goto bail_unlock_sem;
ccd979bd MF	504	}
ccd979bd MF	505
839b6386 JK	506	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
	507	truncate_inode_pages(inode->i_mapping, new_i_size);
	508
78f94673	509	status = ocfs2_commit_truncate(osb, inode, di_bh);
ccd979bd MF	510	if (status < 0) {
ccd979bd MF	511	mlog_errno(status);
c934a92d	512	goto bail_unlock_sem;
ccd979bd MF	513	}
	514
	515	/* TODO: orphan dir cleanup here. */
c934a92d	516	bail_unlock_sem:
2e89b2e4 MF	517	up_write(&OCFS2_I(inode)->ip_alloc_sem);
2e89b2e4 MF	518
ccd979bd	519	bail:
8b2c0dba TM	520	if (!status && OCFS2_I(inode)->ip_clusters == 0)
8b2c0dba TM	521	status = ocfs2_try_remove_refcount_tree(inode, di_bh);
ccd979bd	522
ccd979bd MF	523	return status;
	524	}
	525
	526	/*
0eb8d47e	527	* extend file allocation only here.
ccd979bd MF	528	* we'll update all the disk stuff, and oip->alloc_size
	529	*
	530	* expect stuff to be locked, a transaction started and enough data /
	531	* metadata reservations in the contexts.
	532	*
	533	* Will return -EAGAIN, and a reason if a restart is needed.
	534	* If passed in, *reason will always be set, even in error.
	535	*/
0eb8d47e TM	536	int ocfs2_add_inode_data(struct ocfs2_super *osb,
	537	struct inode *inode,
	538	u32 *logical_offset,
	539	u32 clusters_to_add,
	540	int mark_unwritten,
	541	struct buffer_head *fe_bh,
	542	handle_t *handle,
	543	struct ocfs2_alloc_context *data_ac,
	544	struct ocfs2_alloc_context *meta_ac,
	545	enum ocfs2_alloc_restarted *reason_ret)
ccd979bd	546	{
f99b9b7c	547	struct ocfs2_extent_tree et;
ccd979bd	548
5e404e9e	549	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
38c9d2d3 JQ	550	return ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
	551	clusters_to_add, mark_unwritten,
	552	data_ac, meta_ac, reason_ret);
ccd979bd MF	553	}
ccd979bd MF	554
5bc55d65 JG	555	static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
5bc55d65 JG	556	u32 clusters_to_add, int mark_unwritten)
ccd979bd MF	557	{
	558	int status = 0;
	559	int restart_func = 0;
abf8b156	560	int credits;
2ae99a60	561	u32 prev_clusters;
ccd979bd MF	562	struct buffer_head *bh = NULL;
ccd979bd MF	563	struct ocfs2_dinode *fe = NULL;
1fabe148	564	handle_t *handle = NULL;
ccd979bd MF	565	struct ocfs2_alloc_context *data_ac = NULL;
ccd979bd MF	566	struct ocfs2_alloc_context *meta_ac = NULL;
696cdf73	567	enum ocfs2_alloc_restarted why = RESTART_NONE;
ccd979bd	568	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
f99b9b7c	569	struct ocfs2_extent_tree et;
a90714c1	570	int did_quota = 0;
ccd979bd	571
dcd0538f	572	/*
f0cb0f0b	573	* Unwritten extent only exists for file systems which
dcd0538f MF	574	* support holes.
dcd0538f MF	575	*/
2ae99a60	576	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
dcd0538f	577
b657c95c	578	status = ocfs2_read_inode_block(inode, &bh);
ccd979bd MF	579	if (status < 0) {
	580	mlog_errno(status);
	581	goto leave;
	582	}
ccd979bd	583	fe = (struct ocfs2_dinode *) bh->b_data;
ccd979bd MF	584
	585	restart_all:
	586	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
	587
5e404e9e	588	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
f99b9b7c JB	589	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
f99b9b7c JB	590	&data_ac, &meta_ac);
9517bac6 MF	591	if (status) {
	592	mlog_errno(status);
	593	goto leave;
	594	}
	595
06f9da6e	596	credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list);
65eff9cc	597	handle = ocfs2_start_trans(osb, credits);
ccd979bd MF	598	if (IS_ERR(handle)) {
	599	status = PTR_ERR(handle);
	600	handle = NULL;
	601	mlog_errno(status);
	602	goto leave;
	603	}
	604
	605	restarted_transaction:
468eedde TM	606	trace_ocfs2_extend_allocation(
	607	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	608	(unsigned long long)i_size_read(inode),
	609	le32_to_cpu(fe->i_clusters), clusters_to_add,
	610	why, restart_func);
	611
5dd4056d CH	612	status = dquot_alloc_space_nodirty(inode,
	613	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
	614	if (status)
a90714c1	615	goto leave;
a90714c1 JK	616	did_quota = 1;
a90714c1 JK	617
ccd979bd MF	618	/* reserve a write to the file entry early on - that we if we
	619	* run out of credits in the allocation path, we can still
	620	* update i_size. */
0cf2f763	621	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
13723d00	622	OCFS2_JOURNAL_ACCESS_WRITE);
ccd979bd MF	623	if (status < 0) {
	624	mlog_errno(status);
	625	goto leave;
	626	}
	627
	628	prev_clusters = OCFS2_I(inode)->ip_clusters;
	629
0eb8d47e TM	630	status = ocfs2_add_inode_data(osb,
	631	inode,
	632	&logical_start,
	633	clusters_to_add,
	634	mark_unwritten,
	635	bh,
	636	handle,
	637	data_ac,
	638	meta_ac,
	639	&why);
ccd979bd MF	640	if ((status < 0) && (status != -EAGAIN)) {
	641	if (status != -ENOSPC)
	642	mlog_errno(status);
	643	goto leave;
	644	}
2931cdcb	645	ocfs2_update_inode_fsync_trans(handle, inode, 1);
ec20cec7	646	ocfs2_journal_dirty(handle, bh);
ccd979bd MF	647
	648	spin_lock(&OCFS2_I(inode)->ip_lock);
	649	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
	650	spin_unlock(&OCFS2_I(inode)->ip_lock);
a90714c1	651	/* Release unused quota reservation */
5dd4056d	652	dquot_free_space(inode,
a90714c1 JK	653	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
a90714c1 JK	654	did_quota = 0;
ccd979bd MF	655
	656	if (why != RESTART_NONE && clusters_to_add) {
	657	if (why == RESTART_META) {
ccd979bd	658	restart_func = 1;
79681842	659	status = 0;
ccd979bd MF	660	} else {
	661	BUG_ON(why != RESTART_TRANS);
	662
2b1e55c3	663	status = ocfs2_allocate_extend_trans(handle, 1);
ccd979bd MF	664	if (status < 0) {
	665	/* handle still has to be committed at
	666	* this point. */
	667	status = -ENOMEM;
	668	mlog_errno(status);
	669	goto leave;
	670	}
	671	goto restarted_transaction;
	672	}
	673	}
	674
468eedde	675	trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno,
1ca1a111	676	le32_to_cpu(fe->i_clusters),
468eedde TM	677	(unsigned long long)le64_to_cpu(fe->i_size),
	678	OCFS2_I(inode)->ip_clusters,
	679	(unsigned long long)i_size_read(inode));
ccd979bd MF	680
ccd979bd MF	681	leave:
a90714c1	682	if (status < 0 && did_quota)
5dd4056d	683	dquot_free_space(inode,
a90714c1	684	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
ccd979bd	685	if (handle) {
02dc1af4	686	ocfs2_commit_trans(osb, handle);
ccd979bd MF	687	handle = NULL;
	688	}
	689	if (data_ac) {
	690	ocfs2_free_alloc_context(data_ac);
	691	data_ac = NULL;
	692	}
	693	if (meta_ac) {
	694	ocfs2_free_alloc_context(meta_ac);
	695	meta_ac = NULL;
	696	}
	697	if ((!status) && restart_func) {
	698	restart_func = 0;
	699	goto restart_all;
	700	}
a81cb88b MF	701	brelse(bh);
a81cb88b MF	702	bh = NULL;
ccd979bd	703
ccd979bd MF	704	return status;
	705	}
	706
a4bfb4cf JB	707	/*
	708	* While a write will already be ordering the data, a truncate will not.
	709	* Thus, we need to explicitly order the zeroed pages.
	710	*/
c7d2cbc3	711	static handle_t ocfs2_zero_start_ordered_transaction(struct inode inode,
bbd0f327 JQ	712	struct buffer_head *di_bh,
	713	loff_t start_byte,
	714	loff_t length)
a4bfb4cf JB	715	{
	716	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	717	handle_t *handle = NULL;
	718	int ret = 0;
	719
	720	if (!ocfs2_should_order_data(inode))
	721	goto out;
	722
	723	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	724	if (IS_ERR(handle)) {
	725	ret = -ENOMEM;
	726	mlog_errno(ret);
	727	goto out;
	728	}
	729
bbd0f327	730	ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length);
c7d2cbc3 JB	731	if (ret < 0) {
	732	mlog_errno(ret);
	733	goto out;
	734	}
	735
	736	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
	737	OCFS2_JOURNAL_ACCESS_WRITE);
	738	if (ret)
a4bfb4cf	739	mlog_errno(ret);
6fdb702d	740	ocfs2_update_inode_fsync_trans(handle, inode, 1);
a4bfb4cf JB	741
	742	out:
	743	if (ret) {
	744	if (!IS_ERR(handle))
	745	ocfs2_commit_trans(osb, handle);
	746	handle = ERR_PTR(ret);
	747	}
	748	return handle;
	749	}
	750
ccd979bd MF	751	/* Some parts of this taken from generic_cont_expand, which turned out
ccd979bd MF	752	* to be too fragile to do exactly what we need without us having to
4e02ed4b	753	* worry about recursive locking in ->write_begin() and ->write_end(). */
a4bfb4cf	754	static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
c7d2cbc3	755	u64 abs_to, struct buffer_head *di_bh)
ccd979bd MF	756	{
ccd979bd MF	757	struct address_space *mapping = inode->i_mapping;
7f90d7f1	758	struct folio *folio;
09cbfeaf	759	unsigned long index = abs_from >> PAGE_SHIFT;
f775da2f	760	handle_t *handle;
5453258d	761	int ret = 0;
a4bfb4cf	762	unsigned zero_from, zero_to, block_start, block_end;
c7d2cbc3	763	struct ocfs2_dinode di = (struct ocfs2_dinode )di_bh->b_data;
ccd979bd	764
a4bfb4cf	765	BUG_ON(abs_from >= abs_to);
09cbfeaf	766	BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
a4bfb4cf	767	BUG_ON(abs_from & (inode->i_blkbits - 1));
ccd979bd	768
bbd0f327 JQ	769	handle = ocfs2_zero_start_ordered_transaction(inode, di_bh,
	770	abs_from,
	771	abs_to - abs_from);
f775da2f JB	772	if (IS_ERR(handle)) {
	773	ret = PTR_ERR(handle);
	774	goto out;
	775	}
	776
7f90d7f1 MWO	777	folio = __filemap_get_folio(mapping, index,
	778	FGP_LOCK \| FGP_ACCESSED \| FGP_CREAT, GFP_NOFS);
	779	if (IS_ERR(folio)) {
	780	ret = PTR_ERR(folio);
ccd979bd	781	mlog_errno(ret);
f775da2f	782	goto out_commit_trans;
ccd979bd MF	783	}
ccd979bd MF	784
e7606f4a MT	785	/* Get the offsets within the folio that we want to zero */
	786	zero_from = offset_in_folio(folio, abs_from);
	787	zero_to = offset_in_folio(folio, abs_to);
a4bfb4cf	788	if (!zero_to)
e7606f4a	789	zero_to = folio_size(folio);
ccd979bd	790
468eedde TM	791	trace_ocfs2_write_zero_page(
	792	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	793	(unsigned long long)abs_from,
	794	(unsigned long long)abs_to,
	795	index, zero_from, zero_to);
5693486b	796
a4bfb4cf JB	797	/* We know that zero_from is block aligned */
	798	for (block_start = zero_from; block_start < zero_to;
	799	block_start = block_end) {
93407472	800	block_end = block_start + i_blocksize(inode);
a4bfb4cf JB	801
a4bfb4cf JB	802	/*
ebdec241 CH	803	* block_start is block-aligned. Bump it by one to force
ebdec241 CH	804	* __block_write_begin and block_commit_write to zero the
a4bfb4cf JB	805	* whole block.
a4bfb4cf JB	806	*/
9f04609f	807	ret = __block_write_begin(folio, block_start + 1, 0,
ebdec241	808	ocfs2_get_block);
a4bfb4cf JB	809	if (ret < 0) {
a4bfb4cf JB	810	mlog_errno(ret);
ccd979bd MF	811	goto out_unlock;
ccd979bd MF	812	}
ccd979bd	813
a4bfb4cf JB	814
a4bfb4cf JB	815	/* must not update i_size! */
7f90d7f1	816	block_commit_write(&folio->page, block_start + 1, block_start + 1);
a4bfb4cf	817	}
ccd979bd	818
f775da2f JB	819	/*
	820	* fs-writeback will release the dirty pages without page lock
	821	* whose offset are over inode size, the release happens at
17bf23a9	822	* block_write_full_folio().
f775da2f JB	823	*/
	824	i_size_write(inode, abs_to);
	825	inode->i_blocks = ocfs2_inode_sector_count(inode);
	826	di->i_size = cpu_to_le64((u64)i_size_read(inode));
fd6acbbc JL	827	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	828	di->i_mtime = di->i_ctime = cpu_to_le64(inode_get_mtime_sec(inode));
	829	di->i_ctime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
f775da2f	830	di->i_mtime_nsec = di->i_ctime_nsec;
c7d2cbc3	831	if (handle) {
c7d2cbc3	832	ocfs2_journal_dirty(handle, di_bh);
6fdb702d	833	ocfs2_update_inode_fsync_trans(handle, inode, 1);
c7d2cbc3	834	}
a4bfb4cf	835
ccd979bd	836	out_unlock:
7f90d7f1 MWO	837	folio_unlock(folio);
7f90d7f1 MWO	838	folio_put(folio);
f775da2f JB	839	out_commit_trans:
	840	if (handle)
	841	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
ccd979bd MF	842	out:
	843	return ret;
	844	}
	845
5693486b JB	846	/*
	847	* Find the next range to zero. We do this in terms of bytes because
	848	* that's what ocfs2_zero_extend() wants, and it is dealing with the
	849	* pagecache. We may return multiple extents.
	850	*
	851	* zero_start and zero_end are ocfs2_zero_extend()s current idea of what
	852	* needs to be zeroed. range_start and range_end return the next zeroing
	853	* range. A subsequent call should pass the previous range_end as its
	854	* zero_start. If range_end is 0, there's nothing to do.
	855	*
	856	* Unwritten extents are skipped over. Refcounted extents are CoWd.
	857	*/
	858	static int ocfs2_zero_extend_get_range(struct inode *inode,
	859	struct buffer_head *di_bh,
	860	u64 zero_start, u64 zero_end,
	861	u64 range_start, u64 range_end)
ccd979bd	862	{
5693486b JB	863	int rc = 0, needs_cow = 0;
	864	u32 p_cpos, zero_clusters = 0;
	865	u32 zero_cpos =
	866	zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	867	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
	868	unsigned int num_clusters = 0;
	869	unsigned int ext_flags = 0;
ccd979bd	870
5693486b JB	871	while (zero_cpos < last_cpos) {
	872	rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
	873	&num_clusters, &ext_flags);
	874	if (rc) {
	875	mlog_errno(rc);
ccd979bd MF	876	goto out;
	877	}
	878
5693486b JB	879	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
	880	zero_clusters = num_clusters;
	881	if (ext_flags & OCFS2_EXT_REFCOUNTED)
	882	needs_cow = 1;
	883	break;
	884	}
	885
	886	zero_cpos += num_clusters;
	887	}
	888	if (!zero_clusters) {
	889	*range_end = 0;
	890	goto out;
	891	}
	892
	893	while ((zero_cpos + zero_clusters) < last_cpos) {
	894	rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
	895	&p_cpos, &num_clusters,
	896	&ext_flags);
	897	if (rc) {
	898	mlog_errno(rc);
	899	goto out;
	900	}
	901
	902	if (!p_cpos \|\| (ext_flags & OCFS2_EXT_UNWRITTEN))
	903	break;
	904	if (ext_flags & OCFS2_EXT_REFCOUNTED)
	905	needs_cow = 1;
	906	zero_clusters += num_clusters;
	907	}
	908	if ((zero_cpos + zero_clusters) > last_cpos)
	909	zero_clusters = last_cpos - zero_cpos;
	910
	911	if (needs_cow) {
c7dd3392	912	rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
15502712	913	zero_clusters, UINT_MAX);
5693486b JB	914	if (rc) {
	915	mlog_errno(rc);
	916	goto out;
	917	}
	918	}
	919
	920	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
	921	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
	922	zero_cpos + zero_clusters);
	923
	924	out:
	925	return rc;
	926	}
	927
	928	/*
	929	* Zero one range returned from ocfs2_zero_extend_get_range(). The caller
	930	* has made sure that the entire range needs zeroing.
	931	*/
	932	static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
c7d2cbc3	933	u64 range_end, struct buffer_head *di_bh)
5693486b JB	934	{
	935	int rc = 0;
	936	u64 next_pos;
	937	u64 zero_pos = range_start;
	938
468eedde TM	939	trace_ocfs2_zero_extend_range(
	940	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	941	(unsigned long long)range_start,
	942	(unsigned long long)range_end);
5693486b JB	943	BUG_ON(range_start >= range_end);
	944
	945	while (zero_pos < range_end) {
09cbfeaf	946	next_pos = (zero_pos & PAGE_MASK) + PAGE_SIZE;
5693486b JB	947	if (next_pos > range_end)
5693486b JB	948	next_pos = range_end;
c7d2cbc3	949	rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
5693486b JB	950	if (rc < 0) {
	951	mlog_errno(rc);
	952	break;
	953	}
	954	zero_pos = next_pos;
e2057c5a MF	955
	956	/*
	957	* Very large extends have the potential to lock up
	958	* the cpu for extended periods of time.
	959	*/
	960	cond_resched();
ccd979bd MF	961	}
ccd979bd MF	962
5693486b JB	963	return rc;
	964	}
	965
	966	int ocfs2_zero_extend(struct inode inode, struct buffer_head di_bh,
	967	loff_t zero_to_size)
	968	{
	969	int ret = 0;
	970	u64 zero_start, range_start = 0, range_end = 0;
	971	struct super_block *sb = inode->i_sb;
	972
	973	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
468eedde TM	974	trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno,
	975	(unsigned long long)zero_start,
	976	(unsigned long long)i_size_read(inode));
5693486b JB	977	while (zero_start < zero_to_size) {
	978	ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
	979	zero_to_size,
	980	&range_start,
	981	&range_end);
	982	if (ret) {
	983	mlog_errno(ret);
	984	break;
	985	}
	986	if (!range_end)
	987	break;
	988	/* Trim the ends */
	989	if (range_start < zero_start)
	990	range_start = zero_start;
	991	if (range_end > zero_to_size)
	992	range_end = zero_to_size;
	993
	994	ret = ocfs2_zero_extend_range(inode, range_start,
c7d2cbc3	995	range_end, di_bh);
5693486b JB	996	if (ret) {
	997	mlog_errno(ret);
	998	break;
	999	}
	1000	zero_start = range_end;
	1001	}
	1002
ccd979bd MF	1003	return ret;
	1004	}
	1005
5693486b JB	1006	int ocfs2_extend_no_holes(struct inode inode, struct buffer_head di_bh,
5693486b JB	1007	u64 new_i_size, u64 zero_to)
65ed39d6 MF	1008	{
	1009	int ret;
	1010	u32 clusters_to_add;
	1011	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	1012
5693486b JB	1013	/*
	1014	* Only quota files call this without a bh, and they can't be
	1015	* refcounted.
	1016	*/
84e40080	1017	BUG_ON(!di_bh && ocfs2_is_refcount_inode(inode));
5693486b JB	1018	BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
5693486b JB	1019
65ed39d6 MF	1020	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
	1021	if (clusters_to_add < oi->ip_clusters)
	1022	clusters_to_add = 0;
	1023	else
	1024	clusters_to_add -= oi->ip_clusters;
	1025
	1026	if (clusters_to_add) {
5bc55d65 JG	1027	ret = ocfs2_extend_allocation(inode, oi->ip_clusters,
5bc55d65 JG	1028	clusters_to_add, 0);
65ed39d6 MF	1029	if (ret) {
	1030	mlog_errno(ret);
	1031	goto out;
	1032	}
	1033	}
	1034
	1035	/*
	1036	* Call this even if we don't add any clusters to the tree. We
	1037	* still need to zero the area between the old i_size and the
	1038	* new i_size.
	1039	*/
5693486b	1040	ret = ocfs2_zero_extend(inode, di_bh, zero_to);
65ed39d6 MF	1041	if (ret < 0)
	1042	mlog_errno(ret);
	1043
	1044	out:
	1045	return ret;
	1046	}
	1047
ccd979bd MF	1048	static int ocfs2_extend_file(struct inode *inode,
ccd979bd MF	1049	struct buffer_head *di_bh,
65ed39d6	1050	u64 new_i_size)
ccd979bd	1051	{
c934a92d	1052	int ret = 0;
1afc32b9	1053	struct ocfs2_inode_info *oi = OCFS2_I(inode);
ccd979bd	1054
65ed39d6	1055	BUG_ON(!di_bh);
53013cba	1056
ccd979bd MF	1057	/* setattr sometimes calls us like this. */
	1058	if (new_i_size == 0)
	1059	goto out;
	1060
	1061	if (i_size_read(inode) == new_i_size)
5693486b	1062	goto out;
ccd979bd MF	1063	BUG_ON(new_i_size < i_size_read(inode));
ccd979bd MF	1064
0effef77	1065	/*
65ed39d6 MF	1066	* The alloc sem blocks people in read/write from reading our
65ed39d6 MF	1067	* allocation until we're done changing it. We depend on
137cebf9	1068	* i_rwsem to block other extend/truncate calls while we're
5693486b JB	1069	* here. We even have to hold it for sparse files because there
5693486b JB	1070	* might be some tail zeroing.
0effef77	1071	*/
1afc32b9 MF	1072	down_write(&oi->ip_alloc_sem);
	1073
	1074	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
	1075	/*
	1076	* We can optimize small extends by keeping the inodes
	1077	* inline data.
	1078	*/
	1079	if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
	1080	up_write(&oi->ip_alloc_sem);
	1081	goto out_update_size;
	1082	}
	1083
	1084	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
	1085	if (ret) {
	1086	up_write(&oi->ip_alloc_sem);
1afc32b9	1087	mlog_errno(ret);
c934a92d	1088	goto out;
1afc32b9 MF	1089	}
	1090	}
	1091
5693486b JB	1092	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
	1093	ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
	1094	else
	1095	ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
	1096	new_i_size);
1afc32b9 MF	1097
1afc32b9 MF	1098	up_write(&oi->ip_alloc_sem);
65ed39d6	1099
0effef77 MF	1100	if (ret < 0) {
0effef77 MF	1101	mlog_errno(ret);
c934a92d	1102	goto out;
53013cba MF	1103	}
53013cba MF	1104
3a0782d0	1105	out_update_size:
65ed39d6 MF	1106	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
	1107	if (ret < 0)
	1108	mlog_errno(ret);
ccd979bd MF	1109
	1110	out:
	1111	return ret;
	1112	}
	1113
c1632a0f	1114	int ocfs2_setattr(struct mnt_idmap idmap, struct dentry dentry,
549c7297	1115	struct iattr *attr)
ccd979bd MF	1116	{
ccd979bd MF	1117	int status = 0, size_change;
3d46a44a	1118	int inode_locked = 0;
2b0143b5	1119	struct inode *inode = d_inode(dentry);
ccd979bd MF	1120	struct super_block *sb = inode->i_sb;
	1121	struct ocfs2_super *osb = OCFS2_SB(sb);
	1122	struct buffer_head *bh = NULL;
1fabe148	1123	handle_t *handle = NULL;
65bac575	1124	struct dquot *transfer_to[MAXQUOTAS] = { };
52a9ee28	1125	int qtype;
b891fa50 ER	1126	int had_lock;
b891fa50 ER	1127	struct ocfs2_lock_holder oh;
ccd979bd	1128
468eedde TM	1129	trace_ocfs2_setattr(inode, dentry,
	1130	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	1131	dentry->d_name.len, dentry->d_name.name,
15f34347 AZ	1132	attr->ia_valid,
	1133	attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0,
	1134	attr->ia_valid & ATTR_UID ?
	1135	from_kuid(&init_user_ns, attr->ia_uid) : 0,
	1136	attr->ia_valid & ATTR_GID ?
	1137	from_kgid(&init_user_ns, attr->ia_gid) : 0);
ccd979bd	1138
bc535809 SM	1139	/* ensuring we don't even attempt to truncate a symlink */
	1140	if (S_ISLNK(inode->i_mode))
	1141	attr->ia_valid &= ~ATTR_SIZE;
	1142
ccd979bd MF	1143	#define OCFS2_VALID_ATTRS (ATTR_ATIME \| ATTR_MTIME \| ATTR_CTIME \| ATTR_SIZE \
ccd979bd MF	1144	\| ATTR_GID \| ATTR_UID \| ATTR_MODE)
468eedde	1145	if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
ccd979bd	1146	return 0;
ccd979bd	1147
c1632a0f	1148	status = setattr_prepare(&nop_mnt_idmap, dentry, attr);
ccd979bd MF	1149	if (status)
	1150	return status;
	1151
f861646a	1152	if (is_quota_modification(&nop_mnt_idmap, inode, attr)) {
9c89fe0a JK	1153	status = dquot_initialize(inode);
	1154	if (status)
	1155	return status;
	1156	}
ccd979bd MF	1157	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
ccd979bd MF	1158	if (size_change) {
28f5a8a7	1159	/*
	1160	* Here we should wait dio to finish before inode lock
	1161	* to avoid a deadlock between ocfs2_setattr() and
	1162	* ocfs2_dio_end_io_write()
	1163	*/
	1164	inode_dio_wait(inode);
	1165
ccd979bd MF	1166	status = ocfs2_rw_lock(inode, 1);
	1167	if (status < 0) {
	1168	mlog_errno(status);
	1169	goto bail;
	1170	}
	1171	}
	1172
b891fa50 ER	1173	had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
	1174	if (had_lock < 0) {
	1175	status = had_lock;
ccd979bd	1176	goto bail_unlock_rw;
b891fa50 ER	1177	} else if (had_lock) {
	1178	/*
	1179	* As far as we know, ocfs2_setattr() could only be the first
	1180	* VFS entry point in the call chain of recursive cluster
	1181	* locking issue.
	1182	*
	1183	* For instance:
	1184	* chmod_common()
	1185	* notify_change()
	1186	* ocfs2_setattr()
	1187	* posix_acl_chmod()
	1188	* ocfs2_iop_get_acl()
	1189	*
	1190	* But, we're not 100% sure if it's always true, because the
	1191	* ordering of the VFS entry points in the call chain is out
	1192	* of our control. So, we'd better dump the stack here to
	1193	* catch the other cases of recursive locking.
	1194	*/
	1195	mlog(ML_ERROR, "Another case of recursive locking:\n");
	1196	dump_stack();
ccd979bd	1197	}
3d46a44a	1198	inode_locked = 1;
ccd979bd	1199
d62e74be	1200	if (size_change) {
5051f768 WW	1201	status = inode_newsize_ok(inode, attr->ia_size);
5051f768 WW	1202	if (status)
ce76fd30	1203	goto bail_unlock;
ce76fd30	1204
d62e74be	1205	if (i_size_read(inode) >= attr->ia_size) {
2b4e30fb JB	1206	if (ocfs2_should_order_data(inode)) {
	1207	status = ocfs2_begin_ordered_truncate(inode,
	1208	attr->ia_size);
	1209	if (status)
	1210	goto bail_unlock;
	1211	}
ccd979bd	1212	status = ocfs2_truncate_file(inode, bh, attr->ia_size);
2b4e30fb	1213	} else
65ed39d6	1214	status = ocfs2_extend_file(inode, bh, attr->ia_size);
ccd979bd MF	1215	if (status < 0) {
	1216	if (status != -ENOSPC)
	1217	mlog_errno(status);
	1218	status = -ENOSPC;
	1219	goto bail_unlock;
	1220	}
	1221	}
	1222
488c8ef0 EB	1223	if ((attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) \|\|
488c8ef0 EB	1224	(attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
65bac575 JK	1225	/*
	1226	* Gather pointers to quota structures so that allocation /
	1227	* freeing of quota structures happens here and not inside
b43fa828	1228	* dquot_transfer() where we have problems with lock ordering
65bac575	1229	*/
488c8ef0	1230	if (attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)
a90714c1 JK	1231	&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
a90714c1 JK	1232	OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
aca645a6	1233	transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
6184fc0b JK	1234	if (IS_ERR(transfer_to[USRQUOTA])) {
6184fc0b JK	1235	status = PTR_ERR(transfer_to[USRQUOTA]);
ce750f43	1236	transfer_to[USRQUOTA] = NULL;
a90714c1	1237	goto bail_unlock;
65bac575	1238	}
a90714c1	1239	}
488c8ef0	1240	if (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)
a90714c1 JK	1241	&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
a90714c1 JK	1242	OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
aca645a6	1243	transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
6184fc0b JK	1244	if (IS_ERR(transfer_to[GRPQUOTA])) {
6184fc0b JK	1245	status = PTR_ERR(transfer_to[GRPQUOTA]);
ce750f43	1246	transfer_to[GRPQUOTA] = NULL;
a90714c1	1247	goto bail_unlock;
65bac575	1248	}
a90714c1	1249	}
90bd070a	1250	down_write(&OCFS2_I(inode)->ip_alloc_sem);
65bac575 JK	1251	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
65bac575 JK	1252	2 * ocfs2_quota_trans_credits(sb));
a90714c1 JK	1253	if (IS_ERR(handle)) {
	1254	status = PTR_ERR(handle);
	1255	mlog_errno(status);
90bd070a	1256	goto bail_unlock_alloc;
a90714c1	1257	}
52a9ee28	1258	status = __dquot_transfer(inode, transfer_to);
a90714c1 JK	1259	if (status < 0)
	1260	goto bail_commit;
	1261	} else {
90bd070a	1262	down_write(&OCFS2_I(inode)->ip_alloc_sem);
a90714c1 JK	1263	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	1264	if (IS_ERR(handle)) {
	1265	status = PTR_ERR(handle);
	1266	mlog_errno(status);
90bd070a	1267	goto bail_unlock_alloc;
a90714c1	1268	}
ccd979bd MF	1269	}
ccd979bd MF	1270
c1632a0f	1271	setattr_copy(&nop_mnt_idmap, inode, attr);
1025774c CH	1272	mark_inode_dirty(inode);
1025774c CH	1273
ccd979bd MF	1274	status = ocfs2_mark_inode_dirty(handle, inode, bh);
	1275	if (status < 0)
	1276	mlog_errno(status);
	1277
	1278	bail_commit:
02dc1af4	1279	ocfs2_commit_trans(osb, handle);
90bd070a WW	1280	bail_unlock_alloc:
90bd070a WW	1281	up_write(&OCFS2_I(inode)->ip_alloc_sem);
ccd979bd	1282	bail_unlock:
b891fa50 ER	1283	if (status && inode_locked) {
b891fa50 ER	1284	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
3d46a44a TS	1285	inode_locked = 0;
3d46a44a TS	1286	}
ccd979bd MF	1287	bail_unlock_rw:
	1288	if (size_change)
	1289	ocfs2_rw_unlock(inode, 1);
	1290	bail:
ccd979bd	1291
65bac575	1292	/* Release quota pointers in case we acquired them */
52362810	1293	for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
65bac575	1294	dqput(transfer_to[qtype]);
65bac575	1295
060bc66d	1296	if (!status && attr->ia_valid & ATTR_MODE) {
5ee0fbd5	1297	status = ocfs2_acl_chmod(inode, bh);
060bc66d TY	1298	if (status < 0)
	1299	mlog_errno(status);
	1300	}
3d46a44a	1301	if (inode_locked)
b891fa50	1302	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
060bc66d	1303
5ee0fbd5	1304	brelse(bh);
ccd979bd MF	1305	return status;
	1306	}
	1307
b74d24f7	1308	int ocfs2_getattr(struct mnt_idmap idmap, const struct path path,
549c7297	1309	struct kstat *stat, u32 request_mask, unsigned int flags)
ccd979bd	1310	{
a528d35e DH	1311	struct inode *inode = d_inode(path->dentry);
a528d35e DH	1312	struct super_block *sb = path->dentry->d_sb;
ccd979bd MF	1313	struct ocfs2_super *osb = sb->s_fs_info;
	1314	int err;
	1315
a528d35e	1316	err = ocfs2_inode_revalidate(path->dentry);
ccd979bd MF	1317	if (err) {
	1318	if (err != -ENOENT)
	1319	mlog_errno(err);
	1320	goto bail;
	1321	}
	1322
0d72b928	1323	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
d6364627 JH	1324	/*
	1325	* If there is inline data in the inode, the inode will normally not
	1326	* have data blocks allocated (it may have an external xattr block).
	1327	* Report at least one sector for such files, so tools like tar, rsync,
	1328	* others don't incorrectly think the file is completely sparse.
	1329	*/
	1330	if (unlikely(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
	1331	stat->blocks += (stat->size + 511)>>9;
ccd979bd MF	1332
	1333	/* We set the blksize from the cluster size for performance */
	1334	stat->blksize = osb->s_clustersize;
	1335
	1336	bail:
ccd979bd MF	1337	return err;
	1338	}
	1339
4609e1f1	1340	int ocfs2_permission(struct mnt_idmap idmap, struct inode inode,
549c7297	1341	int mask)
d38eb8db	1342	{
b891fa50 ER	1343	int ret, had_lock;
b891fa50 ER	1344	struct ocfs2_lock_holder oh;
d38eb8db	1345
10556cb2	1346	if (mask & MAY_NOT_BLOCK)
b74c79e9 NP	1347	return -ECHILD;
b74c79e9 NP	1348
b891fa50 ER	1349	had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh);
	1350	if (had_lock < 0) {
	1351	ret = had_lock;
d38eb8db	1352	goto out;
b891fa50 ER	1353	} else if (had_lock) {
	1354	/* See comments in ocfs2_setattr() for details.
	1355	* The call chain of this case could be:
	1356	* do_sys_open()
	1357	* may_open()
	1358	* inode_permission()
	1359	* ocfs2_permission()
	1360	* ocfs2_iop_get_acl()
	1361	*/
	1362	mlog(ML_ERROR, "Another case of recursive locking:\n");
	1363	dump_stack();
d38eb8db TY	1364	}
d38eb8db TY	1365
4609e1f1	1366	ret = generic_permission(&nop_mnt_idmap, inode, mask);
d38eb8db	1367
b891fa50	1368	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
d38eb8db	1369	out:
d38eb8db TY	1370	return ret;
	1371	}
	1372
b2580103 MF	1373	static int __ocfs2_write_remove_suid(struct inode *inode,
b2580103 MF	1374	struct buffer_head *bh)
ccd979bd MF	1375	{
ccd979bd MF	1376	int ret;
1fabe148	1377	handle_t *handle;
ccd979bd MF	1378	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	1379	struct ocfs2_dinode *di;
	1380
468eedde TM	1381	trace_ocfs2_write_remove_suid(
	1382	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	1383	inode->i_mode);
ccd979bd	1384
65eff9cc	1385	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
fa38e92c JK	1386	if (IS_ERR(handle)) {
fa38e92c JK	1387	ret = PTR_ERR(handle);
ccd979bd MF	1388	mlog_errno(ret);
	1389	goto out;
	1390	}
	1391
0cf2f763	1392	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
13723d00	1393	OCFS2_JOURNAL_ACCESS_WRITE);
ccd979bd MF	1394	if (ret < 0) {
ccd979bd MF	1395	mlog_errno(ret);
b2580103	1396	goto out_trans;
ccd979bd MF	1397	}
	1398
	1399	inode->i_mode &= ~S_ISUID;
	1400	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
	1401	inode->i_mode &= ~S_ISGID;
	1402
	1403	di = (struct ocfs2_dinode *) bh->b_data;
	1404	di->i_mode = cpu_to_le16(inode->i_mode);
6fdb702d	1405	ocfs2_update_inode_fsync_trans(handle, inode, 0);
ccd979bd	1406
ec20cec7	1407	ocfs2_journal_dirty(handle, bh);
b2580103	1408
ccd979bd	1409	out_trans:
02dc1af4	1410	ocfs2_commit_trans(osb, handle);
ccd979bd	1411	out:
ccd979bd MF	1412	return ret;
	1413	}
	1414
b2580103 MF	1415	static int ocfs2_write_remove_suid(struct inode *inode)
	1416	{
	1417	int ret;
	1418	struct buffer_head *bh = NULL;
b2580103	1419
b657c95c	1420	ret = ocfs2_read_inode_block(inode, &bh);
b2580103 MF	1421	if (ret < 0) {
	1422	mlog_errno(ret);
	1423	goto out;
	1424	}
	1425
	1426	ret = __ocfs2_write_remove_suid(inode, bh);
	1427	out:
	1428	brelse(bh);
	1429	return ret;
	1430	}
	1431
2ae99a60 MF	1432	/*
	1433	* Allocate enough extents to cover the region starting at byte offset
	1434	* start for len bytes. Existing extents are skipped, any extents
	1435	* added are marked as "unwritten".
	1436	*/
	1437	static int ocfs2_allocate_unwritten_extents(struct inode *inode,
	1438	u64 start, u64 len)
	1439	{
	1440	int ret;
	1441	u32 cpos, phys_cpos, clusters, alloc_size;
1afc32b9 MF	1442	u64 end = start + len;
	1443	struct buffer_head *di_bh = NULL;
	1444
	1445	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
b657c95c	1446	ret = ocfs2_read_inode_block(inode, &di_bh);
1afc32b9 MF	1447	if (ret) {
	1448	mlog_errno(ret);
	1449	goto out;
	1450	}
	1451
	1452	/*
	1453	* Nothing to do if the requested reservation range
	1454	* fits within the inode.
	1455	*/
	1456	if (ocfs2_size_fits_inline_data(di_bh, end))
	1457	goto out;
	1458
	1459	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
	1460	if (ret) {
	1461	mlog_errno(ret);
	1462	goto out;
	1463	}
	1464	}
2ae99a60 MF	1465
	1466	/*
	1467	* We consider both start and len to be inclusive.
	1468	*/
	1469	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	1470	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
	1471	clusters -= cpos;
	1472
	1473	while (clusters) {
	1474	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
	1475	&alloc_size, NULL);
	1476	if (ret) {
	1477	mlog_errno(ret);
	1478	goto out;
	1479	}
	1480
	1481	/*
	1482	* Hole or existing extent len can be arbitrary, so
	1483	* cap it to our own allocation request.
	1484	*/
	1485	if (alloc_size > clusters)
	1486	alloc_size = clusters;
	1487
	1488	if (phys_cpos) {
	1489	/*
	1490	* We already have an allocation at this
	1491	* region so we can safely skip it.
	1492	*/
	1493	goto next;
	1494	}
	1495
5bc55d65	1496	ret = ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
2ae99a60 MF	1497	if (ret) {
	1498	if (ret != -ENOSPC)
	1499	mlog_errno(ret);
	1500	goto out;
	1501	}
	1502
	1503	next:
	1504	cpos += alloc_size;
	1505	clusters -= alloc_size;
	1506	}
	1507
	1508	ret = 0;
	1509	out:
1afc32b9 MF	1510
1afc32b9 MF	1511	brelse(di_bh);
2ae99a60 MF	1512	return ret;
	1513	}
	1514
063c4561 MF	1515	/*
	1516	* Truncate a byte range, avoiding pages within partial clusters. This
	1517	* preserves those pages for the zeroing code to write to.
	1518	*/
	1519	static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
	1520	u64 byte_len)
	1521	{
	1522	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	1523	loff_t start, end;
	1524	struct address_space *mapping = inode->i_mapping;
	1525
	1526	start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
	1527	end = byte_start + byte_len;
	1528	end = end & ~(osb->s_clustersize - 1);
	1529
	1530	if (start < end) {
	1531	unmap_mapping_range(mapping, start, end - start, 0);
	1532	truncate_inode_pages_range(mapping, start, end - 1);
	1533	}
	1534	}
	1535
9449ad33 JB	1536	/*
	1537	* zero out partial blocks of one cluster.
	1538	*
	1539	* start: file offset where zero starts, will be made upper block aligned.
	1540	* len: it will be trimmed to the end of current cluster if "start + len"
	1541	* is bigger than it.
	1542	*/
	1543	static int ocfs2_zeroout_partial_cluster(struct inode *inode,
	1544	u64 start, u64 len)
	1545	{
	1546	int ret;
	1547	u64 start_block, end_block, nr_blocks;
	1548	u64 p_block, offset;
	1549	u32 cluster, p_cluster, nr_clusters;
	1550	struct super_block *sb = inode->i_sb;
	1551	u64 end = ocfs2_align_bytes_to_clusters(sb, start);
	1552
	1553	if (start + len < end)
	1554	end = start + len;
	1555
	1556	start_block = ocfs2_blocks_for_bytes(sb, start);
	1557	end_block = ocfs2_blocks_for_bytes(sb, end);
	1558	nr_blocks = end_block - start_block;
	1559	if (!nr_blocks)
	1560	return 0;
	1561
	1562	cluster = ocfs2_bytes_to_clusters(sb, start);
	1563	ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
	1564	&nr_clusters, NULL);
	1565	if (ret)
	1566	return ret;
	1567	if (!p_cluster)
	1568	return 0;
	1569
	1570	offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
	1571	p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
	1572	return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
	1573	}
	1574
063c4561 MF	1575	static int ocfs2_zero_partial_clusters(struct inode *inode,
	1576	u64 start, u64 len)
	1577	{
	1578	int ret = 0;
d21c353d AS	1579	u64 tmpend = 0;
d21c353d AS	1580	u64 end = start + len;
063c4561 MF	1581	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	1582	unsigned int csize = osb->s_clustersize;
	1583	handle_t *handle;
9449ad33	1584	loff_t isize = i_size_read(inode);
063c4561 MF	1585
	1586	/*
	1587	* The "start" and "end" values are NOT necessarily part of
	1588	* the range whose allocation is being deleted. Rather, this
	1589	* is what the user passed in with the request. We must zero
	1590	* partial clusters here. There's no need to worry about
	1591	* physical allocation - the zeroing code knows to skip holes.
	1592	*/
468eedde TM	1593	trace_ocfs2_zero_partial_clusters(
	1594	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	1595	(unsigned long long)start, (unsigned long long)end);
063c4561 MF	1596
	1597	/*
	1598	* If both edges are on a cluster boundary then there's no
	1599	* zeroing required as the region is part of the allocation to
	1600	* be truncated.
	1601	*/
	1602	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
	1603	goto out;
	1604
9449ad33 JB	1605	/* No page cache for EOF blocks, issue zero out to disk. */
	1606	if (end > isize) {
	1607	/*
	1608	* zeroout eof blocks in last cluster starting from
	1609	* "isize" even "start" > "isize" because it is
	1610	* complicated to zeroout just at "start" as "start"
	1611	* may be not aligned with block size, buffer write
	1612	* would be required to do that, but out of eof buffer
	1613	* write is not supported.
	1614	*/
	1615	ret = ocfs2_zeroout_partial_cluster(inode, isize,
	1616	end - isize);
	1617	if (ret) {
	1618	mlog_errno(ret);
	1619	goto out;
	1620	}
	1621	if (start >= isize)
	1622	goto out;
	1623	end = isize;
	1624	}
063c4561	1625	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
fa38e92c JK	1626	if (IS_ERR(handle)) {
fa38e92c JK	1627	ret = PTR_ERR(handle);
063c4561 MF	1628	mlog_errno(ret);
	1629	goto out;
	1630	}
	1631
	1632	/*
d21c353d AS	1633	* If start is on a cluster boundary and end is somewhere in another
	1634	* cluster, we have not COWed the cluster starting at start, unless
	1635	* end is also within the same cluster. So, in this case, we skip this
	1636	* first call to ocfs2_zero_range_for_truncate() truncate and move on
	1637	* to the next one.
063c4561	1638	*/
d21c353d AS	1639	if ((start & (csize - 1)) != 0) {
	1640	/*
	1641	* We want to get the byte offset of the end of the 1st
	1642	* cluster.
	1643	*/
	1644	tmpend = (u64)osb->s_clustersize +
	1645	(start & ~(osb->s_clustersize - 1));
	1646	if (tmpend > end)
	1647	tmpend = end;
063c4561	1648
d21c353d AS	1649	trace_ocfs2_zero_partial_clusters_range1(
	1650	(unsigned long long)start,
	1651	(unsigned long long)tmpend);
063c4561	1652
d21c353d AS	1653	ret = ocfs2_zero_range_for_truncate(inode, handle, start,
	1654	tmpend);
	1655	if (ret)
	1656	mlog_errno(ret);
	1657	}
063c4561 MF	1658
	1659	if (tmpend < end) {
	1660	/*
	1661	* This may make start and end equal, but the zeroing
	1662	* code will skip any work in that case so there's no
	1663	* need to catch it up here.
	1664	*/
	1665	start = end & ~(osb->s_clustersize - 1);
	1666
468eedde TM	1667	trace_ocfs2_zero_partial_clusters_range2(
468eedde TM	1668	(unsigned long long)start, (unsigned long long)end);
063c4561 MF	1669
	1670	ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
	1671	if (ret)
	1672	mlog_errno(ret);
	1673	}
6fdb702d	1674	ocfs2_update_inode_fsync_trans(handle, inode, 1);
063c4561 MF	1675
	1676	ocfs2_commit_trans(osb, handle);
	1677	out:
	1678	return ret;
	1679	}
	1680
c1631d4a TY	1681	static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
	1682	{
	1683	int i;
	1684	struct ocfs2_extent_rec *rec = NULL;
	1685
	1686	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
	1687
	1688	rec = &el->l_recs[i];
	1689
	1690	if (le32_to_cpu(rec->e_cpos) < pos)
	1691	break;
	1692	}
	1693
	1694	return i;
	1695	}
	1696
	1697	/*
	1698	* Helper to calculate the punching pos and length in one run, we handle the
	1699	* following three cases in order:
	1700	*
	1701	* - remove the entire record
	1702	* - remove a partial record
	1703	* - no record needs to be removed (hole-punching completed)
	1704	*/
	1705	static void ocfs2_calc_trunc_pos(struct inode *inode,
	1706	struct ocfs2_extent_list *el,
	1707	struct ocfs2_extent_rec *rec,
	1708	u32 trunc_start, u32 *trunc_cpos,
	1709	u32 trunc_len, u32 trunc_end,
	1710	u64 blkno, int done)
	1711	{
	1712	int ret = 0;
	1713	u32 coff, range;
	1714
	1715	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
	1716
	1717	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
9a790ba1 TY	1718	/*
	1719	* remove an entire extent record.
	1720	*/
c1631d4a TY	1721	*trunc_cpos = le32_to_cpu(rec->e_cpos);
	1722	/*
	1723	* Skip holes if any.
	1724	*/
	1725	if (range < *trunc_end)
	1726	*trunc_end = range;
	1727	trunc_len = trunc_end - le32_to_cpu(rec->e_cpos);
	1728	*blkno = le64_to_cpu(rec->e_blkno);
	1729	*trunc_end = le32_to_cpu(rec->e_cpos);
	1730	} else if (range > trunc_start) {
9a790ba1 TY	1731	/*
	1732	* remove a partial extent record, which means we're
	1733	* removing the last extent record.
	1734	*/
c1631d4a	1735	*trunc_cpos = trunc_start;
9a790ba1 TY	1736	/*
	1737	* skip hole if any.
	1738	*/
	1739	if (range < *trunc_end)
	1740	*trunc_end = range;
c1631d4a TY	1741	trunc_len = trunc_end - trunc_start;
	1742	coff = trunc_start - le32_to_cpu(rec->e_cpos);
	1743	*blkno = le64_to_cpu(rec->e_blkno) +
	1744	ocfs2_clusters_to_blocks(inode->i_sb, coff);
	1745	*trunc_end = trunc_start;
	1746	} else {
	1747	/*
	1748	* It may have two following possibilities:
	1749	*
	1750	* - last record has been removed
	1751	* - trunc_start was within a hole
	1752	*
	1753	* both two cases mean the completion of hole punching.
	1754	*/
	1755	ret = 1;
	1756	}
	1757
	1758	*done = ret;
	1759	}
	1760
29ac8e85 DW	1761	int ocfs2_remove_inode_range(struct inode *inode,
	1762	struct buffer_head *di_bh, u64 byte_start,
	1763	u64 byte_len)
063c4561	1764	{
c1631d4a TY	1765	int ret = 0, flags = 0, done = 0, i;
	1766	u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
	1767	u32 cluster_in_el;
063c4561 MF	1768	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
063c4561 MF	1769	struct ocfs2_cached_dealloc_ctxt dealloc;
b1967d0e	1770	struct address_space *mapping = inode->i_mapping;
fecc0112	1771	struct ocfs2_extent_tree et;
c1631d4a TY	1772	struct ocfs2_path *path = NULL;
	1773	struct ocfs2_extent_list *el = NULL;
	1774	struct ocfs2_extent_rec *rec = NULL;
e8aec068	1775	struct ocfs2_dinode di = (struct ocfs2_dinode )di_bh->b_data;
c1631d4a	1776	u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
063c4561	1777
5e404e9e	1778	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
063c4561 MF	1779	ocfs2_init_dealloc_ctxt(&dealloc);
063c4561 MF	1780
468eedde TM	1781	trace_ocfs2_remove_inode_range(
	1782	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	1783	(unsigned long long)byte_start,
	1784	(unsigned long long)byte_len);
	1785
063c4561 MF	1786	if (byte_len == 0)
	1787	return 0;
	1788
1afc32b9	1789	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
bc0a2f3a EAD	1790	int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
	1791
	1792	if (byte_start > id_count \|\| byte_start + byte_len > id_count) {
	1793	ret = -EINVAL;
	1794	mlog_errno(ret);
	1795	goto out;
	1796	}
	1797
1afc32b9	1798	ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
b1967d0e MF	1799	byte_start + byte_len, 0);
b1967d0e MF	1800	if (ret) {
1afc32b9	1801	mlog_errno(ret);
b1967d0e MF	1802	goto out;
	1803	}
	1804	/*
	1805	* There's no need to get fancy with the page cache
	1806	* truncate of an inline-data inode. We're talking
	1807	* about less than a page here, which will be cached
	1808	* in the dinode buffer anyway.
	1809	*/
	1810	unmap_mapping_range(mapping, 0, 0, 0);
	1811	truncate_inode_pages(mapping, 0);
	1812	goto out;
1afc32b9 MF	1813	}
1afc32b9 MF	1814
e8aec068 TY	1815	/*
	1816	* For reflinks, we may need to CoW 2 clusters which might be
	1817	* partially zero'd later, if hole's start and end offset were
	1818	* within one cluster(means is not exactly aligned to clustersize).
	1819	*/
	1820
84e40080	1821	if (ocfs2_is_refcount_inode(inode)) {
e8aec068 TY	1822	ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
	1823	if (ret) {
	1824	mlog_errno(ret);
	1825	goto out;
	1826	}
	1827
	1828	ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
	1829	if (ret) {
	1830	mlog_errno(ret);
	1831	goto out;
	1832	}
	1833	}
	1834
063c4561	1835	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
c1631d4a TY	1836	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
c1631d4a TY	1837	cluster_in_el = trunc_end;
063c4561	1838
063c4561 MF	1839	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
	1840	if (ret) {
	1841	mlog_errno(ret);
	1842	goto out;
	1843	}
	1844
c1631d4a TY	1845	path = ocfs2_new_path_from_et(&et);
	1846	if (!path) {
	1847	ret = -ENOMEM;
	1848	mlog_errno(ret);
	1849	goto out;
	1850	}
	1851
	1852	while (trunc_end > trunc_start) {
	1853
	1854	ret = ocfs2_find_path(INODE_CACHE(inode), path,
	1855	cluster_in_el);
063c4561 MF	1856	if (ret) {
	1857	mlog_errno(ret);
	1858	goto out;
	1859	}
	1860
c1631d4a	1861	el = path_leaf_el(path);
063c4561	1862
c1631d4a TY	1863	i = ocfs2_find_rec(el, trunc_end);
	1864	/*
	1865	* Need to go to previous extent block.
	1866	*/
	1867	if (i < 0) {
	1868	if (path->p_tree_depth == 0)
	1869	break;
063c4561	1870
c1631d4a TY	1871	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
	1872	path,
	1873	&cluster_in_el);
063c4561 MF	1874	if (ret) {
	1875	mlog_errno(ret);
	1876	goto out;
	1877	}
c1631d4a TY	1878
	1879	/*
	1880	* We've reached the leftmost extent block,
	1881	* it's safe to leave.
	1882	*/
	1883	if (cluster_in_el == 0)
	1884	break;
	1885
	1886	/*
	1887	* The 'pos' searched for previous extent block is
	1888	* always one cluster less than actual trunc_end.
	1889	*/
	1890	trunc_end = cluster_in_el + 1;
	1891
	1892	ocfs2_reinit_path(path, 1);
	1893
	1894	continue;
	1895
	1896	} else
	1897	rec = &el->l_recs[i];
	1898
	1899	ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
	1900	&trunc_len, &trunc_end, &blkno, &done);
	1901	if (done)
	1902	break;
	1903
	1904	flags = rec->e_flags;
	1905	phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
	1906
	1907	ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
	1908	phys_cpos, trunc_len, flags,
f62f12b3	1909	&dealloc, refcount_loc, false);
c1631d4a TY	1910	if (ret < 0) {
	1911	mlog_errno(ret);
	1912	goto out;
063c4561 MF	1913	}
063c4561 MF	1914
c1631d4a TY	1915	cluster_in_el = trunc_end;
	1916
	1917	ocfs2_reinit_path(path, 1);
063c4561 MF	1918	}
	1919
	1920	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
	1921
	1922	out:
7aebff18	1923	ocfs2_free_path(path);
063c4561 MF	1924	ocfs2_schedule_truncate_log_flush(osb, 1);
	1925	ocfs2_run_deallocs(osb, &dealloc);
	1926
	1927	return ret;
	1928	}
	1929
b2580103 MF	1930	/*
	1931	* Parts of this function taken from xfs_change_file_space()
	1932	*/
385820a3 MF	1933	static int __ocfs2_change_file_space(struct file file, struct inode inode,
	1934	loff_t f_pos, unsigned int cmd,
	1935	struct ocfs2_space_resv *sr,
	1936	int change_size)
b2580103 MF	1937	{
	1938	int ret;
	1939	s64 llen;
6bba4471	1940	loff_t size, orig_isize;
b2580103 MF	1941	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	1942	struct buffer_head *di_bh = NULL;
	1943	handle_t *handle;
a00cce35	1944	unsigned long long max_off = inode->i_sb->s_maxbytes;
b2580103	1945
b2580103 MF	1946	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
	1947	return -EROFS;
	1948
5955102c	1949	inode_lock(inode);
b2580103	1950
952b023f SY	1951	/* Wait all existing dio workers, newcomers will block on i_rwsem */
952b023f SY	1952	inode_dio_wait(inode);
b2580103 MF	1953	/*
	1954	* This prevents concurrent writes on other nodes
	1955	*/
	1956	ret = ocfs2_rw_lock(inode, 1);
	1957	if (ret) {
	1958	mlog_errno(ret);
	1959	goto out;
	1960	}
	1961
e63aecb6	1962	ret = ocfs2_inode_lock(inode, &di_bh, 1);
b2580103 MF	1963	if (ret) {
	1964	mlog_errno(ret);
	1965	goto out_rw_unlock;
	1966	}
	1967
	1968	if (inode->i_flags & (S_IMMUTABLE\|S_APPEND)) {
	1969	ret = -EPERM;
e63aecb6	1970	goto out_inode_unlock;
b2580103 MF	1971	}
	1972
	1973	switch (sr->l_whence) {
	1974	case 0: /SEEK_SET/
	1975	break;
	1976	case 1: /SEEK_CUR/
385820a3	1977	sr->l_start += f_pos;
b2580103 MF	1978	break;
b2580103 MF	1979	case 2: /SEEK_END/
f267aeb6	1980	sr->l_start += i_size_read(inode);
b2580103 MF	1981	break;
	1982	default:
	1983	ret = -EINVAL;
e63aecb6	1984	goto out_inode_unlock;
b2580103 MF	1985	}
	1986	sr->l_whence = 0;
	1987
	1988	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
	1989
	1990	if (sr->l_start < 0
	1991	\|\| sr->l_start > max_off
	1992	\|\| (sr->l_start + llen) < 0
	1993	\|\| (sr->l_start + llen) > max_off) {
	1994	ret = -EINVAL;
e63aecb6	1995	goto out_inode_unlock;
b2580103	1996	}
385820a3	1997	size = sr->l_start + sr->l_len;
b2580103	1998
a2a3b398 TS	1999	if (cmd == OCFS2_IOC_RESVSP \|\| cmd == OCFS2_IOC_RESVSP64 \|\|
a2a3b398 TS	2000	cmd == OCFS2_IOC_UNRESVSP \|\| cmd == OCFS2_IOC_UNRESVSP64) {
b2580103 MF	2001	if (sr->l_len <= 0) {
b2580103 MF	2002	ret = -EINVAL;
e63aecb6	2003	goto out_inode_unlock;
b2580103 MF	2004	}
	2005	}
	2006
9452e93e	2007	if (file && setattr_should_drop_suidgid(&nop_mnt_idmap, file_inode(file))) {
b2580103 MF	2008	ret = __ocfs2_write_remove_suid(inode, di_bh);
	2009	if (ret) {
	2010	mlog_errno(ret);
e63aecb6	2011	goto out_inode_unlock;
b2580103 MF	2012	}
	2013	}
	2014
	2015	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	2016	switch (cmd) {
	2017	case OCFS2_IOC_RESVSP:
	2018	case OCFS2_IOC_RESVSP64:
	2019	/*
	2020	* This takes unsigned offsets, but the signed ones we
	2021	* pass have been checked against overflow above.
	2022	*/
	2023	ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
	2024	sr->l_len);
	2025	break;
	2026	case OCFS2_IOC_UNRESVSP:
	2027	case OCFS2_IOC_UNRESVSP64:
	2028	ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
	2029	sr->l_len);
	2030	break;
	2031	default:
	2032	ret = -EINVAL;
	2033	}
6bba4471	2034
f267aeb6	2035	orig_isize = i_size_read(inode);
6bba4471 JB	2036	/* zeroout eof blocks in the cluster. */
	2037	if (!ret && change_size && orig_isize < size) {
	2038	ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
	2039	size - orig_isize);
	2040	if (!ret)
	2041	i_size_write(inode, size);
	2042	}
b2580103 MF	2043	up_write(&OCFS2_I(inode)->ip_alloc_sem);
	2044	if (ret) {
	2045	mlog_errno(ret);
e63aecb6	2046	goto out_inode_unlock;
b2580103 MF	2047	}
	2048
	2049	/*
	2050	* We update c/mtime for these changes
	2051	*/
	2052	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	2053	if (IS_ERR(handle)) {
	2054	ret = PTR_ERR(handle);
	2055	mlog_errno(ret);
e63aecb6	2056	goto out_inode_unlock;
b2580103 MF	2057	}
b2580103 MF	2058
fd6acbbc	2059	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
b2580103 MF	2060	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
	2061	if (ret < 0)
	2062	mlog_errno(ret);
	2063
a4e08d00	2064	if (file && (file->f_flags & O_SYNC))
df295d4a MF	2065	handle->h_sync = 1;
df295d4a MF	2066
b2580103 MF	2067	ocfs2_commit_trans(osb, handle);
b2580103 MF	2068
e63aecb6	2069	out_inode_unlock:
b2580103	2070	brelse(di_bh);
e63aecb6	2071	ocfs2_inode_unlock(inode, 1);
b2580103 MF	2072	out_rw_unlock:
	2073	ocfs2_rw_unlock(inode, 1);
	2074
b2580103	2075	out:
5955102c	2076	inode_unlock(inode);
b2580103 MF	2077	return ret;
	2078	}
	2079
385820a3 MF	2080	int ocfs2_change_file_space(struct file *file, unsigned int cmd,
	2081	struct ocfs2_space_resv *sr)
	2082	{
496ad9aa	2083	struct inode *inode = file_inode(file);
c19a28e1	2084	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
fef6925c	2085	int ret;
385820a3 MF	2086
	2087	if ((cmd == OCFS2_IOC_RESVSP \|\| cmd == OCFS2_IOC_RESVSP64) &&
	2088	!ocfs2_writes_unwritten_extents(osb))
	2089	return -ENOTTY;
	2090	else if ((cmd == OCFS2_IOC_UNRESVSP \|\| cmd == OCFS2_IOC_UNRESVSP64) &&
	2091	!ocfs2_sparse_alloc(osb))
	2092	return -ENOTTY;
	2093
	2094	if (!S_ISREG(inode->i_mode))
	2095	return -EINVAL;
	2096
	2097	if (!(file->f_mode & FMODE_WRITE))
	2098	return -EBADF;
	2099
fef6925c JK	2100	ret = mnt_want_write_file(file);
	2101	if (ret)
	2102	return ret;
	2103	ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
	2104	mnt_drop_write_file(file);
	2105	return ret;
385820a3 MF	2106	}
385820a3 MF	2107
2fe17c10	2108	static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
385820a3 MF	2109	loff_t len)
385820a3 MF	2110	{
496ad9aa	2111	struct inode *inode = file_inode(file);
385820a3 MF	2112	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	2113	struct ocfs2_space_resv sr;
	2114	int change_size = 1;
db47fef2	2115	int cmd = OCFS2_IOC_RESVSP64;
26a6ffff	2116	int ret = 0;
385820a3	2117
64c23e86 CH	2118	if (mode & ~(FALLOC_FL_KEEP_SIZE \| FALLOC_FL_PUNCH_HOLE))
64c23e86 CH	2119	return -EOPNOTSUPP;
385820a3 MF	2120	if (!ocfs2_writes_unwritten_extents(osb))
	2121	return -EOPNOTSUPP;
	2122
26a6ffff	2123	if (mode & FALLOC_FL_KEEP_SIZE) {
385820a3	2124	change_size = 0;
26a6ffff LH	2125	} else {
	2126	ret = inode_newsize_ok(inode, offset + len);
	2127	if (ret)
	2128	return ret;
	2129	}
385820a3	2130
db47fef2 JB	2131	if (mode & FALLOC_FL_PUNCH_HOLE)
	2132	cmd = OCFS2_IOC_UNRESVSP64;
	2133
385820a3 MF	2134	sr.l_whence = 0;
	2135	sr.l_start = (s64)offset;
	2136	sr.l_len = (s64)len;
	2137
db47fef2 JB	2138	return __ocfs2_change_file_space(NULL, inode, offset, cmd, &sr,
db47fef2 JB	2139	change_size);
385820a3 MF	2140	}
385820a3 MF	2141
293b2f70 TM	2142	int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
	2143	size_t count)
	2144	{
	2145	int ret = 0;
	2146	unsigned int extent_flags;
	2147	u32 cpos, clusters, extent_len, phys_cpos;
	2148	struct super_block *sb = inode->i_sb;
	2149
	2150	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) \|\|
84e40080	2151	!ocfs2_is_refcount_inode(inode) \|\|
2f48d593	2152	OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
293b2f70 TM	2153	return 0;
	2154
	2155	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
	2156	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
	2157
	2158	while (clusters) {
	2159	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
	2160	&extent_flags);
	2161	if (ret < 0) {
	2162	mlog_errno(ret);
	2163	goto out;
	2164	}
	2165
	2166	if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
	2167	ret = 1;
	2168	break;
	2169	}
	2170
	2171	if (extent_len > clusters)
	2172	extent_len = clusters;
	2173
	2174	clusters -= extent_len;
	2175	cpos += extent_len;
	2176	}
	2177	out:
	2178	return ret;
	2179	}
	2180
a11f7e63 MF	2181	static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
	2182	{
	2183	int blockmask = inode->i_sb->s_blocksize - 1;
	2184	loff_t final_size = pos + count;
	2185
	2186	if ((pos & blockmask) \|\| (final_size & blockmask))
	2187	return 1;
	2188	return 0;
	2189	}
	2190
e74540b2 SZ	2191	static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
	2192	struct buffer_head **di_bh,
	2193	int meta_level,
e74540b2 SZ	2194	int write_sem,
e74540b2 SZ	2195	int wait)
293b2f70	2196	{
e74540b2	2197	int ret = 0;
293b2f70	2198
e74540b2	2199	if (wait)
2d797e9f	2200	ret = ocfs2_inode_lock(inode, di_bh, meta_level);
e74540b2	2201	else
2d797e9f	2202	ret = ocfs2_try_inode_lock(inode, di_bh, meta_level);
e74540b2	2203	if (ret < 0)
293b2f70	2204	goto out;
e74540b2 SZ	2205
	2206	if (wait) {
	2207	if (write_sem)
	2208	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	2209	else
	2210	down_read(&OCFS2_I(inode)->ip_alloc_sem);
	2211	} else {
	2212	if (write_sem)
	2213	ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem);
	2214	else
	2215	ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem);
	2216
	2217	if (!ret) {
	2218	ret = -EAGAIN;
	2219	goto out_unlock;
	2220	}
293b2f70 TM	2221	}
293b2f70 TM	2222
e74540b2	2223	return ret;
293b2f70	2224
e74540b2 SZ	2225	out_unlock:
e74540b2 SZ	2226	brelse(*di_bh);
2d797e9f	2227	*di_bh = NULL;
e74540b2	2228	ocfs2_inode_unlock(inode, meta_level);
293b2f70	2229	out:
293b2f70 TM	2230	return ret;
	2231	}
	2232
e74540b2 SZ	2233	static void ocfs2_inode_unlock_for_extent_tree(struct inode *inode,
	2234	struct buffer_head **di_bh,
	2235	int meta_level,
	2236	int write_sem)
	2237	{
	2238	if (write_sem)
	2239	up_write(&OCFS2_I(inode)->ip_alloc_sem);
	2240	else
	2241	up_read(&OCFS2_I(inode)->ip_alloc_sem);
	2242
	2243	brelse(*di_bh);
	2244	*di_bh = NULL;
	2245
	2246	if (meta_level >= 0)
	2247	ocfs2_inode_unlock(inode, meta_level);
	2248	}
	2249
b8908236	2250	static int ocfs2_prepare_inode_for_write(struct file *file,
c4c2416a	2251	loff_t pos, size_t count, int wait)
ccd979bd	2252	{
c4c2416a	2253	int ret = 0, meta_level = 0, overwrite_io = 0;
e74540b2	2254	int write_sem = 0;
b8908236	2255	struct dentry *dentry = file->f_path.dentry;
2b0143b5	2256	struct inode *inode = d_inode(dentry);
c4c2416a	2257	struct buffer_head *di_bh = NULL;
e74540b2 SZ	2258	u32 cpos;
e74540b2 SZ	2259	u32 clusters;
ccd979bd	2260
2bd63216	2261	/*
65ed39d6 MF	2262	* We start with a read level meta lock and only jump to an ex
65ed39d6 MF	2263	* if we need to make modifications here.
ccd979bd	2264	*/
ccd979bd	2265	for(;;) {
e74540b2 SZ	2266	ret = ocfs2_inode_lock_for_extent_tree(inode,
	2267	&di_bh,
	2268	meta_level,
e74540b2 SZ	2269	write_sem,
e74540b2 SZ	2270	wait);
ccd979bd	2271	if (ret < 0) {
c4c2416a GH	2272	if (ret != -EAGAIN)
c4c2416a GH	2273	mlog_errno(ret);
ccd979bd MF	2274	goto out;
	2275	}
	2276
c4c2416a GH	2277	/*
	2278	* Check if IO will overwrite allocated blocks in case
	2279	* IOCB_NOWAIT flag is set.
	2280	*/
	2281	if (!wait && !overwrite_io) {
	2282	overwrite_io = 1;
c4c2416a GH	2283
c4c2416a GH	2284	ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
c4c2416a GH	2285	if (ret < 0) {
	2286	if (ret != -EAGAIN)
	2287	mlog_errno(ret);
	2288	goto out_unlock;
	2289	}
	2290	}
	2291
ccd979bd MF	2292	/* Clear suid / sgid if necessary. We do this here
	2293	* instead of later in the write path because
	2294	* remove_suid() calls ->setattr without any hint that
	2295	* we may have already done our cluster locking. Since
	2296	* ocfs2_setattr() must take cluster locks to
42b2aa86	2297	* proceed, this will lead us to recursively lock the
ccd979bd MF	2298	* inode. There's also the dinode i_size state which
	2299	* can be lost via setattr during extending writes (we
	2300	* set inode->i_size at the end of a write. */
9452e93e	2301	if (setattr_should_drop_suidgid(&nop_mnt_idmap, inode)) {
ccd979bd	2302	if (meta_level == 0) {
e74540b2 SZ	2303	ocfs2_inode_unlock_for_extent_tree(inode,
	2304	&di_bh,
	2305	meta_level,
	2306	write_sem);
ccd979bd MF	2307	meta_level = 1;
	2308	continue;
	2309	}
	2310
	2311	ret = ocfs2_write_remove_suid(inode);
	2312	if (ret < 0) {
	2313	mlog_errno(ret);
8659ac25	2314	goto out_unlock;
ccd979bd MF	2315	}
	2316	}
	2317
90320251	2318	ret = ocfs2_check_range_for_refcount(inode, pos, count);
293b2f70	2319	if (ret == 1) {
e74540b2 SZ	2320	ocfs2_inode_unlock_for_extent_tree(inode,
	2321	&di_bh,
	2322	meta_level,
	2323	write_sem);
2d797e9f GH	2324	meta_level = 1;
2d797e9f GH	2325	write_sem = 1;
e74540b2 SZ	2326	ret = ocfs2_inode_lock_for_extent_tree(inode,
	2327	&di_bh,
	2328	meta_level,
2d797e9f	2329	write_sem,
e74540b2	2330	wait);
e74540b2 SZ	2331	if (ret < 0) {
	2332	if (ret != -EAGAIN)
	2333	mlog_errno(ret);
	2334	goto out;
	2335	}
	2336
	2337	cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	2338	clusters =
	2339	ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
	2340	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
293b2f70 TM	2341	}
	2342
	2343	if (ret < 0) {
e74540b2 SZ	2344	if (ret != -EAGAIN)
e74540b2 SZ	2345	mlog_errno(ret);
293b2f70 TM	2346	goto out_unlock;
	2347	}
	2348
ccd979bd MF	2349	break;
	2350	}
	2351
8659ac25	2352	out_unlock:
468eedde	2353	trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
c4c2416a GH	2354	pos, count, wait);
c4c2416a GH	2355
e74540b2 SZ	2356	ocfs2_inode_unlock_for_extent_tree(inode,
	2357	&di_bh,
	2358	meta_level,
	2359	write_sem);
8659ac25 TY	2360
	2361	out:
	2362	return ret;
	2363	}
	2364
3ef045c3 AV	2365	static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
3ef045c3 AV	2366	struct iov_iter *from)
8659ac25	2367	{
c4c2416a	2368	int rw_level;
9517bac6	2369	ssize_t written = 0;
3309dd04	2370	ssize_t ret;
f1f973ff	2371	size_t count = iov_iter_count(from);
9517bac6	2372	struct file *file = iocb->ki_filp;
496ad9aa	2373	struct inode *inode = file_inode(file);
9ea2d32f	2374	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7bdb0d18 TY	2375	int full_coherency = !(osb->s_mount_opt &
7bdb0d18 TY	2376	OCFS2_MOUNT_COHERENCY_BUFFERED);
e63890f3	2377	void *saved_ki_complete = NULL;
faaebf18 JQ	2378	int append_write = ((iocb->ki_pos + count) >=
faaebf18 JQ	2379	i_size_read(inode) ? 1 : 0);
c4c2416a GH	2380	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
c4c2416a GH	2381	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
9517bac6	2382
1202d4ba	2383	trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry,
468eedde TM	2384	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	2385	file->f_path.dentry->d_name.len,
	2386	file->f_path.dentry->d_name.name,
3ef045c3	2387	(unsigned int)from->nr_segs); /* GRRRRR */
8659ac25	2388
c4c2416a GH	2389	if (!direct_io && nowait)
	2390	return -EOPNOTSUPP;
	2391
66ee59af	2392	if (count == 0)
8659ac25 TY	2393	return 0;
8659ac25 TY	2394
c4c2416a GH	2395	if (nowait) {
	2396	if (!inode_trylock(inode))
	2397	return -EAGAIN;
	2398	} else
	2399	inode_lock(inode);
9517bac6	2400
adc77b19 DA	2401	ocfs2_iocb_init_rw_locked(iocb);
adc77b19 DA	2402
7bdb0d18 TY	2403	/*
	2404	* Concurrent O_DIRECT writes are allowed with
	2405	* mount_option "coherency=buffered".
faaebf18	2406	* For append write, we must take rw EX.
7bdb0d18	2407	*/
faaebf18	2408	rw_level = (!direct_io \|\| full_coherency \|\| append_write);
7bdb0d18	2409
c4c2416a GH	2410	if (nowait)
	2411	ret = ocfs2_try_rw_lock(inode, rw_level);
	2412	else
	2413	ret = ocfs2_rw_lock(inode, rw_level);
8659ac25	2414	if (ret < 0) {
c4c2416a GH	2415	if (ret != -EAGAIN)
c4c2416a GH	2416	mlog_errno(ret);
fa5a0eb3	2417	goto out_mutex;
8659ac25 TY	2418	}
8659ac25 TY	2419
7bdb0d18 TY	2420	/*
	2421	* O_DIRECT writes with "coherency=full" need to take EX cluster
	2422	* inode_lock to guarantee coherency.
	2423	*/
	2424	if (direct_io && full_coherency) {
	2425	/*
	2426	* We need to take and drop the inode lock to force
	2427	* other nodes to drop their caches. Buffered I/O
	2428	* already does this in write_begin().
	2429	*/
c4c2416a GH	2430	if (nowait)
	2431	ret = ocfs2_try_inode_lock(inode, NULL, 1);
	2432	else
	2433	ret = ocfs2_inode_lock(inode, NULL, 1);
7bdb0d18	2434	if (ret < 0) {
c4c2416a GH	2435	if (ret != -EAGAIN)
c4c2416a GH	2436	mlog_errno(ret);
afe1bb73	2437	goto out;
7bdb0d18 TY	2438	}
	2439
	2440	ocfs2_inode_unlock(inode, 1);
	2441	}
	2442
3309dd04 AV	2443	ret = generic_write_checks(iocb, from);
	2444	if (ret <= 0) {
	2445	if (ret)
	2446	mlog_errno(ret);
90320251 AV	2447	goto out;
90320251 AV	2448	}
3309dd04	2449	count = ret;
90320251	2450
c4c2416a	2451	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
8659ac25	2452	if (ret < 0) {
c4c2416a GH	2453	if (ret != -EAGAIN)
c4c2416a GH	2454	mlog_errno(ret);
8659ac25 TY	2455	goto out;
8659ac25 TY	2456	}
ccd979bd	2457
e63890f3 RD	2458	if (direct_io && !is_sync_kiocb(iocb) &&
e63890f3 RD	2459	ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) {
a11f7e63	2460	/*
e63890f3	2461	* Make it a sync io if it's an unaligned aio.
a11f7e63	2462	*/
e63890f3	2463	saved_ki_complete = xchg(&iocb->ki_complete, NULL);
a11f7e63 MF	2464	}
a11f7e63 MF	2465
ccd979bd	2466	/* communicate with ocfs2_dio_end_io */
7cdfc3a1	2467	ocfs2_iocb_set_rw_locked(iocb, rw_level);
ccd979bd	2468
7da839c4	2469	written = __generic_file_write_iter(iocb, from);
ccd979bd	2470	/* buffered aio wouldn't have proper lock coverage today */
9e985787	2471	BUG_ON(written == -EIOCBQUEUED && !direct_io);
ccd979bd	2472
aa1057b3 RD	2473	/*
	2474	* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
	2475	* function pointer which is called when o_direct io completes so that
	2476	* it can unlock our rw lock.
	2477	* Unfortunately there are error cases which call end_io and others
	2478	* that don't. so we don't have to unlock the rw_lock if either an
	2479	* async dio is going to do it in the future or an end_io after an
	2480	* error has already done it.
	2481	*/
	2482	if ((written == -EIOCBQUEUED) \|\| (!ocfs2_iocb_is_rw_locked(iocb))) {
	2483	rw_level = -1;
aa1057b3 RD	2484	}
aa1057b3 RD	2485
64b4e252	2486	if (unlikely(written <= 0))
e63890f3	2487	goto out;
64b4e252	2488
7da839c4	2489	if (((file->f_flags & O_DSYNC) && !direct_io) \|\|
f1f973ff	2490	IS_SYNC(inode)) {
64b4e252 AV	2491	ret = filemap_fdatawrite_range(file->f_mapping,
	2492	iocb->ki_pos - written,
	2493	iocb->ki_pos - 1);
918941a3 JK	2494	if (ret < 0)
	2495	written = ret;
	2496
86b9c6f3	2497	if (!ret) {
2b4e30fb	2498	ret = jbd2_journal_force_commit(osb->journal->j_journal);
9ea2d32f MF	2499	if (ret < 0)
	2500	written = ret;
	2501	}
918941a3 JK	2502
918941a3 JK	2503	if (!ret)
64b4e252 AV	2504	ret = filemap_fdatawait_range(file->f_mapping,
	2505	iocb->ki_pos - written,
	2506	iocb->ki_pos - 1);
9ea2d32f MF	2507	}
9ea2d32f MF	2508
ccd979bd	2509	out:
e63890f3 RD	2510	if (saved_ki_complete)
	2511	xchg(&iocb->ki_complete, saved_ki_complete);
	2512
9517bac6 MF	2513	if (rw_level != -1)
	2514	ocfs2_rw_unlock(inode, rw_level);
	2515
fa5a0eb3	2516	out_mutex:
5955102c	2517	inode_unlock(inode);
ccd979bd	2518
812e7a6a WW	2519	if (written)
812e7a6a WW	2520	ret = written;
812e7a6a	2521	return ret;
ccd979bd MF	2522	}
ccd979bd MF	2523
3cd9ad5a AV	2524	static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
3cd9ad5a AV	2525	struct iov_iter *to)
ccd979bd	2526	{
fa5a0eb3	2527	int ret = 0, rw_level = -1, lock_level = 0;
ccd979bd	2528	struct file *filp = iocb->ki_filp;
496ad9aa	2529	struct inode *inode = file_inode(filp);
c4c2416a GH	2530	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
c4c2416a GH	2531	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
ccd979bd	2532
1202d4ba	2533	trace_ocfs2_file_read_iter(inode, filp, filp->f_path.dentry,
468eedde TM	2534	(unsigned long long)OCFS2_I(inode)->ip_blkno,
468eedde TM	2535	filp->f_path.dentry->d_name.len,
3cd9ad5a AV	2536	filp->f_path.dentry->d_name.name,
3cd9ad5a AV	2537	to->nr_segs); /* GRRRRR */
468eedde	2538
ccd979bd MF	2539
	2540	if (!inode) {
	2541	ret = -EINVAL;
	2542	mlog_errno(ret);
	2543	goto bail;
	2544	}
	2545
c4c2416a GH	2546	if (!direct_io && nowait)
	2547	return -EOPNOTSUPP;
	2548
adc77b19 DA	2549	ocfs2_iocb_init_rw_locked(iocb);
adc77b19 DA	2550
2bd63216	2551	/*
bb9263fc	2552	* buffered reads protect themselves in ->read_folio(). O_DIRECT reads
ccd979bd MF	2553	* need locks to protect pending reads from racing with truncate.
ccd979bd MF	2554	*/
c4c2416a GH	2555	if (direct_io) {
	2556	if (nowait)
	2557	ret = ocfs2_try_rw_lock(inode, 0);
	2558	else
	2559	ret = ocfs2_rw_lock(inode, 0);
	2560
ccd979bd	2561	if (ret < 0) {
c4c2416a GH	2562	if (ret != -EAGAIN)
c4c2416a GH	2563	mlog_errno(ret);
ccd979bd MF	2564	goto bail;
	2565	}
	2566	rw_level = 0;
	2567	/* communicate with ocfs2_dio_end_io */
7cdfc3a1	2568	ocfs2_iocb_set_rw_locked(iocb, rw_level);
ccd979bd MF	2569	}
ccd979bd MF	2570
c4374f8a MF	2571	/*
	2572	* We're fine letting folks race truncates and extending
	2573	* writes with read across the cluster, just like they can
	2574	* locally. Hence no rw_lock during read.
2bd63216	2575	*
c4374f8a MF	2576	* Take and drop the meta data lock to update inode fields
c4374f8a MF	2577	* like i_size. This allows the checks down below
94aca682	2578	* copy_splice_read() a chance of actually working.
c4374f8a	2579	*/
c4c2416a GH	2580	ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
c4c2416a GH	2581	!nowait);
c4374f8a	2582	if (ret < 0) {
c4c2416a GH	2583	if (ret != -EAGAIN)
c4c2416a GH	2584	mlog_errno(ret);
c4374f8a MF	2585	goto bail;
c4374f8a MF	2586	}
e63aecb6	2587	ocfs2_inode_unlock(inode, lock_level);
c4374f8a	2588
3cd9ad5a	2589	ret = generic_file_read_iter(iocb, to);
1202d4ba	2590	trace_generic_file_read_iter_ret(ret);
ccd979bd MF	2591
ccd979bd MF	2592	/* buffered aio wouldn't have proper lock coverage today */
9e985787	2593	BUG_ON(ret == -EIOCBQUEUED && !direct_io);
ccd979bd	2594
3ef045c3	2595	/* see ocfs2_file_write_iter */
ccd979bd MF	2596	if (ret == -EIOCBQUEUED \|\| !ocfs2_iocb_is_rw_locked(iocb)) {
ccd979bd MF	2597	rw_level = -1;
ccd979bd MF	2598	}
	2599
	2600	bail:
2bd63216	2601	if (rw_level != -1)
ccd979bd	2602	ocfs2_rw_unlock(inode, rw_level);
ccd979bd MF	2603
	2604	return ret;
	2605	}
	2606
94aca682 DH	2607	static ssize_t ocfs2_file_splice_read(struct file in, loff_t ppos,
	2608	struct pipe_inode_info *pipe,
	2609	size_t len, unsigned int flags)
	2610	{
	2611	struct inode *inode = file_inode(in);
	2612	ssize_t ret = 0;
	2613	int lock_level = 0;
	2614
	2615	trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
	2616	(unsigned long long)OCFS2_I(inode)->ip_blkno,
	2617	in->f_path.dentry->d_name.len,
	2618	in->f_path.dentry->d_name.name,
	2619	flags);
	2620
	2621	/*
	2622	* We're fine letting folks race truncates and extending writes with
	2623	* read across the cluster, just like they can locally. Hence no
	2624	* rw_lock during read.
	2625	*
	2626	* Take and drop the meta data lock to update inode fields like i_size.
	2627	* This allows the checks down below filemap_splice_read() a chance of
	2628	* actually working.
	2629	*/
	2630	ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level, 1);
	2631	if (ret < 0) {
	2632	if (ret != -EAGAIN)
	2633	mlog_errno(ret);
	2634	goto bail;
	2635	}
	2636	ocfs2_inode_unlock(inode, lock_level);
	2637
	2638	ret = filemap_splice_read(in, ppos, pipe, len, flags);
	2639	trace_filemap_splice_read_ret(ret);
	2640	bail:
	2641	return ret;
	2642	}
	2643
93862d5e	2644	/* Refer generic_file_llseek_unlocked() */
965c8e59	2645	static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
93862d5e SM	2646	{
	2647	struct inode *inode = file->f_mapping->host;
	2648	int ret = 0;
	2649
5955102c	2650	inode_lock(inode);
93862d5e	2651
965c8e59	2652	switch (whence) {
93862d5e SM	2653	case SEEK_SET:
	2654	break;
	2655	case SEEK_END:
c8d888d9 J	2656	/* SEEK_END requires the OCFS2 inode lock for the file
	2657	* because it references the file's size.
	2658	*/
	2659	ret = ocfs2_inode_lock(inode, NULL, 0);
	2660	if (ret < 0) {
	2661	mlog_errno(ret);
	2662	goto out;
	2663	}
	2664	offset += i_size_read(inode);
	2665	ocfs2_inode_unlock(inode, 0);
93862d5e SM	2666	break;
	2667	case SEEK_CUR:
	2668	if (offset == 0) {
	2669	offset = file->f_pos;
	2670	goto out;
	2671	}
	2672	offset += file->f_pos;
	2673	break;
	2674	case SEEK_DATA:
	2675	case SEEK_HOLE:
965c8e59	2676	ret = ocfs2_seek_data_hole_offset(file, &offset, whence);
93862d5e SM	2677	if (ret)
	2678	goto out;
	2679	break;
	2680	default:
	2681	ret = -EINVAL;
	2682	goto out;
	2683	}
	2684
46a1c2c7	2685	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
93862d5e SM	2686
93862d5e SM	2687	out:
5955102c	2688	inode_unlock(inode);
93862d5e SM	2689	if (ret)
	2690	return ret;
	2691	return offset;
	2692	}
	2693
42ec3d4c DW	2694	static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
	2695	struct file *file_out, loff_t pos_out,
	2696	loff_t len, unsigned int remap_flags)
29ac8e85	2697	{
65f098e9 DW	2698	struct inode *inode_in = file_inode(file_in);
	2699	struct inode *inode_out = file_inode(file_out);
	2700	struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
	2701	struct buffer_head in_bh = NULL, out_bh = NULL;
	2702	bool same_inode = (inode_in == inode_out);
	2703	loff_t remapped = 0;
	2704	ssize_t ret;
	2705
2e5dfc99 DW	2706	if (remap_flags & ~(REMAP_FILE_DEDUP \| REMAP_FILE_ADVISORY))
2e5dfc99 DW	2707	return -EINVAL;
65f098e9 DW	2708	if (!ocfs2_refcount_tree(osb))
	2709	return -EOPNOTSUPP;
	2710	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
	2711	return -EROFS;
29ac8e85	2712
65f098e9 DW	2713	/* Lock both files against IO */
	2714	ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
	2715	if (ret)
	2716	return ret;
	2717
	2718	/* Check file eligibility and prepare for block sharing. */
	2719	ret = -EINVAL;
	2720	if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) \|\|
	2721	(OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
	2722	goto out_unlock;
	2723
	2724	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
	2725	&len, remap_flags);
	2726	if (ret < 0 \|\| len == 0)
	2727	goto out_unlock;
	2728
	2729	/* Lock out changes to the allocation maps and remap. */
	2730	down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
	2731	if (!same_inode)
	2732	down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
	2733	SINGLE_DEPTH_NESTING);
	2734
	2735	/* Zap any page cache for the destination file's range. */
	2736	truncate_inode_pages_range(&inode_out->i_data,
	2737	round_down(pos_out, PAGE_SIZE),
	2738	round_up(pos_out + len, PAGE_SIZE) - 1);
	2739
	2740	remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
	2741	inode_out, out_bh, pos_out, len);
	2742	up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
	2743	if (!same_inode)
	2744	up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
	2745	if (remapped < 0) {
	2746	ret = remapped;
	2747	mlog_errno(ret);
	2748	goto out_unlock;
	2749	}
	2750
	2751	/*
	2752	* Empty the extent map so that we may get the right extent
	2753	* record from the disk.
	2754	*/
	2755	ocfs2_extent_map_trunc(inode_in, 0);
	2756	ocfs2_extent_map_trunc(inode_out, 0);
	2757
	2758	ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
	2759	if (ret) {
	2760	mlog_errno(ret);
	2761	goto out_unlock;
	2762	}
	2763
	2764	out_unlock:
	2765	ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
	2766	return remapped > 0 ? remapped : ret;
29ac8e85 DW	2767	}
29ac8e85 DW	2768
ceaa5e80 CB	2769	static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence)
	2770	{
	2771	struct ocfs2_file_private *fp = file->private_data;
	2772
	2773	return generic_llseek_cookie(file, offset, whence, &fp->cookie);
	2774	}
	2775
92e1d5be	2776	const struct inode_operations ocfs2_file_iops = {
ccd979bd MF	2777	.setattr = ocfs2_setattr,
ccd979bd MF	2778	.getattr = ocfs2_getattr,
d38eb8db	2779	.permission = ocfs2_permission,
cf1d6c76	2780	.listxattr = ocfs2_listxattr,
00dc417f	2781	.fiemap = ocfs2_fiemap,
cac2f8b8	2782	.get_inode_acl = ocfs2_iop_get_acl,
702e5bc6	2783	.set_acl = ocfs2_iop_set_acl,
2b5f52c5 MS	2784	.fileattr_get = ocfs2_fileattr_get,
2b5f52c5 MS	2785	.fileattr_set = ocfs2_fileattr_set,
ccd979bd MF	2786	};
ccd979bd MF	2787
92e1d5be	2788	const struct inode_operations ocfs2_special_file_iops = {
ccd979bd MF	2789	.setattr = ocfs2_setattr,
ccd979bd MF	2790	.getattr = ocfs2_getattr,
41e296f6	2791	.listxattr = ocfs2_listxattr,
d38eb8db	2792	.permission = ocfs2_permission,
cac2f8b8	2793	.get_inode_acl = ocfs2_iop_get_acl,
702e5bc6	2794	.set_acl = ocfs2_iop_set_acl,
ccd979bd MF	2795	};
ccd979bd MF	2796
53da4939 MF	2797	/*
	2798	* Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
	2799	* ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
	2800	*/
4b6f5d20	2801	const struct file_operations ocfs2_fops = {
93862d5e	2802	.llseek = ocfs2_file_llseek,
ccd979bd MF	2803	.mmap = ocfs2_mmap,
	2804	.fsync = ocfs2_sync_file,
	2805	.release = ocfs2_file_release,
	2806	.open = ocfs2_file_open,
3cd9ad5a	2807	.read_iter = ocfs2_file_read_iter,
3ef045c3	2808	.write_iter = ocfs2_file_write_iter,
c9ec1488	2809	.unlocked_ioctl = ocfs2_ioctl,
586d232b MF	2810	#ifdef CONFIG_COMPAT
	2811	.compat_ioctl = ocfs2_compat_ioctl,
	2812	#endif
53da4939	2813	.lock = ocfs2_lock,
53fc622b	2814	.flock = ocfs2_flock,
94aca682	2815	.splice_read = ocfs2_file_splice_read,
6dc8bc0f	2816	.splice_write = iter_file_splice_write,
2fe17c10	2817	.fallocate = ocfs2_fallocate,
2e5dfc99	2818	.remap_file_range = ocfs2_remap_file_range,
2253ab99	2819	.fop_flags = FOP_ASYNC_LOCK,
ccd979bd MF	2820	};
ccd979bd MF	2821
3e327154	2822	WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
4b6f5d20	2823	const struct file_operations ocfs2_dops = {
ceaa5e80	2824	.llseek = ocfs2_dir_llseek,
ccd979bd	2825	.read = generic_read_dir,
3e327154	2826	.iterate_shared = shared_ocfs2_readdir,
ccd979bd	2827	.fsync = ocfs2_sync_file,
53fc622b MF	2828	.release = ocfs2_dir_release,
53fc622b MF	2829	.open = ocfs2_dir_open,
c9ec1488	2830	.unlocked_ioctl = ocfs2_ioctl,
586d232b MF	2831	#ifdef CONFIG_COMPAT
586d232b MF	2832	.compat_ioctl = ocfs2_compat_ioctl,
53da4939 MF	2833	#endif
	2834	.lock = ocfs2_lock,
	2835	.flock = ocfs2_flock,
2253ab99	2836	.fop_flags = FOP_ASYNC_LOCK,
53da4939 MF	2837	};
	2838
	2839	/*
	2840	* POSIX-lockless variants of our file_operations.
	2841	*
	2842	* These will be used if the underlying cluster stack does not support
	2843	* posix file locking, if the user passes the "localflocks" mount
	2844	* option, or if we have a local-only fs.
	2845	*
	2846	* ocfs2_flock is in here because all stacks handle UNIX file locks,
	2847	* so we still want it in the case of no stack support for
	2848	* plocks. Internally, it will do the right thing when asked to ignore
	2849	* the cluster.
	2850	*/
	2851	const struct file_operations ocfs2_fops_no_plocks = {
93862d5e	2852	.llseek = ocfs2_file_llseek,
53da4939 MF	2853	.mmap = ocfs2_mmap,
	2854	.fsync = ocfs2_sync_file,
	2855	.release = ocfs2_file_release,
	2856	.open = ocfs2_file_open,
3cd9ad5a	2857	.read_iter = ocfs2_file_read_iter,
3ef045c3	2858	.write_iter = ocfs2_file_write_iter,
53da4939 MF	2859	.unlocked_ioctl = ocfs2_ioctl,
	2860	#ifdef CONFIG_COMPAT
	2861	.compat_ioctl = ocfs2_compat_ioctl,
	2862	#endif
	2863	.flock = ocfs2_flock,
2cb1e089	2864	.splice_read = filemap_splice_read,
6dc8bc0f	2865	.splice_write = iter_file_splice_write,
3d1c1829	2866	.fallocate = ocfs2_fallocate,
2e5dfc99	2867	.remap_file_range = ocfs2_remap_file_range,
53da4939 MF	2868	};
	2869
	2870	const struct file_operations ocfs2_dops_no_plocks = {
ceaa5e80	2871	.llseek = ocfs2_dir_llseek,
53da4939	2872	.read = generic_read_dir,
3e327154	2873	.iterate_shared = shared_ocfs2_readdir,
53da4939 MF	2874	.fsync = ocfs2_sync_file,
	2875	.release = ocfs2_dir_release,
	2876	.open = ocfs2_dir_open,
	2877	.unlocked_ioctl = ocfs2_ioctl,
	2878	#ifdef CONFIG_COMPAT
	2879	.compat_ioctl = ocfs2_compat_ioctl,
586d232b	2880	#endif
53fc622b	2881	.flock = ocfs2_flock,
ccd979bd	2882	};