Git Repo - linux.git/blame_incremental

... / ...

Commit	Line	Data
	1	// SPDX-License-Identifier: GPL-2.0
	2	/*
	3	* fs/f2fs/file.c
	4	*
	5	* Copyright (c) 2012 Samsung Electronics Co., Ltd.
	6	* http://www.samsung.com/
	7	*/
	8	#include <linux/fs.h>
	9	#include <linux/f2fs_fs.h>
	10	#include <linux/stat.h>
	11	#include <linux/writeback.h>
	12	#include <linux/blkdev.h>
	13	#include <linux/falloc.h>
	14	#include <linux/types.h>
	15	#include <linux/compat.h>
	16	#include <linux/uaccess.h>
	17	#include <linux/mount.h>
	18	#include <linux/pagevec.h>
	19	#include <linux/uio.h>
	20	#include <linux/uuid.h>
	21	#include <linux/file.h>
	22	#include <linux/nls.h>
	23	#include <linux/sched/signal.h>
	24	#include <linux/fileattr.h>
	25	#include <linux/fadvise.h>
	26	#include <linux/iomap.h>
	27
	28	#include "f2fs.h"
	29	#include "node.h"
	30	#include "segment.h"
	31	#include "xattr.h"
	32	#include "acl.h"
	33	#include "gc.h"
	34	#include "iostat.h"
	35	#include <trace/events/f2fs.h>
	36	#include <uapi/linux/f2fs.h>
	37
	38	static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
	39	{
	40	struct inode *inode = file_inode(vmf->vma->vm_file);
	41	vm_flags_t flags = vmf->vma->vm_flags;
	42	vm_fault_t ret;
	43
	44	ret = filemap_fault(vmf);
	45	if (ret & VM_FAULT_LOCKED)
	46	f2fs_update_iostat(F2FS_I_SB(inode), inode,
	47	APP_MAPPED_READ_IO, F2FS_BLKSIZE);
	48
	49	trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
	50
	51	return ret;
	52	}
	53
	54	static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
	55	{
	56	struct folio *folio = page_folio(vmf->page);
	57	struct inode *inode = file_inode(vmf->vma->vm_file);
	58	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	59	struct dnode_of_data dn;
	60	bool need_alloc = !f2fs_is_pinned_file(inode);
	61	int err = 0;
	62	vm_fault_t ret;
	63
	64	if (unlikely(IS_IMMUTABLE(inode)))
	65	return VM_FAULT_SIGBUS;
	66
	67	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
	68	err = -EIO;
	69	goto out;
	70	}
	71
	72	if (unlikely(f2fs_cp_error(sbi))) {
	73	err = -EIO;
	74	goto out;
	75	}
	76
	77	if (!f2fs_is_checkpoint_ready(sbi)) {
	78	err = -ENOSPC;
	79	goto out;
	80	}
	81
	82	err = f2fs_convert_inline_inode(inode);
	83	if (err)
	84	goto out;
	85
	86	#ifdef CONFIG_F2FS_FS_COMPRESSION
	87	if (f2fs_compressed_file(inode)) {
	88	int ret = f2fs_is_compressed_cluster(inode, folio->index);
	89
	90	if (ret < 0) {
	91	err = ret;
	92	goto out;
	93	} else if (ret) {
	94	need_alloc = false;
	95	}
	96	}
	97	#endif
	98	/* should do out of any locked page */
	99	if (need_alloc)
	100	f2fs_balance_fs(sbi, true);
	101
	102	sb_start_pagefault(inode->i_sb);
	103
	104	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
	105
	106	file_update_time(vmf->vma->vm_file);
	107	filemap_invalidate_lock_shared(inode->i_mapping);
	108	folio_lock(folio);
	109	if (unlikely(folio->mapping != inode->i_mapping \|\|
	110	folio_pos(folio) > i_size_read(inode) \|\|
	111	!folio_test_uptodate(folio))) {
	112	folio_unlock(folio);
	113	err = -EFAULT;
	114	goto out_sem;
	115	}
	116
	117	set_new_dnode(&dn, inode, NULL, NULL, 0);
	118	if (need_alloc) {
	119	/* block allocation */
	120	err = f2fs_get_block_locked(&dn, folio->index);
	121	} else {
	122	err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
	123	f2fs_put_dnode(&dn);
	124	if (f2fs_is_pinned_file(inode) &&
	125	!__is_valid_data_blkaddr(dn.data_blkaddr))
	126	err = -EIO;
	127	}
	128
	129	if (err) {
	130	folio_unlock(folio);
	131	goto out_sem;
	132	}
	133
	134	f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true);
	135
	136	/* wait for GCed page writeback via META_MAPPING */
	137	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
	138
	139	/*
	140	* check to see if the page is mapped already (no holes)
	141	*/
	142	if (folio_test_mappedtodisk(folio))
	143	goto out_sem;
	144
	145	/* page is wholly or partially inside EOF */
	146	if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
	147	i_size_read(inode)) {
	148	loff_t offset;
	149
	150	offset = i_size_read(inode) & ~PAGE_MASK;
	151	folio_zero_segment(folio, offset, folio_size(folio));
	152	}
	153	folio_mark_dirty(folio);
	154
	155	f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
	156	f2fs_update_time(sbi, REQ_TIME);
	157
	158	out_sem:
	159	filemap_invalidate_unlock_shared(inode->i_mapping);
	160
	161	sb_end_pagefault(inode->i_sb);
	162	out:
	163	ret = vmf_fs_error(err);
	164
	165	trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
	166	return ret;
	167	}
	168
	169	static const struct vm_operations_struct f2fs_file_vm_ops = {
	170	.fault = f2fs_filemap_fault,
	171	.map_pages = filemap_map_pages,
	172	.page_mkwrite = f2fs_vm_page_mkwrite,
	173	};
	174
	175	static int get_parent_ino(struct inode inode, nid_t pino)
	176	{
	177	struct dentry *dentry;
	178
	179	/*
	180	* Make sure to get the non-deleted alias. The alias associated with
	181	* the open file descriptor being fsync()'ed may be deleted already.
	182	*/
	183	dentry = d_find_alias(inode);
	184	if (!dentry)
	185	return 0;
	186
	187	*pino = d_parent_ino(dentry);
	188	dput(dentry);
	189	return 1;
	190	}
	191
	192	static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
	193	{
	194	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	195	enum cp_reason_type cp_reason = CP_NO_NEEDED;
	196
	197	if (!S_ISREG(inode->i_mode))
	198	cp_reason = CP_NON_REGULAR;
	199	else if (f2fs_compressed_file(inode))
	200	cp_reason = CP_COMPRESSED;
	201	else if (inode->i_nlink != 1)
	202	cp_reason = CP_HARDLINK;
	203	else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
	204	cp_reason = CP_SB_NEED_CP;
	205	else if (file_wrong_pino(inode))
	206	cp_reason = CP_WRONG_PINO;
	207	else if (!f2fs_space_for_roll_forward(sbi))
	208	cp_reason = CP_NO_SPC_ROLL;
	209	else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
	210	cp_reason = CP_NODE_NEED_CP;
	211	else if (test_opt(sbi, FASTBOOT))
	212	cp_reason = CP_FASTBOOT_MODE;
	213	else if (F2FS_OPTION(sbi).active_logs == 2)
	214	cp_reason = CP_SPEC_LOG_NUM;
	215	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
	216	f2fs_need_dentry_mark(sbi, inode->i_ino) &&
	217	f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
	218	TRANS_DIR_INO))
	219	cp_reason = CP_RECOVER_DIR;
	220	else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
	221	XATTR_DIR_INO))
	222	cp_reason = CP_XATTR_DIR;
	223
	224	return cp_reason;
	225	}
	226
	227	static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
	228	{
	229	struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
	230	bool ret = false;
	231	/* But we need to avoid that there are some inode updates */
	232	if ((i && PageDirty(i)) \|\| f2fs_need_inode_block_update(sbi, ino))
	233	ret = true;
	234	f2fs_put_page(i, 0);
	235	return ret;
	236	}
	237
	238	static void try_to_fix_pino(struct inode *inode)
	239	{
	240	struct f2fs_inode_info *fi = F2FS_I(inode);
	241	nid_t pino;
	242
	243	f2fs_down_write(&fi->i_sem);
	244	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
	245	get_parent_ino(inode, &pino)) {
	246	f2fs_i_pino_write(inode, pino);
	247	file_got_pino(inode);
	248	}
	249	f2fs_up_write(&fi->i_sem);
	250	}
	251
	252	static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
	253	int datasync, bool atomic)
	254	{
	255	struct inode *inode = file->f_mapping->host;
	256	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	257	nid_t ino = inode->i_ino;
	258	int ret = 0;
	259	enum cp_reason_type cp_reason = 0;
	260	struct writeback_control wbc = {
	261	.sync_mode = WB_SYNC_ALL,
	262	.nr_to_write = LONG_MAX,
	263	.for_reclaim = 0,
	264	};
	265	unsigned int seq_id = 0;
	266
	267	if (unlikely(f2fs_readonly(inode->i_sb)))
	268	return 0;
	269
	270	trace_f2fs_sync_file_enter(inode);
	271
	272	if (S_ISDIR(inode->i_mode))
	273	goto go_write;
	274
	275	/* if fdatasync is triggered, let's do in-place-update */
	276	if (datasync \|\| get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
	277	set_inode_flag(inode, FI_NEED_IPU);
	278	ret = file_write_and_wait_range(file, start, end);
	279	clear_inode_flag(inode, FI_NEED_IPU);
	280
	281	if (ret \|\| is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
	282	trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
	283	return ret;
	284	}
	285
	286	/* if the inode is dirty, let's recover all the time */
	287	if (!f2fs_skip_inode_update(inode, datasync)) {
	288	f2fs_write_inode(inode, NULL);
	289	goto go_write;
	290	}
	291
	292	/*
	293	* if there is no written data, don't waste time to write recovery info.
	294	*/
	295	if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
	296	!f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
	297
	298	/* it may call write_inode just prior to fsync */
	299	if (need_inode_page_update(sbi, ino))
	300	goto go_write;
	301
	302	if (is_inode_flag_set(inode, FI_UPDATE_WRITE) \|\|
	303	f2fs_exist_written_data(sbi, ino, UPDATE_INO))
	304	goto flush_out;
	305	goto out;
	306	} else {
	307	/*
	308	* for OPU case, during fsync(), node can be persisted before
	309	* data when lower device doesn't support write barrier, result
	310	* in data corruption after SPO.
	311	* So for strict fsync mode, force to use atomic write semantics
	312	* to keep write order in between data/node and last node to
	313	* avoid potential data corruption.
	314	*/
	315	if (F2FS_OPTION(sbi).fsync_mode ==
	316	FSYNC_MODE_STRICT && !atomic)
	317	atomic = true;
	318	}
	319	go_write:
	320	/*
	321	* Both of fdatasync() and fsync() are able to be recovered from
	322	* sudden-power-off.
	323	*/
	324	f2fs_down_read(&F2FS_I(inode)->i_sem);
	325	cp_reason = need_do_checkpoint(inode);
	326	f2fs_up_read(&F2FS_I(inode)->i_sem);
	327
	328	if (cp_reason) {
	329	/* all the dirty node pages should be flushed for POR */
	330	ret = f2fs_sync_fs(inode->i_sb, 1);
	331
	332	/*
	333	* We've secured consistency through sync_fs. Following pino
	334	* will be used only for fsynced inodes after checkpoint.
	335	*/
	336	try_to_fix_pino(inode);
	337	clear_inode_flag(inode, FI_APPEND_WRITE);
	338	clear_inode_flag(inode, FI_UPDATE_WRITE);
	339	goto out;
	340	}
	341	sync_nodes:
	342	atomic_inc(&sbi->wb_sync_req[NODE]);
	343	ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
	344	atomic_dec(&sbi->wb_sync_req[NODE]);
	345	if (ret)
	346	goto out;
	347
	348	/* if cp_error was enabled, we should avoid infinite loop */
	349	if (unlikely(f2fs_cp_error(sbi))) {
	350	ret = -EIO;
	351	goto out;
	352	}
	353
	354	if (f2fs_need_inode_block_update(sbi, ino)) {
	355	f2fs_mark_inode_dirty_sync(inode, true);
	356	f2fs_write_inode(inode, NULL);
	357	goto sync_nodes;
	358	}
	359
	360	/*
	361	* If it's atomic_write, it's just fine to keep write ordering. So
	362	* here we don't need to wait for node write completion, since we use
	363	* node chain which serializes node blocks. If one of node writes are
	364	* reordered, we can see simply broken chain, resulting in stopping
	365	* roll-forward recovery. It means we'll recover all or none node blocks
	366	* given fsync mark.
	367	*/
	368	if (!atomic) {
	369	ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
	370	if (ret)
	371	goto out;
	372	}
	373
	374	/* once recovery info is written, don't need to tack this */
	375	f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
	376	clear_inode_flag(inode, FI_APPEND_WRITE);
	377	flush_out:
	378	if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
	379	ret = f2fs_issue_flush(sbi, inode->i_ino);
	380	if (!ret) {
	381	f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
	382	clear_inode_flag(inode, FI_UPDATE_WRITE);
	383	f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
	384	}
	385	f2fs_update_time(sbi, REQ_TIME);
	386	out:
	387	trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
	388	return ret;
	389	}
	390
	391	int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
	392	{
	393	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
	394	return -EIO;
	395	return f2fs_do_sync_file(file, start, end, datasync, false);
	396	}
	397
	398	static bool __found_offset(struct address_space *mapping,
	399	struct dnode_of_data *dn, pgoff_t index, int whence)
	400	{
	401	block_t blkaddr = f2fs_data_blkaddr(dn);
	402	struct inode *inode = mapping->host;
	403	bool compressed_cluster = false;
	404
	405	if (f2fs_compressed_file(inode)) {
	406	block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
	407	ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
	408
	409	compressed_cluster = first_blkaddr == COMPRESS_ADDR;
	410	}
	411
	412	switch (whence) {
	413	case SEEK_DATA:
	414	if (__is_valid_data_blkaddr(blkaddr))
	415	return true;
	416	if (blkaddr == NEW_ADDR &&
	417	xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
	418	return true;
	419	if (compressed_cluster)
	420	return true;
	421	break;
	422	case SEEK_HOLE:
	423	if (compressed_cluster)
	424	return false;
	425	if (blkaddr == NULL_ADDR)
	426	return true;
	427	break;
	428	}
	429	return false;
	430	}
	431
	432	static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
	433	{
	434	struct inode *inode = file->f_mapping->host;
	435	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
	436	struct dnode_of_data dn;
	437	pgoff_t pgofs, end_offset;
	438	loff_t data_ofs = offset;
	439	loff_t isize;
	440	int err = 0;
	441
	442	inode_lock_shared(inode);
	443
	444	isize = i_size_read(inode);
	445	if (offset >= isize)
	446	goto fail;
	447
	448	/* handle inline data case */
	449	if (f2fs_has_inline_data(inode)) {
	450	if (whence == SEEK_HOLE) {
	451	data_ofs = isize;
	452	goto found;
	453	} else if (whence == SEEK_DATA) {
	454	data_ofs = offset;
	455	goto found;
	456	}
	457	}
	458
	459	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
	460
	461	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
	462	set_new_dnode(&dn, inode, NULL, NULL, 0);
	463	err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
	464	if (err && err != -ENOENT) {
	465	goto fail;
	466	} else if (err == -ENOENT) {
	467	/* direct node does not exists */
	468	if (whence == SEEK_DATA) {
	469	pgofs = f2fs_get_next_page_offset(&dn, pgofs);
	470	continue;
	471	} else {
	472	goto found;
	473	}
	474	}
	475
	476	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
	477
	478	/* find data/hole in dnode block */
	479	for (; dn.ofs_in_node < end_offset;
	480	dn.ofs_in_node++, pgofs++,
	481	data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
	482	block_t blkaddr;
	483
	484	blkaddr = f2fs_data_blkaddr(&dn);
	485
	486	if (__is_valid_data_blkaddr(blkaddr) &&
	487	!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
	488	blkaddr, DATA_GENERIC_ENHANCE)) {
	489	f2fs_put_dnode(&dn);
	490	goto fail;
	491	}
	492
	493	if (__found_offset(file->f_mapping, &dn,
	494	pgofs, whence)) {
	495	f2fs_put_dnode(&dn);
	496	goto found;
	497	}
	498	}
	499	f2fs_put_dnode(&dn);
	500	}
	501
	502	if (whence == SEEK_DATA)
	503	goto fail;
	504	found:
	505	if (whence == SEEK_HOLE && data_ofs > isize)
	506	data_ofs = isize;
	507	inode_unlock_shared(inode);
	508	return vfs_setpos(file, data_ofs, maxbytes);
	509	fail:
	510	inode_unlock_shared(inode);
	511	return -ENXIO;
	512	}
	513
	514	static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
	515	{
	516	struct inode *inode = file->f_mapping->host;
	517	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
	518
	519	switch (whence) {
	520	case SEEK_SET:
	521	case SEEK_CUR:
	522	case SEEK_END:
	523	return generic_file_llseek_size(file, offset, whence,
	524	maxbytes, i_size_read(inode));
	525	case SEEK_DATA:
	526	case SEEK_HOLE:
	527	if (offset < 0)
	528	return -ENXIO;
	529	return f2fs_seek_block(file, offset, whence);
	530	}
	531
	532	return -EINVAL;
	533	}
	534
	535	static int f2fs_file_mmap(struct file file, struct vm_area_struct vma)
	536	{
	537	struct inode *inode = file_inode(file);
	538
	539	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
	540	return -EIO;
	541
	542	if (!f2fs_is_compress_backend_ready(inode))
	543	return -EOPNOTSUPP;
	544
	545	file_accessed(file);
	546	vma->vm_ops = &f2fs_file_vm_ops;
	547
	548	f2fs_down_read(&F2FS_I(inode)->i_sem);
	549	set_inode_flag(inode, FI_MMAP_FILE);
	550	f2fs_up_read(&F2FS_I(inode)->i_sem);
	551
	552	return 0;
	553	}
	554
	555	static int finish_preallocate_blocks(struct inode *inode)
	556	{
	557	int ret;
	558
	559	inode_lock(inode);
	560	if (is_inode_flag_set(inode, FI_OPENED_FILE)) {
	561	inode_unlock(inode);
	562	return 0;
	563	}
	564
	565	if (!file_should_truncate(inode)) {
	566	set_inode_flag(inode, FI_OPENED_FILE);
	567	inode_unlock(inode);
	568	return 0;
	569	}
	570
	571	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	572	filemap_invalidate_lock(inode->i_mapping);
	573
	574	truncate_setsize(inode, i_size_read(inode));
	575	ret = f2fs_truncate(inode);
	576
	577	filemap_invalidate_unlock(inode->i_mapping);
	578	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	579
	580	if (!ret)
	581	set_inode_flag(inode, FI_OPENED_FILE);
	582
	583	inode_unlock(inode);
	584	if (ret)
	585	return ret;
	586
	587	file_dont_truncate(inode);
	588	return 0;
	589	}
	590
	591	static int f2fs_file_open(struct inode inode, struct file filp)
	592	{
	593	int err = fscrypt_file_open(inode, filp);
	594
	595	if (err)
	596	return err;
	597
	598	if (!f2fs_is_compress_backend_ready(inode))
	599	return -EOPNOTSUPP;
	600
	601	err = fsverity_file_open(inode, filp);
	602	if (err)
	603	return err;
	604
	605	filp->f_mode \|= FMODE_NOWAIT;
	606	filp->f_mode \|= FMODE_CAN_ODIRECT;
	607
	608	err = dquot_file_open(inode, filp);
	609	if (err)
	610	return err;
	611
	612	return finish_preallocate_blocks(inode);
	613	}
	614
	615	void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
	616	{
	617	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	618	int nr_free = 0, ofs = dn->ofs_in_node, len = count;
	619	__le32 *addr;
	620	bool compressed_cluster = false;
	621	int cluster_index = 0, valid_blocks = 0;
	622	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	623	bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
	624	block_t blkstart;
	625	int blklen = 0;
	626
	627	addr = get_dnode_addr(dn->inode, dn->node_page) + ofs;
	628	blkstart = le32_to_cpu(*addr);
	629
	630	/* Assumption: truncation starts with cluster */
	631	for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
	632	block_t blkaddr = le32_to_cpu(*addr);
	633
	634	if (f2fs_compressed_file(dn->inode) &&
	635	!(cluster_index & (cluster_size - 1))) {
	636	if (compressed_cluster)
	637	f2fs_i_compr_blocks_update(dn->inode,
	638	valid_blocks, false);
	639	compressed_cluster = (blkaddr == COMPRESS_ADDR);
	640	valid_blocks = 0;
	641	}
	642
	643	if (blkaddr == NULL_ADDR)
	644	goto next;
	645
	646	f2fs_set_data_blkaddr(dn, NULL_ADDR);
	647
	648	if (__is_valid_data_blkaddr(blkaddr)) {
	649	if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
	650	goto next;
	651	if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
	652	DATA_GENERIC_ENHANCE))
	653	goto next;
	654	if (compressed_cluster)
	655	valid_blocks++;
	656	}
	657
	658	if (blkstart + blklen == blkaddr) {
	659	blklen++;
	660	} else {
	661	f2fs_invalidate_blocks(sbi, blkstart, blklen);
	662	blkstart = blkaddr;
	663	blklen = 1;
	664	}
	665
	666	if (!released \|\| blkaddr != COMPRESS_ADDR)
	667	nr_free++;
	668
	669	continue;
	670
	671	next:
	672	if (blklen)
	673	f2fs_invalidate_blocks(sbi, blkstart, blklen);
	674
	675	blkstart = le32_to_cpu(*(addr + 1));
	676	blklen = 0;
	677	}
	678
	679	if (blklen)
	680	f2fs_invalidate_blocks(sbi, blkstart, blklen);
	681
	682	if (compressed_cluster)
	683	f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);
	684
	685	if (nr_free) {
	686	pgoff_t fofs;
	687	/*
	688	* once we invalidate valid blkaddr in range [ofs, ofs + count],
	689	* we will invalidate all blkaddr in the whole range.
	690	*/
	691	fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
	692	dn->inode) + ofs;
	693	f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
	694	f2fs_update_age_extent_cache_range(dn, fofs, len);
	695	dec_valid_block_count(sbi, dn->inode, nr_free);
	696	}
	697	dn->ofs_in_node = ofs;
	698
	699	f2fs_update_time(sbi, REQ_TIME);
	700	trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
	701	dn->ofs_in_node, nr_free);
	702	}
	703
	704	static int truncate_partial_data_page(struct inode *inode, u64 from,
	705	bool cache_only)
	706	{
	707	loff_t offset = from & (PAGE_SIZE - 1);
	708	pgoff_t index = from >> PAGE_SHIFT;
	709	struct address_space *mapping = inode->i_mapping;
	710	struct page *page;
	711
	712	if (!offset && !cache_only)
	713	return 0;
	714
	715	if (cache_only) {
	716	page = find_lock_page(mapping, index);
	717	if (page && PageUptodate(page))
	718	goto truncate_out;
	719	f2fs_put_page(page, 1);
	720	return 0;
	721	}
	722
	723	page = f2fs_get_lock_data_page(inode, index, true);
	724	if (IS_ERR(page))
	725	return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
	726	truncate_out:
	727	f2fs_wait_on_page_writeback(page, DATA, true, true);
	728	zero_user(page, offset, PAGE_SIZE - offset);
	729
	730	/* An encrypted inode should have a key and truncate the last page. */
	731	f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
	732	if (!cache_only)
	733	set_page_dirty(page);
	734	f2fs_put_page(page, 1);
	735	return 0;
	736	}
	737
	738	int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
	739	{
	740	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	741	struct dnode_of_data dn;
	742	pgoff_t free_from;
	743	int count = 0, err = 0;
	744	struct page *ipage;
	745	bool truncate_page = false;
	746
	747	trace_f2fs_truncate_blocks_enter(inode, from);
	748
	749	if (IS_DEVICE_ALIASING(inode) && from) {
	750	err = -EINVAL;
	751	goto out_err;
	752	}
	753
	754	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
	755
	756	if (free_from >= max_file_blocks(inode))
	757	goto free_partial;
	758
	759	if (lock)
	760	f2fs_lock_op(sbi);
	761
	762	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	763	if (IS_ERR(ipage)) {
	764	err = PTR_ERR(ipage);
	765	goto out;
	766	}
	767
	768	if (IS_DEVICE_ALIASING(inode)) {
	769	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	770	struct extent_info ei = et->largest;
	771
	772	f2fs_invalidate_blocks(sbi, ei.blk, ei.len);
	773
	774	dec_valid_block_count(sbi, inode, ei.len);
	775	f2fs_update_time(sbi, REQ_TIME);
	776
	777	f2fs_put_page(ipage, 1);
	778	goto out;
	779	}
	780
	781	if (f2fs_has_inline_data(inode)) {
	782	f2fs_truncate_inline_inode(inode, ipage, from);
	783	f2fs_put_page(ipage, 1);
	784	truncate_page = true;
	785	goto out;
	786	}
	787
	788	set_new_dnode(&dn, inode, ipage, NULL, 0);
	789	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
	790	if (err) {
	791	if (err == -ENOENT)
	792	goto free_next;
	793	goto out;
	794	}
	795
	796	count = ADDRS_PER_PAGE(dn.node_page, inode);
	797
	798	count -= dn.ofs_in_node;
	799	f2fs_bug_on(sbi, count < 0);
	800
	801	if (dn.ofs_in_node \|\| IS_INODE(dn.node_page)) {
	802	f2fs_truncate_data_blocks_range(&dn, count);
	803	free_from += count;
	804	}
	805
	806	f2fs_put_dnode(&dn);
	807	free_next:
	808	err = f2fs_truncate_inode_blocks(inode, free_from);
	809	out:
	810	if (lock)
	811	f2fs_unlock_op(sbi);
	812	free_partial:
	813	/* lastly zero out the first data page */
	814	if (!err)
	815	err = truncate_partial_data_page(inode, from, truncate_page);
	816	out_err:
	817	trace_f2fs_truncate_blocks_exit(inode, err);
	818	return err;
	819	}
	820
	821	int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
	822	{
	823	u64 free_from = from;
	824	int err;
	825
	826	#ifdef CONFIG_F2FS_FS_COMPRESSION
	827	/*
	828	* for compressed file, only support cluster size
	829	* aligned truncation.
	830	*/
	831	if (f2fs_compressed_file(inode))
	832	free_from = round_up(from,
	833	F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
	834	#endif
	835
	836	err = f2fs_do_truncate_blocks(inode, free_from, lock);
	837	if (err)
	838	return err;
	839
	840	#ifdef CONFIG_F2FS_FS_COMPRESSION
	841	/*
	842	* For compressed file, after release compress blocks, don't allow write
	843	* direct, but we should allow write direct after truncate to zero.
	844	*/
	845	if (f2fs_compressed_file(inode) && !free_from
	846	&& is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
	847	clear_inode_flag(inode, FI_COMPRESS_RELEASED);
	848
	849	if (from != free_from) {
	850	err = f2fs_truncate_partial_cluster(inode, from, lock);
	851	if (err)
	852	return err;
	853	}
	854	#endif
	855
	856	return 0;
	857	}
	858
	859	int f2fs_truncate(struct inode *inode)
	860	{
	861	int err;
	862
	863	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
	864	return -EIO;
	865
	866	if (!(S_ISREG(inode->i_mode) \|\| S_ISDIR(inode->i_mode) \|\|
	867	S_ISLNK(inode->i_mode)))
	868	return 0;
	869
	870	trace_f2fs_truncate(inode);
	871
	872	if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
	873	return -EIO;
	874
	875	err = f2fs_dquot_initialize(inode);
	876	if (err)
	877	return err;
	878
	879	/* we should check inline_data size */
	880	if (!f2fs_may_inline_data(inode)) {
	881	err = f2fs_convert_inline_inode(inode);
	882	if (err)
	883	return err;
	884	}
	885
	886	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	887	if (err)
	888	return err;
	889
	890	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	891	f2fs_mark_inode_dirty_sync(inode, false);
	892	return 0;
	893	}
	894
	895	static bool f2fs_force_buffered_io(struct inode *inode, int rw)
	896	{
	897	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	898
	899	if (!fscrypt_dio_supported(inode))
	900	return true;
	901	if (fsverity_active(inode))
	902	return true;
	903	if (f2fs_compressed_file(inode))
	904	return true;
	905	/*
	906	* only force direct read to use buffered IO, for direct write,
	907	* it expects inline data conversion before committing IO.
	908	*/
	909	if (f2fs_has_inline_data(inode) && rw == READ)
	910	return true;
	911
	912	/* disallow direct IO if any of devices has unaligned blksize */
	913	if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
	914	return true;
	915	/*
	916	* for blkzoned device, fallback direct IO to buffered IO, so
	917	* all IOs can be serialized by log-structured write.
	918	*/
	919	if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
	920	!f2fs_is_pinned_file(inode))
	921	return true;
	922	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
	923	return true;
	924
	925	return false;
	926	}
	927
	928	int f2fs_getattr(struct mnt_idmap idmap, const struct path path,
	929	struct kstat *stat, u32 request_mask, unsigned int query_flags)
	930	{
	931	struct inode *inode = d_inode(path->dentry);
	932	struct f2fs_inode_info *fi = F2FS_I(inode);
	933	struct f2fs_inode *ri = NULL;
	934	unsigned int flags;
	935
	936	if (f2fs_has_extra_attr(inode) &&
	937	f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
	938	F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
	939	stat->result_mask \|= STATX_BTIME;
	940	stat->btime.tv_sec = fi->i_crtime.tv_sec;
	941	stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
	942	}
	943
	944	/*
	945	* Return the DIO alignment restrictions if requested. We only return
	946	* this information when requested, since on encrypted files it might
	947	* take a fair bit of work to get if the file wasn't opened recently.
	948	*
	949	* f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN
	950	* cannot represent that, so in that case we report no DIO support.
	951	*/
	952	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
	953	unsigned int bsize = i_blocksize(inode);
	954
	955	stat->result_mask \|= STATX_DIOALIGN;
	956	if (!f2fs_force_buffered_io(inode, WRITE)) {
	957	stat->dio_mem_align = bsize;
	958	stat->dio_offset_align = bsize;
	959	}
	960	}
	961
	962	flags = fi->i_flags;
	963	if (flags & F2FS_COMPR_FL)
	964	stat->attributes \|= STATX_ATTR_COMPRESSED;
	965	if (flags & F2FS_APPEND_FL)
	966	stat->attributes \|= STATX_ATTR_APPEND;
	967	if (IS_ENCRYPTED(inode))
	968	stat->attributes \|= STATX_ATTR_ENCRYPTED;
	969	if (flags & F2FS_IMMUTABLE_FL)
	970	stat->attributes \|= STATX_ATTR_IMMUTABLE;
	971	if (flags & F2FS_NODUMP_FL)
	972	stat->attributes \|= STATX_ATTR_NODUMP;
	973	if (IS_VERITY(inode))
	974	stat->attributes \|= STATX_ATTR_VERITY;
	975
	976	stat->attributes_mask \|= (STATX_ATTR_COMPRESSED \|
	977	STATX_ATTR_APPEND \|
	978	STATX_ATTR_ENCRYPTED \|
	979	STATX_ATTR_IMMUTABLE \|
	980	STATX_ATTR_NODUMP \|
	981	STATX_ATTR_VERITY);
	982
	983	generic_fillattr(idmap, request_mask, inode, stat);
	984
	985	/* we need to show initial sectors used for inline_data/dentries */
	986	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) \|\|
	987	f2fs_has_inline_dentry(inode))
	988	stat->blocks += (stat->size + 511) >> 9;
	989
	990	return 0;
	991	}
	992
	993	#ifdef CONFIG_F2FS_FS_POSIX_ACL
	994	static void __setattr_copy(struct mnt_idmap *idmap,
	995	struct inode inode, const struct iattr attr)
	996	{
	997	unsigned int ia_valid = attr->ia_valid;
	998
	999	i_uid_update(idmap, attr, inode);
	1000	i_gid_update(idmap, attr, inode);
	1001	if (ia_valid & ATTR_ATIME)
	1002	inode_set_atime_to_ts(inode, attr->ia_atime);
	1003	if (ia_valid & ATTR_MTIME)
	1004	inode_set_mtime_to_ts(inode, attr->ia_mtime);
	1005	if (ia_valid & ATTR_CTIME)
	1006	inode_set_ctime_to_ts(inode, attr->ia_ctime);
	1007	if (ia_valid & ATTR_MODE) {
	1008	umode_t mode = attr->ia_mode;
	1009
	1010	if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
	1011	mode &= ~S_ISGID;
	1012	set_acl_inode(inode, mode);
	1013	}
	1014	}
	1015	#else
	1016	#define __setattr_copy setattr_copy
	1017	#endif
	1018
	1019	int f2fs_setattr(struct mnt_idmap idmap, struct dentry dentry,
	1020	struct iattr *attr)
	1021	{
	1022	struct inode *inode = d_inode(dentry);
	1023	struct f2fs_inode_info *fi = F2FS_I(inode);
	1024	int err;
	1025
	1026	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
	1027	return -EIO;
	1028
	1029	if (unlikely(IS_IMMUTABLE(inode)))
	1030	return -EPERM;
	1031
	1032	if (unlikely(IS_APPEND(inode) &&
	1033	(attr->ia_valid & (ATTR_MODE \| ATTR_UID \|
	1034	ATTR_GID \| ATTR_TIMES_SET))))
	1035	return -EPERM;
	1036
	1037	if ((attr->ia_valid & ATTR_SIZE)) {
	1038	if (!f2fs_is_compress_backend_ready(inode) \|\|
	1039	IS_DEVICE_ALIASING(inode))
	1040	return -EOPNOTSUPP;
	1041	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
	1042	!IS_ALIGNED(attr->ia_size,
	1043	F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
	1044	return -EINVAL;
	1045	}
	1046
	1047	err = setattr_prepare(idmap, dentry, attr);
	1048	if (err)
	1049	return err;
	1050
	1051	err = fscrypt_prepare_setattr(dentry, attr);
	1052	if (err)
	1053	return err;
	1054
	1055	err = fsverity_prepare_setattr(dentry, attr);
	1056	if (err)
	1057	return err;
	1058
	1059	if (is_quota_modification(idmap, inode, attr)) {
	1060	err = f2fs_dquot_initialize(inode);
	1061	if (err)
	1062	return err;
	1063	}
	1064	if (i_uid_needs_update(idmap, attr, inode) \|\|
	1065	i_gid_needs_update(idmap, attr, inode)) {
	1066	f2fs_lock_op(F2FS_I_SB(inode));
	1067	err = dquot_transfer(idmap, inode, attr);
	1068	if (err) {
	1069	set_sbi_flag(F2FS_I_SB(inode),
	1070	SBI_QUOTA_NEED_REPAIR);
	1071	f2fs_unlock_op(F2FS_I_SB(inode));
	1072	return err;
	1073	}
	1074	/*
	1075	* update uid/gid under lock_op(), so that dquot and inode can
	1076	* be updated atomically.
	1077	*/
	1078	i_uid_update(idmap, attr, inode);
	1079	i_gid_update(idmap, attr, inode);
	1080	f2fs_mark_inode_dirty_sync(inode, true);
	1081	f2fs_unlock_op(F2FS_I_SB(inode));
	1082	}
	1083
	1084	if (attr->ia_valid & ATTR_SIZE) {
	1085	loff_t old_size = i_size_read(inode);
	1086
	1087	if (attr->ia_size > MAX_INLINE_DATA(inode)) {
	1088	/*
	1089	* should convert inline inode before i_size_write to
	1090	* keep smaller than inline_data size with inline flag.
	1091	*/
	1092	err = f2fs_convert_inline_inode(inode);
	1093	if (err)
	1094	return err;
	1095	}
	1096
	1097	/*
	1098	* wait for inflight dio, blocks should be removed after
	1099	* IO completion.
	1100	*/
	1101	if (attr->ia_size < old_size)
	1102	inode_dio_wait(inode);
	1103
	1104	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	1105	filemap_invalidate_lock(inode->i_mapping);
	1106
	1107	truncate_setsize(inode, attr->ia_size);
	1108
	1109	if (attr->ia_size <= old_size)
	1110	err = f2fs_truncate(inode);
	1111	/*
	1112	* do not trim all blocks after i_size if target size is
	1113	* larger than i_size.
	1114	*/
	1115	filemap_invalidate_unlock(inode->i_mapping);
	1116	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	1117	if (err)
	1118	return err;
	1119
	1120	spin_lock(&fi->i_size_lock);
	1121	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	1122	fi->last_disk_size = i_size_read(inode);
	1123	spin_unlock(&fi->i_size_lock);
	1124	}
	1125
	1126	__setattr_copy(idmap, inode, attr);
	1127
	1128	if (attr->ia_valid & ATTR_MODE) {
	1129	err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));
	1130
	1131	if (is_inode_flag_set(inode, FI_ACL_MODE)) {
	1132	if (!err)
	1133	inode->i_mode = fi->i_acl_mode;
	1134	clear_inode_flag(inode, FI_ACL_MODE);
	1135	}
	1136	}
	1137
	1138	/* file size may changed here */
	1139	f2fs_mark_inode_dirty_sync(inode, true);
	1140
	1141	/* inode change will produce dirty node pages flushed by checkpoint */
	1142	f2fs_balance_fs(F2FS_I_SB(inode), true);
	1143
	1144	return err;
	1145	}
	1146
	1147	const struct inode_operations f2fs_file_inode_operations = {
	1148	.getattr = f2fs_getattr,
	1149	.setattr = f2fs_setattr,
	1150	.get_inode_acl = f2fs_get_acl,
	1151	.set_acl = f2fs_set_acl,
	1152	.listxattr = f2fs_listxattr,
	1153	.fiemap = f2fs_fiemap,
	1154	.fileattr_get = f2fs_fileattr_get,
	1155	.fileattr_set = f2fs_fileattr_set,
	1156	};
	1157
	1158	static int fill_zero(struct inode *inode, pgoff_t index,
	1159	loff_t start, loff_t len)
	1160	{
	1161	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1162	struct page *page;
	1163
	1164	if (!len)
	1165	return 0;
	1166
	1167	f2fs_balance_fs(sbi, true);
	1168
	1169	f2fs_lock_op(sbi);
	1170	page = f2fs_get_new_data_page(inode, NULL, index, false);
	1171	f2fs_unlock_op(sbi);
	1172
	1173	if (IS_ERR(page))
	1174	return PTR_ERR(page);
	1175
	1176	f2fs_wait_on_page_writeback(page, DATA, true, true);
	1177	zero_user(page, start, len);
	1178	set_page_dirty(page);
	1179	f2fs_put_page(page, 1);
	1180	return 0;
	1181	}
	1182
	1183	int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
	1184	{
	1185	int err;
	1186
	1187	while (pg_start < pg_end) {
	1188	struct dnode_of_data dn;
	1189	pgoff_t end_offset, count;
	1190
	1191	set_new_dnode(&dn, inode, NULL, NULL, 0);
	1192	err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
	1193	if (err) {
	1194	if (err == -ENOENT) {
	1195	pg_start = f2fs_get_next_page_offset(&dn,
	1196	pg_start);
	1197	continue;
	1198	}
	1199	return err;
	1200	}
	1201
	1202	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
	1203	count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
	1204
	1205	f2fs_bug_on(F2FS_I_SB(inode), count == 0 \|\| count > end_offset);
	1206
	1207	f2fs_truncate_data_blocks_range(&dn, count);
	1208	f2fs_put_dnode(&dn);
	1209
	1210	pg_start += count;
	1211	}
	1212	return 0;
	1213	}
	1214
	1215	static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
	1216	{
	1217	pgoff_t pg_start, pg_end;
	1218	loff_t off_start, off_end;
	1219	int ret;
	1220
	1221	ret = f2fs_convert_inline_inode(inode);
	1222	if (ret)
	1223	return ret;
	1224
	1225	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	1226	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
	1227
	1228	off_start = offset & (PAGE_SIZE - 1);
	1229	off_end = (offset + len) & (PAGE_SIZE - 1);
	1230
	1231	if (pg_start == pg_end) {
	1232	ret = fill_zero(inode, pg_start, off_start,
	1233	off_end - off_start);
	1234	if (ret)
	1235	return ret;
	1236	} else {
	1237	if (off_start) {
	1238	ret = fill_zero(inode, pg_start++, off_start,
	1239	PAGE_SIZE - off_start);
	1240	if (ret)
	1241	return ret;
	1242	}
	1243	if (off_end) {
	1244	ret = fill_zero(inode, pg_end, 0, off_end);
	1245	if (ret)
	1246	return ret;
	1247	}
	1248
	1249	if (pg_start < pg_end) {
	1250	loff_t blk_start, blk_end;
	1251	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1252
	1253	f2fs_balance_fs(sbi, true);
	1254
	1255	blk_start = (loff_t)pg_start << PAGE_SHIFT;
	1256	blk_end = (loff_t)pg_end << PAGE_SHIFT;
	1257
	1258	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1259	filemap_invalidate_lock(inode->i_mapping);
	1260
	1261	truncate_pagecache_range(inode, blk_start, blk_end - 1);
	1262
	1263	f2fs_lock_op(sbi);
	1264	ret = f2fs_truncate_hole(inode, pg_start, pg_end);
	1265	f2fs_unlock_op(sbi);
	1266
	1267	filemap_invalidate_unlock(inode->i_mapping);
	1268	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1269	}
	1270	}
	1271
	1272	return ret;
	1273	}
	1274
	1275	static int __read_out_blkaddrs(struct inode inode, block_t blkaddr,
	1276	int *do_replace, pgoff_t off, pgoff_t len)
	1277	{
	1278	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1279	struct dnode_of_data dn;
	1280	int ret, done, i;
	1281
	1282	next_dnode:
	1283	set_new_dnode(&dn, inode, NULL, NULL, 0);
	1284	ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
	1285	if (ret && ret != -ENOENT) {
	1286	return ret;
	1287	} else if (ret == -ENOENT) {
	1288	if (dn.max_level == 0)
	1289	return -ENOENT;
	1290	done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
	1291	dn.ofs_in_node, len);
	1292	blkaddr += done;
	1293	do_replace += done;
	1294	goto next;
	1295	}
	1296
	1297	done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
	1298	dn.ofs_in_node, len);
	1299	for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
	1300	*blkaddr = f2fs_data_blkaddr(&dn);
	1301
	1302	if (__is_valid_data_blkaddr(*blkaddr) &&
	1303	!f2fs_is_valid_blkaddr(sbi, *blkaddr,
	1304	DATA_GENERIC_ENHANCE)) {
	1305	f2fs_put_dnode(&dn);
	1306	return -EFSCORRUPTED;
	1307	}
	1308
	1309	if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
	1310
	1311	if (f2fs_lfs_mode(sbi)) {
	1312	f2fs_put_dnode(&dn);
	1313	return -EOPNOTSUPP;
	1314	}
	1315
	1316	/* do not invalidate this block address */
	1317	f2fs_update_data_blkaddr(&dn, NULL_ADDR);
	1318	*do_replace = 1;
	1319	}
	1320	}
	1321	f2fs_put_dnode(&dn);
	1322	next:
	1323	len -= done;
	1324	off += done;
	1325	if (len)
	1326	goto next_dnode;
	1327	return 0;
	1328	}
	1329
	1330	static int __roll_back_blkaddrs(struct inode inode, block_t blkaddr,
	1331	int *do_replace, pgoff_t off, int len)
	1332	{
	1333	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1334	struct dnode_of_data dn;
	1335	int ret, i;
	1336
	1337	for (i = 0; i < len; i++, do_replace++, blkaddr++) {
	1338	if (*do_replace == 0)
	1339	continue;
	1340
	1341	set_new_dnode(&dn, inode, NULL, NULL, 0);
	1342	ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
	1343	if (ret) {
	1344	dec_valid_block_count(sbi, inode, 1);
	1345	f2fs_invalidate_blocks(sbi, *blkaddr, 1);
	1346	} else {
	1347	f2fs_update_data_blkaddr(&dn, *blkaddr);
	1348	}
	1349	f2fs_put_dnode(&dn);
	1350	}
	1351	return 0;
	1352	}
	1353
	1354	static int __clone_blkaddrs(struct inode src_inode, struct inode dst_inode,
	1355	block_t blkaddr, int do_replace,
	1356	pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
	1357	{
	1358	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
	1359	pgoff_t i = 0;
	1360	int ret;
	1361
	1362	while (i < len) {
	1363	if (blkaddr[i] == NULL_ADDR && !full) {
	1364	i++;
	1365	continue;
	1366	}
	1367
	1368	if (do_replace[i] \|\| blkaddr[i] == NULL_ADDR) {
	1369	struct dnode_of_data dn;
	1370	struct node_info ni;
	1371	size_t new_size;
	1372	pgoff_t ilen;
	1373
	1374	set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
	1375	ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
	1376	if (ret)
	1377	return ret;
	1378
	1379	ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
	1380	if (ret) {
	1381	f2fs_put_dnode(&dn);
	1382	return ret;
	1383	}
	1384
	1385	ilen = min((pgoff_t)
	1386	ADDRS_PER_PAGE(dn.node_page, dst_inode) -
	1387	dn.ofs_in_node, len - i);
	1388	do {
	1389	dn.data_blkaddr = f2fs_data_blkaddr(&dn);
	1390	f2fs_truncate_data_blocks_range(&dn, 1);
	1391
	1392	if (do_replace[i]) {
	1393	f2fs_i_blocks_write(src_inode,
	1394	1, false, false);
	1395	f2fs_i_blocks_write(dst_inode,
	1396	1, true, false);
	1397	f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
	1398	blkaddr[i], ni.version, true, false);
	1399
	1400	do_replace[i] = 0;
	1401	}
	1402	dn.ofs_in_node++;
	1403	i++;
	1404	new_size = (loff_t)(dst + i) << PAGE_SHIFT;
	1405	if (dst_inode->i_size < new_size)
	1406	f2fs_i_size_write(dst_inode, new_size);
	1407	} while (--ilen && (do_replace[i] \|\| blkaddr[i] == NULL_ADDR));
	1408
	1409	f2fs_put_dnode(&dn);
	1410	} else {
	1411	struct page psrc, pdst;
	1412
	1413	psrc = f2fs_get_lock_data_page(src_inode,
	1414	src + i, true);
	1415	if (IS_ERR(psrc))
	1416	return PTR_ERR(psrc);
	1417	pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
	1418	true);
	1419	if (IS_ERR(pdst)) {
	1420	f2fs_put_page(psrc, 1);
	1421	return PTR_ERR(pdst);
	1422	}
	1423
	1424	f2fs_wait_on_page_writeback(pdst, DATA, true, true);
	1425
	1426	memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE);
	1427	set_page_dirty(pdst);
	1428	set_page_private_gcing(pdst);
	1429	f2fs_put_page(pdst, 1);
	1430	f2fs_put_page(psrc, 1);
	1431
	1432	ret = f2fs_truncate_hole(src_inode,
	1433	src + i, src + i + 1);
	1434	if (ret)
	1435	return ret;
	1436	i++;
	1437	}
	1438	}
	1439	return 0;
	1440	}
	1441
	1442	static int __exchange_data_block(struct inode *src_inode,
	1443	struct inode *dst_inode, pgoff_t src, pgoff_t dst,
	1444	pgoff_t len, bool full)
	1445	{
	1446	block_t *src_blkaddr;
	1447	int *do_replace;
	1448	pgoff_t olen;
	1449	int ret;
	1450
	1451	while (len) {
	1452	olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
	1453
	1454	src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
	1455	array_size(olen, sizeof(block_t)),
	1456	GFP_NOFS);
	1457	if (!src_blkaddr)
	1458	return -ENOMEM;
	1459
	1460	do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
	1461	array_size(olen, sizeof(int)),
	1462	GFP_NOFS);
	1463	if (!do_replace) {
	1464	kvfree(src_blkaddr);
	1465	return -ENOMEM;
	1466	}
	1467
	1468	ret = __read_out_blkaddrs(src_inode, src_blkaddr,
	1469	do_replace, src, olen);
	1470	if (ret)
	1471	goto roll_back;
	1472
	1473	ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
	1474	do_replace, src, dst, olen, full);
	1475	if (ret)
	1476	goto roll_back;
	1477
	1478	src += olen;
	1479	dst += olen;
	1480	len -= olen;
	1481
	1482	kvfree(src_blkaddr);
	1483	kvfree(do_replace);
	1484	}
	1485	return 0;
	1486
	1487	roll_back:
	1488	__roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
	1489	kvfree(src_blkaddr);
	1490	kvfree(do_replace);
	1491	return ret;
	1492	}
	1493
	1494	static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
	1495	{
	1496	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1497	pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	1498	pgoff_t start = offset >> PAGE_SHIFT;
	1499	pgoff_t end = (offset + len) >> PAGE_SHIFT;
	1500	int ret;
	1501
	1502	f2fs_balance_fs(sbi, true);
	1503
	1504	/* avoid gc operation during block exchange */
	1505	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1506	filemap_invalidate_lock(inode->i_mapping);
	1507
	1508	f2fs_lock_op(sbi);
	1509	f2fs_drop_extent_tree(inode);
	1510	truncate_pagecache(inode, offset);
	1511	ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
	1512	f2fs_unlock_op(sbi);
	1513
	1514	filemap_invalidate_unlock(inode->i_mapping);
	1515	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1516	return ret;
	1517	}
	1518
	1519	static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
	1520	{
	1521	loff_t new_size;
	1522	int ret;
	1523
	1524	if (offset + len >= i_size_read(inode))
	1525	return -EINVAL;
	1526
	1527	/* collapse range should be aligned to block size of f2fs. */
	1528	if (offset & (F2FS_BLKSIZE - 1) \|\| len & (F2FS_BLKSIZE - 1))
	1529	return -EINVAL;
	1530
	1531	ret = f2fs_convert_inline_inode(inode);
	1532	if (ret)
	1533	return ret;
	1534
	1535	/* write out all dirty pages from offset */
	1536	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	1537	if (ret)
	1538	return ret;
	1539
	1540	ret = f2fs_do_collapse(inode, offset, len);
	1541	if (ret)
	1542	return ret;
	1543
	1544	/* write out all moved pages, if possible */
	1545	filemap_invalidate_lock(inode->i_mapping);
	1546	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
	1547	truncate_pagecache(inode, offset);
	1548
	1549	new_size = i_size_read(inode) - len;
	1550	ret = f2fs_truncate_blocks(inode, new_size, true);
	1551	filemap_invalidate_unlock(inode->i_mapping);
	1552	if (!ret)
	1553	f2fs_i_size_write(inode, new_size);
	1554	return ret;
	1555	}
	1556
	1557	static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
	1558	pgoff_t end)
	1559	{
	1560	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	1561	pgoff_t index = start;
	1562	unsigned int ofs_in_node = dn->ofs_in_node;
	1563	blkcnt_t count = 0;
	1564	int ret;
	1565
	1566	for (; index < end; index++, dn->ofs_in_node++) {
	1567	if (f2fs_data_blkaddr(dn) == NULL_ADDR)
	1568	count++;
	1569	}
	1570
	1571	dn->ofs_in_node = ofs_in_node;
	1572	ret = f2fs_reserve_new_blocks(dn, count);
	1573	if (ret)
	1574	return ret;
	1575
	1576	dn->ofs_in_node = ofs_in_node;
	1577	for (index = start; index < end; index++, dn->ofs_in_node++) {
	1578	dn->data_blkaddr = f2fs_data_blkaddr(dn);
	1579	/*
	1580	* f2fs_reserve_new_blocks will not guarantee entire block
	1581	* allocation.
	1582	*/
	1583	if (dn->data_blkaddr == NULL_ADDR) {
	1584	ret = -ENOSPC;
	1585	break;
	1586	}
	1587
	1588	if (dn->data_blkaddr == NEW_ADDR)
	1589	continue;
	1590
	1591	if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
	1592	DATA_GENERIC_ENHANCE)) {
	1593	ret = -EFSCORRUPTED;
	1594	break;
	1595	}
	1596
	1597	f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1);
	1598	f2fs_set_data_blkaddr(dn, NEW_ADDR);
	1599	}
	1600
	1601	f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
	1602	f2fs_update_age_extent_cache_range(dn, start, index - start);
	1603
	1604	return ret;
	1605	}
	1606
	1607	static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
	1608	int mode)
	1609	{
	1610	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1611	struct address_space *mapping = inode->i_mapping;
	1612	pgoff_t index, pg_start, pg_end;
	1613	loff_t new_size = i_size_read(inode);
	1614	loff_t off_start, off_end;
	1615	int ret = 0;
	1616
	1617	ret = inode_newsize_ok(inode, (len + offset));
	1618	if (ret)
	1619	return ret;
	1620
	1621	ret = f2fs_convert_inline_inode(inode);
	1622	if (ret)
	1623	return ret;
	1624
	1625	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
	1626	if (ret)
	1627	return ret;
	1628
	1629	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
	1630	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
	1631
	1632	off_start = offset & (PAGE_SIZE - 1);
	1633	off_end = (offset + len) & (PAGE_SIZE - 1);
	1634
	1635	if (pg_start == pg_end) {
	1636	ret = fill_zero(inode, pg_start, off_start,
	1637	off_end - off_start);
	1638	if (ret)
	1639	return ret;
	1640
	1641	new_size = max_t(loff_t, new_size, offset + len);
	1642	} else {
	1643	if (off_start) {
	1644	ret = fill_zero(inode, pg_start++, off_start,
	1645	PAGE_SIZE - off_start);
	1646	if (ret)
	1647	return ret;
	1648
	1649	new_size = max_t(loff_t, new_size,
	1650	(loff_t)pg_start << PAGE_SHIFT);
	1651	}
	1652
	1653	for (index = pg_start; index < pg_end;) {
	1654	struct dnode_of_data dn;
	1655	unsigned int end_offset;
	1656	pgoff_t end;
	1657
	1658	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1659	filemap_invalidate_lock(mapping);
	1660
	1661	truncate_pagecache_range(inode,
	1662	(loff_t)index << PAGE_SHIFT,
	1663	((loff_t)pg_end << PAGE_SHIFT) - 1);
	1664
	1665	f2fs_lock_op(sbi);
	1666
	1667	set_new_dnode(&dn, inode, NULL, NULL, 0);
	1668	ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
	1669	if (ret) {
	1670	f2fs_unlock_op(sbi);
	1671	filemap_invalidate_unlock(mapping);
	1672	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1673	goto out;
	1674	}
	1675
	1676	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
	1677	end = min(pg_end, end_offset - dn.ofs_in_node + index);
	1678
	1679	ret = f2fs_do_zero_range(&dn, index, end);
	1680	f2fs_put_dnode(&dn);
	1681
	1682	f2fs_unlock_op(sbi);
	1683	filemap_invalidate_unlock(mapping);
	1684	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1685
	1686	f2fs_balance_fs(sbi, dn.node_changed);
	1687
	1688	if (ret)
	1689	goto out;
	1690
	1691	index = end;
	1692	new_size = max_t(loff_t, new_size,
	1693	(loff_t)index << PAGE_SHIFT);
	1694	}
	1695
	1696	if (off_end) {
	1697	ret = fill_zero(inode, pg_end, 0, off_end);
	1698	if (ret)
	1699	goto out;
	1700
	1701	new_size = max_t(loff_t, new_size, offset + len);
	1702	}
	1703	}
	1704
	1705	out:
	1706	if (new_size > i_size_read(inode)) {
	1707	if (mode & FALLOC_FL_KEEP_SIZE)
	1708	file_set_keep_isize(inode);
	1709	else
	1710	f2fs_i_size_write(inode, new_size);
	1711	}
	1712	return ret;
	1713	}
	1714
	1715	static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
	1716	{
	1717	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1718	struct address_space *mapping = inode->i_mapping;
	1719	pgoff_t nr, pg_start, pg_end, delta, idx;
	1720	loff_t new_size;
	1721	int ret = 0;
	1722
	1723	new_size = i_size_read(inode) + len;
	1724	ret = inode_newsize_ok(inode, new_size);
	1725	if (ret)
	1726	return ret;
	1727
	1728	if (offset >= i_size_read(inode))
	1729	return -EINVAL;
	1730
	1731	/* insert range should be aligned to block size of f2fs. */
	1732	if (offset & (F2FS_BLKSIZE - 1) \|\| len & (F2FS_BLKSIZE - 1))
	1733	return -EINVAL;
	1734
	1735	ret = f2fs_convert_inline_inode(inode);
	1736	if (ret)
	1737	return ret;
	1738
	1739	f2fs_balance_fs(sbi, true);
	1740
	1741	filemap_invalidate_lock(mapping);
	1742	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	1743	filemap_invalidate_unlock(mapping);
	1744	if (ret)
	1745	return ret;
	1746
	1747	/* write out all dirty pages from offset */
	1748	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	1749	if (ret)
	1750	return ret;
	1751
	1752	pg_start = offset >> PAGE_SHIFT;
	1753	pg_end = (offset + len) >> PAGE_SHIFT;
	1754	delta = pg_end - pg_start;
	1755	idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	1756
	1757	/* avoid gc operation during block exchange */
	1758	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1759	filemap_invalidate_lock(mapping);
	1760	truncate_pagecache(inode, offset);
	1761
	1762	while (!ret && idx > pg_start) {
	1763	nr = idx - pg_start;
	1764	if (nr > delta)
	1765	nr = delta;
	1766	idx -= nr;
	1767
	1768	f2fs_lock_op(sbi);
	1769	f2fs_drop_extent_tree(inode);
	1770
	1771	ret = __exchange_data_block(inode, inode, idx,
	1772	idx + delta, nr, false);
	1773	f2fs_unlock_op(sbi);
	1774	}
	1775	filemap_invalidate_unlock(mapping);
	1776	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	1777	if (ret)
	1778	return ret;
	1779
	1780	/* write out all moved pages, if possible */
	1781	filemap_invalidate_lock(mapping);
	1782	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	1783	truncate_pagecache(inode, offset);
	1784	filemap_invalidate_unlock(mapping);
	1785
	1786	if (!ret)
	1787	f2fs_i_size_write(inode, new_size);
	1788	return ret;
	1789	}
	1790
	1791	static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
	1792	loff_t len, int mode)
	1793	{
	1794	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	1795	struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
	1796	.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
	1797	.m_may_create = true };
	1798	struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
	1799	.init_gc_type = FG_GC,
	1800	.should_migrate_blocks = false,
	1801	.err_gc_skipped = true,
	1802	.nr_free_secs = 0 };
	1803	pgoff_t pg_start, pg_end;
	1804	loff_t new_size;
	1805	loff_t off_end;
	1806	block_t expanded = 0;
	1807	int err;
	1808
	1809	err = inode_newsize_ok(inode, (len + offset));
	1810	if (err)
	1811	return err;
	1812
	1813	err = f2fs_convert_inline_inode(inode);
	1814	if (err)
	1815	return err;
	1816
	1817	f2fs_balance_fs(sbi, true);
	1818
	1819	pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
	1820	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
	1821	off_end = (offset + len) & (PAGE_SIZE - 1);
	1822
	1823	map.m_lblk = pg_start;
	1824	map.m_len = pg_end - pg_start;
	1825	if (off_end)
	1826	map.m_len++;
	1827
	1828	if (!map.m_len)
	1829	return 0;
	1830
	1831	if (f2fs_is_pinned_file(inode)) {
	1832	block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
	1833	block_t sec_len = roundup(map.m_len, sec_blks);
	1834
	1835	map.m_len = sec_blks;
	1836	next_alloc:
	1837	if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ?
	1838	ZONED_PIN_SEC_REQUIRED_COUNT :
	1839	GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
	1840	f2fs_down_write(&sbi->gc_lock);
	1841	stat_inc_gc_call_count(sbi, FOREGROUND);
	1842	err = f2fs_gc(sbi, &gc_control);
	1843	if (err && err != -ENODATA)
	1844	goto out_err;
	1845	}
	1846
	1847	f2fs_down_write(&sbi->pin_sem);
	1848
	1849	err = f2fs_allocate_pinning_section(sbi);
	1850	if (err) {
	1851	f2fs_up_write(&sbi->pin_sem);
	1852	goto out_err;
	1853	}
	1854
	1855	map.m_seg_type = CURSEG_COLD_DATA_PINNED;
	1856	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
	1857	file_dont_truncate(inode);
	1858
	1859	f2fs_up_write(&sbi->pin_sem);
	1860
	1861	expanded += map.m_len;
	1862	sec_len -= map.m_len;
	1863	map.m_lblk += map.m_len;
	1864	if (!err && sec_len)
	1865	goto next_alloc;
	1866
	1867	map.m_len = expanded;
	1868	} else {
	1869	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
	1870	expanded = map.m_len;
	1871	}
	1872	out_err:
	1873	if (err) {
	1874	pgoff_t last_off;
	1875
	1876	if (!expanded)
	1877	return err;
	1878
	1879	last_off = pg_start + expanded - 1;
	1880
	1881	/* update new size to the failed position */
	1882	new_size = (last_off == pg_end) ? offset + len :
	1883	(loff_t)(last_off + 1) << PAGE_SHIFT;
	1884	} else {
	1885	new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
	1886	}
	1887
	1888	if (new_size > i_size_read(inode)) {
	1889	if (mode & FALLOC_FL_KEEP_SIZE)
	1890	file_set_keep_isize(inode);
	1891	else
	1892	f2fs_i_size_write(inode, new_size);
	1893	}
	1894
	1895	return err;
	1896	}
	1897
	1898	static long f2fs_fallocate(struct file *file, int mode,
	1899	loff_t offset, loff_t len)
	1900	{
	1901	struct inode *inode = file_inode(file);
	1902	long ret = 0;
	1903
	1904	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
	1905	return -EIO;
	1906	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
	1907	return -ENOSPC;
	1908	if (!f2fs_is_compress_backend_ready(inode) \|\| IS_DEVICE_ALIASING(inode))
	1909	return -EOPNOTSUPP;
	1910
	1911	/* f2fs only support ->fallocate for regular file */
	1912	if (!S_ISREG(inode->i_mode))
	1913	return -EINVAL;
	1914
	1915	if (IS_ENCRYPTED(inode) &&
	1916	(mode & (FALLOC_FL_COLLAPSE_RANGE \| FALLOC_FL_INSERT_RANGE)))
	1917	return -EOPNOTSUPP;
	1918
	1919	if (mode & ~(FALLOC_FL_KEEP_SIZE \| FALLOC_FL_PUNCH_HOLE \|
	1920	FALLOC_FL_COLLAPSE_RANGE \| FALLOC_FL_ZERO_RANGE \|
	1921	FALLOC_FL_INSERT_RANGE))
	1922	return -EOPNOTSUPP;
	1923
	1924	inode_lock(inode);
	1925
	1926	/*
	1927	* Pinned file should not support partial truncation since the block
	1928	* can be used by applications.
	1929	*/
	1930	if ((f2fs_compressed_file(inode) \|\| f2fs_is_pinned_file(inode)) &&
	1931	(mode & (FALLOC_FL_PUNCH_HOLE \| FALLOC_FL_COLLAPSE_RANGE \|
	1932	FALLOC_FL_ZERO_RANGE \| FALLOC_FL_INSERT_RANGE))) {
	1933	ret = -EOPNOTSUPP;
	1934	goto out;
	1935	}
	1936
	1937	ret = file_modified(file);
	1938	if (ret)
	1939	goto out;
	1940
	1941	/*
	1942	* wait for inflight dio, blocks should be removed after IO
	1943	* completion.
	1944	*/
	1945	inode_dio_wait(inode);
	1946
	1947	if (mode & FALLOC_FL_PUNCH_HOLE) {
	1948	if (offset >= inode->i_size)
	1949	goto out;
	1950
	1951	ret = f2fs_punch_hole(inode, offset, len);
	1952	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
	1953	ret = f2fs_collapse_range(inode, offset, len);
	1954	} else if (mode & FALLOC_FL_ZERO_RANGE) {
	1955	ret = f2fs_zero_range(inode, offset, len, mode);
	1956	} else if (mode & FALLOC_FL_INSERT_RANGE) {
	1957	ret = f2fs_insert_range(inode, offset, len);
	1958	} else {
	1959	ret = f2fs_expand_inode_data(inode, offset, len, mode);
	1960	}
	1961
	1962	if (!ret) {
	1963	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	1964	f2fs_mark_inode_dirty_sync(inode, false);
	1965	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	1966	}
	1967
	1968	out:
	1969	inode_unlock(inode);
	1970
	1971	trace_f2fs_fallocate(inode, mode, offset, len, ret);
	1972	return ret;
	1973	}
	1974
	1975	static int f2fs_release_file(struct inode inode, struct file filp)
	1976	{
	1977	/*
	1978	* f2fs_release_file is called at every close calls. So we should
	1979	* not drop any inmemory pages by close called by other process.
	1980	*/
	1981	if (!(filp->f_mode & FMODE_WRITE) \|\|
	1982	atomic_read(&inode->i_writecount) != 1)
	1983	return 0;
	1984
	1985	inode_lock(inode);
	1986	f2fs_abort_atomic_write(inode, true);
	1987	inode_unlock(inode);
	1988
	1989	return 0;
	1990	}
	1991
	1992	static int f2fs_file_flush(struct file *file, fl_owner_t id)
	1993	{
	1994	struct inode *inode = file_inode(file);
	1995
	1996	/*
	1997	* If the process doing a transaction is crashed, we should do
	1998	* roll-back. Otherwise, other reader/write can see corrupted database
	1999	* until all the writers close its file. Since this should be done
	2000	* before dropping file lock, it needs to do in ->flush.
	2001	*/
	2002	if (F2FS_I(inode)->atomic_write_task == current &&
	2003	(current->flags & PF_EXITING)) {
	2004	inode_lock(inode);
	2005	f2fs_abort_atomic_write(inode, true);
	2006	inode_unlock(inode);
	2007	}
	2008
	2009	return 0;
	2010	}
	2011
	2012	static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
	2013	{
	2014	struct f2fs_inode_info *fi = F2FS_I(inode);
	2015	u32 masked_flags = fi->i_flags & mask;
	2016
	2017	/* mask can be shrunk by flags_valid selector */
	2018	iflags &= mask;
	2019
	2020	/* Is it quota file? Do not allow user to mess with it */
	2021	if (IS_NOQUOTA(inode))
	2022	return -EPERM;
	2023
	2024	if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
	2025	if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
	2026	return -EOPNOTSUPP;
	2027	if (!f2fs_empty_dir(inode))
	2028	return -ENOTEMPTY;
	2029	}
	2030
	2031	if (iflags & (F2FS_COMPR_FL \| F2FS_NOCOMP_FL)) {
	2032	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
	2033	return -EOPNOTSUPP;
	2034	if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
	2035	return -EINVAL;
	2036	}
	2037
	2038	if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
	2039	if (masked_flags & F2FS_COMPR_FL) {
	2040	if (!f2fs_disable_compressed_file(inode))
	2041	return -EINVAL;
	2042	} else {
	2043	/* try to convert inline_data to support compression */
	2044	int err = f2fs_convert_inline_inode(inode);
	2045	if (err)
	2046	return err;
	2047
	2048	f2fs_down_write(&fi->i_sem);
	2049	if (!f2fs_may_compress(inode) \|\|
	2050	(S_ISREG(inode->i_mode) &&
	2051	F2FS_HAS_BLOCKS(inode))) {
	2052	f2fs_up_write(&fi->i_sem);
	2053	return -EINVAL;
	2054	}
	2055	err = set_compress_context(inode);
	2056	f2fs_up_write(&fi->i_sem);
	2057
	2058	if (err)
	2059	return err;
	2060	}
	2061	}
	2062
	2063	fi->i_flags = iflags \| (fi->i_flags & ~mask);
	2064	f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
	2065	(fi->i_flags & F2FS_NOCOMP_FL));
	2066
	2067	if (fi->i_flags & F2FS_PROJINHERIT_FL)
	2068	set_inode_flag(inode, FI_PROJ_INHERIT);
	2069	else
	2070	clear_inode_flag(inode, FI_PROJ_INHERIT);
	2071
	2072	inode_set_ctime_current(inode);
	2073	f2fs_set_inode_flags(inode);
	2074	f2fs_mark_inode_dirty_sync(inode, true);
	2075	return 0;
	2076	}
	2077
	2078	/* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */
	2079
	2080	/*
	2081	* To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
	2082	* for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
	2083	* F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add
	2084	* its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
	2085	*
	2086	* Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
	2087	* FS_IOC_FSSETXATTR is done by the VFS.
	2088	*/
	2089
	2090	static const struct {
	2091	u32 iflag;
	2092	u32 fsflag;
	2093	} f2fs_fsflags_map[] = {
	2094	{ F2FS_COMPR_FL, FS_COMPR_FL },
	2095	{ F2FS_SYNC_FL, FS_SYNC_FL },
	2096	{ F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
	2097	{ F2FS_APPEND_FL, FS_APPEND_FL },
	2098	{ F2FS_NODUMP_FL, FS_NODUMP_FL },
	2099	{ F2FS_NOATIME_FL, FS_NOATIME_FL },
	2100	{ F2FS_NOCOMP_FL, FS_NOCOMP_FL },
	2101	{ F2FS_INDEX_FL, FS_INDEX_FL },
	2102	{ F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
	2103	{ F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
	2104	{ F2FS_CASEFOLD_FL, FS_CASEFOLD_FL },
	2105	};
	2106
	2107	#define F2FS_GETTABLE_FS_FL ( \
	2108	FS_COMPR_FL \| \
	2109	FS_SYNC_FL \| \
	2110	FS_IMMUTABLE_FL \| \
	2111	FS_APPEND_FL \| \
	2112	FS_NODUMP_FL \| \
	2113	FS_NOATIME_FL \| \
	2114	FS_NOCOMP_FL \| \
	2115	FS_INDEX_FL \| \
	2116	FS_DIRSYNC_FL \| \
	2117	FS_PROJINHERIT_FL \| \
	2118	FS_ENCRYPT_FL \| \
	2119	FS_INLINE_DATA_FL \| \
	2120	FS_NOCOW_FL \| \
	2121	FS_VERITY_FL \| \
	2122	FS_CASEFOLD_FL)
	2123
	2124	#define F2FS_SETTABLE_FS_FL ( \
	2125	FS_COMPR_FL \| \
	2126	FS_SYNC_FL \| \
	2127	FS_IMMUTABLE_FL \| \
	2128	FS_APPEND_FL \| \
	2129	FS_NODUMP_FL \| \
	2130	FS_NOATIME_FL \| \
	2131	FS_NOCOMP_FL \| \
	2132	FS_DIRSYNC_FL \| \
	2133	FS_PROJINHERIT_FL \| \
	2134	FS_CASEFOLD_FL)
	2135
	2136	/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
	2137	static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
	2138	{
	2139	u32 fsflags = 0;
	2140	int i;
	2141
	2142	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
	2143	if (iflags & f2fs_fsflags_map[i].iflag)
	2144	fsflags \|= f2fs_fsflags_map[i].fsflag;
	2145
	2146	return fsflags;
	2147	}
	2148
	2149	/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
	2150	static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
	2151	{
	2152	u32 iflags = 0;
	2153	int i;
	2154
	2155	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
	2156	if (fsflags & f2fs_fsflags_map[i].fsflag)
	2157	iflags \|= f2fs_fsflags_map[i].iflag;
	2158
	2159	return iflags;
	2160	}
	2161
	2162	static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
	2163	{
	2164	struct inode *inode = file_inode(filp);
	2165
	2166	return put_user(inode->i_generation, (int __user *)arg);
	2167	}
	2168
	2169	static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
	2170	{
	2171	struct inode *inode = file_inode(filp);
	2172	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	2173	struct f2fs_inode_info *fi = F2FS_I(inode);
	2174	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	2175	loff_t isize;
	2176	int ret;
	2177
	2178	if (!(filp->f_mode & FMODE_WRITE))
	2179	return -EBADF;
	2180
	2181	if (!inode_owner_or_capable(idmap, inode))
	2182	return -EACCES;
	2183
	2184	if (!S_ISREG(inode->i_mode))
	2185	return -EINVAL;
	2186
	2187	if (filp->f_flags & O_DIRECT)
	2188	return -EINVAL;
	2189
	2190	ret = mnt_want_write_file(filp);
	2191	if (ret)
	2192	return ret;
	2193
	2194	inode_lock(inode);
	2195
	2196	if (!f2fs_disable_compressed_file(inode) \|\|
	2197	f2fs_is_pinned_file(inode)) {
	2198	ret = -EINVAL;
	2199	goto out;
	2200	}
	2201
	2202	if (f2fs_is_atomic_file(inode))
	2203	goto out;
	2204
	2205	ret = f2fs_convert_inline_inode(inode);
	2206	if (ret)
	2207	goto out;
	2208
	2209	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	2210	f2fs_down_write(&fi->i_gc_rwsem[READ]);
	2211
	2212	/*
	2213	* Should wait end_io to count F2FS_WB_CP_DATA correctly by
	2214	* f2fs_is_atomic_file.
	2215	*/
	2216	if (get_dirty_pages(inode))
	2217	f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
	2218	inode->i_ino, get_dirty_pages(inode));
	2219	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	2220	if (ret)
	2221	goto out_unlock;
	2222
	2223	/* Check if the inode already has a COW inode */
	2224	if (fi->cow_inode == NULL) {
	2225	/* Create a COW inode for atomic write */
	2226	struct dentry *dentry = file_dentry(filp);
	2227	struct inode *dir = d_inode(dentry->d_parent);
	2228
	2229	ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
	2230	if (ret)
	2231	goto out_unlock;
	2232
	2233	set_inode_flag(fi->cow_inode, FI_COW_FILE);
	2234	clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
	2235
	2236	/* Set the COW inode's atomic_inode to the atomic inode */
	2237	F2FS_I(fi->cow_inode)->atomic_inode = inode;
	2238	} else {
	2239	/* Reuse the already created COW inode */
	2240	f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));
	2241
	2242	invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);
	2243
	2244	ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
	2245	if (ret)
	2246	goto out_unlock;
	2247	}
	2248
	2249	f2fs_write_inode(inode, NULL);
	2250
	2251	stat_inc_atomic_inode(inode);
	2252
	2253	set_inode_flag(inode, FI_ATOMIC_FILE);
	2254
	2255	isize = i_size_read(inode);
	2256	fi->original_i_size = isize;
	2257	if (truncate) {
	2258	set_inode_flag(inode, FI_ATOMIC_REPLACE);
	2259	truncate_inode_pages_final(inode->i_mapping);
	2260	f2fs_i_size_write(inode, 0);
	2261	isize = 0;
	2262	}
	2263	f2fs_i_size_write(fi->cow_inode, isize);
	2264
	2265	out_unlock:
	2266	f2fs_up_write(&fi->i_gc_rwsem[READ]);
	2267	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	2268	if (ret)
	2269	goto out;
	2270
	2271	f2fs_update_time(sbi, REQ_TIME);
	2272	fi->atomic_write_task = current;
	2273	stat_update_max_atomic_write(inode);
	2274	fi->atomic_write_cnt = 0;
	2275	out:
	2276	inode_unlock(inode);
	2277	mnt_drop_write_file(filp);
	2278	return ret;
	2279	}
	2280
	2281	static int f2fs_ioc_commit_atomic_write(struct file *filp)
	2282	{
	2283	struct inode *inode = file_inode(filp);
	2284	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	2285	int ret;
	2286
	2287	if (!(filp->f_mode & FMODE_WRITE))
	2288	return -EBADF;
	2289
	2290	if (!inode_owner_or_capable(idmap, inode))
	2291	return -EACCES;
	2292
	2293	ret = mnt_want_write_file(filp);
	2294	if (ret)
	2295	return ret;
	2296
	2297	f2fs_balance_fs(F2FS_I_SB(inode), true);
	2298
	2299	inode_lock(inode);
	2300
	2301	if (f2fs_is_atomic_file(inode)) {
	2302	ret = f2fs_commit_atomic_write(inode);
	2303	if (!ret)
	2304	ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
	2305
	2306	f2fs_abort_atomic_write(inode, ret);
	2307	} else {
	2308	ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
	2309	}
	2310
	2311	inode_unlock(inode);
	2312	mnt_drop_write_file(filp);
	2313	return ret;
	2314	}
	2315
	2316	static int f2fs_ioc_abort_atomic_write(struct file *filp)
	2317	{
	2318	struct inode *inode = file_inode(filp);
	2319	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	2320	int ret;
	2321
	2322	if (!(filp->f_mode & FMODE_WRITE))
	2323	return -EBADF;
	2324
	2325	if (!inode_owner_or_capable(idmap, inode))
	2326	return -EACCES;
	2327
	2328	ret = mnt_want_write_file(filp);
	2329	if (ret)
	2330	return ret;
	2331
	2332	inode_lock(inode);
	2333
	2334	f2fs_abort_atomic_write(inode, true);
	2335
	2336	inode_unlock(inode);
	2337
	2338	mnt_drop_write_file(filp);
	2339	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	2340	return ret;
	2341	}
	2342
	2343	int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
	2344	bool readonly, bool need_lock)
	2345	{
	2346	struct super_block *sb = sbi->sb;
	2347	int ret = 0;
	2348
	2349	switch (flag) {
	2350	case F2FS_GOING_DOWN_FULLSYNC:
	2351	ret = bdev_freeze(sb->s_bdev);
	2352	if (ret)
	2353	goto out;
	2354	f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
	2355	bdev_thaw(sb->s_bdev);
	2356	break;
	2357	case F2FS_GOING_DOWN_METASYNC:
	2358	/* do checkpoint only */
	2359	ret = f2fs_sync_fs(sb, 1);
	2360	if (ret) {
	2361	if (ret == -EIO)
	2362	ret = 0;
	2363	goto out;
	2364	}
	2365	f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
	2366	break;
	2367	case F2FS_GOING_DOWN_NOSYNC:
	2368	f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
	2369	break;
	2370	case F2FS_GOING_DOWN_METAFLUSH:
	2371	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
	2372	f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
	2373	break;
	2374	case F2FS_GOING_DOWN_NEED_FSCK:
	2375	set_sbi_flag(sbi, SBI_NEED_FSCK);
	2376	set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
	2377	set_sbi_flag(sbi, SBI_IS_DIRTY);
	2378	/* do checkpoint only */
	2379	ret = f2fs_sync_fs(sb, 1);
	2380	if (ret == -EIO)
	2381	ret = 0;
	2382	goto out;
	2383	default:
	2384	ret = -EINVAL;
	2385	goto out;
	2386	}
	2387
	2388	if (readonly)
	2389	goto out;
	2390
	2391	/*
	2392	* grab sb->s_umount to avoid racing w/ remount() and other shutdown
	2393	* paths.
	2394	*/
	2395	if (need_lock)
	2396	down_write(&sbi->sb->s_umount);
	2397
	2398	f2fs_stop_gc_thread(sbi);
	2399	f2fs_stop_discard_thread(sbi);
	2400
	2401	f2fs_drop_discard_cmd(sbi);
	2402	clear_opt(sbi, DISCARD);
	2403
	2404	if (need_lock)
	2405	up_write(&sbi->sb->s_umount);
	2406
	2407	f2fs_update_time(sbi, REQ_TIME);
	2408	out:
	2409
	2410	trace_f2fs_shutdown(sbi, flag, ret);
	2411
	2412	return ret;
	2413	}
	2414
	2415	static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
	2416	{
	2417	struct inode *inode = file_inode(filp);
	2418	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	2419	__u32 in;
	2420	int ret;
	2421	bool need_drop = false, readonly = false;
	2422
	2423	if (!capable(CAP_SYS_ADMIN))
	2424	return -EPERM;
	2425
	2426	if (get_user(in, (__u32 __user *)arg))
	2427	return -EFAULT;
	2428
	2429	if (in != F2FS_GOING_DOWN_FULLSYNC) {
	2430	ret = mnt_want_write_file(filp);
	2431	if (ret) {
	2432	if (ret != -EROFS)
	2433	return ret;
	2434
	2435	/* fallback to nosync shutdown for readonly fs */
	2436	in = F2FS_GOING_DOWN_NOSYNC;
	2437	readonly = true;
	2438	} else {
	2439	need_drop = true;
	2440	}
	2441	}
	2442
	2443	ret = f2fs_do_shutdown(sbi, in, readonly, true);
	2444
	2445	if (need_drop)
	2446	mnt_drop_write_file(filp);
	2447
	2448	return ret;
	2449	}
	2450
	2451	static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
	2452	{
	2453	struct inode *inode = file_inode(filp);
	2454	struct super_block *sb = inode->i_sb;
	2455	struct fstrim_range range;
	2456	int ret;
	2457
	2458	if (!capable(CAP_SYS_ADMIN))
	2459	return -EPERM;
	2460
	2461	if (!f2fs_hw_support_discard(F2FS_SB(sb)))
	2462	return -EOPNOTSUPP;
	2463
	2464	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
	2465	sizeof(range)))
	2466	return -EFAULT;
	2467
	2468	ret = mnt_want_write_file(filp);
	2469	if (ret)
	2470	return ret;
	2471
	2472	range.minlen = max((unsigned int)range.minlen,
	2473	bdev_discard_granularity(sb->s_bdev));
	2474	ret = f2fs_trim_fs(F2FS_SB(sb), &range);
	2475	mnt_drop_write_file(filp);
	2476	if (ret < 0)
	2477	return ret;
	2478
	2479	if (copy_to_user((struct fstrim_range __user *)arg, &range,
	2480	sizeof(range)))
	2481	return -EFAULT;
	2482	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	2483	return 0;
	2484	}
	2485
	2486	static bool uuid_is_nonzero(__u8 u[16])
	2487	{
	2488	int i;
	2489
	2490	for (i = 0; i < 16; i++)
	2491	if (u[i])
	2492	return true;
	2493	return false;
	2494	}
	2495
	2496	static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
	2497	{
	2498	struct inode *inode = file_inode(filp);
	2499	int ret;
	2500
	2501	if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
	2502	return -EOPNOTSUPP;
	2503
	2504	ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
	2505	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	2506	return ret;
	2507	}
	2508
	2509	static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
	2510	{
	2511	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
	2512	return -EOPNOTSUPP;
	2513	return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
	2514	}
	2515
	2516	static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
	2517	{
	2518	struct inode *inode = file_inode(filp);
	2519	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	2520	u8 encrypt_pw_salt[16];
	2521	int err;
	2522
	2523	if (!f2fs_sb_has_encrypt(sbi))
	2524	return -EOPNOTSUPP;
	2525
	2526	err = mnt_want_write_file(filp);
	2527	if (err)
	2528	return err;
	2529
	2530	f2fs_down_write(&sbi->sb_lock);
	2531
	2532	if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
	2533	goto got_it;
	2534
	2535	/* update superblock with uuid */
	2536	generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
	2537
	2538	err = f2fs_commit_super(sbi, false);
	2539	if (err) {
	2540	/* undo new data */
	2541	memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
	2542	goto out_err;
	2543	}
	2544	got_it:
	2545	memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
	2546	out_err:
	2547	f2fs_up_write(&sbi->sb_lock);
	2548	mnt_drop_write_file(filp);
	2549
	2550	if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
	2551	err = -EFAULT;
	2552
	2553	return err;
	2554	}
	2555
	2556	static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
	2557	unsigned long arg)
	2558	{
	2559	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
	2560	return -EOPNOTSUPP;
	2561
	2562	return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
	2563	}
	2564
	2565	static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
	2566	{
	2567	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
	2568	return -EOPNOTSUPP;
	2569
	2570	return fscrypt_ioctl_add_key(filp, (void __user *)arg);
	2571	}
	2572
	2573	static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
	2574	{
	2575	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
	2576	return -EOPNOTSUPP;
	2577
	2578	return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
	2579	}
	2580
	2581	static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
	2582	unsigned long arg)
	2583	{
	2584	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
	2585	return -EOPNOTSUPP;
	2586
	2587	return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
	2588	}
	2589
	2590	static int f2fs_ioc_get_encryption_key_status(struct file *filp,
	2591	unsigned long arg)
	2592	{
	2593	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
	2594	return -EOPNOTSUPP;
	2595
	2596	return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
	2597	}
	2598
	2599	static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
	2600	{
	2601	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
	2602	return -EOPNOTSUPP;
	2603
	2604	return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
	2605	}
	2606
	2607	static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
	2608	{
	2609	struct inode *inode = file_inode(filp);
	2610	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	2611	struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
	2612	.no_bg_gc = false,
	2613	.should_migrate_blocks = false,
	2614	.nr_free_secs = 0 };
	2615	__u32 sync;
	2616	int ret;
	2617
	2618	if (!capable(CAP_SYS_ADMIN))
	2619	return -EPERM;
	2620
	2621	if (get_user(sync, (__u32 __user *)arg))
	2622	return -EFAULT;
	2623
	2624	if (f2fs_readonly(sbi->sb))
	2625	return -EROFS;
	2626
	2627	ret = mnt_want_write_file(filp);
	2628	if (ret)
	2629	return ret;
	2630
	2631	if (!sync) {
	2632	if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
	2633	ret = -EBUSY;
	2634	goto out;
	2635	}
	2636	} else {
	2637	f2fs_down_write(&sbi->gc_lock);
	2638	}
	2639
	2640	gc_control.init_gc_type = sync ? FG_GC : BG_GC;
	2641	gc_control.err_gc_skipped = sync;
	2642	stat_inc_gc_call_count(sbi, FOREGROUND);
	2643	ret = f2fs_gc(sbi, &gc_control);
	2644	out:
	2645	mnt_drop_write_file(filp);
	2646	return ret;
	2647	}
	2648
	2649	static int __f2fs_ioc_gc_range(struct file filp, struct f2fs_gc_range range)
	2650	{
	2651	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
	2652	struct f2fs_gc_control gc_control = {
	2653	.init_gc_type = range->sync ? FG_GC : BG_GC,
	2654	.no_bg_gc = false,
	2655	.should_migrate_blocks = false,
	2656	.err_gc_skipped = range->sync,
	2657	.nr_free_secs = 0 };
	2658	u64 end;
	2659	int ret;
	2660
	2661	if (!capable(CAP_SYS_ADMIN))
	2662	return -EPERM;
	2663	if (f2fs_readonly(sbi->sb))
	2664	return -EROFS;
	2665
	2666	end = range->start + range->len;
	2667	if (end < range->start \|\| range->start < MAIN_BLKADDR(sbi) \|\|
	2668	end >= MAX_BLKADDR(sbi))
	2669	return -EINVAL;
	2670
	2671	ret = mnt_want_write_file(filp);
	2672	if (ret)
	2673	return ret;
	2674
	2675	do_more:
	2676	if (!range->sync) {
	2677	if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
	2678	ret = -EBUSY;
	2679	goto out;
	2680	}
	2681	} else {
	2682	f2fs_down_write(&sbi->gc_lock);
	2683	}
	2684
	2685	gc_control.victim_segno = GET_SEGNO(sbi, range->start);
	2686	stat_inc_gc_call_count(sbi, FOREGROUND);
	2687	ret = f2fs_gc(sbi, &gc_control);
	2688	if (ret) {
	2689	if (ret == -EBUSY)
	2690	ret = -EAGAIN;
	2691	goto out;
	2692	}
	2693	range->start += CAP_BLKS_PER_SEC(sbi);
	2694	if (range->start <= end)
	2695	goto do_more;
	2696	out:
	2697	mnt_drop_write_file(filp);
	2698	return ret;
	2699	}
	2700
	2701	static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
	2702	{
	2703	struct f2fs_gc_range range;
	2704
	2705	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
	2706	sizeof(range)))
	2707	return -EFAULT;
	2708	return __f2fs_ioc_gc_range(filp, &range);
	2709	}
	2710
	2711	static int f2fs_ioc_write_checkpoint(struct file *filp)
	2712	{
	2713	struct inode *inode = file_inode(filp);
	2714	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	2715	int ret;
	2716
	2717	if (!capable(CAP_SYS_ADMIN))
	2718	return -EPERM;
	2719
	2720	if (f2fs_readonly(sbi->sb))
	2721	return -EROFS;
	2722
	2723	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
	2724	f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
	2725	return -EINVAL;
	2726	}
	2727
	2728	ret = mnt_want_write_file(filp);
	2729	if (ret)
	2730	return ret;
	2731
	2732	ret = f2fs_sync_fs(sbi->sb, 1);
	2733
	2734	mnt_drop_write_file(filp);
	2735	return ret;
	2736	}
	2737
	2738	static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
	2739	struct file *filp,
	2740	struct f2fs_defragment *range)
	2741	{
	2742	struct inode *inode = file_inode(filp);
	2743	struct f2fs_map_blocks map = { .m_next_extent = NULL,
	2744	.m_seg_type = NO_CHECK_TYPE,
	2745	.m_may_create = false };
	2746	struct extent_info ei = {};
	2747	pgoff_t pg_start, pg_end, next_pgofs;
	2748	unsigned int total = 0, sec_num;
	2749	block_t blk_end = 0;
	2750	bool fragmented = false;
	2751	int err;
	2752
	2753	f2fs_balance_fs(sbi, true);
	2754
	2755	inode_lock(inode);
	2756	pg_start = range->start >> PAGE_SHIFT;
	2757	pg_end = min_t(pgoff_t,
	2758	(range->start + range->len) >> PAGE_SHIFT,
	2759	DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
	2760
	2761	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) \|\|
	2762	f2fs_is_atomic_file(inode)) {
	2763	err = -EINVAL;
	2764	goto unlock_out;
	2765	}
	2766
	2767	/* if in-place-update policy is enabled, don't waste time here */
	2768	set_inode_flag(inode, FI_OPU_WRITE);
	2769	if (f2fs_should_update_inplace(inode, NULL)) {
	2770	err = -EINVAL;
	2771	goto out;
	2772	}
	2773
	2774	/* writeback all dirty pages in the range */
	2775	err = filemap_write_and_wait_range(inode->i_mapping,
	2776	pg_start << PAGE_SHIFT,
	2777	(pg_end << PAGE_SHIFT) - 1);
	2778	if (err)
	2779	goto out;
	2780
	2781	/*
	2782	* lookup mapping info in extent cache, skip defragmenting if physical
	2783	* block addresses are continuous.
	2784	*/
	2785	if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
	2786	if ((pgoff_t)ei.fofs + ei.len >= pg_end)
	2787	goto out;
	2788	}
	2789
	2790	map.m_lblk = pg_start;
	2791	map.m_next_pgofs = &next_pgofs;
	2792
	2793	/*
	2794	* lookup mapping info in dnode page cache, skip defragmenting if all
	2795	* physical block addresses are continuous even if there are hole(s)
	2796	* in logical blocks.
	2797	*/
	2798	while (map.m_lblk < pg_end) {
	2799	map.m_len = pg_end - map.m_lblk;
	2800	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
	2801	if (err)
	2802	goto out;
	2803
	2804	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
	2805	map.m_lblk = next_pgofs;
	2806	continue;
	2807	}
	2808
	2809	if (blk_end && blk_end != map.m_pblk)
	2810	fragmented = true;
	2811
	2812	/* record total count of block that we're going to move */
	2813	total += map.m_len;
	2814
	2815	blk_end = map.m_pblk + map.m_len;
	2816
	2817	map.m_lblk += map.m_len;
	2818	}
	2819
	2820	if (!fragmented) {
	2821	total = 0;
	2822	goto out;
	2823	}
	2824
	2825	sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));
	2826
	2827	/*
	2828	* make sure there are enough free section for LFS allocation, this can
	2829	* avoid defragment running in SSR mode when free section are allocated
	2830	* intensively
	2831	*/
	2832	if (has_not_enough_free_secs(sbi, 0, sec_num)) {
	2833	err = -EAGAIN;
	2834	goto out;
	2835	}
	2836
	2837	map.m_lblk = pg_start;
	2838	map.m_len = pg_end - pg_start;
	2839	total = 0;
	2840
	2841	while (map.m_lblk < pg_end) {
	2842	pgoff_t idx;
	2843	int cnt = 0;
	2844
	2845	do_map:
	2846	map.m_len = pg_end - map.m_lblk;
	2847	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
	2848	if (err)
	2849	goto clear_out;
	2850
	2851	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
	2852	map.m_lblk = next_pgofs;
	2853	goto check;
	2854	}
	2855
	2856	set_inode_flag(inode, FI_SKIP_WRITES);
	2857
	2858	idx = map.m_lblk;
	2859	while (idx < map.m_lblk + map.m_len &&
	2860	cnt < BLKS_PER_SEG(sbi)) {
	2861	struct page *page;
	2862
	2863	page = f2fs_get_lock_data_page(inode, idx, true);
	2864	if (IS_ERR(page)) {
	2865	err = PTR_ERR(page);
	2866	goto clear_out;
	2867	}
	2868
	2869	f2fs_wait_on_page_writeback(page, DATA, true, true);
	2870
	2871	set_page_dirty(page);
	2872	set_page_private_gcing(page);
	2873	f2fs_put_page(page, 1);
	2874
	2875	idx++;
	2876	cnt++;
	2877	total++;
	2878	}
	2879
	2880	map.m_lblk = idx;
	2881	check:
	2882	if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
	2883	goto do_map;
	2884
	2885	clear_inode_flag(inode, FI_SKIP_WRITES);
	2886
	2887	err = filemap_fdatawrite(inode->i_mapping);
	2888	if (err)
	2889	goto out;
	2890	}
	2891	clear_out:
	2892	clear_inode_flag(inode, FI_SKIP_WRITES);
	2893	out:
	2894	clear_inode_flag(inode, FI_OPU_WRITE);
	2895	unlock_out:
	2896	inode_unlock(inode);
	2897	if (!err)
	2898	range->len = (u64)total << PAGE_SHIFT;
	2899	return err;
	2900	}
	2901
	2902	static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
	2903	{
	2904	struct inode *inode = file_inode(filp);
	2905	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	2906	struct f2fs_defragment range;
	2907	int err;
	2908
	2909	if (!capable(CAP_SYS_ADMIN))
	2910	return -EPERM;
	2911
	2912	if (!S_ISREG(inode->i_mode))
	2913	return -EINVAL;
	2914
	2915	if (f2fs_readonly(sbi->sb))
	2916	return -EROFS;
	2917
	2918	if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
	2919	sizeof(range)))
	2920	return -EFAULT;
	2921
	2922	/* verify alignment of offset & size */
	2923	if (range.start & (F2FS_BLKSIZE - 1) \|\| range.len & (F2FS_BLKSIZE - 1))
	2924	return -EINVAL;
	2925
	2926	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
	2927	max_file_blocks(inode)))
	2928	return -EINVAL;
	2929
	2930	err = mnt_want_write_file(filp);
	2931	if (err)
	2932	return err;
	2933
	2934	err = f2fs_defragment_range(sbi, filp, &range);
	2935	mnt_drop_write_file(filp);
	2936
	2937	if (range.len)
	2938	f2fs_update_time(sbi, REQ_TIME);
	2939	if (err < 0)
	2940	return err;
	2941
	2942	if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
	2943	sizeof(range)))
	2944	return -EFAULT;
	2945
	2946	return 0;
	2947	}
	2948
	2949	static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
	2950	struct file *file_out, loff_t pos_out, size_t len)
	2951	{
	2952	struct inode *src = file_inode(file_in);
	2953	struct inode *dst = file_inode(file_out);
	2954	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	2955	size_t olen = len, dst_max_i_size = 0;
	2956	size_t dst_osize;
	2957	int ret;
	2958
	2959	if (file_in->f_path.mnt != file_out->f_path.mnt \|\|
	2960	src->i_sb != dst->i_sb)
	2961	return -EXDEV;
	2962
	2963	if (unlikely(f2fs_readonly(src->i_sb)))
	2964	return -EROFS;
	2965
	2966	if (!S_ISREG(src->i_mode) \|\| !S_ISREG(dst->i_mode))
	2967	return -EINVAL;
	2968
	2969	if (IS_ENCRYPTED(src) \|\| IS_ENCRYPTED(dst))
	2970	return -EOPNOTSUPP;
	2971
	2972	if (pos_out < 0 \|\| pos_in < 0)
	2973	return -EINVAL;
	2974
	2975	if (src == dst) {
	2976	if (pos_in == pos_out)
	2977	return 0;
	2978	if (pos_out > pos_in && pos_out < pos_in + len)
	2979	return -EINVAL;
	2980	}
	2981
	2982	inode_lock(src);
	2983	if (src != dst) {
	2984	ret = -EBUSY;
	2985	if (!inode_trylock(dst))
	2986	goto out;
	2987	}
	2988
	2989	if (f2fs_compressed_file(src) \|\| f2fs_compressed_file(dst) \|\|
	2990	f2fs_is_pinned_file(src) \|\| f2fs_is_pinned_file(dst)) {
	2991	ret = -EOPNOTSUPP;
	2992	goto out_unlock;
	2993	}
	2994
	2995	if (f2fs_is_atomic_file(src) \|\| f2fs_is_atomic_file(dst)) {
	2996	ret = -EINVAL;
	2997	goto out_unlock;
	2998	}
	2999
	3000	ret = -EINVAL;
	3001	if (pos_in + len > src->i_size \|\| pos_in + len < pos_in)
	3002	goto out_unlock;
	3003	if (len == 0)
	3004	olen = len = src->i_size - pos_in;
	3005	if (pos_in + len == src->i_size)
	3006	len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	3007	if (len == 0) {
	3008	ret = 0;
	3009	goto out_unlock;
	3010	}
	3011
	3012	dst_osize = dst->i_size;
	3013	if (pos_out + olen > dst->i_size)
	3014	dst_max_i_size = pos_out + olen;
	3015
	3016	/* verify the end result is block aligned */
	3017	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) \|\|
	3018	!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) \|\|
	3019	!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
	3020	goto out_unlock;
	3021
	3022	ret = f2fs_convert_inline_inode(src);
	3023	if (ret)
	3024	goto out_unlock;
	3025
	3026	ret = f2fs_convert_inline_inode(dst);
	3027	if (ret)
	3028	goto out_unlock;
	3029
	3030	/* write out all dirty pages from offset */
	3031	ret = filemap_write_and_wait_range(src->i_mapping,
	3032	pos_in, pos_in + len);
	3033	if (ret)
	3034	goto out_unlock;
	3035
	3036	ret = filemap_write_and_wait_range(dst->i_mapping,
	3037	pos_out, pos_out + len);
	3038	if (ret)
	3039	goto out_unlock;
	3040
	3041	f2fs_balance_fs(sbi, true);
	3042
	3043	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	3044	if (src != dst) {
	3045	ret = -EBUSY;
	3046	if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
	3047	goto out_src;
	3048	}
	3049
	3050	f2fs_lock_op(sbi);
	3051	ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
	3052	F2FS_BYTES_TO_BLK(pos_out),
	3053	F2FS_BYTES_TO_BLK(len), false);
	3054
	3055	if (!ret) {
	3056	if (dst_max_i_size)
	3057	f2fs_i_size_write(dst, dst_max_i_size);
	3058	else if (dst_osize != dst->i_size)
	3059	f2fs_i_size_write(dst, dst_osize);
	3060	}
	3061	f2fs_unlock_op(sbi);
	3062
	3063	if (src != dst)
	3064	f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
	3065	out_src:
	3066	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	3067	if (ret)
	3068	goto out_unlock;
	3069
	3070	inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
	3071	f2fs_mark_inode_dirty_sync(src, false);
	3072	if (src != dst) {
	3073	inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
	3074	f2fs_mark_inode_dirty_sync(dst, false);
	3075	}
	3076	f2fs_update_time(sbi, REQ_TIME);
	3077
	3078	out_unlock:
	3079	if (src != dst)
	3080	inode_unlock(dst);
	3081	out:
	3082	inode_unlock(src);
	3083	return ret;
	3084	}
	3085
	3086	static int __f2fs_ioc_move_range(struct file *filp,
	3087	struct f2fs_move_range *range)
	3088	{
	3089	int err;
	3090
	3091	if (!(filp->f_mode & FMODE_READ) \|\|
	3092	!(filp->f_mode & FMODE_WRITE))
	3093	return -EBADF;
	3094
	3095	CLASS(fd, dst)(range->dst_fd);
	3096	if (fd_empty(dst))
	3097	return -EBADF;
	3098
	3099	if (!(fd_file(dst)->f_mode & FMODE_WRITE))
	3100	return -EBADF;
	3101
	3102	err = mnt_want_write_file(filp);
	3103	if (err)
	3104	return err;
	3105
	3106	err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
	3107	range->pos_out, range->len);
	3108
	3109	mnt_drop_write_file(filp);
	3110	return err;
	3111	}
	3112
	3113	static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
	3114	{
	3115	struct f2fs_move_range range;
	3116
	3117	if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
	3118	sizeof(range)))
	3119	return -EFAULT;
	3120	return __f2fs_ioc_move_range(filp, &range);
	3121	}
	3122
	3123	static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
	3124	{
	3125	struct inode *inode = file_inode(filp);
	3126	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3127	struct sit_info *sm = SIT_I(sbi);
	3128	unsigned int start_segno = 0, end_segno = 0;
	3129	unsigned int dev_start_segno = 0, dev_end_segno = 0;
	3130	struct f2fs_flush_device range;
	3131	struct f2fs_gc_control gc_control = {
	3132	.init_gc_type = FG_GC,
	3133	.should_migrate_blocks = true,
	3134	.err_gc_skipped = true,
	3135	.nr_free_secs = 0 };
	3136	int ret;
	3137
	3138	if (!capable(CAP_SYS_ADMIN))
	3139	return -EPERM;
	3140
	3141	if (f2fs_readonly(sbi->sb))
	3142	return -EROFS;
	3143
	3144	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
	3145	return -EINVAL;
	3146
	3147	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
	3148	sizeof(range)))
	3149	return -EFAULT;
	3150
	3151	if (!f2fs_is_multi_device(sbi) \|\| sbi->s_ndevs - 1 <= range.dev_num \|\|
	3152	__is_large_section(sbi)) {
	3153	f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
	3154	range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
	3155	return -EINVAL;
	3156	}
	3157
	3158	ret = mnt_want_write_file(filp);
	3159	if (ret)
	3160	return ret;
	3161
	3162	if (range.dev_num != 0)
	3163	dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
	3164	dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
	3165
	3166	start_segno = sm->last_victim[FLUSH_DEVICE];
	3167	if (start_segno < dev_start_segno \|\| start_segno >= dev_end_segno)
	3168	start_segno = dev_start_segno;
	3169	end_segno = min(start_segno + range.segments, dev_end_segno);
	3170
	3171	while (start_segno < end_segno) {
	3172	if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
	3173	ret = -EBUSY;
	3174	goto out;
	3175	}
	3176	sm->last_victim[GC_CB] = end_segno + 1;
	3177	sm->last_victim[GC_GREEDY] = end_segno + 1;
	3178	sm->last_victim[ALLOC_NEXT] = end_segno + 1;
	3179
	3180	gc_control.victim_segno = start_segno;
	3181	stat_inc_gc_call_count(sbi, FOREGROUND);
	3182	ret = f2fs_gc(sbi, &gc_control);
	3183	if (ret == -EAGAIN)
	3184	ret = 0;
	3185	else if (ret < 0)
	3186	break;
	3187	start_segno++;
	3188	}
	3189	out:
	3190	mnt_drop_write_file(filp);
	3191	return ret;
	3192	}
	3193
	3194	static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
	3195	{
	3196	struct inode *inode = file_inode(filp);
	3197	u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
	3198
	3199	/* Must validate to set it with SQLite behavior in Android. */
	3200	sb_feature \|= F2FS_FEATURE_ATOMIC_WRITE;
	3201
	3202	return put_user(sb_feature, (u32 __user *)arg);
	3203	}
	3204
	3205	#ifdef CONFIG_QUOTA
	3206	int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
	3207	{
	3208	struct dquot *transfer_to[MAXQUOTAS] = {};
	3209	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3210	struct super_block *sb = sbi->sb;
	3211	int err;
	3212
	3213	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
	3214	if (IS_ERR(transfer_to[PRJQUOTA]))
	3215	return PTR_ERR(transfer_to[PRJQUOTA]);
	3216
	3217	err = __dquot_transfer(inode, transfer_to);
	3218	if (err)
	3219	set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
	3220	dqput(transfer_to[PRJQUOTA]);
	3221	return err;
	3222	}
	3223
	3224	static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
	3225	{
	3226	struct f2fs_inode_info *fi = F2FS_I(inode);
	3227	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3228	struct f2fs_inode *ri = NULL;
	3229	kprojid_t kprojid;
	3230	int err;
	3231
	3232	if (!f2fs_sb_has_project_quota(sbi)) {
	3233	if (projid != F2FS_DEF_PROJID)
	3234	return -EOPNOTSUPP;
	3235	else
	3236	return 0;
	3237	}
	3238
	3239	if (!f2fs_has_extra_attr(inode))
	3240	return -EOPNOTSUPP;
	3241
	3242	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
	3243
	3244	if (projid_eq(kprojid, fi->i_projid))
	3245	return 0;
	3246
	3247	err = -EPERM;
	3248	/* Is it quota file? Do not allow user to mess with it */
	3249	if (IS_NOQUOTA(inode))
	3250	return err;
	3251
	3252	if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
	3253	return -EOVERFLOW;
	3254
	3255	err = f2fs_dquot_initialize(inode);
	3256	if (err)
	3257	return err;
	3258
	3259	f2fs_lock_op(sbi);
	3260	err = f2fs_transfer_project_quota(inode, kprojid);
	3261	if (err)
	3262	goto out_unlock;
	3263
	3264	fi->i_projid = kprojid;
	3265	inode_set_ctime_current(inode);
	3266	f2fs_mark_inode_dirty_sync(inode, true);
	3267	out_unlock:
	3268	f2fs_unlock_op(sbi);
	3269	return err;
	3270	}
	3271	#else
	3272	int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
	3273	{
	3274	return 0;
	3275	}
	3276
	3277	static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
	3278	{
	3279	if (projid != F2FS_DEF_PROJID)
	3280	return -EOPNOTSUPP;
	3281	return 0;
	3282	}
	3283	#endif
	3284
	3285	int f2fs_fileattr_get(struct dentry dentry, struct fileattr fa)
	3286	{
	3287	struct inode *inode = d_inode(dentry);
	3288	struct f2fs_inode_info *fi = F2FS_I(inode);
	3289	u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
	3290
	3291	if (IS_ENCRYPTED(inode))
	3292	fsflags \|= FS_ENCRYPT_FL;
	3293	if (IS_VERITY(inode))
	3294	fsflags \|= FS_VERITY_FL;
	3295	if (f2fs_has_inline_data(inode) \|\| f2fs_has_inline_dentry(inode))
	3296	fsflags \|= FS_INLINE_DATA_FL;
	3297	if (is_inode_flag_set(inode, FI_PIN_FILE))
	3298	fsflags \|= FS_NOCOW_FL;
	3299
	3300	fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL);
	3301
	3302	if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
	3303	fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
	3304
	3305	return 0;
	3306	}
	3307
	3308	int f2fs_fileattr_set(struct mnt_idmap *idmap,
	3309	struct dentry dentry, struct fileattr fa)
	3310	{
	3311	struct inode *inode = d_inode(dentry);
	3312	u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL;
	3313	u32 iflags;
	3314	int err;
	3315
	3316	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
	3317	return -EIO;
	3318	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
	3319	return -ENOSPC;
	3320	if (fsflags & ~F2FS_GETTABLE_FS_FL)
	3321	return -EOPNOTSUPP;
	3322	fsflags &= F2FS_SETTABLE_FS_FL;
	3323	if (!fa->flags_valid)
	3324	mask &= FS_COMMON_FL;
	3325
	3326	iflags = f2fs_fsflags_to_iflags(fsflags);
	3327	if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
	3328	return -EOPNOTSUPP;
	3329
	3330	err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask));
	3331	if (!err)
	3332	err = f2fs_ioc_setproject(inode, fa->fsx_projid);
	3333
	3334	return err;
	3335	}
	3336
	3337	int f2fs_pin_file_control(struct inode *inode, bool inc)
	3338	{
	3339	struct f2fs_inode_info *fi = F2FS_I(inode);
	3340	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3341
	3342	if (IS_DEVICE_ALIASING(inode))
	3343	return -EINVAL;
	3344
	3345	if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
	3346	f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
	3347	__func__, inode->i_ino, fi->i_gc_failures);
	3348	clear_inode_flag(inode, FI_PIN_FILE);
	3349	return -EAGAIN;
	3350	}
	3351
	3352	/* Use i_gc_failures for normal file as a risk signal. */
	3353	if (inc)
	3354	f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
	3355
	3356	return 0;
	3357	}
	3358
	3359	static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
	3360	{
	3361	struct inode *inode = file_inode(filp);
	3362	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3363	__u32 pin;
	3364	int ret = 0;
	3365
	3366	if (get_user(pin, (__u32 __user *)arg))
	3367	return -EFAULT;
	3368
	3369	if (!S_ISREG(inode->i_mode))
	3370	return -EINVAL;
	3371
	3372	if (f2fs_readonly(sbi->sb))
	3373	return -EROFS;
	3374
	3375	if (!pin && IS_DEVICE_ALIASING(inode))
	3376	return -EOPNOTSUPP;
	3377
	3378	ret = mnt_want_write_file(filp);
	3379	if (ret)
	3380	return ret;
	3381
	3382	inode_lock(inode);
	3383
	3384	if (f2fs_is_atomic_file(inode)) {
	3385	ret = -EINVAL;
	3386	goto out;
	3387	}
	3388
	3389	if (!pin) {
	3390	clear_inode_flag(inode, FI_PIN_FILE);
	3391	f2fs_i_gc_failures_write(inode, 0);
	3392	goto done;
	3393	} else if (f2fs_is_pinned_file(inode)) {
	3394	goto done;
	3395	}
	3396
	3397	if (F2FS_HAS_BLOCKS(inode)) {
	3398	ret = -EFBIG;
	3399	goto out;
	3400	}
	3401
	3402	/* Let's allow file pinning on zoned device. */
	3403	if (!f2fs_sb_has_blkzoned(sbi) &&
	3404	f2fs_should_update_outplace(inode, NULL)) {
	3405	ret = -EINVAL;
	3406	goto out;
	3407	}
	3408
	3409	if (f2fs_pin_file_control(inode, false)) {
	3410	ret = -EAGAIN;
	3411	goto out;
	3412	}
	3413
	3414	ret = f2fs_convert_inline_inode(inode);
	3415	if (ret)
	3416	goto out;
	3417
	3418	if (!f2fs_disable_compressed_file(inode)) {
	3419	ret = -EOPNOTSUPP;
	3420	goto out;
	3421	}
	3422
	3423	set_inode_flag(inode, FI_PIN_FILE);
	3424	ret = F2FS_I(inode)->i_gc_failures;
	3425	done:
	3426	f2fs_update_time(sbi, REQ_TIME);
	3427	out:
	3428	inode_unlock(inode);
	3429	mnt_drop_write_file(filp);
	3430	return ret;
	3431	}
	3432
	3433	static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
	3434	{
	3435	struct inode *inode = file_inode(filp);
	3436	__u32 pin = 0;
	3437
	3438	if (is_inode_flag_set(inode, FI_PIN_FILE))
	3439	pin = F2FS_I(inode)->i_gc_failures;
	3440	return put_user(pin, (u32 __user *)arg);
	3441	}
	3442
	3443	static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg)
	3444	{
	3445	return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0,
	3446	(u32 __user *)arg);
	3447	}
	3448
	3449	int f2fs_precache_extents(struct inode *inode)
	3450	{
	3451	struct f2fs_inode_info *fi = F2FS_I(inode);
	3452	struct f2fs_map_blocks map;
	3453	pgoff_t m_next_extent;
	3454	loff_t end;
	3455	int err;
	3456
	3457	if (is_inode_flag_set(inode, FI_NO_EXTENT))
	3458	return -EOPNOTSUPP;
	3459
	3460	map.m_lblk = 0;
	3461	map.m_pblk = 0;
	3462	map.m_next_pgofs = NULL;
	3463	map.m_next_extent = &m_next_extent;
	3464	map.m_seg_type = NO_CHECK_TYPE;
	3465	map.m_may_create = false;
	3466	end = F2FS_BLK_ALIGN(i_size_read(inode));
	3467
	3468	while (map.m_lblk < end) {
	3469	map.m_len = end - map.m_lblk;
	3470
	3471	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	3472	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
	3473	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	3474	if (err \|\| !map.m_len)
	3475	return err;
	3476
	3477	map.m_lblk = m_next_extent;
	3478	}
	3479
	3480	return 0;
	3481	}
	3482
	3483	static int f2fs_ioc_precache_extents(struct file *filp)
	3484	{
	3485	return f2fs_precache_extents(file_inode(filp));
	3486	}
	3487
	3488	static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
	3489	{
	3490	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
	3491	__u64 block_count;
	3492
	3493	if (!capable(CAP_SYS_ADMIN))
	3494	return -EPERM;
	3495
	3496	if (f2fs_readonly(sbi->sb))
	3497	return -EROFS;
	3498
	3499	if (copy_from_user(&block_count, (void __user *)arg,
	3500	sizeof(block_count)))
	3501	return -EFAULT;
	3502
	3503	return f2fs_resize_fs(filp, block_count);
	3504	}
	3505
	3506	static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
	3507	{
	3508	struct inode *inode = file_inode(filp);
	3509
	3510	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	3511
	3512	if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
	3513	f2fs_warn(F2FS_I_SB(inode),
	3514	"Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
	3515	inode->i_ino);
	3516	return -EOPNOTSUPP;
	3517	}
	3518
	3519	return fsverity_ioctl_enable(filp, (const void __user *)arg);
	3520	}
	3521
	3522	static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
	3523	{
	3524	if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
	3525	return -EOPNOTSUPP;
	3526
	3527	return fsverity_ioctl_measure(filp, (void __user *)arg);
	3528	}
	3529
	3530	static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg)
	3531	{
	3532	if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
	3533	return -EOPNOTSUPP;
	3534
	3535	return fsverity_ioctl_read_metadata(filp, (const void __user *)arg);
	3536	}
	3537
	3538	static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
	3539	{
	3540	struct inode *inode = file_inode(filp);
	3541	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3542	char *vbuf;
	3543	int count;
	3544	int err = 0;
	3545
	3546	vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
	3547	if (!vbuf)
	3548	return -ENOMEM;
	3549
	3550	f2fs_down_read(&sbi->sb_lock);
	3551	count = utf16s_to_utf8s(sbi->raw_super->volume_name,
	3552	ARRAY_SIZE(sbi->raw_super->volume_name),
	3553	UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
	3554	f2fs_up_read(&sbi->sb_lock);
	3555
	3556	if (copy_to_user((char __user *)arg, vbuf,
	3557	min(FSLABEL_MAX, count)))
	3558	err = -EFAULT;
	3559
	3560	kfree(vbuf);
	3561	return err;
	3562	}
	3563
	3564	static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
	3565	{
	3566	struct inode *inode = file_inode(filp);
	3567	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3568	char *vbuf;
	3569	int err = 0;
	3570
	3571	if (!capable(CAP_SYS_ADMIN))
	3572	return -EPERM;
	3573
	3574	vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
	3575	if (IS_ERR(vbuf))
	3576	return PTR_ERR(vbuf);
	3577
	3578	err = mnt_want_write_file(filp);
	3579	if (err)
	3580	goto out;
	3581
	3582	f2fs_down_write(&sbi->sb_lock);
	3583
	3584	memset(sbi->raw_super->volume_name, 0,
	3585	sizeof(sbi->raw_super->volume_name));
	3586	utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
	3587	sbi->raw_super->volume_name,
	3588	ARRAY_SIZE(sbi->raw_super->volume_name));
	3589
	3590	err = f2fs_commit_super(sbi, false);
	3591
	3592	f2fs_up_write(&sbi->sb_lock);
	3593
	3594	mnt_drop_write_file(filp);
	3595	out:
	3596	kfree(vbuf);
	3597	return err;
	3598	}
	3599
	3600	static int f2fs_get_compress_blocks(struct inode inode, __u64 blocks)
	3601	{
	3602	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
	3603	return -EOPNOTSUPP;
	3604
	3605	if (!f2fs_compressed_file(inode))
	3606	return -EINVAL;
	3607
	3608	*blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
	3609
	3610	return 0;
	3611	}
	3612
	3613	static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
	3614	{
	3615	struct inode *inode = file_inode(filp);
	3616	__u64 blocks;
	3617	int ret;
	3618
	3619	ret = f2fs_get_compress_blocks(inode, &blocks);
	3620	if (ret < 0)
	3621	return ret;
	3622
	3623	return put_user(blocks, (u64 __user *)arg);
	3624	}
	3625
	3626	static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
	3627	{
	3628	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	3629	unsigned int released_blocks = 0;
	3630	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	3631	block_t blkaddr;
	3632	int i;
	3633
	3634	for (i = 0; i < count; i++) {
	3635	blkaddr = data_blkaddr(dn->inode, dn->node_page,
	3636	dn->ofs_in_node + i);
	3637
	3638	if (!__is_valid_data_blkaddr(blkaddr))
	3639	continue;
	3640	if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
	3641	DATA_GENERIC_ENHANCE)))
	3642	return -EFSCORRUPTED;
	3643	}
	3644
	3645	while (count) {
	3646	int compr_blocks = 0;
	3647
	3648	for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
	3649	blkaddr = f2fs_data_blkaddr(dn);
	3650
	3651	if (i == 0) {
	3652	if (blkaddr == COMPRESS_ADDR)
	3653	continue;
	3654	dn->ofs_in_node += cluster_size;
	3655	goto next;
	3656	}
	3657
	3658	if (__is_valid_data_blkaddr(blkaddr))
	3659	compr_blocks++;
	3660
	3661	if (blkaddr != NEW_ADDR)
	3662	continue;
	3663
	3664	f2fs_set_data_blkaddr(dn, NULL_ADDR);
	3665	}
	3666
	3667	f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
	3668	dec_valid_block_count(sbi, dn->inode,
	3669	cluster_size - compr_blocks);
	3670
	3671	released_blocks += cluster_size - compr_blocks;
	3672	next:
	3673	count -= cluster_size;
	3674	}
	3675
	3676	return released_blocks;
	3677	}
	3678
	3679	static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
	3680	{
	3681	struct inode *inode = file_inode(filp);
	3682	struct f2fs_inode_info *fi = F2FS_I(inode);
	3683	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3684	pgoff_t page_idx = 0, last_idx;
	3685	unsigned int released_blocks = 0;
	3686	int ret;
	3687	int writecount;
	3688
	3689	if (!f2fs_sb_has_compression(sbi))
	3690	return -EOPNOTSUPP;
	3691
	3692	if (f2fs_readonly(sbi->sb))
	3693	return -EROFS;
	3694
	3695	ret = mnt_want_write_file(filp);
	3696	if (ret)
	3697	return ret;
	3698
	3699	f2fs_balance_fs(sbi, true);
	3700
	3701	inode_lock(inode);
	3702
	3703	writecount = atomic_read(&inode->i_writecount);
	3704	if ((filp->f_mode & FMODE_WRITE && writecount != 1) \|\|
	3705	(!(filp->f_mode & FMODE_WRITE) && writecount)) {
	3706	ret = -EBUSY;
	3707	goto out;
	3708	}
	3709
	3710	if (!f2fs_compressed_file(inode) \|\|
	3711	is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
	3712	ret = -EINVAL;
	3713	goto out;
	3714	}
	3715
	3716	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	3717	if (ret)
	3718	goto out;
	3719
	3720	if (!atomic_read(&fi->i_compr_blocks)) {
	3721	ret = -EPERM;
	3722	goto out;
	3723	}
	3724
	3725	set_inode_flag(inode, FI_COMPRESS_RELEASED);
	3726	inode_set_ctime_current(inode);
	3727	f2fs_mark_inode_dirty_sync(inode, true);
	3728
	3729	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	3730	filemap_invalidate_lock(inode->i_mapping);
	3731
	3732	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	3733
	3734	while (page_idx < last_idx) {
	3735	struct dnode_of_data dn;
	3736	pgoff_t end_offset, count;
	3737
	3738	f2fs_lock_op(sbi);
	3739
	3740	set_new_dnode(&dn, inode, NULL, NULL, 0);
	3741	ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
	3742	if (ret) {
	3743	f2fs_unlock_op(sbi);
	3744	if (ret == -ENOENT) {
	3745	page_idx = f2fs_get_next_page_offset(&dn,
	3746	page_idx);
	3747	ret = 0;
	3748	continue;
	3749	}
	3750	break;
	3751	}
	3752
	3753	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
	3754	count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
	3755	count = round_up(count, fi->i_cluster_size);
	3756
	3757	ret = release_compress_blocks(&dn, count);
	3758
	3759	f2fs_put_dnode(&dn);
	3760
	3761	f2fs_unlock_op(sbi);
	3762
	3763	if (ret < 0)
	3764	break;
	3765
	3766	page_idx += count;
	3767	released_blocks += ret;
	3768	}
	3769
	3770	filemap_invalidate_unlock(inode->i_mapping);
	3771	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	3772	out:
	3773	if (released_blocks)
	3774	f2fs_update_time(sbi, REQ_TIME);
	3775	inode_unlock(inode);
	3776
	3777	mnt_drop_write_file(filp);
	3778
	3779	if (ret >= 0) {
	3780	ret = put_user(released_blocks, (u64 __user *)arg);
	3781	} else if (released_blocks &&
	3782	atomic_read(&fi->i_compr_blocks)) {
	3783	set_sbi_flag(sbi, SBI_NEED_FSCK);
	3784	f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
	3785	"iblocks=%llu, released=%u, compr_blocks=%u, "
	3786	"run fsck to fix.",
	3787	__func__, inode->i_ino, inode->i_blocks,
	3788	released_blocks,
	3789	atomic_read(&fi->i_compr_blocks));
	3790	}
	3791
	3792	return ret;
	3793	}
	3794
	3795	static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
	3796	unsigned int *reserved_blocks)
	3797	{
	3798	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	3799	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	3800	block_t blkaddr;
	3801	int i;
	3802
	3803	for (i = 0; i < count; i++) {
	3804	blkaddr = data_blkaddr(dn->inode, dn->node_page,
	3805	dn->ofs_in_node + i);
	3806
	3807	if (!__is_valid_data_blkaddr(blkaddr))
	3808	continue;
	3809	if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
	3810	DATA_GENERIC_ENHANCE)))
	3811	return -EFSCORRUPTED;
	3812	}
	3813
	3814	while (count) {
	3815	int compr_blocks = 0;
	3816	blkcnt_t reserved = 0;
	3817	blkcnt_t to_reserved;
	3818	int ret;
	3819
	3820	for (i = 0; i < cluster_size; i++) {
	3821	blkaddr = data_blkaddr(dn->inode, dn->node_page,
	3822	dn->ofs_in_node + i);
	3823
	3824	if (i == 0) {
	3825	if (blkaddr != COMPRESS_ADDR) {
	3826	dn->ofs_in_node += cluster_size;
	3827	goto next;
	3828	}
	3829	continue;
	3830	}
	3831
	3832	/*
	3833	* compressed cluster was not released due to it
	3834	* fails in release_compress_blocks(), so NEW_ADDR
	3835	* is a possible case.
	3836	*/
	3837	if (blkaddr == NEW_ADDR) {
	3838	reserved++;
	3839	continue;
	3840	}
	3841	if (__is_valid_data_blkaddr(blkaddr)) {
	3842	compr_blocks++;
	3843	continue;
	3844	}
	3845	}
	3846
	3847	to_reserved = cluster_size - compr_blocks - reserved;
	3848
	3849	/* for the case all blocks in cluster were reserved */
	3850	if (reserved && to_reserved == 1) {
	3851	dn->ofs_in_node += cluster_size;
	3852	goto next;
	3853	}
	3854
	3855	ret = inc_valid_block_count(sbi, dn->inode,
	3856	&to_reserved, false);
	3857	if (unlikely(ret))
	3858	return ret;
	3859
	3860	for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
	3861	if (f2fs_data_blkaddr(dn) == NULL_ADDR)
	3862	f2fs_set_data_blkaddr(dn, NEW_ADDR);
	3863	}
	3864
	3865	f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
	3866
	3867	*reserved_blocks += to_reserved;
	3868	next:
	3869	count -= cluster_size;
	3870	}
	3871
	3872	return 0;
	3873	}
	3874
	3875	static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
	3876	{
	3877	struct inode *inode = file_inode(filp);
	3878	struct f2fs_inode_info *fi = F2FS_I(inode);
	3879	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	3880	pgoff_t page_idx = 0, last_idx;
	3881	unsigned int reserved_blocks = 0;
	3882	int ret;
	3883
	3884	if (!f2fs_sb_has_compression(sbi))
	3885	return -EOPNOTSUPP;
	3886
	3887	if (f2fs_readonly(sbi->sb))
	3888	return -EROFS;
	3889
	3890	ret = mnt_want_write_file(filp);
	3891	if (ret)
	3892	return ret;
	3893
	3894	f2fs_balance_fs(sbi, true);
	3895
	3896	inode_lock(inode);
	3897
	3898	if (!f2fs_compressed_file(inode) \|\|
	3899	!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
	3900	ret = -EINVAL;
	3901	goto unlock_inode;
	3902	}
	3903
	3904	if (atomic_read(&fi->i_compr_blocks))
	3905	goto unlock_inode;
	3906
	3907	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	3908	filemap_invalidate_lock(inode->i_mapping);
	3909
	3910	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	3911
	3912	while (page_idx < last_idx) {
	3913	struct dnode_of_data dn;
	3914	pgoff_t end_offset, count;
	3915
	3916	f2fs_lock_op(sbi);
	3917
	3918	set_new_dnode(&dn, inode, NULL, NULL, 0);
	3919	ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
	3920	if (ret) {
	3921	f2fs_unlock_op(sbi);
	3922	if (ret == -ENOENT) {
	3923	page_idx = f2fs_get_next_page_offset(&dn,
	3924	page_idx);
	3925	ret = 0;
	3926	continue;
	3927	}
	3928	break;
	3929	}
	3930
	3931	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
	3932	count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
	3933	count = round_up(count, fi->i_cluster_size);
	3934
	3935	ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
	3936
	3937	f2fs_put_dnode(&dn);
	3938
	3939	f2fs_unlock_op(sbi);
	3940
	3941	if (ret < 0)
	3942	break;
	3943
	3944	page_idx += count;
	3945	}
	3946
	3947	filemap_invalidate_unlock(inode->i_mapping);
	3948	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	3949
	3950	if (!ret) {
	3951	clear_inode_flag(inode, FI_COMPRESS_RELEASED);
	3952	inode_set_ctime_current(inode);
	3953	f2fs_mark_inode_dirty_sync(inode, true);
	3954	}
	3955	unlock_inode:
	3956	if (reserved_blocks)
	3957	f2fs_update_time(sbi, REQ_TIME);
	3958	inode_unlock(inode);
	3959	mnt_drop_write_file(filp);
	3960
	3961	if (!ret) {
	3962	ret = put_user(reserved_blocks, (u64 __user *)arg);
	3963	} else if (reserved_blocks &&
	3964	atomic_read(&fi->i_compr_blocks)) {
	3965	set_sbi_flag(sbi, SBI_NEED_FSCK);
	3966	f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
	3967	"iblocks=%llu, reserved=%u, compr_blocks=%u, "
	3968	"run fsck to fix.",
	3969	__func__, inode->i_ino, inode->i_blocks,
	3970	reserved_blocks,
	3971	atomic_read(&fi->i_compr_blocks));
	3972	}
	3973
	3974	return ret;
	3975	}
	3976
	3977	static int f2fs_secure_erase(struct block_device bdev, struct inode inode,
	3978	pgoff_t off, block_t block, block_t len, u32 flags)
	3979	{
	3980	sector_t sector = SECTOR_FROM_BLOCK(block);
	3981	sector_t nr_sects = SECTOR_FROM_BLOCK(len);
	3982	int ret = 0;
	3983
	3984	if (flags & F2FS_TRIM_FILE_DISCARD) {
	3985	if (bdev_max_secure_erase_sectors(bdev))
	3986	ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
	3987	GFP_NOFS);
	3988	else
	3989	ret = blkdev_issue_discard(bdev, sector, nr_sects,
	3990	GFP_NOFS);
	3991	}
	3992
	3993	if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
	3994	if (IS_ENCRYPTED(inode))
	3995	ret = fscrypt_zeroout_range(inode, off, block, len);
	3996	else
	3997	ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
	3998	GFP_NOFS, 0);
	3999	}
	4000
	4001	return ret;
	4002	}
	4003
	4004	static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
	4005	{
	4006	struct inode *inode = file_inode(filp);
	4007	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4008	struct address_space *mapping = inode->i_mapping;
	4009	struct block_device *prev_bdev = NULL;
	4010	struct f2fs_sectrim_range range;
	4011	pgoff_t index, pg_end, prev_index = 0;
	4012	block_t prev_block = 0, len = 0;
	4013	loff_t end_addr;
	4014	bool to_end = false;
	4015	int ret = 0;
	4016
	4017	if (!(filp->f_mode & FMODE_WRITE))
	4018	return -EBADF;
	4019
	4020	if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
	4021	sizeof(range)))
	4022	return -EFAULT;
	4023
	4024	if (range.flags == 0 \|\| (range.flags & ~F2FS_TRIM_FILE_MASK) \|\|
	4025	!S_ISREG(inode->i_mode))
	4026	return -EINVAL;
	4027
	4028	if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
	4029	!f2fs_hw_support_discard(sbi)) \|\|
	4030	((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
	4031	IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
	4032	return -EOPNOTSUPP;
	4033
	4034	ret = mnt_want_write_file(filp);
	4035	if (ret)
	4036	return ret;
	4037	inode_lock(inode);
	4038
	4039	if (f2fs_is_atomic_file(inode) \|\| f2fs_compressed_file(inode) \|\|
	4040	range.start >= inode->i_size) {
	4041	ret = -EINVAL;
	4042	goto err;
	4043	}
	4044
	4045	if (range.len == 0)
	4046	goto err;
	4047
	4048	if (inode->i_size - range.start > range.len) {
	4049	end_addr = range.start + range.len;
	4050	} else {
	4051	end_addr = range.len == (u64)-1 ?
	4052	sbi->sb->s_maxbytes : inode->i_size;
	4053	to_end = true;
	4054	}
	4055
	4056	if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) \|\|
	4057	(!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
	4058	ret = -EINVAL;
	4059	goto err;
	4060	}
	4061
	4062	index = F2FS_BYTES_TO_BLK(range.start);
	4063	pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
	4064
	4065	ret = f2fs_convert_inline_inode(inode);
	4066	if (ret)
	4067	goto err;
	4068
	4069	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	4070	filemap_invalidate_lock(mapping);
	4071
	4072	ret = filemap_write_and_wait_range(mapping, range.start,
	4073	to_end ? LLONG_MAX : end_addr - 1);
	4074	if (ret)
	4075	goto out;
	4076
	4077	truncate_inode_pages_range(mapping, range.start,
	4078	to_end ? -1 : end_addr - 1);
	4079
	4080	while (index < pg_end) {
	4081	struct dnode_of_data dn;
	4082	pgoff_t end_offset, count;
	4083	int i;
	4084
	4085	set_new_dnode(&dn, inode, NULL, NULL, 0);
	4086	ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	4087	if (ret) {
	4088	if (ret == -ENOENT) {
	4089	index = f2fs_get_next_page_offset(&dn, index);
	4090	continue;
	4091	}
	4092	goto out;
	4093	}
	4094
	4095	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
	4096	count = min(end_offset - dn.ofs_in_node, pg_end - index);
	4097	for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
	4098	struct block_device *cur_bdev;
	4099	block_t blkaddr = f2fs_data_blkaddr(&dn);
	4100
	4101	if (!__is_valid_data_blkaddr(blkaddr))
	4102	continue;
	4103
	4104	if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
	4105	DATA_GENERIC_ENHANCE)) {
	4106	ret = -EFSCORRUPTED;
	4107	f2fs_put_dnode(&dn);
	4108	goto out;
	4109	}
	4110
	4111	cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
	4112	if (f2fs_is_multi_device(sbi)) {
	4113	int di = f2fs_target_device_index(sbi, blkaddr);
	4114
	4115	blkaddr -= FDEV(di).start_blk;
	4116	}
	4117
	4118	if (len) {
	4119	if (prev_bdev == cur_bdev &&
	4120	index == prev_index + len &&
	4121	blkaddr == prev_block + len) {
	4122	len++;
	4123	} else {
	4124	ret = f2fs_secure_erase(prev_bdev,
	4125	inode, prev_index, prev_block,
	4126	len, range.flags);
	4127	if (ret) {
	4128	f2fs_put_dnode(&dn);
	4129	goto out;
	4130	}
	4131
	4132	len = 0;
	4133	}
	4134	}
	4135
	4136	if (!len) {
	4137	prev_bdev = cur_bdev;
	4138	prev_index = index;
	4139	prev_block = blkaddr;
	4140	len = 1;
	4141	}
	4142	}
	4143
	4144	f2fs_put_dnode(&dn);
	4145
	4146	if (fatal_signal_pending(current)) {
	4147	ret = -EINTR;
	4148	goto out;
	4149	}
	4150	cond_resched();
	4151	}
	4152
	4153	if (len)
	4154	ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
	4155	prev_block, len, range.flags);
	4156	f2fs_update_time(sbi, REQ_TIME);
	4157	out:
	4158	filemap_invalidate_unlock(mapping);
	4159	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	4160	err:
	4161	inode_unlock(inode);
	4162	mnt_drop_write_file(filp);
	4163
	4164	return ret;
	4165	}
	4166
	4167	static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
	4168	{
	4169	struct inode *inode = file_inode(filp);
	4170	struct f2fs_comp_option option;
	4171
	4172	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
	4173	return -EOPNOTSUPP;
	4174
	4175	inode_lock_shared(inode);
	4176
	4177	if (!f2fs_compressed_file(inode)) {
	4178	inode_unlock_shared(inode);
	4179	return -ENODATA;
	4180	}
	4181
	4182	option.algorithm = F2FS_I(inode)->i_compress_algorithm;
	4183	option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
	4184
	4185	inode_unlock_shared(inode);
	4186
	4187	if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
	4188	sizeof(option)))
	4189	return -EFAULT;
	4190
	4191	return 0;
	4192	}
	4193
	4194	static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
	4195	{
	4196	struct inode *inode = file_inode(filp);
	4197	struct f2fs_inode_info *fi = F2FS_I(inode);
	4198	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4199	struct f2fs_comp_option option;
	4200	int ret = 0;
	4201
	4202	if (!f2fs_sb_has_compression(sbi))
	4203	return -EOPNOTSUPP;
	4204
	4205	if (!(filp->f_mode & FMODE_WRITE))
	4206	return -EBADF;
	4207
	4208	if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
	4209	sizeof(option)))
	4210	return -EFAULT;
	4211
	4212	if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE \|\|
	4213	option.log_cluster_size > MAX_COMPRESS_LOG_SIZE \|\|
	4214	option.algorithm >= COMPRESS_MAX)
	4215	return -EINVAL;
	4216
	4217	ret = mnt_want_write_file(filp);
	4218	if (ret)
	4219	return ret;
	4220	inode_lock(inode);
	4221
	4222	f2fs_down_write(&F2FS_I(inode)->i_sem);
	4223	if (!f2fs_compressed_file(inode)) {
	4224	ret = -EINVAL;
	4225	goto out;
	4226	}
	4227
	4228	if (f2fs_is_mmap_file(inode) \|\| get_dirty_pages(inode)) {
	4229	ret = -EBUSY;
	4230	goto out;
	4231	}
	4232
	4233	if (F2FS_HAS_BLOCKS(inode)) {
	4234	ret = -EFBIG;
	4235	goto out;
	4236	}
	4237
	4238	fi->i_compress_algorithm = option.algorithm;
	4239	fi->i_log_cluster_size = option.log_cluster_size;
	4240	fi->i_cluster_size = BIT(option.log_cluster_size);
	4241	/* Set default level */
	4242	if (fi->i_compress_algorithm == COMPRESS_ZSTD)
	4243	fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
	4244	else
	4245	fi->i_compress_level = 0;
	4246	/* Adjust mount option level */
	4247	if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
	4248	F2FS_OPTION(sbi).compress_level)
	4249	fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
	4250	f2fs_mark_inode_dirty_sync(inode, true);
	4251
	4252	if (!f2fs_is_compress_backend_ready(inode))
	4253	f2fs_warn(sbi, "compression algorithm is successfully set, "
	4254	"but current kernel doesn't support this algorithm.");
	4255	out:
	4256	f2fs_up_write(&fi->i_sem);
	4257	inode_unlock(inode);
	4258	mnt_drop_write_file(filp);
	4259
	4260	return ret;
	4261	}
	4262
	4263	static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
	4264	{
	4265	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
	4266	struct address_space *mapping = inode->i_mapping;
	4267	struct page *page;
	4268	pgoff_t redirty_idx = page_idx;
	4269	int i, page_len = 0, ret = 0;
	4270
	4271	page_cache_ra_unbounded(&ractl, len, 0);
	4272
	4273	for (i = 0; i < len; i++, page_idx++) {
	4274	page = read_cache_page(mapping, page_idx, NULL, NULL);
	4275	if (IS_ERR(page)) {
	4276	ret = PTR_ERR(page);
	4277	break;
	4278	}
	4279	page_len++;
	4280	}
	4281
	4282	for (i = 0; i < page_len; i++, redirty_idx++) {
	4283	page = find_lock_page(mapping, redirty_idx);
	4284
	4285	/* It will never fail, when page has pinned above */
	4286	f2fs_bug_on(F2FS_I_SB(inode), !page);
	4287
	4288	f2fs_wait_on_page_writeback(page, DATA, true, true);
	4289
	4290	set_page_dirty(page);
	4291	set_page_private_gcing(page);
	4292	f2fs_put_page(page, 1);
	4293	f2fs_put_page(page, 0);
	4294	}
	4295
	4296	return ret;
	4297	}
	4298
	4299	static int f2fs_ioc_decompress_file(struct file *filp)
	4300	{
	4301	struct inode *inode = file_inode(filp);
	4302	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4303	struct f2fs_inode_info *fi = F2FS_I(inode);
	4304	pgoff_t page_idx = 0, last_idx, cluster_idx;
	4305	int ret;
	4306
	4307	if (!f2fs_sb_has_compression(sbi) \|\|
	4308	F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
	4309	return -EOPNOTSUPP;
	4310
	4311	if (!(filp->f_mode & FMODE_WRITE))
	4312	return -EBADF;
	4313
	4314	f2fs_balance_fs(sbi, true);
	4315
	4316	ret = mnt_want_write_file(filp);
	4317	if (ret)
	4318	return ret;
	4319	inode_lock(inode);
	4320
	4321	if (!f2fs_is_compress_backend_ready(inode)) {
	4322	ret = -EOPNOTSUPP;
	4323	goto out;
	4324	}
	4325
	4326	if (!f2fs_compressed_file(inode) \|\|
	4327	is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
	4328	ret = -EINVAL;
	4329	goto out;
	4330	}
	4331
	4332	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	4333	if (ret)
	4334	goto out;
	4335
	4336	if (!atomic_read(&fi->i_compr_blocks))
	4337	goto out;
	4338
	4339	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	4340	last_idx >>= fi->i_log_cluster_size;
	4341
	4342	for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
	4343	page_idx = cluster_idx << fi->i_log_cluster_size;
	4344
	4345	if (!f2fs_is_compressed_cluster(inode, page_idx))
	4346	continue;
	4347
	4348	ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
	4349	if (ret < 0)
	4350	break;
	4351
	4352	if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
	4353	ret = filemap_fdatawrite(inode->i_mapping);
	4354	if (ret < 0)
	4355	break;
	4356	}
	4357
	4358	cond_resched();
	4359	if (fatal_signal_pending(current)) {
	4360	ret = -EINTR;
	4361	break;
	4362	}
	4363	}
	4364
	4365	if (!ret)
	4366	ret = filemap_write_and_wait_range(inode->i_mapping, 0,
	4367	LLONG_MAX);
	4368
	4369	if (ret)
	4370	f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
	4371	__func__, ret);
	4372	f2fs_update_time(sbi, REQ_TIME);
	4373	out:
	4374	inode_unlock(inode);
	4375	mnt_drop_write_file(filp);
	4376
	4377	return ret;
	4378	}
	4379
	4380	static int f2fs_ioc_compress_file(struct file *filp)
	4381	{
	4382	struct inode *inode = file_inode(filp);
	4383	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4384	struct f2fs_inode_info *fi = F2FS_I(inode);
	4385	pgoff_t page_idx = 0, last_idx, cluster_idx;
	4386	int ret;
	4387
	4388	if (!f2fs_sb_has_compression(sbi) \|\|
	4389	F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
	4390	return -EOPNOTSUPP;
	4391
	4392	if (!(filp->f_mode & FMODE_WRITE))
	4393	return -EBADF;
	4394
	4395	f2fs_balance_fs(sbi, true);
	4396
	4397	ret = mnt_want_write_file(filp);
	4398	if (ret)
	4399	return ret;
	4400	inode_lock(inode);
	4401
	4402	if (!f2fs_is_compress_backend_ready(inode)) {
	4403	ret = -EOPNOTSUPP;
	4404	goto out;
	4405	}
	4406
	4407	if (!f2fs_compressed_file(inode) \|\|
	4408	is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
	4409	ret = -EINVAL;
	4410	goto out;
	4411	}
	4412
	4413	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	4414	if (ret)
	4415	goto out;
	4416
	4417	set_inode_flag(inode, FI_ENABLE_COMPRESS);
	4418
	4419	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	4420	last_idx >>= fi->i_log_cluster_size;
	4421
	4422	for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
	4423	page_idx = cluster_idx << fi->i_log_cluster_size;
	4424
	4425	if (f2fs_is_sparse_cluster(inode, page_idx))
	4426	continue;
	4427
	4428	ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
	4429	if (ret < 0)
	4430	break;
	4431
	4432	if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
	4433	ret = filemap_fdatawrite(inode->i_mapping);
	4434	if (ret < 0)
	4435	break;
	4436	}
	4437
	4438	cond_resched();
	4439	if (fatal_signal_pending(current)) {
	4440	ret = -EINTR;
	4441	break;
	4442	}
	4443	}
	4444
	4445	if (!ret)
	4446	ret = filemap_write_and_wait_range(inode->i_mapping, 0,
	4447	LLONG_MAX);
	4448
	4449	clear_inode_flag(inode, FI_ENABLE_COMPRESS);
	4450
	4451	if (ret)
	4452	f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
	4453	__func__, ret);
	4454	f2fs_update_time(sbi, REQ_TIME);
	4455	out:
	4456	inode_unlock(inode);
	4457	mnt_drop_write_file(filp);
	4458
	4459	return ret;
	4460	}
	4461
	4462	static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
	4463	{
	4464	switch (cmd) {
	4465	case FS_IOC_GETVERSION:
	4466	return f2fs_ioc_getversion(filp, arg);
	4467	case F2FS_IOC_START_ATOMIC_WRITE:
	4468	return f2fs_ioc_start_atomic_write(filp, false);
	4469	case F2FS_IOC_START_ATOMIC_REPLACE:
	4470	return f2fs_ioc_start_atomic_write(filp, true);
	4471	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
	4472	return f2fs_ioc_commit_atomic_write(filp);
	4473	case F2FS_IOC_ABORT_ATOMIC_WRITE:
	4474	return f2fs_ioc_abort_atomic_write(filp);
	4475	case F2FS_IOC_START_VOLATILE_WRITE:
	4476	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
	4477	return -EOPNOTSUPP;
	4478	case F2FS_IOC_SHUTDOWN:
	4479	return f2fs_ioc_shutdown(filp, arg);
	4480	case FITRIM:
	4481	return f2fs_ioc_fitrim(filp, arg);
	4482	case FS_IOC_SET_ENCRYPTION_POLICY:
	4483	return f2fs_ioc_set_encryption_policy(filp, arg);
	4484	case FS_IOC_GET_ENCRYPTION_POLICY:
	4485	return f2fs_ioc_get_encryption_policy(filp, arg);
	4486	case FS_IOC_GET_ENCRYPTION_PWSALT:
	4487	return f2fs_ioc_get_encryption_pwsalt(filp, arg);
	4488	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
	4489	return f2fs_ioc_get_encryption_policy_ex(filp, arg);
	4490	case FS_IOC_ADD_ENCRYPTION_KEY:
	4491	return f2fs_ioc_add_encryption_key(filp, arg);
	4492	case FS_IOC_REMOVE_ENCRYPTION_KEY:
	4493	return f2fs_ioc_remove_encryption_key(filp, arg);
	4494	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
	4495	return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
	4496	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
	4497	return f2fs_ioc_get_encryption_key_status(filp, arg);
	4498	case FS_IOC_GET_ENCRYPTION_NONCE:
	4499	return f2fs_ioc_get_encryption_nonce(filp, arg);
	4500	case F2FS_IOC_GARBAGE_COLLECT:
	4501	return f2fs_ioc_gc(filp, arg);
	4502	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
	4503	return f2fs_ioc_gc_range(filp, arg);
	4504	case F2FS_IOC_WRITE_CHECKPOINT:
	4505	return f2fs_ioc_write_checkpoint(filp);
	4506	case F2FS_IOC_DEFRAGMENT:
	4507	return f2fs_ioc_defragment(filp, arg);
	4508	case F2FS_IOC_MOVE_RANGE:
	4509	return f2fs_ioc_move_range(filp, arg);
	4510	case F2FS_IOC_FLUSH_DEVICE:
	4511	return f2fs_ioc_flush_device(filp, arg);
	4512	case F2FS_IOC_GET_FEATURES:
	4513	return f2fs_ioc_get_features(filp, arg);
	4514	case F2FS_IOC_GET_PIN_FILE:
	4515	return f2fs_ioc_get_pin_file(filp, arg);
	4516	case F2FS_IOC_SET_PIN_FILE:
	4517	return f2fs_ioc_set_pin_file(filp, arg);
	4518	case F2FS_IOC_PRECACHE_EXTENTS:
	4519	return f2fs_ioc_precache_extents(filp);
	4520	case F2FS_IOC_RESIZE_FS:
	4521	return f2fs_ioc_resize_fs(filp, arg);
	4522	case FS_IOC_ENABLE_VERITY:
	4523	return f2fs_ioc_enable_verity(filp, arg);
	4524	case FS_IOC_MEASURE_VERITY:
	4525	return f2fs_ioc_measure_verity(filp, arg);
	4526	case FS_IOC_READ_VERITY_METADATA:
	4527	return f2fs_ioc_read_verity_metadata(filp, arg);
	4528	case FS_IOC_GETFSLABEL:
	4529	return f2fs_ioc_getfslabel(filp, arg);
	4530	case FS_IOC_SETFSLABEL:
	4531	return f2fs_ioc_setfslabel(filp, arg);
	4532	case F2FS_IOC_GET_COMPRESS_BLOCKS:
	4533	return f2fs_ioc_get_compress_blocks(filp, arg);
	4534	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
	4535	return f2fs_release_compress_blocks(filp, arg);
	4536	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
	4537	return f2fs_reserve_compress_blocks(filp, arg);
	4538	case F2FS_IOC_SEC_TRIM_FILE:
	4539	return f2fs_sec_trim_file(filp, arg);
	4540	case F2FS_IOC_GET_COMPRESS_OPTION:
	4541	return f2fs_ioc_get_compress_option(filp, arg);
	4542	case F2FS_IOC_SET_COMPRESS_OPTION:
	4543	return f2fs_ioc_set_compress_option(filp, arg);
	4544	case F2FS_IOC_DECOMPRESS_FILE:
	4545	return f2fs_ioc_decompress_file(filp);
	4546	case F2FS_IOC_COMPRESS_FILE:
	4547	return f2fs_ioc_compress_file(filp);
	4548	case F2FS_IOC_GET_DEV_ALIAS_FILE:
	4549	return f2fs_ioc_get_dev_alias_file(filp, arg);
	4550	default:
	4551	return -ENOTTY;
	4552	}
	4553	}
	4554
	4555	long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
	4556	{
	4557	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
	4558	return -EIO;
	4559	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
	4560	return -ENOSPC;
	4561
	4562	return __f2fs_ioctl(filp, cmd, arg);
	4563	}
	4564
	4565	/*
	4566	* Return %true if the given read or write request should use direct I/O, or
	4567	* %false if it should use buffered I/O.
	4568	*/
	4569	static bool f2fs_should_use_dio(struct inode inode, struct kiocb iocb,
	4570	struct iov_iter *iter)
	4571	{
	4572	unsigned int align;
	4573
	4574	if (!(iocb->ki_flags & IOCB_DIRECT))
	4575	return false;
	4576
	4577	if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
	4578	return false;
	4579
	4580	/*
	4581	* Direct I/O not aligned to the disk's logical_block_size will be
	4582	* attempted, but will fail with -EINVAL.
	4583	*
	4584	* f2fs additionally requires that direct I/O be aligned to the
	4585	* filesystem block size, which is often a stricter requirement.
	4586	* However, f2fs traditionally falls back to buffered I/O on requests
	4587	* that are logical_block_size-aligned but not fs-block aligned.
	4588	*
	4589	* The below logic implements this behavior.
	4590	*/
	4591	align = iocb->ki_pos \| iov_iter_alignment(iter);
	4592	if (!IS_ALIGNED(align, i_blocksize(inode)) &&
	4593	IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
	4594	return false;
	4595
	4596	return true;
	4597	}
	4598
	4599	static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
	4600	unsigned int flags)
	4601	{
	4602	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
	4603
	4604	dec_page_count(sbi, F2FS_DIO_READ);
	4605	if (error)
	4606	return error;
	4607	f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
	4608	return 0;
	4609	}
	4610
	4611	static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
	4612	.end_io = f2fs_dio_read_end_io,
	4613	};
	4614
	4615	static ssize_t f2fs_dio_read_iter(struct kiocb iocb, struct iov_iter to)
	4616	{
	4617	struct file *file = iocb->ki_filp;
	4618	struct inode *inode = file_inode(file);
	4619	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4620	struct f2fs_inode_info *fi = F2FS_I(inode);
	4621	const loff_t pos = iocb->ki_pos;
	4622	const size_t count = iov_iter_count(to);
	4623	struct iomap_dio *dio;
	4624	ssize_t ret;
	4625
	4626	if (count == 0)
	4627	return 0; /* skip atime update */
	4628
	4629	trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
	4630
	4631	if (iocb->ki_flags & IOCB_NOWAIT) {
	4632	if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
	4633	ret = -EAGAIN;
	4634	goto out;
	4635	}
	4636	} else {
	4637	f2fs_down_read(&fi->i_gc_rwsem[READ]);
	4638	}
	4639
	4640	/* dio is not compatible w/ atomic file */
	4641	if (f2fs_is_atomic_file(inode)) {
	4642	f2fs_up_read(&fi->i_gc_rwsem[READ]);
	4643	ret = -EOPNOTSUPP;
	4644	goto out;
	4645	}
	4646
	4647	/*
	4648	* We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
	4649	* the higher-level function iomap_dio_rw() in order to ensure that the
	4650	* F2FS_DIO_READ counter will be decremented correctly in all cases.
	4651	*/
	4652	inc_page_count(sbi, F2FS_DIO_READ);
	4653	dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
	4654	&f2fs_iomap_dio_read_ops, 0, NULL, 0);
	4655	if (IS_ERR_OR_NULL(dio)) {
	4656	ret = PTR_ERR_OR_ZERO(dio);
	4657	if (ret != -EIOCBQUEUED)
	4658	dec_page_count(sbi, F2FS_DIO_READ);
	4659	} else {
	4660	ret = iomap_dio_complete(dio);
	4661	}
	4662
	4663	f2fs_up_read(&fi->i_gc_rwsem[READ]);
	4664
	4665	file_accessed(file);
	4666	out:
	4667	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
	4668	return ret;
	4669	}
	4670
	4671	static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
	4672	int rw)
	4673	{
	4674	struct inode *inode = file_inode(file);
	4675	char buf, path;
	4676
	4677	buf = f2fs_getname(F2FS_I_SB(inode));
	4678	if (!buf)
	4679	return;
	4680	path = dentry_path_raw(file_dentry(file), buf, PATH_MAX);
	4681	if (IS_ERR(path))
	4682	goto free_buf;
	4683	if (rw == WRITE)
	4684	trace_f2fs_datawrite_start(inode, pos, count,
	4685	current->pid, path, current->comm);
	4686	else
	4687	trace_f2fs_dataread_start(inode, pos, count,
	4688	current->pid, path, current->comm);
	4689	free_buf:
	4690	f2fs_putname(buf);
	4691	}
	4692
	4693	static ssize_t f2fs_file_read_iter(struct kiocb iocb, struct iov_iter to)
	4694	{
	4695	struct inode *inode = file_inode(iocb->ki_filp);
	4696	const loff_t pos = iocb->ki_pos;
	4697	ssize_t ret;
	4698
	4699	if (!f2fs_is_compress_backend_ready(inode))
	4700	return -EOPNOTSUPP;
	4701
	4702	if (trace_f2fs_dataread_start_enabled())
	4703	f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
	4704	iov_iter_count(to), READ);
	4705
	4706	/* In LFS mode, if there is inflight dio, wait for its completion */
	4707	if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
	4708	get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE))
	4709	inode_dio_wait(inode);
	4710
	4711	if (f2fs_should_use_dio(inode, iocb, to)) {
	4712	ret = f2fs_dio_read_iter(iocb, to);
	4713	} else {
	4714	ret = filemap_read(iocb, to, 0);
	4715	if (ret > 0)
	4716	f2fs_update_iostat(F2FS_I_SB(inode), inode,
	4717	APP_BUFFERED_READ_IO, ret);
	4718	}
	4719	if (trace_f2fs_dataread_end_enabled())
	4720	trace_f2fs_dataread_end(inode, pos, ret);
	4721	return ret;
	4722	}
	4723
	4724	static ssize_t f2fs_file_splice_read(struct file in, loff_t ppos,
	4725	struct pipe_inode_info *pipe,
	4726	size_t len, unsigned int flags)
	4727	{
	4728	struct inode *inode = file_inode(in);
	4729	const loff_t pos = *ppos;
	4730	ssize_t ret;
	4731
	4732	if (!f2fs_is_compress_backend_ready(inode))
	4733	return -EOPNOTSUPP;
	4734
	4735	if (trace_f2fs_dataread_start_enabled())
	4736	f2fs_trace_rw_file_path(in, pos, len, READ);
	4737
	4738	ret = filemap_splice_read(in, ppos, pipe, len, flags);
	4739	if (ret > 0)
	4740	f2fs_update_iostat(F2FS_I_SB(inode), inode,
	4741	APP_BUFFERED_READ_IO, ret);
	4742
	4743	if (trace_f2fs_dataread_end_enabled())
	4744	trace_f2fs_dataread_end(inode, pos, ret);
	4745	return ret;
	4746	}
	4747
	4748	static ssize_t f2fs_write_checks(struct kiocb iocb, struct iov_iter from)
	4749	{
	4750	struct file *file = iocb->ki_filp;
	4751	struct inode *inode = file_inode(file);
	4752	ssize_t count;
	4753	int err;
	4754
	4755	if (IS_IMMUTABLE(inode))
	4756	return -EPERM;
	4757
	4758	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
	4759	return -EPERM;
	4760
	4761	count = generic_write_checks(iocb, from);
	4762	if (count <= 0)
	4763	return count;
	4764
	4765	err = file_modified(file);
	4766	if (err)
	4767	return err;
	4768	return count;
	4769	}
	4770
	4771	/*
	4772	* Preallocate blocks for a write request, if it is possible and helpful to do
	4773	* so. Returns a positive number if blocks may have been preallocated, 0 if no
	4774	* blocks were preallocated, or a negative errno value if something went
	4775	* seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if all the
	4776	* requested blocks (not just some of them) have been allocated.
	4777	*/
	4778	static int f2fs_preallocate_blocks(struct kiocb iocb, struct iov_iter iter,
	4779	bool dio)
	4780	{
	4781	struct inode *inode = file_inode(iocb->ki_filp);
	4782	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4783	const loff_t pos = iocb->ki_pos;
	4784	const size_t count = iov_iter_count(iter);
	4785	struct f2fs_map_blocks map = {};
	4786	int flag;
	4787	int ret;
	4788
	4789	/* If it will be an out-of-place direct write, don't bother. */
	4790	if (dio && f2fs_lfs_mode(sbi))
	4791	return 0;
	4792	/*
	4793	* Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
	4794	* buffered IO, if DIO meets any holes.
	4795	*/
	4796	if (dio && i_size_read(inode) &&
	4797	(F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
	4798	return 0;
	4799
	4800	/* No-wait I/O can't allocate blocks. */
	4801	if (iocb->ki_flags & IOCB_NOWAIT)
	4802	return 0;
	4803
	4804	/* If it will be a short write, don't bother. */
	4805	if (fault_in_iov_iter_readable(iter, count))
	4806	return 0;
	4807
	4808	if (f2fs_has_inline_data(inode)) {
	4809	/* If the data will fit inline, don't bother. */
	4810	if (pos + count <= MAX_INLINE_DATA(inode))
	4811	return 0;
	4812	ret = f2fs_convert_inline_inode(inode);
	4813	if (ret)
	4814	return ret;
	4815	}
	4816
	4817	/* Do not preallocate blocks that will be written partially in 4KB. */
	4818	map.m_lblk = F2FS_BLK_ALIGN(pos);
	4819	map.m_len = F2FS_BYTES_TO_BLK(pos + count);
	4820	if (map.m_len > map.m_lblk)
	4821	map.m_len -= map.m_lblk;
	4822	else
	4823	return 0;
	4824
	4825	if (!IS_DEVICE_ALIASING(inode))
	4826	map.m_may_create = true;
	4827	if (dio) {
	4828	map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
	4829	inode->i_write_hint);
	4830	flag = F2FS_GET_BLOCK_PRE_DIO;
	4831	} else {
	4832	map.m_seg_type = NO_CHECK_TYPE;
	4833	flag = F2FS_GET_BLOCK_PRE_AIO;
	4834	}
	4835
	4836	ret = f2fs_map_blocks(inode, &map, flag);
	4837	/* -ENOSPC\|-EDQUOT are fine to report the number of allocated blocks. */
	4838	if (ret < 0 && !((ret == -ENOSPC \|\| ret == -EDQUOT) && map.m_len > 0))
	4839	return ret;
	4840	if (ret == 0)
	4841	set_inode_flag(inode, FI_PREALLOCATED_ALL);
	4842	return map.m_len;
	4843	}
	4844
	4845	static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
	4846	struct iov_iter *from)
	4847	{
	4848	struct file *file = iocb->ki_filp;
	4849	struct inode *inode = file_inode(file);
	4850	ssize_t ret;
	4851
	4852	if (iocb->ki_flags & IOCB_NOWAIT)
	4853	return -EOPNOTSUPP;
	4854
	4855	ret = generic_perform_write(iocb, from);
	4856
	4857	if (ret > 0) {
	4858	f2fs_update_iostat(F2FS_I_SB(inode), inode,
	4859	APP_BUFFERED_IO, ret);
	4860	}
	4861	return ret;
	4862	}
	4863
	4864	static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
	4865	unsigned int flags)
	4866	{
	4867	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
	4868
	4869	dec_page_count(sbi, F2FS_DIO_WRITE);
	4870	if (error)
	4871	return error;
	4872	f2fs_update_time(sbi, REQ_TIME);
	4873	f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
	4874	return 0;
	4875	}
	4876
	4877	static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
	4878	struct bio *bio, loff_t file_offset)
	4879	{
	4880	struct inode *inode = iter->inode;
	4881	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4882	enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
	4883	enum temp_type temp = f2fs_get_segment_temp(sbi, type);
	4884
	4885	bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
	4886	submit_bio(bio);
	4887	}
	4888
	4889	static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
	4890	.end_io = f2fs_dio_write_end_io,
	4891	.submit_io = f2fs_dio_write_submit_io,
	4892	};
	4893
	4894	static void f2fs_flush_buffered_write(struct address_space *mapping,
	4895	loff_t start_pos, loff_t end_pos)
	4896	{
	4897	int ret;
	4898
	4899	ret = filemap_write_and_wait_range(mapping, start_pos, end_pos);
	4900	if (ret < 0)
	4901	return;
	4902	invalidate_mapping_pages(mapping,
	4903	start_pos >> PAGE_SHIFT,
	4904	end_pos >> PAGE_SHIFT);
	4905	}
	4906
	4907	static ssize_t f2fs_dio_write_iter(struct kiocb iocb, struct iov_iter from,
	4908	bool *may_need_sync)
	4909	{
	4910	struct file *file = iocb->ki_filp;
	4911	struct inode *inode = file_inode(file);
	4912	struct f2fs_inode_info *fi = F2FS_I(inode);
	4913	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	4914	const bool do_opu = f2fs_lfs_mode(sbi);
	4915	const loff_t pos = iocb->ki_pos;
	4916	const ssize_t count = iov_iter_count(from);
	4917	unsigned int dio_flags;
	4918	struct iomap_dio *dio;
	4919	ssize_t ret;
	4920
	4921	trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
	4922
	4923	if (iocb->ki_flags & IOCB_NOWAIT) {
	4924	/* f2fs_convert_inline_inode() and block allocation can block */
	4925	if (f2fs_has_inline_data(inode) \|\|
	4926	!f2fs_overwrite_io(inode, pos, count)) {
	4927	ret = -EAGAIN;
	4928	goto out;
	4929	}
	4930
	4931	if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
	4932	ret = -EAGAIN;
	4933	goto out;
	4934	}
	4935	if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
	4936	f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
	4937	ret = -EAGAIN;
	4938	goto out;
	4939	}
	4940	} else {
	4941	ret = f2fs_convert_inline_inode(inode);
	4942	if (ret)
	4943	goto out;
	4944
	4945	f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
	4946	if (do_opu)
	4947	f2fs_down_read(&fi->i_gc_rwsem[READ]);
	4948	}
	4949
	4950	/*
	4951	* We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
	4952	* the higher-level function iomap_dio_rw() in order to ensure that the
	4953	* F2FS_DIO_WRITE counter will be decremented correctly in all cases.
	4954	*/
	4955	inc_page_count(sbi, F2FS_DIO_WRITE);
	4956	dio_flags = 0;
	4957	if (pos + count > inode->i_size)
	4958	dio_flags \|= IOMAP_DIO_FORCE_WAIT;
	4959	dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
	4960	&f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
	4961	if (IS_ERR_OR_NULL(dio)) {
	4962	ret = PTR_ERR_OR_ZERO(dio);
	4963	if (ret == -ENOTBLK)
	4964	ret = 0;
	4965	if (ret != -EIOCBQUEUED)
	4966	dec_page_count(sbi, F2FS_DIO_WRITE);
	4967	} else {
	4968	ret = iomap_dio_complete(dio);
	4969	}
	4970
	4971	if (do_opu)
	4972	f2fs_up_read(&fi->i_gc_rwsem[READ]);
	4973	f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
	4974
	4975	if (ret < 0)
	4976	goto out;
	4977	if (pos + ret > inode->i_size)
	4978	f2fs_i_size_write(inode, pos + ret);
	4979	if (!do_opu)
	4980	set_inode_flag(inode, FI_UPDATE_WRITE);
	4981
	4982	if (iov_iter_count(from)) {
	4983	ssize_t ret2;
	4984	loff_t bufio_start_pos = iocb->ki_pos;
	4985
	4986	/*
	4987	* The direct write was partial, so we need to fall back to a
	4988	* buffered write for the remainder.
	4989	*/
	4990
	4991	ret2 = f2fs_buffered_write_iter(iocb, from);
	4992	if (iov_iter_count(from))
	4993	f2fs_write_failed(inode, iocb->ki_pos);
	4994	if (ret2 < 0)
	4995	goto out;
	4996
	4997	/*
	4998	* Ensure that the pagecache pages are written to disk and
	4999	* invalidated to preserve the expected O_DIRECT semantics.
	5000	*/
	5001	if (ret2 > 0) {
	5002	loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
	5003
	5004	ret += ret2;
	5005
	5006	f2fs_flush_buffered_write(file->f_mapping,
	5007	bufio_start_pos,
	5008	bufio_end_pos);
	5009	}
	5010	} else {
	5011	/* iomap_dio_rw() already handled the generic_write_sync(). */
	5012	*may_need_sync = false;
	5013	}
	5014	out:
	5015	trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
	5016	return ret;
	5017	}
	5018
	5019	static ssize_t f2fs_file_write_iter(struct kiocb iocb, struct iov_iter from)
	5020	{
	5021	struct inode *inode = file_inode(iocb->ki_filp);
	5022	const loff_t orig_pos = iocb->ki_pos;
	5023	const size_t orig_count = iov_iter_count(from);
	5024	loff_t target_size;
	5025	bool dio;
	5026	bool may_need_sync = true;
	5027	int preallocated;
	5028	const loff_t pos = iocb->ki_pos;
	5029	const ssize_t count = iov_iter_count(from);
	5030	ssize_t ret;
	5031
	5032	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
	5033	ret = -EIO;
	5034	goto out;
	5035	}
	5036
	5037	if (!f2fs_is_compress_backend_ready(inode)) {
	5038	ret = -EOPNOTSUPP;
	5039	goto out;
	5040	}
	5041
	5042	if (iocb->ki_flags & IOCB_NOWAIT) {
	5043	if (!inode_trylock(inode)) {
	5044	ret = -EAGAIN;
	5045	goto out;
	5046	}
	5047	} else {
	5048	inode_lock(inode);
	5049	}
	5050
	5051	if (f2fs_is_pinned_file(inode) &&
	5052	!f2fs_overwrite_io(inode, pos, count)) {
	5053	ret = -EIO;
	5054	goto out_unlock;
	5055	}
	5056
	5057	ret = f2fs_write_checks(iocb, from);
	5058	if (ret <= 0)
	5059	goto out_unlock;
	5060
	5061	/* Determine whether we will do a direct write or a buffered write. */
	5062	dio = f2fs_should_use_dio(inode, iocb, from);
	5063
	5064	/* dio is not compatible w/ atomic write */
	5065	if (dio && f2fs_is_atomic_file(inode)) {
	5066	ret = -EOPNOTSUPP;
	5067	goto out_unlock;
	5068	}
	5069
	5070	/* Possibly preallocate the blocks for the write. */
	5071	target_size = iocb->ki_pos + iov_iter_count(from);
	5072	preallocated = f2fs_preallocate_blocks(iocb, from, dio);
	5073	if (preallocated < 0) {
	5074	ret = preallocated;
	5075	} else {
	5076	if (trace_f2fs_datawrite_start_enabled())
	5077	f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
	5078	orig_count, WRITE);
	5079
	5080	/* Do the actual write. */
	5081	ret = dio ?
	5082	f2fs_dio_write_iter(iocb, from, &may_need_sync) :
	5083	f2fs_buffered_write_iter(iocb, from);
	5084
	5085	if (trace_f2fs_datawrite_end_enabled())
	5086	trace_f2fs_datawrite_end(inode, orig_pos, ret);
	5087	}
	5088
	5089	/* Don't leave any preallocated blocks around past i_size. */
	5090	if (preallocated && i_size_read(inode) < target_size) {
	5091	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	5092	filemap_invalidate_lock(inode->i_mapping);
	5093	if (!f2fs_truncate(inode))
	5094	file_dont_truncate(inode);
	5095	filemap_invalidate_unlock(inode->i_mapping);
	5096	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	5097	} else {
	5098	file_dont_truncate(inode);
	5099	}
	5100
	5101	clear_inode_flag(inode, FI_PREALLOCATED_ALL);
	5102	out_unlock:
	5103	inode_unlock(inode);
	5104	out:
	5105	trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
	5106
	5107	if (ret > 0 && may_need_sync)
	5108	ret = generic_write_sync(iocb, ret);
	5109
	5110	/* If buffered IO was forced, flush and drop the data from
	5111	* the page cache to preserve O_DIRECT semantics
	5112	*/
	5113	if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
	5114	f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
	5115	orig_pos,
	5116	orig_pos + ret - 1);
	5117
	5118	return ret;
	5119	}
	5120
	5121	static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
	5122	int advice)
	5123	{
	5124	struct address_space *mapping;
	5125	struct backing_dev_info *bdi;
	5126	struct inode *inode = file_inode(filp);
	5127	int err;
	5128
	5129	if (advice == POSIX_FADV_SEQUENTIAL) {
	5130	if (S_ISFIFO(inode->i_mode))
	5131	return -ESPIPE;
	5132
	5133	mapping = filp->f_mapping;
	5134	if (!mapping \|\| len < 0)
	5135	return -EINVAL;
	5136
	5137	bdi = inode_to_bdi(mapping->host);
	5138	filp->f_ra.ra_pages = bdi->ra_pages *
	5139	F2FS_I_SB(inode)->seq_file_ra_mul;
	5140	spin_lock(&filp->f_lock);
	5141	filp->f_mode &= ~FMODE_RANDOM;
	5142	spin_unlock(&filp->f_lock);
	5143	return 0;
	5144	} else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
	5145	/* Load extent cache at the first readahead. */
	5146	f2fs_precache_extents(inode);
	5147	}
	5148
	5149	err = generic_fadvise(filp, offset, len, advice);
	5150	if (!err && advice == POSIX_FADV_DONTNEED &&
	5151	test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
	5152	f2fs_compressed_file(inode))
	5153	f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
	5154
	5155	return err;
	5156	}
	5157
	5158	#ifdef CONFIG_COMPAT
	5159	struct compat_f2fs_gc_range {
	5160	u32 sync;
	5161	compat_u64 start;
	5162	compat_u64 len;
	5163	};
	5164	#define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\
	5165	struct compat_f2fs_gc_range)
	5166
	5167	static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
	5168	{
	5169	struct compat_f2fs_gc_range __user *urange;
	5170	struct f2fs_gc_range range;
	5171	int err;
	5172
	5173	urange = compat_ptr(arg);
	5174	err = get_user(range.sync, &urange->sync);
	5175	err \|= get_user(range.start, &urange->start);
	5176	err \|= get_user(range.len, &urange->len);
	5177	if (err)
	5178	return -EFAULT;
	5179
	5180	return __f2fs_ioc_gc_range(file, &range);
	5181	}
	5182
	5183	struct compat_f2fs_move_range {
	5184	u32 dst_fd;
	5185	compat_u64 pos_in;
	5186	compat_u64 pos_out;
	5187	compat_u64 len;
	5188	};
	5189	#define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \
	5190	struct compat_f2fs_move_range)
	5191
	5192	static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
	5193	{
	5194	struct compat_f2fs_move_range __user *urange;
	5195	struct f2fs_move_range range;
	5196	int err;
	5197
	5198	urange = compat_ptr(arg);
	5199	err = get_user(range.dst_fd, &urange->dst_fd);
	5200	err \|= get_user(range.pos_in, &urange->pos_in);
	5201	err \|= get_user(range.pos_out, &urange->pos_out);
	5202	err \|= get_user(range.len, &urange->len);
	5203	if (err)
	5204	return -EFAULT;
	5205
	5206	return __f2fs_ioc_move_range(file, &range);
	5207	}
	5208
	5209	long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	5210	{
	5211	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
	5212	return -EIO;
	5213	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
	5214	return -ENOSPC;
	5215
	5216	switch (cmd) {
	5217	case FS_IOC32_GETVERSION:
	5218	cmd = FS_IOC_GETVERSION;
	5219	break;
	5220	case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
	5221	return f2fs_compat_ioc_gc_range(file, arg);
	5222	case F2FS_IOC32_MOVE_RANGE:
	5223	return f2fs_compat_ioc_move_range(file, arg);
	5224	case F2FS_IOC_START_ATOMIC_WRITE:
	5225	case F2FS_IOC_START_ATOMIC_REPLACE:
	5226	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
	5227	case F2FS_IOC_START_VOLATILE_WRITE:
	5228	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
	5229	case F2FS_IOC_ABORT_ATOMIC_WRITE:
	5230	case F2FS_IOC_SHUTDOWN:
	5231	case FITRIM:
	5232	case FS_IOC_SET_ENCRYPTION_POLICY:
	5233	case FS_IOC_GET_ENCRYPTION_PWSALT:
	5234	case FS_IOC_GET_ENCRYPTION_POLICY:
	5235	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
	5236	case FS_IOC_ADD_ENCRYPTION_KEY:
	5237	case FS_IOC_REMOVE_ENCRYPTION_KEY:
	5238	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
	5239	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
	5240	case FS_IOC_GET_ENCRYPTION_NONCE:
	5241	case F2FS_IOC_GARBAGE_COLLECT:
	5242	case F2FS_IOC_WRITE_CHECKPOINT:
	5243	case F2FS_IOC_DEFRAGMENT:
	5244	case F2FS_IOC_FLUSH_DEVICE:
	5245	case F2FS_IOC_GET_FEATURES:
	5246	case F2FS_IOC_GET_PIN_FILE:
	5247	case F2FS_IOC_SET_PIN_FILE:
	5248	case F2FS_IOC_PRECACHE_EXTENTS:
	5249	case F2FS_IOC_RESIZE_FS:
	5250	case FS_IOC_ENABLE_VERITY:
	5251	case FS_IOC_MEASURE_VERITY:
	5252	case FS_IOC_READ_VERITY_METADATA:
	5253	case FS_IOC_GETFSLABEL:
	5254	case FS_IOC_SETFSLABEL:
	5255	case F2FS_IOC_GET_COMPRESS_BLOCKS:
	5256	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
	5257	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
	5258	case F2FS_IOC_SEC_TRIM_FILE:
	5259	case F2FS_IOC_GET_COMPRESS_OPTION:
	5260	case F2FS_IOC_SET_COMPRESS_OPTION:
	5261	case F2FS_IOC_DECOMPRESS_FILE:
	5262	case F2FS_IOC_COMPRESS_FILE:
	5263	case F2FS_IOC_GET_DEV_ALIAS_FILE:
	5264	break;
	5265	default:
	5266	return -ENOIOCTLCMD;
	5267	}
	5268	return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
	5269	}
	5270	#endif
	5271
	5272	const struct file_operations f2fs_file_operations = {
	5273	.llseek = f2fs_llseek,
	5274	.read_iter = f2fs_file_read_iter,
	5275	.write_iter = f2fs_file_write_iter,
	5276	.iopoll = iocb_bio_iopoll,
	5277	.open = f2fs_file_open,
	5278	.release = f2fs_release_file,
	5279	.mmap = f2fs_file_mmap,
	5280	.flush = f2fs_file_flush,
	5281	.fsync = f2fs_sync_file,
	5282	.fallocate = f2fs_fallocate,
	5283	.unlocked_ioctl = f2fs_ioctl,
	5284	#ifdef CONFIG_COMPAT
	5285	.compat_ioctl = f2fs_compat_ioctl,
	5286	#endif
	5287	.splice_read = f2fs_file_splice_read,
	5288	.splice_write = iter_file_splice_write,
	5289	.fadvise = f2fs_file_fadvise,
	5290	.fop_flags = FOP_BUFFER_RASYNC,
	5291	};