]>
Commit | Line | Data |
---|---|---|
1 | // SPDX-License-Identifier: GPL-2.0 | |
2 | /* | |
3 | * fs/f2fs/file.c | |
4 | * | |
5 | * Copyright (c) 2012 Samsung Electronics Co., Ltd. | |
6 | * http://www.samsung.com/ | |
7 | */ | |
8 | #include <linux/fs.h> | |
9 | #include <linux/f2fs_fs.h> | |
10 | #include <linux/stat.h> | |
11 | #include <linux/writeback.h> | |
12 | #include <linux/blkdev.h> | |
13 | #include <linux/falloc.h> | |
14 | #include <linux/types.h> | |
15 | #include <linux/compat.h> | |
16 | #include <linux/uaccess.h> | |
17 | #include <linux/mount.h> | |
18 | #include <linux/pagevec.h> | |
19 | #include <linux/uio.h> | |
20 | #include <linux/uuid.h> | |
21 | #include <linux/file.h> | |
22 | #include <linux/nls.h> | |
23 | #include <linux/sched/signal.h> | |
24 | #include <linux/fileattr.h> | |
25 | #include <linux/fadvise.h> | |
26 | #include <linux/iomap.h> | |
27 | ||
28 | #include "f2fs.h" | |
29 | #include "node.h" | |
30 | #include "segment.h" | |
31 | #include "xattr.h" | |
32 | #include "acl.h" | |
33 | #include "gc.h" | |
34 | #include "iostat.h" | |
35 | #include <trace/events/f2fs.h> | |
36 | #include <uapi/linux/f2fs.h> | |
37 | ||
38 | static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) | |
39 | { | |
40 | struct inode *inode = file_inode(vmf->vma->vm_file); | |
41 | vm_flags_t flags = vmf->vma->vm_flags; | |
42 | vm_fault_t ret; | |
43 | ||
44 | ret = filemap_fault(vmf); | |
45 | if (ret & VM_FAULT_LOCKED) | |
46 | f2fs_update_iostat(F2FS_I_SB(inode), inode, | |
47 | APP_MAPPED_READ_IO, F2FS_BLKSIZE); | |
48 | ||
49 | trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); | |
50 | ||
51 | return ret; | |
52 | } | |
53 | ||
54 | static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) | |
55 | { | |
56 | struct folio *folio = page_folio(vmf->page); | |
57 | struct inode *inode = file_inode(vmf->vma->vm_file); | |
58 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
59 | struct dnode_of_data dn; | |
60 | bool need_alloc = !f2fs_is_pinned_file(inode); | |
61 | int err = 0; | |
62 | vm_fault_t ret; | |
63 | ||
64 | if (unlikely(IS_IMMUTABLE(inode))) | |
65 | return VM_FAULT_SIGBUS; | |
66 | ||
67 | if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { | |
68 | err = -EIO; | |
69 | goto out; | |
70 | } | |
71 | ||
72 | if (unlikely(f2fs_cp_error(sbi))) { | |
73 | err = -EIO; | |
74 | goto out; | |
75 | } | |
76 | ||
77 | if (!f2fs_is_checkpoint_ready(sbi)) { | |
78 | err = -ENOSPC; | |
79 | goto out; | |
80 | } | |
81 | ||
82 | err = f2fs_convert_inline_inode(inode); | |
83 | if (err) | |
84 | goto out; | |
85 | ||
86 | #ifdef CONFIG_F2FS_FS_COMPRESSION | |
87 | if (f2fs_compressed_file(inode)) { | |
88 | int ret = f2fs_is_compressed_cluster(inode, folio->index); | |
89 | ||
90 | if (ret < 0) { | |
91 | err = ret; | |
92 | goto out; | |
93 | } else if (ret) { | |
94 | need_alloc = false; | |
95 | } | |
96 | } | |
97 | #endif | |
98 | /* should do out of any locked page */ | |
99 | if (need_alloc) | |
100 | f2fs_balance_fs(sbi, true); | |
101 | ||
102 | sb_start_pagefault(inode->i_sb); | |
103 | ||
104 | f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); | |
105 | ||
106 | file_update_time(vmf->vma->vm_file); | |
107 | filemap_invalidate_lock_shared(inode->i_mapping); | |
108 | folio_lock(folio); | |
109 | if (unlikely(folio->mapping != inode->i_mapping || | |
110 | folio_pos(folio) > i_size_read(inode) || | |
111 | !folio_test_uptodate(folio))) { | |
112 | folio_unlock(folio); | |
113 | err = -EFAULT; | |
114 | goto out_sem; | |
115 | } | |
116 | ||
117 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
118 | if (need_alloc) { | |
119 | /* block allocation */ | |
120 | err = f2fs_get_block_locked(&dn, folio->index); | |
121 | } else { | |
122 | err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); | |
123 | f2fs_put_dnode(&dn); | |
124 | if (f2fs_is_pinned_file(inode) && | |
125 | !__is_valid_data_blkaddr(dn.data_blkaddr)) | |
126 | err = -EIO; | |
127 | } | |
128 | ||
129 | if (err) { | |
130 | folio_unlock(folio); | |
131 | goto out_sem; | |
132 | } | |
133 | ||
134 | f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true); | |
135 | ||
136 | /* wait for GCed page writeback via META_MAPPING */ | |
137 | f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); | |
138 | ||
139 | /* | |
140 | * check to see if the page is mapped already (no holes) | |
141 | */ | |
142 | if (folio_test_mappedtodisk(folio)) | |
143 | goto out_sem; | |
144 | ||
145 | /* page is wholly or partially inside EOF */ | |
146 | if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > | |
147 | i_size_read(inode)) { | |
148 | loff_t offset; | |
149 | ||
150 | offset = i_size_read(inode) & ~PAGE_MASK; | |
151 | folio_zero_segment(folio, offset, folio_size(folio)); | |
152 | } | |
153 | folio_mark_dirty(folio); | |
154 | ||
155 | f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); | |
156 | f2fs_update_time(sbi, REQ_TIME); | |
157 | ||
158 | out_sem: | |
159 | filemap_invalidate_unlock_shared(inode->i_mapping); | |
160 | ||
161 | sb_end_pagefault(inode->i_sb); | |
162 | out: | |
163 | ret = vmf_fs_error(err); | |
164 | ||
165 | trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); | |
166 | return ret; | |
167 | } | |
168 | ||
169 | static const struct vm_operations_struct f2fs_file_vm_ops = { | |
170 | .fault = f2fs_filemap_fault, | |
171 | .map_pages = filemap_map_pages, | |
172 | .page_mkwrite = f2fs_vm_page_mkwrite, | |
173 | }; | |
174 | ||
175 | static int get_parent_ino(struct inode *inode, nid_t *pino) | |
176 | { | |
177 | struct dentry *dentry; | |
178 | ||
179 | /* | |
180 | * Make sure to get the non-deleted alias. The alias associated with | |
181 | * the open file descriptor being fsync()'ed may be deleted already. | |
182 | */ | |
183 | dentry = d_find_alias(inode); | |
184 | if (!dentry) | |
185 | return 0; | |
186 | ||
187 | *pino = d_parent_ino(dentry); | |
188 | dput(dentry); | |
189 | return 1; | |
190 | } | |
191 | ||
192 | static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) | |
193 | { | |
194 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
195 | enum cp_reason_type cp_reason = CP_NO_NEEDED; | |
196 | ||
197 | if (!S_ISREG(inode->i_mode)) | |
198 | cp_reason = CP_NON_REGULAR; | |
199 | else if (f2fs_compressed_file(inode)) | |
200 | cp_reason = CP_COMPRESSED; | |
201 | else if (inode->i_nlink != 1) | |
202 | cp_reason = CP_HARDLINK; | |
203 | else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) | |
204 | cp_reason = CP_SB_NEED_CP; | |
205 | else if (file_wrong_pino(inode)) | |
206 | cp_reason = CP_WRONG_PINO; | |
207 | else if (!f2fs_space_for_roll_forward(sbi)) | |
208 | cp_reason = CP_NO_SPC_ROLL; | |
209 | else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) | |
210 | cp_reason = CP_NODE_NEED_CP; | |
211 | else if (test_opt(sbi, FASTBOOT)) | |
212 | cp_reason = CP_FASTBOOT_MODE; | |
213 | else if (F2FS_OPTION(sbi).active_logs == 2) | |
214 | cp_reason = CP_SPEC_LOG_NUM; | |
215 | else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && | |
216 | f2fs_need_dentry_mark(sbi, inode->i_ino) && | |
217 | f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, | |
218 | TRANS_DIR_INO)) | |
219 | cp_reason = CP_RECOVER_DIR; | |
220 | else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, | |
221 | XATTR_DIR_INO)) | |
222 | cp_reason = CP_XATTR_DIR; | |
223 | ||
224 | return cp_reason; | |
225 | } | |
226 | ||
227 | static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) | |
228 | { | |
229 | struct page *i = find_get_page(NODE_MAPPING(sbi), ino); | |
230 | bool ret = false; | |
231 | /* But we need to avoid that there are some inode updates */ | |
232 | if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino)) | |
233 | ret = true; | |
234 | f2fs_put_page(i, 0); | |
235 | return ret; | |
236 | } | |
237 | ||
238 | static void try_to_fix_pino(struct inode *inode) | |
239 | { | |
240 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
241 | nid_t pino; | |
242 | ||
243 | f2fs_down_write(&fi->i_sem); | |
244 | if (file_wrong_pino(inode) && inode->i_nlink == 1 && | |
245 | get_parent_ino(inode, &pino)) { | |
246 | f2fs_i_pino_write(inode, pino); | |
247 | file_got_pino(inode); | |
248 | } | |
249 | f2fs_up_write(&fi->i_sem); | |
250 | } | |
251 | ||
252 | static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, | |
253 | int datasync, bool atomic) | |
254 | { | |
255 | struct inode *inode = file->f_mapping->host; | |
256 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
257 | nid_t ino = inode->i_ino; | |
258 | int ret = 0; | |
259 | enum cp_reason_type cp_reason = 0; | |
260 | struct writeback_control wbc = { | |
261 | .sync_mode = WB_SYNC_ALL, | |
262 | .nr_to_write = LONG_MAX, | |
263 | .for_reclaim = 0, | |
264 | }; | |
265 | unsigned int seq_id = 0; | |
266 | ||
267 | if (unlikely(f2fs_readonly(inode->i_sb))) | |
268 | return 0; | |
269 | ||
270 | trace_f2fs_sync_file_enter(inode); | |
271 | ||
272 | if (S_ISDIR(inode->i_mode)) | |
273 | goto go_write; | |
274 | ||
275 | /* if fdatasync is triggered, let's do in-place-update */ | |
276 | if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) | |
277 | set_inode_flag(inode, FI_NEED_IPU); | |
278 | ret = file_write_and_wait_range(file, start, end); | |
279 | clear_inode_flag(inode, FI_NEED_IPU); | |
280 | ||
281 | if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { | |
282 | trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); | |
283 | return ret; | |
284 | } | |
285 | ||
286 | /* if the inode is dirty, let's recover all the time */ | |
287 | if (!f2fs_skip_inode_update(inode, datasync)) { | |
288 | f2fs_write_inode(inode, NULL); | |
289 | goto go_write; | |
290 | } | |
291 | ||
292 | /* | |
293 | * if there is no written data, don't waste time to write recovery info. | |
294 | */ | |
295 | if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && | |
296 | !f2fs_exist_written_data(sbi, ino, APPEND_INO)) { | |
297 | ||
298 | /* it may call write_inode just prior to fsync */ | |
299 | if (need_inode_page_update(sbi, ino)) | |
300 | goto go_write; | |
301 | ||
302 | if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || | |
303 | f2fs_exist_written_data(sbi, ino, UPDATE_INO)) | |
304 | goto flush_out; | |
305 | goto out; | |
306 | } else { | |
307 | /* | |
308 | * for OPU case, during fsync(), node can be persisted before | |
309 | * data when lower device doesn't support write barrier, result | |
310 | * in data corruption after SPO. | |
311 | * So for strict fsync mode, force to use atomic write semantics | |
312 | * to keep write order in between data/node and last node to | |
313 | * avoid potential data corruption. | |
314 | */ | |
315 | if (F2FS_OPTION(sbi).fsync_mode == | |
316 | FSYNC_MODE_STRICT && !atomic) | |
317 | atomic = true; | |
318 | } | |
319 | go_write: | |
320 | /* | |
321 | * Both of fdatasync() and fsync() are able to be recovered from | |
322 | * sudden-power-off. | |
323 | */ | |
324 | f2fs_down_read(&F2FS_I(inode)->i_sem); | |
325 | cp_reason = need_do_checkpoint(inode); | |
326 | f2fs_up_read(&F2FS_I(inode)->i_sem); | |
327 | ||
328 | if (cp_reason) { | |
329 | /* all the dirty node pages should be flushed for POR */ | |
330 | ret = f2fs_sync_fs(inode->i_sb, 1); | |
331 | ||
332 | /* | |
333 | * We've secured consistency through sync_fs. Following pino | |
334 | * will be used only for fsynced inodes after checkpoint. | |
335 | */ | |
336 | try_to_fix_pino(inode); | |
337 | clear_inode_flag(inode, FI_APPEND_WRITE); | |
338 | clear_inode_flag(inode, FI_UPDATE_WRITE); | |
339 | goto out; | |
340 | } | |
341 | sync_nodes: | |
342 | atomic_inc(&sbi->wb_sync_req[NODE]); | |
343 | ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id); | |
344 | atomic_dec(&sbi->wb_sync_req[NODE]); | |
345 | if (ret) | |
346 | goto out; | |
347 | ||
348 | /* if cp_error was enabled, we should avoid infinite loop */ | |
349 | if (unlikely(f2fs_cp_error(sbi))) { | |
350 | ret = -EIO; | |
351 | goto out; | |
352 | } | |
353 | ||
354 | if (f2fs_need_inode_block_update(sbi, ino)) { | |
355 | f2fs_mark_inode_dirty_sync(inode, true); | |
356 | f2fs_write_inode(inode, NULL); | |
357 | goto sync_nodes; | |
358 | } | |
359 | ||
360 | /* | |
361 | * If it's atomic_write, it's just fine to keep write ordering. So | |
362 | * here we don't need to wait for node write completion, since we use | |
363 | * node chain which serializes node blocks. If one of node writes are | |
364 | * reordered, we can see simply broken chain, resulting in stopping | |
365 | * roll-forward recovery. It means we'll recover all or none node blocks | |
366 | * given fsync mark. | |
367 | */ | |
368 | if (!atomic) { | |
369 | ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id); | |
370 | if (ret) | |
371 | goto out; | |
372 | } | |
373 | ||
374 | /* once recovery info is written, don't need to tack this */ | |
375 | f2fs_remove_ino_entry(sbi, ino, APPEND_INO); | |
376 | clear_inode_flag(inode, FI_APPEND_WRITE); | |
377 | flush_out: | |
378 | if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) | |
379 | ret = f2fs_issue_flush(sbi, inode->i_ino); | |
380 | if (!ret) { | |
381 | f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); | |
382 | clear_inode_flag(inode, FI_UPDATE_WRITE); | |
383 | f2fs_remove_ino_entry(sbi, ino, FLUSH_INO); | |
384 | } | |
385 | f2fs_update_time(sbi, REQ_TIME); | |
386 | out: | |
387 | trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); | |
388 | return ret; | |
389 | } | |
390 | ||
391 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |
392 | { | |
393 | if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) | |
394 | return -EIO; | |
395 | return f2fs_do_sync_file(file, start, end, datasync, false); | |
396 | } | |
397 | ||
398 | static bool __found_offset(struct address_space *mapping, | |
399 | struct dnode_of_data *dn, pgoff_t index, int whence) | |
400 | { | |
401 | block_t blkaddr = f2fs_data_blkaddr(dn); | |
402 | struct inode *inode = mapping->host; | |
403 | bool compressed_cluster = false; | |
404 | ||
405 | if (f2fs_compressed_file(inode)) { | |
406 | block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, | |
407 | ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); | |
408 | ||
409 | compressed_cluster = first_blkaddr == COMPRESS_ADDR; | |
410 | } | |
411 | ||
412 | switch (whence) { | |
413 | case SEEK_DATA: | |
414 | if (__is_valid_data_blkaddr(blkaddr)) | |
415 | return true; | |
416 | if (blkaddr == NEW_ADDR && | |
417 | xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) | |
418 | return true; | |
419 | if (compressed_cluster) | |
420 | return true; | |
421 | break; | |
422 | case SEEK_HOLE: | |
423 | if (compressed_cluster) | |
424 | return false; | |
425 | if (blkaddr == NULL_ADDR) | |
426 | return true; | |
427 | break; | |
428 | } | |
429 | return false; | |
430 | } | |
431 | ||
432 | static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) | |
433 | { | |
434 | struct inode *inode = file->f_mapping->host; | |
435 | loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); | |
436 | struct dnode_of_data dn; | |
437 | pgoff_t pgofs, end_offset; | |
438 | loff_t data_ofs = offset; | |
439 | loff_t isize; | |
440 | int err = 0; | |
441 | ||
442 | inode_lock_shared(inode); | |
443 | ||
444 | isize = i_size_read(inode); | |
445 | if (offset >= isize) | |
446 | goto fail; | |
447 | ||
448 | /* handle inline data case */ | |
449 | if (f2fs_has_inline_data(inode)) { | |
450 | if (whence == SEEK_HOLE) { | |
451 | data_ofs = isize; | |
452 | goto found; | |
453 | } else if (whence == SEEK_DATA) { | |
454 | data_ofs = offset; | |
455 | goto found; | |
456 | } | |
457 | } | |
458 | ||
459 | pgofs = (pgoff_t)(offset >> PAGE_SHIFT); | |
460 | ||
461 | for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { | |
462 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
463 | err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); | |
464 | if (err && err != -ENOENT) { | |
465 | goto fail; | |
466 | } else if (err == -ENOENT) { | |
467 | /* direct node does not exists */ | |
468 | if (whence == SEEK_DATA) { | |
469 | pgofs = f2fs_get_next_page_offset(&dn, pgofs); | |
470 | continue; | |
471 | } else { | |
472 | goto found; | |
473 | } | |
474 | } | |
475 | ||
476 | end_offset = ADDRS_PER_PAGE(dn.node_page, inode); | |
477 | ||
478 | /* find data/hole in dnode block */ | |
479 | for (; dn.ofs_in_node < end_offset; | |
480 | dn.ofs_in_node++, pgofs++, | |
481 | data_ofs = (loff_t)pgofs << PAGE_SHIFT) { | |
482 | block_t blkaddr; | |
483 | ||
484 | blkaddr = f2fs_data_blkaddr(&dn); | |
485 | ||
486 | if (__is_valid_data_blkaddr(blkaddr) && | |
487 | !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), | |
488 | blkaddr, DATA_GENERIC_ENHANCE)) { | |
489 | f2fs_put_dnode(&dn); | |
490 | goto fail; | |
491 | } | |
492 | ||
493 | if (__found_offset(file->f_mapping, &dn, | |
494 | pgofs, whence)) { | |
495 | f2fs_put_dnode(&dn); | |
496 | goto found; | |
497 | } | |
498 | } | |
499 | f2fs_put_dnode(&dn); | |
500 | } | |
501 | ||
502 | if (whence == SEEK_DATA) | |
503 | goto fail; | |
504 | found: | |
505 | if (whence == SEEK_HOLE && data_ofs > isize) | |
506 | data_ofs = isize; | |
507 | inode_unlock_shared(inode); | |
508 | return vfs_setpos(file, data_ofs, maxbytes); | |
509 | fail: | |
510 | inode_unlock_shared(inode); | |
511 | return -ENXIO; | |
512 | } | |
513 | ||
514 | static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) | |
515 | { | |
516 | struct inode *inode = file->f_mapping->host; | |
517 | loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); | |
518 | ||
519 | switch (whence) { | |
520 | case SEEK_SET: | |
521 | case SEEK_CUR: | |
522 | case SEEK_END: | |
523 | return generic_file_llseek_size(file, offset, whence, | |
524 | maxbytes, i_size_read(inode)); | |
525 | case SEEK_DATA: | |
526 | case SEEK_HOLE: | |
527 | if (offset < 0) | |
528 | return -ENXIO; | |
529 | return f2fs_seek_block(file, offset, whence); | |
530 | } | |
531 | ||
532 | return -EINVAL; | |
533 | } | |
534 | ||
535 | static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) | |
536 | { | |
537 | struct inode *inode = file_inode(file); | |
538 | ||
539 | if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) | |
540 | return -EIO; | |
541 | ||
542 | if (!f2fs_is_compress_backend_ready(inode)) | |
543 | return -EOPNOTSUPP; | |
544 | ||
545 | file_accessed(file); | |
546 | vma->vm_ops = &f2fs_file_vm_ops; | |
547 | ||
548 | f2fs_down_read(&F2FS_I(inode)->i_sem); | |
549 | set_inode_flag(inode, FI_MMAP_FILE); | |
550 | f2fs_up_read(&F2FS_I(inode)->i_sem); | |
551 | ||
552 | return 0; | |
553 | } | |
554 | ||
555 | static int finish_preallocate_blocks(struct inode *inode) | |
556 | { | |
557 | int ret; | |
558 | ||
559 | inode_lock(inode); | |
560 | if (is_inode_flag_set(inode, FI_OPENED_FILE)) { | |
561 | inode_unlock(inode); | |
562 | return 0; | |
563 | } | |
564 | ||
565 | if (!file_should_truncate(inode)) { | |
566 | set_inode_flag(inode, FI_OPENED_FILE); | |
567 | inode_unlock(inode); | |
568 | return 0; | |
569 | } | |
570 | ||
571 | f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
572 | filemap_invalidate_lock(inode->i_mapping); | |
573 | ||
574 | truncate_setsize(inode, i_size_read(inode)); | |
575 | ret = f2fs_truncate(inode); | |
576 | ||
577 | filemap_invalidate_unlock(inode->i_mapping); | |
578 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
579 | ||
580 | if (!ret) | |
581 | set_inode_flag(inode, FI_OPENED_FILE); | |
582 | ||
583 | inode_unlock(inode); | |
584 | if (ret) | |
585 | return ret; | |
586 | ||
587 | file_dont_truncate(inode); | |
588 | return 0; | |
589 | } | |
590 | ||
591 | static int f2fs_file_open(struct inode *inode, struct file *filp) | |
592 | { | |
593 | int err = fscrypt_file_open(inode, filp); | |
594 | ||
595 | if (err) | |
596 | return err; | |
597 | ||
598 | if (!f2fs_is_compress_backend_ready(inode)) | |
599 | return -EOPNOTSUPP; | |
600 | ||
601 | err = fsverity_file_open(inode, filp); | |
602 | if (err) | |
603 | return err; | |
604 | ||
605 | filp->f_mode |= FMODE_NOWAIT; | |
606 | filp->f_mode |= FMODE_CAN_ODIRECT; | |
607 | ||
608 | err = dquot_file_open(inode, filp); | |
609 | if (err) | |
610 | return err; | |
611 | ||
612 | return finish_preallocate_blocks(inode); | |
613 | } | |
614 | ||
615 | void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |
616 | { | |
617 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | |
618 | int nr_free = 0, ofs = dn->ofs_in_node, len = count; | |
619 | __le32 *addr; | |
620 | bool compressed_cluster = false; | |
621 | int cluster_index = 0, valid_blocks = 0; | |
622 | int cluster_size = F2FS_I(dn->inode)->i_cluster_size; | |
623 | bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); | |
624 | block_t blkstart; | |
625 | int blklen = 0; | |
626 | ||
627 | addr = get_dnode_addr(dn->inode, dn->node_page) + ofs; | |
628 | blkstart = le32_to_cpu(*addr); | |
629 | ||
630 | /* Assumption: truncation starts with cluster */ | |
631 | for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { | |
632 | block_t blkaddr = le32_to_cpu(*addr); | |
633 | ||
634 | if (f2fs_compressed_file(dn->inode) && | |
635 | !(cluster_index & (cluster_size - 1))) { | |
636 | if (compressed_cluster) | |
637 | f2fs_i_compr_blocks_update(dn->inode, | |
638 | valid_blocks, false); | |
639 | compressed_cluster = (blkaddr == COMPRESS_ADDR); | |
640 | valid_blocks = 0; | |
641 | } | |
642 | ||
643 | if (blkaddr == NULL_ADDR) | |
644 | goto next; | |
645 | ||
646 | f2fs_set_data_blkaddr(dn, NULL_ADDR); | |
647 | ||
648 | if (__is_valid_data_blkaddr(blkaddr)) { | |
649 | if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) | |
650 | goto next; | |
651 | if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, | |
652 | DATA_GENERIC_ENHANCE)) | |
653 | goto next; | |
654 | if (compressed_cluster) | |
655 | valid_blocks++; | |
656 | } | |
657 | ||
658 | if (blkstart + blklen == blkaddr) { | |
659 | blklen++; | |
660 | } else { | |
661 | f2fs_invalidate_blocks(sbi, blkstart, blklen); | |
662 | blkstart = blkaddr; | |
663 | blklen = 1; | |
664 | } | |
665 | ||
666 | if (!released || blkaddr != COMPRESS_ADDR) | |
667 | nr_free++; | |
668 | ||
669 | continue; | |
670 | ||
671 | next: | |
672 | if (blklen) | |
673 | f2fs_invalidate_blocks(sbi, blkstart, blklen); | |
674 | ||
675 | blkstart = le32_to_cpu(*(addr + 1)); | |
676 | blklen = 0; | |
677 | } | |
678 | ||
679 | if (blklen) | |
680 | f2fs_invalidate_blocks(sbi, blkstart, blklen); | |
681 | ||
682 | if (compressed_cluster) | |
683 | f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); | |
684 | ||
685 | if (nr_free) { | |
686 | pgoff_t fofs; | |
687 | /* | |
688 | * once we invalidate valid blkaddr in range [ofs, ofs + count], | |
689 | * we will invalidate all blkaddr in the whole range. | |
690 | */ | |
691 | fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), | |
692 | dn->inode) + ofs; | |
693 | f2fs_update_read_extent_cache_range(dn, fofs, 0, len); | |
694 | f2fs_update_age_extent_cache_range(dn, fofs, len); | |
695 | dec_valid_block_count(sbi, dn->inode, nr_free); | |
696 | } | |
697 | dn->ofs_in_node = ofs; | |
698 | ||
699 | f2fs_update_time(sbi, REQ_TIME); | |
700 | trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, | |
701 | dn->ofs_in_node, nr_free); | |
702 | } | |
703 | ||
704 | static int truncate_partial_data_page(struct inode *inode, u64 from, | |
705 | bool cache_only) | |
706 | { | |
707 | loff_t offset = from & (PAGE_SIZE - 1); | |
708 | pgoff_t index = from >> PAGE_SHIFT; | |
709 | struct address_space *mapping = inode->i_mapping; | |
710 | struct page *page; | |
711 | ||
712 | if (!offset && !cache_only) | |
713 | return 0; | |
714 | ||
715 | if (cache_only) { | |
716 | page = find_lock_page(mapping, index); | |
717 | if (page && PageUptodate(page)) | |
718 | goto truncate_out; | |
719 | f2fs_put_page(page, 1); | |
720 | return 0; | |
721 | } | |
722 | ||
723 | page = f2fs_get_lock_data_page(inode, index, true); | |
724 | if (IS_ERR(page)) | |
725 | return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); | |
726 | truncate_out: | |
727 | f2fs_wait_on_page_writeback(page, DATA, true, true); | |
728 | zero_user(page, offset, PAGE_SIZE - offset); | |
729 | ||
730 | /* An encrypted inode should have a key and truncate the last page. */ | |
731 | f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); | |
732 | if (!cache_only) | |
733 | set_page_dirty(page); | |
734 | f2fs_put_page(page, 1); | |
735 | return 0; | |
736 | } | |
737 | ||
738 | int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) | |
739 | { | |
740 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
741 | struct dnode_of_data dn; | |
742 | pgoff_t free_from; | |
743 | int count = 0, err = 0; | |
744 | struct page *ipage; | |
745 | bool truncate_page = false; | |
746 | ||
747 | trace_f2fs_truncate_blocks_enter(inode, from); | |
748 | ||
749 | if (IS_DEVICE_ALIASING(inode) && from) { | |
750 | err = -EINVAL; | |
751 | goto out_err; | |
752 | } | |
753 | ||
754 | free_from = (pgoff_t)F2FS_BLK_ALIGN(from); | |
755 | ||
756 | if (free_from >= max_file_blocks(inode)) | |
757 | goto free_partial; | |
758 | ||
759 | if (lock) | |
760 | f2fs_lock_op(sbi); | |
761 | ||
762 | ipage = f2fs_get_node_page(sbi, inode->i_ino); | |
763 | if (IS_ERR(ipage)) { | |
764 | err = PTR_ERR(ipage); | |
765 | goto out; | |
766 | } | |
767 | ||
768 | if (IS_DEVICE_ALIASING(inode)) { | |
769 | struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; | |
770 | struct extent_info ei = et->largest; | |
771 | ||
772 | f2fs_invalidate_blocks(sbi, ei.blk, ei.len); | |
773 | ||
774 | dec_valid_block_count(sbi, inode, ei.len); | |
775 | f2fs_update_time(sbi, REQ_TIME); | |
776 | ||
777 | f2fs_put_page(ipage, 1); | |
778 | goto out; | |
779 | } | |
780 | ||
781 | if (f2fs_has_inline_data(inode)) { | |
782 | f2fs_truncate_inline_inode(inode, ipage, from); | |
783 | f2fs_put_page(ipage, 1); | |
784 | truncate_page = true; | |
785 | goto out; | |
786 | } | |
787 | ||
788 | set_new_dnode(&dn, inode, ipage, NULL, 0); | |
789 | err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); | |
790 | if (err) { | |
791 | if (err == -ENOENT) | |
792 | goto free_next; | |
793 | goto out; | |
794 | } | |
795 | ||
796 | count = ADDRS_PER_PAGE(dn.node_page, inode); | |
797 | ||
798 | count -= dn.ofs_in_node; | |
799 | f2fs_bug_on(sbi, count < 0); | |
800 | ||
801 | if (dn.ofs_in_node || IS_INODE(dn.node_page)) { | |
802 | f2fs_truncate_data_blocks_range(&dn, count); | |
803 | free_from += count; | |
804 | } | |
805 | ||
806 | f2fs_put_dnode(&dn); | |
807 | free_next: | |
808 | err = f2fs_truncate_inode_blocks(inode, free_from); | |
809 | out: | |
810 | if (lock) | |
811 | f2fs_unlock_op(sbi); | |
812 | free_partial: | |
813 | /* lastly zero out the first data page */ | |
814 | if (!err) | |
815 | err = truncate_partial_data_page(inode, from, truncate_page); | |
816 | out_err: | |
817 | trace_f2fs_truncate_blocks_exit(inode, err); | |
818 | return err; | |
819 | } | |
820 | ||
821 | int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) | |
822 | { | |
823 | u64 free_from = from; | |
824 | int err; | |
825 | ||
826 | #ifdef CONFIG_F2FS_FS_COMPRESSION | |
827 | /* | |
828 | * for compressed file, only support cluster size | |
829 | * aligned truncation. | |
830 | */ | |
831 | if (f2fs_compressed_file(inode)) | |
832 | free_from = round_up(from, | |
833 | F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); | |
834 | #endif | |
835 | ||
836 | err = f2fs_do_truncate_blocks(inode, free_from, lock); | |
837 | if (err) | |
838 | return err; | |
839 | ||
840 | #ifdef CONFIG_F2FS_FS_COMPRESSION | |
841 | /* | |
842 | * For compressed file, after release compress blocks, don't allow write | |
843 | * direct, but we should allow write direct after truncate to zero. | |
844 | */ | |
845 | if (f2fs_compressed_file(inode) && !free_from | |
846 | && is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) | |
847 | clear_inode_flag(inode, FI_COMPRESS_RELEASED); | |
848 | ||
849 | if (from != free_from) { | |
850 | err = f2fs_truncate_partial_cluster(inode, from, lock); | |
851 | if (err) | |
852 | return err; | |
853 | } | |
854 | #endif | |
855 | ||
856 | return 0; | |
857 | } | |
858 | ||
859 | int f2fs_truncate(struct inode *inode) | |
860 | { | |
861 | int err; | |
862 | ||
863 | if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) | |
864 | return -EIO; | |
865 | ||
866 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | |
867 | S_ISLNK(inode->i_mode))) | |
868 | return 0; | |
869 | ||
870 | trace_f2fs_truncate(inode); | |
871 | ||
872 | if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) | |
873 | return -EIO; | |
874 | ||
875 | err = f2fs_dquot_initialize(inode); | |
876 | if (err) | |
877 | return err; | |
878 | ||
879 | /* we should check inline_data size */ | |
880 | if (!f2fs_may_inline_data(inode)) { | |
881 | err = f2fs_convert_inline_inode(inode); | |
882 | if (err) | |
883 | return err; | |
884 | } | |
885 | ||
886 | err = f2fs_truncate_blocks(inode, i_size_read(inode), true); | |
887 | if (err) | |
888 | return err; | |
889 | ||
890 | inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); | |
891 | f2fs_mark_inode_dirty_sync(inode, false); | |
892 | return 0; | |
893 | } | |
894 | ||
895 | static bool f2fs_force_buffered_io(struct inode *inode, int rw) | |
896 | { | |
897 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
898 | ||
899 | if (!fscrypt_dio_supported(inode)) | |
900 | return true; | |
901 | if (fsverity_active(inode)) | |
902 | return true; | |
903 | if (f2fs_compressed_file(inode)) | |
904 | return true; | |
905 | /* | |
906 | * only force direct read to use buffered IO, for direct write, | |
907 | * it expects inline data conversion before committing IO. | |
908 | */ | |
909 | if (f2fs_has_inline_data(inode) && rw == READ) | |
910 | return true; | |
911 | ||
912 | /* disallow direct IO if any of devices has unaligned blksize */ | |
913 | if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) | |
914 | return true; | |
915 | /* | |
916 | * for blkzoned device, fallback direct IO to buffered IO, so | |
917 | * all IOs can be serialized by log-structured write. | |
918 | */ | |
919 | if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && | |
920 | !f2fs_is_pinned_file(inode)) | |
921 | return true; | |
922 | if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) | |
923 | return true; | |
924 | ||
925 | return false; | |
926 | } | |
927 | ||
928 | int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path, | |
929 | struct kstat *stat, u32 request_mask, unsigned int query_flags) | |
930 | { | |
931 | struct inode *inode = d_inode(path->dentry); | |
932 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
933 | struct f2fs_inode *ri = NULL; | |
934 | unsigned int flags; | |
935 | ||
936 | if (f2fs_has_extra_attr(inode) && | |
937 | f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && | |
938 | F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { | |
939 | stat->result_mask |= STATX_BTIME; | |
940 | stat->btime.tv_sec = fi->i_crtime.tv_sec; | |
941 | stat->btime.tv_nsec = fi->i_crtime.tv_nsec; | |
942 | } | |
943 | ||
944 | /* | |
945 | * Return the DIO alignment restrictions if requested. We only return | |
946 | * this information when requested, since on encrypted files it might | |
947 | * take a fair bit of work to get if the file wasn't opened recently. | |
948 | * | |
949 | * f2fs sometimes supports DIO reads but not DIO writes. STATX_DIOALIGN | |
950 | * cannot represent that, so in that case we report no DIO support. | |
951 | */ | |
952 | if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { | |
953 | unsigned int bsize = i_blocksize(inode); | |
954 | ||
955 | stat->result_mask |= STATX_DIOALIGN; | |
956 | if (!f2fs_force_buffered_io(inode, WRITE)) { | |
957 | stat->dio_mem_align = bsize; | |
958 | stat->dio_offset_align = bsize; | |
959 | } | |
960 | } | |
961 | ||
962 | flags = fi->i_flags; | |
963 | if (flags & F2FS_COMPR_FL) | |
964 | stat->attributes |= STATX_ATTR_COMPRESSED; | |
965 | if (flags & F2FS_APPEND_FL) | |
966 | stat->attributes |= STATX_ATTR_APPEND; | |
967 | if (IS_ENCRYPTED(inode)) | |
968 | stat->attributes |= STATX_ATTR_ENCRYPTED; | |
969 | if (flags & F2FS_IMMUTABLE_FL) | |
970 | stat->attributes |= STATX_ATTR_IMMUTABLE; | |
971 | if (flags & F2FS_NODUMP_FL) | |
972 | stat->attributes |= STATX_ATTR_NODUMP; | |
973 | if (IS_VERITY(inode)) | |
974 | stat->attributes |= STATX_ATTR_VERITY; | |
975 | ||
976 | stat->attributes_mask |= (STATX_ATTR_COMPRESSED | | |
977 | STATX_ATTR_APPEND | | |
978 | STATX_ATTR_ENCRYPTED | | |
979 | STATX_ATTR_IMMUTABLE | | |
980 | STATX_ATTR_NODUMP | | |
981 | STATX_ATTR_VERITY); | |
982 | ||
983 | generic_fillattr(idmap, request_mask, inode, stat); | |
984 | ||
985 | /* we need to show initial sectors used for inline_data/dentries */ | |
986 | if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || | |
987 | f2fs_has_inline_dentry(inode)) | |
988 | stat->blocks += (stat->size + 511) >> 9; | |
989 | ||
990 | return 0; | |
991 | } | |
992 | ||
993 | #ifdef CONFIG_F2FS_FS_POSIX_ACL | |
994 | static void __setattr_copy(struct mnt_idmap *idmap, | |
995 | struct inode *inode, const struct iattr *attr) | |
996 | { | |
997 | unsigned int ia_valid = attr->ia_valid; | |
998 | ||
999 | i_uid_update(idmap, attr, inode); | |
1000 | i_gid_update(idmap, attr, inode); | |
1001 | if (ia_valid & ATTR_ATIME) | |
1002 | inode_set_atime_to_ts(inode, attr->ia_atime); | |
1003 | if (ia_valid & ATTR_MTIME) | |
1004 | inode_set_mtime_to_ts(inode, attr->ia_mtime); | |
1005 | if (ia_valid & ATTR_CTIME) | |
1006 | inode_set_ctime_to_ts(inode, attr->ia_ctime); | |
1007 | if (ia_valid & ATTR_MODE) { | |
1008 | umode_t mode = attr->ia_mode; | |
1009 | ||
1010 | if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode))) | |
1011 | mode &= ~S_ISGID; | |
1012 | set_acl_inode(inode, mode); | |
1013 | } | |
1014 | } | |
1015 | #else | |
1016 | #define __setattr_copy setattr_copy | |
1017 | #endif | |
1018 | ||
1019 | int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, | |
1020 | struct iattr *attr) | |
1021 | { | |
1022 | struct inode *inode = d_inode(dentry); | |
1023 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
1024 | int err; | |
1025 | ||
1026 | if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) | |
1027 | return -EIO; | |
1028 | ||
1029 | if (unlikely(IS_IMMUTABLE(inode))) | |
1030 | return -EPERM; | |
1031 | ||
1032 | if (unlikely(IS_APPEND(inode) && | |
1033 | (attr->ia_valid & (ATTR_MODE | ATTR_UID | | |
1034 | ATTR_GID | ATTR_TIMES_SET)))) | |
1035 | return -EPERM; | |
1036 | ||
1037 | if ((attr->ia_valid & ATTR_SIZE)) { | |
1038 | if (!f2fs_is_compress_backend_ready(inode) || | |
1039 | IS_DEVICE_ALIASING(inode)) | |
1040 | return -EOPNOTSUPP; | |
1041 | if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && | |
1042 | !IS_ALIGNED(attr->ia_size, | |
1043 | F2FS_BLK_TO_BYTES(fi->i_cluster_size))) | |
1044 | return -EINVAL; | |
1045 | } | |
1046 | ||
1047 | err = setattr_prepare(idmap, dentry, attr); | |
1048 | if (err) | |
1049 | return err; | |
1050 | ||
1051 | err = fscrypt_prepare_setattr(dentry, attr); | |
1052 | if (err) | |
1053 | return err; | |
1054 | ||
1055 | err = fsverity_prepare_setattr(dentry, attr); | |
1056 | if (err) | |
1057 | return err; | |
1058 | ||
1059 | if (is_quota_modification(idmap, inode, attr)) { | |
1060 | err = f2fs_dquot_initialize(inode); | |
1061 | if (err) | |
1062 | return err; | |
1063 | } | |
1064 | if (i_uid_needs_update(idmap, attr, inode) || | |
1065 | i_gid_needs_update(idmap, attr, inode)) { | |
1066 | f2fs_lock_op(F2FS_I_SB(inode)); | |
1067 | err = dquot_transfer(idmap, inode, attr); | |
1068 | if (err) { | |
1069 | set_sbi_flag(F2FS_I_SB(inode), | |
1070 | SBI_QUOTA_NEED_REPAIR); | |
1071 | f2fs_unlock_op(F2FS_I_SB(inode)); | |
1072 | return err; | |
1073 | } | |
1074 | /* | |
1075 | * update uid/gid under lock_op(), so that dquot and inode can | |
1076 | * be updated atomically. | |
1077 | */ | |
1078 | i_uid_update(idmap, attr, inode); | |
1079 | i_gid_update(idmap, attr, inode); | |
1080 | f2fs_mark_inode_dirty_sync(inode, true); | |
1081 | f2fs_unlock_op(F2FS_I_SB(inode)); | |
1082 | } | |
1083 | ||
1084 | if (attr->ia_valid & ATTR_SIZE) { | |
1085 | loff_t old_size = i_size_read(inode); | |
1086 | ||
1087 | if (attr->ia_size > MAX_INLINE_DATA(inode)) { | |
1088 | /* | |
1089 | * should convert inline inode before i_size_write to | |
1090 | * keep smaller than inline_data size with inline flag. | |
1091 | */ | |
1092 | err = f2fs_convert_inline_inode(inode); | |
1093 | if (err) | |
1094 | return err; | |
1095 | } | |
1096 | ||
1097 | /* | |
1098 | * wait for inflight dio, blocks should be removed after | |
1099 | * IO completion. | |
1100 | */ | |
1101 | if (attr->ia_size < old_size) | |
1102 | inode_dio_wait(inode); | |
1103 | ||
1104 | f2fs_down_write(&fi->i_gc_rwsem[WRITE]); | |
1105 | filemap_invalidate_lock(inode->i_mapping); | |
1106 | ||
1107 | truncate_setsize(inode, attr->ia_size); | |
1108 | ||
1109 | if (attr->ia_size <= old_size) | |
1110 | err = f2fs_truncate(inode); | |
1111 | /* | |
1112 | * do not trim all blocks after i_size if target size is | |
1113 | * larger than i_size. | |
1114 | */ | |
1115 | filemap_invalidate_unlock(inode->i_mapping); | |
1116 | f2fs_up_write(&fi->i_gc_rwsem[WRITE]); | |
1117 | if (err) | |
1118 | return err; | |
1119 | ||
1120 | spin_lock(&fi->i_size_lock); | |
1121 | inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); | |
1122 | fi->last_disk_size = i_size_read(inode); | |
1123 | spin_unlock(&fi->i_size_lock); | |
1124 | } | |
1125 | ||
1126 | __setattr_copy(idmap, inode, attr); | |
1127 | ||
1128 | if (attr->ia_valid & ATTR_MODE) { | |
1129 | err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode)); | |
1130 | ||
1131 | if (is_inode_flag_set(inode, FI_ACL_MODE)) { | |
1132 | if (!err) | |
1133 | inode->i_mode = fi->i_acl_mode; | |
1134 | clear_inode_flag(inode, FI_ACL_MODE); | |
1135 | } | |
1136 | } | |
1137 | ||
1138 | /* file size may changed here */ | |
1139 | f2fs_mark_inode_dirty_sync(inode, true); | |
1140 | ||
1141 | /* inode change will produce dirty node pages flushed by checkpoint */ | |
1142 | f2fs_balance_fs(F2FS_I_SB(inode), true); | |
1143 | ||
1144 | return err; | |
1145 | } | |
1146 | ||
1147 | const struct inode_operations f2fs_file_inode_operations = { | |
1148 | .getattr = f2fs_getattr, | |
1149 | .setattr = f2fs_setattr, | |
1150 | .get_inode_acl = f2fs_get_acl, | |
1151 | .set_acl = f2fs_set_acl, | |
1152 | .listxattr = f2fs_listxattr, | |
1153 | .fiemap = f2fs_fiemap, | |
1154 | .fileattr_get = f2fs_fileattr_get, | |
1155 | .fileattr_set = f2fs_fileattr_set, | |
1156 | }; | |
1157 | ||
1158 | static int fill_zero(struct inode *inode, pgoff_t index, | |
1159 | loff_t start, loff_t len) | |
1160 | { | |
1161 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1162 | struct page *page; | |
1163 | ||
1164 | if (!len) | |
1165 | return 0; | |
1166 | ||
1167 | f2fs_balance_fs(sbi, true); | |
1168 | ||
1169 | f2fs_lock_op(sbi); | |
1170 | page = f2fs_get_new_data_page(inode, NULL, index, false); | |
1171 | f2fs_unlock_op(sbi); | |
1172 | ||
1173 | if (IS_ERR(page)) | |
1174 | return PTR_ERR(page); | |
1175 | ||
1176 | f2fs_wait_on_page_writeback(page, DATA, true, true); | |
1177 | zero_user(page, start, len); | |
1178 | set_page_dirty(page); | |
1179 | f2fs_put_page(page, 1); | |
1180 | return 0; | |
1181 | } | |
1182 | ||
1183 | int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) | |
1184 | { | |
1185 | int err; | |
1186 | ||
1187 | while (pg_start < pg_end) { | |
1188 | struct dnode_of_data dn; | |
1189 | pgoff_t end_offset, count; | |
1190 | ||
1191 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
1192 | err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); | |
1193 | if (err) { | |
1194 | if (err == -ENOENT) { | |
1195 | pg_start = f2fs_get_next_page_offset(&dn, | |
1196 | pg_start); | |
1197 | continue; | |
1198 | } | |
1199 | return err; | |
1200 | } | |
1201 | ||
1202 | end_offset = ADDRS_PER_PAGE(dn.node_page, inode); | |
1203 | count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); | |
1204 | ||
1205 | f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); | |
1206 | ||
1207 | f2fs_truncate_data_blocks_range(&dn, count); | |
1208 | f2fs_put_dnode(&dn); | |
1209 | ||
1210 | pg_start += count; | |
1211 | } | |
1212 | return 0; | |
1213 | } | |
1214 | ||
1215 | static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |
1216 | { | |
1217 | pgoff_t pg_start, pg_end; | |
1218 | loff_t off_start, off_end; | |
1219 | int ret; | |
1220 | ||
1221 | ret = f2fs_convert_inline_inode(inode); | |
1222 | if (ret) | |
1223 | return ret; | |
1224 | ||
1225 | pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; | |
1226 | pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; | |
1227 | ||
1228 | off_start = offset & (PAGE_SIZE - 1); | |
1229 | off_end = (offset + len) & (PAGE_SIZE - 1); | |
1230 | ||
1231 | if (pg_start == pg_end) { | |
1232 | ret = fill_zero(inode, pg_start, off_start, | |
1233 | off_end - off_start); | |
1234 | if (ret) | |
1235 | return ret; | |
1236 | } else { | |
1237 | if (off_start) { | |
1238 | ret = fill_zero(inode, pg_start++, off_start, | |
1239 | PAGE_SIZE - off_start); | |
1240 | if (ret) | |
1241 | return ret; | |
1242 | } | |
1243 | if (off_end) { | |
1244 | ret = fill_zero(inode, pg_end, 0, off_end); | |
1245 | if (ret) | |
1246 | return ret; | |
1247 | } | |
1248 | ||
1249 | if (pg_start < pg_end) { | |
1250 | loff_t blk_start, blk_end; | |
1251 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1252 | ||
1253 | f2fs_balance_fs(sbi, true); | |
1254 | ||
1255 | blk_start = (loff_t)pg_start << PAGE_SHIFT; | |
1256 | blk_end = (loff_t)pg_end << PAGE_SHIFT; | |
1257 | ||
1258 | f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1259 | filemap_invalidate_lock(inode->i_mapping); | |
1260 | ||
1261 | truncate_pagecache_range(inode, blk_start, blk_end - 1); | |
1262 | ||
1263 | f2fs_lock_op(sbi); | |
1264 | ret = f2fs_truncate_hole(inode, pg_start, pg_end); | |
1265 | f2fs_unlock_op(sbi); | |
1266 | ||
1267 | filemap_invalidate_unlock(inode->i_mapping); | |
1268 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1269 | } | |
1270 | } | |
1271 | ||
1272 | return ret; | |
1273 | } | |
1274 | ||
1275 | static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, | |
1276 | int *do_replace, pgoff_t off, pgoff_t len) | |
1277 | { | |
1278 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1279 | struct dnode_of_data dn; | |
1280 | int ret, done, i; | |
1281 | ||
1282 | next_dnode: | |
1283 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
1284 | ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); | |
1285 | if (ret && ret != -ENOENT) { | |
1286 | return ret; | |
1287 | } else if (ret == -ENOENT) { | |
1288 | if (dn.max_level == 0) | |
1289 | return -ENOENT; | |
1290 | done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - | |
1291 | dn.ofs_in_node, len); | |
1292 | blkaddr += done; | |
1293 | do_replace += done; | |
1294 | goto next; | |
1295 | } | |
1296 | ||
1297 | done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - | |
1298 | dn.ofs_in_node, len); | |
1299 | for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { | |
1300 | *blkaddr = f2fs_data_blkaddr(&dn); | |
1301 | ||
1302 | if (__is_valid_data_blkaddr(*blkaddr) && | |
1303 | !f2fs_is_valid_blkaddr(sbi, *blkaddr, | |
1304 | DATA_GENERIC_ENHANCE)) { | |
1305 | f2fs_put_dnode(&dn); | |
1306 | return -EFSCORRUPTED; | |
1307 | } | |
1308 | ||
1309 | if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { | |
1310 | ||
1311 | if (f2fs_lfs_mode(sbi)) { | |
1312 | f2fs_put_dnode(&dn); | |
1313 | return -EOPNOTSUPP; | |
1314 | } | |
1315 | ||
1316 | /* do not invalidate this block address */ | |
1317 | f2fs_update_data_blkaddr(&dn, NULL_ADDR); | |
1318 | *do_replace = 1; | |
1319 | } | |
1320 | } | |
1321 | f2fs_put_dnode(&dn); | |
1322 | next: | |
1323 | len -= done; | |
1324 | off += done; | |
1325 | if (len) | |
1326 | goto next_dnode; | |
1327 | return 0; | |
1328 | } | |
1329 | ||
1330 | static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, | |
1331 | int *do_replace, pgoff_t off, int len) | |
1332 | { | |
1333 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1334 | struct dnode_of_data dn; | |
1335 | int ret, i; | |
1336 | ||
1337 | for (i = 0; i < len; i++, do_replace++, blkaddr++) { | |
1338 | if (*do_replace == 0) | |
1339 | continue; | |
1340 | ||
1341 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
1342 | ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); | |
1343 | if (ret) { | |
1344 | dec_valid_block_count(sbi, inode, 1); | |
1345 | f2fs_invalidate_blocks(sbi, *blkaddr, 1); | |
1346 | } else { | |
1347 | f2fs_update_data_blkaddr(&dn, *blkaddr); | |
1348 | } | |
1349 | f2fs_put_dnode(&dn); | |
1350 | } | |
1351 | return 0; | |
1352 | } | |
1353 | ||
1354 | static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, | |
1355 | block_t *blkaddr, int *do_replace, | |
1356 | pgoff_t src, pgoff_t dst, pgoff_t len, bool full) | |
1357 | { | |
1358 | struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); | |
1359 | pgoff_t i = 0; | |
1360 | int ret; | |
1361 | ||
1362 | while (i < len) { | |
1363 | if (blkaddr[i] == NULL_ADDR && !full) { | |
1364 | i++; | |
1365 | continue; | |
1366 | } | |
1367 | ||
1368 | if (do_replace[i] || blkaddr[i] == NULL_ADDR) { | |
1369 | struct dnode_of_data dn; | |
1370 | struct node_info ni; | |
1371 | size_t new_size; | |
1372 | pgoff_t ilen; | |
1373 | ||
1374 | set_new_dnode(&dn, dst_inode, NULL, NULL, 0); | |
1375 | ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE); | |
1376 | if (ret) | |
1377 | return ret; | |
1378 | ||
1379 | ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); | |
1380 | if (ret) { | |
1381 | f2fs_put_dnode(&dn); | |
1382 | return ret; | |
1383 | } | |
1384 | ||
1385 | ilen = min((pgoff_t) | |
1386 | ADDRS_PER_PAGE(dn.node_page, dst_inode) - | |
1387 | dn.ofs_in_node, len - i); | |
1388 | do { | |
1389 | dn.data_blkaddr = f2fs_data_blkaddr(&dn); | |
1390 | f2fs_truncate_data_blocks_range(&dn, 1); | |
1391 | ||
1392 | if (do_replace[i]) { | |
1393 | f2fs_i_blocks_write(src_inode, | |
1394 | 1, false, false); | |
1395 | f2fs_i_blocks_write(dst_inode, | |
1396 | 1, true, false); | |
1397 | f2fs_replace_block(sbi, &dn, dn.data_blkaddr, | |
1398 | blkaddr[i], ni.version, true, false); | |
1399 | ||
1400 | do_replace[i] = 0; | |
1401 | } | |
1402 | dn.ofs_in_node++; | |
1403 | i++; | |
1404 | new_size = (loff_t)(dst + i) << PAGE_SHIFT; | |
1405 | if (dst_inode->i_size < new_size) | |
1406 | f2fs_i_size_write(dst_inode, new_size); | |
1407 | } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); | |
1408 | ||
1409 | f2fs_put_dnode(&dn); | |
1410 | } else { | |
1411 | struct page *psrc, *pdst; | |
1412 | ||
1413 | psrc = f2fs_get_lock_data_page(src_inode, | |
1414 | src + i, true); | |
1415 | if (IS_ERR(psrc)) | |
1416 | return PTR_ERR(psrc); | |
1417 | pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i, | |
1418 | true); | |
1419 | if (IS_ERR(pdst)) { | |
1420 | f2fs_put_page(psrc, 1); | |
1421 | return PTR_ERR(pdst); | |
1422 | } | |
1423 | ||
1424 | f2fs_wait_on_page_writeback(pdst, DATA, true, true); | |
1425 | ||
1426 | memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); | |
1427 | set_page_dirty(pdst); | |
1428 | set_page_private_gcing(pdst); | |
1429 | f2fs_put_page(pdst, 1); | |
1430 | f2fs_put_page(psrc, 1); | |
1431 | ||
1432 | ret = f2fs_truncate_hole(src_inode, | |
1433 | src + i, src + i + 1); | |
1434 | if (ret) | |
1435 | return ret; | |
1436 | i++; | |
1437 | } | |
1438 | } | |
1439 | return 0; | |
1440 | } | |
1441 | ||
1442 | static int __exchange_data_block(struct inode *src_inode, | |
1443 | struct inode *dst_inode, pgoff_t src, pgoff_t dst, | |
1444 | pgoff_t len, bool full) | |
1445 | { | |
1446 | block_t *src_blkaddr; | |
1447 | int *do_replace; | |
1448 | pgoff_t olen; | |
1449 | int ret; | |
1450 | ||
1451 | while (len) { | |
1452 | olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len); | |
1453 | ||
1454 | src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), | |
1455 | array_size(olen, sizeof(block_t)), | |
1456 | GFP_NOFS); | |
1457 | if (!src_blkaddr) | |
1458 | return -ENOMEM; | |
1459 | ||
1460 | do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), | |
1461 | array_size(olen, sizeof(int)), | |
1462 | GFP_NOFS); | |
1463 | if (!do_replace) { | |
1464 | kvfree(src_blkaddr); | |
1465 | return -ENOMEM; | |
1466 | } | |
1467 | ||
1468 | ret = __read_out_blkaddrs(src_inode, src_blkaddr, | |
1469 | do_replace, src, olen); | |
1470 | if (ret) | |
1471 | goto roll_back; | |
1472 | ||
1473 | ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, | |
1474 | do_replace, src, dst, olen, full); | |
1475 | if (ret) | |
1476 | goto roll_back; | |
1477 | ||
1478 | src += olen; | |
1479 | dst += olen; | |
1480 | len -= olen; | |
1481 | ||
1482 | kvfree(src_blkaddr); | |
1483 | kvfree(do_replace); | |
1484 | } | |
1485 | return 0; | |
1486 | ||
1487 | roll_back: | |
1488 | __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen); | |
1489 | kvfree(src_blkaddr); | |
1490 | kvfree(do_replace); | |
1491 | return ret; | |
1492 | } | |
1493 | ||
1494 | static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) | |
1495 | { | |
1496 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1497 | pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); | |
1498 | pgoff_t start = offset >> PAGE_SHIFT; | |
1499 | pgoff_t end = (offset + len) >> PAGE_SHIFT; | |
1500 | int ret; | |
1501 | ||
1502 | f2fs_balance_fs(sbi, true); | |
1503 | ||
1504 | /* avoid gc operation during block exchange */ | |
1505 | f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1506 | filemap_invalidate_lock(inode->i_mapping); | |
1507 | ||
1508 | f2fs_lock_op(sbi); | |
1509 | f2fs_drop_extent_tree(inode); | |
1510 | truncate_pagecache(inode, offset); | |
1511 | ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); | |
1512 | f2fs_unlock_op(sbi); | |
1513 | ||
1514 | filemap_invalidate_unlock(inode->i_mapping); | |
1515 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1516 | return ret; | |
1517 | } | |
1518 | ||
1519 | static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |
1520 | { | |
1521 | loff_t new_size; | |
1522 | int ret; | |
1523 | ||
1524 | if (offset + len >= i_size_read(inode)) | |
1525 | return -EINVAL; | |
1526 | ||
1527 | /* collapse range should be aligned to block size of f2fs. */ | |
1528 | if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) | |
1529 | return -EINVAL; | |
1530 | ||
1531 | ret = f2fs_convert_inline_inode(inode); | |
1532 | if (ret) | |
1533 | return ret; | |
1534 | ||
1535 | /* write out all dirty pages from offset */ | |
1536 | ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); | |
1537 | if (ret) | |
1538 | return ret; | |
1539 | ||
1540 | ret = f2fs_do_collapse(inode, offset, len); | |
1541 | if (ret) | |
1542 | return ret; | |
1543 | ||
1544 | /* write out all moved pages, if possible */ | |
1545 | filemap_invalidate_lock(inode->i_mapping); | |
1546 | filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); | |
1547 | truncate_pagecache(inode, offset); | |
1548 | ||
1549 | new_size = i_size_read(inode) - len; | |
1550 | ret = f2fs_truncate_blocks(inode, new_size, true); | |
1551 | filemap_invalidate_unlock(inode->i_mapping); | |
1552 | if (!ret) | |
1553 | f2fs_i_size_write(inode, new_size); | |
1554 | return ret; | |
1555 | } | |
1556 | ||
1557 | static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, | |
1558 | pgoff_t end) | |
1559 | { | |
1560 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | |
1561 | pgoff_t index = start; | |
1562 | unsigned int ofs_in_node = dn->ofs_in_node; | |
1563 | blkcnt_t count = 0; | |
1564 | int ret; | |
1565 | ||
1566 | for (; index < end; index++, dn->ofs_in_node++) { | |
1567 | if (f2fs_data_blkaddr(dn) == NULL_ADDR) | |
1568 | count++; | |
1569 | } | |
1570 | ||
1571 | dn->ofs_in_node = ofs_in_node; | |
1572 | ret = f2fs_reserve_new_blocks(dn, count); | |
1573 | if (ret) | |
1574 | return ret; | |
1575 | ||
1576 | dn->ofs_in_node = ofs_in_node; | |
1577 | for (index = start; index < end; index++, dn->ofs_in_node++) { | |
1578 | dn->data_blkaddr = f2fs_data_blkaddr(dn); | |
1579 | /* | |
1580 | * f2fs_reserve_new_blocks will not guarantee entire block | |
1581 | * allocation. | |
1582 | */ | |
1583 | if (dn->data_blkaddr == NULL_ADDR) { | |
1584 | ret = -ENOSPC; | |
1585 | break; | |
1586 | } | |
1587 | ||
1588 | if (dn->data_blkaddr == NEW_ADDR) | |
1589 | continue; | |
1590 | ||
1591 | if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, | |
1592 | DATA_GENERIC_ENHANCE)) { | |
1593 | ret = -EFSCORRUPTED; | |
1594 | break; | |
1595 | } | |
1596 | ||
1597 | f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); | |
1598 | f2fs_set_data_blkaddr(dn, NEW_ADDR); | |
1599 | } | |
1600 | ||
1601 | f2fs_update_read_extent_cache_range(dn, start, 0, index - start); | |
1602 | f2fs_update_age_extent_cache_range(dn, start, index - start); | |
1603 | ||
1604 | return ret; | |
1605 | } | |
1606 | ||
1607 | static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, | |
1608 | int mode) | |
1609 | { | |
1610 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1611 | struct address_space *mapping = inode->i_mapping; | |
1612 | pgoff_t index, pg_start, pg_end; | |
1613 | loff_t new_size = i_size_read(inode); | |
1614 | loff_t off_start, off_end; | |
1615 | int ret = 0; | |
1616 | ||
1617 | ret = inode_newsize_ok(inode, (len + offset)); | |
1618 | if (ret) | |
1619 | return ret; | |
1620 | ||
1621 | ret = f2fs_convert_inline_inode(inode); | |
1622 | if (ret) | |
1623 | return ret; | |
1624 | ||
1625 | ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); | |
1626 | if (ret) | |
1627 | return ret; | |
1628 | ||
1629 | pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; | |
1630 | pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; | |
1631 | ||
1632 | off_start = offset & (PAGE_SIZE - 1); | |
1633 | off_end = (offset + len) & (PAGE_SIZE - 1); | |
1634 | ||
1635 | if (pg_start == pg_end) { | |
1636 | ret = fill_zero(inode, pg_start, off_start, | |
1637 | off_end - off_start); | |
1638 | if (ret) | |
1639 | return ret; | |
1640 | ||
1641 | new_size = max_t(loff_t, new_size, offset + len); | |
1642 | } else { | |
1643 | if (off_start) { | |
1644 | ret = fill_zero(inode, pg_start++, off_start, | |
1645 | PAGE_SIZE - off_start); | |
1646 | if (ret) | |
1647 | return ret; | |
1648 | ||
1649 | new_size = max_t(loff_t, new_size, | |
1650 | (loff_t)pg_start << PAGE_SHIFT); | |
1651 | } | |
1652 | ||
1653 | for (index = pg_start; index < pg_end;) { | |
1654 | struct dnode_of_data dn; | |
1655 | unsigned int end_offset; | |
1656 | pgoff_t end; | |
1657 | ||
1658 | f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1659 | filemap_invalidate_lock(mapping); | |
1660 | ||
1661 | truncate_pagecache_range(inode, | |
1662 | (loff_t)index << PAGE_SHIFT, | |
1663 | ((loff_t)pg_end << PAGE_SHIFT) - 1); | |
1664 | ||
1665 | f2fs_lock_op(sbi); | |
1666 | ||
1667 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
1668 | ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); | |
1669 | if (ret) { | |
1670 | f2fs_unlock_op(sbi); | |
1671 | filemap_invalidate_unlock(mapping); | |
1672 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1673 | goto out; | |
1674 | } | |
1675 | ||
1676 | end_offset = ADDRS_PER_PAGE(dn.node_page, inode); | |
1677 | end = min(pg_end, end_offset - dn.ofs_in_node + index); | |
1678 | ||
1679 | ret = f2fs_do_zero_range(&dn, index, end); | |
1680 | f2fs_put_dnode(&dn); | |
1681 | ||
1682 | f2fs_unlock_op(sbi); | |
1683 | filemap_invalidate_unlock(mapping); | |
1684 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1685 | ||
1686 | f2fs_balance_fs(sbi, dn.node_changed); | |
1687 | ||
1688 | if (ret) | |
1689 | goto out; | |
1690 | ||
1691 | index = end; | |
1692 | new_size = max_t(loff_t, new_size, | |
1693 | (loff_t)index << PAGE_SHIFT); | |
1694 | } | |
1695 | ||
1696 | if (off_end) { | |
1697 | ret = fill_zero(inode, pg_end, 0, off_end); | |
1698 | if (ret) | |
1699 | goto out; | |
1700 | ||
1701 | new_size = max_t(loff_t, new_size, offset + len); | |
1702 | } | |
1703 | } | |
1704 | ||
1705 | out: | |
1706 | if (new_size > i_size_read(inode)) { | |
1707 | if (mode & FALLOC_FL_KEEP_SIZE) | |
1708 | file_set_keep_isize(inode); | |
1709 | else | |
1710 | f2fs_i_size_write(inode, new_size); | |
1711 | } | |
1712 | return ret; | |
1713 | } | |
1714 | ||
1715 | static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) | |
1716 | { | |
1717 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1718 | struct address_space *mapping = inode->i_mapping; | |
1719 | pgoff_t nr, pg_start, pg_end, delta, idx; | |
1720 | loff_t new_size; | |
1721 | int ret = 0; | |
1722 | ||
1723 | new_size = i_size_read(inode) + len; | |
1724 | ret = inode_newsize_ok(inode, new_size); | |
1725 | if (ret) | |
1726 | return ret; | |
1727 | ||
1728 | if (offset >= i_size_read(inode)) | |
1729 | return -EINVAL; | |
1730 | ||
1731 | /* insert range should be aligned to block size of f2fs. */ | |
1732 | if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) | |
1733 | return -EINVAL; | |
1734 | ||
1735 | ret = f2fs_convert_inline_inode(inode); | |
1736 | if (ret) | |
1737 | return ret; | |
1738 | ||
1739 | f2fs_balance_fs(sbi, true); | |
1740 | ||
1741 | filemap_invalidate_lock(mapping); | |
1742 | ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); | |
1743 | filemap_invalidate_unlock(mapping); | |
1744 | if (ret) | |
1745 | return ret; | |
1746 | ||
1747 | /* write out all dirty pages from offset */ | |
1748 | ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); | |
1749 | if (ret) | |
1750 | return ret; | |
1751 | ||
1752 | pg_start = offset >> PAGE_SHIFT; | |
1753 | pg_end = (offset + len) >> PAGE_SHIFT; | |
1754 | delta = pg_end - pg_start; | |
1755 | idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); | |
1756 | ||
1757 | /* avoid gc operation during block exchange */ | |
1758 | f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1759 | filemap_invalidate_lock(mapping); | |
1760 | truncate_pagecache(inode, offset); | |
1761 | ||
1762 | while (!ret && idx > pg_start) { | |
1763 | nr = idx - pg_start; | |
1764 | if (nr > delta) | |
1765 | nr = delta; | |
1766 | idx -= nr; | |
1767 | ||
1768 | f2fs_lock_op(sbi); | |
1769 | f2fs_drop_extent_tree(inode); | |
1770 | ||
1771 | ret = __exchange_data_block(inode, inode, idx, | |
1772 | idx + delta, nr, false); | |
1773 | f2fs_unlock_op(sbi); | |
1774 | } | |
1775 | filemap_invalidate_unlock(mapping); | |
1776 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
1777 | if (ret) | |
1778 | return ret; | |
1779 | ||
1780 | /* write out all moved pages, if possible */ | |
1781 | filemap_invalidate_lock(mapping); | |
1782 | ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); | |
1783 | truncate_pagecache(inode, offset); | |
1784 | filemap_invalidate_unlock(mapping); | |
1785 | ||
1786 | if (!ret) | |
1787 | f2fs_i_size_write(inode, new_size); | |
1788 | return ret; | |
1789 | } | |
1790 | ||
1791 | static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, | |
1792 | loff_t len, int mode) | |
1793 | { | |
1794 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
1795 | struct f2fs_map_blocks map = { .m_next_pgofs = NULL, | |
1796 | .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, | |
1797 | .m_may_create = true }; | |
1798 | struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, | |
1799 | .init_gc_type = FG_GC, | |
1800 | .should_migrate_blocks = false, | |
1801 | .err_gc_skipped = true, | |
1802 | .nr_free_secs = 0 }; | |
1803 | pgoff_t pg_start, pg_end; | |
1804 | loff_t new_size; | |
1805 | loff_t off_end; | |
1806 | block_t expanded = 0; | |
1807 | int err; | |
1808 | ||
1809 | err = inode_newsize_ok(inode, (len + offset)); | |
1810 | if (err) | |
1811 | return err; | |
1812 | ||
1813 | err = f2fs_convert_inline_inode(inode); | |
1814 | if (err) | |
1815 | return err; | |
1816 | ||
1817 | f2fs_balance_fs(sbi, true); | |
1818 | ||
1819 | pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; | |
1820 | pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT; | |
1821 | off_end = (offset + len) & (PAGE_SIZE - 1); | |
1822 | ||
1823 | map.m_lblk = pg_start; | |
1824 | map.m_len = pg_end - pg_start; | |
1825 | if (off_end) | |
1826 | map.m_len++; | |
1827 | ||
1828 | if (!map.m_len) | |
1829 | return 0; | |
1830 | ||
1831 | if (f2fs_is_pinned_file(inode)) { | |
1832 | block_t sec_blks = CAP_BLKS_PER_SEC(sbi); | |
1833 | block_t sec_len = roundup(map.m_len, sec_blks); | |
1834 | ||
1835 | map.m_len = sec_blks; | |
1836 | next_alloc: | |
1837 | if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ? | |
1838 | ZONED_PIN_SEC_REQUIRED_COUNT : | |
1839 | GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { | |
1840 | f2fs_down_write(&sbi->gc_lock); | |
1841 | stat_inc_gc_call_count(sbi, FOREGROUND); | |
1842 | err = f2fs_gc(sbi, &gc_control); | |
1843 | if (err && err != -ENODATA) | |
1844 | goto out_err; | |
1845 | } | |
1846 | ||
1847 | f2fs_down_write(&sbi->pin_sem); | |
1848 | ||
1849 | err = f2fs_allocate_pinning_section(sbi); | |
1850 | if (err) { | |
1851 | f2fs_up_write(&sbi->pin_sem); | |
1852 | goto out_err; | |
1853 | } | |
1854 | ||
1855 | map.m_seg_type = CURSEG_COLD_DATA_PINNED; | |
1856 | err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); | |
1857 | file_dont_truncate(inode); | |
1858 | ||
1859 | f2fs_up_write(&sbi->pin_sem); | |
1860 | ||
1861 | expanded += map.m_len; | |
1862 | sec_len -= map.m_len; | |
1863 | map.m_lblk += map.m_len; | |
1864 | if (!err && sec_len) | |
1865 | goto next_alloc; | |
1866 | ||
1867 | map.m_len = expanded; | |
1868 | } else { | |
1869 | err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO); | |
1870 | expanded = map.m_len; | |
1871 | } | |
1872 | out_err: | |
1873 | if (err) { | |
1874 | pgoff_t last_off; | |
1875 | ||
1876 | if (!expanded) | |
1877 | return err; | |
1878 | ||
1879 | last_off = pg_start + expanded - 1; | |
1880 | ||
1881 | /* update new size to the failed position */ | |
1882 | new_size = (last_off == pg_end) ? offset + len : | |
1883 | (loff_t)(last_off + 1) << PAGE_SHIFT; | |
1884 | } else { | |
1885 | new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; | |
1886 | } | |
1887 | ||
1888 | if (new_size > i_size_read(inode)) { | |
1889 | if (mode & FALLOC_FL_KEEP_SIZE) | |
1890 | file_set_keep_isize(inode); | |
1891 | else | |
1892 | f2fs_i_size_write(inode, new_size); | |
1893 | } | |
1894 | ||
1895 | return err; | |
1896 | } | |
1897 | ||
1898 | static long f2fs_fallocate(struct file *file, int mode, | |
1899 | loff_t offset, loff_t len) | |
1900 | { | |
1901 | struct inode *inode = file_inode(file); | |
1902 | long ret = 0; | |
1903 | ||
1904 | if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) | |
1905 | return -EIO; | |
1906 | if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) | |
1907 | return -ENOSPC; | |
1908 | if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) | |
1909 | return -EOPNOTSUPP; | |
1910 | ||
1911 | /* f2fs only support ->fallocate for regular file */ | |
1912 | if (!S_ISREG(inode->i_mode)) | |
1913 | return -EINVAL; | |
1914 | ||
1915 | if (IS_ENCRYPTED(inode) && | |
1916 | (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) | |
1917 | return -EOPNOTSUPP; | |
1918 | ||
1919 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | | |
1920 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | | |
1921 | FALLOC_FL_INSERT_RANGE)) | |
1922 | return -EOPNOTSUPP; | |
1923 | ||
1924 | inode_lock(inode); | |
1925 | ||
1926 | /* | |
1927 | * Pinned file should not support partial truncation since the block | |
1928 | * can be used by applications. | |
1929 | */ | |
1930 | if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && | |
1931 | (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | | |
1932 | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { | |
1933 | ret = -EOPNOTSUPP; | |
1934 | goto out; | |
1935 | } | |
1936 | ||
1937 | ret = file_modified(file); | |
1938 | if (ret) | |
1939 | goto out; | |
1940 | ||
1941 | /* | |
1942 | * wait for inflight dio, blocks should be removed after IO | |
1943 | * completion. | |
1944 | */ | |
1945 | inode_dio_wait(inode); | |
1946 | ||
1947 | if (mode & FALLOC_FL_PUNCH_HOLE) { | |
1948 | if (offset >= inode->i_size) | |
1949 | goto out; | |
1950 | ||
1951 | ret = f2fs_punch_hole(inode, offset, len); | |
1952 | } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { | |
1953 | ret = f2fs_collapse_range(inode, offset, len); | |
1954 | } else if (mode & FALLOC_FL_ZERO_RANGE) { | |
1955 | ret = f2fs_zero_range(inode, offset, len, mode); | |
1956 | } else if (mode & FALLOC_FL_INSERT_RANGE) { | |
1957 | ret = f2fs_insert_range(inode, offset, len); | |
1958 | } else { | |
1959 | ret = f2fs_expand_inode_data(inode, offset, len, mode); | |
1960 | } | |
1961 | ||
1962 | if (!ret) { | |
1963 | inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); | |
1964 | f2fs_mark_inode_dirty_sync(inode, false); | |
1965 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); | |
1966 | } | |
1967 | ||
1968 | out: | |
1969 | inode_unlock(inode); | |
1970 | ||
1971 | trace_f2fs_fallocate(inode, mode, offset, len, ret); | |
1972 | return ret; | |
1973 | } | |
1974 | ||
1975 | static int f2fs_release_file(struct inode *inode, struct file *filp) | |
1976 | { | |
1977 | /* | |
1978 | * f2fs_release_file is called at every close calls. So we should | |
1979 | * not drop any inmemory pages by close called by other process. | |
1980 | */ | |
1981 | if (!(filp->f_mode & FMODE_WRITE) || | |
1982 | atomic_read(&inode->i_writecount) != 1) | |
1983 | return 0; | |
1984 | ||
1985 | inode_lock(inode); | |
1986 | f2fs_abort_atomic_write(inode, true); | |
1987 | inode_unlock(inode); | |
1988 | ||
1989 | return 0; | |
1990 | } | |
1991 | ||
1992 | static int f2fs_file_flush(struct file *file, fl_owner_t id) | |
1993 | { | |
1994 | struct inode *inode = file_inode(file); | |
1995 | ||
1996 | /* | |
1997 | * If the process doing a transaction is crashed, we should do | |
1998 | * roll-back. Otherwise, other reader/write can see corrupted database | |
1999 | * until all the writers close its file. Since this should be done | |
2000 | * before dropping file lock, it needs to do in ->flush. | |
2001 | */ | |
2002 | if (F2FS_I(inode)->atomic_write_task == current && | |
2003 | (current->flags & PF_EXITING)) { | |
2004 | inode_lock(inode); | |
2005 | f2fs_abort_atomic_write(inode, true); | |
2006 | inode_unlock(inode); | |
2007 | } | |
2008 | ||
2009 | return 0; | |
2010 | } | |
2011 | ||
2012 | static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) | |
2013 | { | |
2014 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
2015 | u32 masked_flags = fi->i_flags & mask; | |
2016 | ||
2017 | /* mask can be shrunk by flags_valid selector */ | |
2018 | iflags &= mask; | |
2019 | ||
2020 | /* Is it quota file? Do not allow user to mess with it */ | |
2021 | if (IS_NOQUOTA(inode)) | |
2022 | return -EPERM; | |
2023 | ||
2024 | if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { | |
2025 | if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) | |
2026 | return -EOPNOTSUPP; | |
2027 | if (!f2fs_empty_dir(inode)) | |
2028 | return -ENOTEMPTY; | |
2029 | } | |
2030 | ||
2031 | if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { | |
2032 | if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) | |
2033 | return -EOPNOTSUPP; | |
2034 | if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) | |
2035 | return -EINVAL; | |
2036 | } | |
2037 | ||
2038 | if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { | |
2039 | if (masked_flags & F2FS_COMPR_FL) { | |
2040 | if (!f2fs_disable_compressed_file(inode)) | |
2041 | return -EINVAL; | |
2042 | } else { | |
2043 | /* try to convert inline_data to support compression */ | |
2044 | int err = f2fs_convert_inline_inode(inode); | |
2045 | if (err) | |
2046 | return err; | |
2047 | ||
2048 | f2fs_down_write(&fi->i_sem); | |
2049 | if (!f2fs_may_compress(inode) || | |
2050 | (S_ISREG(inode->i_mode) && | |
2051 | F2FS_HAS_BLOCKS(inode))) { | |
2052 | f2fs_up_write(&fi->i_sem); | |
2053 | return -EINVAL; | |
2054 | } | |
2055 | err = set_compress_context(inode); | |
2056 | f2fs_up_write(&fi->i_sem); | |
2057 | ||
2058 | if (err) | |
2059 | return err; | |
2060 | } | |
2061 | } | |
2062 | ||
2063 | fi->i_flags = iflags | (fi->i_flags & ~mask); | |
2064 | f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && | |
2065 | (fi->i_flags & F2FS_NOCOMP_FL)); | |
2066 | ||
2067 | if (fi->i_flags & F2FS_PROJINHERIT_FL) | |
2068 | set_inode_flag(inode, FI_PROJ_INHERIT); | |
2069 | else | |
2070 | clear_inode_flag(inode, FI_PROJ_INHERIT); | |
2071 | ||
2072 | inode_set_ctime_current(inode); | |
2073 | f2fs_set_inode_flags(inode); | |
2074 | f2fs_mark_inode_dirty_sync(inode, true); | |
2075 | return 0; | |
2076 | } | |
2077 | ||
2078 | /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */ | |
2079 | ||
2080 | /* | |
2081 | * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry | |
2082 | * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to | |
2083 | * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add | |
2084 | * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. | |
2085 | * | |
2086 | * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and | |
2087 | * FS_IOC_FSSETXATTR is done by the VFS. | |
2088 | */ | |
2089 | ||
2090 | static const struct { | |
2091 | u32 iflag; | |
2092 | u32 fsflag; | |
2093 | } f2fs_fsflags_map[] = { | |
2094 | { F2FS_COMPR_FL, FS_COMPR_FL }, | |
2095 | { F2FS_SYNC_FL, FS_SYNC_FL }, | |
2096 | { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, | |
2097 | { F2FS_APPEND_FL, FS_APPEND_FL }, | |
2098 | { F2FS_NODUMP_FL, FS_NODUMP_FL }, | |
2099 | { F2FS_NOATIME_FL, FS_NOATIME_FL }, | |
2100 | { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, | |
2101 | { F2FS_INDEX_FL, FS_INDEX_FL }, | |
2102 | { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, | |
2103 | { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, | |
2104 | { F2FS_CASEFOLD_FL, FS_CASEFOLD_FL }, | |
2105 | }; | |
2106 | ||
2107 | #define F2FS_GETTABLE_FS_FL ( \ | |
2108 | FS_COMPR_FL | \ | |
2109 | FS_SYNC_FL | \ | |
2110 | FS_IMMUTABLE_FL | \ | |
2111 | FS_APPEND_FL | \ | |
2112 | FS_NODUMP_FL | \ | |
2113 | FS_NOATIME_FL | \ | |
2114 | FS_NOCOMP_FL | \ | |
2115 | FS_INDEX_FL | \ | |
2116 | FS_DIRSYNC_FL | \ | |
2117 | FS_PROJINHERIT_FL | \ | |
2118 | FS_ENCRYPT_FL | \ | |
2119 | FS_INLINE_DATA_FL | \ | |
2120 | FS_NOCOW_FL | \ | |
2121 | FS_VERITY_FL | \ | |
2122 | FS_CASEFOLD_FL) | |
2123 | ||
2124 | #define F2FS_SETTABLE_FS_FL ( \ | |
2125 | FS_COMPR_FL | \ | |
2126 | FS_SYNC_FL | \ | |
2127 | FS_IMMUTABLE_FL | \ | |
2128 | FS_APPEND_FL | \ | |
2129 | FS_NODUMP_FL | \ | |
2130 | FS_NOATIME_FL | \ | |
2131 | FS_NOCOMP_FL | \ | |
2132 | FS_DIRSYNC_FL | \ | |
2133 | FS_PROJINHERIT_FL | \ | |
2134 | FS_CASEFOLD_FL) | |
2135 | ||
2136 | /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ | |
2137 | static inline u32 f2fs_iflags_to_fsflags(u32 iflags) | |
2138 | { | |
2139 | u32 fsflags = 0; | |
2140 | int i; | |
2141 | ||
2142 | for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) | |
2143 | if (iflags & f2fs_fsflags_map[i].iflag) | |
2144 | fsflags |= f2fs_fsflags_map[i].fsflag; | |
2145 | ||
2146 | return fsflags; | |
2147 | } | |
2148 | ||
2149 | /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ | |
2150 | static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) | |
2151 | { | |
2152 | u32 iflags = 0; | |
2153 | int i; | |
2154 | ||
2155 | for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) | |
2156 | if (fsflags & f2fs_fsflags_map[i].fsflag) | |
2157 | iflags |= f2fs_fsflags_map[i].iflag; | |
2158 | ||
2159 | return iflags; | |
2160 | } | |
2161 | ||
2162 | static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) | |
2163 | { | |
2164 | struct inode *inode = file_inode(filp); | |
2165 | ||
2166 | return put_user(inode->i_generation, (int __user *)arg); | |
2167 | } | |
2168 | ||
2169 | static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) | |
2170 | { | |
2171 | struct inode *inode = file_inode(filp); | |
2172 | struct mnt_idmap *idmap = file_mnt_idmap(filp); | |
2173 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
2174 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
2175 | loff_t isize; | |
2176 | int ret; | |
2177 | ||
2178 | if (!(filp->f_mode & FMODE_WRITE)) | |
2179 | return -EBADF; | |
2180 | ||
2181 | if (!inode_owner_or_capable(idmap, inode)) | |
2182 | return -EACCES; | |
2183 | ||
2184 | if (!S_ISREG(inode->i_mode)) | |
2185 | return -EINVAL; | |
2186 | ||
2187 | if (filp->f_flags & O_DIRECT) | |
2188 | return -EINVAL; | |
2189 | ||
2190 | ret = mnt_want_write_file(filp); | |
2191 | if (ret) | |
2192 | return ret; | |
2193 | ||
2194 | inode_lock(inode); | |
2195 | ||
2196 | if (!f2fs_disable_compressed_file(inode) || | |
2197 | f2fs_is_pinned_file(inode)) { | |
2198 | ret = -EINVAL; | |
2199 | goto out; | |
2200 | } | |
2201 | ||
2202 | if (f2fs_is_atomic_file(inode)) | |
2203 | goto out; | |
2204 | ||
2205 | ret = f2fs_convert_inline_inode(inode); | |
2206 | if (ret) | |
2207 | goto out; | |
2208 | ||
2209 | f2fs_down_write(&fi->i_gc_rwsem[WRITE]); | |
2210 | f2fs_down_write(&fi->i_gc_rwsem[READ]); | |
2211 | ||
2212 | /* | |
2213 | * Should wait end_io to count F2FS_WB_CP_DATA correctly by | |
2214 | * f2fs_is_atomic_file. | |
2215 | */ | |
2216 | if (get_dirty_pages(inode)) | |
2217 | f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", | |
2218 | inode->i_ino, get_dirty_pages(inode)); | |
2219 | ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); | |
2220 | if (ret) | |
2221 | goto out_unlock; | |
2222 | ||
2223 | /* Check if the inode already has a COW inode */ | |
2224 | if (fi->cow_inode == NULL) { | |
2225 | /* Create a COW inode for atomic write */ | |
2226 | struct dentry *dentry = file_dentry(filp); | |
2227 | struct inode *dir = d_inode(dentry->d_parent); | |
2228 | ||
2229 | ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); | |
2230 | if (ret) | |
2231 | goto out_unlock; | |
2232 | ||
2233 | set_inode_flag(fi->cow_inode, FI_COW_FILE); | |
2234 | clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); | |
2235 | ||
2236 | /* Set the COW inode's atomic_inode to the atomic inode */ | |
2237 | F2FS_I(fi->cow_inode)->atomic_inode = inode; | |
2238 | } else { | |
2239 | /* Reuse the already created COW inode */ | |
2240 | f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); | |
2241 | ||
2242 | invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); | |
2243 | ||
2244 | ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); | |
2245 | if (ret) | |
2246 | goto out_unlock; | |
2247 | } | |
2248 | ||
2249 | f2fs_write_inode(inode, NULL); | |
2250 | ||
2251 | stat_inc_atomic_inode(inode); | |
2252 | ||
2253 | set_inode_flag(inode, FI_ATOMIC_FILE); | |
2254 | ||
2255 | isize = i_size_read(inode); | |
2256 | fi->original_i_size = isize; | |
2257 | if (truncate) { | |
2258 | set_inode_flag(inode, FI_ATOMIC_REPLACE); | |
2259 | truncate_inode_pages_final(inode->i_mapping); | |
2260 | f2fs_i_size_write(inode, 0); | |
2261 | isize = 0; | |
2262 | } | |
2263 | f2fs_i_size_write(fi->cow_inode, isize); | |
2264 | ||
2265 | out_unlock: | |
2266 | f2fs_up_write(&fi->i_gc_rwsem[READ]); | |
2267 | f2fs_up_write(&fi->i_gc_rwsem[WRITE]); | |
2268 | if (ret) | |
2269 | goto out; | |
2270 | ||
2271 | f2fs_update_time(sbi, REQ_TIME); | |
2272 | fi->atomic_write_task = current; | |
2273 | stat_update_max_atomic_write(inode); | |
2274 | fi->atomic_write_cnt = 0; | |
2275 | out: | |
2276 | inode_unlock(inode); | |
2277 | mnt_drop_write_file(filp); | |
2278 | return ret; | |
2279 | } | |
2280 | ||
2281 | static int f2fs_ioc_commit_atomic_write(struct file *filp) | |
2282 | { | |
2283 | struct inode *inode = file_inode(filp); | |
2284 | struct mnt_idmap *idmap = file_mnt_idmap(filp); | |
2285 | int ret; | |
2286 | ||
2287 | if (!(filp->f_mode & FMODE_WRITE)) | |
2288 | return -EBADF; | |
2289 | ||
2290 | if (!inode_owner_or_capable(idmap, inode)) | |
2291 | return -EACCES; | |
2292 | ||
2293 | ret = mnt_want_write_file(filp); | |
2294 | if (ret) | |
2295 | return ret; | |
2296 | ||
2297 | f2fs_balance_fs(F2FS_I_SB(inode), true); | |
2298 | ||
2299 | inode_lock(inode); | |
2300 | ||
2301 | if (f2fs_is_atomic_file(inode)) { | |
2302 | ret = f2fs_commit_atomic_write(inode); | |
2303 | if (!ret) | |
2304 | ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); | |
2305 | ||
2306 | f2fs_abort_atomic_write(inode, ret); | |
2307 | } else { | |
2308 | ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); | |
2309 | } | |
2310 | ||
2311 | inode_unlock(inode); | |
2312 | mnt_drop_write_file(filp); | |
2313 | return ret; | |
2314 | } | |
2315 | ||
2316 | static int f2fs_ioc_abort_atomic_write(struct file *filp) | |
2317 | { | |
2318 | struct inode *inode = file_inode(filp); | |
2319 | struct mnt_idmap *idmap = file_mnt_idmap(filp); | |
2320 | int ret; | |
2321 | ||
2322 | if (!(filp->f_mode & FMODE_WRITE)) | |
2323 | return -EBADF; | |
2324 | ||
2325 | if (!inode_owner_or_capable(idmap, inode)) | |
2326 | return -EACCES; | |
2327 | ||
2328 | ret = mnt_want_write_file(filp); | |
2329 | if (ret) | |
2330 | return ret; | |
2331 | ||
2332 | inode_lock(inode); | |
2333 | ||
2334 | f2fs_abort_atomic_write(inode, true); | |
2335 | ||
2336 | inode_unlock(inode); | |
2337 | ||
2338 | mnt_drop_write_file(filp); | |
2339 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); | |
2340 | return ret; | |
2341 | } | |
2342 | ||
2343 | int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, | |
2344 | bool readonly, bool need_lock) | |
2345 | { | |
2346 | struct super_block *sb = sbi->sb; | |
2347 | int ret = 0; | |
2348 | ||
2349 | switch (flag) { | |
2350 | case F2FS_GOING_DOWN_FULLSYNC: | |
2351 | ret = bdev_freeze(sb->s_bdev); | |
2352 | if (ret) | |
2353 | goto out; | |
2354 | f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); | |
2355 | bdev_thaw(sb->s_bdev); | |
2356 | break; | |
2357 | case F2FS_GOING_DOWN_METASYNC: | |
2358 | /* do checkpoint only */ | |
2359 | ret = f2fs_sync_fs(sb, 1); | |
2360 | if (ret) { | |
2361 | if (ret == -EIO) | |
2362 | ret = 0; | |
2363 | goto out; | |
2364 | } | |
2365 | f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); | |
2366 | break; | |
2367 | case F2FS_GOING_DOWN_NOSYNC: | |
2368 | f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); | |
2369 | break; | |
2370 | case F2FS_GOING_DOWN_METAFLUSH: | |
2371 | f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); | |
2372 | f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); | |
2373 | break; | |
2374 | case F2FS_GOING_DOWN_NEED_FSCK: | |
2375 | set_sbi_flag(sbi, SBI_NEED_FSCK); | |
2376 | set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); | |
2377 | set_sbi_flag(sbi, SBI_IS_DIRTY); | |
2378 | /* do checkpoint only */ | |
2379 | ret = f2fs_sync_fs(sb, 1); | |
2380 | if (ret == -EIO) | |
2381 | ret = 0; | |
2382 | goto out; | |
2383 | default: | |
2384 | ret = -EINVAL; | |
2385 | goto out; | |
2386 | } | |
2387 | ||
2388 | if (readonly) | |
2389 | goto out; | |
2390 | ||
2391 | /* | |
2392 | * grab sb->s_umount to avoid racing w/ remount() and other shutdown | |
2393 | * paths. | |
2394 | */ | |
2395 | if (need_lock) | |
2396 | down_write(&sbi->sb->s_umount); | |
2397 | ||
2398 | f2fs_stop_gc_thread(sbi); | |
2399 | f2fs_stop_discard_thread(sbi); | |
2400 | ||
2401 | f2fs_drop_discard_cmd(sbi); | |
2402 | clear_opt(sbi, DISCARD); | |
2403 | ||
2404 | if (need_lock) | |
2405 | up_write(&sbi->sb->s_umount); | |
2406 | ||
2407 | f2fs_update_time(sbi, REQ_TIME); | |
2408 | out: | |
2409 | ||
2410 | trace_f2fs_shutdown(sbi, flag, ret); | |
2411 | ||
2412 | return ret; | |
2413 | } | |
2414 | ||
2415 | static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) | |
2416 | { | |
2417 | struct inode *inode = file_inode(filp); | |
2418 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
2419 | __u32 in; | |
2420 | int ret; | |
2421 | bool need_drop = false, readonly = false; | |
2422 | ||
2423 | if (!capable(CAP_SYS_ADMIN)) | |
2424 | return -EPERM; | |
2425 | ||
2426 | if (get_user(in, (__u32 __user *)arg)) | |
2427 | return -EFAULT; | |
2428 | ||
2429 | if (in != F2FS_GOING_DOWN_FULLSYNC) { | |
2430 | ret = mnt_want_write_file(filp); | |
2431 | if (ret) { | |
2432 | if (ret != -EROFS) | |
2433 | return ret; | |
2434 | ||
2435 | /* fallback to nosync shutdown for readonly fs */ | |
2436 | in = F2FS_GOING_DOWN_NOSYNC; | |
2437 | readonly = true; | |
2438 | } else { | |
2439 | need_drop = true; | |
2440 | } | |
2441 | } | |
2442 | ||
2443 | ret = f2fs_do_shutdown(sbi, in, readonly, true); | |
2444 | ||
2445 | if (need_drop) | |
2446 | mnt_drop_write_file(filp); | |
2447 | ||
2448 | return ret; | |
2449 | } | |
2450 | ||
2451 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) | |
2452 | { | |
2453 | struct inode *inode = file_inode(filp); | |
2454 | struct super_block *sb = inode->i_sb; | |
2455 | struct fstrim_range range; | |
2456 | int ret; | |
2457 | ||
2458 | if (!capable(CAP_SYS_ADMIN)) | |
2459 | return -EPERM; | |
2460 | ||
2461 | if (!f2fs_hw_support_discard(F2FS_SB(sb))) | |
2462 | return -EOPNOTSUPP; | |
2463 | ||
2464 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | |
2465 | sizeof(range))) | |
2466 | return -EFAULT; | |
2467 | ||
2468 | ret = mnt_want_write_file(filp); | |
2469 | if (ret) | |
2470 | return ret; | |
2471 | ||
2472 | range.minlen = max((unsigned int)range.minlen, | |
2473 | bdev_discard_granularity(sb->s_bdev)); | |
2474 | ret = f2fs_trim_fs(F2FS_SB(sb), &range); | |
2475 | mnt_drop_write_file(filp); | |
2476 | if (ret < 0) | |
2477 | return ret; | |
2478 | ||
2479 | if (copy_to_user((struct fstrim_range __user *)arg, &range, | |
2480 | sizeof(range))) | |
2481 | return -EFAULT; | |
2482 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); | |
2483 | return 0; | |
2484 | } | |
2485 | ||
2486 | static bool uuid_is_nonzero(__u8 u[16]) | |
2487 | { | |
2488 | int i; | |
2489 | ||
2490 | for (i = 0; i < 16; i++) | |
2491 | if (u[i]) | |
2492 | return true; | |
2493 | return false; | |
2494 | } | |
2495 | ||
2496 | static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) | |
2497 | { | |
2498 | struct inode *inode = file_inode(filp); | |
2499 | int ret; | |
2500 | ||
2501 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) | |
2502 | return -EOPNOTSUPP; | |
2503 | ||
2504 | ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); | |
2505 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); | |
2506 | return ret; | |
2507 | } | |
2508 | ||
2509 | static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) | |
2510 | { | |
2511 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) | |
2512 | return -EOPNOTSUPP; | |
2513 | return fscrypt_ioctl_get_policy(filp, (void __user *)arg); | |
2514 | } | |
2515 | ||
2516 | static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) | |
2517 | { | |
2518 | struct inode *inode = file_inode(filp); | |
2519 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
2520 | u8 encrypt_pw_salt[16]; | |
2521 | int err; | |
2522 | ||
2523 | if (!f2fs_sb_has_encrypt(sbi)) | |
2524 | return -EOPNOTSUPP; | |
2525 | ||
2526 | err = mnt_want_write_file(filp); | |
2527 | if (err) | |
2528 | return err; | |
2529 | ||
2530 | f2fs_down_write(&sbi->sb_lock); | |
2531 | ||
2532 | if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) | |
2533 | goto got_it; | |
2534 | ||
2535 | /* update superblock with uuid */ | |
2536 | generate_random_uuid(sbi->raw_super->encrypt_pw_salt); | |
2537 | ||
2538 | err = f2fs_commit_super(sbi, false); | |
2539 | if (err) { | |
2540 | /* undo new data */ | |
2541 | memset(sbi->raw_super->encrypt_pw_salt, 0, 16); | |
2542 | goto out_err; | |
2543 | } | |
2544 | got_it: | |
2545 | memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16); | |
2546 | out_err: | |
2547 | f2fs_up_write(&sbi->sb_lock); | |
2548 | mnt_drop_write_file(filp); | |
2549 | ||
2550 | if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16)) | |
2551 | err = -EFAULT; | |
2552 | ||
2553 | return err; | |
2554 | } | |
2555 | ||
2556 | static int f2fs_ioc_get_encryption_policy_ex(struct file *filp, | |
2557 | unsigned long arg) | |
2558 | { | |
2559 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) | |
2560 | return -EOPNOTSUPP; | |
2561 | ||
2562 | return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); | |
2563 | } | |
2564 | ||
2565 | static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg) | |
2566 | { | |
2567 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) | |
2568 | return -EOPNOTSUPP; | |
2569 | ||
2570 | return fscrypt_ioctl_add_key(filp, (void __user *)arg); | |
2571 | } | |
2572 | ||
2573 | static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg) | |
2574 | { | |
2575 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) | |
2576 | return -EOPNOTSUPP; | |
2577 | ||
2578 | return fscrypt_ioctl_remove_key(filp, (void __user *)arg); | |
2579 | } | |
2580 | ||
2581 | static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp, | |
2582 | unsigned long arg) | |
2583 | { | |
2584 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) | |
2585 | return -EOPNOTSUPP; | |
2586 | ||
2587 | return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); | |
2588 | } | |
2589 | ||
2590 | static int f2fs_ioc_get_encryption_key_status(struct file *filp, | |
2591 | unsigned long arg) | |
2592 | { | |
2593 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) | |
2594 | return -EOPNOTSUPP; | |
2595 | ||
2596 | return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); | |
2597 | } | |
2598 | ||
2599 | static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg) | |
2600 | { | |
2601 | if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) | |
2602 | return -EOPNOTSUPP; | |
2603 | ||
2604 | return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); | |
2605 | } | |
2606 | ||
2607 | static int f2fs_ioc_gc(struct file *filp, unsigned long arg) | |
2608 | { | |
2609 | struct inode *inode = file_inode(filp); | |
2610 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
2611 | struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO, | |
2612 | .no_bg_gc = false, | |
2613 | .should_migrate_blocks = false, | |
2614 | .nr_free_secs = 0 }; | |
2615 | __u32 sync; | |
2616 | int ret; | |
2617 | ||
2618 | if (!capable(CAP_SYS_ADMIN)) | |
2619 | return -EPERM; | |
2620 | ||
2621 | if (get_user(sync, (__u32 __user *)arg)) | |
2622 | return -EFAULT; | |
2623 | ||
2624 | if (f2fs_readonly(sbi->sb)) | |
2625 | return -EROFS; | |
2626 | ||
2627 | ret = mnt_want_write_file(filp); | |
2628 | if (ret) | |
2629 | return ret; | |
2630 | ||
2631 | if (!sync) { | |
2632 | if (!f2fs_down_write_trylock(&sbi->gc_lock)) { | |
2633 | ret = -EBUSY; | |
2634 | goto out; | |
2635 | } | |
2636 | } else { | |
2637 | f2fs_down_write(&sbi->gc_lock); | |
2638 | } | |
2639 | ||
2640 | gc_control.init_gc_type = sync ? FG_GC : BG_GC; | |
2641 | gc_control.err_gc_skipped = sync; | |
2642 | stat_inc_gc_call_count(sbi, FOREGROUND); | |
2643 | ret = f2fs_gc(sbi, &gc_control); | |
2644 | out: | |
2645 | mnt_drop_write_file(filp); | |
2646 | return ret; | |
2647 | } | |
2648 | ||
2649 | static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) | |
2650 | { | |
2651 | struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); | |
2652 | struct f2fs_gc_control gc_control = { | |
2653 | .init_gc_type = range->sync ? FG_GC : BG_GC, | |
2654 | .no_bg_gc = false, | |
2655 | .should_migrate_blocks = false, | |
2656 | .err_gc_skipped = range->sync, | |
2657 | .nr_free_secs = 0 }; | |
2658 | u64 end; | |
2659 | int ret; | |
2660 | ||
2661 | if (!capable(CAP_SYS_ADMIN)) | |
2662 | return -EPERM; | |
2663 | if (f2fs_readonly(sbi->sb)) | |
2664 | return -EROFS; | |
2665 | ||
2666 | end = range->start + range->len; | |
2667 | if (end < range->start || range->start < MAIN_BLKADDR(sbi) || | |
2668 | end >= MAX_BLKADDR(sbi)) | |
2669 | return -EINVAL; | |
2670 | ||
2671 | ret = mnt_want_write_file(filp); | |
2672 | if (ret) | |
2673 | return ret; | |
2674 | ||
2675 | do_more: | |
2676 | if (!range->sync) { | |
2677 | if (!f2fs_down_write_trylock(&sbi->gc_lock)) { | |
2678 | ret = -EBUSY; | |
2679 | goto out; | |
2680 | } | |
2681 | } else { | |
2682 | f2fs_down_write(&sbi->gc_lock); | |
2683 | } | |
2684 | ||
2685 | gc_control.victim_segno = GET_SEGNO(sbi, range->start); | |
2686 | stat_inc_gc_call_count(sbi, FOREGROUND); | |
2687 | ret = f2fs_gc(sbi, &gc_control); | |
2688 | if (ret) { | |
2689 | if (ret == -EBUSY) | |
2690 | ret = -EAGAIN; | |
2691 | goto out; | |
2692 | } | |
2693 | range->start += CAP_BLKS_PER_SEC(sbi); | |
2694 | if (range->start <= end) | |
2695 | goto do_more; | |
2696 | out: | |
2697 | mnt_drop_write_file(filp); | |
2698 | return ret; | |
2699 | } | |
2700 | ||
2701 | static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) | |
2702 | { | |
2703 | struct f2fs_gc_range range; | |
2704 | ||
2705 | if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, | |
2706 | sizeof(range))) | |
2707 | return -EFAULT; | |
2708 | return __f2fs_ioc_gc_range(filp, &range); | |
2709 | } | |
2710 | ||
2711 | static int f2fs_ioc_write_checkpoint(struct file *filp) | |
2712 | { | |
2713 | struct inode *inode = file_inode(filp); | |
2714 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
2715 | int ret; | |
2716 | ||
2717 | if (!capable(CAP_SYS_ADMIN)) | |
2718 | return -EPERM; | |
2719 | ||
2720 | if (f2fs_readonly(sbi->sb)) | |
2721 | return -EROFS; | |
2722 | ||
2723 | if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { | |
2724 | f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); | |
2725 | return -EINVAL; | |
2726 | } | |
2727 | ||
2728 | ret = mnt_want_write_file(filp); | |
2729 | if (ret) | |
2730 | return ret; | |
2731 | ||
2732 | ret = f2fs_sync_fs(sbi->sb, 1); | |
2733 | ||
2734 | mnt_drop_write_file(filp); | |
2735 | return ret; | |
2736 | } | |
2737 | ||
2738 | static int f2fs_defragment_range(struct f2fs_sb_info *sbi, | |
2739 | struct file *filp, | |
2740 | struct f2fs_defragment *range) | |
2741 | { | |
2742 | struct inode *inode = file_inode(filp); | |
2743 | struct f2fs_map_blocks map = { .m_next_extent = NULL, | |
2744 | .m_seg_type = NO_CHECK_TYPE, | |
2745 | .m_may_create = false }; | |
2746 | struct extent_info ei = {}; | |
2747 | pgoff_t pg_start, pg_end, next_pgofs; | |
2748 | unsigned int total = 0, sec_num; | |
2749 | block_t blk_end = 0; | |
2750 | bool fragmented = false; | |
2751 | int err; | |
2752 | ||
2753 | f2fs_balance_fs(sbi, true); | |
2754 | ||
2755 | inode_lock(inode); | |
2756 | pg_start = range->start >> PAGE_SHIFT; | |
2757 | pg_end = min_t(pgoff_t, | |
2758 | (range->start + range->len) >> PAGE_SHIFT, | |
2759 | DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); | |
2760 | ||
2761 | if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || | |
2762 | f2fs_is_atomic_file(inode)) { | |
2763 | err = -EINVAL; | |
2764 | goto unlock_out; | |
2765 | } | |
2766 | ||
2767 | /* if in-place-update policy is enabled, don't waste time here */ | |
2768 | set_inode_flag(inode, FI_OPU_WRITE); | |
2769 | if (f2fs_should_update_inplace(inode, NULL)) { | |
2770 | err = -EINVAL; | |
2771 | goto out; | |
2772 | } | |
2773 | ||
2774 | /* writeback all dirty pages in the range */ | |
2775 | err = filemap_write_and_wait_range(inode->i_mapping, | |
2776 | pg_start << PAGE_SHIFT, | |
2777 | (pg_end << PAGE_SHIFT) - 1); | |
2778 | if (err) | |
2779 | goto out; | |
2780 | ||
2781 | /* | |
2782 | * lookup mapping info in extent cache, skip defragmenting if physical | |
2783 | * block addresses are continuous. | |
2784 | */ | |
2785 | if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { | |
2786 | if ((pgoff_t)ei.fofs + ei.len >= pg_end) | |
2787 | goto out; | |
2788 | } | |
2789 | ||
2790 | map.m_lblk = pg_start; | |
2791 | map.m_next_pgofs = &next_pgofs; | |
2792 | ||
2793 | /* | |
2794 | * lookup mapping info in dnode page cache, skip defragmenting if all | |
2795 | * physical block addresses are continuous even if there are hole(s) | |
2796 | * in logical blocks. | |
2797 | */ | |
2798 | while (map.m_lblk < pg_end) { | |
2799 | map.m_len = pg_end - map.m_lblk; | |
2800 | err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); | |
2801 | if (err) | |
2802 | goto out; | |
2803 | ||
2804 | if (!(map.m_flags & F2FS_MAP_FLAGS)) { | |
2805 | map.m_lblk = next_pgofs; | |
2806 | continue; | |
2807 | } | |
2808 | ||
2809 | if (blk_end && blk_end != map.m_pblk) | |
2810 | fragmented = true; | |
2811 | ||
2812 | /* record total count of block that we're going to move */ | |
2813 | total += map.m_len; | |
2814 | ||
2815 | blk_end = map.m_pblk + map.m_len; | |
2816 | ||
2817 | map.m_lblk += map.m_len; | |
2818 | } | |
2819 | ||
2820 | if (!fragmented) { | |
2821 | total = 0; | |
2822 | goto out; | |
2823 | } | |
2824 | ||
2825 | sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi)); | |
2826 | ||
2827 | /* | |
2828 | * make sure there are enough free section for LFS allocation, this can | |
2829 | * avoid defragment running in SSR mode when free section are allocated | |
2830 | * intensively | |
2831 | */ | |
2832 | if (has_not_enough_free_secs(sbi, 0, sec_num)) { | |
2833 | err = -EAGAIN; | |
2834 | goto out; | |
2835 | } | |
2836 | ||
2837 | map.m_lblk = pg_start; | |
2838 | map.m_len = pg_end - pg_start; | |
2839 | total = 0; | |
2840 | ||
2841 | while (map.m_lblk < pg_end) { | |
2842 | pgoff_t idx; | |
2843 | int cnt = 0; | |
2844 | ||
2845 | do_map: | |
2846 | map.m_len = pg_end - map.m_lblk; | |
2847 | err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT); | |
2848 | if (err) | |
2849 | goto clear_out; | |
2850 | ||
2851 | if (!(map.m_flags & F2FS_MAP_FLAGS)) { | |
2852 | map.m_lblk = next_pgofs; | |
2853 | goto check; | |
2854 | } | |
2855 | ||
2856 | set_inode_flag(inode, FI_SKIP_WRITES); | |
2857 | ||
2858 | idx = map.m_lblk; | |
2859 | while (idx < map.m_lblk + map.m_len && | |
2860 | cnt < BLKS_PER_SEG(sbi)) { | |
2861 | struct page *page; | |
2862 | ||
2863 | page = f2fs_get_lock_data_page(inode, idx, true); | |
2864 | if (IS_ERR(page)) { | |
2865 | err = PTR_ERR(page); | |
2866 | goto clear_out; | |
2867 | } | |
2868 | ||
2869 | f2fs_wait_on_page_writeback(page, DATA, true, true); | |
2870 | ||
2871 | set_page_dirty(page); | |
2872 | set_page_private_gcing(page); | |
2873 | f2fs_put_page(page, 1); | |
2874 | ||
2875 | idx++; | |
2876 | cnt++; | |
2877 | total++; | |
2878 | } | |
2879 | ||
2880 | map.m_lblk = idx; | |
2881 | check: | |
2882 | if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) | |
2883 | goto do_map; | |
2884 | ||
2885 | clear_inode_flag(inode, FI_SKIP_WRITES); | |
2886 | ||
2887 | err = filemap_fdatawrite(inode->i_mapping); | |
2888 | if (err) | |
2889 | goto out; | |
2890 | } | |
2891 | clear_out: | |
2892 | clear_inode_flag(inode, FI_SKIP_WRITES); | |
2893 | out: | |
2894 | clear_inode_flag(inode, FI_OPU_WRITE); | |
2895 | unlock_out: | |
2896 | inode_unlock(inode); | |
2897 | if (!err) | |
2898 | range->len = (u64)total << PAGE_SHIFT; | |
2899 | return err; | |
2900 | } | |
2901 | ||
2902 | static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) | |
2903 | { | |
2904 | struct inode *inode = file_inode(filp); | |
2905 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
2906 | struct f2fs_defragment range; | |
2907 | int err; | |
2908 | ||
2909 | if (!capable(CAP_SYS_ADMIN)) | |
2910 | return -EPERM; | |
2911 | ||
2912 | if (!S_ISREG(inode->i_mode)) | |
2913 | return -EINVAL; | |
2914 | ||
2915 | if (f2fs_readonly(sbi->sb)) | |
2916 | return -EROFS; | |
2917 | ||
2918 | if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, | |
2919 | sizeof(range))) | |
2920 | return -EFAULT; | |
2921 | ||
2922 | /* verify alignment of offset & size */ | |
2923 | if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) | |
2924 | return -EINVAL; | |
2925 | ||
2926 | if (unlikely((range.start + range.len) >> PAGE_SHIFT > | |
2927 | max_file_blocks(inode))) | |
2928 | return -EINVAL; | |
2929 | ||
2930 | err = mnt_want_write_file(filp); | |
2931 | if (err) | |
2932 | return err; | |
2933 | ||
2934 | err = f2fs_defragment_range(sbi, filp, &range); | |
2935 | mnt_drop_write_file(filp); | |
2936 | ||
2937 | if (range.len) | |
2938 | f2fs_update_time(sbi, REQ_TIME); | |
2939 | if (err < 0) | |
2940 | return err; | |
2941 | ||
2942 | if (copy_to_user((struct f2fs_defragment __user *)arg, &range, | |
2943 | sizeof(range))) | |
2944 | return -EFAULT; | |
2945 | ||
2946 | return 0; | |
2947 | } | |
2948 | ||
2949 | static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, | |
2950 | struct file *file_out, loff_t pos_out, size_t len) | |
2951 | { | |
2952 | struct inode *src = file_inode(file_in); | |
2953 | struct inode *dst = file_inode(file_out); | |
2954 | struct f2fs_sb_info *sbi = F2FS_I_SB(src); | |
2955 | size_t olen = len, dst_max_i_size = 0; | |
2956 | size_t dst_osize; | |
2957 | int ret; | |
2958 | ||
2959 | if (file_in->f_path.mnt != file_out->f_path.mnt || | |
2960 | src->i_sb != dst->i_sb) | |
2961 | return -EXDEV; | |
2962 | ||
2963 | if (unlikely(f2fs_readonly(src->i_sb))) | |
2964 | return -EROFS; | |
2965 | ||
2966 | if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode)) | |
2967 | return -EINVAL; | |
2968 | ||
2969 | if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst)) | |
2970 | return -EOPNOTSUPP; | |
2971 | ||
2972 | if (pos_out < 0 || pos_in < 0) | |
2973 | return -EINVAL; | |
2974 | ||
2975 | if (src == dst) { | |
2976 | if (pos_in == pos_out) | |
2977 | return 0; | |
2978 | if (pos_out > pos_in && pos_out < pos_in + len) | |
2979 | return -EINVAL; | |
2980 | } | |
2981 | ||
2982 | inode_lock(src); | |
2983 | if (src != dst) { | |
2984 | ret = -EBUSY; | |
2985 | if (!inode_trylock(dst)) | |
2986 | goto out; | |
2987 | } | |
2988 | ||
2989 | if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || | |
2990 | f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { | |
2991 | ret = -EOPNOTSUPP; | |
2992 | goto out_unlock; | |
2993 | } | |
2994 | ||
2995 | if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { | |
2996 | ret = -EINVAL; | |
2997 | goto out_unlock; | |
2998 | } | |
2999 | ||
3000 | ret = -EINVAL; | |
3001 | if (pos_in + len > src->i_size || pos_in + len < pos_in) | |
3002 | goto out_unlock; | |
3003 | if (len == 0) | |
3004 | olen = len = src->i_size - pos_in; | |
3005 | if (pos_in + len == src->i_size) | |
3006 | len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in; | |
3007 | if (len == 0) { | |
3008 | ret = 0; | |
3009 | goto out_unlock; | |
3010 | } | |
3011 | ||
3012 | dst_osize = dst->i_size; | |
3013 | if (pos_out + olen > dst->i_size) | |
3014 | dst_max_i_size = pos_out + olen; | |
3015 | ||
3016 | /* verify the end result is block aligned */ | |
3017 | if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) || | |
3018 | !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) || | |
3019 | !IS_ALIGNED(pos_out, F2FS_BLKSIZE)) | |
3020 | goto out_unlock; | |
3021 | ||
3022 | ret = f2fs_convert_inline_inode(src); | |
3023 | if (ret) | |
3024 | goto out_unlock; | |
3025 | ||
3026 | ret = f2fs_convert_inline_inode(dst); | |
3027 | if (ret) | |
3028 | goto out_unlock; | |
3029 | ||
3030 | /* write out all dirty pages from offset */ | |
3031 | ret = filemap_write_and_wait_range(src->i_mapping, | |
3032 | pos_in, pos_in + len); | |
3033 | if (ret) | |
3034 | goto out_unlock; | |
3035 | ||
3036 | ret = filemap_write_and_wait_range(dst->i_mapping, | |
3037 | pos_out, pos_out + len); | |
3038 | if (ret) | |
3039 | goto out_unlock; | |
3040 | ||
3041 | f2fs_balance_fs(sbi, true); | |
3042 | ||
3043 | f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); | |
3044 | if (src != dst) { | |
3045 | ret = -EBUSY; | |
3046 | if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) | |
3047 | goto out_src; | |
3048 | } | |
3049 | ||
3050 | f2fs_lock_op(sbi); | |
3051 | ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), | |
3052 | F2FS_BYTES_TO_BLK(pos_out), | |
3053 | F2FS_BYTES_TO_BLK(len), false); | |
3054 | ||
3055 | if (!ret) { | |
3056 | if (dst_max_i_size) | |
3057 | f2fs_i_size_write(dst, dst_max_i_size); | |
3058 | else if (dst_osize != dst->i_size) | |
3059 | f2fs_i_size_write(dst, dst_osize); | |
3060 | } | |
3061 | f2fs_unlock_op(sbi); | |
3062 | ||
3063 | if (src != dst) | |
3064 | f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); | |
3065 | out_src: | |
3066 | f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); | |
3067 | if (ret) | |
3068 | goto out_unlock; | |
3069 | ||
3070 | inode_set_mtime_to_ts(src, inode_set_ctime_current(src)); | |
3071 | f2fs_mark_inode_dirty_sync(src, false); | |
3072 | if (src != dst) { | |
3073 | inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst)); | |
3074 | f2fs_mark_inode_dirty_sync(dst, false); | |
3075 | } | |
3076 | f2fs_update_time(sbi, REQ_TIME); | |
3077 | ||
3078 | out_unlock: | |
3079 | if (src != dst) | |
3080 | inode_unlock(dst); | |
3081 | out: | |
3082 | inode_unlock(src); | |
3083 | return ret; | |
3084 | } | |
3085 | ||
3086 | static int __f2fs_ioc_move_range(struct file *filp, | |
3087 | struct f2fs_move_range *range) | |
3088 | { | |
3089 | int err; | |
3090 | ||
3091 | if (!(filp->f_mode & FMODE_READ) || | |
3092 | !(filp->f_mode & FMODE_WRITE)) | |
3093 | return -EBADF; | |
3094 | ||
3095 | CLASS(fd, dst)(range->dst_fd); | |
3096 | if (fd_empty(dst)) | |
3097 | return -EBADF; | |
3098 | ||
3099 | if (!(fd_file(dst)->f_mode & FMODE_WRITE)) | |
3100 | return -EBADF; | |
3101 | ||
3102 | err = mnt_want_write_file(filp); | |
3103 | if (err) | |
3104 | return err; | |
3105 | ||
3106 | err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), | |
3107 | range->pos_out, range->len); | |
3108 | ||
3109 | mnt_drop_write_file(filp); | |
3110 | return err; | |
3111 | } | |
3112 | ||
3113 | static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) | |
3114 | { | |
3115 | struct f2fs_move_range range; | |
3116 | ||
3117 | if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, | |
3118 | sizeof(range))) | |
3119 | return -EFAULT; | |
3120 | return __f2fs_ioc_move_range(filp, &range); | |
3121 | } | |
3122 | ||
3123 | static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) | |
3124 | { | |
3125 | struct inode *inode = file_inode(filp); | |
3126 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3127 | struct sit_info *sm = SIT_I(sbi); | |
3128 | unsigned int start_segno = 0, end_segno = 0; | |
3129 | unsigned int dev_start_segno = 0, dev_end_segno = 0; | |
3130 | struct f2fs_flush_device range; | |
3131 | struct f2fs_gc_control gc_control = { | |
3132 | .init_gc_type = FG_GC, | |
3133 | .should_migrate_blocks = true, | |
3134 | .err_gc_skipped = true, | |
3135 | .nr_free_secs = 0 }; | |
3136 | int ret; | |
3137 | ||
3138 | if (!capable(CAP_SYS_ADMIN)) | |
3139 | return -EPERM; | |
3140 | ||
3141 | if (f2fs_readonly(sbi->sb)) | |
3142 | return -EROFS; | |
3143 | ||
3144 | if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) | |
3145 | return -EINVAL; | |
3146 | ||
3147 | if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, | |
3148 | sizeof(range))) | |
3149 | return -EFAULT; | |
3150 | ||
3151 | if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || | |
3152 | __is_large_section(sbi)) { | |
3153 | f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", | |
3154 | range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); | |
3155 | return -EINVAL; | |
3156 | } | |
3157 | ||
3158 | ret = mnt_want_write_file(filp); | |
3159 | if (ret) | |
3160 | return ret; | |
3161 | ||
3162 | if (range.dev_num != 0) | |
3163 | dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); | |
3164 | dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); | |
3165 | ||
3166 | start_segno = sm->last_victim[FLUSH_DEVICE]; | |
3167 | if (start_segno < dev_start_segno || start_segno >= dev_end_segno) | |
3168 | start_segno = dev_start_segno; | |
3169 | end_segno = min(start_segno + range.segments, dev_end_segno); | |
3170 | ||
3171 | while (start_segno < end_segno) { | |
3172 | if (!f2fs_down_write_trylock(&sbi->gc_lock)) { | |
3173 | ret = -EBUSY; | |
3174 | goto out; | |
3175 | } | |
3176 | sm->last_victim[GC_CB] = end_segno + 1; | |
3177 | sm->last_victim[GC_GREEDY] = end_segno + 1; | |
3178 | sm->last_victim[ALLOC_NEXT] = end_segno + 1; | |
3179 | ||
3180 | gc_control.victim_segno = start_segno; | |
3181 | stat_inc_gc_call_count(sbi, FOREGROUND); | |
3182 | ret = f2fs_gc(sbi, &gc_control); | |
3183 | if (ret == -EAGAIN) | |
3184 | ret = 0; | |
3185 | else if (ret < 0) | |
3186 | break; | |
3187 | start_segno++; | |
3188 | } | |
3189 | out: | |
3190 | mnt_drop_write_file(filp); | |
3191 | return ret; | |
3192 | } | |
3193 | ||
3194 | static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) | |
3195 | { | |
3196 | struct inode *inode = file_inode(filp); | |
3197 | u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature); | |
3198 | ||
3199 | /* Must validate to set it with SQLite behavior in Android. */ | |
3200 | sb_feature |= F2FS_FEATURE_ATOMIC_WRITE; | |
3201 | ||
3202 | return put_user(sb_feature, (u32 __user *)arg); | |
3203 | } | |
3204 | ||
3205 | #ifdef CONFIG_QUOTA | |
3206 | int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) | |
3207 | { | |
3208 | struct dquot *transfer_to[MAXQUOTAS] = {}; | |
3209 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3210 | struct super_block *sb = sbi->sb; | |
3211 | int err; | |
3212 | ||
3213 | transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); | |
3214 | if (IS_ERR(transfer_to[PRJQUOTA])) | |
3215 | return PTR_ERR(transfer_to[PRJQUOTA]); | |
3216 | ||
3217 | err = __dquot_transfer(inode, transfer_to); | |
3218 | if (err) | |
3219 | set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); | |
3220 | dqput(transfer_to[PRJQUOTA]); | |
3221 | return err; | |
3222 | } | |
3223 | ||
3224 | static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) | |
3225 | { | |
3226 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
3227 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3228 | struct f2fs_inode *ri = NULL; | |
3229 | kprojid_t kprojid; | |
3230 | int err; | |
3231 | ||
3232 | if (!f2fs_sb_has_project_quota(sbi)) { | |
3233 | if (projid != F2FS_DEF_PROJID) | |
3234 | return -EOPNOTSUPP; | |
3235 | else | |
3236 | return 0; | |
3237 | } | |
3238 | ||
3239 | if (!f2fs_has_extra_attr(inode)) | |
3240 | return -EOPNOTSUPP; | |
3241 | ||
3242 | kprojid = make_kprojid(&init_user_ns, (projid_t)projid); | |
3243 | ||
3244 | if (projid_eq(kprojid, fi->i_projid)) | |
3245 | return 0; | |
3246 | ||
3247 | err = -EPERM; | |
3248 | /* Is it quota file? Do not allow user to mess with it */ | |
3249 | if (IS_NOQUOTA(inode)) | |
3250 | return err; | |
3251 | ||
3252 | if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) | |
3253 | return -EOVERFLOW; | |
3254 | ||
3255 | err = f2fs_dquot_initialize(inode); | |
3256 | if (err) | |
3257 | return err; | |
3258 | ||
3259 | f2fs_lock_op(sbi); | |
3260 | err = f2fs_transfer_project_quota(inode, kprojid); | |
3261 | if (err) | |
3262 | goto out_unlock; | |
3263 | ||
3264 | fi->i_projid = kprojid; | |
3265 | inode_set_ctime_current(inode); | |
3266 | f2fs_mark_inode_dirty_sync(inode, true); | |
3267 | out_unlock: | |
3268 | f2fs_unlock_op(sbi); | |
3269 | return err; | |
3270 | } | |
3271 | #else | |
3272 | int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) | |
3273 | { | |
3274 | return 0; | |
3275 | } | |
3276 | ||
3277 | static int f2fs_ioc_setproject(struct inode *inode, __u32 projid) | |
3278 | { | |
3279 | if (projid != F2FS_DEF_PROJID) | |
3280 | return -EOPNOTSUPP; | |
3281 | return 0; | |
3282 | } | |
3283 | #endif | |
3284 | ||
3285 | int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa) | |
3286 | { | |
3287 | struct inode *inode = d_inode(dentry); | |
3288 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
3289 | u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); | |
3290 | ||
3291 | if (IS_ENCRYPTED(inode)) | |
3292 | fsflags |= FS_ENCRYPT_FL; | |
3293 | if (IS_VERITY(inode)) | |
3294 | fsflags |= FS_VERITY_FL; | |
3295 | if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) | |
3296 | fsflags |= FS_INLINE_DATA_FL; | |
3297 | if (is_inode_flag_set(inode, FI_PIN_FILE)) | |
3298 | fsflags |= FS_NOCOW_FL; | |
3299 | ||
3300 | fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL); | |
3301 | ||
3302 | if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) | |
3303 | fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); | |
3304 | ||
3305 | return 0; | |
3306 | } | |
3307 | ||
3308 | int f2fs_fileattr_set(struct mnt_idmap *idmap, | |
3309 | struct dentry *dentry, struct fileattr *fa) | |
3310 | { | |
3311 | struct inode *inode = d_inode(dentry); | |
3312 | u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; | |
3313 | u32 iflags; | |
3314 | int err; | |
3315 | ||
3316 | if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) | |
3317 | return -EIO; | |
3318 | if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) | |
3319 | return -ENOSPC; | |
3320 | if (fsflags & ~F2FS_GETTABLE_FS_FL) | |
3321 | return -EOPNOTSUPP; | |
3322 | fsflags &= F2FS_SETTABLE_FS_FL; | |
3323 | if (!fa->flags_valid) | |
3324 | mask &= FS_COMMON_FL; | |
3325 | ||
3326 | iflags = f2fs_fsflags_to_iflags(fsflags); | |
3327 | if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) | |
3328 | return -EOPNOTSUPP; | |
3329 | ||
3330 | err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask)); | |
3331 | if (!err) | |
3332 | err = f2fs_ioc_setproject(inode, fa->fsx_projid); | |
3333 | ||
3334 | return err; | |
3335 | } | |
3336 | ||
3337 | int f2fs_pin_file_control(struct inode *inode, bool inc) | |
3338 | { | |
3339 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
3340 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3341 | ||
3342 | if (IS_DEVICE_ALIASING(inode)) | |
3343 | return -EINVAL; | |
3344 | ||
3345 | if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { | |
3346 | f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", | |
3347 | __func__, inode->i_ino, fi->i_gc_failures); | |
3348 | clear_inode_flag(inode, FI_PIN_FILE); | |
3349 | return -EAGAIN; | |
3350 | } | |
3351 | ||
3352 | /* Use i_gc_failures for normal file as a risk signal. */ | |
3353 | if (inc) | |
3354 | f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); | |
3355 | ||
3356 | return 0; | |
3357 | } | |
3358 | ||
3359 | static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) | |
3360 | { | |
3361 | struct inode *inode = file_inode(filp); | |
3362 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3363 | __u32 pin; | |
3364 | int ret = 0; | |
3365 | ||
3366 | if (get_user(pin, (__u32 __user *)arg)) | |
3367 | return -EFAULT; | |
3368 | ||
3369 | if (!S_ISREG(inode->i_mode)) | |
3370 | return -EINVAL; | |
3371 | ||
3372 | if (f2fs_readonly(sbi->sb)) | |
3373 | return -EROFS; | |
3374 | ||
3375 | if (!pin && IS_DEVICE_ALIASING(inode)) | |
3376 | return -EOPNOTSUPP; | |
3377 | ||
3378 | ret = mnt_want_write_file(filp); | |
3379 | if (ret) | |
3380 | return ret; | |
3381 | ||
3382 | inode_lock(inode); | |
3383 | ||
3384 | if (f2fs_is_atomic_file(inode)) { | |
3385 | ret = -EINVAL; | |
3386 | goto out; | |
3387 | } | |
3388 | ||
3389 | if (!pin) { | |
3390 | clear_inode_flag(inode, FI_PIN_FILE); | |
3391 | f2fs_i_gc_failures_write(inode, 0); | |
3392 | goto done; | |
3393 | } else if (f2fs_is_pinned_file(inode)) { | |
3394 | goto done; | |
3395 | } | |
3396 | ||
3397 | if (F2FS_HAS_BLOCKS(inode)) { | |
3398 | ret = -EFBIG; | |
3399 | goto out; | |
3400 | } | |
3401 | ||
3402 | /* Let's allow file pinning on zoned device. */ | |
3403 | if (!f2fs_sb_has_blkzoned(sbi) && | |
3404 | f2fs_should_update_outplace(inode, NULL)) { | |
3405 | ret = -EINVAL; | |
3406 | goto out; | |
3407 | } | |
3408 | ||
3409 | if (f2fs_pin_file_control(inode, false)) { | |
3410 | ret = -EAGAIN; | |
3411 | goto out; | |
3412 | } | |
3413 | ||
3414 | ret = f2fs_convert_inline_inode(inode); | |
3415 | if (ret) | |
3416 | goto out; | |
3417 | ||
3418 | if (!f2fs_disable_compressed_file(inode)) { | |
3419 | ret = -EOPNOTSUPP; | |
3420 | goto out; | |
3421 | } | |
3422 | ||
3423 | set_inode_flag(inode, FI_PIN_FILE); | |
3424 | ret = F2FS_I(inode)->i_gc_failures; | |
3425 | done: | |
3426 | f2fs_update_time(sbi, REQ_TIME); | |
3427 | out: | |
3428 | inode_unlock(inode); | |
3429 | mnt_drop_write_file(filp); | |
3430 | return ret; | |
3431 | } | |
3432 | ||
3433 | static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) | |
3434 | { | |
3435 | struct inode *inode = file_inode(filp); | |
3436 | __u32 pin = 0; | |
3437 | ||
3438 | if (is_inode_flag_set(inode, FI_PIN_FILE)) | |
3439 | pin = F2FS_I(inode)->i_gc_failures; | |
3440 | return put_user(pin, (u32 __user *)arg); | |
3441 | } | |
3442 | ||
3443 | static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) | |
3444 | { | |
3445 | return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0, | |
3446 | (u32 __user *)arg); | |
3447 | } | |
3448 | ||
3449 | int f2fs_precache_extents(struct inode *inode) | |
3450 | { | |
3451 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
3452 | struct f2fs_map_blocks map; | |
3453 | pgoff_t m_next_extent; | |
3454 | loff_t end; | |
3455 | int err; | |
3456 | ||
3457 | if (is_inode_flag_set(inode, FI_NO_EXTENT)) | |
3458 | return -EOPNOTSUPP; | |
3459 | ||
3460 | map.m_lblk = 0; | |
3461 | map.m_pblk = 0; | |
3462 | map.m_next_pgofs = NULL; | |
3463 | map.m_next_extent = &m_next_extent; | |
3464 | map.m_seg_type = NO_CHECK_TYPE; | |
3465 | map.m_may_create = false; | |
3466 | end = F2FS_BLK_ALIGN(i_size_read(inode)); | |
3467 | ||
3468 | while (map.m_lblk < end) { | |
3469 | map.m_len = end - map.m_lblk; | |
3470 | ||
3471 | f2fs_down_write(&fi->i_gc_rwsem[WRITE]); | |
3472 | err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE); | |
3473 | f2fs_up_write(&fi->i_gc_rwsem[WRITE]); | |
3474 | if (err || !map.m_len) | |
3475 | return err; | |
3476 | ||
3477 | map.m_lblk = m_next_extent; | |
3478 | } | |
3479 | ||
3480 | return 0; | |
3481 | } | |
3482 | ||
3483 | static int f2fs_ioc_precache_extents(struct file *filp) | |
3484 | { | |
3485 | return f2fs_precache_extents(file_inode(filp)); | |
3486 | } | |
3487 | ||
3488 | static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) | |
3489 | { | |
3490 | struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); | |
3491 | __u64 block_count; | |
3492 | ||
3493 | if (!capable(CAP_SYS_ADMIN)) | |
3494 | return -EPERM; | |
3495 | ||
3496 | if (f2fs_readonly(sbi->sb)) | |
3497 | return -EROFS; | |
3498 | ||
3499 | if (copy_from_user(&block_count, (void __user *)arg, | |
3500 | sizeof(block_count))) | |
3501 | return -EFAULT; | |
3502 | ||
3503 | return f2fs_resize_fs(filp, block_count); | |
3504 | } | |
3505 | ||
3506 | static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) | |
3507 | { | |
3508 | struct inode *inode = file_inode(filp); | |
3509 | ||
3510 | f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); | |
3511 | ||
3512 | if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) { | |
3513 | f2fs_warn(F2FS_I_SB(inode), | |
3514 | "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem", | |
3515 | inode->i_ino); | |
3516 | return -EOPNOTSUPP; | |
3517 | } | |
3518 | ||
3519 | return fsverity_ioctl_enable(filp, (const void __user *)arg); | |
3520 | } | |
3521 | ||
3522 | static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) | |
3523 | { | |
3524 | if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) | |
3525 | return -EOPNOTSUPP; | |
3526 | ||
3527 | return fsverity_ioctl_measure(filp, (void __user *)arg); | |
3528 | } | |
3529 | ||
3530 | static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg) | |
3531 | { | |
3532 | if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp)))) | |
3533 | return -EOPNOTSUPP; | |
3534 | ||
3535 | return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); | |
3536 | } | |
3537 | ||
3538 | static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) | |
3539 | { | |
3540 | struct inode *inode = file_inode(filp); | |
3541 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3542 | char *vbuf; | |
3543 | int count; | |
3544 | int err = 0; | |
3545 | ||
3546 | vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL); | |
3547 | if (!vbuf) | |
3548 | return -ENOMEM; | |
3549 | ||
3550 | f2fs_down_read(&sbi->sb_lock); | |
3551 | count = utf16s_to_utf8s(sbi->raw_super->volume_name, | |
3552 | ARRAY_SIZE(sbi->raw_super->volume_name), | |
3553 | UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); | |
3554 | f2fs_up_read(&sbi->sb_lock); | |
3555 | ||
3556 | if (copy_to_user((char __user *)arg, vbuf, | |
3557 | min(FSLABEL_MAX, count))) | |
3558 | err = -EFAULT; | |
3559 | ||
3560 | kfree(vbuf); | |
3561 | return err; | |
3562 | } | |
3563 | ||
3564 | static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) | |
3565 | { | |
3566 | struct inode *inode = file_inode(filp); | |
3567 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3568 | char *vbuf; | |
3569 | int err = 0; | |
3570 | ||
3571 | if (!capable(CAP_SYS_ADMIN)) | |
3572 | return -EPERM; | |
3573 | ||
3574 | vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX); | |
3575 | if (IS_ERR(vbuf)) | |
3576 | return PTR_ERR(vbuf); | |
3577 | ||
3578 | err = mnt_want_write_file(filp); | |
3579 | if (err) | |
3580 | goto out; | |
3581 | ||
3582 | f2fs_down_write(&sbi->sb_lock); | |
3583 | ||
3584 | memset(sbi->raw_super->volume_name, 0, | |
3585 | sizeof(sbi->raw_super->volume_name)); | |
3586 | utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN, | |
3587 | sbi->raw_super->volume_name, | |
3588 | ARRAY_SIZE(sbi->raw_super->volume_name)); | |
3589 | ||
3590 | err = f2fs_commit_super(sbi, false); | |
3591 | ||
3592 | f2fs_up_write(&sbi->sb_lock); | |
3593 | ||
3594 | mnt_drop_write_file(filp); | |
3595 | out: | |
3596 | kfree(vbuf); | |
3597 | return err; | |
3598 | } | |
3599 | ||
3600 | static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks) | |
3601 | { | |
3602 | if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) | |
3603 | return -EOPNOTSUPP; | |
3604 | ||
3605 | if (!f2fs_compressed_file(inode)) | |
3606 | return -EINVAL; | |
3607 | ||
3608 | *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); | |
3609 | ||
3610 | return 0; | |
3611 | } | |
3612 | ||
3613 | static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg) | |
3614 | { | |
3615 | struct inode *inode = file_inode(filp); | |
3616 | __u64 blocks; | |
3617 | int ret; | |
3618 | ||
3619 | ret = f2fs_get_compress_blocks(inode, &blocks); | |
3620 | if (ret < 0) | |
3621 | return ret; | |
3622 | ||
3623 | return put_user(blocks, (u64 __user *)arg); | |
3624 | } | |
3625 | ||
3626 | static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) | |
3627 | { | |
3628 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | |
3629 | unsigned int released_blocks = 0; | |
3630 | int cluster_size = F2FS_I(dn->inode)->i_cluster_size; | |
3631 | block_t blkaddr; | |
3632 | int i; | |
3633 | ||
3634 | for (i = 0; i < count; i++) { | |
3635 | blkaddr = data_blkaddr(dn->inode, dn->node_page, | |
3636 | dn->ofs_in_node + i); | |
3637 | ||
3638 | if (!__is_valid_data_blkaddr(blkaddr)) | |
3639 | continue; | |
3640 | if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, | |
3641 | DATA_GENERIC_ENHANCE))) | |
3642 | return -EFSCORRUPTED; | |
3643 | } | |
3644 | ||
3645 | while (count) { | |
3646 | int compr_blocks = 0; | |
3647 | ||
3648 | for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { | |
3649 | blkaddr = f2fs_data_blkaddr(dn); | |
3650 | ||
3651 | if (i == 0) { | |
3652 | if (blkaddr == COMPRESS_ADDR) | |
3653 | continue; | |
3654 | dn->ofs_in_node += cluster_size; | |
3655 | goto next; | |
3656 | } | |
3657 | ||
3658 | if (__is_valid_data_blkaddr(blkaddr)) | |
3659 | compr_blocks++; | |
3660 | ||
3661 | if (blkaddr != NEW_ADDR) | |
3662 | continue; | |
3663 | ||
3664 | f2fs_set_data_blkaddr(dn, NULL_ADDR); | |
3665 | } | |
3666 | ||
3667 | f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); | |
3668 | dec_valid_block_count(sbi, dn->inode, | |
3669 | cluster_size - compr_blocks); | |
3670 | ||
3671 | released_blocks += cluster_size - compr_blocks; | |
3672 | next: | |
3673 | count -= cluster_size; | |
3674 | } | |
3675 | ||
3676 | return released_blocks; | |
3677 | } | |
3678 | ||
3679 | static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) | |
3680 | { | |
3681 | struct inode *inode = file_inode(filp); | |
3682 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
3683 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3684 | pgoff_t page_idx = 0, last_idx; | |
3685 | unsigned int released_blocks = 0; | |
3686 | int ret; | |
3687 | int writecount; | |
3688 | ||
3689 | if (!f2fs_sb_has_compression(sbi)) | |
3690 | return -EOPNOTSUPP; | |
3691 | ||
3692 | if (f2fs_readonly(sbi->sb)) | |
3693 | return -EROFS; | |
3694 | ||
3695 | ret = mnt_want_write_file(filp); | |
3696 | if (ret) | |
3697 | return ret; | |
3698 | ||
3699 | f2fs_balance_fs(sbi, true); | |
3700 | ||
3701 | inode_lock(inode); | |
3702 | ||
3703 | writecount = atomic_read(&inode->i_writecount); | |
3704 | if ((filp->f_mode & FMODE_WRITE && writecount != 1) || | |
3705 | (!(filp->f_mode & FMODE_WRITE) && writecount)) { | |
3706 | ret = -EBUSY; | |
3707 | goto out; | |
3708 | } | |
3709 | ||
3710 | if (!f2fs_compressed_file(inode) || | |
3711 | is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { | |
3712 | ret = -EINVAL; | |
3713 | goto out; | |
3714 | } | |
3715 | ||
3716 | ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); | |
3717 | if (ret) | |
3718 | goto out; | |
3719 | ||
3720 | if (!atomic_read(&fi->i_compr_blocks)) { | |
3721 | ret = -EPERM; | |
3722 | goto out; | |
3723 | } | |
3724 | ||
3725 | set_inode_flag(inode, FI_COMPRESS_RELEASED); | |
3726 | inode_set_ctime_current(inode); | |
3727 | f2fs_mark_inode_dirty_sync(inode, true); | |
3728 | ||
3729 | f2fs_down_write(&fi->i_gc_rwsem[WRITE]); | |
3730 | filemap_invalidate_lock(inode->i_mapping); | |
3731 | ||
3732 | last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); | |
3733 | ||
3734 | while (page_idx < last_idx) { | |
3735 | struct dnode_of_data dn; | |
3736 | pgoff_t end_offset, count; | |
3737 | ||
3738 | f2fs_lock_op(sbi); | |
3739 | ||
3740 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
3741 | ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); | |
3742 | if (ret) { | |
3743 | f2fs_unlock_op(sbi); | |
3744 | if (ret == -ENOENT) { | |
3745 | page_idx = f2fs_get_next_page_offset(&dn, | |
3746 | page_idx); | |
3747 | ret = 0; | |
3748 | continue; | |
3749 | } | |
3750 | break; | |
3751 | } | |
3752 | ||
3753 | end_offset = ADDRS_PER_PAGE(dn.node_page, inode); | |
3754 | count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); | |
3755 | count = round_up(count, fi->i_cluster_size); | |
3756 | ||
3757 | ret = release_compress_blocks(&dn, count); | |
3758 | ||
3759 | f2fs_put_dnode(&dn); | |
3760 | ||
3761 | f2fs_unlock_op(sbi); | |
3762 | ||
3763 | if (ret < 0) | |
3764 | break; | |
3765 | ||
3766 | page_idx += count; | |
3767 | released_blocks += ret; | |
3768 | } | |
3769 | ||
3770 | filemap_invalidate_unlock(inode->i_mapping); | |
3771 | f2fs_up_write(&fi->i_gc_rwsem[WRITE]); | |
3772 | out: | |
3773 | if (released_blocks) | |
3774 | f2fs_update_time(sbi, REQ_TIME); | |
3775 | inode_unlock(inode); | |
3776 | ||
3777 | mnt_drop_write_file(filp); | |
3778 | ||
3779 | if (ret >= 0) { | |
3780 | ret = put_user(released_blocks, (u64 __user *)arg); | |
3781 | } else if (released_blocks && | |
3782 | atomic_read(&fi->i_compr_blocks)) { | |
3783 | set_sbi_flag(sbi, SBI_NEED_FSCK); | |
3784 | f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " | |
3785 | "iblocks=%llu, released=%u, compr_blocks=%u, " | |
3786 | "run fsck to fix.", | |
3787 | __func__, inode->i_ino, inode->i_blocks, | |
3788 | released_blocks, | |
3789 | atomic_read(&fi->i_compr_blocks)); | |
3790 | } | |
3791 | ||
3792 | return ret; | |
3793 | } | |
3794 | ||
3795 | static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, | |
3796 | unsigned int *reserved_blocks) | |
3797 | { | |
3798 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | |
3799 | int cluster_size = F2FS_I(dn->inode)->i_cluster_size; | |
3800 | block_t blkaddr; | |
3801 | int i; | |
3802 | ||
3803 | for (i = 0; i < count; i++) { | |
3804 | blkaddr = data_blkaddr(dn->inode, dn->node_page, | |
3805 | dn->ofs_in_node + i); | |
3806 | ||
3807 | if (!__is_valid_data_blkaddr(blkaddr)) | |
3808 | continue; | |
3809 | if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, | |
3810 | DATA_GENERIC_ENHANCE))) | |
3811 | return -EFSCORRUPTED; | |
3812 | } | |
3813 | ||
3814 | while (count) { | |
3815 | int compr_blocks = 0; | |
3816 | blkcnt_t reserved = 0; | |
3817 | blkcnt_t to_reserved; | |
3818 | int ret; | |
3819 | ||
3820 | for (i = 0; i < cluster_size; i++) { | |
3821 | blkaddr = data_blkaddr(dn->inode, dn->node_page, | |
3822 | dn->ofs_in_node + i); | |
3823 | ||
3824 | if (i == 0) { | |
3825 | if (blkaddr != COMPRESS_ADDR) { | |
3826 | dn->ofs_in_node += cluster_size; | |
3827 | goto next; | |
3828 | } | |
3829 | continue; | |
3830 | } | |
3831 | ||
3832 | /* | |
3833 | * compressed cluster was not released due to it | |
3834 | * fails in release_compress_blocks(), so NEW_ADDR | |
3835 | * is a possible case. | |
3836 | */ | |
3837 | if (blkaddr == NEW_ADDR) { | |
3838 | reserved++; | |
3839 | continue; | |
3840 | } | |
3841 | if (__is_valid_data_blkaddr(blkaddr)) { | |
3842 | compr_blocks++; | |
3843 | continue; | |
3844 | } | |
3845 | } | |
3846 | ||
3847 | to_reserved = cluster_size - compr_blocks - reserved; | |
3848 | ||
3849 | /* for the case all blocks in cluster were reserved */ | |
3850 | if (reserved && to_reserved == 1) { | |
3851 | dn->ofs_in_node += cluster_size; | |
3852 | goto next; | |
3853 | } | |
3854 | ||
3855 | ret = inc_valid_block_count(sbi, dn->inode, | |
3856 | &to_reserved, false); | |
3857 | if (unlikely(ret)) | |
3858 | return ret; | |
3859 | ||
3860 | for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { | |
3861 | if (f2fs_data_blkaddr(dn) == NULL_ADDR) | |
3862 | f2fs_set_data_blkaddr(dn, NEW_ADDR); | |
3863 | } | |
3864 | ||
3865 | f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); | |
3866 | ||
3867 | *reserved_blocks += to_reserved; | |
3868 | next: | |
3869 | count -= cluster_size; | |
3870 | } | |
3871 | ||
3872 | return 0; | |
3873 | } | |
3874 | ||
3875 | static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) | |
3876 | { | |
3877 | struct inode *inode = file_inode(filp); | |
3878 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
3879 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
3880 | pgoff_t page_idx = 0, last_idx; | |
3881 | unsigned int reserved_blocks = 0; | |
3882 | int ret; | |
3883 | ||
3884 | if (!f2fs_sb_has_compression(sbi)) | |
3885 | return -EOPNOTSUPP; | |
3886 | ||
3887 | if (f2fs_readonly(sbi->sb)) | |
3888 | return -EROFS; | |
3889 | ||
3890 | ret = mnt_want_write_file(filp); | |
3891 | if (ret) | |
3892 | return ret; | |
3893 | ||
3894 | f2fs_balance_fs(sbi, true); | |
3895 | ||
3896 | inode_lock(inode); | |
3897 | ||
3898 | if (!f2fs_compressed_file(inode) || | |
3899 | !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { | |
3900 | ret = -EINVAL; | |
3901 | goto unlock_inode; | |
3902 | } | |
3903 | ||
3904 | if (atomic_read(&fi->i_compr_blocks)) | |
3905 | goto unlock_inode; | |
3906 | ||
3907 | f2fs_down_write(&fi->i_gc_rwsem[WRITE]); | |
3908 | filemap_invalidate_lock(inode->i_mapping); | |
3909 | ||
3910 | last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); | |
3911 | ||
3912 | while (page_idx < last_idx) { | |
3913 | struct dnode_of_data dn; | |
3914 | pgoff_t end_offset, count; | |
3915 | ||
3916 | f2fs_lock_op(sbi); | |
3917 | ||
3918 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
3919 | ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); | |
3920 | if (ret) { | |
3921 | f2fs_unlock_op(sbi); | |
3922 | if (ret == -ENOENT) { | |
3923 | page_idx = f2fs_get_next_page_offset(&dn, | |
3924 | page_idx); | |
3925 | ret = 0; | |
3926 | continue; | |
3927 | } | |
3928 | break; | |
3929 | } | |
3930 | ||
3931 | end_offset = ADDRS_PER_PAGE(dn.node_page, inode); | |
3932 | count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); | |
3933 | count = round_up(count, fi->i_cluster_size); | |
3934 | ||
3935 | ret = reserve_compress_blocks(&dn, count, &reserved_blocks); | |
3936 | ||
3937 | f2fs_put_dnode(&dn); | |
3938 | ||
3939 | f2fs_unlock_op(sbi); | |
3940 | ||
3941 | if (ret < 0) | |
3942 | break; | |
3943 | ||
3944 | page_idx += count; | |
3945 | } | |
3946 | ||
3947 | filemap_invalidate_unlock(inode->i_mapping); | |
3948 | f2fs_up_write(&fi->i_gc_rwsem[WRITE]); | |
3949 | ||
3950 | if (!ret) { | |
3951 | clear_inode_flag(inode, FI_COMPRESS_RELEASED); | |
3952 | inode_set_ctime_current(inode); | |
3953 | f2fs_mark_inode_dirty_sync(inode, true); | |
3954 | } | |
3955 | unlock_inode: | |
3956 | if (reserved_blocks) | |
3957 | f2fs_update_time(sbi, REQ_TIME); | |
3958 | inode_unlock(inode); | |
3959 | mnt_drop_write_file(filp); | |
3960 | ||
3961 | if (!ret) { | |
3962 | ret = put_user(reserved_blocks, (u64 __user *)arg); | |
3963 | } else if (reserved_blocks && | |
3964 | atomic_read(&fi->i_compr_blocks)) { | |
3965 | set_sbi_flag(sbi, SBI_NEED_FSCK); | |
3966 | f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " | |
3967 | "iblocks=%llu, reserved=%u, compr_blocks=%u, " | |
3968 | "run fsck to fix.", | |
3969 | __func__, inode->i_ino, inode->i_blocks, | |
3970 | reserved_blocks, | |
3971 | atomic_read(&fi->i_compr_blocks)); | |
3972 | } | |
3973 | ||
3974 | return ret; | |
3975 | } | |
3976 | ||
3977 | static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, | |
3978 | pgoff_t off, block_t block, block_t len, u32 flags) | |
3979 | { | |
3980 | sector_t sector = SECTOR_FROM_BLOCK(block); | |
3981 | sector_t nr_sects = SECTOR_FROM_BLOCK(len); | |
3982 | int ret = 0; | |
3983 | ||
3984 | if (flags & F2FS_TRIM_FILE_DISCARD) { | |
3985 | if (bdev_max_secure_erase_sectors(bdev)) | |
3986 | ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, | |
3987 | GFP_NOFS); | |
3988 | else | |
3989 | ret = blkdev_issue_discard(bdev, sector, nr_sects, | |
3990 | GFP_NOFS); | |
3991 | } | |
3992 | ||
3993 | if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { | |
3994 | if (IS_ENCRYPTED(inode)) | |
3995 | ret = fscrypt_zeroout_range(inode, off, block, len); | |
3996 | else | |
3997 | ret = blkdev_issue_zeroout(bdev, sector, nr_sects, | |
3998 | GFP_NOFS, 0); | |
3999 | } | |
4000 | ||
4001 | return ret; | |
4002 | } | |
4003 | ||
4004 | static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) | |
4005 | { | |
4006 | struct inode *inode = file_inode(filp); | |
4007 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4008 | struct address_space *mapping = inode->i_mapping; | |
4009 | struct block_device *prev_bdev = NULL; | |
4010 | struct f2fs_sectrim_range range; | |
4011 | pgoff_t index, pg_end, prev_index = 0; | |
4012 | block_t prev_block = 0, len = 0; | |
4013 | loff_t end_addr; | |
4014 | bool to_end = false; | |
4015 | int ret = 0; | |
4016 | ||
4017 | if (!(filp->f_mode & FMODE_WRITE)) | |
4018 | return -EBADF; | |
4019 | ||
4020 | if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, | |
4021 | sizeof(range))) | |
4022 | return -EFAULT; | |
4023 | ||
4024 | if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || | |
4025 | !S_ISREG(inode->i_mode)) | |
4026 | return -EINVAL; | |
4027 | ||
4028 | if (((range.flags & F2FS_TRIM_FILE_DISCARD) && | |
4029 | !f2fs_hw_support_discard(sbi)) || | |
4030 | ((range.flags & F2FS_TRIM_FILE_ZEROOUT) && | |
4031 | IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) | |
4032 | return -EOPNOTSUPP; | |
4033 | ||
4034 | ret = mnt_want_write_file(filp); | |
4035 | if (ret) | |
4036 | return ret; | |
4037 | inode_lock(inode); | |
4038 | ||
4039 | if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || | |
4040 | range.start >= inode->i_size) { | |
4041 | ret = -EINVAL; | |
4042 | goto err; | |
4043 | } | |
4044 | ||
4045 | if (range.len == 0) | |
4046 | goto err; | |
4047 | ||
4048 | if (inode->i_size - range.start > range.len) { | |
4049 | end_addr = range.start + range.len; | |
4050 | } else { | |
4051 | end_addr = range.len == (u64)-1 ? | |
4052 | sbi->sb->s_maxbytes : inode->i_size; | |
4053 | to_end = true; | |
4054 | } | |
4055 | ||
4056 | if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || | |
4057 | (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { | |
4058 | ret = -EINVAL; | |
4059 | goto err; | |
4060 | } | |
4061 | ||
4062 | index = F2FS_BYTES_TO_BLK(range.start); | |
4063 | pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); | |
4064 | ||
4065 | ret = f2fs_convert_inline_inode(inode); | |
4066 | if (ret) | |
4067 | goto err; | |
4068 | ||
4069 | f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
4070 | filemap_invalidate_lock(mapping); | |
4071 | ||
4072 | ret = filemap_write_and_wait_range(mapping, range.start, | |
4073 | to_end ? LLONG_MAX : end_addr - 1); | |
4074 | if (ret) | |
4075 | goto out; | |
4076 | ||
4077 | truncate_inode_pages_range(mapping, range.start, | |
4078 | to_end ? -1 : end_addr - 1); | |
4079 | ||
4080 | while (index < pg_end) { | |
4081 | struct dnode_of_data dn; | |
4082 | pgoff_t end_offset, count; | |
4083 | int i; | |
4084 | ||
4085 | set_new_dnode(&dn, inode, NULL, NULL, 0); | |
4086 | ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); | |
4087 | if (ret) { | |
4088 | if (ret == -ENOENT) { | |
4089 | index = f2fs_get_next_page_offset(&dn, index); | |
4090 | continue; | |
4091 | } | |
4092 | goto out; | |
4093 | } | |
4094 | ||
4095 | end_offset = ADDRS_PER_PAGE(dn.node_page, inode); | |
4096 | count = min(end_offset - dn.ofs_in_node, pg_end - index); | |
4097 | for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { | |
4098 | struct block_device *cur_bdev; | |
4099 | block_t blkaddr = f2fs_data_blkaddr(&dn); | |
4100 | ||
4101 | if (!__is_valid_data_blkaddr(blkaddr)) | |
4102 | continue; | |
4103 | ||
4104 | if (!f2fs_is_valid_blkaddr(sbi, blkaddr, | |
4105 | DATA_GENERIC_ENHANCE)) { | |
4106 | ret = -EFSCORRUPTED; | |
4107 | f2fs_put_dnode(&dn); | |
4108 | goto out; | |
4109 | } | |
4110 | ||
4111 | cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); | |
4112 | if (f2fs_is_multi_device(sbi)) { | |
4113 | int di = f2fs_target_device_index(sbi, blkaddr); | |
4114 | ||
4115 | blkaddr -= FDEV(di).start_blk; | |
4116 | } | |
4117 | ||
4118 | if (len) { | |
4119 | if (prev_bdev == cur_bdev && | |
4120 | index == prev_index + len && | |
4121 | blkaddr == prev_block + len) { | |
4122 | len++; | |
4123 | } else { | |
4124 | ret = f2fs_secure_erase(prev_bdev, | |
4125 | inode, prev_index, prev_block, | |
4126 | len, range.flags); | |
4127 | if (ret) { | |
4128 | f2fs_put_dnode(&dn); | |
4129 | goto out; | |
4130 | } | |
4131 | ||
4132 | len = 0; | |
4133 | } | |
4134 | } | |
4135 | ||
4136 | if (!len) { | |
4137 | prev_bdev = cur_bdev; | |
4138 | prev_index = index; | |
4139 | prev_block = blkaddr; | |
4140 | len = 1; | |
4141 | } | |
4142 | } | |
4143 | ||
4144 | f2fs_put_dnode(&dn); | |
4145 | ||
4146 | if (fatal_signal_pending(current)) { | |
4147 | ret = -EINTR; | |
4148 | goto out; | |
4149 | } | |
4150 | cond_resched(); | |
4151 | } | |
4152 | ||
4153 | if (len) | |
4154 | ret = f2fs_secure_erase(prev_bdev, inode, prev_index, | |
4155 | prev_block, len, range.flags); | |
4156 | f2fs_update_time(sbi, REQ_TIME); | |
4157 | out: | |
4158 | filemap_invalidate_unlock(mapping); | |
4159 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
4160 | err: | |
4161 | inode_unlock(inode); | |
4162 | mnt_drop_write_file(filp); | |
4163 | ||
4164 | return ret; | |
4165 | } | |
4166 | ||
4167 | static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) | |
4168 | { | |
4169 | struct inode *inode = file_inode(filp); | |
4170 | struct f2fs_comp_option option; | |
4171 | ||
4172 | if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) | |
4173 | return -EOPNOTSUPP; | |
4174 | ||
4175 | inode_lock_shared(inode); | |
4176 | ||
4177 | if (!f2fs_compressed_file(inode)) { | |
4178 | inode_unlock_shared(inode); | |
4179 | return -ENODATA; | |
4180 | } | |
4181 | ||
4182 | option.algorithm = F2FS_I(inode)->i_compress_algorithm; | |
4183 | option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; | |
4184 | ||
4185 | inode_unlock_shared(inode); | |
4186 | ||
4187 | if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, | |
4188 | sizeof(option))) | |
4189 | return -EFAULT; | |
4190 | ||
4191 | return 0; | |
4192 | } | |
4193 | ||
4194 | static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) | |
4195 | { | |
4196 | struct inode *inode = file_inode(filp); | |
4197 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
4198 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4199 | struct f2fs_comp_option option; | |
4200 | int ret = 0; | |
4201 | ||
4202 | if (!f2fs_sb_has_compression(sbi)) | |
4203 | return -EOPNOTSUPP; | |
4204 | ||
4205 | if (!(filp->f_mode & FMODE_WRITE)) | |
4206 | return -EBADF; | |
4207 | ||
4208 | if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, | |
4209 | sizeof(option))) | |
4210 | return -EFAULT; | |
4211 | ||
4212 | if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || | |
4213 | option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || | |
4214 | option.algorithm >= COMPRESS_MAX) | |
4215 | return -EINVAL; | |
4216 | ||
4217 | ret = mnt_want_write_file(filp); | |
4218 | if (ret) | |
4219 | return ret; | |
4220 | inode_lock(inode); | |
4221 | ||
4222 | f2fs_down_write(&F2FS_I(inode)->i_sem); | |
4223 | if (!f2fs_compressed_file(inode)) { | |
4224 | ret = -EINVAL; | |
4225 | goto out; | |
4226 | } | |
4227 | ||
4228 | if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { | |
4229 | ret = -EBUSY; | |
4230 | goto out; | |
4231 | } | |
4232 | ||
4233 | if (F2FS_HAS_BLOCKS(inode)) { | |
4234 | ret = -EFBIG; | |
4235 | goto out; | |
4236 | } | |
4237 | ||
4238 | fi->i_compress_algorithm = option.algorithm; | |
4239 | fi->i_log_cluster_size = option.log_cluster_size; | |
4240 | fi->i_cluster_size = BIT(option.log_cluster_size); | |
4241 | /* Set default level */ | |
4242 | if (fi->i_compress_algorithm == COMPRESS_ZSTD) | |
4243 | fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; | |
4244 | else | |
4245 | fi->i_compress_level = 0; | |
4246 | /* Adjust mount option level */ | |
4247 | if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && | |
4248 | F2FS_OPTION(sbi).compress_level) | |
4249 | fi->i_compress_level = F2FS_OPTION(sbi).compress_level; | |
4250 | f2fs_mark_inode_dirty_sync(inode, true); | |
4251 | ||
4252 | if (!f2fs_is_compress_backend_ready(inode)) | |
4253 | f2fs_warn(sbi, "compression algorithm is successfully set, " | |
4254 | "but current kernel doesn't support this algorithm."); | |
4255 | out: | |
4256 | f2fs_up_write(&fi->i_sem); | |
4257 | inode_unlock(inode); | |
4258 | mnt_drop_write_file(filp); | |
4259 | ||
4260 | return ret; | |
4261 | } | |
4262 | ||
4263 | static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) | |
4264 | { | |
4265 | DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); | |
4266 | struct address_space *mapping = inode->i_mapping; | |
4267 | struct page *page; | |
4268 | pgoff_t redirty_idx = page_idx; | |
4269 | int i, page_len = 0, ret = 0; | |
4270 | ||
4271 | page_cache_ra_unbounded(&ractl, len, 0); | |
4272 | ||
4273 | for (i = 0; i < len; i++, page_idx++) { | |
4274 | page = read_cache_page(mapping, page_idx, NULL, NULL); | |
4275 | if (IS_ERR(page)) { | |
4276 | ret = PTR_ERR(page); | |
4277 | break; | |
4278 | } | |
4279 | page_len++; | |
4280 | } | |
4281 | ||
4282 | for (i = 0; i < page_len; i++, redirty_idx++) { | |
4283 | page = find_lock_page(mapping, redirty_idx); | |
4284 | ||
4285 | /* It will never fail, when page has pinned above */ | |
4286 | f2fs_bug_on(F2FS_I_SB(inode), !page); | |
4287 | ||
4288 | f2fs_wait_on_page_writeback(page, DATA, true, true); | |
4289 | ||
4290 | set_page_dirty(page); | |
4291 | set_page_private_gcing(page); | |
4292 | f2fs_put_page(page, 1); | |
4293 | f2fs_put_page(page, 0); | |
4294 | } | |
4295 | ||
4296 | return ret; | |
4297 | } | |
4298 | ||
4299 | static int f2fs_ioc_decompress_file(struct file *filp) | |
4300 | { | |
4301 | struct inode *inode = file_inode(filp); | |
4302 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4303 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
4304 | pgoff_t page_idx = 0, last_idx, cluster_idx; | |
4305 | int ret; | |
4306 | ||
4307 | if (!f2fs_sb_has_compression(sbi) || | |
4308 | F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) | |
4309 | return -EOPNOTSUPP; | |
4310 | ||
4311 | if (!(filp->f_mode & FMODE_WRITE)) | |
4312 | return -EBADF; | |
4313 | ||
4314 | f2fs_balance_fs(sbi, true); | |
4315 | ||
4316 | ret = mnt_want_write_file(filp); | |
4317 | if (ret) | |
4318 | return ret; | |
4319 | inode_lock(inode); | |
4320 | ||
4321 | if (!f2fs_is_compress_backend_ready(inode)) { | |
4322 | ret = -EOPNOTSUPP; | |
4323 | goto out; | |
4324 | } | |
4325 | ||
4326 | if (!f2fs_compressed_file(inode) || | |
4327 | is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { | |
4328 | ret = -EINVAL; | |
4329 | goto out; | |
4330 | } | |
4331 | ||
4332 | ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); | |
4333 | if (ret) | |
4334 | goto out; | |
4335 | ||
4336 | if (!atomic_read(&fi->i_compr_blocks)) | |
4337 | goto out; | |
4338 | ||
4339 | last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); | |
4340 | last_idx >>= fi->i_log_cluster_size; | |
4341 | ||
4342 | for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { | |
4343 | page_idx = cluster_idx << fi->i_log_cluster_size; | |
4344 | ||
4345 | if (!f2fs_is_compressed_cluster(inode, page_idx)) | |
4346 | continue; | |
4347 | ||
4348 | ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); | |
4349 | if (ret < 0) | |
4350 | break; | |
4351 | ||
4352 | if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { | |
4353 | ret = filemap_fdatawrite(inode->i_mapping); | |
4354 | if (ret < 0) | |
4355 | break; | |
4356 | } | |
4357 | ||
4358 | cond_resched(); | |
4359 | if (fatal_signal_pending(current)) { | |
4360 | ret = -EINTR; | |
4361 | break; | |
4362 | } | |
4363 | } | |
4364 | ||
4365 | if (!ret) | |
4366 | ret = filemap_write_and_wait_range(inode->i_mapping, 0, | |
4367 | LLONG_MAX); | |
4368 | ||
4369 | if (ret) | |
4370 | f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", | |
4371 | __func__, ret); | |
4372 | f2fs_update_time(sbi, REQ_TIME); | |
4373 | out: | |
4374 | inode_unlock(inode); | |
4375 | mnt_drop_write_file(filp); | |
4376 | ||
4377 | return ret; | |
4378 | } | |
4379 | ||
4380 | static int f2fs_ioc_compress_file(struct file *filp) | |
4381 | { | |
4382 | struct inode *inode = file_inode(filp); | |
4383 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4384 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
4385 | pgoff_t page_idx = 0, last_idx, cluster_idx; | |
4386 | int ret; | |
4387 | ||
4388 | if (!f2fs_sb_has_compression(sbi) || | |
4389 | F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) | |
4390 | return -EOPNOTSUPP; | |
4391 | ||
4392 | if (!(filp->f_mode & FMODE_WRITE)) | |
4393 | return -EBADF; | |
4394 | ||
4395 | f2fs_balance_fs(sbi, true); | |
4396 | ||
4397 | ret = mnt_want_write_file(filp); | |
4398 | if (ret) | |
4399 | return ret; | |
4400 | inode_lock(inode); | |
4401 | ||
4402 | if (!f2fs_is_compress_backend_ready(inode)) { | |
4403 | ret = -EOPNOTSUPP; | |
4404 | goto out; | |
4405 | } | |
4406 | ||
4407 | if (!f2fs_compressed_file(inode) || | |
4408 | is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { | |
4409 | ret = -EINVAL; | |
4410 | goto out; | |
4411 | } | |
4412 | ||
4413 | ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); | |
4414 | if (ret) | |
4415 | goto out; | |
4416 | ||
4417 | set_inode_flag(inode, FI_ENABLE_COMPRESS); | |
4418 | ||
4419 | last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); | |
4420 | last_idx >>= fi->i_log_cluster_size; | |
4421 | ||
4422 | for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { | |
4423 | page_idx = cluster_idx << fi->i_log_cluster_size; | |
4424 | ||
4425 | if (f2fs_is_sparse_cluster(inode, page_idx)) | |
4426 | continue; | |
4427 | ||
4428 | ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); | |
4429 | if (ret < 0) | |
4430 | break; | |
4431 | ||
4432 | if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { | |
4433 | ret = filemap_fdatawrite(inode->i_mapping); | |
4434 | if (ret < 0) | |
4435 | break; | |
4436 | } | |
4437 | ||
4438 | cond_resched(); | |
4439 | if (fatal_signal_pending(current)) { | |
4440 | ret = -EINTR; | |
4441 | break; | |
4442 | } | |
4443 | } | |
4444 | ||
4445 | if (!ret) | |
4446 | ret = filemap_write_and_wait_range(inode->i_mapping, 0, | |
4447 | LLONG_MAX); | |
4448 | ||
4449 | clear_inode_flag(inode, FI_ENABLE_COMPRESS); | |
4450 | ||
4451 | if (ret) | |
4452 | f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.", | |
4453 | __func__, ret); | |
4454 | f2fs_update_time(sbi, REQ_TIME); | |
4455 | out: | |
4456 | inode_unlock(inode); | |
4457 | mnt_drop_write_file(filp); | |
4458 | ||
4459 | return ret; | |
4460 | } | |
4461 | ||
4462 | static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
4463 | { | |
4464 | switch (cmd) { | |
4465 | case FS_IOC_GETVERSION: | |
4466 | return f2fs_ioc_getversion(filp, arg); | |
4467 | case F2FS_IOC_START_ATOMIC_WRITE: | |
4468 | return f2fs_ioc_start_atomic_write(filp, false); | |
4469 | case F2FS_IOC_START_ATOMIC_REPLACE: | |
4470 | return f2fs_ioc_start_atomic_write(filp, true); | |
4471 | case F2FS_IOC_COMMIT_ATOMIC_WRITE: | |
4472 | return f2fs_ioc_commit_atomic_write(filp); | |
4473 | case F2FS_IOC_ABORT_ATOMIC_WRITE: | |
4474 | return f2fs_ioc_abort_atomic_write(filp); | |
4475 | case F2FS_IOC_START_VOLATILE_WRITE: | |
4476 | case F2FS_IOC_RELEASE_VOLATILE_WRITE: | |
4477 | return -EOPNOTSUPP; | |
4478 | case F2FS_IOC_SHUTDOWN: | |
4479 | return f2fs_ioc_shutdown(filp, arg); | |
4480 | case FITRIM: | |
4481 | return f2fs_ioc_fitrim(filp, arg); | |
4482 | case FS_IOC_SET_ENCRYPTION_POLICY: | |
4483 | return f2fs_ioc_set_encryption_policy(filp, arg); | |
4484 | case FS_IOC_GET_ENCRYPTION_POLICY: | |
4485 | return f2fs_ioc_get_encryption_policy(filp, arg); | |
4486 | case FS_IOC_GET_ENCRYPTION_PWSALT: | |
4487 | return f2fs_ioc_get_encryption_pwsalt(filp, arg); | |
4488 | case FS_IOC_GET_ENCRYPTION_POLICY_EX: | |
4489 | return f2fs_ioc_get_encryption_policy_ex(filp, arg); | |
4490 | case FS_IOC_ADD_ENCRYPTION_KEY: | |
4491 | return f2fs_ioc_add_encryption_key(filp, arg); | |
4492 | case FS_IOC_REMOVE_ENCRYPTION_KEY: | |
4493 | return f2fs_ioc_remove_encryption_key(filp, arg); | |
4494 | case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: | |
4495 | return f2fs_ioc_remove_encryption_key_all_users(filp, arg); | |
4496 | case FS_IOC_GET_ENCRYPTION_KEY_STATUS: | |
4497 | return f2fs_ioc_get_encryption_key_status(filp, arg); | |
4498 | case FS_IOC_GET_ENCRYPTION_NONCE: | |
4499 | return f2fs_ioc_get_encryption_nonce(filp, arg); | |
4500 | case F2FS_IOC_GARBAGE_COLLECT: | |
4501 | return f2fs_ioc_gc(filp, arg); | |
4502 | case F2FS_IOC_GARBAGE_COLLECT_RANGE: | |
4503 | return f2fs_ioc_gc_range(filp, arg); | |
4504 | case F2FS_IOC_WRITE_CHECKPOINT: | |
4505 | return f2fs_ioc_write_checkpoint(filp); | |
4506 | case F2FS_IOC_DEFRAGMENT: | |
4507 | return f2fs_ioc_defragment(filp, arg); | |
4508 | case F2FS_IOC_MOVE_RANGE: | |
4509 | return f2fs_ioc_move_range(filp, arg); | |
4510 | case F2FS_IOC_FLUSH_DEVICE: | |
4511 | return f2fs_ioc_flush_device(filp, arg); | |
4512 | case F2FS_IOC_GET_FEATURES: | |
4513 | return f2fs_ioc_get_features(filp, arg); | |
4514 | case F2FS_IOC_GET_PIN_FILE: | |
4515 | return f2fs_ioc_get_pin_file(filp, arg); | |
4516 | case F2FS_IOC_SET_PIN_FILE: | |
4517 | return f2fs_ioc_set_pin_file(filp, arg); | |
4518 | case F2FS_IOC_PRECACHE_EXTENTS: | |
4519 | return f2fs_ioc_precache_extents(filp); | |
4520 | case F2FS_IOC_RESIZE_FS: | |
4521 | return f2fs_ioc_resize_fs(filp, arg); | |
4522 | case FS_IOC_ENABLE_VERITY: | |
4523 | return f2fs_ioc_enable_verity(filp, arg); | |
4524 | case FS_IOC_MEASURE_VERITY: | |
4525 | return f2fs_ioc_measure_verity(filp, arg); | |
4526 | case FS_IOC_READ_VERITY_METADATA: | |
4527 | return f2fs_ioc_read_verity_metadata(filp, arg); | |
4528 | case FS_IOC_GETFSLABEL: | |
4529 | return f2fs_ioc_getfslabel(filp, arg); | |
4530 | case FS_IOC_SETFSLABEL: | |
4531 | return f2fs_ioc_setfslabel(filp, arg); | |
4532 | case F2FS_IOC_GET_COMPRESS_BLOCKS: | |
4533 | return f2fs_ioc_get_compress_blocks(filp, arg); | |
4534 | case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: | |
4535 | return f2fs_release_compress_blocks(filp, arg); | |
4536 | case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: | |
4537 | return f2fs_reserve_compress_blocks(filp, arg); | |
4538 | case F2FS_IOC_SEC_TRIM_FILE: | |
4539 | return f2fs_sec_trim_file(filp, arg); | |
4540 | case F2FS_IOC_GET_COMPRESS_OPTION: | |
4541 | return f2fs_ioc_get_compress_option(filp, arg); | |
4542 | case F2FS_IOC_SET_COMPRESS_OPTION: | |
4543 | return f2fs_ioc_set_compress_option(filp, arg); | |
4544 | case F2FS_IOC_DECOMPRESS_FILE: | |
4545 | return f2fs_ioc_decompress_file(filp); | |
4546 | case F2FS_IOC_COMPRESS_FILE: | |
4547 | return f2fs_ioc_compress_file(filp); | |
4548 | case F2FS_IOC_GET_DEV_ALIAS_FILE: | |
4549 | return f2fs_ioc_get_dev_alias_file(filp, arg); | |
4550 | default: | |
4551 | return -ENOTTY; | |
4552 | } | |
4553 | } | |
4554 | ||
4555 | long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
4556 | { | |
4557 | if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) | |
4558 | return -EIO; | |
4559 | if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) | |
4560 | return -ENOSPC; | |
4561 | ||
4562 | return __f2fs_ioctl(filp, cmd, arg); | |
4563 | } | |
4564 | ||
4565 | /* | |
4566 | * Return %true if the given read or write request should use direct I/O, or | |
4567 | * %false if it should use buffered I/O. | |
4568 | */ | |
4569 | static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, | |
4570 | struct iov_iter *iter) | |
4571 | { | |
4572 | unsigned int align; | |
4573 | ||
4574 | if (!(iocb->ki_flags & IOCB_DIRECT)) | |
4575 | return false; | |
4576 | ||
4577 | if (f2fs_force_buffered_io(inode, iov_iter_rw(iter))) | |
4578 | return false; | |
4579 | ||
4580 | /* | |
4581 | * Direct I/O not aligned to the disk's logical_block_size will be | |
4582 | * attempted, but will fail with -EINVAL. | |
4583 | * | |
4584 | * f2fs additionally requires that direct I/O be aligned to the | |
4585 | * filesystem block size, which is often a stricter requirement. | |
4586 | * However, f2fs traditionally falls back to buffered I/O on requests | |
4587 | * that are logical_block_size-aligned but not fs-block aligned. | |
4588 | * | |
4589 | * The below logic implements this behavior. | |
4590 | */ | |
4591 | align = iocb->ki_pos | iov_iter_alignment(iter); | |
4592 | if (!IS_ALIGNED(align, i_blocksize(inode)) && | |
4593 | IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) | |
4594 | return false; | |
4595 | ||
4596 | return true; | |
4597 | } | |
4598 | ||
4599 | static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, | |
4600 | unsigned int flags) | |
4601 | { | |
4602 | struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); | |
4603 | ||
4604 | dec_page_count(sbi, F2FS_DIO_READ); | |
4605 | if (error) | |
4606 | return error; | |
4607 | f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size); | |
4608 | return 0; | |
4609 | } | |
4610 | ||
4611 | static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { | |
4612 | .end_io = f2fs_dio_read_end_io, | |
4613 | }; | |
4614 | ||
4615 | static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) | |
4616 | { | |
4617 | struct file *file = iocb->ki_filp; | |
4618 | struct inode *inode = file_inode(file); | |
4619 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4620 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
4621 | const loff_t pos = iocb->ki_pos; | |
4622 | const size_t count = iov_iter_count(to); | |
4623 | struct iomap_dio *dio; | |
4624 | ssize_t ret; | |
4625 | ||
4626 | if (count == 0) | |
4627 | return 0; /* skip atime update */ | |
4628 | ||
4629 | trace_f2fs_direct_IO_enter(inode, iocb, count, READ); | |
4630 | ||
4631 | if (iocb->ki_flags & IOCB_NOWAIT) { | |
4632 | if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { | |
4633 | ret = -EAGAIN; | |
4634 | goto out; | |
4635 | } | |
4636 | } else { | |
4637 | f2fs_down_read(&fi->i_gc_rwsem[READ]); | |
4638 | } | |
4639 | ||
4640 | /* dio is not compatible w/ atomic file */ | |
4641 | if (f2fs_is_atomic_file(inode)) { | |
4642 | f2fs_up_read(&fi->i_gc_rwsem[READ]); | |
4643 | ret = -EOPNOTSUPP; | |
4644 | goto out; | |
4645 | } | |
4646 | ||
4647 | /* | |
4648 | * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of | |
4649 | * the higher-level function iomap_dio_rw() in order to ensure that the | |
4650 | * F2FS_DIO_READ counter will be decremented correctly in all cases. | |
4651 | */ | |
4652 | inc_page_count(sbi, F2FS_DIO_READ); | |
4653 | dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, | |
4654 | &f2fs_iomap_dio_read_ops, 0, NULL, 0); | |
4655 | if (IS_ERR_OR_NULL(dio)) { | |
4656 | ret = PTR_ERR_OR_ZERO(dio); | |
4657 | if (ret != -EIOCBQUEUED) | |
4658 | dec_page_count(sbi, F2FS_DIO_READ); | |
4659 | } else { | |
4660 | ret = iomap_dio_complete(dio); | |
4661 | } | |
4662 | ||
4663 | f2fs_up_read(&fi->i_gc_rwsem[READ]); | |
4664 | ||
4665 | file_accessed(file); | |
4666 | out: | |
4667 | trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); | |
4668 | return ret; | |
4669 | } | |
4670 | ||
4671 | static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count, | |
4672 | int rw) | |
4673 | { | |
4674 | struct inode *inode = file_inode(file); | |
4675 | char *buf, *path; | |
4676 | ||
4677 | buf = f2fs_getname(F2FS_I_SB(inode)); | |
4678 | if (!buf) | |
4679 | return; | |
4680 | path = dentry_path_raw(file_dentry(file), buf, PATH_MAX); | |
4681 | if (IS_ERR(path)) | |
4682 | goto free_buf; | |
4683 | if (rw == WRITE) | |
4684 | trace_f2fs_datawrite_start(inode, pos, count, | |
4685 | current->pid, path, current->comm); | |
4686 | else | |
4687 | trace_f2fs_dataread_start(inode, pos, count, | |
4688 | current->pid, path, current->comm); | |
4689 | free_buf: | |
4690 | f2fs_putname(buf); | |
4691 | } | |
4692 | ||
4693 | static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) | |
4694 | { | |
4695 | struct inode *inode = file_inode(iocb->ki_filp); | |
4696 | const loff_t pos = iocb->ki_pos; | |
4697 | ssize_t ret; | |
4698 | ||
4699 | if (!f2fs_is_compress_backend_ready(inode)) | |
4700 | return -EOPNOTSUPP; | |
4701 | ||
4702 | if (trace_f2fs_dataread_start_enabled()) | |
4703 | f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, | |
4704 | iov_iter_count(to), READ); | |
4705 | ||
4706 | /* In LFS mode, if there is inflight dio, wait for its completion */ | |
4707 | if (f2fs_lfs_mode(F2FS_I_SB(inode)) && | |
4708 | get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) | |
4709 | inode_dio_wait(inode); | |
4710 | ||
4711 | if (f2fs_should_use_dio(inode, iocb, to)) { | |
4712 | ret = f2fs_dio_read_iter(iocb, to); | |
4713 | } else { | |
4714 | ret = filemap_read(iocb, to, 0); | |
4715 | if (ret > 0) | |
4716 | f2fs_update_iostat(F2FS_I_SB(inode), inode, | |
4717 | APP_BUFFERED_READ_IO, ret); | |
4718 | } | |
4719 | if (trace_f2fs_dataread_end_enabled()) | |
4720 | trace_f2fs_dataread_end(inode, pos, ret); | |
4721 | return ret; | |
4722 | } | |
4723 | ||
4724 | static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos, | |
4725 | struct pipe_inode_info *pipe, | |
4726 | size_t len, unsigned int flags) | |
4727 | { | |
4728 | struct inode *inode = file_inode(in); | |
4729 | const loff_t pos = *ppos; | |
4730 | ssize_t ret; | |
4731 | ||
4732 | if (!f2fs_is_compress_backend_ready(inode)) | |
4733 | return -EOPNOTSUPP; | |
4734 | ||
4735 | if (trace_f2fs_dataread_start_enabled()) | |
4736 | f2fs_trace_rw_file_path(in, pos, len, READ); | |
4737 | ||
4738 | ret = filemap_splice_read(in, ppos, pipe, len, flags); | |
4739 | if (ret > 0) | |
4740 | f2fs_update_iostat(F2FS_I_SB(inode), inode, | |
4741 | APP_BUFFERED_READ_IO, ret); | |
4742 | ||
4743 | if (trace_f2fs_dataread_end_enabled()) | |
4744 | trace_f2fs_dataread_end(inode, pos, ret); | |
4745 | return ret; | |
4746 | } | |
4747 | ||
4748 | static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) | |
4749 | { | |
4750 | struct file *file = iocb->ki_filp; | |
4751 | struct inode *inode = file_inode(file); | |
4752 | ssize_t count; | |
4753 | int err; | |
4754 | ||
4755 | if (IS_IMMUTABLE(inode)) | |
4756 | return -EPERM; | |
4757 | ||
4758 | if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) | |
4759 | return -EPERM; | |
4760 | ||
4761 | count = generic_write_checks(iocb, from); | |
4762 | if (count <= 0) | |
4763 | return count; | |
4764 | ||
4765 | err = file_modified(file); | |
4766 | if (err) | |
4767 | return err; | |
4768 | return count; | |
4769 | } | |
4770 | ||
4771 | /* | |
4772 | * Preallocate blocks for a write request, if it is possible and helpful to do | |
4773 | * so. Returns a positive number if blocks may have been preallocated, 0 if no | |
4774 | * blocks were preallocated, or a negative errno value if something went | |
4775 | * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the | |
4776 | * requested blocks (not just some of them) have been allocated. | |
4777 | */ | |
4778 | static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, | |
4779 | bool dio) | |
4780 | { | |
4781 | struct inode *inode = file_inode(iocb->ki_filp); | |
4782 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4783 | const loff_t pos = iocb->ki_pos; | |
4784 | const size_t count = iov_iter_count(iter); | |
4785 | struct f2fs_map_blocks map = {}; | |
4786 | int flag; | |
4787 | int ret; | |
4788 | ||
4789 | /* If it will be an out-of-place direct write, don't bother. */ | |
4790 | if (dio && f2fs_lfs_mode(sbi)) | |
4791 | return 0; | |
4792 | /* | |
4793 | * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into | |
4794 | * buffered IO, if DIO meets any holes. | |
4795 | */ | |
4796 | if (dio && i_size_read(inode) && | |
4797 | (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode)))) | |
4798 | return 0; | |
4799 | ||
4800 | /* No-wait I/O can't allocate blocks. */ | |
4801 | if (iocb->ki_flags & IOCB_NOWAIT) | |
4802 | return 0; | |
4803 | ||
4804 | /* If it will be a short write, don't bother. */ | |
4805 | if (fault_in_iov_iter_readable(iter, count)) | |
4806 | return 0; | |
4807 | ||
4808 | if (f2fs_has_inline_data(inode)) { | |
4809 | /* If the data will fit inline, don't bother. */ | |
4810 | if (pos + count <= MAX_INLINE_DATA(inode)) | |
4811 | return 0; | |
4812 | ret = f2fs_convert_inline_inode(inode); | |
4813 | if (ret) | |
4814 | return ret; | |
4815 | } | |
4816 | ||
4817 | /* Do not preallocate blocks that will be written partially in 4KB. */ | |
4818 | map.m_lblk = F2FS_BLK_ALIGN(pos); | |
4819 | map.m_len = F2FS_BYTES_TO_BLK(pos + count); | |
4820 | if (map.m_len > map.m_lblk) | |
4821 | map.m_len -= map.m_lblk; | |
4822 | else | |
4823 | return 0; | |
4824 | ||
4825 | if (!IS_DEVICE_ALIASING(inode)) | |
4826 | map.m_may_create = true; | |
4827 | if (dio) { | |
4828 | map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, | |
4829 | inode->i_write_hint); | |
4830 | flag = F2FS_GET_BLOCK_PRE_DIO; | |
4831 | } else { | |
4832 | map.m_seg_type = NO_CHECK_TYPE; | |
4833 | flag = F2FS_GET_BLOCK_PRE_AIO; | |
4834 | } | |
4835 | ||
4836 | ret = f2fs_map_blocks(inode, &map, flag); | |
4837 | /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */ | |
4838 | if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) | |
4839 | return ret; | |
4840 | if (ret == 0) | |
4841 | set_inode_flag(inode, FI_PREALLOCATED_ALL); | |
4842 | return map.m_len; | |
4843 | } | |
4844 | ||
4845 | static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, | |
4846 | struct iov_iter *from) | |
4847 | { | |
4848 | struct file *file = iocb->ki_filp; | |
4849 | struct inode *inode = file_inode(file); | |
4850 | ssize_t ret; | |
4851 | ||
4852 | if (iocb->ki_flags & IOCB_NOWAIT) | |
4853 | return -EOPNOTSUPP; | |
4854 | ||
4855 | ret = generic_perform_write(iocb, from); | |
4856 | ||
4857 | if (ret > 0) { | |
4858 | f2fs_update_iostat(F2FS_I_SB(inode), inode, | |
4859 | APP_BUFFERED_IO, ret); | |
4860 | } | |
4861 | return ret; | |
4862 | } | |
4863 | ||
4864 | static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, | |
4865 | unsigned int flags) | |
4866 | { | |
4867 | struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); | |
4868 | ||
4869 | dec_page_count(sbi, F2FS_DIO_WRITE); | |
4870 | if (error) | |
4871 | return error; | |
4872 | f2fs_update_time(sbi, REQ_TIME); | |
4873 | f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); | |
4874 | return 0; | |
4875 | } | |
4876 | ||
4877 | static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, | |
4878 | struct bio *bio, loff_t file_offset) | |
4879 | { | |
4880 | struct inode *inode = iter->inode; | |
4881 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4882 | enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); | |
4883 | enum temp_type temp = f2fs_get_segment_temp(sbi, type); | |
4884 | ||
4885 | bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); | |
4886 | submit_bio(bio); | |
4887 | } | |
4888 | ||
4889 | static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { | |
4890 | .end_io = f2fs_dio_write_end_io, | |
4891 | .submit_io = f2fs_dio_write_submit_io, | |
4892 | }; | |
4893 | ||
4894 | static void f2fs_flush_buffered_write(struct address_space *mapping, | |
4895 | loff_t start_pos, loff_t end_pos) | |
4896 | { | |
4897 | int ret; | |
4898 | ||
4899 | ret = filemap_write_and_wait_range(mapping, start_pos, end_pos); | |
4900 | if (ret < 0) | |
4901 | return; | |
4902 | invalidate_mapping_pages(mapping, | |
4903 | start_pos >> PAGE_SHIFT, | |
4904 | end_pos >> PAGE_SHIFT); | |
4905 | } | |
4906 | ||
4907 | static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, | |
4908 | bool *may_need_sync) | |
4909 | { | |
4910 | struct file *file = iocb->ki_filp; | |
4911 | struct inode *inode = file_inode(file); | |
4912 | struct f2fs_inode_info *fi = F2FS_I(inode); | |
4913 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | |
4914 | const bool do_opu = f2fs_lfs_mode(sbi); | |
4915 | const loff_t pos = iocb->ki_pos; | |
4916 | const ssize_t count = iov_iter_count(from); | |
4917 | unsigned int dio_flags; | |
4918 | struct iomap_dio *dio; | |
4919 | ssize_t ret; | |
4920 | ||
4921 | trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); | |
4922 | ||
4923 | if (iocb->ki_flags & IOCB_NOWAIT) { | |
4924 | /* f2fs_convert_inline_inode() and block allocation can block */ | |
4925 | if (f2fs_has_inline_data(inode) || | |
4926 | !f2fs_overwrite_io(inode, pos, count)) { | |
4927 | ret = -EAGAIN; | |
4928 | goto out; | |
4929 | } | |
4930 | ||
4931 | if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { | |
4932 | ret = -EAGAIN; | |
4933 | goto out; | |
4934 | } | |
4935 | if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { | |
4936 | f2fs_up_read(&fi->i_gc_rwsem[WRITE]); | |
4937 | ret = -EAGAIN; | |
4938 | goto out; | |
4939 | } | |
4940 | } else { | |
4941 | ret = f2fs_convert_inline_inode(inode); | |
4942 | if (ret) | |
4943 | goto out; | |
4944 | ||
4945 | f2fs_down_read(&fi->i_gc_rwsem[WRITE]); | |
4946 | if (do_opu) | |
4947 | f2fs_down_read(&fi->i_gc_rwsem[READ]); | |
4948 | } | |
4949 | ||
4950 | /* | |
4951 | * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of | |
4952 | * the higher-level function iomap_dio_rw() in order to ensure that the | |
4953 | * F2FS_DIO_WRITE counter will be decremented correctly in all cases. | |
4954 | */ | |
4955 | inc_page_count(sbi, F2FS_DIO_WRITE); | |
4956 | dio_flags = 0; | |
4957 | if (pos + count > inode->i_size) | |
4958 | dio_flags |= IOMAP_DIO_FORCE_WAIT; | |
4959 | dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, | |
4960 | &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); | |
4961 | if (IS_ERR_OR_NULL(dio)) { | |
4962 | ret = PTR_ERR_OR_ZERO(dio); | |
4963 | if (ret == -ENOTBLK) | |
4964 | ret = 0; | |
4965 | if (ret != -EIOCBQUEUED) | |
4966 | dec_page_count(sbi, F2FS_DIO_WRITE); | |
4967 | } else { | |
4968 | ret = iomap_dio_complete(dio); | |
4969 | } | |
4970 | ||
4971 | if (do_opu) | |
4972 | f2fs_up_read(&fi->i_gc_rwsem[READ]); | |
4973 | f2fs_up_read(&fi->i_gc_rwsem[WRITE]); | |
4974 | ||
4975 | if (ret < 0) | |
4976 | goto out; | |
4977 | if (pos + ret > inode->i_size) | |
4978 | f2fs_i_size_write(inode, pos + ret); | |
4979 | if (!do_opu) | |
4980 | set_inode_flag(inode, FI_UPDATE_WRITE); | |
4981 | ||
4982 | if (iov_iter_count(from)) { | |
4983 | ssize_t ret2; | |
4984 | loff_t bufio_start_pos = iocb->ki_pos; | |
4985 | ||
4986 | /* | |
4987 | * The direct write was partial, so we need to fall back to a | |
4988 | * buffered write for the remainder. | |
4989 | */ | |
4990 | ||
4991 | ret2 = f2fs_buffered_write_iter(iocb, from); | |
4992 | if (iov_iter_count(from)) | |
4993 | f2fs_write_failed(inode, iocb->ki_pos); | |
4994 | if (ret2 < 0) | |
4995 | goto out; | |
4996 | ||
4997 | /* | |
4998 | * Ensure that the pagecache pages are written to disk and | |
4999 | * invalidated to preserve the expected O_DIRECT semantics. | |
5000 | */ | |
5001 | if (ret2 > 0) { | |
5002 | loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; | |
5003 | ||
5004 | ret += ret2; | |
5005 | ||
5006 | f2fs_flush_buffered_write(file->f_mapping, | |
5007 | bufio_start_pos, | |
5008 | bufio_end_pos); | |
5009 | } | |
5010 | } else { | |
5011 | /* iomap_dio_rw() already handled the generic_write_sync(). */ | |
5012 | *may_need_sync = false; | |
5013 | } | |
5014 | out: | |
5015 | trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); | |
5016 | return ret; | |
5017 | } | |
5018 | ||
5019 | static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |
5020 | { | |
5021 | struct inode *inode = file_inode(iocb->ki_filp); | |
5022 | const loff_t orig_pos = iocb->ki_pos; | |
5023 | const size_t orig_count = iov_iter_count(from); | |
5024 | loff_t target_size; | |
5025 | bool dio; | |
5026 | bool may_need_sync = true; | |
5027 | int preallocated; | |
5028 | const loff_t pos = iocb->ki_pos; | |
5029 | const ssize_t count = iov_iter_count(from); | |
5030 | ssize_t ret; | |
5031 | ||
5032 | if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { | |
5033 | ret = -EIO; | |
5034 | goto out; | |
5035 | } | |
5036 | ||
5037 | if (!f2fs_is_compress_backend_ready(inode)) { | |
5038 | ret = -EOPNOTSUPP; | |
5039 | goto out; | |
5040 | } | |
5041 | ||
5042 | if (iocb->ki_flags & IOCB_NOWAIT) { | |
5043 | if (!inode_trylock(inode)) { | |
5044 | ret = -EAGAIN; | |
5045 | goto out; | |
5046 | } | |
5047 | } else { | |
5048 | inode_lock(inode); | |
5049 | } | |
5050 | ||
5051 | if (f2fs_is_pinned_file(inode) && | |
5052 | !f2fs_overwrite_io(inode, pos, count)) { | |
5053 | ret = -EIO; | |
5054 | goto out_unlock; | |
5055 | } | |
5056 | ||
5057 | ret = f2fs_write_checks(iocb, from); | |
5058 | if (ret <= 0) | |
5059 | goto out_unlock; | |
5060 | ||
5061 | /* Determine whether we will do a direct write or a buffered write. */ | |
5062 | dio = f2fs_should_use_dio(inode, iocb, from); | |
5063 | ||
5064 | /* dio is not compatible w/ atomic write */ | |
5065 | if (dio && f2fs_is_atomic_file(inode)) { | |
5066 | ret = -EOPNOTSUPP; | |
5067 | goto out_unlock; | |
5068 | } | |
5069 | ||
5070 | /* Possibly preallocate the blocks for the write. */ | |
5071 | target_size = iocb->ki_pos + iov_iter_count(from); | |
5072 | preallocated = f2fs_preallocate_blocks(iocb, from, dio); | |
5073 | if (preallocated < 0) { | |
5074 | ret = preallocated; | |
5075 | } else { | |
5076 | if (trace_f2fs_datawrite_start_enabled()) | |
5077 | f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, | |
5078 | orig_count, WRITE); | |
5079 | ||
5080 | /* Do the actual write. */ | |
5081 | ret = dio ? | |
5082 | f2fs_dio_write_iter(iocb, from, &may_need_sync) : | |
5083 | f2fs_buffered_write_iter(iocb, from); | |
5084 | ||
5085 | if (trace_f2fs_datawrite_end_enabled()) | |
5086 | trace_f2fs_datawrite_end(inode, orig_pos, ret); | |
5087 | } | |
5088 | ||
5089 | /* Don't leave any preallocated blocks around past i_size. */ | |
5090 | if (preallocated && i_size_read(inode) < target_size) { | |
5091 | f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
5092 | filemap_invalidate_lock(inode->i_mapping); | |
5093 | if (!f2fs_truncate(inode)) | |
5094 | file_dont_truncate(inode); | |
5095 | filemap_invalidate_unlock(inode->i_mapping); | |
5096 | f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); | |
5097 | } else { | |
5098 | file_dont_truncate(inode); | |
5099 | } | |
5100 | ||
5101 | clear_inode_flag(inode, FI_PREALLOCATED_ALL); | |
5102 | out_unlock: | |
5103 | inode_unlock(inode); | |
5104 | out: | |
5105 | trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); | |
5106 | ||
5107 | if (ret > 0 && may_need_sync) | |
5108 | ret = generic_write_sync(iocb, ret); | |
5109 | ||
5110 | /* If buffered IO was forced, flush and drop the data from | |
5111 | * the page cache to preserve O_DIRECT semantics | |
5112 | */ | |
5113 | if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT)) | |
5114 | f2fs_flush_buffered_write(iocb->ki_filp->f_mapping, | |
5115 | orig_pos, | |
5116 | orig_pos + ret - 1); | |
5117 | ||
5118 | return ret; | |
5119 | } | |
5120 | ||
5121 | static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, | |
5122 | int advice) | |
5123 | { | |
5124 | struct address_space *mapping; | |
5125 | struct backing_dev_info *bdi; | |
5126 | struct inode *inode = file_inode(filp); | |
5127 | int err; | |
5128 | ||
5129 | if (advice == POSIX_FADV_SEQUENTIAL) { | |
5130 | if (S_ISFIFO(inode->i_mode)) | |
5131 | return -ESPIPE; | |
5132 | ||
5133 | mapping = filp->f_mapping; | |
5134 | if (!mapping || len < 0) | |
5135 | return -EINVAL; | |
5136 | ||
5137 | bdi = inode_to_bdi(mapping->host); | |
5138 | filp->f_ra.ra_pages = bdi->ra_pages * | |
5139 | F2FS_I_SB(inode)->seq_file_ra_mul; | |
5140 | spin_lock(&filp->f_lock); | |
5141 | filp->f_mode &= ~FMODE_RANDOM; | |
5142 | spin_unlock(&filp->f_lock); | |
5143 | return 0; | |
5144 | } else if (advice == POSIX_FADV_WILLNEED && offset == 0) { | |
5145 | /* Load extent cache at the first readahead. */ | |
5146 | f2fs_precache_extents(inode); | |
5147 | } | |
5148 | ||
5149 | err = generic_fadvise(filp, offset, len, advice); | |
5150 | if (!err && advice == POSIX_FADV_DONTNEED && | |
5151 | test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && | |
5152 | f2fs_compressed_file(inode)) | |
5153 | f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); | |
5154 | ||
5155 | return err; | |
5156 | } | |
5157 | ||
5158 | #ifdef CONFIG_COMPAT | |
5159 | struct compat_f2fs_gc_range { | |
5160 | u32 sync; | |
5161 | compat_u64 start; | |
5162 | compat_u64 len; | |
5163 | }; | |
5164 | #define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ | |
5165 | struct compat_f2fs_gc_range) | |
5166 | ||
5167 | static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) | |
5168 | { | |
5169 | struct compat_f2fs_gc_range __user *urange; | |
5170 | struct f2fs_gc_range range; | |
5171 | int err; | |
5172 | ||
5173 | urange = compat_ptr(arg); | |
5174 | err = get_user(range.sync, &urange->sync); | |
5175 | err |= get_user(range.start, &urange->start); | |
5176 | err |= get_user(range.len, &urange->len); | |
5177 | if (err) | |
5178 | return -EFAULT; | |
5179 | ||
5180 | return __f2fs_ioc_gc_range(file, &range); | |
5181 | } | |
5182 | ||
5183 | struct compat_f2fs_move_range { | |
5184 | u32 dst_fd; | |
5185 | compat_u64 pos_in; | |
5186 | compat_u64 pos_out; | |
5187 | compat_u64 len; | |
5188 | }; | |
5189 | #define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ | |
5190 | struct compat_f2fs_move_range) | |
5191 | ||
5192 | static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) | |
5193 | { | |
5194 | struct compat_f2fs_move_range __user *urange; | |
5195 | struct f2fs_move_range range; | |
5196 | int err; | |
5197 | ||
5198 | urange = compat_ptr(arg); | |
5199 | err = get_user(range.dst_fd, &urange->dst_fd); | |
5200 | err |= get_user(range.pos_in, &urange->pos_in); | |
5201 | err |= get_user(range.pos_out, &urange->pos_out); | |
5202 | err |= get_user(range.len, &urange->len); | |
5203 | if (err) | |
5204 | return -EFAULT; | |
5205 | ||
5206 | return __f2fs_ioc_move_range(file, &range); | |
5207 | } | |
5208 | ||
5209 | long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |
5210 | { | |
5211 | if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) | |
5212 | return -EIO; | |
5213 | if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) | |
5214 | return -ENOSPC; | |
5215 | ||
5216 | switch (cmd) { | |
5217 | case FS_IOC32_GETVERSION: | |
5218 | cmd = FS_IOC_GETVERSION; | |
5219 | break; | |
5220 | case F2FS_IOC32_GARBAGE_COLLECT_RANGE: | |
5221 | return f2fs_compat_ioc_gc_range(file, arg); | |
5222 | case F2FS_IOC32_MOVE_RANGE: | |
5223 | return f2fs_compat_ioc_move_range(file, arg); | |
5224 | case F2FS_IOC_START_ATOMIC_WRITE: | |
5225 | case F2FS_IOC_START_ATOMIC_REPLACE: | |
5226 | case F2FS_IOC_COMMIT_ATOMIC_WRITE: | |
5227 | case F2FS_IOC_START_VOLATILE_WRITE: | |
5228 | case F2FS_IOC_RELEASE_VOLATILE_WRITE: | |
5229 | case F2FS_IOC_ABORT_ATOMIC_WRITE: | |
5230 | case F2FS_IOC_SHUTDOWN: | |
5231 | case FITRIM: | |
5232 | case FS_IOC_SET_ENCRYPTION_POLICY: | |
5233 | case FS_IOC_GET_ENCRYPTION_PWSALT: | |
5234 | case FS_IOC_GET_ENCRYPTION_POLICY: | |
5235 | case FS_IOC_GET_ENCRYPTION_POLICY_EX: | |
5236 | case FS_IOC_ADD_ENCRYPTION_KEY: | |
5237 | case FS_IOC_REMOVE_ENCRYPTION_KEY: | |
5238 | case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: | |
5239 | case FS_IOC_GET_ENCRYPTION_KEY_STATUS: | |
5240 | case FS_IOC_GET_ENCRYPTION_NONCE: | |
5241 | case F2FS_IOC_GARBAGE_COLLECT: | |
5242 | case F2FS_IOC_WRITE_CHECKPOINT: | |
5243 | case F2FS_IOC_DEFRAGMENT: | |
5244 | case F2FS_IOC_FLUSH_DEVICE: | |
5245 | case F2FS_IOC_GET_FEATURES: | |
5246 | case F2FS_IOC_GET_PIN_FILE: | |
5247 | case F2FS_IOC_SET_PIN_FILE: | |
5248 | case F2FS_IOC_PRECACHE_EXTENTS: | |
5249 | case F2FS_IOC_RESIZE_FS: | |
5250 | case FS_IOC_ENABLE_VERITY: | |
5251 | case FS_IOC_MEASURE_VERITY: | |
5252 | case FS_IOC_READ_VERITY_METADATA: | |
5253 | case FS_IOC_GETFSLABEL: | |
5254 | case FS_IOC_SETFSLABEL: | |
5255 | case F2FS_IOC_GET_COMPRESS_BLOCKS: | |
5256 | case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: | |
5257 | case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: | |
5258 | case F2FS_IOC_SEC_TRIM_FILE: | |
5259 | case F2FS_IOC_GET_COMPRESS_OPTION: | |
5260 | case F2FS_IOC_SET_COMPRESS_OPTION: | |
5261 | case F2FS_IOC_DECOMPRESS_FILE: | |
5262 | case F2FS_IOC_COMPRESS_FILE: | |
5263 | case F2FS_IOC_GET_DEV_ALIAS_FILE: | |
5264 | break; | |
5265 | default: | |
5266 | return -ENOIOCTLCMD; | |
5267 | } | |
5268 | return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); | |
5269 | } | |
5270 | #endif | |
5271 | ||
5272 | const struct file_operations f2fs_file_operations = { | |
5273 | .llseek = f2fs_llseek, | |
5274 | .read_iter = f2fs_file_read_iter, | |
5275 | .write_iter = f2fs_file_write_iter, | |
5276 | .iopoll = iocb_bio_iopoll, | |
5277 | .open = f2fs_file_open, | |
5278 | .release = f2fs_release_file, | |
5279 | .mmap = f2fs_file_mmap, | |
5280 | .flush = f2fs_file_flush, | |
5281 | .fsync = f2fs_sync_file, | |
5282 | .fallocate = f2fs_fallocate, | |
5283 | .unlocked_ioctl = f2fs_ioctl, | |
5284 | #ifdef CONFIG_COMPAT | |
5285 | .compat_ioctl = f2fs_compat_ioctl, | |
5286 | #endif | |
5287 | .splice_read = f2fs_file_splice_read, | |
5288 | .splice_write = iter_file_splice_write, | |
5289 | .fadvise = f2fs_file_fadvise, | |
5290 | .fop_flags = FOP_BUFFER_RASYNC, | |
5291 | }; |