1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/sched/mm.h>
13 #include <linux/prefetch.h>
14 #include <linux/kthread.h>
15 #include <linux/swap.h>
16 #include <linux/timer.h>
17 #include <linux/freezer.h>
18 #include <linux/sched/signal.h>
19 #include <linux/random.h>
20
21 #include "f2fs.h"
22 #include "segment.h"
23 #include "node.h"
24 #include "gc.h"
25 #include "iostat.h"
26 #include <trace/events/f2fs.h>
27
28 #define __reverse_ffz(x) __reverse_ffs(~(x))
29
30 static struct kmem_cache *discard_entry_slab;
31 static struct kmem_cache *discard_cmd_slab;
32 static struct kmem_cache *sit_entry_set_slab;
33 static struct kmem_cache *revoke_entry_slab;
34
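/*
 * Build a native unsigned long from @str with str[0] placed in the most
 * significant byte, so that the byte-reversed bitmaps written by
 * f2fs_set_bit() can be scanned with the ordinary bit helpers below.
 */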
35 static unsigned long __reverse_ulong(unsigned char *str)
36 {
37         unsigned long tmp = 0;
38         int shift = 24, idx = 0;
39
40 #if BITS_PER_LONG == 64
41         shift = 56;
42 #endif
43         while (shift >= 0) {
44                 tmp |= (unsigned long)str[idx++] << shift;
45                 shift -= BITS_PER_BYTE;
46         }
47         return tmp;
48 }
49
50 /*
51  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
52  * MSB and LSB are reversed in a byte by f2fs_set_bit.
53  */
54 static inline unsigned long __reverse_ffs(unsigned long word)
55 {
56         int num = 0;
57
58 #if BITS_PER_LONG == 64
59         if ((word & 0xffffffff00000000UL) == 0)
60                 num += 32;
61         else
62                 word >>= 32;
63 #endif
64         if ((word & 0xffff0000) == 0)
65                 num += 16;
66         else
67                 word >>= 16;
68
69         if ((word & 0xff00) == 0)
70                 num += 8;
71         else
72                 word >>= 8;
73
74         if ((word & 0xf0) == 0)
75                 num += 4;
76         else
77                 word >>= 4;
78
79         if ((word & 0xc) == 0)
80                 num += 2;
81         else
82                 word >>= 2;
83
84         if ((word & 0x2) == 0)
85                 num += 1;
86         return num;
87 }
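
/*
 * Note: unlike __ffs(), the halving search above walks down from the most
 * significant half, so the returned index counts from the MSB.  Combined
 * with __reverse_ulong(), that index is exactly the bit number passed to
 * f2fs_set_bit(): e.g. f2fs_set_bit(0, bitmap) sets byte 0 to 1000 0000,
 * which lands in the MSB of the reversed word and yields index 0 here.
 */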
88
89 /*
90  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
91  * f2fs_set_bit makes MSB and LSB reversed in a byte.
92  * @size must be an integral multiple of the bit width of unsigned long.
93  * Example:
94  *                             MSB <--> LSB
95  *   f2fs_set_bit(0, bitmap) => 1000 0000
96  *   f2fs_set_bit(7, bitmap) => 0000 0001
97  */
98 static unsigned long __find_rev_next_bit(const unsigned long *addr,
99                         unsigned long size, unsigned long offset)
100 {
101         const unsigned long *p = addr + BIT_WORD(offset);
102         unsigned long result = size;
103         unsigned long tmp;
104
105         if (offset >= size)
106                 return size;
107
108         size -= (offset & ~(BITS_PER_LONG - 1));
109         offset %= BITS_PER_LONG;
110
111         while (1) {
112                 if (*p == 0)
113                         goto pass;
114
115                 tmp = __reverse_ulong((unsigned char *)p);
116
117                 tmp &= ~0UL >> offset;
118                 if (size < BITS_PER_LONG)
119                         tmp &= (~0UL << (BITS_PER_LONG - size));
120                 if (tmp)
121                         goto found;
122 pass:
123                 if (size <= BITS_PER_LONG)
124                         break;
125                 size -= BITS_PER_LONG;
126                 offset = 0;
127                 p++;
128         }
129         return result;
130 found:
131         return result - size + __reverse_ffs(tmp);
132 }
133
134 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
135                         unsigned long size, unsigned long offset)
136 {
137         const unsigned long *p = addr + BIT_WORD(offset);
138         unsigned long result = size;
139         unsigned long tmp;
140
141         if (offset >= size)
142                 return size;
143
144         size -= (offset & ~(BITS_PER_LONG - 1));
145         offset %= BITS_PER_LONG;
146
147         while (1) {
148                 if (*p == ~0UL)
149                         goto pass;
150
151                 tmp = __reverse_ulong((unsigned char *)p);
152
153                 if (offset)
154                         tmp |= ~0UL << (BITS_PER_LONG - offset);
155                 if (size < BITS_PER_LONG)
156                         tmp |= ~0UL >> size;
157                 if (tmp != ~0UL)
158                         goto found;
159 pass:
160                 if (size <= BITS_PER_LONG)
161                         break;
162                 size -= BITS_PER_LONG;
163                 offset = 0;
164                 p++;
165         }
166         return result;
167 found:
168         return result - size + __reverse_ffz(tmp);
169 }
170
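/*
 * Decide whether block allocation should fall back to SSR (reusing invalid
 * blocks inside dirty segments): never in LFS mode, always under urgent GC
 * or when checkpointing is disabled, and otherwise only when the number of
 * free sections drops near the reserved/minimum SSR watermark.
 */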
171 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
172 {
173         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
174         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
175         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
176
177         if (f2fs_lfs_mode(sbi))
178                 return false;
179         if (sbi->gc_mode == GC_URGENT_HIGH)
180                 return true;
181         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
182                 return true;
183
184         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
185                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
186 }
187
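/*
 * Tear down an in-flight atomic write: drop the FI_ATOMIC_* state, mark the
 * inode dirty if a deferred update was pending and, when @clean is set,
 * truncate the page cache and restore the pre-atomic i_size.
 */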
188 void f2fs_abort_atomic_write(struct inode *inode, bool clean)
189 {
190         struct f2fs_inode_info *fi = F2FS_I(inode);
191
192         if (!f2fs_is_atomic_file(inode))
193                 return;
194
195         if (clean)
196                 truncate_inode_pages_final(inode->i_mapping);
197
198         release_atomic_write_cnt(inode);
199         clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
200         clear_inode_flag(inode, FI_ATOMIC_REPLACE);
201         clear_inode_flag(inode, FI_ATOMIC_FILE);
202         if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
203                 clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
204                 f2fs_mark_inode_dirty_sync(inode, true);
205         }
206         stat_dec_atomic_inode(inode);
207
208         F2FS_I(inode)->atomic_write_task = NULL;
209
210         if (clean) {
211                 f2fs_i_size_write(inode, fi->original_i_size);
212                 fi->original_i_size = 0;
213         }
214         /* avoid stale dirty inode during eviction */
215         sync_inode_metadata(inode, 0);
216 }
217
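/*
 * Swap the block address at @index with @new_addr.  During commit the old
 * address is returned through @old_addr so the change can later be revoked;
 * with @recover set, the previously saved address is written back instead.
 */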
218 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
219                         block_t new_addr, block_t *old_addr, bool recover)
220 {
221         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
222         struct dnode_of_data dn;
223         struct node_info ni;
224         int err;
225
226 retry:
227         set_new_dnode(&dn, inode, NULL, NULL, 0);
228         err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
229         if (err) {
230                 if (err == -ENOMEM) {
231                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
232                         goto retry;
233                 }
234                 return err;
235         }
236
237         err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
238         if (err) {
239                 f2fs_put_dnode(&dn);
240                 return err;
241         }
242
243         if (recover) {
244                 /* dn.data_blkaddr is always valid */
245                 if (!__is_valid_data_blkaddr(new_addr)) {
246                         if (new_addr == NULL_ADDR)
247                                 dec_valid_block_count(sbi, inode, 1);
248                         f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
249                         f2fs_update_data_blkaddr(&dn, new_addr);
250                 } else {
251                         f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
252                                 new_addr, ni.version, true, true);
253                 }
254         } else {
255                 blkcnt_t count = 1;
256
257                 err = inc_valid_block_count(sbi, inode, &count, true);
258                 if (err) {
259                         f2fs_put_dnode(&dn);
260                         return err;
261                 }
262
263                 *old_addr = dn.data_blkaddr;
264                 f2fs_truncate_data_blocks_range(&dn, 1);
265                 dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
266
267                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
268                                         ni.version, true, false);
269         }
270
271         f2fs_put_dnode(&dn);
272
273         trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
274                         index, old_addr ? *old_addr : 0, new_addr, recover);
275         return 0;
276 }
277
278 static void __complete_revoke_list(struct inode *inode, struct list_head *head,
279                                         bool revoke)
280 {
281         struct revoke_entry *cur, *tmp;
282         pgoff_t start_index = 0;
283         bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
284
285         list_for_each_entry_safe(cur, tmp, head, list) {
286                 if (revoke) {
287                         __replace_atomic_write_block(inode, cur->index,
288                                                 cur->old_addr, NULL, true);
289                 } else if (truncate) {
290                         f2fs_truncate_hole(inode, start_index, cur->index);
291                         start_index = cur->index + 1;
292                 }
293
294                 list_del(&cur->list);
295                 kmem_cache_free(revoke_entry_slab, cur);
296         }
297
298         if (!revoke && truncate)
299                 f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
300 }
301
302 static int __f2fs_commit_atomic_write(struct inode *inode)
303 {
304         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
305         struct f2fs_inode_info *fi = F2FS_I(inode);
306         struct inode *cow_inode = fi->cow_inode;
307         struct revoke_entry *new;
308         struct list_head revoke_list;
309         block_t blkaddr;
310         struct dnode_of_data dn;
311         pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
312         pgoff_t off = 0, blen, index;
313         int ret = 0, i;
314
315         INIT_LIST_HEAD(&revoke_list);
316
317         while (len) {
318                 blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
319
320                 set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
321                 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
322                 if (ret && ret != -ENOENT) {
323                         goto out;
324                 } else if (ret == -ENOENT) {
325                         ret = 0;
326                         if (dn.max_level == 0)
327                                 goto out;
328                         goto next;
329                 }
330
331                 blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
332                                 len);
333                 index = off;
334                 for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
335                         blkaddr = f2fs_data_blkaddr(&dn);
336
337                         if (!__is_valid_data_blkaddr(blkaddr)) {
338                                 continue;
339                         } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
340                                         DATA_GENERIC_ENHANCE)) {
341                                 f2fs_put_dnode(&dn);
342                                 ret = -EFSCORRUPTED;
343                                 goto out;
344                         }
345
346                         new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
347                                                         true, NULL);
348
349                         ret = __replace_atomic_write_block(inode, index, blkaddr,
350                                                         &new->old_addr, false);
351                         if (ret) {
352                                 f2fs_put_dnode(&dn);
353                                 kmem_cache_free(revoke_entry_slab, new);
354                                 goto out;
355                         }
356
357                         f2fs_update_data_blkaddr(&dn, NULL_ADDR);
358                         new->index = index;
359                         list_add_tail(&new->list, &revoke_list);
360                 }
361                 f2fs_put_dnode(&dn);
362 next:
363                 off += blen;
364                 len -= blen;
365         }
366
367 out:
368         if (ret) {
369                 sbi->revoked_atomic_block += fi->atomic_write_cnt;
370         } else {
371                 sbi->committed_atomic_block += fi->atomic_write_cnt;
372                 set_inode_flag(inode, FI_ATOMIC_COMMITTED);
373                 if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
374                         clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
375                         f2fs_mark_inode_dirty_sync(inode, true);
376                 }
377         }
378
379         __complete_revoke_list(inode, &revoke_list, ret ? true : false);
380
381         return ret;
382 }
383
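/*
 * Commit an atomic write: flush the dirty pages first, then, with the GC
 * rwsem and f2fs_lock_op() held, move the COW inode's blocks into the
 * original file via __f2fs_commit_atomic_write().
 */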
384 int f2fs_commit_atomic_write(struct inode *inode)
385 {
386         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
387         struct f2fs_inode_info *fi = F2FS_I(inode);
388         int err;
389
390         err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
391         if (err)
392                 return err;
393
394         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
395         f2fs_lock_op(sbi);
396
397         err = __f2fs_commit_atomic_write(inode);
398
399         f2fs_unlock_op(sbi);
400         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
401
402         return err;
403 }
404
405 /*
406  * This function balances dirty node and dentry pages.
407  * In addition, it controls garbage collection.
408  */
409 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
410 {
411         if (f2fs_cp_error(sbi))
412                 return;
413
414         if (time_to_inject(sbi, FAULT_CHECKPOINT))
415                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
416
417         /* the background balancing may be left pending */
418         if (need && excess_cached_nats(sbi))
419                 f2fs_balance_fs_bg(sbi, false);
420
421         if (!f2fs_is_checkpoint_ready(sbi))
422                 return;
423
424         /*
425          * We should do GC, or end up doing a checkpoint, if there are too many
426          * dirty dir/node pages without enough free segments.
427          */
428         if (has_enough_free_secs(sbi, 0, 0))
429                 return;
430
431         if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
432                                 sbi->gc_thread->f2fs_gc_task) {
433                 DEFINE_WAIT(wait);
434
435                 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
436                                         TASK_UNINTERRUPTIBLE);
437                 wake_up(&sbi->gc_thread->gc_wait_queue_head);
438                 io_schedule();
439                 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
440         } else {
441                 struct f2fs_gc_control gc_control = {
442                         .victim_segno = NULL_SEGNO,
443                         .init_gc_type = BG_GC,
444                         .no_bg_gc = true,
445                         .should_migrate_blocks = false,
446                         .err_gc_skipped = false,
447                         .nr_free_secs = 1 };
448                 f2fs_down_write(&sbi->gc_lock);
449                 stat_inc_gc_call_count(sbi, FOREGROUND);
450                 f2fs_gc(sbi, &gc_control);
451         }
452 }
453
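/*
 * Heuristic used by f2fs_balance_fs_bg(): report dirty-page pressure when
 * any single class of dirty pages, or their combined total, crosses a
 * threshold derived from the segment size (the limit is raised while
 * cp_rwsem is held).
 */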
454 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
455 {
456         int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
457         unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
458         unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
459         unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
460         unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
461         unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
462         unsigned int threshold =
463                 SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
464         unsigned int global_threshold = threshold * 3 / 2;
465
466         if (dents >= threshold || qdata >= threshold ||
467                 nodes >= threshold || meta >= threshold ||
468                 imeta >= threshold)
469                 return true;
470         return dents + qdata + nodes + meta + imeta >  global_threshold;
471 }
472
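/*
 * Background counterpart of f2fs_balance_fs(): shrink the extent, NAT and
 * free-nid caches when memory runs short, and trigger a checkpoint when
 * dirty metadata, prefree segments or the periodic timeout demand it.
 */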
473 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
474 {
475         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
476                 return;
477
478         /* try to shrink the read extent cache when there is not enough memory */
479         if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
480                 f2fs_shrink_read_extent_tree(sbi,
481                                 READ_EXTENT_CACHE_SHRINK_NUMBER);
482
483         /* try to shrink the age extent cache when there is not enough memory */
484         if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
485                 f2fs_shrink_age_extent_tree(sbi,
486                                 AGE_EXTENT_CACHE_SHRINK_NUMBER);
487
488         /* check the # of cached NAT entries */
489         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
490                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
491
492         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
493                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
494         else
495                 f2fs_build_free_nids(sbi, false, false);
496
497         if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
498                 excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
499                 goto do_sync;
500
501         /* there is in-flight background IO, or a foreground operation occurred recently */
502         if (is_inflight_io(sbi, REQ_TIME) ||
503                 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
504                 return;
505
506         /* the periodic checkpoint timeout threshold has been exceeded */
507         if (f2fs_time_over(sbi, CP_TIME))
508                 goto do_sync;
509
510         /* checkpoint is the only way to shrink partial cached entries */
511         if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
512                 f2fs_available_free_memory(sbi, INO_ENTRIES))
513                 return;
514
515 do_sync:
516         if (test_opt(sbi, DATA_FLUSH) && from_bg) {
517                 struct blk_plug plug;
518
519                 mutex_lock(&sbi->flush_lock);
520
521                 blk_start_plug(&plug);
522                 f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
523                 blk_finish_plug(&plug);
524
525                 mutex_unlock(&sbi->flush_lock);
526         }
527         stat_inc_cp_call_count(sbi, BACKGROUND);
528         f2fs_sync_fs(sbi->sb, 1);
529 }
530
531 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
532                                 struct block_device *bdev)
533 {
534         int ret = blkdev_issue_flush(bdev);
535
536         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
537                                 test_opt(sbi, FLUSH_MERGE), ret);
538         if (!ret)
539                 f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
540         return ret;
541 }
542
543 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
544 {
545         int ret = 0;
546         int i;
547
548         if (!f2fs_is_multi_device(sbi))
549                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
550
551         for (i = 0; i < sbi->s_ndevs; i++) {
552                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
553                         continue;
554                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
555                 if (ret)
556                         break;
557         }
558         return ret;
559 }
560
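/*
 * Flush-merge worker: drain the lock-less issue_list, submit a single flush
 * on behalf of every queued request, and complete all waiters with the same
 * result.
 */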
561 static int issue_flush_thread(void *data)
562 {
563         struct f2fs_sb_info *sbi = data;
564         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
565         wait_queue_head_t *q = &fcc->flush_wait_queue;
566 repeat:
567         if (kthread_should_stop())
568                 return 0;
569
570         if (!llist_empty(&fcc->issue_list)) {
571                 struct flush_cmd *cmd, *next;
572                 int ret;
573
574                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
575                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
576
577                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
578
579                 ret = submit_flush_wait(sbi, cmd->ino);
580                 atomic_inc(&fcc->issued_flush);
581
582                 llist_for_each_entry_safe(cmd, next,
583                                           fcc->dispatch_list, llnode) {
584                         cmd->ret = ret;
585                         complete(&cmd->wait);
586                 }
587                 fcc->dispatch_list = NULL;
588         }
589
590         wait_event_interruptible(*q,
591                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
592         goto repeat;
593 }
594
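/*
 * Issue a cache flush for @ino.  Without FLUSH_MERGE, or when this is the
 * only pending request (or on multi-device setups), the flush is submitted
 * directly; otherwise the request is queued so that the issue_flush thread,
 * or another waiter draining the list, can merge it with concurrent flushes.
 */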
595 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
596 {
597         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
598         struct flush_cmd cmd;
599         int ret;
600
601         if (test_opt(sbi, NOBARRIER))
602                 return 0;
603
604         if (!test_opt(sbi, FLUSH_MERGE)) {
605                 atomic_inc(&fcc->queued_flush);
606                 ret = submit_flush_wait(sbi, ino);
607                 atomic_dec(&fcc->queued_flush);
608                 atomic_inc(&fcc->issued_flush);
609                 return ret;
610         }
611
612         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
613             f2fs_is_multi_device(sbi)) {
614                 ret = submit_flush_wait(sbi, ino);
615                 atomic_dec(&fcc->queued_flush);
616
617                 atomic_inc(&fcc->issued_flush);
618                 return ret;
619         }
620
621         cmd.ino = ino;
622         init_completion(&cmd.wait);
623
624         llist_add(&cmd.llnode, &fcc->issue_list);
625
626         /*
627          * Make the issue_list update visible before we wake up the issue_flush
628          * thread; this smp_mb() pairs with another barrier in ___wait_event().
629          * See the comments of waitqueue_active() for more details.
630          */
631         smp_mb();
632
633         if (waitqueue_active(&fcc->flush_wait_queue))
634                 wake_up(&fcc->flush_wait_queue);
635
636         if (fcc->f2fs_issue_flush) {
637                 wait_for_completion(&cmd.wait);
638                 atomic_dec(&fcc->queued_flush);
639         } else {
640                 struct llist_node *list;
641
642                 list = llist_del_all(&fcc->issue_list);
643                 if (!list) {
644                         wait_for_completion(&cmd.wait);
645                         atomic_dec(&fcc->queued_flush);
646                 } else {
647                         struct flush_cmd *tmp, *next;
648
649                         ret = submit_flush_wait(sbi, ino);
650
651                         llist_for_each_entry_safe(tmp, next, list, llnode) {
652                                 if (tmp == &cmd) {
653                                         cmd.ret = ret;
654                                         atomic_dec(&fcc->queued_flush);
655                                         continue;
656                                 }
657                                 tmp->ret = ret;
658                                 complete(&tmp->wait);
659                         }
660                 }
661         }
662
663         return cmd.ret;
664 }
665
666 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
667 {
668         dev_t dev = sbi->sb->s_bdev->bd_dev;
669         struct flush_cmd_control *fcc;
670
671         if (SM_I(sbi)->fcc_info) {
672                 fcc = SM_I(sbi)->fcc_info;
673                 if (fcc->f2fs_issue_flush)
674                         return 0;
675                 goto init_thread;
676         }
677
678         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
679         if (!fcc)
680                 return -ENOMEM;
681         atomic_set(&fcc->issued_flush, 0);
682         atomic_set(&fcc->queued_flush, 0);
683         init_waitqueue_head(&fcc->flush_wait_queue);
684         init_llist_head(&fcc->issue_list);
685         SM_I(sbi)->fcc_info = fcc;
686         if (!test_opt(sbi, FLUSH_MERGE))
687                 return 0;
688
689 init_thread:
690         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
691                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
692         if (IS_ERR(fcc->f2fs_issue_flush)) {
693                 int err = PTR_ERR(fcc->f2fs_issue_flush);
694
695                 fcc->f2fs_issue_flush = NULL;
696                 return err;
697         }
698
699         return 0;
700 }
701
702 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
703 {
704         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
705
706         if (fcc && fcc->f2fs_issue_flush) {
707                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
708
709                 fcc->f2fs_issue_flush = NULL;
710                 kthread_stop(flush_thread);
711         }
712         if (free) {
713                 kfree(fcc);
714                 SM_I(sbi)->fcc_info = NULL;
715         }
716 }
717
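/*
 * On multi-device setups, flush the write cache of every extra device that
 * has been dirtied, retrying a few times and stopping checkpointing if the
 * flush keeps failing.
 */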
718 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
719 {
720         int ret = 0, i;
721
722         if (!f2fs_is_multi_device(sbi))
723                 return 0;
724
725         if (test_opt(sbi, NOBARRIER))
726                 return 0;
727
728         for (i = 1; i < sbi->s_ndevs; i++) {
729                 int count = DEFAULT_RETRY_IO_COUNT;
730
731                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
732                         continue;
733
734                 do {
735                         ret = __submit_flush_wait(sbi, FDEV(i).bdev);
736                         if (ret)
737                                 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
738                 } while (ret && --count);
739
740                 if (ret) {
741                         f2fs_stop_checkpoint(sbi, false,
742                                         STOP_CP_REASON_FLUSH_FAIL);
743                         break;
744                 }
745
746                 spin_lock(&sbi->dev_lock);
747                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
748                 spin_unlock(&sbi->dev_lock);
749         }
750
751         return ret;
752 }
753
754 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
755                 enum dirty_type dirty_type)
756 {
757         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
758
759         /* need not be added */
760         if (IS_CURSEG(sbi, segno))
761                 return;
762
763         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
764                 dirty_i->nr_dirty[dirty_type]++;
765
766         if (dirty_type == DIRTY) {
767                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
768                 enum dirty_type t = sentry->type;
769
770                 if (unlikely(t >= DIRTY)) {
771                         f2fs_bug_on(sbi, 1);
772                         return;
773                 }
774                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
775                         dirty_i->nr_dirty[t]++;
776
777                 if (__is_large_section(sbi)) {
778                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
779                         block_t valid_blocks =
780                                 get_valid_blocks(sbi, segno, true);
781
782                         f2fs_bug_on(sbi,
783                                 (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
784                                 !valid_blocks) ||
785                                 valid_blocks == CAP_BLKS_PER_SEC(sbi));
786
787                         if (!IS_CURSEC(sbi, secno))
788                                 set_bit(secno, dirty_i->dirty_secmap);
789                 }
790         }
791 }
792
793 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
794                 enum dirty_type dirty_type)
795 {
796         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
797         block_t valid_blocks;
798
799         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
800                 dirty_i->nr_dirty[dirty_type]--;
801
802         if (dirty_type == DIRTY) {
803                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
804                 enum dirty_type t = sentry->type;
805
806                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
807                         dirty_i->nr_dirty[t]--;
808
809                 valid_blocks = get_valid_blocks(sbi, segno, true);
810                 if (valid_blocks == 0) {
811                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
812                                                 dirty_i->victim_secmap);
813 #ifdef CONFIG_F2FS_CHECK_FS
814                         clear_bit(segno, SIT_I(sbi)->invalid_segmap);
815 #endif
816                 }
817                 if (__is_large_section(sbi)) {
818                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
819
820                         if (!valid_blocks ||
821                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
822                                 clear_bit(secno, dirty_i->dirty_secmap);
823                                 return;
824                         }
825
826                         if (!IS_CURSEC(sbi, secno))
827                                 set_bit(secno, dirty_i->dirty_secmap);
828                 }
829         }
830 }
831
832 /*
833  * Errors such as -ENOMEM should not occur here, since adding a dirty
834  * entry into the seglist is not a critical operation.
835  * If a given segment is one of the current working segments, it won't be added.
836  */
837 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
838 {
839         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
840         unsigned short valid_blocks, ckpt_valid_blocks;
841         unsigned int usable_blocks;
842
843         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
844                 return;
845
846         usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
847         mutex_lock(&dirty_i->seglist_lock);
848
849         valid_blocks = get_valid_blocks(sbi, segno, false);
850         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
851
852         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
853                 ckpt_valid_blocks == usable_blocks)) {
854                 __locate_dirty_segment(sbi, segno, PRE);
855                 __remove_dirty_segment(sbi, segno, DIRTY);
856         } else if (valid_blocks < usable_blocks) {
857                 __locate_dirty_segment(sbi, segno, DIRTY);
858         } else {
859                 /* Recovery routine with SSR needs this */
860                 __remove_dirty_segment(sbi, segno, DIRTY);
861         }
862
863         mutex_unlock(&dirty_i->seglist_lock);
864 }
865
866 /* This moves currently empty dirty segments to prefree. Must hold seglist_lock */
867 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
868 {
869         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
870         unsigned int segno;
871
872         mutex_lock(&dirty_i->seglist_lock);
873         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
874                 if (get_valid_blocks(sbi, segno, false))
875                         continue;
876                 if (IS_CURSEG(sbi, segno))
877                         continue;
878                 __locate_dirty_segment(sbi, segno, PRE);
879                 __remove_dirty_segment(sbi, segno, DIRTY);
880         }
881         mutex_unlock(&dirty_i->seglist_lock);
882 }
883
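/*
 * With checkpointing disabled, invalidated blocks in dirty segments cannot
 * be reused.  Sum those holes separately for data and node segments and
 * report how far the larger of the two exceeds the overprovision area.
 */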
884 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
885 {
886         int ovp_hole_segs =
887                 (overprovision_segments(sbi) - reserved_segments(sbi));
888         block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
889         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
890         block_t holes[2] = {0, 0};      /* DATA and NODE */
891         block_t unusable;
892         struct seg_entry *se;
893         unsigned int segno;
894
895         mutex_lock(&dirty_i->seglist_lock);
896         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
897                 se = get_seg_entry(sbi, segno);
898                 if (IS_NODESEG(se->type))
899                         holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
900                                                         se->valid_blocks;
901                 else
902                         holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
903                                                         se->valid_blocks;
904         }
905         mutex_unlock(&dirty_i->seglist_lock);
906
907         unusable = max(holes[DATA], holes[NODE]);
908         if (unusable > ovp_holes)
909                 return unusable - ovp_holes;
910         return 0;
911 }
912
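/*
 * Called when checkpointing is being disabled again: fail with -EAGAIN if
 * the unusable space exceeds the configured cap or if free sections and
 * overprovision room are already scarce.
 */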
913 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
914 {
915         int ovp_hole_segs =
916                 (overprovision_segments(sbi) - reserved_segments(sbi));
917
918         if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
919                 return 0;
920         if (unusable > F2FS_OPTION(sbi).unusable_cap)
921                 return -EAGAIN;
922         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
923                 dirty_segments(sbi) > ovp_hole_segs)
924                 return -EAGAIN;
925         if (has_not_enough_free_secs(sbi, 0, 0))
926                 return -EAGAIN;
927         return 0;
928 }
929
930 /* This is only used by SBI_CP_DISABLED */
931 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
932 {
933         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
934         unsigned int segno = 0;
935
936         mutex_lock(&dirty_i->seglist_lock);
937         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
938                 if (get_valid_blocks(sbi, segno, false))
939                         continue;
940                 if (get_ckpt_valid_blocks(sbi, segno, false))
941                         continue;
942                 mutex_unlock(&dirty_i->seglist_lock);
943                 return segno;
944         }
945         mutex_unlock(&dirty_i->seglist_lock);
946         return NULL_SEGNO;
947 }
948
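/*
 * Allocate a discard command for the logical range [@lstart, @lstart + @len)
 * backed by device offset @start, and queue it on the pending list that
 * matches its length.
 */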
949 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
950                 struct block_device *bdev, block_t lstart,
951                 block_t start, block_t len)
952 {
953         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
954         struct list_head *pend_list;
955         struct discard_cmd *dc;
956
957         f2fs_bug_on(sbi, !len);
958
959         pend_list = &dcc->pend_list[plist_idx(len)];
960
961         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
962         INIT_LIST_HEAD(&dc->list);
963         dc->bdev = bdev;
964         dc->di.lstart = lstart;
965         dc->di.start = start;
966         dc->di.len = len;
967         dc->ref = 0;
968         dc->state = D_PREP;
969         dc->queued = 0;
970         dc->error = 0;
971         init_completion(&dc->wait);
972         list_add_tail(&dc->list, pend_list);
973         spin_lock_init(&dc->lock);
974         dc->bio_ref = 0;
975         atomic_inc(&dcc->discard_cmd_cnt);
976         dcc->undiscard_blks += len;
977
978         return dc;
979 }
980
981 static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
982 {
983 #ifdef CONFIG_F2FS_CHECK_FS
984         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
985         struct rb_node *cur = rb_first_cached(&dcc->root), *next;
986         struct discard_cmd *cur_dc, *next_dc;
987
988         while (cur) {
989                 next = rb_next(cur);
990                 if (!next)
991                         return true;
992
993                 cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
994                 next_dc = rb_entry(next, struct discard_cmd, rb_node);
995
996                 if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
997                         f2fs_info(sbi, "broken discard_rbtree, "
998                                 "cur(%u, %u) next(%u, %u)",
999                                 cur_dc->di.lstart, cur_dc->di.len,
1000                                 next_dc->di.lstart, next_dc->di.len);
1001                         return false;
1002                 }
1003                 cur = next;
1004         }
1005 #endif
1006         return true;
1007 }
1008
1009 static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
1010                                                 block_t blkaddr)
1011 {
1012         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1013         struct rb_node *node = dcc->root.rb_root.rb_node;
1014         struct discard_cmd *dc;
1015
1016         while (node) {
1017                 dc = rb_entry(node, struct discard_cmd, rb_node);
1018
1019                 if (blkaddr < dc->di.lstart)
1020                         node = node->rb_left;
1021                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1022                         node = node->rb_right;
1023                 else
1024                         return dc;
1025         }
1026         return NULL;
1027 }
1028
1029 static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
1030                                 block_t blkaddr,
1031                                 struct discard_cmd **prev_entry,
1032                                 struct discard_cmd **next_entry,
1033                                 struct rb_node ***insert_p,
1034                                 struct rb_node **insert_parent)
1035 {
1036         struct rb_node **pnode = &root->rb_root.rb_node;
1037         struct rb_node *parent = NULL, *tmp_node;
1038         struct discard_cmd *dc;
1039
1040         *insert_p = NULL;
1041         *insert_parent = NULL;
1042         *prev_entry = NULL;
1043         *next_entry = NULL;
1044
1045         if (RB_EMPTY_ROOT(&root->rb_root))
1046                 return NULL;
1047
1048         while (*pnode) {
1049                 parent = *pnode;
1050                 dc = rb_entry(*pnode, struct discard_cmd, rb_node);
1051
1052                 if (blkaddr < dc->di.lstart)
1053                         pnode = &(*pnode)->rb_left;
1054                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1055                         pnode = &(*pnode)->rb_right;
1056                 else
1057                         goto lookup_neighbors;
1058         }
1059
1060         *insert_p = pnode;
1061         *insert_parent = parent;
1062
1063         dc = rb_entry(parent, struct discard_cmd, rb_node);
1064         tmp_node = parent;
1065         if (parent && blkaddr > dc->di.lstart)
1066                 tmp_node = rb_next(parent);
1067         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1068
1069         tmp_node = parent;
1070         if (parent && blkaddr < dc->di.lstart)
1071                 tmp_node = rb_prev(parent);
1072         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1073         return NULL;
1074
1075 lookup_neighbors:
1076         /* lookup prev node for merging backward later */
1077         tmp_node = rb_prev(&dc->rb_node);
1078         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1079
1080         /* lookup next node for merging forward later */
1081         tmp_node = rb_next(&dc->rb_node);
1082         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1083         return dc;
1084 }
1085
1086 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1087                                                         struct discard_cmd *dc)
1088 {
1089         if (dc->state == D_DONE)
1090                 atomic_sub(dc->queued, &dcc->queued_discard);
1091
1092         list_del(&dc->list);
1093         rb_erase_cached(&dc->rb_node, &dcc->root);
1094         dcc->undiscard_blks -= dc->di.len;
1095
1096         kmem_cache_free(discard_cmd_slab, dc);
1097
1098         atomic_dec(&dcc->discard_cmd_cnt);
1099 }
1100
1101 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1102                                                         struct discard_cmd *dc)
1103 {
1104         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1105         unsigned long flags;
1106
1107         trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
1108
1109         spin_lock_irqsave(&dc->lock, flags);
1110         if (dc->bio_ref) {
1111                 spin_unlock_irqrestore(&dc->lock, flags);
1112                 return;
1113         }
1114         spin_unlock_irqrestore(&dc->lock, flags);
1115
1116         f2fs_bug_on(sbi, dc->ref);
1117
1118         if (dc->error == -EOPNOTSUPP)
1119                 dc->error = 0;
1120
1121         if (dc->error)
1122                 f2fs_info_ratelimited(sbi,
1123                         "Issue discard(%u, %u, %u) failed, ret: %d",
1124                         dc->di.lstart, dc->di.start, dc->di.len, dc->error);
1125         __detach_discard_cmd(dcc, dc);
1126 }
1127
1128 static void f2fs_submit_discard_endio(struct bio *bio)
1129 {
1130         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1131         unsigned long flags;
1132
1133         spin_lock_irqsave(&dc->lock, flags);
1134         if (!dc->error)
1135                 dc->error = blk_status_to_errno(bio->bi_status);
1136         dc->bio_ref--;
1137         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1138                 dc->state = D_DONE;
1139                 complete_all(&dc->wait);
1140         }
1141         spin_unlock_irqrestore(&dc->lock, flags);
1142         bio_put(bio);
1143 }
1144
1145 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1146                                 block_t start, block_t end)
1147 {
1148 #ifdef CONFIG_F2FS_CHECK_FS
1149         struct seg_entry *sentry;
1150         unsigned int segno;
1151         block_t blk = start;
1152         unsigned long offset, size, *map;
1153
1154         while (blk < end) {
1155                 segno = GET_SEGNO(sbi, blk);
1156                 sentry = get_seg_entry(sbi, segno);
1157                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1158
1159                 if (end < START_BLOCK(sbi, segno + 1))
1160                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1161                 else
1162                         size = BLKS_PER_SEG(sbi);
1163                 map = (unsigned long *)(sentry->cur_valid_map);
1164                 offset = __find_rev_next_bit(map, size, offset);
1165                 f2fs_bug_on(sbi, offset != size);
1166                 blk = START_BLOCK(sbi, segno + 1);
1167         }
1168 #endif
1169 }
1170
1171 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1172                                 struct discard_policy *dpolicy,
1173                                 int discard_type, unsigned int granularity)
1174 {
1175         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1176
1177         /* common policy */
1178         dpolicy->type = discard_type;
1179         dpolicy->sync = true;
1180         dpolicy->ordered = false;
1181         dpolicy->granularity = granularity;
1182
1183         dpolicy->max_requests = dcc->max_discard_request;
1184         dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
1185         dpolicy->timeout = false;
1186
1187         if (discard_type == DPOLICY_BG) {
1188                 dpolicy->min_interval = dcc->min_discard_issue_time;
1189                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1190                 dpolicy->max_interval = dcc->max_discard_issue_time;
1191                 if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE)
1192                         dpolicy->io_aware = true;
1193                 else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE)
1194                         dpolicy->io_aware = false;
1195                 dpolicy->sync = false;
1196                 dpolicy->ordered = true;
1197                 if (utilization(sbi) > dcc->discard_urgent_util) {
1198                         dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1199                         if (atomic_read(&dcc->discard_cmd_cnt))
1200                                 dpolicy->max_interval =
1201                                         dcc->min_discard_issue_time;
1202                 }
1203         } else if (discard_type == DPOLICY_FORCE) {
1204                 dpolicy->min_interval = dcc->min_discard_issue_time;
1205                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1206                 dpolicy->max_interval = dcc->max_discard_issue_time;
1207                 dpolicy->io_aware = false;
1208         } else if (discard_type == DPOLICY_FSTRIM) {
1209                 dpolicy->io_aware = false;
1210         } else if (discard_type == DPOLICY_UMOUNT) {
1211                 dpolicy->io_aware = false;
1212                 /* we need to issue all pending discards to keep CP_TRIMMED_FLAG */
1213                 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1214                 dpolicy->timeout = true;
1215         }
1216 }
1217
1218 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1219                                 struct block_device *bdev, block_t lstart,
1220                                 block_t start, block_t len);
1221
1222 #ifdef CONFIG_BLK_DEV_ZONED
1223 static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
1224                                    struct discard_cmd *dc, blk_opf_t flag,
1225                                    struct list_head *wait_list,
1226                                    unsigned int *issued)
1227 {
1228         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1229         struct block_device *bdev = dc->bdev;
1230         struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
1231         unsigned long flags;
1232
1233         trace_f2fs_issue_reset_zone(bdev, dc->di.start);
1234
1235         spin_lock_irqsave(&dc->lock, flags);
1236         dc->state = D_SUBMIT;
1237         dc->bio_ref++;
1238         spin_unlock_irqrestore(&dc->lock, flags);
1239
1240         if (issued)
1241                 (*issued)++;
1242
1243         atomic_inc(&dcc->queued_discard);
1244         dc->queued++;
1245         list_move_tail(&dc->list, wait_list);
1246
1247         /* sanity check on discard range */
1248         __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
1249
1250         bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
1251         bio->bi_private = dc;
1252         bio->bi_end_io = f2fs_submit_discard_endio;
1253         submit_bio(bio);
1254
1255         atomic_inc(&dcc->issued_discard);
1256         f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
1257 }
1258 #endif
1259
1260 /* this function is copied from blkdev_issue_discard in block/blk-lib.c */
1261 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1262                                 struct discard_policy *dpolicy,
1263                                 struct discard_cmd *dc, int *issued)
1264 {
1265         struct block_device *bdev = dc->bdev;
1266         unsigned int max_discard_blocks =
1267                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1268         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1269         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1270                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1271         blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
1272         block_t lstart, start, len, total_len;
1273         int err = 0;
1274
1275         if (dc->state != D_PREP)
1276                 return 0;
1277
1278         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1279                 return 0;
1280
1281 #ifdef CONFIG_BLK_DEV_ZONED
1282         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
1283                 int devi = f2fs_bdev_index(sbi, bdev);
1284
1285                 if (devi < 0)
1286                         return -EINVAL;
1287
1288                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1289                         __submit_zone_reset_cmd(sbi, dc, flag,
1290                                                 wait_list, issued);
1291                         return 0;
1292                 }
1293         }
1294 #endif
1295
1296         /*
1297          * stop issuing discard in any of the cases below:
1298          * 1. the device is a conventional zone, but it doesn't support discard.
1299          * 2. the device is a regular device that no longer supports discard
1300          * after a snapshot.
1301          */
1302         if (!bdev_max_discard_sectors(bdev))
1303                 return -EOPNOTSUPP;
1304
1305         trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
1306
1307         lstart = dc->di.lstart;
1308         start = dc->di.start;
1309         len = dc->di.len;
1310         total_len = len;
1311
1312         dc->di.len = 0;
1313
1314         while (total_len && *issued < dpolicy->max_requests && !err) {
1315                 struct bio *bio = NULL;
1316                 unsigned long flags;
1317                 bool last = true;
1318
1319                 if (len > max_discard_blocks) {
1320                         len = max_discard_blocks;
1321                         last = false;
1322                 }
1323
1324                 (*issued)++;
1325                 if (*issued == dpolicy->max_requests)
1326                         last = true;
1327
1328                 dc->di.len += len;
1329
1330                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1331                         err = -EIO;
1332                 } else {
1333                         err = __blkdev_issue_discard(bdev,
1334                                         SECTOR_FROM_BLOCK(start),
1335                                         SECTOR_FROM_BLOCK(len),
1336                                         GFP_NOFS, &bio);
1337                 }
1338                 if (err) {
1339                         spin_lock_irqsave(&dc->lock, flags);
1340                         if (dc->state == D_PARTIAL)
1341                                 dc->state = D_SUBMIT;
1342                         spin_unlock_irqrestore(&dc->lock, flags);
1343
1344                         break;
1345                 }
1346
1347                 f2fs_bug_on(sbi, !bio);
1348
1349                 /*
1350                  * this must be set before submission to avoid the command
1351                  * reaching D_DONE right away
1352                  */
1353                 spin_lock_irqsave(&dc->lock, flags);
1354                 if (last)
1355                         dc->state = D_SUBMIT;
1356                 else
1357                         dc->state = D_PARTIAL;
1358                 dc->bio_ref++;
1359                 spin_unlock_irqrestore(&dc->lock, flags);
1360
1361                 atomic_inc(&dcc->queued_discard);
1362                 dc->queued++;
1363                 list_move_tail(&dc->list, wait_list);
1364
1365                 /* sanity check on discard range */
1366                 __check_sit_bitmap(sbi, lstart, lstart + len);
1367
1368                 bio->bi_private = dc;
1369                 bio->bi_end_io = f2fs_submit_discard_endio;
1370                 bio->bi_opf |= flag;
1371                 submit_bio(bio);
1372
1373                 atomic_inc(&dcc->issued_discard);
1374
1375                 f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);
1376
1377                 lstart += len;
1378                 start += len;
1379                 total_len -= len;
1380                 len = total_len;
1381         }
1382
1383         if (!err && len) {
1384                 dcc->undiscard_blks -= len;
1385                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1386         }
1387         return err;
1388 }
1389
1390 static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
1391                                 struct block_device *bdev, block_t lstart,
1392                                 block_t start, block_t len)
1393 {
1394         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1395         struct rb_node **p = &dcc->root.rb_root.rb_node;
1396         struct rb_node *parent = NULL;
1397         struct discard_cmd *dc;
1398         bool leftmost = true;
1399
1400         /* look up rb tree to find parent node */
1401         while (*p) {
1402                 parent = *p;
1403                 dc = rb_entry(parent, struct discard_cmd, rb_node);
1404
1405                 if (lstart < dc->di.lstart) {
1406                         p = &(*p)->rb_left;
1407                 } else if (lstart >= dc->di.lstart + dc->di.len) {
1408                         p = &(*p)->rb_right;
1409                         leftmost = false;
1410                 } else {
1411                         /* skip the insertion if the range already exists */
1412                         return;
1413                 }
1414         }
1415
1416         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1417
1418         rb_link_node(&dc->rb_node, parent, p);
1419         rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1420 }
1421
1422 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1423                                                 struct discard_cmd *dc)
1424 {
1425         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
1426 }
1427
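/*
 * Carve @blkaddr out of a pending discard command: drop the command if it is
 * done or a single block, otherwise shrink it and, when the address falls in
 * the middle, split it into two commands.
 */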
1428 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1429                                 struct discard_cmd *dc, block_t blkaddr)
1430 {
1431         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1432         struct discard_info di = dc->di;
1433         bool modified = false;
1434
1435         if (dc->state == D_DONE || dc->di.len == 1) {
1436                 __remove_discard_cmd(sbi, dc);
1437                 return;
1438         }
1439
1440         dcc->undiscard_blks -= di.len;
1441
1442         if (blkaddr > di.lstart) {
1443                 dc->di.len = blkaddr - dc->di.lstart;
1444                 dcc->undiscard_blks += dc->di.len;
1445                 __relocate_discard_cmd(dcc, dc);
1446                 modified = true;
1447         }
1448
1449         if (blkaddr < di.lstart + di.len - 1) {
1450                 if (modified) {
1451                         __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
1452                                         di.start + blkaddr + 1 - di.lstart,
1453                                         di.lstart + di.len - 1 - blkaddr);
1454                 } else {
1455                         dc->di.lstart++;
1456                         dc->di.len--;
1457                         dc->di.start++;
1458                         dcc->undiscard_blks += dc->di.len;
1459                         __relocate_discard_cmd(dcc, dc);
1460                 }
1461         }
1462 }
1463
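/*
 * Merge the range [@lstart, @lstart + @len) into the discard rb-tree:
 * extend mergeable D_PREP neighbours on the same device where possible and
 * insert new commands for any remaining gaps.
 */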
1464 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1465                                 struct block_device *bdev, block_t lstart,
1466                                 block_t start, block_t len)
1467 {
1468         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1469         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1470         struct discard_cmd *dc;
1471         struct discard_info di = {0};
1472         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1473         unsigned int max_discard_blocks =
1474                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1475         block_t end = lstart + len;
1476
1477         dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
1478                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1479         if (dc)
1480                 prev_dc = dc;
1481
1482         if (!prev_dc) {
1483                 di.lstart = lstart;
1484                 di.len = next_dc ? next_dc->di.lstart - lstart : len;
1485                 di.len = min(di.len, len);
1486                 di.start = start;
1487         }
1488
1489         while (1) {
1490                 struct rb_node *node;
1491                 bool merged = false;
1492                 struct discard_cmd *tdc = NULL;
1493
1494                 if (prev_dc) {
1495                         di.lstart = prev_dc->di.lstart + prev_dc->di.len;
1496                         if (di.lstart < lstart)
1497                                 di.lstart = lstart;
1498                         if (di.lstart >= end)
1499                                 break;
1500
1501                         if (!next_dc || next_dc->di.lstart > end)
1502                                 di.len = end - di.lstart;
1503                         else
1504                                 di.len = next_dc->di.lstart - di.lstart;
1505                         di.start = start + di.lstart - lstart;
1506                 }
1507
1508                 if (!di.len)
1509                         goto next;
1510
1511                 if (prev_dc && prev_dc->state == D_PREP &&
1512                         prev_dc->bdev == bdev &&
1513                         __is_discard_back_mergeable(&di, &prev_dc->di,
1514                                                         max_discard_blocks)) {
1515                         prev_dc->di.len += di.len;
1516                         dcc->undiscard_blks += di.len;
1517                         __relocate_discard_cmd(dcc, prev_dc);
1518                         di = prev_dc->di;
1519                         tdc = prev_dc;
1520                         merged = true;
1521                 }
1522
1523                 if (next_dc && next_dc->state == D_PREP &&
1524                         next_dc->bdev == bdev &&
1525                         __is_discard_front_mergeable(&di, &next_dc->di,
1526                                                         max_discard_blocks)) {
1527                         next_dc->di.lstart = di.lstart;
1528                         next_dc->di.len += di.len;
1529                         next_dc->di.start = di.start;
1530                         dcc->undiscard_blks += di.len;
1531                         __relocate_discard_cmd(dcc, next_dc);
1532                         if (tdc)
1533                                 __remove_discard_cmd(sbi, tdc);
1534                         merged = true;
1535                 }
1536
1537                 if (!merged)
1538                         __insert_discard_cmd(sbi, bdev,
1539                                                 di.lstart, di.start, di.len);
1540  next:
1541                 prev_dc = next_dc;
1542                 if (!prev_dc)
1543                         break;
1544
1545                 node = rb_next(&prev_dc->rb_node);
1546                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1547         }
1548 }
1549
1550 #ifdef CONFIG_BLK_DEV_ZONED
1551 static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
1552                 struct block_device *bdev, block_t blkstart, block_t lblkstart,
1553                 block_t blklen)
1554 {
1555         trace_f2fs_queue_reset_zone(bdev, blkstart);
1556
1557         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1558         __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
1559         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1560 }
1561 #endif
1562
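/*
 * Queue a discard for the given logical range: translate the start block
 * to a per-device address on multi-device setups, then record the range
 * in the discard tree under dcc->cmd_lock.
 */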
1563 static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
1564                 struct block_device *bdev, block_t blkstart, block_t blklen)
1565 {
1566         block_t lblkstart = blkstart;
1567
1568         if (!f2fs_bdev_support_discard(bdev))
1569                 return;
1570
1571         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1572
1573         if (f2fs_is_multi_device(sbi)) {
1574                 int devi = f2fs_target_device_index(sbi, blkstart);
1575
1576                 blkstart -= FDEV(devi).start_blk;
1577         }
1578         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1579         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1580         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1581 }
1582
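/*
 * Issue prepared discard commands in LBA order, starting from
 * dcc->next_pos, until the policy's request limit is reached or the
 * device stops being idle (when the policy is io_aware).
 */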
1583 static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1584                 struct discard_policy *dpolicy, int *issued)
1585 {
1586         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1587         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1588         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1589         struct discard_cmd *dc;
1590         struct blk_plug plug;
1591         bool io_interrupted = false;
1592
1593         mutex_lock(&dcc->cmd_lock);
1594         dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
1595                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1596         if (!dc)
1597                 dc = next_dc;
1598
1599         blk_start_plug(&plug);
1600
1601         while (dc) {
1602                 struct rb_node *node;
1603                 int err = 0;
1604
1605                 if (dc->state != D_PREP)
1606                         goto next;
1607
1608                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1609                         io_interrupted = true;
1610                         break;
1611                 }
1612
1613                 dcc->next_pos = dc->di.lstart + dc->di.len;
1614                 err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
1615
1616                 if (*issued >= dpolicy->max_requests)
1617                         break;
1618 next:
1619                 node = rb_next(&dc->rb_node);
1620                 if (err)
1621                         __remove_discard_cmd(sbi, dc);
1622                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1623         }
1624
1625         blk_finish_plug(&plug);
1626
1627         if (!dc)
1628                 dcc->next_pos = 0;
1629
1630         mutex_unlock(&dcc->cmd_lock);
1631
1632         if (!(*issued) && io_interrupted)
1633                 *issued = -1;
1634 }
1635 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1636                                         struct discard_policy *dpolicy);
1637
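/*
 * Issue pending discard commands according to @dpolicy, scanning the
 * pending lists from the largest to the smallest length bucket (or in
 * LBA order when the policy is ordered).  Returns the number of
 * submitted requests, or -1 if issuing was interrupted by other I/O
 * before anything could be submitted.
 */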
1638 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1639                                         struct discard_policy *dpolicy)
1640 {
1641         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1642         struct list_head *pend_list;
1643         struct discard_cmd *dc, *tmp;
1644         struct blk_plug plug;
1645         int i, issued;
1646         bool io_interrupted = false;
1647
1648         if (dpolicy->timeout)
1649                 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1650
1651 retry:
1652         issued = 0;
1653         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1654                 if (dpolicy->timeout &&
1655                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1656                         break;
1657
1658                 if (i + 1 < dpolicy->granularity)
1659                         break;
1660
1661                 if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
1662                         __issue_discard_cmd_orderly(sbi, dpolicy, &issued);
1663                         return issued;
1664                 }
1665
1666                 pend_list = &dcc->pend_list[i];
1667
1668                 mutex_lock(&dcc->cmd_lock);
1669                 if (list_empty(pend_list))
1670                         goto next;
1671                 if (unlikely(dcc->rbtree_check))
1672                         f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
1673                 blk_start_plug(&plug);
1674                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1675                         f2fs_bug_on(sbi, dc->state != D_PREP);
1676
1677                         if (dpolicy->timeout &&
1678                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1679                                 break;
1680
1681                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1682                                                 !is_idle(sbi, DISCARD_TIME)) {
1683                                 io_interrupted = true;
1684                                 break;
1685                         }
1686
1687                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1688
1689                         if (issued >= dpolicy->max_requests)
1690                                 break;
1691                 }
1692                 blk_finish_plug(&plug);
1693 next:
1694                 mutex_unlock(&dcc->cmd_lock);
1695
1696                 if (issued >= dpolicy->max_requests || io_interrupted)
1697                         break;
1698         }
1699
1700         if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1701                 __wait_all_discard_cmd(sbi, dpolicy);
1702                 goto retry;
1703         }
1704
1705         if (!issued && io_interrupted)
1706                 issued = -1;
1707
1708         return issued;
1709 }
1710
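/* Drop all prepared (D_PREP) discard commands; returns true if any were dropped. */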
1711 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1712 {
1713         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1714         struct list_head *pend_list;
1715         struct discard_cmd *dc, *tmp;
1716         int i;
1717         bool dropped = false;
1718
1719         mutex_lock(&dcc->cmd_lock);
1720         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1721                 pend_list = &dcc->pend_list[i];
1722                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1723                         f2fs_bug_on(sbi, dc->state != D_PREP);
1724                         __remove_discard_cmd(sbi, dc);
1725                         dropped = true;
1726                 }
1727         }
1728         mutex_unlock(&dcc->cmd_lock);
1729
1730         return dropped;
1731 }
1732
1733 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1734 {
1735         __drop_discard_cmd(sbi);
1736 }
1737
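/*
 * Wait for one issued discard command to complete, drop our reference,
 * and free it once the last reference is gone.  Returns the number of
 * blocks discarded, or 0 if the command failed or is still referenced.
 */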
1738 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1739                                                         struct discard_cmd *dc)
1740 {
1741         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1742         unsigned int len = 0;
1743
1744         wait_for_completion_io(&dc->wait);
1745         mutex_lock(&dcc->cmd_lock);
1746         f2fs_bug_on(sbi, dc->state != D_DONE);
1747         dc->ref--;
1748         if (!dc->ref) {
1749                 if (!dc->error)
1750                         len = dc->di.len;
1751                 __remove_discard_cmd(sbi, dc);
1752         }
1753         mutex_unlock(&dcc->cmd_lock);
1754
1755         return len;
1756 }
1757
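/*
 * Wait for issued discard commands that overlap [start, end) and are at
 * least @dpolicy->granularity blocks long, returning the total number of
 * blocks trimmed.
 */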
1758 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1759                                                 struct discard_policy *dpolicy,
1760                                                 block_t start, block_t end)
1761 {
1762         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1763         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1764                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1765         struct discard_cmd *dc = NULL, *iter, *tmp;
1766         unsigned int trimmed = 0;
1767
1768 next:
1769         dc = NULL;
1770
1771         mutex_lock(&dcc->cmd_lock);
1772         list_for_each_entry_safe(iter, tmp, wait_list, list) {
1773                 if (iter->di.lstart + iter->di.len <= start ||
1774                                         end <= iter->di.lstart)
1775                         continue;
1776                 if (iter->di.len < dpolicy->granularity)
1777                         continue;
1778                 if (iter->state == D_DONE && !iter->ref) {
1779                         wait_for_completion_io(&iter->wait);
1780                         if (!iter->error)
1781                                 trimmed += iter->di.len;
1782                         __remove_discard_cmd(sbi, iter);
1783                 } else {
1784                         iter->ref++;
1785                         dc = iter;
1786                         break;
1787                 }
1788         }
1789         mutex_unlock(&dcc->cmd_lock);
1790
1791         if (dc) {
1792                 trimmed += __wait_one_discard_bio(sbi, dc);
1793                 goto next;
1794         }
1795
1796         return trimmed;
1797 }
1798
1799 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1800                                                 struct discard_policy *dpolicy)
1801 {
1802         struct discard_policy dp;
1803         unsigned int discard_blks;
1804
1805         if (dpolicy)
1806                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1807
1808         /* wait for all discard commands */
1809         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
1810         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1811         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
1812         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1813
1814         return discard_blks;
1815 }
1816
1817 /* This should be covered by global mutex, &sit_i->sentry_lock */
1818 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1819 {
1820         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1821         struct discard_cmd *dc;
1822         bool need_wait = false;
1823
1824         mutex_lock(&dcc->cmd_lock);
1825         dc = __lookup_discard_cmd(sbi, blkaddr);
1826 #ifdef CONFIG_BLK_DEV_ZONED
1827         if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
1828                 int devi = f2fs_bdev_index(sbi, dc->bdev);
1829
1830                 if (devi < 0) {
1831                         mutex_unlock(&dcc->cmd_lock);
1832                         return;
1833                 }
1834
1835                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1836                         /* force submission of the zone reset */
1837                         if (dc->state == D_PREP)
1838                                 __submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
1839                                                         &dcc->wait_list, NULL);
1840                         dc->ref++;
1841                         mutex_unlock(&dcc->cmd_lock);
1842                         /* wait for the zone reset to complete */
1843                         __wait_one_discard_bio(sbi, dc);
1844                         return;
1845                 }
1846         }
1847 #endif
1848         if (dc) {
1849                 if (dc->state == D_PREP) {
1850                         __punch_discard_cmd(sbi, dc, blkaddr);
1851                 } else {
1852                         dc->ref++;
1853                         need_wait = true;
1854                 }
1855         }
1856         mutex_unlock(&dcc->cmd_lock);
1857
1858         if (need_wait)
1859                 __wait_one_discard_bio(sbi, dc);
1860 }
1861
1862 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1863 {
1864         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1865
1866         if (dcc && dcc->f2fs_issue_discard) {
1867                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1868
1869                 dcc->f2fs_issue_discard = NULL;
1870                 kthread_stop(discard_thread);
1871         }
1872 }
1873
1874 /**
1875  * f2fs_issue_discard_timeout() - Issue all discard cmd within UMOUNT_DISCARD_TIMEOUT
1876  * @sbi: the f2fs_sb_info data for discard cmd to issue
1877  *
1878  * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands are dropped.
1879  *
1880  * Return: true if all discard commands were issued or none needed to be issued, false otherwise.
1881  */
1882 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1883 {
1884         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1885         struct discard_policy dpolicy;
1886         bool dropped;
1887
1888         if (!atomic_read(&dcc->discard_cmd_cnt))
1889                 return true;
1890
1891         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1892                                         dcc->discard_granularity);
1893         __issue_discard_cmd(sbi, &dpolicy);
1894         dropped = __drop_discard_cmd(sbi);
1895
1896         /* just to make sure there are no pending discard commands */
1897         __wait_all_discard_cmd(sbi, NULL);
1898
1899         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1900         return !dropped;
1901 }
1902
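/*
 * Background thread that wakes up periodically (or when kicked via
 * dcc->discard_wake), picks a discard policy based on GC mode and memory
 * pressure, issues pending commands, and adjusts its sleep interval
 * according to how much work was done.
 */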
1903 static int issue_discard_thread(void *data)
1904 {
1905         struct f2fs_sb_info *sbi = data;
1906         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1907         wait_queue_head_t *q = &dcc->discard_wait_queue;
1908         struct discard_policy dpolicy;
1909         unsigned int wait_ms = dcc->min_discard_issue_time;
1910         int issued;
1911
1912         set_freezable();
1913
1914         do {
1915                 wait_event_freezable_timeout(*q,
1916                                 kthread_should_stop() || dcc->discard_wake,
1917                                 msecs_to_jiffies(wait_ms));
1918
1919                 if (sbi->gc_mode == GC_URGENT_HIGH ||
1920                         !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1921                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
1922                                                 MIN_DISCARD_GRANULARITY);
1923                 else
1924                         __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1925                                                 dcc->discard_granularity);
1926
1927                 if (dcc->discard_wake)
1928                         dcc->discard_wake = false;
1929
1930                 /* clean up pending candidates before going to sleep */
1931                 if (atomic_read(&dcc->queued_discard))
1932                         __wait_all_discard_cmd(sbi, NULL);
1933
1934                 if (f2fs_readonly(sbi->sb))
1935                         continue;
1936                 if (kthread_should_stop())
1937                         return 0;
1938                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1939                         !atomic_read(&dcc->discard_cmd_cnt)) {
1940                         wait_ms = dpolicy.max_interval;
1941                         continue;
1942                 }
1943
1944                 sb_start_intwrite(sbi->sb);
1945
1946                 issued = __issue_discard_cmd(sbi, &dpolicy);
1947                 if (issued > 0) {
1948                         __wait_all_discard_cmd(sbi, &dpolicy);
1949                         wait_ms = dpolicy.min_interval;
1950                 } else if (issued == -1) {
1951                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1952                         if (!wait_ms)
1953                                 wait_ms = dpolicy.mid_interval;
1954                 } else {
1955                         wait_ms = dpolicy.max_interval;
1956                 }
1957                 if (!atomic_read(&dcc->discard_cmd_cnt))
1958                         wait_ms = dpolicy.max_interval;
1959
1960                 sb_end_intwrite(sbi->sb);
1961
1962         } while (!kthread_should_stop());
1963         return 0;
1964 }
1965
1966 #ifdef CONFIG_BLK_DEV_ZONED
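/*
 * Handle a discard on a zoned device: sequential zones must be reset as a
 * whole (synchronously during recovery, otherwise via a queued zone-reset
 * command), while conventional zones fall back to a regular discard.
 */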
1967 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1968                 struct block_device *bdev, block_t blkstart, block_t blklen)
1969 {
1970         sector_t sector, nr_sects;
1971         block_t lblkstart = blkstart;
1972         int devi = 0;
1973         u64 remainder = 0;
1974
1975         if (f2fs_is_multi_device(sbi)) {
1976                 devi = f2fs_target_device_index(sbi, blkstart);
1977                 if (blkstart < FDEV(devi).start_blk ||
1978                     blkstart > FDEV(devi).end_blk) {
1979                         f2fs_err(sbi, "Invalid block %x", blkstart);
1980                         return -EIO;
1981                 }
1982                 blkstart -= FDEV(devi).start_blk;
1983         }
1984
1985         /* For sequential zones, reset the zone write pointer */
1986         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1987                 sector = SECTOR_FROM_BLOCK(blkstart);
1988                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1989                 div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
1990
1991                 if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
1992                         f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1993                                  devi, sbi->s_ndevs ? FDEV(devi).path : "",
1994                                  blkstart, blklen);
1995                         return -EIO;
1996                 }
1997
1998                 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
1999                         unsigned int nofs_flags;
2000                         int ret;
2001
2002                         trace_f2fs_issue_reset_zone(bdev, blkstart);
2003                         nofs_flags = memalloc_nofs_save();
2004                         ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
2005                                                 sector, nr_sects);
2006                         memalloc_nofs_restore(nofs_flags);
2007                         return ret;
2008                 }
2009
2010                 __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
2011                 return 0;
2012         }
2013
2014         /* For conventional zones, use regular discard if supported */
2015         __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
2016         return 0;
2017 }
2018 #endif
2019
2020 static int __issue_discard_async(struct f2fs_sb_info *sbi,
2021                 struct block_device *bdev, block_t blkstart, block_t blklen)
2022 {
2023 #ifdef CONFIG_BLK_DEV_ZONED
2024         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
2025                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
2026 #endif
2027         __queue_discard_cmd(sbi, bdev, blkstart, blklen);
2028         return 0;
2029 }
2030
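/*
 * Issue a discard for [blkstart, blkstart + blklen), splitting the range
 * at device boundaries on multi-device setups and marking the affected
 * blocks in each segment's discard bitmap.
 */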
2031 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
2032                                 block_t blkstart, block_t blklen)
2033 {
2034         sector_t start = blkstart, len = 0;
2035         struct block_device *bdev;
2036         struct seg_entry *se;
2037         unsigned int offset;
2038         block_t i;
2039         int err = 0;
2040
2041         bdev = f2fs_target_device(sbi, blkstart, NULL);
2042
2043         for (i = blkstart; i < blkstart + blklen; i++, len++) {
2044                 if (i != start) {
2045                         struct block_device *bdev2 =
2046                                 f2fs_target_device(sbi, i, NULL);
2047
2048                         if (bdev2 != bdev) {
2049                                 err = __issue_discard_async(sbi, bdev,
2050                                                 start, len);
2051                                 if (err)
2052                                         return err;
2053                                 bdev = bdev2;
2054                                 start = i;
2055                                 len = 0;
2056                         }
2057                 }
2058
2059                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2060                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2061
2062                 if (f2fs_block_unit_discard(sbi) &&
2063                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2064                         sbi->discard_blks--;
2065         }
2066
2067         if (len)
2068                 err = __issue_discard_async(sbi, bdev, start, len);
2069         return err;
2070 }
2071
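/*
 * Collect discard candidates for the segment selected by @cpc->trim_start
 * by comparing the current, checkpoint and discard bitmaps, and queue them
 * as discard entries.  With @check_only, just report whether any candidate
 * exists.
 */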
2072 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2073                                                         bool check_only)
2074 {
2075         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2076         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2077         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2078         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2079         unsigned long *discard_map = (unsigned long *)se->discard_map;
2080         unsigned long *dmap = SIT_I(sbi)->tmp_map;
2081         unsigned int start = 0, end = -1;
2082         bool force = (cpc->reason & CP_DISCARD);
2083         struct discard_entry *de = NULL;
2084         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2085         int i;
2086
2087         if (se->valid_blocks == BLKS_PER_SEG(sbi) ||
2088             !f2fs_hw_support_discard(sbi) ||
2089             !f2fs_block_unit_discard(sbi))
2090                 return false;
2091
2092         if (!force) {
2093                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2094                         SM_I(sbi)->dcc_info->nr_discards >=
2095                                 SM_I(sbi)->dcc_info->max_discards)
2096                         return false;
2097         }
2098
2099         /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
2100         for (i = 0; i < entries; i++)
2101                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2102                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2103
2104         while (force || SM_I(sbi)->dcc_info->nr_discards <=
2105                                 SM_I(sbi)->dcc_info->max_discards) {
2106                 start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1);
2107                 if (start >= BLKS_PER_SEG(sbi))
2108                         break;
2109
2110                 end = __find_rev_next_zero_bit(dmap,
2111                                                 BLKS_PER_SEG(sbi), start + 1);
2112                 if (force && start && end != BLKS_PER_SEG(sbi) &&
2113                     (end - start) < cpc->trim_minlen)
2114                         continue;
2115
2116                 if (check_only)
2117                         return true;
2118
2119                 if (!de) {
2120                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
2121                                                 GFP_F2FS_ZERO, true, NULL);
2122                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2123                         list_add_tail(&de->list, head);
2124                 }
2125
2126                 for (i = start; i < end; i++)
2127                         __set_bit_le(i, (void *)de->discard_map);
2128
2129                 SM_I(sbi)->dcc_info->nr_discards += end - start;
2130         }
2131         return false;
2132 }
2133
2134 static void release_discard_addr(struct discard_entry *entry)
2135 {
2136         list_del(&entry->list);
2137         kmem_cache_free(discard_entry_slab, entry);
2138 }
2139
2140 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2141 {
2142         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2143         struct discard_entry *entry, *this;
2144
2145         /* drop caches */
2146         list_for_each_entry_safe(entry, this, head, list)
2147                 release_discard_addr(entry);
2148 }
2149
2150 /*
2151  * Should call f2fs_clear_prefree_segments after checkpoint is done.
2152  */
2153 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2154 {
2155         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2156         unsigned int segno;
2157
2158         mutex_lock(&dirty_i->seglist_lock);
2159         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2160                 __set_test_and_free(sbi, segno, false);
2161         mutex_unlock(&dirty_i->seglist_lock);
2162 }
2163
2164 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2165                                                 struct cp_control *cpc)
2166 {
2167         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2168         struct list_head *head = &dcc->entry_list;
2169         struct discard_entry *entry, *this;
2170         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2171         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2172         unsigned int start = 0, end = -1;
2173         unsigned int secno, start_segno;
2174         bool force = (cpc->reason & CP_DISCARD);
2175         bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
2176                                                 DISCARD_UNIT_SECTION;
2177
2178         if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
2179                 section_alignment = true;
2180
2181         mutex_lock(&dirty_i->seglist_lock);
2182
2183         while (1) {
2184                 int i;
2185
2186                 if (section_alignment && end != -1)
2187                         end--;
2188                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2189                 if (start >= MAIN_SEGS(sbi))
2190                         break;
2191                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2192                                                                 start + 1);
2193
2194                 if (section_alignment) {
2195                         start = rounddown(start, SEGS_PER_SEC(sbi));
2196                         end = roundup(end, SEGS_PER_SEC(sbi));
2197                 }
2198
2199                 for (i = start; i < end; i++) {
2200                         if (test_and_clear_bit(i, prefree_map))
2201                                 dirty_i->nr_dirty[PRE]--;
2202                 }
2203
2204                 if (!f2fs_realtime_discard_enable(sbi))
2205                         continue;
2206
2207                 if (force && start >= cpc->trim_start &&
2208                                         (end - 1) <= cpc->trim_end)
2209                         continue;
2210
2211                 /* zoned devices (e.g. 2MB zones) need zone-based resets; handle them below */
2212                 if (!f2fs_sb_has_blkzoned(sbi) &&
2213                     (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
2214                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2215                                 SEGS_TO_BLKS(sbi, end - start));
2216                         continue;
2217                 }
2218 next:
2219                 secno = GET_SEC_FROM_SEG(sbi, start);
2220                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2221                 if (!IS_CURSEC(sbi, secno) &&
2222                         !get_valid_blocks(sbi, start, true))
2223                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2224                                                 BLKS_PER_SEC(sbi));
2225
2226                 start = start_segno + SEGS_PER_SEC(sbi);
2227                 if (start < end)
2228                         goto next;
2229                 else
2230                         end = start - 1;
2231         }
2232         mutex_unlock(&dirty_i->seglist_lock);
2233
2234         if (!f2fs_block_unit_discard(sbi))
2235                 goto wakeup;
2236
2237         /* send small discards */
2238         list_for_each_entry_safe(entry, this, head, list) {
2239                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2240                 bool is_valid = test_bit_le(0, entry->discard_map);
2241
2242 find_next:
2243                 if (is_valid) {
2244                         next_pos = find_next_zero_bit_le(entry->discard_map,
2245                                                 BLKS_PER_SEG(sbi), cur_pos);
2246                         len = next_pos - cur_pos;
2247
2248                         if (f2fs_sb_has_blkzoned(sbi) ||
2249                             (force && len < cpc->trim_minlen))
2250                                 goto skip;
2251
2252                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2253                                                                         len);
2254                         total_len += len;
2255                 } else {
2256                         next_pos = find_next_bit_le(entry->discard_map,
2257                                                 BLKS_PER_SEG(sbi), cur_pos);
2258                 }
2259 skip:
2260                 cur_pos = next_pos;
2261                 is_valid = !is_valid;
2262
2263                 if (cur_pos < BLKS_PER_SEG(sbi))
2264                         goto find_next;
2265
2266                 release_discard_addr(entry);
2267                 dcc->nr_discards -= total_len;
2268         }
2269
2270 wakeup:
2271         wake_up_discard_thread(sbi, false);
2272 }
2273
2274 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2275 {
2276         dev_t dev = sbi->sb->s_bdev->bd_dev;
2277         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2278         int err = 0;
2279
2280         if (f2fs_sb_has_readonly(sbi)) {
2281                 f2fs_info(sbi,
2282                         "Skip to start discard thread for readonly image");
2283                 return 0;
2284         }
2285
2286         if (!f2fs_realtime_discard_enable(sbi))
2287                 return 0;
2288
2289         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2290                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2291         if (IS_ERR(dcc->f2fs_issue_discard)) {
2292                 err = PTR_ERR(dcc->f2fs_issue_discard);
2293                 dcc->f2fs_issue_discard = NULL;
2294         }
2295
2296         return err;
2297 }
2298
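/*
 * Allocate and initialize the per-filesystem discard command control
 * structure (granularity, timing defaults, pending/wait lists, rb-tree),
 * if not already present, and start the background discard thread.
 */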
2299 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2300 {
2301         struct discard_cmd_control *dcc;
2302         int err = 0, i;
2303
2304         if (SM_I(sbi)->dcc_info) {
2305                 dcc = SM_I(sbi)->dcc_info;
2306                 goto init_thread;
2307         }
2308
2309         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2310         if (!dcc)
2311                 return -ENOMEM;
2312
2313         dcc->discard_io_aware_gran = MAX_PLIST_NUM;
2314         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2315         dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2316         dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
2317         if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2318                 dcc->discard_granularity = BLKS_PER_SEG(sbi);
2319         else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2320                 dcc->discard_granularity = BLKS_PER_SEC(sbi);
2321
2322         INIT_LIST_HEAD(&dcc->entry_list);
2323         for (i = 0; i < MAX_PLIST_NUM; i++)
2324                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2325         INIT_LIST_HEAD(&dcc->wait_list);
2326         INIT_LIST_HEAD(&dcc->fstrim_list);
2327         mutex_init(&dcc->cmd_lock);
2328         atomic_set(&dcc->issued_discard, 0);
2329         atomic_set(&dcc->queued_discard, 0);
2330         atomic_set(&dcc->discard_cmd_cnt, 0);
2331         dcc->nr_discards = 0;
2332         dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
2333         dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2334         dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2335         dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2336         dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2337         dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2338         dcc->undiscard_blks = 0;
2339         dcc->next_pos = 0;
2340         dcc->root = RB_ROOT_CACHED;
2341         dcc->rbtree_check = false;
2342
2343         init_waitqueue_head(&dcc->discard_wait_queue);
2344         SM_I(sbi)->dcc_info = dcc;
2345 init_thread:
2346         err = f2fs_start_discard_thread(sbi);
2347         if (err) {
2348                 kfree(dcc);
2349                 SM_I(sbi)->dcc_info = NULL;
2350         }
2351
2352         return err;
2353 }
2354
2355 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2356 {
2357         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2358
2359         if (!dcc)
2360                 return;
2361
2362         f2fs_stop_discard_thread(sbi);
2363
2364         /*
2365          * Recovery can cache discard commands, so in error path of
2366          * fill_super(), it needs to give a chance to handle them.
2367          */
2368         f2fs_issue_discard_timeout(sbi);
2369
2370         kfree(dcc);
2371         SM_I(sbi)->dcc_info = NULL;
2372 }
2373
2374 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2375 {
2376         struct sit_info *sit_i = SIT_I(sbi);
2377
2378         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2379                 sit_i->dirty_sentries++;
2380                 return false;
2381         }
2382
2383         return true;
2384 }
2385
2386 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2387                                         unsigned int segno, int modified)
2388 {
2389         struct seg_entry *se = get_seg_entry(sbi, segno);
2390
2391         se->type = type;
2392         if (modified)
2393                 __mark_sit_entry_dirty(sbi, segno);
2394 }
2395
2396 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2397                                                                 block_t blkaddr)
2398 {
2399         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2400
2401         if (segno == NULL_SEGNO)
2402                 return 0;
2403         return get_seg_entry(sbi, segno)->mtime;
2404 }
2405
2406 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2407                                                 unsigned long long old_mtime)
2408 {
2409         struct seg_entry *se;
2410         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2411         unsigned long long ctime = get_mtime(sbi, false);
2412         unsigned long long mtime = old_mtime ? old_mtime : ctime;
2413
2414         if (segno == NULL_SEGNO)
2415                 return;
2416
2417         se = get_seg_entry(sbi, segno);
2418
2419         if (!se->mtime)
2420                 se->mtime = mtime;
2421         else
2422                 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2423                                                 se->valid_blocks + 1);
2424
2425         if (ctime > SIT_I(sbi)->max_mtime)
2426                 SIT_I(sbi)->max_mtime = ctime;
2427 }
2428
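/*
 * Account a block allocation (@del > 0) or invalidation (@del < 0) at
 * @blkaddr: update the segment's valid-block count, the current and
 * checkpoint valid bitmaps, the discard bitmap, and mark the SIT entry dirty.
 */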
2429 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2430 {
2431         struct seg_entry *se;
2432         unsigned int segno, offset;
2433         long int new_vblocks;
2434         bool exist;
2435 #ifdef CONFIG_F2FS_CHECK_FS
2436         bool mir_exist;
2437 #endif
2438
2439         segno = GET_SEGNO(sbi, blkaddr);
2440         if (segno == NULL_SEGNO)
2441                 return;
2442
2443         se = get_seg_entry(sbi, segno);
2444         new_vblocks = se->valid_blocks + del;
2445         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2446
2447         f2fs_bug_on(sbi, (new_vblocks < 0 ||
2448                         (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2449
2450         se->valid_blocks = new_vblocks;
2451
2452         /* Update valid block bitmap */
2453         if (del > 0) {
2454                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2455 #ifdef CONFIG_F2FS_CHECK_FS
2456                 mir_exist = f2fs_test_and_set_bit(offset,
2457                                                 se->cur_valid_map_mir);
2458                 if (unlikely(exist != mir_exist)) {
2459                         f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2460                                  blkaddr, exist);
2461                         f2fs_bug_on(sbi, 1);
2462                 }
2463 #endif
2464                 if (unlikely(exist)) {
2465                         f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2466                                  blkaddr);
2467                         f2fs_bug_on(sbi, 1);
2468                         se->valid_blocks--;
2469                         del = 0;
2470                 }
2471
2472                 if (f2fs_block_unit_discard(sbi) &&
2473                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2474                         sbi->discard_blks--;
2475
2476                 /*
2477                  * SSR should never reuse a block which is checkpointed
2478                  * or newly invalidated.
2479                  */
2480                 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2481                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2482                                 se->ckpt_valid_blocks++;
2483                 }
2484         } else {
2485                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2486 #ifdef CONFIG_F2FS_CHECK_FS
2487                 mir_exist = f2fs_test_and_clear_bit(offset,
2488                                                 se->cur_valid_map_mir);
2489                 if (unlikely(exist != mir_exist)) {
2490                         f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2491                                  blkaddr, exist);
2492                         f2fs_bug_on(sbi, 1);
2493                 }
2494 #endif
2495                 if (unlikely(!exist)) {
2496                         f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2497                                  blkaddr);
2498                         f2fs_bug_on(sbi, 1);
2499                         se->valid_blocks++;
2500                         del = 0;
2501                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2502                         /*
2503                          * If checkpoints are off, we must not reuse data that
2504                          * was used in the previous checkpoint. If it was used
2505                          * before, we must track that to know how much space we
2506                          * really have.
2507                          */
2508                         if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2509                                 spin_lock(&sbi->stat_lock);
2510                                 sbi->unusable_block_count++;
2511                                 spin_unlock(&sbi->stat_lock);
2512                         }
2513                 }
2514
2515                 if (f2fs_block_unit_discard(sbi) &&
2516                         f2fs_test_and_clear_bit(offset, se->discard_map))
2517                         sbi->discard_blks++;
2518         }
2519         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2520                 se->ckpt_valid_blocks += del;
2521
2522         __mark_sit_entry_dirty(sbi, segno);
2523
2524         /* update total number of valid blocks to be written in ckpt area */
2525         SIT_I(sbi)->written_valid_blocks += del;
2526
2527         if (__is_large_section(sbi))
2528                 get_sec_entry(sbi, segno)->valid_blocks += del;
2529 }
2530
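/*
 * Invalidate the block at @addr: drop it from internal caches, decrement
 * the owning segment's valid-block count and move that segment to the
 * dirty list.
 */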
2531 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2532 {
2533         unsigned int segno = GET_SEGNO(sbi, addr);
2534         struct sit_info *sit_i = SIT_I(sbi);
2535
2536         f2fs_bug_on(sbi, addr == NULL_ADDR);
2537         if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2538                 return;
2539
2540         f2fs_invalidate_internal_cache(sbi, addr);
2541
2542         /* add it into sit main buffer */
2543         down_write(&sit_i->sentry_lock);
2544
2545         update_segment_mtime(sbi, addr, 0);
2546         update_sit_entry(sbi, addr, -1);
2547
2548         /* add it into dirty seglist */
2549         locate_dirty_segment(sbi, segno);
2550
2551         up_write(&sit_i->sentry_lock);
2552 }
2553
2554 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2555 {
2556         struct sit_info *sit_i = SIT_I(sbi);
2557         unsigned int segno, offset;
2558         struct seg_entry *se;
2559         bool is_cp = false;
2560
2561         if (!__is_valid_data_blkaddr(blkaddr))
2562                 return true;
2563
2564         down_read(&sit_i->sentry_lock);
2565
2566         segno = GET_SEGNO(sbi, blkaddr);
2567         se = get_seg_entry(sbi, segno);
2568         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2569
2570         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2571                 is_cp = true;
2572
2573         up_read(&sit_i->sentry_lock);
2574
2575         return is_cp;
2576 }
2577
2578 static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
2579 {
2580         struct curseg_info *curseg = CURSEG_I(sbi, type);
2581
2582         if (sbi->ckpt->alloc_type[type] == SSR)
2583                 return BLKS_PER_SEG(sbi);
2584         return curseg->next_blkoff;
2585 }
2586
2587 /*
2588  * Calculate the number of current summary pages for writing
2589  */
2590 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2591 {
2592         int valid_sum_count = 0;
2593         int i, sum_in_page;
2594
2595         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2596                 if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
2597                         valid_sum_count +=
2598                                 le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2599                 else
2600                         valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
2601         }
2602
2603         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2604                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2605         if (valid_sum_count <= sum_in_page)
2606                 return 1;
2607         else if ((valid_sum_count - sum_in_page) <=
2608                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2609                 return 2;
2610         return 3;
2611 }
2612
2613 /*
2614  * Caller should put this summary page
2615  */
2616 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2617 {
2618         if (unlikely(f2fs_cp_error(sbi)))
2619                 return ERR_PTR(-EIO);
2620         return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2621 }
2622
2623 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2624                                         void *src, block_t blk_addr)
2625 {
2626         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2627
2628         memcpy(page_address(page), src, PAGE_SIZE);
2629         set_page_dirty(page);
2630         f2fs_put_page(page, 1);
2631 }
2632
2633 static void write_sum_page(struct f2fs_sb_info *sbi,
2634                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2635 {
2636         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2637 }
2638
2639 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2640                                                 int type, block_t blk_addr)
2641 {
2642         struct curseg_info *curseg = CURSEG_I(sbi, type);
2643         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2644         struct f2fs_summary_block *src = curseg->sum_blk;
2645         struct f2fs_summary_block *dst;
2646
2647         dst = (struct f2fs_summary_block *)page_address(page);
2648         memset(dst, 0, PAGE_SIZE);
2649
2650         mutex_lock(&curseg->curseg_mutex);
2651
2652         down_read(&curseg->journal_rwsem);
2653         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2654         up_read(&curseg->journal_rwsem);
2655
2656         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2657         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2658
2659         mutex_unlock(&curseg->curseg_mutex);
2660
2661         set_page_dirty(page);
2662         f2fs_put_page(page, 1);
2663 }
2664
2665 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2666                                 struct curseg_info *curseg)
2667 {
2668         unsigned int segno = curseg->segno + 1;
2669         struct free_segmap_info *free_i = FREE_I(sbi);
2670
2671         if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
2672                 return !test_bit(segno, free_i->free_segmap);
2673         return 0;
2674 }
2675
2676 /*
2677  * Find a new segment from the free segments bitmap in the right order.
2678  * This function should always succeed; otherwise it is a BUG.
2679  */
2680 static int get_new_segment(struct f2fs_sb_info *sbi,
2681                         unsigned int *newseg, bool new_sec, bool pinning)
2682 {
2683         struct free_segmap_info *free_i = FREE_I(sbi);
2684         unsigned int segno, secno, zoneno;
2685         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2686         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2687         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2688         bool init = true;
2689         int i;
2690         int ret = 0;
2691
2692         spin_lock(&free_i->segmap_lock);
2693
2694         if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
2695                 ret = -ENOSPC;
2696                 goto out_unlock;
2697         }
2698
2699         if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
2700                 segno = find_next_zero_bit(free_i->free_segmap,
2701                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2702                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2703                         goto got_it;
2704         }
2705
2706 #ifdef CONFIG_BLK_DEV_ZONED
2707         /*
2708          * If we format f2fs on zoned storage, let's try to get pinned sections
2709          * from the beginning of the storage, which should be a conventional zone.
2710          */
2711         if (f2fs_sb_has_blkzoned(sbi)) {
2712                 /* Prioritize writing to conventional zones */
2713                 if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning)
2714                         segno = 0;
2715                 else
2716                         segno = max(sbi->first_zoned_segno, *newseg);
2717                 hint = GET_SEC_FROM_SEG(sbi, segno);
2718         }
2719 #endif
2720
2721 find_other_zone:
2722         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2723
2724 #ifdef CONFIG_BLK_DEV_ZONED
2725         if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) {
2726                 /* Write only to sequential zones */
2727                 if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) {
2728                         hint = GET_SEC_FROM_SEG(sbi, sbi->first_zoned_segno);
2729                         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2730                 } else
2731                         secno = find_first_zero_bit(free_i->free_secmap,
2732                                                                 MAIN_SECS(sbi));
2733                 if (secno >= MAIN_SECS(sbi)) {
2734                         ret = -ENOSPC;
2735                         f2fs_bug_on(sbi, 1);
2736                         goto out_unlock;
2737                 }
2738         }
2739 #endif
2740
2741         if (secno >= MAIN_SECS(sbi)) {
2742                 secno = find_first_zero_bit(free_i->free_secmap,
2743                                                         MAIN_SECS(sbi));
2744                 if (secno >= MAIN_SECS(sbi)) {
2745                         ret = -ENOSPC;
2746                         f2fs_bug_on(sbi, 1);
2747                         goto out_unlock;
2748                 }
2749         }
2750         segno = GET_SEG_FROM_SEC(sbi, secno);
2751         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2752
2753         /* give up on finding another zone */
2754         if (!init)
2755                 goto got_it;
2756         if (sbi->secs_per_zone == 1)
2757                 goto got_it;
2758         if (zoneno == old_zoneno)
2759                 goto got_it;
2760         for (i = 0; i < NR_CURSEG_TYPE; i++)
2761                 if (CURSEG_I(sbi, i)->zone == zoneno)
2762                         break;
2763
2764         if (i < NR_CURSEG_TYPE) {
2765                 /* zone is in use, try another */
2766                 if (zoneno + 1 >= total_zones)
2767                         hint = 0;
2768                 else
2769                         hint = (zoneno + 1) * sbi->secs_per_zone;
2770                 init = false;
2771                 goto find_other_zone;
2772         }
2773 got_it:
2774         /* set it as dirty segment in free segmap */
2775         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2776
2777         /* no free section in conventional zone */
2778         if (new_sec && pinning &&
2779                 !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
2780                 ret = -EAGAIN;
2781                 goto out_unlock;
2782         }
2783         __set_inuse(sbi, segno);
2784         *newseg = segno;
2785 out_unlock:
2786         spin_unlock(&free_i->segmap_lock);
2787
2788         if (ret == -ENOSPC)
2789                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
2790         return ret;
2791 }
2792
2793 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2794 {
2795         struct curseg_info *curseg = CURSEG_I(sbi, type);
2796         struct summary_footer *sum_footer;
2797         unsigned short seg_type = curseg->seg_type;
2798
2799         /* this only happens when get_new_segment() fails */
2800         if (curseg->next_segno == NULL_SEGNO)
2801                 return;
2802
2803         curseg->inited = true;
2804         curseg->segno = curseg->next_segno;
2805         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2806         curseg->next_blkoff = 0;
2807         curseg->next_segno = NULL_SEGNO;
2808
2809         sum_footer = &(curseg->sum_blk->footer);
2810         memset(sum_footer, 0, sizeof(struct summary_footer));
2811
2812         sanity_check_seg_type(sbi, seg_type);
2813
2814         if (IS_DATASEG(seg_type))
2815                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2816         if (IS_NODESEG(seg_type))
2817                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2818         __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2819 }
2820
2821 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2822 {
2823         struct curseg_info *curseg = CURSEG_I(sbi, type);
2824         unsigned short seg_type = curseg->seg_type;
2825
2826         sanity_check_seg_type(sbi, seg_type);
2827         if (__is_large_section(sbi)) {
2828                 if (f2fs_need_rand_seg(sbi)) {
2829                         unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno);
2830
2831                         if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint)
2832                                 return curseg->segno;
2833                         return get_random_u32_inclusive(curseg->segno + 1,
2834                                         GET_SEG_FROM_SEC(sbi, hint + 1) - 1);
2835                 }
2836                 return curseg->segno;
2837         } else if (f2fs_need_rand_seg(sbi)) {
2838                 return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
2839         }
2840
2841         /* the in-memory log may not be located on any segment after mount */
2842         if (!curseg->inited)
2843                 return 0;
2844
2845         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2846                 return 0;
2847
2848         if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))
2849                 return 0;
2850
2851         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2852                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2853
2854         /* find segments from 0 to reuse freed segments */
2855         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2856                 return 0;
2857
2858         return curseg->segno;
2859 }
2860
2861 /*
2862  * Allocate a current working segment.
2863  * This function always allocates a free segment in LFS manner.
2864  */
2865 static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2866 {
2867         struct curseg_info *curseg = CURSEG_I(sbi, type);
2868         unsigned int segno = curseg->segno;
2869         bool pinning = type == CURSEG_COLD_DATA_PINNED;
2870         int ret;
2871
2872         if (curseg->inited)
2873                 write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
2874
2875         segno = __get_next_segno(sbi, type);
2876         ret = get_new_segment(sbi, &segno, new_sec, pinning);
2877         if (ret) {
2878                 if (ret == -ENOSPC)
2879                         curseg->segno = NULL_SEGNO;
2880                 return ret;
2881         }
2882
2883         curseg->next_segno = segno;
2884         reset_curseg(sbi, type, 1);
2885         curseg->alloc_type = LFS;
2886         if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2887                 curseg->fragment_remained_chunk =
2888                                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2889         return 0;
2890 }
2891
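     /*
      * Find the next block offset in @segno, at or after @start, that is free
      * in both the checkpointed and the current valid-block bitmaps, so an SSR
      * write never overwrites a block that is still valid in either view.
      */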
2892 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2893                                         int segno, block_t start)
2894 {
2895         struct seg_entry *se = get_seg_entry(sbi, segno);
2896         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2897         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2898         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2899         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2900         int i;
2901
2902         for (i = 0; i < entries; i++)
2903                 target_map[i] = ckpt_map[i] | cur_map[i];
2904
2905         return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start);
2906 }
2907
2908 static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
2909                 struct curseg_info *seg)
2910 {
2911         return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
2912 }
2913
2914 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2915 {
2916         return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi);
2917 }
2918
2919 /*
2920  * This function always allocates a used segment (from the dirty seglist) in the
2921  * SSR manner, so it should recover the existing segment information of valid blocks.
2922  */
2923 static int change_curseg(struct f2fs_sb_info *sbi, int type)
2924 {
2925         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2926         struct curseg_info *curseg = CURSEG_I(sbi, type);
2927         unsigned int new_segno = curseg->next_segno;
2928         struct f2fs_summary_block *sum_node;
2929         struct page *sum_page;
2930
2931         if (curseg->inited)
2932                 write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2933
2934         __set_test_and_inuse(sbi, new_segno);
2935
2936         mutex_lock(&dirty_i->seglist_lock);
2937         __remove_dirty_segment(sbi, new_segno, PRE);
2938         __remove_dirty_segment(sbi, new_segno, DIRTY);
2939         mutex_unlock(&dirty_i->seglist_lock);
2940
2941         reset_curseg(sbi, type, 1);
2942         curseg->alloc_type = SSR;
2943         curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2944
2945         sum_page = f2fs_get_sum_page(sbi, new_segno);
2946         if (IS_ERR(sum_page)) {
2947                 /* GC won't be able to use stale summary pages due to cp_error */
2948                 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2949                 return PTR_ERR(sum_page);
2950         }
2951         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2952         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2953         f2fs_put_page(sum_page, 1);
2954         return 0;
2955 }
2956
2957 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2958                                 int alloc_mode, unsigned long long age);
2959
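     /*
      * Prepare the log of @type to write @target_type data: prefer an SSR
      * victim (inheriting that victim segment's type); if none is found, fall
      * back to a fresh cold data segment allocated in the LFS manner.
      */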
2960 static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2961                                         int target_type, int alloc_mode,
2962                                         unsigned long long age)
2963 {
2964         struct curseg_info *curseg = CURSEG_I(sbi, type);
2965         int ret = 0;
2966
2967         curseg->seg_type = target_type;
2968
2969         if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2970                 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2971
2972                 curseg->seg_type = se->type;
2973                 ret = change_curseg(sbi, type);
2974         } else {
2975                 /* allocate cold segment by default */
2976                 curseg->seg_type = CURSEG_COLD_DATA;
2977                 ret = new_curseg(sbi, type, true);
2978         }
2979         stat_inc_seg_type(sbi, curseg);
2980         return ret;
2981 }
2982
2983 static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force)
2984 {
2985         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2986         int ret = 0;
2987
2988         if (!sbi->am.atgc_enabled && !force)
2989                 return 0;
2990
2991         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2992
2993         mutex_lock(&curseg->curseg_mutex);
2994         down_write(&SIT_I(sbi)->sentry_lock);
2995
2996         ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
2997                                         CURSEG_COLD_DATA, SSR, 0);
2998
2999         up_write(&SIT_I(sbi)->sentry_lock);
3000         mutex_unlock(&curseg->curseg_mutex);
3001
3002         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3003         return ret;
3004 }
3005
3006 int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
3007 {
3008         return __f2fs_init_atgc_curseg(sbi, false);
3009 }
3010
3011 int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi)
3012 {
3013         int ret;
3014
3015         if (!test_opt(sbi, ATGC))
3016                 return 0;
3017         if (sbi->am.atgc_enabled)
3018                 return 0;
3019         if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) <
3020                         sbi->am.age_threshold)
3021                 return 0;
3022
3023         ret = __f2fs_init_atgc_curseg(sbi, true);
3024         if (!ret) {
3025                 sbi->am.atgc_enabled = true;
3026                 f2fs_info(sbi, "reenabled age threshold GC");
3027         }
3028         return ret;
3029 }
3030
3031 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
3032 {
3033         struct curseg_info *curseg = CURSEG_I(sbi, type);
3034
3035         mutex_lock(&curseg->curseg_mutex);
3036         if (!curseg->inited)
3037                 goto out;
3038
3039         if (get_valid_blocks(sbi, curseg->segno, false)) {
3040                 write_sum_page(sbi, curseg->sum_blk,
3041                                 GET_SUM_BLOCK(sbi, curseg->segno));
3042         } else {
3043                 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
3044                 __set_test_and_free(sbi, curseg->segno, true);
3045                 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
3046         }
3047 out:
3048         mutex_unlock(&curseg->curseg_mutex);
3049 }
3050
3051 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
3052 {
3053         __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
3054
3055         if (sbi->am.atgc_enabled)
3056                 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
3057 }
3058
3059 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
3060 {
3061         struct curseg_info *curseg = CURSEG_I(sbi, type);
3062
3063         mutex_lock(&curseg->curseg_mutex);
3064         if (!curseg->inited)
3065                 goto out;
3066         if (get_valid_blocks(sbi, curseg->segno, false))
3067                 goto out;
3068
3069         mutex_lock(&DIRTY_I(sbi)->seglist_lock);
3070         __set_test_and_inuse(sbi, curseg->segno);
3071         mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
3072 out:
3073         mutex_unlock(&curseg->curseg_mutex);
3074 }
3075
3076 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
3077 {
3078         __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
3079
3080         if (sbi->am.atgc_enabled)
3081                 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
3082 }
3083
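     /*
      * Pick a partially valid segment to reuse in the SSR manner: ask the
      * victim selector for a candidate of the requested temperature first,
      * then for the other data or node temperatures, and finally, when
      * checkpointing is disabled, accept any completely free segment.
      * Returns 1 and sets curseg->next_segno on success, 0 otherwise.
      */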
3084 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
3085                                 int alloc_mode, unsigned long long age)
3086 {
3087         struct curseg_info *curseg = CURSEG_I(sbi, type);
3088         unsigned int segno = NULL_SEGNO;
3089         unsigned short seg_type = curseg->seg_type;
3090         int i, cnt;
3091         bool reversed = false;
3092
3093         sanity_check_seg_type(sbi, seg_type);
3094
3095         /* f2fs_need_SSR() already forces us to do this */
3096         if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type,
3097                                 alloc_mode, age, false)) {
3098                 curseg->next_segno = segno;
3099                 return 1;
3100         }
3101
3102         /* For node segments, let's do SSR more intensively */
3103         if (IS_NODESEG(seg_type)) {
3104                 if (seg_type >= CURSEG_WARM_NODE) {
3105                         reversed = true;
3106                         i = CURSEG_COLD_NODE;
3107                 } else {
3108                         i = CURSEG_HOT_NODE;
3109                 }
3110                 cnt = NR_CURSEG_NODE_TYPE;
3111         } else {
3112                 if (seg_type >= CURSEG_WARM_DATA) {
3113                         reversed = true;
3114                         i = CURSEG_COLD_DATA;
3115                 } else {
3116                         i = CURSEG_HOT_DATA;
3117                 }
3118                 cnt = NR_CURSEG_DATA_TYPE;
3119         }
3120
3121         for (; cnt-- > 0; reversed ? i-- : i++) {
3122                 if (i == seg_type)
3123                         continue;
3124                 if (!f2fs_get_victim(sbi, &segno, BG_GC, i,
3125                                         alloc_mode, age, false)) {
3126                         curseg->next_segno = segno;
3127                         return 1;
3128                 }
3129         }
3130
3131         /* find valid_blocks=0 in dirty list */
3132         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
3133                 segno = get_free_segment(sbi);
3134                 if (segno != NULL_SEGNO) {
3135                         curseg->next_segno = segno;
3136                         return 1;
3137                 }
3138         }
3139         return 0;
3140 }
3141
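     /*
      * Decide whether a full current segment should be replaced by a brand
      * new segment instead of being reused via SSR: always for the warm node
      * log when the CRC recovery flag is not set, when the next segment is
      * already free under LFS allocation (unless checkpointing is disabled),
      * or when SSR is not needed or no SSR candidate can be found.
      */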
3142 static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
3143 {
3144         struct curseg_info *curseg = CURSEG_I(sbi, type);
3145
3146         if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3147             curseg->seg_type == CURSEG_WARM_NODE)
3148                 return true;
3149         if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) &&
3150             likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3151                 return true;
3152         if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
3153                 return true;
3154         return false;
3155 }
3156
3157 int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3158                                         unsigned int start, unsigned int end)
3159 {
3160         struct curseg_info *curseg = CURSEG_I(sbi, type);
3161         unsigned int segno;
3162         int ret = 0;
3163
3164         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3165         mutex_lock(&curseg->curseg_mutex);
3166         down_write(&SIT_I(sbi)->sentry_lock);
3167
3168         segno = CURSEG_I(sbi, type)->segno;
3169         if (segno < start || segno > end)
3170                 goto unlock;
3171
3172         if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3173                 ret = change_curseg(sbi, type);
3174         else
3175                 ret = new_curseg(sbi, type, true);
3176
3177         stat_inc_seg_type(sbi, curseg);
3178
3179         locate_dirty_segment(sbi, segno);
3180 unlock:
3181         up_write(&SIT_I(sbi)->sentry_lock);
3182
3183         if (segno != curseg->segno)
3184                 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3185                             type, segno, curseg->segno);
3186
3187         mutex_unlock(&curseg->curseg_mutex);
3188         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3189         return ret;
3190 }
3191
3192 static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3193                                                 bool new_sec, bool force)
3194 {
3195         struct curseg_info *curseg = CURSEG_I(sbi, type);
3196         unsigned int old_segno;
3197         int err = 0;
3198
3199         if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
3200                 goto allocate;
3201
3202         if (!force && curseg->inited &&
3203             !curseg->next_blkoff &&
3204             !get_valid_blocks(sbi, curseg->segno, new_sec) &&
3205             !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3206                 return 0;
3207
3208 allocate:
3209         old_segno = curseg->segno;
3210         err = new_curseg(sbi, type, true);
3211         if (err)
3212                 return err;
3213         stat_inc_seg_type(sbi, curseg);
3214         locate_dirty_segment(sbi, old_segno);
3215         return 0;
3216 }
3217
3218 int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
3219 {
3220         int ret;
3221
3222         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3223         down_write(&SIT_I(sbi)->sentry_lock);
3224         ret = __allocate_new_segment(sbi, type, true, force);
3225         up_write(&SIT_I(sbi)->sentry_lock);
3226         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3227
3228         return ret;
3229 }
3230
3231 int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
3232 {
3233         int err;
3234         bool gc_required = true;
3235
3236 retry:
3237         f2fs_lock_op(sbi);
3238         err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3239         f2fs_unlock_op(sbi);
3240
3241         if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
3242                 f2fs_down_write(&sbi->gc_lock);
3243                 err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk),
3244                                 true, ZONED_PIN_SEC_REQUIRED_COUNT);
3245                 f2fs_up_write(&sbi->gc_lock);
3246
3247                 gc_required = false;
3248                 if (!err)
3249                         goto retry;
3250         }
3251
3252         return err;
3253 }
3254
3255 int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3256 {
3257         int i;
3258         int err = 0;
3259
3260         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3261         down_write(&SIT_I(sbi)->sentry_lock);
3262         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3263                 err += __allocate_new_segment(sbi, i, false, false);
3264         up_write(&SIT_I(sbi)->sentry_lock);
3265         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3266
3267         return err;
3268 }
3269
3270 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3271                                                 struct cp_control *cpc)
3272 {
3273         __u64 trim_start = cpc->trim_start;
3274         bool has_candidate = false;
3275
3276         down_write(&SIT_I(sbi)->sentry_lock);
3277         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3278                 if (add_discard_addrs(sbi, cpc, true)) {
3279                         has_candidate = true;
3280                         break;
3281                 }
3282         }
3283         up_write(&SIT_I(sbi)->sentry_lock);
3284
3285         cpc->trim_start = trim_start;
3286         return has_candidate;
3287 }
3288
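     /*
      * Issue prepared discard commands whose range overlaps [@start, @end]
      * for fstrim. Commands are submitted in batches of at most
      * dpolicy->max_requests; between batches the lock is dropped and all
      * in-flight discards are waited for. Returns the number of trimmed blocks.
      */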
3289 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3290                                         struct discard_policy *dpolicy,
3291                                         unsigned int start, unsigned int end)
3292 {
3293         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3294         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3295         struct rb_node **insert_p = NULL, *insert_parent = NULL;
3296         struct discard_cmd *dc;
3297         struct blk_plug plug;
3298         int issued;
3299         unsigned int trimmed = 0;
3300
3301 next:
3302         issued = 0;
3303
3304         mutex_lock(&dcc->cmd_lock);
3305         if (unlikely(dcc->rbtree_check))
3306                 f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
3307
3308         dc = __lookup_discard_cmd_ret(&dcc->root, start,
3309                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
3310         if (!dc)
3311                 dc = next_dc;
3312
3313         blk_start_plug(&plug);
3314
3315         while (dc && dc->di.lstart <= end) {
3316                 struct rb_node *node;
3317                 int err = 0;
3318
3319                 if (dc->di.len < dpolicy->granularity)
3320                         goto skip;
3321
3322                 if (dc->state != D_PREP) {
3323                         list_move_tail(&dc->list, &dcc->fstrim_list);
3324                         goto skip;
3325                 }
3326
3327                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3328
3329                 if (issued >= dpolicy->max_requests) {
3330                         start = dc->di.lstart + dc->di.len;
3331
3332                         if (err)
3333                                 __remove_discard_cmd(sbi, dc);
3334
3335                         blk_finish_plug(&plug);
3336                         mutex_unlock(&dcc->cmd_lock);
3337                         trimmed += __wait_all_discard_cmd(sbi, NULL);
3338                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3339                         goto next;
3340                 }
3341 skip:
3342                 node = rb_next(&dc->rb_node);
3343                 if (err)
3344                         __remove_discard_cmd(sbi, dc);
3345                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3346
3347                 if (fatal_signal_pending(current))
3348                         break;
3349         }
3350
3351         blk_finish_plug(&plug);
3352         mutex_unlock(&dcc->cmd_lock);
3353
3354         return trimmed;
3355 }
3356
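     /*
      * FITRIM entry point: clamp the byte range to the main area, align it to
      * sections when needed, write a CP_DISCARD checkpoint to collect discard
      * candidates, then issue and wait for them here unless runtime discard is
      * enabled, in which case they are left for background issuing.
      */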
3357 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3358 {
3359         __u64 start = F2FS_BYTES_TO_BLK(range->start);
3360         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3361         unsigned int start_segno, end_segno;
3362         block_t start_block, end_block;
3363         struct cp_control cpc;
3364         struct discard_policy dpolicy;
3365         unsigned long long trimmed = 0;
3366         int err = 0;
3367         bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3368
3369         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3370                 return -EINVAL;
3371
3372         if (end < MAIN_BLKADDR(sbi))
3373                 goto out;
3374
3375         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3376                 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3377                 return -EFSCORRUPTED;
3378         }
3379
3380         /* start/end segment number in main_area */
3381         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3382         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3383                                                 GET_SEGNO(sbi, end);
3384         if (need_align) {
3385                 start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi));
3386                 end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1;
3387         }
3388
3389         cpc.reason = CP_DISCARD;
3390         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3391         cpc.trim_start = start_segno;
3392         cpc.trim_end = end_segno;
3393
3394         if (sbi->discard_blks == 0)
3395                 goto out;
3396
3397         f2fs_down_write(&sbi->gc_lock);
3398         stat_inc_cp_call_count(sbi, TOTAL_CALL);
3399         err = f2fs_write_checkpoint(sbi, &cpc);
3400         f2fs_up_write(&sbi->gc_lock);
3401         if (err)
3402                 goto out;
3403
3404         /*
3405          * We filed discard candidates, but we don't actually need to wait for
3406          * all of them, since they will be issued at idle time as long as the
3407          * runtime discard option is enabled. In that case the user is expected to
3408          * rely on runtime discard or periodic fstrim rather than waiting here.
3409          */
3410         if (f2fs_realtime_discard_enable(sbi))
3411                 goto out;
3412
3413         start_block = START_BLOCK(sbi, start_segno);
3414         end_block = START_BLOCK(sbi, end_segno + 1);
3415
3416         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3417         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3418                                         start_block, end_block);
3419
3420         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3421                                         start_block, end_block);
3422 out:
3423         if (!err)
3424                 range->len = F2FS_BLK_TO_BYTES(trimmed);
3425         return err;
3426 }
3427
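     /*
      * Map a write-life hint to a data log. With two active logs all data is
      * treated as hot and with four as cold; only the default six-log layout
      * distinguishes hot, warm and cold data by hint.
      */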
3428 int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint)
3429 {
3430         if (F2FS_OPTION(sbi).active_logs == 2)
3431                 return CURSEG_HOT_DATA;
3432         else if (F2FS_OPTION(sbi).active_logs == 4)
3433                 return CURSEG_COLD_DATA;
3434
3435         /* active_log == 6 */
3436         switch (hint) {
3437         case WRITE_LIFE_SHORT:
3438                 return CURSEG_HOT_DATA;
3439         case WRITE_LIFE_EXTREME:
3440                 return CURSEG_COLD_DATA;
3441         default:
3442                 return CURSEG_WARM_DATA;
3443         }
3444 }
3445
3446 /*
3447  * This returns write hints for each segment type. These hints will be
3448  * passed down to the block layer as shown below by default.
3449  *
3450  * User                  F2FS                     Block
3451  * ----                  ----                     -----
3452  *                       META                     WRITE_LIFE_NONE|REQ_META
3453  *                       HOT_NODE                 WRITE_LIFE_NONE
3454  *                       WARM_NODE                WRITE_LIFE_MEDIUM
3455  *                       COLD_NODE                WRITE_LIFE_LONG
3456  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
3457  * extension list        "                        "
3458  *
3459  * -- buffered io
3460  *                       COLD_DATA                WRITE_LIFE_EXTREME
3461  *                       HOT_DATA                 WRITE_LIFE_SHORT
3462  *                       WARM_DATA                WRITE_LIFE_NOT_SET
3463  *
3464  * -- direct io
3465  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
3466  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
3467  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
3468  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
3469  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
3470  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
3471  */
3472 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
3473                                 enum page_type type, enum temp_type temp)
3474 {
3475         switch (type) {
3476         case DATA:
3477                 switch (temp) {
3478                 case WARM:
3479                         return WRITE_LIFE_NOT_SET;
3480                 case HOT:
3481                         return WRITE_LIFE_SHORT;
3482                 case COLD:
3483                         return WRITE_LIFE_EXTREME;
3484                 default:
3485                         return WRITE_LIFE_NONE;
3486                 }
3487         case NODE:
3488                 switch (temp) {
3489                 case WARM:
3490                         return WRITE_LIFE_MEDIUM;
3491                 case HOT:
3492                         return WRITE_LIFE_NONE;
3493                 case COLD:
3494                         return WRITE_LIFE_LONG;
3495                 default:
3496                         return WRITE_LIFE_NONE;
3497                 }
3498         case META:
3499                 return WRITE_LIFE_NONE;
3500         default:
3501                 return WRITE_LIFE_NONE;
3502         }
3503 }
3504
3505 static int __get_segment_type_2(struct f2fs_io_info *fio)
3506 {
3507         if (fio->type == DATA)
3508                 return CURSEG_HOT_DATA;
3509         else
3510                 return CURSEG_HOT_NODE;
3511 }
3512
3513 static int __get_segment_type_4(struct f2fs_io_info *fio)
3514 {
3515         if (fio->type == DATA) {
3516                 struct inode *inode = fio->page->mapping->host;
3517
3518                 if (S_ISDIR(inode->i_mode))
3519                         return CURSEG_HOT_DATA;
3520                 else
3521                         return CURSEG_COLD_DATA;
3522         } else {
3523                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3524                         return CURSEG_WARM_NODE;
3525                 else
3526                         return CURSEG_COLD_NODE;
3527         }
3528 }
3529
3530 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3531 {
3532         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3533         struct extent_info ei = {};
3534
3535         if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3536                 if (!ei.age)
3537                         return NO_CHECK_TYPE;
3538                 if (ei.age <= sbi->hot_data_age_threshold)
3539                         return CURSEG_HOT_DATA;
3540                 if (ei.age <= sbi->warm_data_age_threshold)
3541                         return CURSEG_WARM_DATA;
3542                 return CURSEG_COLD_DATA;
3543         }
3544         return NO_CHECK_TYPE;
3545 }
3546
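     /*
      * Choose a log when six active logs are used. For data, the priority is:
      * aligned/pinned writes, pages being moved by GC (ATGC when possible,
      * otherwise cold), cold or compressed files, the block age hint, hot
      * files, and finally the inode's write hint. Direct node blocks go to the
      * warm or hot node log, other node blocks to the cold node log.
      */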
3547 static int __get_segment_type_6(struct f2fs_io_info *fio)
3548 {
3549         if (fio->type == DATA) {
3550                 struct inode *inode = fio->page->mapping->host;
3551                 int type;
3552
3553                 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3554                         return CURSEG_COLD_DATA_PINNED;
3555
3556                 if (page_private_gcing(fio->page)) {
3557                         if (fio->sbi->am.atgc_enabled &&
3558                                 (fio->io_type == FS_DATA_IO) &&
3559                                 (fio->sbi->gc_mode != GC_URGENT_HIGH) &&
3560                                 __is_valid_data_blkaddr(fio->old_blkaddr) &&
3561                                 !is_inode_flag_set(inode, FI_OPU_WRITE))
3562                                 return CURSEG_ALL_DATA_ATGC;
3563                         else
3564                                 return CURSEG_COLD_DATA;
3565                 }
3566                 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3567                         return CURSEG_COLD_DATA;
3568
3569                 type = __get_age_segment_type(inode,
3570                                 page_folio(fio->page)->index);
3571                 if (type != NO_CHECK_TYPE)
3572                         return type;
3573
3574                 if (file_is_hot(inode) ||
3575                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3576                                 f2fs_is_cow_file(inode))
3577                         return CURSEG_HOT_DATA;
3578                 return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
3579                                                 inode->i_write_hint);
3580         } else {
3581                 if (IS_DNODE(fio->page))
3582                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3583                                                 CURSEG_HOT_NODE;
3584                 return CURSEG_COLD_NODE;
3585         }
3586 }
3587
3588 enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi,
3589                                                 enum log_type type)
3590 {
3591         struct curseg_info *curseg = CURSEG_I(sbi, type);
3592         enum temp_type temp = COLD;
3593
3594         switch (curseg->seg_type) {
3595         case CURSEG_HOT_NODE:
3596         case CURSEG_HOT_DATA:
3597                 temp = HOT;
3598                 break;
3599         case CURSEG_WARM_NODE:
3600         case CURSEG_WARM_DATA:
3601                 temp = WARM;
3602                 break;
3603         case CURSEG_COLD_NODE:
3604         case CURSEG_COLD_DATA:
3605                 temp = COLD;
3606                 break;
3607         default:
3608                 f2fs_bug_on(sbi, 1);
3609         }
3610
3611         return temp;
3612 }
3613
3614 static int __get_segment_type(struct f2fs_io_info *fio)
3615 {
3616         enum log_type type = CURSEG_HOT_DATA;
3617
3618         switch (F2FS_OPTION(fio->sbi).active_logs) {
3619         case 2:
3620                 type = __get_segment_type_2(fio);
3621                 break;
3622         case 4:
3623                 type = __get_segment_type_4(fio);
3624                 break;
3625         case 6:
3626                 type = __get_segment_type_6(fio);
3627                 break;
3628         default:
3629                 f2fs_bug_on(fio->sbi, true);
3630         }
3631
3632         fio->temp = f2fs_get_segment_temp(fio->sbi, type);
3633
3634         return type;
3635 }
3636
3637 static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
3638                 struct curseg_info *seg)
3639 {
3640         /* To allocate block chunks in different sizes, use random numbers */
3641         if (--seg->fragment_remained_chunk > 0)
3642                 return;
3643
3644         seg->fragment_remained_chunk =
3645                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
3646         seg->next_blkoff +=
3647                 get_random_u32_inclusive(1, sbi->max_fragment_hole);
3648 }
3649
3650 static void reset_curseg_fields(struct curseg_info *curseg)
3651 {
3652         curseg->inited = false;
3653         curseg->segno = NULL_SEGNO;
3654         curseg->next_segno = 0;
3655 }
3656
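     /*
      * Allocate the next block of the log of @type: reserve the block offset
      * in the current segment, record the summary entry, update segment mtime
      * and SIT valid-block counts for the old and new addresses, open a new
      * segment once the current one is full, and, if @fio is given, queue it
      * on the per-temperature write list so bios go out in allocation order.
      */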
3657 int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3658                 block_t old_blkaddr, block_t *new_blkaddr,
3659                 struct f2fs_summary *sum, int type,
3660                 struct f2fs_io_info *fio)
3661 {
3662         struct sit_info *sit_i = SIT_I(sbi);
3663         struct curseg_info *curseg = CURSEG_I(sbi, type);
3664         unsigned long long old_mtime;
3665         bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3666         struct seg_entry *se = NULL;
3667         bool segment_full = false;
3668         int ret = 0;
3669
3670         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3671
3672         mutex_lock(&curseg->curseg_mutex);
3673         down_write(&sit_i->sentry_lock);
3674
3675         if (curseg->segno == NULL_SEGNO) {
3676                 ret = -ENOSPC;
3677                 goto out_err;
3678         }
3679
3680         if (from_gc) {
3681                 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3682                 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3683                 sanity_check_seg_type(sbi, se->type);
3684                 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3685         }
3686         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3687
3688         f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi));
3689
3690         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3691
3692         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3693         if (curseg->alloc_type == SSR) {
3694                 curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
3695         } else {
3696                 curseg->next_blkoff++;
3697                 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
3698                         f2fs_randomize_chunk(sbi, curseg);
3699         }
3700         if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
3701                 segment_full = true;
3702         stat_inc_block_count(sbi, curseg);
3703
3704         if (from_gc) {
3705                 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3706         } else {
3707                 update_segment_mtime(sbi, old_blkaddr, 0);
3708                 old_mtime = 0;
3709         }
3710         update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3711
3712         /*
3713          * SIT information should be updated before segment allocation,
3714          * since SSR needs the latest valid block information.
3715          */
3716         update_sit_entry(sbi, *new_blkaddr, 1);
3717         update_sit_entry(sbi, old_blkaddr, -1);
3718
3719         /*
3720          * If the current segment is full, flush it out and replace it with a
3721          * new segment.
3722          */
3723         if (segment_full) {
3724                 if (type == CURSEG_COLD_DATA_PINNED &&
3725                     !((curseg->segno + 1) % sbi->segs_per_sec)) {
3726                         write_sum_page(sbi, curseg->sum_blk,
3727                                         GET_SUM_BLOCK(sbi, curseg->segno));
3728                         reset_curseg_fields(curseg);
3729                         goto skip_new_segment;
3730                 }
3731
3732                 if (from_gc) {
3733                         ret = get_atssr_segment(sbi, type, se->type,
3734                                                 AT_SSR, se->mtime);
3735                 } else {
3736                         if (need_new_seg(sbi, type))
3737                                 ret = new_curseg(sbi, type, false);
3738                         else
3739                                 ret = change_curseg(sbi, type);
3740                         stat_inc_seg_type(sbi, curseg);
3741                 }
3742
3743                 if (ret)
3744                         goto out_err;
3745         }
3746
3747 skip_new_segment:
3748         /*
3749          * Segment dirty status should be updated after segment allocation,
3750          * so we only need to update the status once, after the previous
3751          * segment has been closed.
3752          */
3753         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3754         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3755
3756         if (IS_DATASEG(curseg->seg_type))
3757                 atomic64_inc(&sbi->allocated_data_blocks);
3758
3759         up_write(&sit_i->sentry_lock);
3760
3761         if (page && IS_NODESEG(curseg->seg_type)) {
3762                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3763
3764                 f2fs_inode_chksum_set(sbi, page);
3765         }
3766
3767         if (fio) {
3768                 struct f2fs_bio_info *io;
3769
3770                 INIT_LIST_HEAD(&fio->list);
3771                 fio->in_list = 1;
3772                 io = sbi->write_io[fio->type] + fio->temp;
3773                 spin_lock(&io->io_lock);
3774                 list_add_tail(&fio->list, &io->io_list);
3775                 spin_unlock(&io->io_lock);
3776         }
3777
3778         mutex_unlock(&curseg->curseg_mutex);
3779         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3780         return 0;
3781
3782 out_err:
3783         *new_blkaddr = NULL_ADDR;
3784         up_write(&sit_i->sentry_lock);
3785         mutex_unlock(&curseg->curseg_mutex);
3786         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3787         return ret;
3788 }
3789
3790 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3791                                         block_t blkaddr, unsigned int blkcnt)
3792 {
3793         if (!f2fs_is_multi_device(sbi))
3794                 return;
3795
3796         while (1) {
3797                 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3798                 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3799
3800                 /* update device state for fsync */
3801                 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3802
3803                 /* update device state for checkpoint */
3804                 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3805                         spin_lock(&sbi->dev_lock);
3806                         f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3807                         spin_unlock(&sbi->dev_lock);
3808                 }
3809
3810                 if (blkcnt <= blks)
3811                         break;
3812                 blkcnt -= blks;
3813                 blkaddr += blks;
3814         }
3815 }
3816
3817 static int log_type_to_seg_type(enum log_type type)
3818 {
3819         int seg_type = CURSEG_COLD_DATA;
3820
3821         switch (type) {
3822         case CURSEG_HOT_DATA:
3823         case CURSEG_WARM_DATA:
3824         case CURSEG_COLD_DATA:
3825         case CURSEG_HOT_NODE:
3826         case CURSEG_WARM_NODE:
3827         case CURSEG_COLD_NODE:
3828                 seg_type = (int)type;
3829                 break;
3830         case CURSEG_COLD_DATA_PINNED:
3831         case CURSEG_ALL_DATA_ATGC:
3832                 seg_type = CURSEG_COLD_DATA;
3833                 break;
3834         default:
3835                 break;
3836         }
3837         return seg_type;
3838 }
3839
3840 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3841 {
3842         enum log_type type = __get_segment_type(fio);
3843         int seg_type = log_type_to_seg_type(type);
3844         bool keep_order = (f2fs_lfs_mode(fio->sbi) &&
3845                                 seg_type == CURSEG_COLD_DATA);
3846
3847         if (keep_order)
3848                 f2fs_down_read(&fio->sbi->io_order_lock);
3849
3850         if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3851                         &fio->new_blkaddr, sum, type, fio)) {
3852                 if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
3853                         fscrypt_finalize_bounce_page(&fio->encrypted_page);
3854                 end_page_writeback(fio->page);
3855                 if (f2fs_in_warm_node_list(fio->sbi, fio->page))
3856                         f2fs_del_fsync_node_entry(fio->sbi, fio->page);
3857                 goto out;
3858         }
3859         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3860                 f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
3861
3862         /* write out the dirty page to the block device */
3863         f2fs_submit_page_write(fio);
3864
3865         f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3866 out:
3867         if (keep_order)
3868                 f2fs_up_read(&fio->sbi->io_order_lock);
3869 }
3870
3871 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
3872                                         enum iostat_type io_type)
3873 {
3874         struct f2fs_io_info fio = {
3875                 .sbi = sbi,
3876                 .type = META,
3877                 .temp = HOT,
3878                 .op = REQ_OP_WRITE,
3879                 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3880                 .old_blkaddr = folio->index,
3881                 .new_blkaddr = folio->index,
3882                 .page = folio_page(folio, 0),
3883                 .encrypted_page = NULL,
3884                 .in_list = 0,
3885         };
3886
3887         if (unlikely(folio->index >= MAIN_BLKADDR(sbi)))
3888                 fio.op_flags &= ~REQ_META;
3889
3890         folio_start_writeback(folio);
3891         f2fs_submit_page_write(&fio);
3892
3893         stat_inc_meta_count(sbi, folio->index);
3894         f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3895 }
3896
3897 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3898 {
3899         struct f2fs_summary sum;
3900
3901         set_summary(&sum, nid, 0, 0);
3902         do_write_page(&sum, fio);
3903
3904         f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3905 }
3906
3907 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3908                                         struct f2fs_io_info *fio)
3909 {
3910         struct f2fs_sb_info *sbi = fio->sbi;
3911         struct f2fs_summary sum;
3912
3913         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3914         if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3915                 f2fs_update_age_extent_cache(dn);
3916         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3917         do_write_page(&sum, fio);
3918         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3919
3920         f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3921 }
3922
3923 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3924 {
3925         int err;
3926         struct f2fs_sb_info *sbi = fio->sbi;
3927         unsigned int segno;
3928
3929         fio->new_blkaddr = fio->old_blkaddr;
3930         /* i/o temperature is needed for passing down write hints */
3931         __get_segment_type(fio);
3932
3933         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3934
3935         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3936                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3937                 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3938                           __func__, segno);
3939                 err = -EFSCORRUPTED;
3940                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3941                 goto drop_bio;
3942         }
3943
3944         if (f2fs_cp_error(sbi)) {
3945                 err = -EIO;
3946                 goto drop_bio;
3947         }
3948
3949         if (fio->meta_gc)
3950                 f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
3951
3952         stat_inc_inplace_blocks(fio->sbi);
3953
3954         if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
3955                 err = f2fs_merge_page_bio(fio);
3956         else
3957                 err = f2fs_submit_page_bio(fio);
3958         if (!err) {
3959                 f2fs_update_device_state(fio->sbi, fio->ino,
3960                                                 fio->new_blkaddr, 1);
3961                 f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3962                                                 fio->io_type, F2FS_BLKSIZE);
3963         }
3964
3965         return err;
3966 drop_bio:
3967         if (fio->bio && *(fio->bio)) {
3968                 struct bio *bio = *(fio->bio);
3969
3970                 bio->bi_status = BLK_STS_IOERR;
3971                 bio_endio(bio);
3972                 *(fio->bio) = NULL;
3973         }
3974         return err;
3975 }
3976
3977 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3978                                                 unsigned int segno)
3979 {
3980         int i;
3981
3982         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3983                 if (CURSEG_I(sbi, i)->segno == segno)
3984                         break;
3985         }
3986         return i;
3987 }
3988
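     /*
      * Write a summary for @new_blkaddr at a caller-chosen location, used by
      * the recovery and GC paths: temporarily point the matching current log
      * at the target segment, install @sum at the block's offset, fix up SIT
      * entries and mtimes for both addresses, and optionally restore the
      * previous curseg afterwards.
      */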
3989 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3990                                 block_t old_blkaddr, block_t new_blkaddr,
3991                                 bool recover_curseg, bool recover_newaddr,
3992                                 bool from_gc)
3993 {
3994         struct sit_info *sit_i = SIT_I(sbi);
3995         struct curseg_info *curseg;
3996         unsigned int segno, old_cursegno;
3997         struct seg_entry *se;
3998         int type;
3999         unsigned short old_blkoff;
4000         unsigned char old_alloc_type;
4001
4002         segno = GET_SEGNO(sbi, new_blkaddr);
4003         se = get_seg_entry(sbi, segno);
4004         type = se->type;
4005
4006         f2fs_down_write(&SM_I(sbi)->curseg_lock);
4007
4008         if (!recover_curseg) {
4009                 /* for recovery flow */
4010                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
4011                         if (old_blkaddr == NULL_ADDR)
4012                                 type = CURSEG_COLD_DATA;
4013                         else
4014                                 type = CURSEG_WARM_DATA;
4015                 }
4016         } else {
4017                 if (IS_CURSEG(sbi, segno)) {
4018                 /* se->type is volatile due to SSR allocation */
4019                         type = __f2fs_get_curseg(sbi, segno);
4020                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
4021                 } else {
4022                         type = CURSEG_WARM_DATA;
4023                 }
4024         }
4025
4026         curseg = CURSEG_I(sbi, type);
4027         f2fs_bug_on(sbi, !IS_DATASEG(curseg->seg_type));
4028
4029         mutex_lock(&curseg->curseg_mutex);
4030         down_write(&sit_i->sentry_lock);
4031
4032         old_cursegno = curseg->segno;
4033         old_blkoff = curseg->next_blkoff;
4034         old_alloc_type = curseg->alloc_type;
4035
4036         /* change the current segment */
4037         if (segno != curseg->segno) {
4038                 curseg->next_segno = segno;
4039                 if (change_curseg(sbi, type))
4040                         goto out_unlock;
4041         }
4042
4043         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
4044         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
4045
4046         if (!recover_curseg || recover_newaddr) {
4047                 if (!from_gc)
4048                         update_segment_mtime(sbi, new_blkaddr, 0);
4049                 update_sit_entry(sbi, new_blkaddr, 1);
4050         }
4051         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
4052                 f2fs_invalidate_internal_cache(sbi, old_blkaddr);
4053                 if (!from_gc)
4054                         update_segment_mtime(sbi, old_blkaddr, 0);
4055                 update_sit_entry(sbi, old_blkaddr, -1);
4056         }
4057
4058         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
4059         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
4060
4061         locate_dirty_segment(sbi, old_cursegno);
4062
4063         if (recover_curseg) {
4064                 if (old_cursegno != curseg->segno) {
4065                         curseg->next_segno = old_cursegno;
4066                         if (change_curseg(sbi, type))
4067                                 goto out_unlock;
4068                 }
4069                 curseg->next_blkoff = old_blkoff;
4070                 curseg->alloc_type = old_alloc_type;
4071         }
4072
4073 out_unlock:
4074         up_write(&sit_i->sentry_lock);
4075         mutex_unlock(&curseg->curseg_mutex);
4076         f2fs_up_write(&SM_I(sbi)->curseg_lock);
4077 }
4078
4079 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
4080                                 block_t old_addr, block_t new_addr,
4081                                 unsigned char version, bool recover_curseg,
4082                                 bool recover_newaddr)
4083 {
4084         struct f2fs_summary sum;
4085
4086         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
4087
4088         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
4089                                         recover_curseg, recover_newaddr, false);
4090
4091         f2fs_update_data_blkaddr(dn, new_addr);
4092 }
4093
4094 void f2fs_wait_on_page_writeback(struct page *page,
4095                                 enum page_type type, bool ordered, bool locked)
4096 {
4097         if (folio_test_writeback(page_folio(page))) {
4098                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
4099
4100                 /* submit cached LFS IO */
4101                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
4102                 /* submit cached IPU IO */
4103                 f2fs_submit_merged_ipu_write(sbi, NULL, page);
4104                 if (ordered) {
4105                         wait_on_page_writeback(page);
4106                         f2fs_bug_on(sbi, locked &&
4107                                 folio_test_writeback(page_folio(page)));
4108                 } else {
4109                         wait_for_stable_page(page);
4110                 }
4111         }
4112 }
4113
4114 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
4115 {
4116         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4117         struct page *cpage;
4118
4119         if (!f2fs_meta_inode_gc_required(inode))
4120                 return;
4121
4122         if (!__is_valid_data_blkaddr(blkaddr))
4123                 return;
4124
4125         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
4126         if (cpage) {
4127                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
4128                 f2fs_put_page(cpage, 1);
4129         }
4130 }
4131
4132 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
4133                                                                 block_t len)
4134 {
4135         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4136         block_t i;
4137
4138         if (!f2fs_meta_inode_gc_required(inode))
4139                 return;
4140
4141         for (i = 0; i < len; i++)
4142                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
4143
4144         f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
4145 }
4146
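     /*
      * Rebuild the three data logs from a compacted summary area: the first
      * block starts with the NAT journal, then the SIT journal, followed by
      * the packed summary entries of the hot, warm and cold data logs,
      * continuing into further blocks as needed.
      */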
4147 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
4148 {
4149         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4150         struct curseg_info *seg_i;
4151         unsigned char *kaddr;
4152         struct page *page;
4153         block_t start;
4154         int i, j, offset;
4155
4156         start = start_sum_block(sbi);
4157
4158         page = f2fs_get_meta_page(sbi, start++);
4159         if (IS_ERR(page))
4160                 return PTR_ERR(page);
4161         kaddr = (unsigned char *)page_address(page);
4162
4163         /* Step 1: restore nat cache */
4164         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4165         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
4166
4167         /* Step 2: restore sit cache */
4168         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4169         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
4170         offset = 2 * SUM_JOURNAL_SIZE;
4171
4172         /* Step 3: restore summary entries */
4173         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4174                 unsigned short blk_off;
4175                 unsigned int segno;
4176
4177                 seg_i = CURSEG_I(sbi, i);
4178                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
4179                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
4180                 seg_i->next_segno = segno;
4181                 reset_curseg(sbi, i, 0);
4182                 seg_i->alloc_type = ckpt->alloc_type[i];
4183                 seg_i->next_blkoff = blk_off;
4184
4185                 if (seg_i->alloc_type == SSR)
4186                         blk_off = BLKS_PER_SEG(sbi);
4187
4188                 for (j = 0; j < blk_off; j++) {
4189                         struct f2fs_summary *s;
4190
4191                         s = (struct f2fs_summary *)(kaddr + offset);
4192                         seg_i->sum_blk->entries[j] = *s;
4193                         offset += SUMMARY_SIZE;
4194                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
4195                                                 SUM_FOOTER_SIZE)
4196                                 continue;
4197
4198                         f2fs_put_page(page, 1);
4199                         page = NULL;
4200
4201                         page = f2fs_get_meta_page(sbi, start++);
4202                         if (IS_ERR(page))
4203                                 return PTR_ERR(page);
4204                         kaddr = (unsigned char *)page_address(page);
4205                         offset = 0;
4206                 }
4207         }
4208         f2fs_put_page(page, 1);
4209         return 0;
4210 }
4211
4212 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
4213 {
4214         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4215         struct f2fs_summary_block *sum;
4216         struct curseg_info *curseg;
4217         struct page *new;
4218         unsigned short blk_off;
4219         unsigned int segno = 0;
4220         block_t blk_addr = 0;
4221         int err = 0;
4222
4223         /* get segment number and block addr */
4224         if (IS_DATASEG(type)) {
4225                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
4226                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
4227                                                         CURSEG_HOT_DATA]);
4228                 if (__exist_node_summaries(sbi))
4229                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
4230                 else
4231                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
4232         } else {
4233                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
4234                                                         CURSEG_HOT_NODE]);
4235                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
4236                                                         CURSEG_HOT_NODE]);
4237                 if (__exist_node_summaries(sbi))
4238                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
4239                                                         type - CURSEG_HOT_NODE);
4240                 else
4241                         blk_addr = GET_SUM_BLOCK(sbi, segno);
4242         }
4243
4244         new = f2fs_get_meta_page(sbi, blk_addr);
4245         if (IS_ERR(new))
4246                 return PTR_ERR(new);
4247         sum = (struct f2fs_summary_block *)page_address(new);
4248
4249         if (IS_NODESEG(type)) {
4250                 if (__exist_node_summaries(sbi)) {
4251                         struct f2fs_summary *ns = &sum->entries[0];
4252                         int i;
4253
4254                         for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) {
4255                                 ns->version = 0;
4256                                 ns->ofs_in_node = 0;
4257                         }
4258                 } else {
4259                         err = f2fs_restore_node_summary(sbi, segno, sum);
4260                         if (err)
4261                                 goto out;
4262                 }
4263         }
4264
4265         /* set uncompleted segment to curseg */
4266         curseg = CURSEG_I(sbi, type);
4267         mutex_lock(&curseg->curseg_mutex);
4268
4269         /* update journal info */
4270         down_write(&curseg->journal_rwsem);
4271         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
4272         up_write(&curseg->journal_rwsem);
4273
4274         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
4275         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
4276         curseg->next_segno = segno;
4277         reset_curseg(sbi, type, 0);
4278         curseg->alloc_type = ckpt->alloc_type[type];
4279         curseg->next_blkoff = blk_off;
4280         mutex_unlock(&curseg->curseg_mutex);
4281 out:
4282         f2fs_put_page(new, 1);
4283         return err;
4284 }
4285
4286 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
4287 {
4288         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
4289         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
4290         int type = CURSEG_HOT_DATA;
4291         int err;
4292
4293         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
4294                 int npages = f2fs_npages_for_summary_flush(sbi, true);
4295
4296                 if (npages >= 2)
4297                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
4298                                                         META_CP, true);
4299
4300                 /* restore for compacted data summary */
4301                 err = read_compacted_summaries(sbi);
4302                 if (err)
4303                         return err;
4304                 type = CURSEG_HOT_NODE;
4305         }
4306
4307         if (__exist_node_summaries(sbi))
4308                 f2fs_ra_meta_pages(sbi,
4309                                 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
4310                                 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
4311
4312         for (; type <= CURSEG_COLD_NODE; type++) {
4313                 err = read_normal_summaries(sbi, type);
4314                 if (err)
4315                         return err;
4316         }
4317
4318         /* sanity check for summary blocks */
4319         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4320                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4321                 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
4322                          nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4323                 return -EINVAL;
4324         }
4325
4326         return 0;
4327 }
4328
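/*
 * Pack the NAT journal, the SIT journal and the summary entries of the three
 * data logs back-to-back into as few meta pages as possible, starting at
 * @blkaddr, and mark those pages dirty for checkpoint writeback.
 */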
4329 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4330 {
4331         struct page *page;
4332         unsigned char *kaddr;
4333         struct f2fs_summary *summary;
4334         struct curseg_info *seg_i;
4335         int written_size = 0;
4336         int i, j;
4337
4338         page = f2fs_grab_meta_page(sbi, blkaddr++);
4339         kaddr = (unsigned char *)page_address(page);
4340         memset(kaddr, 0, PAGE_SIZE);
4341
4342         /* Step 1: write nat cache */
4343         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4344         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4345         written_size += SUM_JOURNAL_SIZE;
4346
4347         /* Step 2: write sit cache */
4348         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4349         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4350         written_size += SUM_JOURNAL_SIZE;
4351
4352         /* Step 3: write summary entries */
4353         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4354                 seg_i = CURSEG_I(sbi, i);
4355                 for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
4356                         if (!page) {
4357                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
4358                                 kaddr = (unsigned char *)page_address(page);
4359                                 memset(kaddr, 0, PAGE_SIZE);
4360                                 written_size = 0;
4361                         }
4362                         summary = (struct f2fs_summary *)(kaddr + written_size);
4363                         *summary = seg_i->sum_blk->entries[j];
4364                         written_size += SUMMARY_SIZE;
4365
4366                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4367                                                         SUM_FOOTER_SIZE)
4368                                 continue;
4369
4370                         set_page_dirty(page);
4371                         f2fs_put_page(page, 1);
4372                         page = NULL;
4373                 }
4374         }
4375         if (page) {
4376                 set_page_dirty(page);
4377                 f2fs_put_page(page, 1);
4378         }
4379 }
4380
4381 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4382                                         block_t blkaddr, int type)
4383 {
4384         int i, end;
4385
4386         if (IS_DATASEG(type))
4387                 end = type + NR_CURSEG_DATA_TYPE;
4388         else
4389                 end = type + NR_CURSEG_NODE_TYPE;
4390
4391         for (i = type; i < end; i++)
4392                 write_current_sum_page(sbi, i, blkaddr + (i - type));
4393 }
4394
4395 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4396 {
4397         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4398                 write_compacted_summaries(sbi, start_blk);
4399         else
4400                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4401 }
4402
4403 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4404 {
4405         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4406 }
4407
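/*
 * Look up the journal entry matching @val (an nid for NAT_JOURNAL, a segno
 * for SIT_JOURNAL) and return its index.  If it is not found and @alloc is
 * set, allocate a new slot while the journal still has space; otherwise
 * return -1.
 */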
4408 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4409                                         unsigned int val, int alloc)
4410 {
4411         int i;
4412
4413         if (type == NAT_JOURNAL) {
4414                 for (i = 0; i < nats_in_cursum(journal); i++) {
4415                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4416                                 return i;
4417                 }
4418                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4419                         return update_nats_in_cursum(journal, 1);
4420         } else if (type == SIT_JOURNAL) {
4421                 for (i = 0; i < sits_in_cursum(journal); i++)
4422                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4423                                 return i;
4424                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4425                         return update_sits_in_cursum(journal, 1);
4426         }
4427         return -1;
4428 }
4429
4430 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4431                                         unsigned int segno)
4432 {
4433         return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4434 }
4435
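/*
 * Copy the up-to-date in-memory SIT information for the block containing
 * @start into the alternate on-disk SIT block (SIT blocks alternate between
 * two locations across checkpoints), mark the page dirty, and flip the SIT
 * bitmap so the new copy becomes the current one.
 */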
4436 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4437                                         unsigned int start)
4438 {
4439         struct sit_info *sit_i = SIT_I(sbi);
4440         struct page *page;
4441         pgoff_t src_off, dst_off;
4442
4443         src_off = current_sit_addr(sbi, start);
4444         dst_off = next_sit_addr(sbi, src_off);
4445
4446         page = f2fs_grab_meta_page(sbi, dst_off);
4447         seg_info_to_sit_page(sbi, page, start);
4448
4449         set_page_dirty(page);
4450         set_to_next_sit(sit_i, start);
4451
4452         return page;
4453 }
4454
4455 static struct sit_entry_set *grab_sit_entry_set(void)
4456 {
4457         struct sit_entry_set *ses =
4458                         f2fs_kmem_cache_alloc(sit_entry_set_slab,
4459                                                 GFP_NOFS, true, NULL);
4460
4461         ses->entry_cnt = 0;
4462         INIT_LIST_HEAD(&ses->set_list);
4463         return ses;
4464 }
4465
4466 static void release_sit_entry_set(struct sit_entry_set *ses)
4467 {
4468         list_del(&ses->set_list);
4469         kmem_cache_free(sit_entry_set_slab, ses);
4470 }
4471
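/*
 * Keep the sit_entry_set list sorted in ascending order of entry_cnt, so the
 * sets with the fewest dirty entries come first and can be flushed to the
 * journal before it runs out of space.
 */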
4472 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4473                                                 struct list_head *head)
4474 {
4475         struct sit_entry_set *next = ses;
4476
4477         if (list_is_last(&ses->set_list, head))
4478                 return;
4479
4480         list_for_each_entry_continue(next, head, set_list)
4481                 if (ses->entry_cnt <= next->entry_cnt) {
4482                         list_move_tail(&ses->set_list, &next->set_list);
4483                         return;
4484                 }
4485
4486         list_move_tail(&ses->set_list, head);
4487 }
4488
4489 static void add_sit_entry(unsigned int segno, struct list_head *head)
4490 {
4491         struct sit_entry_set *ses;
4492         unsigned int start_segno = START_SEGNO(segno);
4493
4494         list_for_each_entry(ses, head, set_list) {
4495                 if (ses->start_segno == start_segno) {
4496                         ses->entry_cnt++;
4497                         adjust_sit_entry_set(ses, head);
4498                         return;
4499                 }
4500         }
4501
4502         ses = grab_sit_entry_set();
4503
4504         ses->start_segno = start_segno;
4505         ses->entry_cnt++;
4506         list_add(&ses->set_list, head);
4507 }
4508
4509 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4510 {
4511         struct f2fs_sm_info *sm_info = SM_I(sbi);
4512         struct list_head *set_list = &sm_info->sit_entry_set;
4513         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4514         unsigned int segno;
4515
4516         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4517                 add_sit_entry(segno, set_list);
4518 }
4519
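/*
 * Drop every SIT entry cached in the cold data journal, making sure each
 * corresponding segment is marked dirty (and accounted in a sit entry set if
 * it was not dirty yet) so that it is written to a SIT page instead.
 */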
4520 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4521 {
4522         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4523         struct f2fs_journal *journal = curseg->journal;
4524         int i;
4525
4526         down_write(&curseg->journal_rwsem);
4527         for (i = 0; i < sits_in_cursum(journal); i++) {
4528                 unsigned int segno;
4529                 bool dirtied;
4530
4531                 segno = le32_to_cpu(segno_in_journal(journal, i));
4532                 dirtied = __mark_sit_entry_dirty(sbi, segno);
4533
4534                 if (!dirtied)
4535                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4536         }
4537         update_sits_in_cursum(journal, -i);
4538         up_write(&curseg->journal_rwsem);
4539 }
4540
4541 /*
4542  * CP calls this function, which flushes SIT entries including sit_journal,
4543  * and moves prefree segs to free segs.
4544  */
4545 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4546 {
4547         struct sit_info *sit_i = SIT_I(sbi);
4548         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4549         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4550         struct f2fs_journal *journal = curseg->journal;
4551         struct sit_entry_set *ses, *tmp;
4552         struct list_head *head = &SM_I(sbi)->sit_entry_set;
4553         bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4554         struct seg_entry *se;
4555
4556         down_write(&sit_i->sentry_lock);
4557
4558         if (!sit_i->dirty_sentries)
4559                 goto out;
4560
4561         /*
4562          * add and account the sit entries from the dirty bitmap in the
4563          * sit entry sets temporarily
4564          */
4565         add_sits_in_set(sbi);
4566
4567         /*
4568          * if there is not enough space in the journal to store the dirty
4569          * sit entries, remove all entries from the journal and add and
4570          * account them in the sit entry sets.
4571          */
4572         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4573                                                                 !to_journal)
4574                 remove_sits_in_journal(sbi);
4575
4576         /*
4577          * there are two steps to flush sit entries:
4578          * #1, flush sit entries to journal in current cold data summary block.
4579          * #2, flush sit entries to sit page.
4580          */
4581         list_for_each_entry_safe(ses, tmp, head, set_list) {
4582                 struct page *page = NULL;
4583                 struct f2fs_sit_block *raw_sit = NULL;
4584                 unsigned int start_segno = ses->start_segno;
4585                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4586                                                 (unsigned long)MAIN_SEGS(sbi));
4587                 unsigned int segno = start_segno;
4588
4589                 if (to_journal &&
4590                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4591                         to_journal = false;
4592
4593                 if (to_journal) {
4594                         down_write(&curseg->journal_rwsem);
4595                 } else {
4596                         page = get_next_sit_page(sbi, start_segno);
4597                         raw_sit = page_address(page);
4598                 }
4599
4600                 /* flush dirty sit entries in region of current sit set */
4601                 for_each_set_bit_from(segno, bitmap, end) {
4602                         int offset, sit_offset;
4603
4604                         se = get_seg_entry(sbi, segno);
4605 #ifdef CONFIG_F2FS_CHECK_FS
4606                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4607                                                 SIT_VBLOCK_MAP_SIZE))
4608                                 f2fs_bug_on(sbi, 1);
4609 #endif
4610
4611                         /* add discard candidates */
4612                         if (!(cpc->reason & CP_DISCARD)) {
4613                                 cpc->trim_start = segno;
4614                                 add_discard_addrs(sbi, cpc, false);
4615                         }
4616
4617                         if (to_journal) {
4618                                 offset = f2fs_lookup_journal_in_cursum(journal,
4619                                                         SIT_JOURNAL, segno, 1);
4620                                 f2fs_bug_on(sbi, offset < 0);
4621                                 segno_in_journal(journal, offset) =
4622                                                         cpu_to_le32(segno);
4623                                 seg_info_to_raw_sit(se,
4624                                         &sit_in_journal(journal, offset));
4625                                 check_block_count(sbi, segno,
4626                                         &sit_in_journal(journal, offset));
4627                         } else {
4628                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4629                                 seg_info_to_raw_sit(se,
4630                                                 &raw_sit->entries[sit_offset]);
4631                                 check_block_count(sbi, segno,
4632                                                 &raw_sit->entries[sit_offset]);
4633                         }
4634
4635                         __clear_bit(segno, bitmap);
4636                         sit_i->dirty_sentries--;
4637                         ses->entry_cnt--;
4638                 }
4639
4640                 if (to_journal)
4641                         up_write(&curseg->journal_rwsem);
4642                 else
4643                         f2fs_put_page(page, 1);
4644
4645                 f2fs_bug_on(sbi, ses->entry_cnt);
4646                 release_sit_entry_set(ses);
4647         }
4648
4649         f2fs_bug_on(sbi, !list_empty(head));
4650         f2fs_bug_on(sbi, sit_i->dirty_sentries);
4651 out:
4652         if (cpc->reason & CP_DISCARD) {
4653                 __u64 trim_start = cpc->trim_start;
4654
4655                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4656                         add_discard_addrs(sbi, cpc, false);
4657
4658                 cpc->trim_start = trim_start;
4659         }
4660         up_write(&sit_i->sentry_lock);
4661
4662         set_prefree_as_free_segments(sbi);
4663 }
4664
4665 static int build_sit_info(struct f2fs_sb_info *sbi)
4666 {
4667         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4668         struct sit_info *sit_i;
4669         unsigned int sit_segs, start;
4670         char *src_bitmap, *bitmap;
4671         unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4672         unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4673
4674         /* allocate memory for SIT information */
4675         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4676         if (!sit_i)
4677                 return -ENOMEM;
4678
4679         SM_I(sbi)->sit_info = sit_i;
4680
4681         sit_i->sentries =
4682                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4683                                               MAIN_SEGS(sbi)),
4684                               GFP_KERNEL);
4685         if (!sit_i->sentries)
4686                 return -ENOMEM;
4687
4688         main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4689         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4690                                                                 GFP_KERNEL);
4691         if (!sit_i->dirty_sentries_bitmap)
4692                 return -ENOMEM;
4693
4694 #ifdef CONFIG_F2FS_CHECK_FS
4695         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4696 #else
4697         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4698 #endif
4699         sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4700         if (!sit_i->bitmap)
4701                 return -ENOMEM;
4702
4703         bitmap = sit_i->bitmap;
4704
4705         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4706                 sit_i->sentries[start].cur_valid_map = bitmap;
4707                 bitmap += SIT_VBLOCK_MAP_SIZE;
4708
4709                 sit_i->sentries[start].ckpt_valid_map = bitmap;
4710                 bitmap += SIT_VBLOCK_MAP_SIZE;
4711
4712 #ifdef CONFIG_F2FS_CHECK_FS
4713                 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4714                 bitmap += SIT_VBLOCK_MAP_SIZE;
4715 #endif
4716
4717                 if (discard_map) {
4718                         sit_i->sentries[start].discard_map = bitmap;
4719                         bitmap += SIT_VBLOCK_MAP_SIZE;
4720                 }
4721         }
4722
4723         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4724         if (!sit_i->tmp_map)
4725                 return -ENOMEM;
4726
4727         if (__is_large_section(sbi)) {
4728                 sit_i->sec_entries =
4729                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4730                                                       MAIN_SECS(sbi)),
4731                                       GFP_KERNEL);
4732                 if (!sit_i->sec_entries)
4733                         return -ENOMEM;
4734         }
4735
4736         /* get information related to the SIT */
4737         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4738
4739         /* setup SIT bitmap from checkpoint pack */
4740         sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4741         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4742
4743         sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4744         if (!sit_i->sit_bitmap)
4745                 return -ENOMEM;
4746
4747 #ifdef CONFIG_F2FS_CHECK_FS
4748         sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4749                                         sit_bitmap_size, GFP_KERNEL);
4750         if (!sit_i->sit_bitmap_mir)
4751                 return -ENOMEM;
4752
4753         sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4754                                         main_bitmap_size, GFP_KERNEL);
4755         if (!sit_i->invalid_segmap)
4756                 return -ENOMEM;
4757 #endif
4758
4759         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4760         sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
4761         sit_i->written_valid_blocks = 0;
4762         sit_i->bitmap_size = sit_bitmap_size;
4763         sit_i->dirty_sentries = 0;
4764         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4765         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4766         sit_i->mounted_time = ktime_get_boottime_seconds();
4767         init_rwsem(&sit_i->sentry_lock);
4768         return 0;
4769 }
4770
4771 static int build_free_segmap(struct f2fs_sb_info *sbi)
4772 {
4773         struct free_segmap_info *free_i;
4774         unsigned int bitmap_size, sec_bitmap_size;
4775
4776         /* allocate memory for free segmap information */
4777         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4778         if (!free_i)
4779                 return -ENOMEM;
4780
4781         SM_I(sbi)->free_info = free_i;
4782
4783         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4784         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4785         if (!free_i->free_segmap)
4786                 return -ENOMEM;
4787
4788         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4789         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4790         if (!free_i->free_secmap)
4791                 return -ENOMEM;
4792
4793         /* set all segments as dirty temporarily */
4794         memset(free_i->free_segmap, 0xff, bitmap_size);
4795         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4796
4797         /* init free segmap information */
4798         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4799         free_i->free_segments = 0;
4800         free_i->free_sections = 0;
4801         spin_lock_init(&free_i->segmap_lock);
4802         return 0;
4803 }
4804
4805 static int build_curseg(struct f2fs_sb_info *sbi)
4806 {
4807         struct curseg_info *array;
4808         int i;
4809
4810         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4811                                         sizeof(*array)), GFP_KERNEL);
4812         if (!array)
4813                 return -ENOMEM;
4814
4815         SM_I(sbi)->curseg_array = array;
4816
4817         for (i = 0; i < NO_CHECK_TYPE; i++) {
4818                 mutex_init(&array[i].curseg_mutex);
4819                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4820                 if (!array[i].sum_blk)
4821                         return -ENOMEM;
4822                 init_rwsem(&array[i].journal_rwsem);
4823                 array[i].journal = f2fs_kzalloc(sbi,
4824                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4825                 if (!array[i].journal)
4826                         return -ENOMEM;
4827                 array[i].seg_type = log_type_to_seg_type(i);
4828                 reset_curseg_fields(&array[i]);
4829         }
4830         return restore_curseg_summaries(sbi);
4831 }
4832
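/*
 * Read all SIT entries from the SIT area, then overlay the newer copies
 * cached in the SIT journal, rebuilding per-segment (and per-section) valid
 * block counts and discard maps, and cross-check the totals against the
 * checkpoint's node and user block counts.
 */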
4833 static int build_sit_entries(struct f2fs_sb_info *sbi)
4834 {
4835         struct sit_info *sit_i = SIT_I(sbi);
4836         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4837         struct f2fs_journal *journal = curseg->journal;
4838         struct seg_entry *se;
4839         struct f2fs_sit_entry sit;
4840         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4841         unsigned int i, start, end;
4842         unsigned int readed, start_blk = 0;
4843         int err = 0;
4844         block_t sit_valid_blocks[2] = {0, 0};
4845
4846         do {
4847                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4848                                                         META_SIT, true);
4849
4850                 start = start_blk * sit_i->sents_per_block;
4851                 end = (start_blk + readed) * sit_i->sents_per_block;
4852
4853                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4854                         struct f2fs_sit_block *sit_blk;
4855                         struct page *page;
4856
4857                         se = &sit_i->sentries[start];
4858                         page = get_current_sit_page(sbi, start);
4859                         if (IS_ERR(page))
4860                                 return PTR_ERR(page);
4861                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4862                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4863                         f2fs_put_page(page, 1);
4864
4865                         err = check_block_count(sbi, start, &sit);
4866                         if (err)
4867                                 return err;
4868                         seg_info_from_raw_sit(se, &sit);
4869
4870                         if (se->type >= NR_PERSISTENT_LOG) {
4871                                 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4872                                                         se->type, start);
4873                                 f2fs_handle_error(sbi,
4874                                                 ERROR_INCONSISTENT_SUM_TYPE);
4875                                 return -EFSCORRUPTED;
4876                         }
4877
4878                         sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4879
4880                         if (!f2fs_block_unit_discard(sbi))
4881                                 goto init_discard_map_done;
4882
4883                         /* build discard map only one time */
4884                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4885                                 memset(se->discard_map, 0xff,
4886                                                 SIT_VBLOCK_MAP_SIZE);
4887                                 goto init_discard_map_done;
4888                         }
4889                         memcpy(se->discard_map, se->cur_valid_map,
4890                                                 SIT_VBLOCK_MAP_SIZE);
4891                         sbi->discard_blks += BLKS_PER_SEG(sbi) -
4892                                                 se->valid_blocks;
4893 init_discard_map_done:
4894                         if (__is_large_section(sbi))
4895                                 get_sec_entry(sbi, start)->valid_blocks +=
4896                                                         se->valid_blocks;
4897                 }
4898                 start_blk += readed;
4899         } while (start_blk < sit_blk_cnt);
4900
4901         down_read(&curseg->journal_rwsem);
4902         for (i = 0; i < sits_in_cursum(journal); i++) {
4903                 unsigned int old_valid_blocks;
4904
4905                 start = le32_to_cpu(segno_in_journal(journal, i));
4906                 if (start >= MAIN_SEGS(sbi)) {
4907                         f2fs_err(sbi, "Wrong journal entry on segno %u",
4908                                  start);
4909                         err = -EFSCORRUPTED;
4910                         f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4911                         break;
4912                 }
4913
4914                 se = &sit_i->sentries[start];
4915                 sit = sit_in_journal(journal, i);
4916
4917                 old_valid_blocks = se->valid_blocks;
4918
4919                 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4920
4921                 err = check_block_count(sbi, start, &sit);
4922                 if (err)
4923                         break;
4924                 seg_info_from_raw_sit(se, &sit);
4925
4926                 if (se->type >= NR_PERSISTENT_LOG) {
4927                         f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4928                                                         se->type, start);
4929                         err = -EFSCORRUPTED;
4930                         f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4931                         break;
4932                 }
4933
4934                 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4935
4936                 if (f2fs_block_unit_discard(sbi)) {
4937                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4938                                 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4939                         } else {
4940                                 memcpy(se->discard_map, se->cur_valid_map,
4941                                                         SIT_VBLOCK_MAP_SIZE);
4942                                 sbi->discard_blks += old_valid_blocks;
4943                                 sbi->discard_blks -= se->valid_blocks;
4944                         }
4945                 }
4946
4947                 if (__is_large_section(sbi)) {
4948                         get_sec_entry(sbi, start)->valid_blocks +=
4949                                                         se->valid_blocks;
4950                         get_sec_entry(sbi, start)->valid_blocks -=
4951                                                         old_valid_blocks;
4952                 }
4953         }
4954         up_read(&curseg->journal_rwsem);
4955
4956         if (err)
4957                 return err;
4958
4959         if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4960                 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4961                          sit_valid_blocks[NODE], valid_node_count(sbi));
4962                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4963                 return -EFSCORRUPTED;
4964         }
4965
4966         if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4967                                 valid_user_blocks(sbi)) {
4968                 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4969                          sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4970                          valid_user_blocks(sbi));
4971                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4972                 return -EFSCORRUPTED;
4973         }
4974
4975         return 0;
4976 }
4977
4978 static void init_free_segmap(struct f2fs_sb_info *sbi)
4979 {
4980         unsigned int start;
4981         int type;
4982         struct seg_entry *sentry;
4983
4984         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4985                 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4986                         continue;
4987                 sentry = get_seg_entry(sbi, start);
4988                 if (!sentry->valid_blocks)
4989                         __set_free(sbi, start);
4990                 else
4991                         SIT_I(sbi)->written_valid_blocks +=
4992                                                 sentry->valid_blocks;
4993         }
4994
4995         /* mark the current segments as in use */
4996         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4997                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4998
4999                 __set_test_and_inuse(sbi, curseg_t->segno);
5000         }
5001 }
5002
5003 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
5004 {
5005         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5006         struct free_segmap_info *free_i = FREE_I(sbi);
5007         unsigned int segno = 0, offset = 0, secno;
5008         block_t valid_blocks, usable_blks_in_seg;
5009
5010         while (1) {
5011                 /* find dirty segment based on free segmap */
5012                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
5013                 if (segno >= MAIN_SEGS(sbi))
5014                         break;
5015                 offset = segno + 1;
5016                 valid_blocks = get_valid_blocks(sbi, segno, false);
5017                 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
5018                 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
5019                         continue;
5020                 if (valid_blocks > usable_blks_in_seg) {
5021                         f2fs_bug_on(sbi, 1);
5022                         continue;
5023                 }
5024                 mutex_lock(&dirty_i->seglist_lock);
5025                 __locate_dirty_segment(sbi, segno, DIRTY);
5026                 mutex_unlock(&dirty_i->seglist_lock);
5027         }
5028
5029         if (!__is_large_section(sbi))
5030                 return;
5031
5032         mutex_lock(&dirty_i->seglist_lock);
5033         for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
5034                 valid_blocks = get_valid_blocks(sbi, segno, true);
5035                 secno = GET_SEC_FROM_SEG(sbi, segno);
5036
5037                 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
5038                         continue;
5039                 if (IS_CURSEC(sbi, secno))
5040                         continue;
5041                 set_bit(secno, dirty_i->dirty_secmap);
5042         }
5043         mutex_unlock(&dirty_i->seglist_lock);
5044 }
5045
5046 static int init_victim_secmap(struct f2fs_sb_info *sbi)
5047 {
5048         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5049         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
5050
5051         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
5052         if (!dirty_i->victim_secmap)
5053                 return -ENOMEM;
5054
5055         dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
5056         if (!dirty_i->pinned_secmap)
5057                 return -ENOMEM;
5058
5059         dirty_i->pinned_secmap_cnt = 0;
5060         dirty_i->enable_pin_section = true;
5061         return 0;
5062 }
5063
5064 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
5065 {
5066         struct dirty_seglist_info *dirty_i;
5067         unsigned int bitmap_size, i;
5068
5069         /* allocate memory for dirty segments list information */
5070         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
5071                                                                 GFP_KERNEL);
5072         if (!dirty_i)
5073                 return -ENOMEM;
5074
5075         SM_I(sbi)->dirty_info = dirty_i;
5076         mutex_init(&dirty_i->seglist_lock);
5077
5078         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
5079
5080         for (i = 0; i < NR_DIRTY_TYPE; i++) {
5081                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
5082                                                                 GFP_KERNEL);
5083                 if (!dirty_i->dirty_segmap[i])
5084                         return -ENOMEM;
5085         }
5086
5087         if (__is_large_section(sbi)) {
5088                 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
5089                 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
5090                                                 bitmap_size, GFP_KERNEL);
5091                 if (!dirty_i->dirty_secmap)
5092                         return -ENOMEM;
5093         }
5094
5095         init_dirty_segmap(sbi);
5096         return init_victim_secmap(sbi);
5097 }
5098
5099 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
5100 {
5101         int i;
5102
5103         /*
5104          * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
5105          * In LFS curseg, all blkaddr after .next_blkoff should be unused.
5106          */
5107         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5108                 struct curseg_info *curseg = CURSEG_I(sbi, i);
5109                 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
5110                 unsigned int blkofs = curseg->next_blkoff;
5111
5112                 if (f2fs_sb_has_readonly(sbi) &&
5113                         i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
5114                         continue;
5115
5116                 sanity_check_seg_type(sbi, curseg->seg_type);
5117
5118                 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
5119                         f2fs_err(sbi,
5120                                  "Current segment has invalid alloc_type:%d",
5121                                  curseg->alloc_type);
5122                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
5123                         return -EFSCORRUPTED;
5124                 }
5125
5126                 if (f2fs_test_bit(blkofs, se->cur_valid_map))
5127                         goto out;
5128
5129                 if (curseg->alloc_type == SSR)
5130                         continue;
5131
5132                 for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) {
5133                         if (!f2fs_test_bit(blkofs, se->cur_valid_map))
5134                                 continue;
5135 out:
5136                         f2fs_err(sbi,
5137                                  "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
5138                                  i, curseg->segno, curseg->alloc_type,
5139                                  curseg->next_blkoff, blkofs);
5140                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
5141                         return -EFSCORRUPTED;
5142                 }
5143         }
5144         return 0;
5145 }
5146
5147 #ifdef CONFIG_BLK_DEV_ZONED
5148 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
5149                                     struct f2fs_dev_info *fdev,
5150                                     struct blk_zone *zone)
5151 {
5152         unsigned int zone_segno;
5153         block_t zone_block, valid_block_cnt;
5154         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
5155         int ret;
5156         unsigned int nofs_flags;
5157
5158         if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5159                 return 0;
5160
5161         zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
5162         zone_segno = GET_SEGNO(sbi, zone_block);
5163
5164         /*
5165          * Skip checking zones that cursegs point to, since
5166          * fix_curseg_write_pointer() checks them.
5167          */
5168         if (zone_segno >= MAIN_SEGS(sbi))
5169                 return 0;
5170
5171         /*
5172          * Get the number of valid blocks in the zone.
5173          */
5174         valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
5175         if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
5176                 f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
5177                                 zone_segno, valid_block_cnt,
5178                                 blk_zone_cond_str(zone->cond));
5179                 return 0;
5180         }
5181
5182         if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
5183             (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
5184                 return 0;
5185
5186         if (!valid_block_cnt) {
5187                 f2fs_notice(sbi, "Zone without valid block has non-zero write "
5188                             "pointer. Reset the write pointer: cond[%s]",
5189                             blk_zone_cond_str(zone->cond));
5190                 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
5191                                         zone->len >> log_sectors_per_block);
5192                 if (ret)
5193                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5194                                  fdev->path, ret);
5195                 return ret;
5196         }
5197
5198         /*
5199          * If there are valid blocks and the write pointer doesn't match
5200          * them, report the inconsistency and fill the zone to its end to
5201          * close it. This inconsistency does not cause write errors because
5202          * the zone will not be selected for write operations until it gets
5203          * discarded.
5204          */
5205         f2fs_notice(sbi, "Valid blocks are not aligned with write "
5206                     "pointer: valid block[0x%x,0x%x] cond[%s]",
5207                     zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond));
5208
5209         nofs_flags = memalloc_nofs_save();
5210         ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
5211                                 zone->start, zone->len);
5212         memalloc_nofs_restore(nofs_flags);
5213         if (ret == -EOPNOTSUPP) {
5214                 ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
5215                                         zone->len - (zone->wp - zone->start),
5216                                         GFP_NOFS, 0);
5217                 if (ret)
5218                         f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
5219                                         fdev->path, ret);
5220         } else if (ret) {
5221                 f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
5222                                 fdev->path, ret);
5223         }
5224
5225         return ret;
5226 }
5227
5228 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
5229                                                   block_t zone_blkaddr)
5230 {
5231         int i;
5232
5233         for (i = 0; i < sbi->s_ndevs; i++) {
5234                 if (!bdev_is_zoned(FDEV(i).bdev))
5235                         continue;
5236                 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
5237                                 zone_blkaddr <= FDEV(i).end_blk))
5238                         return &FDEV(i);
5239         }
5240
5241         return NULL;
5242 }
5243
5244 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
5245                               void *data)
5246 {
5247         memcpy(data, zone, sizeof(struct blk_zone));
5248         return 0;
5249 }
5250
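/*
 * Make the current segment of log @type consistent with the underlying
 * zone's write pointer: after a clean unmount, reuse the curseg when it
 * matches the write pointer; otherwise move the curseg to a new section and
 * discard that section's zone if it is not empty.
 */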
5251 static int do_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
5252 {
5253         struct curseg_info *cs = CURSEG_I(sbi, type);
5254         struct f2fs_dev_info *zbd;
5255         struct blk_zone zone;
5256         unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
5257         block_t cs_zone_block, wp_block;
5258         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
5259         sector_t zone_sector;
5260         int err;
5261
5262         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5263         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5264
5265         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5266         if (!zbd)
5267                 return 0;
5268
5269         /* report zone for the sector the curseg points to */
5270         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5271                 << log_sectors_per_block;
5272         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5273                                   report_one_zone_cb, &zone);
5274         if (err != 1) {
5275                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5276                          zbd->path, err);
5277                 return err;
5278         }
5279
5280         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5281                 return 0;
5282
5283         /*
5284          * If the filesystem was safely unmounted in the previous mount, we
5285          * can reuse the current segments. Otherwise, allocate new sections.
5286          */
5287         if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
5288                 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
5289                 wp_segno = GET_SEGNO(sbi, wp_block);
5290                 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
5291                 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
5292
5293                 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
5294                                 wp_sector_off == 0)
5295                         return 0;
5296
5297                 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
5298                             "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
5299                             cs->next_blkoff, wp_segno, wp_blkoff);
5300         }
5301
5302         /* Allocate a new section if the curseg is not at the start of a fresh section. */
5303         if (cs->next_blkoff ||
5304             cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) {
5305                 unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;
5306
5307                 f2fs_allocate_new_section(sbi, type, true);
5308                 f2fs_notice(sbi, "Assign new section to curseg[%d]: "
5309                                 "[0x%x,0x%x] -> [0x%x,0x%x]",
5310                                 type, old_segno, old_blkoff,
5311                                 cs->segno, cs->next_blkoff);
5312         }
5313
5314         /* check consistency of the zone curseg pointed to */
5315         if (check_zone_write_pointer(sbi, zbd, &zone))
5316                 return -EIO;
5317
5318         /* check newly assigned zone */
5319         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5320         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5321
5322         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5323         if (!zbd)
5324                 return 0;
5325
5326         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5327                 << log_sectors_per_block;
5328         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5329                                   report_one_zone_cb, &zone);
5330         if (err != 1) {
5331                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5332                          zbd->path, err);
5333                 return err;
5334         }
5335
5336         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5337                 return 0;
5338
5339         if (zone.wp != zone.start) {
5340                 f2fs_notice(sbi,
5341                             "New zone for curseg[%d] is not yet discarded. "
5342                             "Reset the zone: curseg[0x%x,0x%x]",
5343                             type, cs->segno, cs->next_blkoff);
5344                 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
5345                                         zone.len >> log_sectors_per_block);
5346                 if (err) {
5347                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5348                                  zbd->path, err);
5349                         return err;
5350                 }
5351         }
5352
5353         return 0;
5354 }
5355
5356 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5357 {
5358         int i, ret;
5359
5360         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5361                 ret = do_fix_curseg_write_pointer(sbi, i);
5362                 if (ret)
5363                         return ret;
5364         }
5365
5366         return 0;
5367 }
5368
5369 struct check_zone_write_pointer_args {
5370         struct f2fs_sb_info *sbi;
5371         struct f2fs_dev_info *fdev;
5372 };
5373
5374 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5375                                       void *data)
5376 {
5377         struct check_zone_write_pointer_args *args;
5378
5379         args = (struct check_zone_write_pointer_args *)data;
5380
5381         return check_zone_write_pointer(args->sbi, args->fdev, zone);
5382 }
5383
5384 static int check_write_pointer(struct f2fs_sb_info *sbi)
5385 {
5386         int i, ret;
5387         struct check_zone_write_pointer_args args;
5388
5389         for (i = 0; i < sbi->s_ndevs; i++) {
5390                 if (!bdev_is_zoned(FDEV(i).bdev))
5391                         continue;
5392
5393                 args.sbi = sbi;
5394                 args.fdev = &FDEV(i);
5395                 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5396                                           check_zone_write_pointer_cb, &args);
5397                 if (ret < 0)
5398                         return ret;
5399         }
5400
5401         return 0;
5402 }
5403
5404 int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi)
5405 {
5406         int ret;
5407
5408         if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb))
5409                 return 0;
5410
5411         f2fs_notice(sbi, "Checking entire write pointers");
5412         ret = fix_curseg_write_pointer(sbi);
5413         if (!ret)
5414                 ret = check_write_pointer(sbi);
5415         return ret;
5416 }
5417
5418 /*
5419  * Return the number of usable blocks in a segment. The number of blocks
5420  * returned is always equal to the number of blocks in a segment for
5421  * segments fully contained within a sequential zone capacity or a
5422  * conventional zone. For segments partially contained in a sequential
5423  * zone capacity, the number of usable blocks up to the zone capacity
5424  * is returned. 0 is returned in all other cases.
5425  */
5426 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5427                         struct f2fs_sb_info *sbi, unsigned int segno)
5428 {
5429         block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5430         unsigned int secno;
5431
5432         if (!sbi->unusable_blocks_per_sec)
5433                 return BLKS_PER_SEG(sbi);
5434
5435         secno = GET_SEC_FROM_SEG(sbi, segno);
5436         seg_start = START_BLOCK(sbi, segno);
5437         sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5438         sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5439
5440         /*
5441          * If segment starts before zone capacity and spans beyond
5442          * zone capacity, then usable blocks are from seg start to
5443          * zone capacity. If the segment starts after the zone capacity,
5444          * then there are no usable blocks.
5445          */
5446         if (seg_start >= sec_cap_blkaddr)
5447                 return 0;
5448         if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr)
5449                 return sec_cap_blkaddr - seg_start;
5450
5451         return BLKS_PER_SEG(sbi);
5452 }
5453 #else
5454 int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi)
5455 {
5456         return 0;
5457 }
5458
5459 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5460                                                         unsigned int segno)
5461 {
5462         return 0;
5463 }
5464
5465 #endif
5466 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5467                                         unsigned int segno)
5468 {
5469         if (f2fs_sb_has_blkzoned(sbi))
5470                 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5471
5472         return BLKS_PER_SEG(sbi);
5473 }
5474
5475 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi)
5476 {
5477         if (f2fs_sb_has_blkzoned(sbi))
5478                 return CAP_SEGS_PER_SEC(sbi);
5479
5480         return SEGS_PER_SEC(sbi);
5481 }
5482
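/*
 * Return the modification time of the section containing @segno: the mtime
 * of that segment for non-large sections, or the valid-block-weighted
 * average mtime of the section's usable segments otherwise (INVALID_MTIME
 * if the section has no valid blocks).
 */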
5483 unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi,
5484         unsigned int segno)
5485 {
5486         unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi);
5487         unsigned int secno = 0, start = 0;
5488         unsigned int total_valid_blocks = 0;
5489         unsigned long long mtime = 0;
5490         unsigned int i = 0;
5491
5492         secno = GET_SEC_FROM_SEG(sbi, segno);
5493         start = GET_SEG_FROM_SEC(sbi, secno);
5494
5495         if (!__is_large_section(sbi))
5496                 return get_seg_entry(sbi, start + i)->mtime;
5497
5498         for (i = 0; i < usable_segs_per_sec; i++) {
5499                 /* for large section, only check the mtime of valid segments */
5500                 struct seg_entry *se = get_seg_entry(sbi, start+i);
5501
5502                 mtime += se->mtime * se->valid_blocks;
5503                 total_valid_blocks += se->valid_blocks;
5504         }
5505
5506         if (total_valid_blocks == 0)
5507                 return INVALID_MTIME;
5508
5509         return div_u64(mtime, total_valid_blocks);
5510 }
5511
5512 /*
5513  * Update min, max modified time for cost-benefit GC algorithm
5514  */
5515 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5516 {
5517         struct sit_info *sit_i = SIT_I(sbi);
5518         unsigned int segno;
5519
5520         down_write(&sit_i->sentry_lock);
5521
5522         sit_i->min_mtime = ULLONG_MAX;
5523
5524         for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
5525                 unsigned long long mtime = 0;
5526
5527                 mtime = f2fs_get_section_mtime(sbi, segno);
5528
5529                 if (sit_i->min_mtime > mtime)
5530                         sit_i->min_mtime = mtime;
5531         }
5532         sit_i->max_mtime = get_mtime(sbi, false);
5533         sit_i->dirty_max_mtime = 0;
5534         up_write(&sit_i->sentry_lock);
5535 }
5536
5537 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5538 {
5539         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5540         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5541         struct f2fs_sm_info *sm_info;
5542         int err;
5543
5544         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5545         if (!sm_info)
5546                 return -ENOMEM;
5547
5548         /* init sm info */
5549         sbi->sm_info = sm_info;
5550         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5551         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5552         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5553         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5554         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5555         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5556         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5557         sm_info->rec_prefree_segments = sm_info->main_segments *
5558                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5559         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5560                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5561
5562         if (!f2fs_lfs_mode(sbi))
5563                 sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
5564         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5565         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5566         sm_info->min_seq_blocks = BLKS_PER_SEG(sbi);
5567         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5568         sm_info->min_ssr_sections = reserved_sections(sbi);
5569
5570         INIT_LIST_HEAD(&sm_info->sit_entry_set);
5571
5572         init_f2fs_rwsem(&sm_info->curseg_lock);
5573
5574         err = f2fs_create_flush_cmd_control(sbi);
5575         if (err)
5576                 return err;
5577
5578         err = create_discard_cmd_control(sbi);
5579         if (err)
5580                 return err;
5581
5582         err = build_sit_info(sbi);
5583         if (err)
5584                 return err;
5585         err = build_free_segmap(sbi);
5586         if (err)
5587                 return err;
5588         err = build_curseg(sbi);
5589         if (err)
5590                 return err;
5591
5592         /* reinit free segmap based on SIT */
5593         err = build_sit_entries(sbi);
5594         if (err)
5595                 return err;
5596
5597         init_free_segmap(sbi);
5598         err = build_dirty_segmap(sbi);
5599         if (err)
5600                 return err;
5601
5602         err = sanity_check_curseg(sbi);
5603         if (err)
5604                 return err;
5605
5606         init_min_max_mtime(sbi);
5607         return 0;
5608 }
5609
5610 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5611                 enum dirty_type dirty_type)
5612 {
5613         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5614
5615         mutex_lock(&dirty_i->seglist_lock);
5616         kvfree(dirty_i->dirty_segmap[dirty_type]);
5617         dirty_i->nr_dirty[dirty_type] = 0;
5618         mutex_unlock(&dirty_i->seglist_lock);
5619 }
5620
5621 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5622 {
5623         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5624
5625         kvfree(dirty_i->pinned_secmap);
5626         kvfree(dirty_i->victim_secmap);
5627 }
5628
5629 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5630 {
5631         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5632         int i;
5633
5634         if (!dirty_i)
5635                 return;
5636
5637         /* discard pre-free/dirty segments list */
5638         for (i = 0; i < NR_DIRTY_TYPE; i++)
5639                 discard_dirty_segmap(sbi, i);
5640
5641         if (__is_large_section(sbi)) {
5642                 mutex_lock(&dirty_i->seglist_lock);
5643                 kvfree(dirty_i->dirty_secmap);
5644                 mutex_unlock(&dirty_i->seglist_lock);
5645         }
5646
5647         destroy_victim_secmap(sbi);
5648         SM_I(sbi)->dirty_info = NULL;
5649         kfree(dirty_i);
5650 }
5651
5652 static void destroy_curseg(struct f2fs_sb_info *sbi)
5653 {
5654         struct curseg_info *array = SM_I(sbi)->curseg_array;
5655         int i;
5656
5657         if (!array)
5658                 return;
5659         SM_I(sbi)->curseg_array = NULL;
5660         for (i = 0; i < NR_CURSEG_TYPE; i++) {
5661                 kfree(array[i].sum_blk);
5662                 kfree(array[i].journal);
5663         }
5664         kfree(array);
5665 }
5666
5667 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5668 {
5669         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5670
5671         if (!free_i)
5672                 return;
5673         SM_I(sbi)->free_info = NULL;
5674         kvfree(free_i->free_segmap);
5675         kvfree(free_i->free_secmap);
5676         kfree(free_i);
5677 }
5678
5679 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5680 {
5681         struct sit_info *sit_i = SIT_I(sbi);
5682
5683         if (!sit_i)
5684                 return;
5685
5686         if (sit_i->sentries)
5687                 kvfree(sit_i->bitmap);
5688         kfree(sit_i->tmp_map);
5689
5690         kvfree(sit_i->sentries);
5691         kvfree(sit_i->sec_entries);
5692         kvfree(sit_i->dirty_sentries_bitmap);
5693
5694         SM_I(sbi)->sit_info = NULL;
5695         kvfree(sit_i->sit_bitmap);
5696 #ifdef CONFIG_F2FS_CHECK_FS
5697         kvfree(sit_i->sit_bitmap_mir);
5698         kvfree(sit_i->invalid_segmap);
5699 #endif
5700         kfree(sit_i);
5701 }
5702
5703 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5704 {
5705         struct f2fs_sm_info *sm_info = SM_I(sbi);
5706
5707         if (!sm_info)
5708                 return;
5709         f2fs_destroy_flush_cmd_control(sbi, true);
5710         destroy_discard_cmd_control(sbi);
5711         destroy_dirty_segmap(sbi);
5712         destroy_curseg(sbi);
5713         destroy_free_segmap(sbi);
5714         destroy_sit_info(sbi);
5715         sbi->sm_info = NULL;
5716         kfree(sm_info);
5717 }
5718
5719 int __init f2fs_create_segment_manager_caches(void)
5720 {
5721         discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5722                         sizeof(struct discard_entry));
5723         if (!discard_entry_slab)
5724                 goto fail;
5725
5726         discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5727                         sizeof(struct discard_cmd));
5728         if (!discard_cmd_slab)
5729                 goto destroy_discard_entry;
5730
5731         sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5732                         sizeof(struct sit_entry_set));
5733         if (!sit_entry_set_slab)
5734                 goto destroy_discard_cmd;
5735
5736         revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5737                         sizeof(struct revoke_entry));
5738         if (!revoke_entry_slab)
5739                 goto destroy_sit_entry_set;
5740         return 0;
5741
5742 destroy_sit_entry_set:
5743         kmem_cache_destroy(sit_entry_set_slab);
5744 destroy_discard_cmd:
5745         kmem_cache_destroy(discard_cmd_slab);
5746 destroy_discard_entry:
5747         kmem_cache_destroy(discard_entry_slab);
5748 fail:
5749         return -ENOMEM;
5750 }
5751
5752 void f2fs_destroy_segment_manager_caches(void)
5753 {
5754         kmem_cache_destroy(sit_entry_set_slab);
5755         kmem_cache_destroy(discard_cmd_slab);
5756         kmem_cache_destroy(discard_entry_slab);
5757         kmem_cache_destroy(revoke_entry_slab);
5758 }