1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/sched/mm.h>
13 #include <linux/prefetch.h>
14 #include <linux/kthread.h>
15 #include <linux/swap.h>
16 #include <linux/timer.h>
17 #include <linux/freezer.h>
18 #include <linux/sched/signal.h>
19 #include <linux/random.h>
20
21 #include "f2fs.h"
22 #include "segment.h"
23 #include "node.h"
24 #include "gc.h"
25 #include "iostat.h"
26 #include <trace/events/f2fs.h>
27
28 #define __reverse_ffz(x) __reverse_ffs(~(x))
29
30 static struct kmem_cache *discard_entry_slab;
31 static struct kmem_cache *discard_cmd_slab;
32 static struct kmem_cache *sit_entry_set_slab;
33 static struct kmem_cache *revoke_entry_slab;
34
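/*
 * Build an unsigned long from @str, taking the bytes in order so that
 * str[0] ends up in the most significant byte.  For example, on a 32-bit
 * build the bytes {0x12, 0x34, 0x56, 0x78} become 0x12345678.
 */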
35 static unsigned long __reverse_ulong(unsigned char *str)
36 {
37         unsigned long tmp = 0;
38         int shift = 24, idx = 0;
39
40 #if BITS_PER_LONG == 64
41         shift = 56;
42 #endif
43         while (shift >= 0) {
44                 tmp |= (unsigned long)str[idx++] << shift;
45                 shift -= BITS_PER_BYTE;
46         }
47         return tmp;
48 }
49
50 /*
51  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
52  * MSB and LSB are reversed in a byte by f2fs_set_bit.
53  */
54 static inline unsigned long __reverse_ffs(unsigned long word)
55 {
56         int num = 0;
57
58 #if BITS_PER_LONG == 64
59         if ((word & 0xffffffff00000000UL) == 0)
60                 num += 32;
61         else
62                 word >>= 32;
63 #endif
64         if ((word & 0xffff0000) == 0)
65                 num += 16;
66         else
67                 word >>= 16;
68
69         if ((word & 0xff00) == 0)
70                 num += 8;
71         else
72                 word >>= 8;
73
74         if ((word & 0xf0) == 0)
75                 num += 4;
76         else
77                 word >>= 4;
78
79         if ((word & 0xc) == 0)
80                 num += 2;
81         else
82                 word >>= 2;
83
84         if ((word & 0x2) == 0)
85                 num += 1;
86         return num;
87 }
88
89 /*
90  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
91  * f2fs_set_bit makes MSB and LSB reversed in a byte.
92  * @size must be an integral multiple of BITS_PER_LONG.
93  * Example:
94  *                             MSB <--> LSB
95  *   f2fs_set_bit(0, bitmap) => 1000 0000
96  *   f2fs_set_bit(7, bitmap) => 0000 0001
97  */
98 static unsigned long __find_rev_next_bit(const unsigned long *addr,
99                         unsigned long size, unsigned long offset)
100 {
101         const unsigned long *p = addr + BIT_WORD(offset);
102         unsigned long result = size;
103         unsigned long tmp;
104
105         if (offset >= size)
106                 return size;
107
108         size -= (offset & ~(BITS_PER_LONG - 1));
109         offset %= BITS_PER_LONG;
110
111         while (1) {
112                 if (*p == 0)
113                         goto pass;
114
115                 tmp = __reverse_ulong((unsigned char *)p);
116
117                 tmp &= ~0UL >> offset;
118                 if (size < BITS_PER_LONG)
119                         tmp &= (~0UL << (BITS_PER_LONG - size));
120                 if (tmp)
121                         goto found;
122 pass:
123                 if (size <= BITS_PER_LONG)
124                         break;
125                 size -= BITS_PER_LONG;
126                 offset = 0;
127                 p++;
128         }
129         return result;
130 found:
131         return result - size + __reverse_ffs(tmp);
132 }
133
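/*
 * Counterpart of __find_rev_next_bit: find the next zero bit at or after
 * @offset using the same reversed in-byte bit order, returning @size if no
 * zero bit is found.
 */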
134 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
135                         unsigned long size, unsigned long offset)
136 {
137         const unsigned long *p = addr + BIT_WORD(offset);
138         unsigned long result = size;
139         unsigned long tmp;
140
141         if (offset >= size)
142                 return size;
143
144         size -= (offset & ~(BITS_PER_LONG - 1));
145         offset %= BITS_PER_LONG;
146
147         while (1) {
148                 if (*p == ~0UL)
149                         goto pass;
150
151                 tmp = __reverse_ulong((unsigned char *)p);
152
153                 if (offset)
154                         tmp |= ~0UL << (BITS_PER_LONG - offset);
155                 if (size < BITS_PER_LONG)
156                         tmp |= ~0UL >> size;
157                 if (tmp != ~0UL)
158                         goto found;
159 pass:
160                 if (size <= BITS_PER_LONG)
161                         break;
162                 size -= BITS_PER_LONG;
163                 offset = 0;
164                 p++;
165         }
166         return result;
167 found:
168         return result - size + __reverse_ffz(tmp);
169 }
170
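/*
 * Decide whether allocation should fall back to SSR (slack space recycling):
 * never in LFS mode, always under urgent GC or while checkpointing is
 * disabled, and otherwise when free sections drop below what the dirty
 * node/dentry/imeta pages need on top of the reserved and minimum-SSR
 * sections.
 */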
171 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
172 {
173         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
174         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
175         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
176
177         if (f2fs_lfs_mode(sbi))
178                 return false;
179         if (sbi->gc_mode == GC_URGENT_HIGH)
180                 return true;
181         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
182                 return true;
183
184         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
185                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
186 }
187
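/*
 * Tear down the atomic-write state of @inode: drop the COW inode, clear the
 * atomic flags and counters, and, if @clean is set, truncate the page cache
 * and restore the original i_size.
 */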
188 void f2fs_abort_atomic_write(struct inode *inode, bool clean)
189 {
190         struct f2fs_inode_info *fi = F2FS_I(inode);
191
192         if (!f2fs_is_atomic_file(inode))
193                 return;
194
195         clear_inode_flag(fi->cow_inode, FI_COW_FILE);
196         iput(fi->cow_inode);
197         fi->cow_inode = NULL;
198         release_atomic_write_cnt(inode);
199         clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
200         clear_inode_flag(inode, FI_ATOMIC_REPLACE);
201         clear_inode_flag(inode, FI_ATOMIC_FILE);
202         stat_dec_atomic_inode(inode);
203
204         if (clean) {
205                 truncate_inode_pages_final(inode->i_mapping);
206                 f2fs_i_size_write(inode, fi->original_i_size);
207         }
208 }
209
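/*
 * Point @index of @inode at @new_addr.  In the commit path (@recover ==
 * false) the previous block address is returned through @old_addr so the
 * change can be revoked later; in the revoke path (@recover == true) the
 * saved address is written back instead.
 */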
210 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
211                         block_t new_addr, block_t *old_addr, bool recover)
212 {
213         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
214         struct dnode_of_data dn;
215         struct node_info ni;
216         int err;
217
218 retry:
219         set_new_dnode(&dn, inode, NULL, NULL, 0);
220         err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE_RA);
221         if (err) {
222                 if (err == -ENOMEM) {
223                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
224                         goto retry;
225                 }
226                 return err;
227         }
228
229         err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
230         if (err) {
231                 f2fs_put_dnode(&dn);
232                 return err;
233         }
234
235         if (recover) {
236                 /* dn.data_blkaddr is always valid */
237                 if (!__is_valid_data_blkaddr(new_addr)) {
238                         if (new_addr == NULL_ADDR)
239                                 dec_valid_block_count(sbi, inode, 1);
240                         f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
241                         f2fs_update_data_blkaddr(&dn, new_addr);
242                 } else {
243                         f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
244                                 new_addr, ni.version, true, true);
245                 }
246         } else {
247                 blkcnt_t count = 1;
248
249                 *old_addr = dn.data_blkaddr;
250                 f2fs_truncate_data_blocks_range(&dn, 1);
251                 dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
252                 inc_valid_block_count(sbi, inode, &count);
253                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
254                                         ni.version, true, false);
255         }
256
257         f2fs_put_dnode(&dn);
258         return 0;
259 }
260
261 static void __complete_revoke_list(struct inode *inode, struct list_head *head,
262                                         bool revoke)
263 {
264         struct revoke_entry *cur, *tmp;
265         bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
266
267         list_for_each_entry_safe(cur, tmp, head, list) {
268                 if (revoke)
269                         __replace_atomic_write_block(inode, cur->index,
270                                                 cur->old_addr, NULL, true);
271
272                 list_del(&cur->list);
273                 kmem_cache_free(revoke_entry_slab, cur);
274         }
275
276         if (!revoke && truncate)
277                 f2fs_do_truncate_blocks(inode, 0, false);
278 }
279
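/*
 * Walk the COW inode and move every written block back into the original
 * inode, recording each replaced address in a revoke list.  On failure the
 * list is replayed to undo the partial commit; on success the inode is
 * marked FI_ATOMIC_COMMITTED.
 */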
280 static int __f2fs_commit_atomic_write(struct inode *inode)
281 {
282         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
283         struct f2fs_inode_info *fi = F2FS_I(inode);
284         struct inode *cow_inode = fi->cow_inode;
285         struct revoke_entry *new;
286         struct list_head revoke_list;
287         block_t blkaddr;
288         struct dnode_of_data dn;
289         pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
290         pgoff_t off = 0, blen, index;
291         int ret = 0, i;
292
293         INIT_LIST_HEAD(&revoke_list);
294
295         while (len) {
296                 blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
297
298                 set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
299                 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
300                 if (ret && ret != -ENOENT) {
301                         goto out;
302                 } else if (ret == -ENOENT) {
303                         ret = 0;
304                         if (dn.max_level == 0)
305                                 goto out;
306                         goto next;
307                 }
308
309                 blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
310                                 len);
311                 index = off;
312                 for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
313                         blkaddr = f2fs_data_blkaddr(&dn);
314
315                         if (!__is_valid_data_blkaddr(blkaddr)) {
316                                 continue;
317                         } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
318                                         DATA_GENERIC_ENHANCE)) {
319                                 f2fs_put_dnode(&dn);
320                                 ret = -EFSCORRUPTED;
321                                 f2fs_handle_error(sbi,
322                                                 ERROR_INVALID_BLKADDR);
323                                 goto out;
324                         }
325
326                         new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
327                                                         true, NULL);
328
329                         ret = __replace_atomic_write_block(inode, index, blkaddr,
330                                                         &new->old_addr, false);
331                         if (ret) {
332                                 f2fs_put_dnode(&dn);
333                                 kmem_cache_free(revoke_entry_slab, new);
334                                 goto out;
335                         }
336
337                         f2fs_update_data_blkaddr(&dn, NULL_ADDR);
338                         new->index = index;
339                         list_add_tail(&new->list, &revoke_list);
340                 }
341                 f2fs_put_dnode(&dn);
342 next:
343                 off += blen;
344                 len -= blen;
345         }
346
347 out:
348         if (ret) {
349                 sbi->revoked_atomic_block += fi->atomic_write_cnt;
350         } else {
351                 sbi->committed_atomic_block += fi->atomic_write_cnt;
352                 set_inode_flag(inode, FI_ATOMIC_COMMITTED);
353         }
354
355         __complete_revoke_list(inode, &revoke_list, ret ? true : false);
356
357         return ret;
358 }
359
360 int f2fs_commit_atomic_write(struct inode *inode)
361 {
362         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
363         struct f2fs_inode_info *fi = F2FS_I(inode);
364         int err;
365
366         err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
367         if (err)
368                 return err;
369
370         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
371         f2fs_lock_op(sbi);
372
373         err = __f2fs_commit_atomic_write(inode);
374
375         f2fs_unlock_op(sbi);
376         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
377
378         return err;
379 }
380
381 /*
382  * This function balances dirty node and dentry pages.
383  * In addition, it controls garbage collection.
384  */
385 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
386 {
387         if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
388                 f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
389                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
390         }
391
392         /* balance_fs_bg may be left pending */
393         if (need && excess_cached_nats(sbi))
394                 f2fs_balance_fs_bg(sbi, false);
395
396         if (!f2fs_is_checkpoint_ready(sbi))
397                 return;
398
399         /*
400          * We should do GC or end up with a checkpoint if there are too many
401          * dirty dir/node pages and not enough free segments.
402          */
403         if (has_not_enough_free_secs(sbi, 0, 0)) {
404                 if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
405                                         sbi->gc_thread->f2fs_gc_task) {
406                         DEFINE_WAIT(wait);
407
408                         prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
409                                                 TASK_UNINTERRUPTIBLE);
410                         wake_up(&sbi->gc_thread->gc_wait_queue_head);
411                         io_schedule();
412                         finish_wait(&sbi->gc_thread->fggc_wq, &wait);
413                 } else {
414                         struct f2fs_gc_control gc_control = {
415                                 .victim_segno = NULL_SEGNO,
416                                 .init_gc_type = BG_GC,
417                                 .no_bg_gc = true,
418                                 .should_migrate_blocks = false,
419                                 .err_gc_skipped = false,
420                                 .nr_free_secs = 1 };
421                         f2fs_down_write(&sbi->gc_lock);
422                         f2fs_gc(sbi, &gc_control);
423                 }
424         }
425 }
426
427 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
428 {
429         int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
430         unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
431         unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
432         unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
433         unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
434         unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
435         unsigned int threshold = sbi->blocks_per_seg * factor *
436                                         DEFAULT_DIRTY_THRESHOLD;
437         unsigned int global_threshold = threshold * 3 / 2;
438
439         if (dents >= threshold || qdata >= threshold ||
440                 nodes >= threshold || meta >= threshold ||
441                 imeta >= threshold)
442                 return true;
443         return dents + qdata + nodes + meta + imeta >  global_threshold;
444 }
445
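/*
 * Background counterpart of f2fs_balance_fs(): shrink the extent/NAT/nid
 * caches when memory is tight, and decide whether the amount of dirty
 * metadata, prefree segments or elapsed time justifies forcing a sync
 * checkpoint via f2fs_sync_fs().
 */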
446 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
447 {
448         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
449                 return;
450
451         /* try to shrink the read extent cache when there is not enough memory */
452         if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
453                 f2fs_shrink_read_extent_tree(sbi,
454                                 READ_EXTENT_CACHE_SHRINK_NUMBER);
455
456         /* try to shrink the age extent cache when there is not enough memory */
457         if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
458                 f2fs_shrink_age_extent_tree(sbi,
459                                 AGE_EXTENT_CACHE_SHRINK_NUMBER);
460
461         /* check the # of cached NAT entries */
462         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
463                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
464
465         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
466                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
467         else
468                 f2fs_build_free_nids(sbi, false, false);
469
470         if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
471                 excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
472                 goto do_sync;
473
474         /* there is in-flight background IO, or a foreground operation ran recently */
475         if (is_inflight_io(sbi, REQ_TIME) ||
476                 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
477                 return;
478
479         /* the periodic checkpoint timeout threshold has been exceeded */
480         if (f2fs_time_over(sbi, CP_TIME))
481                 goto do_sync;
482
483         /* checkpoint is the only way to shrink partial cached entries */
484         if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
485                 f2fs_available_free_memory(sbi, INO_ENTRIES))
486                 return;
487
488 do_sync:
489         if (test_opt(sbi, DATA_FLUSH) && from_bg) {
490                 struct blk_plug plug;
491
492                 mutex_lock(&sbi->flush_lock);
493
494                 blk_start_plug(&plug);
495                 f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
496                 blk_finish_plug(&plug);
497
498                 mutex_unlock(&sbi->flush_lock);
499         }
500         f2fs_sync_fs(sbi->sb, 1);
501         stat_inc_bg_cp_count(sbi->stat_info);
502 }
503
504 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
505                                 struct block_device *bdev)
506 {
507         int ret = blkdev_issue_flush(bdev);
508
509         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
510                                 test_opt(sbi, FLUSH_MERGE), ret);
511         return ret;
512 }
513
514 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
515 {
516         int ret = 0;
517         int i;
518
519         if (!f2fs_is_multi_device(sbi))
520                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
521
522         for (i = 0; i < sbi->s_ndevs; i++) {
523                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
524                         continue;
525                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
526                 if (ret)
527                         break;
528         }
529         return ret;
530 }
531
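/*
 * Kernel thread that batches FLUSH_MERGE requests: it drains the issue
 * list, submits one flush on behalf of all queued commands, and completes
 * each waiter with the shared result.
 */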
532 static int issue_flush_thread(void *data)
533 {
534         struct f2fs_sb_info *sbi = data;
535         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
536         wait_queue_head_t *q = &fcc->flush_wait_queue;
537 repeat:
538         if (kthread_should_stop())
539                 return 0;
540
541         if (!llist_empty(&fcc->issue_list)) {
542                 struct flush_cmd *cmd, *next;
543                 int ret;
544
545                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
546                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
547
548                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
549
550                 ret = submit_flush_wait(sbi, cmd->ino);
551                 atomic_inc(&fcc->issued_flush);
552
553                 llist_for_each_entry_safe(cmd, next,
554                                           fcc->dispatch_list, llnode) {
555                         cmd->ret = ret;
556                         complete(&cmd->wait);
557                 }
558                 fcc->dispatch_list = NULL;
559         }
560
561         wait_event_interruptible(*q,
562                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
563         goto repeat;
564 }
565
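/*
 * Issue a cache flush for @ino.  Without FLUSH_MERGE (or on multi-device
 * setups, or when nothing else is queued) the flush is submitted directly;
 * otherwise the request is queued for the issue_flush thread and, if that
 * thread is gone, the caller drains the list itself.
 */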
566 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
567 {
568         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
569         struct flush_cmd cmd;
570         int ret;
571
572         if (test_opt(sbi, NOBARRIER))
573                 return 0;
574
575         if (!test_opt(sbi, FLUSH_MERGE)) {
576                 atomic_inc(&fcc->queued_flush);
577                 ret = submit_flush_wait(sbi, ino);
578                 atomic_dec(&fcc->queued_flush);
579                 atomic_inc(&fcc->issued_flush);
580                 return ret;
581         }
582
583         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
584             f2fs_is_multi_device(sbi)) {
585                 ret = submit_flush_wait(sbi, ino);
586                 atomic_dec(&fcc->queued_flush);
587
588                 atomic_inc(&fcc->issued_flush);
589                 return ret;
590         }
591
592         cmd.ino = ino;
593         init_completion(&cmd.wait);
594
595         llist_add(&cmd.llnode, &fcc->issue_list);
596
597         /*
598          * update issue_list before we wake up issue_flush thread, this
599          * smp_mb() pairs with another barrier in ___wait_event(), see
600          * more details in comments of waitqueue_active().
601          */
602         smp_mb();
603
604         if (waitqueue_active(&fcc->flush_wait_queue))
605                 wake_up(&fcc->flush_wait_queue);
606
607         if (fcc->f2fs_issue_flush) {
608                 wait_for_completion(&cmd.wait);
609                 atomic_dec(&fcc->queued_flush);
610         } else {
611                 struct llist_node *list;
612
613                 list = llist_del_all(&fcc->issue_list);
614                 if (!list) {
615                         wait_for_completion(&cmd.wait);
616                         atomic_dec(&fcc->queued_flush);
617                 } else {
618                         struct flush_cmd *tmp, *next;
619
620                         ret = submit_flush_wait(sbi, ino);
621
622                         llist_for_each_entry_safe(tmp, next, list, llnode) {
623                                 if (tmp == &cmd) {
624                                         cmd.ret = ret;
625                                         atomic_dec(&fcc->queued_flush);
626                                         continue;
627                                 }
628                                 tmp->ret = ret;
629                                 complete(&tmp->wait);
630                         }
631                 }
632         }
633
634         return cmd.ret;
635 }
636
637 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
638 {
639         dev_t dev = sbi->sb->s_bdev->bd_dev;
640         struct flush_cmd_control *fcc;
641
642         if (SM_I(sbi)->fcc_info) {
643                 fcc = SM_I(sbi)->fcc_info;
644                 if (fcc->f2fs_issue_flush)
645                         return 0;
646                 goto init_thread;
647         }
648
649         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
650         if (!fcc)
651                 return -ENOMEM;
652         atomic_set(&fcc->issued_flush, 0);
653         atomic_set(&fcc->queued_flush, 0);
654         init_waitqueue_head(&fcc->flush_wait_queue);
655         init_llist_head(&fcc->issue_list);
656         SM_I(sbi)->fcc_info = fcc;
657         if (!test_opt(sbi, FLUSH_MERGE))
658                 return 0;
659
660 init_thread:
661         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
662                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
663         if (IS_ERR(fcc->f2fs_issue_flush)) {
664                 int err = PTR_ERR(fcc->f2fs_issue_flush);
665
666                 kfree(fcc);
667                 SM_I(sbi)->fcc_info = NULL;
668                 return err;
669         }
670
671         return 0;
672 }
673
674 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
675 {
676         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
677
678         if (fcc && fcc->f2fs_issue_flush) {
679                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
680
681                 fcc->f2fs_issue_flush = NULL;
682                 kthread_stop(flush_thread);
683         }
684         if (free) {
685                 kfree(fcc);
686                 SM_I(sbi)->fcc_info = NULL;
687         }
688 }
689
690 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
691 {
692         int ret = 0, i;
693
694         if (!f2fs_is_multi_device(sbi))
695                 return 0;
696
697         if (test_opt(sbi, NOBARRIER))
698                 return 0;
699
700         for (i = 1; i < sbi->s_ndevs; i++) {
701                 int count = DEFAULT_RETRY_IO_COUNT;
702
703                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
704                         continue;
705
706                 do {
707                         ret = __submit_flush_wait(sbi, FDEV(i).bdev);
708                         if (ret)
709                                 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
710                 } while (ret && --count);
711
712                 if (ret) {
713                         f2fs_stop_checkpoint(sbi, false,
714                                         STOP_CP_REASON_FLUSH_FAIL);
715                         break;
716                 }
717
718                 spin_lock(&sbi->dev_lock);
719                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
720                 spin_unlock(&sbi->dev_lock);
721         }
722
723         return ret;
724 }
725
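/*
 * Mark @segno in the dirty seglist for the given @dirty_type; for DIRTY it
 * is also accounted under its per-type bitmap and, for large sections, in
 * dirty_secmap.  Current segments are skipped.  Callers hold seglist_lock.
 */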
726 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
727                 enum dirty_type dirty_type)
728 {
729         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
730
731         /* need not be added */
732         if (IS_CURSEG(sbi, segno))
733                 return;
734
735         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
736                 dirty_i->nr_dirty[dirty_type]++;
737
738         if (dirty_type == DIRTY) {
739                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
740                 enum dirty_type t = sentry->type;
741
742                 if (unlikely(t >= DIRTY)) {
743                         f2fs_bug_on(sbi, 1);
744                         return;
745                 }
746                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
747                         dirty_i->nr_dirty[t]++;
748
749                 if (__is_large_section(sbi)) {
750                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
751                         block_t valid_blocks =
752                                 get_valid_blocks(sbi, segno, true);
753
754                         f2fs_bug_on(sbi, unlikely(!valid_blocks ||
755                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)));
756
757                         if (!IS_CURSEC(sbi, secno))
758                                 set_bit(secno, dirty_i->dirty_secmap);
759                 }
760         }
761 }
762
763 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
764                 enum dirty_type dirty_type)
765 {
766         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
767         block_t valid_blocks;
768
769         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
770                 dirty_i->nr_dirty[dirty_type]--;
771
772         if (dirty_type == DIRTY) {
773                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
774                 enum dirty_type t = sentry->type;
775
776                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
777                         dirty_i->nr_dirty[t]--;
778
779                 valid_blocks = get_valid_blocks(sbi, segno, true);
780                 if (valid_blocks == 0) {
781                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
782                                                 dirty_i->victim_secmap);
783 #ifdef CONFIG_F2FS_CHECK_FS
784                         clear_bit(segno, SIT_I(sbi)->invalid_segmap);
785 #endif
786                 }
787                 if (__is_large_section(sbi)) {
788                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
789
790                         if (!valid_blocks ||
791                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
792                                 clear_bit(secno, dirty_i->dirty_secmap);
793                                 return;
794                         }
795
796                         if (!IS_CURSEC(sbi, secno))
797                                 set_bit(secno, dirty_i->dirty_secmap);
798                 }
799         }
800 }
801
802 /*
803  * This should not fail with an error such as -ENOMEM.
804  * Adding a dirty entry into the seglist is not a critical operation.
805  * If a given segment is one of the current working segments, it won't be added.
806  */
807 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
808 {
809         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
810         unsigned short valid_blocks, ckpt_valid_blocks;
811         unsigned int usable_blocks;
812
813         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
814                 return;
815
816         usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
817         mutex_lock(&dirty_i->seglist_lock);
818
819         valid_blocks = get_valid_blocks(sbi, segno, false);
820         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
821
822         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
823                 ckpt_valid_blocks == usable_blocks)) {
824                 __locate_dirty_segment(sbi, segno, PRE);
825                 __remove_dirty_segment(sbi, segno, DIRTY);
826         } else if (valid_blocks < usable_blocks) {
827                 __locate_dirty_segment(sbi, segno, DIRTY);
828         } else {
829                 /* Recovery routine with SSR needs this */
830                 __remove_dirty_segment(sbi, segno, DIRTY);
831         }
832
833         mutex_unlock(&dirty_i->seglist_lock);
834 }
835
836 /* Move dirty segments that no longer have valid blocks to prefree; takes seglist_lock internally. */
837 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
838 {
839         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
840         unsigned int segno;
841
842         mutex_lock(&dirty_i->seglist_lock);
843         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
844                 if (get_valid_blocks(sbi, segno, false))
845                         continue;
846                 if (IS_CURSEG(sbi, segno))
847                         continue;
848                 __locate_dirty_segment(sbi, segno, PRE);
849                 __remove_dirty_segment(sbi, segno, DIRTY);
850         }
851         mutex_unlock(&dirty_i->seglist_lock);
852 }
853
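/*
 * With checkpointing disabled, report how many blocks cannot be reused:
 * the larger of the DATA and NODE holes in dirty segments, minus what the
 * overprovision area can absorb.
 */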
854 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
855 {
856         int ovp_hole_segs =
857                 (overprovision_segments(sbi) - reserved_segments(sbi));
858         block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
859         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
860         block_t holes[2] = {0, 0};      /* DATA and NODE */
861         block_t unusable;
862         struct seg_entry *se;
863         unsigned int segno;
864
865         mutex_lock(&dirty_i->seglist_lock);
866         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
867                 se = get_seg_entry(sbi, segno);
868                 if (IS_NODESEG(se->type))
869                         holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
870                                                         se->valid_blocks;
871                 else
872                         holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
873                                                         se->valid_blocks;
874         }
875         mutex_unlock(&dirty_i->seglist_lock);
876
877         unusable = max(holes[DATA], holes[NODE]);
878         if (unusable > ovp_holes)
879                 return unusable - ovp_holes;
880         return 0;
881 }
882
883 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
884 {
885         int ovp_hole_segs =
886                 (overprovision_segments(sbi) - reserved_segments(sbi));
887         if (unusable > F2FS_OPTION(sbi).unusable_cap)
888                 return -EAGAIN;
889         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
890                 dirty_segments(sbi) > ovp_hole_segs)
891                 return -EAGAIN;
892         return 0;
893 }
894
895 /* This is only used by SBI_CP_DISABLED */
896 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
897 {
898         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
899         unsigned int segno = 0;
900
901         mutex_lock(&dirty_i->seglist_lock);
902         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
903                 if (get_valid_blocks(sbi, segno, false))
904                         continue;
905                 if (get_ckpt_valid_blocks(sbi, segno, false))
906                         continue;
907                 mutex_unlock(&dirty_i->seglist_lock);
908                 return segno;
909         }
910         mutex_unlock(&dirty_i->seglist_lock);
911         return NULL_SEGNO;
912 }
913
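/*
 * Allocate a discard command covering @len blocks starting at logical
 * @lstart (device block @start on @bdev), put it on the pending list that
 * matches its length, and account it as undiscarded.
 */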
914 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
915                 struct block_device *bdev, block_t lstart,
916                 block_t start, block_t len)
917 {
918         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
919         struct list_head *pend_list;
920         struct discard_cmd *dc;
921
922         f2fs_bug_on(sbi, !len);
923
924         pend_list = &dcc->pend_list[plist_idx(len)];
925
926         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
927         INIT_LIST_HEAD(&dc->list);
928         dc->bdev = bdev;
929         dc->lstart = lstart;
930         dc->start = start;
931         dc->len = len;
932         dc->ref = 0;
933         dc->state = D_PREP;
934         dc->queued = 0;
935         dc->error = 0;
936         init_completion(&dc->wait);
937         list_add_tail(&dc->list, pend_list);
938         spin_lock_init(&dc->lock);
939         dc->bio_ref = 0;
940         atomic_inc(&dcc->discard_cmd_cnt);
941         dcc->undiscard_blks += len;
942
943         return dc;
944 }
945
946 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
947                                 struct block_device *bdev, block_t lstart,
948                                 block_t start, block_t len,
949                                 struct rb_node *parent, struct rb_node **p,
950                                 bool leftmost)
951 {
952         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
953         struct discard_cmd *dc;
954
955         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
956
957         rb_link_node(&dc->rb_node, parent, p);
958         rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
959
960         return dc;
961 }
962
963 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
964                                                         struct discard_cmd *dc)
965 {
966         if (dc->state == D_DONE)
967                 atomic_sub(dc->queued, &dcc->queued_discard);
968
969         list_del(&dc->list);
970         rb_erase_cached(&dc->rb_node, &dcc->root);
971         dcc->undiscard_blks -= dc->len;
972
973         kmem_cache_free(discard_cmd_slab, dc);
974
975         atomic_dec(&dcc->discard_cmd_cnt);
976 }
977
978 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
979                                                         struct discard_cmd *dc)
980 {
981         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
982         unsigned long flags;
983
984         trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
985
986         spin_lock_irqsave(&dc->lock, flags);
987         if (dc->bio_ref) {
988                 spin_unlock_irqrestore(&dc->lock, flags);
989                 return;
990         }
991         spin_unlock_irqrestore(&dc->lock, flags);
992
993         f2fs_bug_on(sbi, dc->ref);
994
995         if (dc->error == -EOPNOTSUPP)
996                 dc->error = 0;
997
998         if (dc->error)
999                 printk_ratelimited(
1000                         "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1001                         KERN_INFO, sbi->sb->s_id,
1002                         dc->lstart, dc->start, dc->len, dc->error);
1003         __detach_discard_cmd(dcc, dc);
1004 }
1005
1006 static void f2fs_submit_discard_endio(struct bio *bio)
1007 {
1008         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1009         unsigned long flags;
1010
1011         spin_lock_irqsave(&dc->lock, flags);
1012         if (!dc->error)
1013                 dc->error = blk_status_to_errno(bio->bi_status);
1014         dc->bio_ref--;
1015         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1016                 dc->state = D_DONE;
1017                 complete_all(&dc->wait);
1018         }
1019         spin_unlock_irqrestore(&dc->lock, flags);
1020         bio_put(bio);
1021 }
1022
1023 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1024                                 block_t start, block_t end)
1025 {
1026 #ifdef CONFIG_F2FS_CHECK_FS
1027         struct seg_entry *sentry;
1028         unsigned int segno;
1029         block_t blk = start;
1030         unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1031         unsigned long *map;
1032
1033         while (blk < end) {
1034                 segno = GET_SEGNO(sbi, blk);
1035                 sentry = get_seg_entry(sbi, segno);
1036                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1037
1038                 if (end < START_BLOCK(sbi, segno + 1))
1039                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1040                 else
1041                         size = max_blocks;
1042                 map = (unsigned long *)(sentry->cur_valid_map);
1043                 offset = __find_rev_next_bit(map, size, offset);
1044                 f2fs_bug_on(sbi, offset != size);
1045                 blk = START_BLOCK(sbi, segno + 1);
1046         }
1047 #endif
1048 }
1049
1050 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1051                                 struct discard_policy *dpolicy,
1052                                 int discard_type, unsigned int granularity)
1053 {
1054         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1055
1056         /* common policy */
1057         dpolicy->type = discard_type;
1058         dpolicy->sync = true;
1059         dpolicy->ordered = false;
1060         dpolicy->granularity = granularity;
1061
1062         dpolicy->max_requests = dcc->max_discard_request;
1063         dpolicy->io_aware_gran = MAX_PLIST_NUM;
1064         dpolicy->timeout = false;
1065
1066         if (discard_type == DPOLICY_BG) {
1067                 dpolicy->min_interval = dcc->min_discard_issue_time;
1068                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1069                 dpolicy->max_interval = dcc->max_discard_issue_time;
1070                 dpolicy->io_aware = true;
1071                 dpolicy->sync = false;
1072                 dpolicy->ordered = true;
1073                 if (utilization(sbi) > dcc->discard_urgent_util) {
1074                         dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1075                         if (atomic_read(&dcc->discard_cmd_cnt))
1076                                 dpolicy->max_interval =
1077                                         dcc->min_discard_issue_time;
1078                 }
1079         } else if (discard_type == DPOLICY_FORCE) {
1080                 dpolicy->min_interval = dcc->min_discard_issue_time;
1081                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1082                 dpolicy->max_interval = dcc->max_discard_issue_time;
1083                 dpolicy->io_aware = false;
1084         } else if (discard_type == DPOLICY_FSTRIM) {
1085                 dpolicy->io_aware = false;
1086         } else if (discard_type == DPOLICY_UMOUNT) {
1087                 dpolicy->io_aware = false;
1088                 /* we need to issue all to keep CP_TRIMMED_FLAG */
1089                 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1090                 dpolicy->timeout = true;
1091         }
1092 }
1093
1094 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1095                                 struct block_device *bdev, block_t lstart,
1096                                 block_t start, block_t len);
1097 /* this function is copied from blkdev_issue_discard in block/blk-lib.c */
1098 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1099                                                 struct discard_policy *dpolicy,
1100                                                 struct discard_cmd *dc,
1101                                                 unsigned int *issued)
1102 {
1103         struct block_device *bdev = dc->bdev;
1104         unsigned int max_discard_blocks =
1105                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1106         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1107         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1108                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1109         blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
1110         block_t lstart, start, len, total_len;
1111         int err = 0;
1112
1113         if (dc->state != D_PREP)
1114                 return 0;
1115
1116         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1117                 return 0;
1118
1119         trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1120
1121         lstart = dc->lstart;
1122         start = dc->start;
1123         len = dc->len;
1124         total_len = len;
1125
1126         dc->len = 0;
1127
1128         while (total_len && *issued < dpolicy->max_requests && !err) {
1129                 struct bio *bio = NULL;
1130                 unsigned long flags;
1131                 bool last = true;
1132
1133                 if (len > max_discard_blocks) {
1134                         len = max_discard_blocks;
1135                         last = false;
1136                 }
1137
1138                 (*issued)++;
1139                 if (*issued == dpolicy->max_requests)
1140                         last = true;
1141
1142                 dc->len += len;
1143
1144                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1145                         f2fs_show_injection_info(sbi, FAULT_DISCARD);
1146                         err = -EIO;
1147                 } else {
1148                         err = __blkdev_issue_discard(bdev,
1149                                         SECTOR_FROM_BLOCK(start),
1150                                         SECTOR_FROM_BLOCK(len),
1151                                         GFP_NOFS, &bio);
1152                 }
1153                 if (err) {
1154                         spin_lock_irqsave(&dc->lock, flags);
1155                         if (dc->state == D_PARTIAL)
1156                                 dc->state = D_SUBMIT;
1157                         spin_unlock_irqrestore(&dc->lock, flags);
1158
1159                         break;
1160                 }
1161
1162                 f2fs_bug_on(sbi, !bio);
1163
1164                 /*
1165                  * state and bio_ref must be updated before submission so
1166                  * the endio cannot mark the command D_DONE right away
1167                  */
1168                 spin_lock_irqsave(&dc->lock, flags);
1169                 if (last)
1170                         dc->state = D_SUBMIT;
1171                 else
1172                         dc->state = D_PARTIAL;
1173                 dc->bio_ref++;
1174                 spin_unlock_irqrestore(&dc->lock, flags);
1175
1176                 atomic_inc(&dcc->queued_discard);
1177                 dc->queued++;
1178                 list_move_tail(&dc->list, wait_list);
1179
1180                 /* sanity check on discard range */
1181                 __check_sit_bitmap(sbi, lstart, lstart + len);
1182
1183                 bio->bi_private = dc;
1184                 bio->bi_end_io = f2fs_submit_discard_endio;
1185                 bio->bi_opf |= flag;
1186                 submit_bio(bio);
1187
1188                 atomic_inc(&dcc->issued_discard);
1189
1190                 f2fs_update_iostat(sbi, NULL, FS_DISCARD, len * F2FS_BLKSIZE);
1191
1192                 lstart += len;
1193                 start += len;
1194                 total_len -= len;
1195                 len = total_len;
1196         }
1197
1198         if (!err && len) {
1199                 dcc->undiscard_blks -= len;
1200                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1201         }
1202         return err;
1203 }
1204
1205 static void __insert_discard_tree(struct f2fs_sb_info *sbi,
1206                                 struct block_device *bdev, block_t lstart,
1207                                 block_t start, block_t len,
1208                                 struct rb_node **insert_p,
1209                                 struct rb_node *insert_parent)
1210 {
1211         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1212         struct rb_node **p;
1213         struct rb_node *parent = NULL;
1214         bool leftmost = true;
1215
1216         if (insert_p && insert_parent) {
1217                 parent = insert_parent;
1218                 p = insert_p;
1219                 goto do_insert;
1220         }
1221
1222         p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
1223                                                         lstart, &leftmost);
1224 do_insert:
1225         __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
1226                                                                 p, leftmost);
1227 }
1228
1229 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1230                                                 struct discard_cmd *dc)
1231 {
1232         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1233 }
1234
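/*
 * Remove @blkaddr from pending discard command @dc, trimming it and/or
 * inserting a new command for the tail so the remaining ranges stay
 * covered.
 */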
1235 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1236                                 struct discard_cmd *dc, block_t blkaddr)
1237 {
1238         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1239         struct discard_info di = dc->di;
1240         bool modified = false;
1241
1242         if (dc->state == D_DONE || dc->len == 1) {
1243                 __remove_discard_cmd(sbi, dc);
1244                 return;
1245         }
1246
1247         dcc->undiscard_blks -= di.len;
1248
1249         if (blkaddr > di.lstart) {
1250                 dc->len = blkaddr - dc->lstart;
1251                 dcc->undiscard_blks += dc->len;
1252                 __relocate_discard_cmd(dcc, dc);
1253                 modified = true;
1254         }
1255
1256         if (blkaddr < di.lstart + di.len - 1) {
1257                 if (modified) {
1258                         __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1259                                         di.start + blkaddr + 1 - di.lstart,
1260                                         di.lstart + di.len - 1 - blkaddr,
1261                                         NULL, NULL);
1262                 } else {
1263                         dc->lstart++;
1264                         dc->len--;
1265                         dc->start++;
1266                         dcc->undiscard_blks += dc->len;
1267                         __relocate_discard_cmd(dcc, dc);
1268                 }
1269         }
1270 }
1271
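/*
 * Insert the discard range [@lstart, @lstart + @len) into the rb-tree of
 * pending commands, merging with adjacent D_PREP commands on the same
 * device as long as the merged size stays within max_discard_blocks.
 */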
1272 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1273                                 struct block_device *bdev, block_t lstart,
1274                                 block_t start, block_t len)
1275 {
1276         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1277         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1278         struct discard_cmd *dc;
1279         struct discard_info di = {0};
1280         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1281         unsigned int max_discard_blocks =
1282                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1283         block_t end = lstart + len;
1284
1285         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1286                                         NULL, lstart,
1287                                         (struct rb_entry **)&prev_dc,
1288                                         (struct rb_entry **)&next_dc,
1289                                         &insert_p, &insert_parent, true, NULL);
1290         if (dc)
1291                 prev_dc = dc;
1292
1293         if (!prev_dc) {
1294                 di.lstart = lstart;
1295                 di.len = next_dc ? next_dc->lstart - lstart : len;
1296                 di.len = min(di.len, len);
1297                 di.start = start;
1298         }
1299
1300         while (1) {
1301                 struct rb_node *node;
1302                 bool merged = false;
1303                 struct discard_cmd *tdc = NULL;
1304
1305                 if (prev_dc) {
1306                         di.lstart = prev_dc->lstart + prev_dc->len;
1307                         if (di.lstart < lstart)
1308                                 di.lstart = lstart;
1309                         if (di.lstart >= end)
1310                                 break;
1311
1312                         if (!next_dc || next_dc->lstart > end)
1313                                 di.len = end - di.lstart;
1314                         else
1315                                 di.len = next_dc->lstart - di.lstart;
1316                         di.start = start + di.lstart - lstart;
1317                 }
1318
1319                 if (!di.len)
1320                         goto next;
1321
1322                 if (prev_dc && prev_dc->state == D_PREP &&
1323                         prev_dc->bdev == bdev &&
1324                         __is_discard_back_mergeable(&di, &prev_dc->di,
1325                                                         max_discard_blocks)) {
1326                         prev_dc->di.len += di.len;
1327                         dcc->undiscard_blks += di.len;
1328                         __relocate_discard_cmd(dcc, prev_dc);
1329                         di = prev_dc->di;
1330                         tdc = prev_dc;
1331                         merged = true;
1332                 }
1333
1334                 if (next_dc && next_dc->state == D_PREP &&
1335                         next_dc->bdev == bdev &&
1336                         __is_discard_front_mergeable(&di, &next_dc->di,
1337                                                         max_discard_blocks)) {
1338                         next_dc->di.lstart = di.lstart;
1339                         next_dc->di.len += di.len;
1340                         next_dc->di.start = di.start;
1341                         dcc->undiscard_blks += di.len;
1342                         __relocate_discard_cmd(dcc, next_dc);
1343                         if (tdc)
1344                                 __remove_discard_cmd(sbi, tdc);
1345                         merged = true;
1346                 }
1347
1348                 if (!merged) {
1349                         __insert_discard_tree(sbi, bdev, di.lstart, di.start,
1350                                                         di.len, NULL, NULL);
1351                 }
1352  next:
1353                 prev_dc = next_dc;
1354                 if (!prev_dc)
1355                         break;
1356
1357                 node = rb_next(&prev_dc->rb_node);
1358                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1359         }
1360 }
1361
1362 static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
1363                 struct block_device *bdev, block_t blkstart, block_t blklen)
1364 {
1365         block_t lblkstart = blkstart;
1366
1367         if (!f2fs_bdev_support_discard(bdev))
1368                 return;
1369
1370         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1371
1372         if (f2fs_is_multi_device(sbi)) {
1373                 int devi = f2fs_target_device_index(sbi, blkstart);
1374
1375                 blkstart -= FDEV(devi).start_blk;
1376         }
1377         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1378         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1379         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1380 }
1381
1382 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1383                                         struct discard_policy *dpolicy)
1384 {
1385         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1386         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1387         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1388         struct discard_cmd *dc;
1389         struct blk_plug plug;
1390         unsigned int pos = dcc->next_pos;
1391         unsigned int issued = 0;
1392         bool io_interrupted = false;
1393
1394         mutex_lock(&dcc->cmd_lock);
1395         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1396                                         NULL, pos,
1397                                         (struct rb_entry **)&prev_dc,
1398                                         (struct rb_entry **)&next_dc,
1399                                         &insert_p, &insert_parent, true, NULL);
1400         if (!dc)
1401                 dc = next_dc;
1402
1403         blk_start_plug(&plug);
1404
1405         while (dc) {
1406                 struct rb_node *node;
1407                 int err = 0;
1408
1409                 if (dc->state != D_PREP)
1410                         goto next;
1411
1412                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1413                         io_interrupted = true;
1414                         break;
1415                 }
1416
1417                 dcc->next_pos = dc->lstart + dc->len;
1418                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1419
1420                 if (issued >= dpolicy->max_requests)
1421                         break;
1422 next:
1423                 node = rb_next(&dc->rb_node);
1424                 if (err)
1425                         __remove_discard_cmd(sbi, dc);
1426                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1427         }
1428
1429         blk_finish_plug(&plug);
1430
1431         if (!dc)
1432                 dcc->next_pos = 0;
1433
1434         mutex_unlock(&dcc->cmd_lock);
1435
1436         if (!issued && io_interrupted)
1437                 issued = -1;
1438
1439         return issued;
1440 }
1441 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1442                                         struct discard_policy *dpolicy);
1443
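/*
 * Issue pending discard commands from the largest pending list downwards,
 * honoring the policy's granularity, io-awareness and request limit, and
 * delegating to __issue_discard_cmd_orderly() for ordered background
 * policies; for umount, keep retrying until everything at or above the
 * granularity has been issued.
 */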
1444 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1445                                         struct discard_policy *dpolicy)
1446 {
1447         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1448         struct list_head *pend_list;
1449         struct discard_cmd *dc, *tmp;
1450         struct blk_plug plug;
1451         int i, issued;
1452         bool io_interrupted = false;
1453
1454         if (dpolicy->timeout)
1455                 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1456
1457 retry:
1458         issued = 0;
1459         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1460                 if (dpolicy->timeout &&
1461                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1462                         break;
1463
1464                 if (i + 1 < dpolicy->granularity)
1465                         break;
1466
1467                 if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered)
1468                         return __issue_discard_cmd_orderly(sbi, dpolicy);
1469
1470                 pend_list = &dcc->pend_list[i];
1471
1472                 mutex_lock(&dcc->cmd_lock);
1473                 if (list_empty(pend_list))
1474                         goto next;
1475                 if (unlikely(dcc->rbtree_check))
1476                         f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1477                                                         &dcc->root, false));
1478                 blk_start_plug(&plug);
1479                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1480                         f2fs_bug_on(sbi, dc->state != D_PREP);
1481
1482                         if (dpolicy->timeout &&
1483                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1484                                 break;
1485
1486                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1487                                                 !is_idle(sbi, DISCARD_TIME)) {
1488                                 io_interrupted = true;
1489                                 break;
1490                         }
1491
1492                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1493
1494                         if (issued >= dpolicy->max_requests)
1495                                 break;
1496                 }
1497                 blk_finish_plug(&plug);
1498 next:
1499                 mutex_unlock(&dcc->cmd_lock);
1500
1501                 if (issued >= dpolicy->max_requests || io_interrupted)
1502                         break;
1503         }
1504
1505         if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1506                 __wait_all_discard_cmd(sbi, dpolicy);
1507                 goto retry;
1508         }
1509
1510         if (!issued && io_interrupted)
1511                 issued = -1;
1512
1513         return issued;
1514 }
1515
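/*
 * Drop every discard command that is still in the D_PREP state.  Returns
 * true if at least one command was dropped.
 */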
1516 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1517 {
1518         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1519         struct list_head *pend_list;
1520         struct discard_cmd *dc, *tmp;
1521         int i;
1522         bool dropped = false;
1523
1524         mutex_lock(&dcc->cmd_lock);
1525         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1526                 pend_list = &dcc->pend_list[i];
1527                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1528                         f2fs_bug_on(sbi, dc->state != D_PREP);
1529                         __remove_discard_cmd(sbi, dc);
1530                         dropped = true;
1531                 }
1532         }
1533         mutex_unlock(&dcc->cmd_lock);
1534
1535         return dropped;
1536 }
1537
1538 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1539 {
1540         __drop_discard_cmd(sbi);
1541 }
1542
1543 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1544                                                         struct discard_cmd *dc)
1545 {
1546         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1547         unsigned int len = 0;
1548
1549         wait_for_completion_io(&dc->wait);
1550         mutex_lock(&dcc->cmd_lock);
1551         f2fs_bug_on(sbi, dc->state != D_DONE);
1552         dc->ref--;
1553         if (!dc->ref) {
1554                 if (!dc->error)
1555                         len = dc->len;
1556                 __remove_discard_cmd(sbi, dc);
1557         }
1558         mutex_unlock(&dcc->cmd_lock);
1559
1560         return len;
1561 }
1562
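/*
 * Wait for issued discard commands that overlap the [start, end) range and
 * are at least dpolicy->granularity blocks long.  Returns the number of
 * blocks successfully trimmed.
 */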
1563 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1564                                                 struct discard_policy *dpolicy,
1565                                                 block_t start, block_t end)
1566 {
1567         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1568         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1569                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1570         struct discard_cmd *dc = NULL, *iter, *tmp;
1571         unsigned int trimmed = 0;
1572
1573 next:
1574         dc = NULL;
1575
1576         mutex_lock(&dcc->cmd_lock);
1577         list_for_each_entry_safe(iter, tmp, wait_list, list) {
1578                 if (iter->lstart + iter->len <= start || end <= iter->lstart)
1579                         continue;
1580                 if (iter->len < dpolicy->granularity)
1581                         continue;
1582                 if (iter->state == D_DONE && !iter->ref) {
1583                         wait_for_completion_io(&iter->wait);
1584                         if (!iter->error)
1585                                 trimmed += iter->len;
1586                         __remove_discard_cmd(sbi, iter);
1587                 } else {
1588                         iter->ref++;
1589                         dc = iter;
1590                         break;
1591                 }
1592         }
1593         mutex_unlock(&dcc->cmd_lock);
1594
1595         if (dc) {
1596                 trimmed += __wait_one_discard_bio(sbi, dc);
1597                 goto next;
1598         }
1599
1600         return trimmed;
1601 }
1602
1603 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1604                                                 struct discard_policy *dpolicy)
1605 {
1606         struct discard_policy dp;
1607         unsigned int discard_blks;
1608
1609         if (dpolicy)
1610                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1611
1612         /* wait all */
1613         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1614         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1615         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1616         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1617
1618         return discard_blks;
1619 }
1620
1621 /* The caller should hold the global mutex, &sit_i->sentry_lock */
1622 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1623 {
1624         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1625         struct discard_cmd *dc;
1626         bool need_wait = false;
1627
1628         mutex_lock(&dcc->cmd_lock);
1629         dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1630                                                         NULL, blkaddr);
1631         if (dc) {
1632                 if (dc->state == D_PREP) {
1633                         __punch_discard_cmd(sbi, dc, blkaddr);
1634                 } else {
1635                         dc->ref++;
1636                         need_wait = true;
1637                 }
1638         }
1639         mutex_unlock(&dcc->cmd_lock);
1640
1641         if (need_wait)
1642                 __wait_one_discard_bio(sbi, dc);
1643 }
1644
1645 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1646 {
1647         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1648
1649         if (dcc && dcc->f2fs_issue_discard) {
1650                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1651
1652                 dcc->f2fs_issue_discard = NULL;
1653                 kthread_stop(discard_thread);
1654         }
1655 }
1656
1657 /* This comes from f2fs_put_super */
1658 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1659 {
1660         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1661         struct discard_policy dpolicy;
1662         bool dropped;
1663
1664         if (!atomic_read(&dcc->discard_cmd_cnt))
1665                 return false;
1666
1667         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1668                                         dcc->discard_granularity);
1669         __issue_discard_cmd(sbi, &dpolicy);
1670         dropped = __drop_discard_cmd(sbi);
1671
1672         /* just to make sure there are no pending discard commands */
1673         __wait_all_discard_cmd(sbi, NULL);
1674
1675         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1676         return dropped;
1677 }
1678
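/*
 * Background thread that periodically issues queued discard commands.  It
 * picks a forced policy when GC is in urgent-high mode or the discard cache
 * is under memory pressure, and adjusts its sleep interval based on how the
 * last round of issuing went.
 */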
1679 static int issue_discard_thread(void *data)
1680 {
1681         struct f2fs_sb_info *sbi = data;
1682         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1683         wait_queue_head_t *q = &dcc->discard_wait_queue;
1684         struct discard_policy dpolicy;
1685         unsigned int wait_ms = dcc->min_discard_issue_time;
1686         int issued;
1687
1688         set_freezable();
1689
1690         do {
1691                 wait_event_interruptible_timeout(*q,
1692                                 kthread_should_stop() || freezing(current) ||
1693                                 dcc->discard_wake,
1694                                 msecs_to_jiffies(wait_ms));
1695
1696                 if (sbi->gc_mode == GC_URGENT_HIGH ||
1697                         !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1698                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1699                 else
1700                         __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1701                                                 dcc->discard_granularity);
1702
1703                 if (dcc->discard_wake)
1704                         dcc->discard_wake = 0;
1705
1706                 /* clean up pending candidates before going to sleep */
1707                 if (atomic_read(&dcc->queued_discard))
1708                         __wait_all_discard_cmd(sbi, NULL);
1709
1710                 if (try_to_freeze())
1711                         continue;
1712                 if (f2fs_readonly(sbi->sb))
1713                         continue;
1714                 if (kthread_should_stop())
1715                         return 0;
1716                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1717                         !atomic_read(&dcc->discard_cmd_cnt)) {
1718                         wait_ms = dpolicy.max_interval;
1719                         continue;
1720                 }
1721
1722                 sb_start_intwrite(sbi->sb);
1723
1724                 issued = __issue_discard_cmd(sbi, &dpolicy);
1725                 if (issued > 0) {
1726                         __wait_all_discard_cmd(sbi, &dpolicy);
1727                         wait_ms = dpolicy.min_interval;
1728                 } else if (issued == -1) {
1729                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1730                         if (!wait_ms)
1731                                 wait_ms = dpolicy.mid_interval;
1732                 } else {
1733                         wait_ms = dpolicy.max_interval;
1734                 }
1735                 if (!atomic_read(&dcc->discard_cmd_cnt))
1736                         wait_ms = dpolicy.max_interval;
1737
1738                 sb_end_intwrite(sbi->sb);
1739
1740         } while (!kthread_should_stop());
1741         return 0;
1742 }
1743
1744 #ifdef CONFIG_BLK_DEV_ZONED
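/*
 * Discard on a zoned device: sequential zones are handled by resetting the
 * zone write pointer, while conventional zones fall back to a regular
 * queued discard.
 */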
1745 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1746                 struct block_device *bdev, block_t blkstart, block_t blklen)
1747 {
1748         sector_t sector, nr_sects;
1749         block_t lblkstart = blkstart;
1750         int devi = 0;
1751
1752         if (f2fs_is_multi_device(sbi)) {
1753                 devi = f2fs_target_device_index(sbi, blkstart);
1754                 if (blkstart < FDEV(devi).start_blk ||
1755                     blkstart > FDEV(devi).end_blk) {
1756                         f2fs_err(sbi, "Invalid block %x", blkstart);
1757                         return -EIO;
1758                 }
1759                 blkstart -= FDEV(devi).start_blk;
1760         }
1761
1762         /* For sequential zones, reset the zone write pointer */
1763         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1764                 sector = SECTOR_FROM_BLOCK(blkstart);
1765                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1766
1767                 if (sector & (bdev_zone_sectors(bdev) - 1) ||
1768                                 nr_sects != bdev_zone_sectors(bdev)) {
1769                         f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1770                                  devi, sbi->s_ndevs ? FDEV(devi).path : "",
1771                                  blkstart, blklen);
1772                         return -EIO;
1773                 }
1774                 trace_f2fs_issue_reset_zone(bdev, blkstart);
1775                 return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1776                                         sector, nr_sects, GFP_NOFS);
1777         }
1778
1779         /* For conventional zones, use regular discard if supported */
1780         __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1781         return 0;
1782 }
1783 #endif
1784
1785 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1786                 struct block_device *bdev, block_t blkstart, block_t blklen)
1787 {
1788 #ifdef CONFIG_BLK_DEV_ZONED
1789         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1790                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1791 #endif
1792         __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1793         return 0;
1794 }
1795
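/*
 * Split [blkstart, blkstart + blklen) at device boundaries on multi-device
 * setups, update the per-segment discard bitmaps, and queue an asynchronous
 * discard for each contiguous piece.
 */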
1796 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1797                                 block_t blkstart, block_t blklen)
1798 {
1799         sector_t start = blkstart, len = 0;
1800         struct block_device *bdev;
1801         struct seg_entry *se;
1802         unsigned int offset;
1803         block_t i;
1804         int err = 0;
1805
1806         bdev = f2fs_target_device(sbi, blkstart, NULL);
1807
1808         for (i = blkstart; i < blkstart + blklen; i++, len++) {
1809                 if (i != start) {
1810                         struct block_device *bdev2 =
1811                                 f2fs_target_device(sbi, i, NULL);
1812
1813                         if (bdev2 != bdev) {
1814                                 err = __issue_discard_async(sbi, bdev,
1815                                                 start, len);
1816                                 if (err)
1817                                         return err;
1818                                 bdev = bdev2;
1819                                 start = i;
1820                                 len = 0;
1821                         }
1822                 }
1823
1824                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1825                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1826
1827                 if (f2fs_block_unit_discard(sbi) &&
1828                                 !f2fs_test_and_set_bit(offset, se->discard_map))
1829                         sbi->discard_blks--;
1830         }
1831
1832         if (len)
1833                 err = __issue_discard_async(sbi, bdev, start, len);
1834         return err;
1835 }
1836
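/*
 * Collect discard candidates for the segment at cpc->trim_start: blocks that
 * were valid at the last checkpoint but are now invalid (or, for FITRIM, any
 * block that is neither checkpointed nor already discarded), recorded in a
 * discard_entry on dcc->entry_list.  In check_only mode, only report whether
 * any candidate exists.
 */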
1837 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1838                                                         bool check_only)
1839 {
1840         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1841         int max_blocks = sbi->blocks_per_seg;
1842         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1843         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1844         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1845         unsigned long *discard_map = (unsigned long *)se->discard_map;
1846         unsigned long *dmap = SIT_I(sbi)->tmp_map;
1847         unsigned int start = 0, end = -1;
1848         bool force = (cpc->reason & CP_DISCARD);
1849         struct discard_entry *de = NULL;
1850         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1851         int i;
1852
1853         if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
1854                         !f2fs_block_unit_discard(sbi))
1855                 return false;
1856
1857         if (!force) {
1858                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1859                         SM_I(sbi)->dcc_info->nr_discards >=
1860                                 SM_I(sbi)->dcc_info->max_discards)
1861                         return false;
1862         }
1863
1864         /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
1865         for (i = 0; i < entries; i++)
1866                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1867                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1868
1869         while (force || SM_I(sbi)->dcc_info->nr_discards <=
1870                                 SM_I(sbi)->dcc_info->max_discards) {
1871                 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1872                 if (start >= max_blocks)
1873                         break;
1874
1875                 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1876                 if (force && start && end != max_blocks
1877                                         && (end - start) < cpc->trim_minlen)
1878                         continue;
1879
1880                 if (check_only)
1881                         return true;
1882
1883                 if (!de) {
1884                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
1885                                                 GFP_F2FS_ZERO, true, NULL);
1886                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1887                         list_add_tail(&de->list, head);
1888                 }
1889
1890                 for (i = start; i < end; i++)
1891                         __set_bit_le(i, (void *)de->discard_map);
1892
1893                 SM_I(sbi)->dcc_info->nr_discards += end - start;
1894         }
1895         return false;
1896 }
1897
1898 static void release_discard_addr(struct discard_entry *entry)
1899 {
1900         list_del(&entry->list);
1901         kmem_cache_free(discard_entry_slab, entry);
1902 }
1903
1904 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1905 {
1906         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1907         struct discard_entry *entry, *this;
1908
1909         /* drop caches */
1910         list_for_each_entry_safe(entry, this, head, list)
1911                 release_discard_addr(entry);
1912 }
1913
1914 /*
1915  * f2fs_clear_prefree_segments should be called after the checkpoint is done.
1916  */
1917 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1918 {
1919         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1920         unsigned int segno;
1921
1922         mutex_lock(&dirty_i->seglist_lock);
1923         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1924                 __set_test_and_free(sbi, segno, false);
1925         mutex_unlock(&dirty_i->seglist_lock);
1926 }
1927
1928 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
1929                                                 struct cp_control *cpc)
1930 {
1931         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1932         struct list_head *head = &dcc->entry_list;
1933         struct discard_entry *entry, *this;
1934         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1935         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1936         unsigned int start = 0, end = -1;
1937         unsigned int secno, start_segno;
1938         bool force = (cpc->reason & CP_DISCARD);
1939         bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
1940                                                 DISCARD_UNIT_SECTION;
1941
1942         if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
1943                 section_alignment = true;
1944
1945         mutex_lock(&dirty_i->seglist_lock);
1946
1947         while (1) {
1948                 int i;
1949
1950                 if (section_alignment && end != -1)
1951                         end--;
1952                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
1953                 if (start >= MAIN_SEGS(sbi))
1954                         break;
1955                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
1956                                                                 start + 1);
1957
1958                 if (section_alignment) {
1959                         start = rounddown(start, sbi->segs_per_sec);
1960                         end = roundup(end, sbi->segs_per_sec);
1961                 }
1962
1963                 for (i = start; i < end; i++) {
1964                         if (test_and_clear_bit(i, prefree_map))
1965                                 dirty_i->nr_dirty[PRE]--;
1966                 }
1967
1968                 if (!f2fs_realtime_discard_enable(sbi))
1969                         continue;
1970
1971                 if (force && start >= cpc->trim_start &&
1972                                         (end - 1) <= cpc->trim_end)
1973                                 continue;
1974
1975                 if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
1976                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1977                                 (end - start) << sbi->log_blocks_per_seg);
1978                         continue;
1979                 }
1980 next:
1981                 secno = GET_SEC_FROM_SEG(sbi, start);
1982                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
1983                 if (!IS_CURSEC(sbi, secno) &&
1984                         !get_valid_blocks(sbi, start, true))
1985                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
1986                                 sbi->segs_per_sec << sbi->log_blocks_per_seg);
1987
1988                 start = start_segno + sbi->segs_per_sec;
1989                 if (start < end)
1990                         goto next;
1991                 else
1992                         end = start - 1;
1993         }
1994         mutex_unlock(&dirty_i->seglist_lock);
1995
1996         if (!f2fs_block_unit_discard(sbi))
1997                 goto wakeup;
1998
1999         /* send small discards */
2000         list_for_each_entry_safe(entry, this, head, list) {
2001                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2002                 bool is_valid = test_bit_le(0, entry->discard_map);
2003
2004 find_next:
2005                 if (is_valid) {
2006                         next_pos = find_next_zero_bit_le(entry->discard_map,
2007                                         sbi->blocks_per_seg, cur_pos);
2008                         len = next_pos - cur_pos;
2009
2010                         if (f2fs_sb_has_blkzoned(sbi) ||
2011                             (force && len < cpc->trim_minlen))
2012                                 goto skip;
2013
2014                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2015                                                                         len);
2016                         total_len += len;
2017                 } else {
2018                         next_pos = find_next_bit_le(entry->discard_map,
2019                                         sbi->blocks_per_seg, cur_pos);
2020                 }
2021 skip:
2022                 cur_pos = next_pos;
2023                 is_valid = !is_valid;
2024
2025                 if (cur_pos < sbi->blocks_per_seg)
2026                         goto find_next;
2027
2028                 release_discard_addr(entry);
2029                 dcc->nr_discards -= total_len;
2030         }
2031
2032 wakeup:
2033         wake_up_discard_thread(sbi, false);
2034 }
2035
2036 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2037 {
2038         dev_t dev = sbi->sb->s_bdev->bd_dev;
2039         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2040         int err = 0;
2041
2042         if (!f2fs_realtime_discard_enable(sbi))
2043                 return 0;
2044
2045         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2046                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2047         if (IS_ERR(dcc->f2fs_issue_discard)) {
2048                 err = PTR_ERR(dcc->f2fs_issue_discard);
2049                 dcc->f2fs_issue_discard = NULL;
2050         }
2051
2052         return err;
2053 }
2054
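/*
 * Allocate and initialize the discard command control structure (pending
 * lists, granularity and timing defaults) and start the discard thread.
 * An already existing dcc_info is reused and only the thread is restarted.
 */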
2055 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2056 {
2057         struct discard_cmd_control *dcc;
2058         int err = 0, i;
2059
2060         if (SM_I(sbi)->dcc_info) {
2061                 dcc = SM_I(sbi)->dcc_info;
2062                 goto init_thread;
2063         }
2064
2065         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2066         if (!dcc)
2067                 return -ENOMEM;
2068
2069         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2070         dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2071         if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2072                 dcc->discard_granularity = sbi->blocks_per_seg;
2073         else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2074                 dcc->discard_granularity = BLKS_PER_SEC(sbi);
2075
2076         INIT_LIST_HEAD(&dcc->entry_list);
2077         for (i = 0; i < MAX_PLIST_NUM; i++)
2078                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2079         INIT_LIST_HEAD(&dcc->wait_list);
2080         INIT_LIST_HEAD(&dcc->fstrim_list);
2081         mutex_init(&dcc->cmd_lock);
2082         atomic_set(&dcc->issued_discard, 0);
2083         atomic_set(&dcc->queued_discard, 0);
2084         atomic_set(&dcc->discard_cmd_cnt, 0);
2085         dcc->nr_discards = 0;
2086         dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2087         dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2088         dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2089         dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2090         dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2091         dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2092         dcc->undiscard_blks = 0;
2093         dcc->next_pos = 0;
2094         dcc->root = RB_ROOT_CACHED;
2095         dcc->rbtree_check = false;
2096
2097         init_waitqueue_head(&dcc->discard_wait_queue);
2098         SM_I(sbi)->dcc_info = dcc;
2099 init_thread:
2100         err = f2fs_start_discard_thread(sbi);
2101         if (err) {
2102                 kfree(dcc);
2103                 SM_I(sbi)->dcc_info = NULL;
2104         }
2105
2106         return err;
2107 }
2108
2109 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2110 {
2111         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2112
2113         if (!dcc)
2114                 return;
2115
2116         f2fs_stop_discard_thread(sbi);
2117
2118         /*
2119          * Recovery can cache discard commands, so the error path of
2120          * fill_super() needs a chance to handle them.
2121          */
2122         f2fs_issue_discard_timeout(sbi);
2123
2124         kfree(dcc);
2125         SM_I(sbi)->dcc_info = NULL;
2126 }
2127
2128 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2129 {
2130         struct sit_info *sit_i = SIT_I(sbi);
2131
2132         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2133                 sit_i->dirty_sentries++;
2134                 return false;
2135         }
2136
2137         return true;
2138 }
2139
2140 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2141                                         unsigned int segno, int modified)
2142 {
2143         struct seg_entry *se = get_seg_entry(sbi, segno);
2144
2145         se->type = type;
2146         if (modified)
2147                 __mark_sit_entry_dirty(sbi, segno);
2148 }
2149
2150 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2151                                                                 block_t blkaddr)
2152 {
2153         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2154
2155         if (segno == NULL_SEGNO)
2156                 return 0;
2157         return get_seg_entry(sbi, segno)->mtime;
2158 }
2159
2160 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2161                                                 unsigned long long old_mtime)
2162 {
2163         struct seg_entry *se;
2164         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2165         unsigned long long ctime = get_mtime(sbi, false);
2166         unsigned long long mtime = old_mtime ? old_mtime : ctime;
2167
2168         if (segno == NULL_SEGNO)
2169                 return;
2170
2171         se = get_seg_entry(sbi, segno);
2172
2173         if (!se->mtime)
2174                 se->mtime = mtime;
2175         else
2176                 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2177                                                 se->valid_blocks + 1);
2178
2179         if (ctime > SIT_I(sbi)->max_mtime)
2180                 SIT_I(sbi)->max_mtime = ctime;
2181 }
2182
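/*
 * Update the SIT entry that covers @blkaddr: adjust the valid block count by
 * @del (+1 when a block is allocated, -1 when it is invalidated) and keep the
 * current/checkpoint/discard bitmaps consistent, flagging any mismatch.
 */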
2183 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2184 {
2185         struct seg_entry *se;
2186         unsigned int segno, offset;
2187         long int new_vblocks;
2188         bool exist;
2189 #ifdef CONFIG_F2FS_CHECK_FS
2190         bool mir_exist;
2191 #endif
2192
2193         segno = GET_SEGNO(sbi, blkaddr);
2194
2195         se = get_seg_entry(sbi, segno);
2196         new_vblocks = se->valid_blocks + del;
2197         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2198
2199         f2fs_bug_on(sbi, (new_vblocks < 0 ||
2200                         (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2201
2202         se->valid_blocks = new_vblocks;
2203
2204         /* Update valid block bitmap */
2205         if (del > 0) {
2206                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2207 #ifdef CONFIG_F2FS_CHECK_FS
2208                 mir_exist = f2fs_test_and_set_bit(offset,
2209                                                 se->cur_valid_map_mir);
2210                 if (unlikely(exist != mir_exist)) {
2211                         f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2212                                  blkaddr, exist);
2213                         f2fs_bug_on(sbi, 1);
2214                 }
2215 #endif
2216                 if (unlikely(exist)) {
2217                         f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2218                                  blkaddr);
2219                         f2fs_bug_on(sbi, 1);
2220                         se->valid_blocks--;
2221                         del = 0;
2222                 }
2223
2224                 if (f2fs_block_unit_discard(sbi) &&
2225                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2226                         sbi->discard_blks--;
2227
2228                 /*
2229                  * SSR should never reuse a block which is checkpointed
2230                  * or newly invalidated.
2231                  */
2232                 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2233                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2234                                 se->ckpt_valid_blocks++;
2235                 }
2236         } else {
2237                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2238 #ifdef CONFIG_F2FS_CHECK_FS
2239                 mir_exist = f2fs_test_and_clear_bit(offset,
2240                                                 se->cur_valid_map_mir);
2241                 if (unlikely(exist != mir_exist)) {
2242                         f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2243                                  blkaddr, exist);
2244                         f2fs_bug_on(sbi, 1);
2245                 }
2246 #endif
2247                 if (unlikely(!exist)) {
2248                         f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2249                                  blkaddr);
2250                         f2fs_bug_on(sbi, 1);
2251                         se->valid_blocks++;
2252                         del = 0;
2253                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2254                         /*
2255                          * If checkpoints are off, we must not reuse data that
2256                          * was used in the previous checkpoint. If it was used
2257                          * before, we must track that to know how much space we
2258                          * really have.
2259                          */
2260                         if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2261                                 spin_lock(&sbi->stat_lock);
2262                                 sbi->unusable_block_count++;
2263                                 spin_unlock(&sbi->stat_lock);
2264                         }
2265                 }
2266
2267                 if (f2fs_block_unit_discard(sbi) &&
2268                         f2fs_test_and_clear_bit(offset, se->discard_map))
2269                         sbi->discard_blks++;
2270         }
2271         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2272                 se->ckpt_valid_blocks += del;
2273
2274         __mark_sit_entry_dirty(sbi, segno);
2275
2276         /* update total number of valid blocks to be written in ckpt area */
2277         SIT_I(sbi)->written_valid_blocks += del;
2278
2279         if (__is_large_section(sbi))
2280                 get_sec_entry(sbi, segno)->valid_blocks += del;
2281 }
2282
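/*
 * Invalidate the block at @addr: drop any cached meta/compress pages for it,
 * decrement the owning segment's valid block count, and mark that segment
 * dirty so the cleaner can reclaim it.
 */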
2283 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2284 {
2285         unsigned int segno = GET_SEGNO(sbi, addr);
2286         struct sit_info *sit_i = SIT_I(sbi);
2287
2288         f2fs_bug_on(sbi, addr == NULL_ADDR);
2289         if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2290                 return;
2291
2292         invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2293         f2fs_invalidate_compress_page(sbi, addr);
2294
2295         /* add it into sit main buffer */
2296         down_write(&sit_i->sentry_lock);
2297
2298         update_segment_mtime(sbi, addr, 0);
2299         update_sit_entry(sbi, addr, -1);
2300
2301         /* add it into dirty seglist */
2302         locate_dirty_segment(sbi, segno);
2303
2304         up_write(&sit_i->sentry_lock);
2305 }
2306
2307 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2308 {
2309         struct sit_info *sit_i = SIT_I(sbi);
2310         unsigned int segno, offset;
2311         struct seg_entry *se;
2312         bool is_cp = false;
2313
2314         if (!__is_valid_data_blkaddr(blkaddr))
2315                 return true;
2316
2317         down_read(&sit_i->sentry_lock);
2318
2319         segno = GET_SEGNO(sbi, blkaddr);
2320         se = get_seg_entry(sbi, segno);
2321         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2322
2323         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2324                 is_cp = true;
2325
2326         up_read(&sit_i->sentry_lock);
2327
2328         return is_cp;
2329 }
2330
2331 /*
2332  * This function must be called with the curseg_mutex lock held
2333  */
2334 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2335                                         struct f2fs_summary *sum)
2336 {
2337         struct curseg_info *curseg = CURSEG_I(sbi, type);
2338         void *addr = curseg->sum_blk;
2339
2340         addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2341         memcpy(addr, sum, sizeof(struct f2fs_summary));
2342 }
2343
2344 /*
2345  * Calculate the number of current summary pages for writing
2346  */
2347 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2348 {
2349         int valid_sum_count = 0;
2350         int i, sum_in_page;
2351
2352         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2353                 if (sbi->ckpt->alloc_type[i] == SSR)
2354                         valid_sum_count += sbi->blocks_per_seg;
2355                 else {
2356                         if (for_ra)
2357                                 valid_sum_count += le16_to_cpu(
2358                                         F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2359                         else
2360                                 valid_sum_count += curseg_blkoff(sbi, i);
2361                 }
2362         }
2363
2364         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2365                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2366         if (valid_sum_count <= sum_in_page)
2367                 return 1;
2368         else if ((valid_sum_count - sum_in_page) <=
2369                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2370                 return 2;
2371         return 3;
2372 }
2373
2374 /*
2375  * Caller should put this summary page
2376  */
2377 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2378 {
2379         if (unlikely(f2fs_cp_error(sbi)))
2380                 return ERR_PTR(-EIO);
2381         return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2382 }
2383
2384 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2385                                         void *src, block_t blk_addr)
2386 {
2387         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2388
2389         memcpy(page_address(page), src, PAGE_SIZE);
2390         set_page_dirty(page);
2391         f2fs_put_page(page, 1);
2392 }
2393
2394 static void write_sum_page(struct f2fs_sb_info *sbi,
2395                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2396 {
2397         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2398 }
2399
2400 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2401                                                 int type, block_t blk_addr)
2402 {
2403         struct curseg_info *curseg = CURSEG_I(sbi, type);
2404         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2405         struct f2fs_summary_block *src = curseg->sum_blk;
2406         struct f2fs_summary_block *dst;
2407
2408         dst = (struct f2fs_summary_block *)page_address(page);
2409         memset(dst, 0, PAGE_SIZE);
2410
2411         mutex_lock(&curseg->curseg_mutex);
2412
2413         down_read(&curseg->journal_rwsem);
2414         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2415         up_read(&curseg->journal_rwsem);
2416
2417         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2418         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2419
2420         mutex_unlock(&curseg->curseg_mutex);
2421
2422         set_page_dirty(page);
2423         f2fs_put_page(page, 1);
2424 }
2425
2426 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2427                                 struct curseg_info *curseg, int type)
2428 {
2429         unsigned int segno = curseg->segno + 1;
2430         struct free_segmap_info *free_i = FREE_I(sbi);
2431
2432         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2433                 return !test_bit(segno, free_i->free_segmap);
2434         return 0;
2435 }
2436
2437 /*
2438  * Find a new segment in the free segment bitmap, in the right allocation order.
2439  * This function must succeed; otherwise it is a BUG.
2440  */
2441 static void get_new_segment(struct f2fs_sb_info *sbi,
2442                         unsigned int *newseg, bool new_sec, int dir)
2443 {
2444         struct free_segmap_info *free_i = FREE_I(sbi);
2445         unsigned int segno, secno, zoneno;
2446         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2447         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2448         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2449         unsigned int left_start = hint;
2450         bool init = true;
2451         int go_left = 0;
2452         int i;
2453
2454         spin_lock(&free_i->segmap_lock);
2455
2456         if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2457                 segno = find_next_zero_bit(free_i->free_segmap,
2458                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2459                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2460                         goto got_it;
2461         }
2462 find_other_zone:
2463         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2464         if (secno >= MAIN_SECS(sbi)) {
2465                 if (dir == ALLOC_RIGHT) {
2466                         secno = find_first_zero_bit(free_i->free_secmap,
2467                                                         MAIN_SECS(sbi));
2468                         f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2469                 } else {
2470                         go_left = 1;
2471                         left_start = hint - 1;
2472                 }
2473         }
2474         if (go_left == 0)
2475                 goto skip_left;
2476
2477         while (test_bit(left_start, free_i->free_secmap)) {
2478                 if (left_start > 0) {
2479                         left_start--;
2480                         continue;
2481                 }
2482                 left_start = find_first_zero_bit(free_i->free_secmap,
2483                                                         MAIN_SECS(sbi));
2484                 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2485                 break;
2486         }
2487         secno = left_start;
2488 skip_left:
2489         segno = GET_SEG_FROM_SEC(sbi, secno);
2490         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2491
2492         /* give up on finding another zone */
2493         if (!init)
2494                 goto got_it;
2495         if (sbi->secs_per_zone == 1)
2496                 goto got_it;
2497         if (zoneno == old_zoneno)
2498                 goto got_it;
2499         if (dir == ALLOC_LEFT) {
2500                 if (!go_left && zoneno + 1 >= total_zones)
2501                         goto got_it;
2502                 if (go_left && zoneno == 0)
2503                         goto got_it;
2504         }
2505         for (i = 0; i < NR_CURSEG_TYPE; i++)
2506                 if (CURSEG_I(sbi, i)->zone == zoneno)
2507                         break;
2508
2509         if (i < NR_CURSEG_TYPE) {
2510                 /* zone is in use, try another */
2511                 if (go_left)
2512                         hint = zoneno * sbi->secs_per_zone - 1;
2513                 else if (zoneno + 1 >= total_zones)
2514                         hint = 0;
2515                 else
2516                         hint = (zoneno + 1) * sbi->secs_per_zone;
2517                 init = false;
2518                 goto find_other_zone;
2519         }
2520 got_it:
2521         /* set it as dirty segment in free segmap */
2522         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2523         __set_inuse(sbi, segno);
2524         *newseg = segno;
2525         spin_unlock(&free_i->segmap_lock);
2526 }
2527
2528 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2529 {
2530         struct curseg_info *curseg = CURSEG_I(sbi, type);
2531         struct summary_footer *sum_footer;
2532         unsigned short seg_type = curseg->seg_type;
2533
2534         curseg->inited = true;
2535         curseg->segno = curseg->next_segno;
2536         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2537         curseg->next_blkoff = 0;
2538         curseg->next_segno = NULL_SEGNO;
2539
2540         sum_footer = &(curseg->sum_blk->footer);
2541         memset(sum_footer, 0, sizeof(struct summary_footer));
2542
2543         sanity_check_seg_type(sbi, seg_type);
2544
2545         if (IS_DATASEG(seg_type))
2546                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2547         if (IS_NODESEG(seg_type))
2548                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2549         __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2550 }
2551
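/*
 * Pick the segment number used as the allocation hint for the next segment
 * of this log, honoring the randomized (fragmenting) allocation modes,
 * NOHEAP, the reuse allocation mode and the last GC victim hint.
 */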
2552 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2553 {
2554         struct curseg_info *curseg = CURSEG_I(sbi, type);
2555         unsigned short seg_type = curseg->seg_type;
2556
2557         sanity_check_seg_type(sbi, seg_type);
2558         if (f2fs_need_rand_seg(sbi))
2559                 return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
2560
2561         /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2562         if (__is_large_section(sbi))
2563                 return curseg->segno;
2564
2565         /* the in-memory log may not be located on any segment after mount */
2566         if (!curseg->inited)
2567                 return 0;
2568
2569         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2570                 return 0;
2571
2572         if (test_opt(sbi, NOHEAP) &&
2573                 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
2574                 return 0;
2575
2576         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2577                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2578
2579         /* find segments from 0 to reuse freed segments */
2580         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2581                 return 0;
2582
2583         return curseg->segno;
2584 }
2585
2586 /*
2587  * Allocate a current working segment.
2588  * This function always allocates a free segment in LFS manner.
2589  */
2590 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2591 {
2592         struct curseg_info *curseg = CURSEG_I(sbi, type);
2593         unsigned short seg_type = curseg->seg_type;
2594         unsigned int segno = curseg->segno;
2595         int dir = ALLOC_LEFT;
2596
2597         if (curseg->inited)
2598                 write_sum_page(sbi, curseg->sum_blk,
2599                                 GET_SUM_BLOCK(sbi, segno));
2600         if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
2601                 dir = ALLOC_RIGHT;
2602
2603         if (test_opt(sbi, NOHEAP))
2604                 dir = ALLOC_RIGHT;
2605
2606         segno = __get_next_segno(sbi, type);
2607         get_new_segment(sbi, &segno, new_sec, dir);
2608         curseg->next_segno = segno;
2609         reset_curseg(sbi, type, 1);
2610         curseg->alloc_type = LFS;
2611         if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2612                 curseg->fragment_remained_chunk =
2613                                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2614 }
2615
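/*
 * Return the first block offset at or after @start that is free in both the
 * current and the checkpointed validity bitmaps of segment @segno.
 */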
2616 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2617                                         int segno, block_t start)
2618 {
2619         struct seg_entry *se = get_seg_entry(sbi, segno);
2620         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2621         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2622         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2623         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2624         int i;
2625
2626         for (i = 0; i < entries; i++)
2627                 target_map[i] = ckpt_map[i] | cur_map[i];
2628
2629         return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2630 }
2631
2632 /*
2633  * If a segment is written in LFS manner, the next block offset is obtained
2634  * simply by incrementing the current block offset. If it is written in SSR
2635  * manner, the next block offset is obtained by calling __next_free_blkoff.
2636  */
2637 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2638                                 struct curseg_info *seg)
2639 {
2640         if (seg->alloc_type == SSR) {
2641                 seg->next_blkoff =
2642                         __next_free_blkoff(sbi, seg->segno,
2643                                                 seg->next_blkoff + 1);
2644         } else {
2645                 seg->next_blkoff++;
2646                 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
2647                         /* To allocate block chunks of different sizes, use a random number */
2648                         if (--seg->fragment_remained_chunk <= 0) {
2649                                 seg->fragment_remained_chunk =
2650                                    get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2651                                 seg->next_blkoff +=
2652                                    get_random_u32_inclusive(1, sbi->max_fragment_hole);
2653                         }
2654                 }
2655         }
2656 }
2657
2658 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2659 {
2660         return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
2661 }
2662
2663 /*
2664  * This function always allocates a used segment (from the dirty seglist) in SSR
2665  * manner, so it should recover the existing segment information of the valid blocks.
2666  */
2667 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2668 {
2669         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2670         struct curseg_info *curseg = CURSEG_I(sbi, type);
2671         unsigned int new_segno = curseg->next_segno;
2672         struct f2fs_summary_block *sum_node;
2673         struct page *sum_page;
2674
2675         write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2676
2677         __set_test_and_inuse(sbi, new_segno);
2678
2679         mutex_lock(&dirty_i->seglist_lock);
2680         __remove_dirty_segment(sbi, new_segno, PRE);
2681         __remove_dirty_segment(sbi, new_segno, DIRTY);
2682         mutex_unlock(&dirty_i->seglist_lock);
2683
2684         reset_curseg(sbi, type, 1);
2685         curseg->alloc_type = SSR;
2686         curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2687
2688         sum_page = f2fs_get_sum_page(sbi, new_segno);
2689         if (IS_ERR(sum_page)) {
2690                 /* GC won't be able to use stale summary pages due to cp_error */
2691                 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2692                 return;
2693         }
2694         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2695         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2696         f2fs_put_page(sum_page, 1);
2697 }
2698
2699 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2700                                 int alloc_mode, unsigned long long age);
2701
2702 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2703                                         int target_type, int alloc_mode,
2704                                         unsigned long long age)
2705 {
2706         struct curseg_info *curseg = CURSEG_I(sbi, type);
2707
2708         curseg->seg_type = target_type;
2709
2710         if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2711                 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2712
2713                 curseg->seg_type = se->type;
2714                 change_curseg(sbi, type);
2715         } else {
2716                 /* allocate cold segment by default */
2717                 curseg->seg_type = CURSEG_COLD_DATA;
2718                 new_curseg(sbi, type, true);
2719         }
2720         stat_inc_seg_type(sbi, curseg);
2721 }
2722
2723 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
2724 {
2725         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2726
2727         if (!sbi->am.atgc_enabled)
2728                 return;
2729
2730         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2731
2732         mutex_lock(&curseg->curseg_mutex);
2733         down_write(&SIT_I(sbi)->sentry_lock);
2734
2735         get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
2736
2737         up_write(&SIT_I(sbi)->sentry_lock);
2738         mutex_unlock(&curseg->curseg_mutex);
2739
2740         f2fs_up_read(&SM_I(sbi)->curseg_lock);
2741 }
2742
2743 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
2744 {
2745         __f2fs_init_atgc_curseg(sbi);
2746 }
2747
2748 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2749 {
2750         struct curseg_info *curseg = CURSEG_I(sbi, type);
2751
2752         mutex_lock(&curseg->curseg_mutex);
2753         if (!curseg->inited)
2754                 goto out;
2755
2756         if (get_valid_blocks(sbi, curseg->segno, false)) {
2757                 write_sum_page(sbi, curseg->sum_blk,
2758                                 GET_SUM_BLOCK(sbi, curseg->segno));
2759         } else {
2760                 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2761                 __set_test_and_free(sbi, curseg->segno, true);
2762                 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2763         }
2764 out:
2765         mutex_unlock(&curseg->curseg_mutex);
2766 }
2767
2768 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
2769 {
2770         __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2771
2772         if (sbi->am.atgc_enabled)
2773                 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2774 }
2775
2776 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2777 {
2778         struct curseg_info *curseg = CURSEG_I(sbi, type);
2779
2780         mutex_lock(&curseg->curseg_mutex);
2781         if (!curseg->inited)
2782                 goto out;
2783         if (get_valid_blocks(sbi, curseg->segno, false))
2784                 goto out;
2785
2786         mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2787         __set_test_and_inuse(sbi, curseg->segno);
2788         mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2789 out:
2790         mutex_unlock(&curseg->curseg_mutex);
2791 }
2792
2793 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
2794 {
2795         __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2796
2797         if (sbi->am.atgc_enabled)
2798                 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2799 }
2800
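/*
 * Pick a partially valid segment that can be reused in SSR manner.  Try the
 * same temperature first, then the other node/data temperatures, and finally
 * (only with checkpoint disabled) any free segment.  Returns 1 and sets
 * curseg->next_segno on success, 0 otherwise.
 */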
2801 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2802                                 int alloc_mode, unsigned long long age)
2803 {
2804         struct curseg_info *curseg = CURSEG_I(sbi, type);
2805         const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2806         unsigned segno = NULL_SEGNO;
2807         unsigned short seg_type = curseg->seg_type;
2808         int i, cnt;
2809         bool reversed = false;
2810
2811         sanity_check_seg_type(sbi, seg_type);
2812
2813         /* f2fs_need_SSR() already forces us to do this */
2814         if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
2815                 curseg->next_segno = segno;
2816                 return 1;
2817         }
2818
2819         /* For node segments, let's do SSR more intensively */
2820         if (IS_NODESEG(seg_type)) {
2821                 if (seg_type >= CURSEG_WARM_NODE) {
2822                         reversed = true;
2823                         i = CURSEG_COLD_NODE;
2824                 } else {
2825                         i = CURSEG_HOT_NODE;
2826                 }
2827                 cnt = NR_CURSEG_NODE_TYPE;
2828         } else {
2829                 if (seg_type >= CURSEG_WARM_DATA) {
2830                         reversed = true;
2831                         i = CURSEG_COLD_DATA;
2832                 } else {
2833                         i = CURSEG_HOT_DATA;
2834                 }
2835                 cnt = NR_CURSEG_DATA_TYPE;
2836         }
2837
2838         for (; cnt-- > 0; reversed ? i-- : i++) {
2839                 if (i == seg_type)
2840                         continue;
2841                 if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
2842                         curseg->next_segno = segno;
2843                         return 1;
2844                 }
2845         }
2846
2847         /* find valid_blocks=0 in dirty list */
2848         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2849                 segno = get_free_segment(sbi);
2850                 if (segno != NULL_SEGNO) {
2851                         curseg->next_segno = segno;
2852                         return 1;
2853                 }
2854         }
2855         return 0;
2856 }
2857
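/*
 * Decide whether this log must open a brand-new segment instead of reusing
 * a partially valid one through SSR.
 */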
2858 static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
2859 {
2860         struct curseg_info *curseg = CURSEG_I(sbi, type);
2861
2862         if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2863             curseg->seg_type == CURSEG_WARM_NODE)
2864                 return true;
2865         if (curseg->alloc_type == LFS &&
2866             is_next_segment_free(sbi, curseg, type) &&
2867             likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2868                 return true;
2869         if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
2870                 return true;
2871         return false;
2872 }
2873
2874 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
2875                                         unsigned int start, unsigned int end)
2876 {
2877         struct curseg_info *curseg = CURSEG_I(sbi, type);
2878         unsigned int segno;
2879
2880         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2881         mutex_lock(&curseg->curseg_mutex);
2882         down_write(&SIT_I(sbi)->sentry_lock);
2883
2884         segno = CURSEG_I(sbi, type)->segno;
2885         if (segno < start || segno > end)
2886                 goto unlock;
2887
2888         if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
2889                 change_curseg(sbi, type);
2890         else
2891                 new_curseg(sbi, type, true);
2892
2893         stat_inc_seg_type(sbi, curseg);
2894
2895         locate_dirty_segment(sbi, segno);
2896 unlock:
2897         up_write(&SIT_I(sbi)->sentry_lock);
2898
2899         if (segno != curseg->segno)
2900                 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
2901                             type, segno, curseg->segno);
2902
2903         mutex_unlock(&curseg->curseg_mutex);
2904         f2fs_up_read(&SM_I(sbi)->curseg_lock);
2905 }
2906
2907 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
2908                                                 bool new_sec, bool force)
2909 {
2910         struct curseg_info *curseg = CURSEG_I(sbi, type);
2911         unsigned int old_segno;
2912
2913         if (!curseg->inited)
2914                 goto alloc;
2915
2916         if (force || curseg->next_blkoff ||
2917                 get_valid_blocks(sbi, curseg->segno, new_sec))
2918                 goto alloc;
2919
2920         if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
2921                 return;
2922 alloc:
2923         old_segno = curseg->segno;
2924         new_curseg(sbi, type, true);
2925         stat_inc_seg_type(sbi, curseg);
2926         locate_dirty_segment(sbi, old_segno);
2927 }
2928
2929 static void __allocate_new_section(struct f2fs_sb_info *sbi,
2930                                                 int type, bool force)
2931 {
2932         __allocate_new_segment(sbi, type, true, force);
2933 }
2934
2935 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
2936 {
2937         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2938         down_write(&SIT_I(sbi)->sentry_lock);
2939         __allocate_new_section(sbi, type, force);
2940         up_write(&SIT_I(sbi)->sentry_lock);
2941         f2fs_up_read(&SM_I(sbi)->curseg_lock);
2942 }
2943
2944 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2945 {
2946         int i;
2947
2948         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2949         down_write(&SIT_I(sbi)->sentry_lock);
2950         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
2951                 __allocate_new_segment(sbi, i, false, false);
2952         up_write(&SIT_I(sbi)->sentry_lock);
2953         f2fs_up_read(&SM_I(sbi)->curseg_lock);
2954 }
2955
2956 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2957                                                 struct cp_control *cpc)
2958 {
2959         __u64 trim_start = cpc->trim_start;
2960         bool has_candidate = false;
2961
2962         down_write(&SIT_I(sbi)->sentry_lock);
2963         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2964                 if (add_discard_addrs(sbi, cpc, true)) {
2965                         has_candidate = true;
2966                         break;
2967                 }
2968         }
2969         up_write(&SIT_I(sbi)->sentry_lock);
2970
2971         cpc->trim_start = trim_start;
2972         return has_candidate;
2973 }
2974
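/*
 * Issue the pending discard commands overlapping [start, end] for
 * fstrim: walk the discard rbtree under cmd_lock, skip commands smaller
 * than the policy granularity or already past D_PREP (those are moved
 * to the fstrim list), and submit the rest under a block plug. After
 * max_requests submissions in one pass, drop the lock, wait for the
 * outstanding discards and restart from the last issued address.
 * Returns the number of blocks accounted as trimmed while waiting.
 */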
2975 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2976                                         struct discard_policy *dpolicy,
2977                                         unsigned int start, unsigned int end)
2978 {
2979         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2980         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2981         struct rb_node **insert_p = NULL, *insert_parent = NULL;
2982         struct discard_cmd *dc;
2983         struct blk_plug plug;
2984         int issued;
2985         unsigned int trimmed = 0;
2986
2987 next:
2988         issued = 0;
2989
2990         mutex_lock(&dcc->cmd_lock);
2991         if (unlikely(dcc->rbtree_check))
2992                 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2993                                                         &dcc->root, false));
2994
2995         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2996                                         NULL, start,
2997                                         (struct rb_entry **)&prev_dc,
2998                                         (struct rb_entry **)&next_dc,
2999                                         &insert_p, &insert_parent, true, NULL);
3000         if (!dc)
3001                 dc = next_dc;
3002
3003         blk_start_plug(&plug);
3004
3005         while (dc && dc->lstart <= end) {
3006                 struct rb_node *node;
3007                 int err = 0;
3008
3009                 if (dc->len < dpolicy->granularity)
3010                         goto skip;
3011
3012                 if (dc->state != D_PREP) {
3013                         list_move_tail(&dc->list, &dcc->fstrim_list);
3014                         goto skip;
3015                 }
3016
3017                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3018
3019                 if (issued >= dpolicy->max_requests) {
3020                         start = dc->lstart + dc->len;
3021
3022                         if (err)
3023                                 __remove_discard_cmd(sbi, dc);
3024
3025                         blk_finish_plug(&plug);
3026                         mutex_unlock(&dcc->cmd_lock);
3027                         trimmed += __wait_all_discard_cmd(sbi, NULL);
3028                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3029                         goto next;
3030                 }
3031 skip:
3032                 node = rb_next(&dc->rb_node);
3033                 if (err)
3034                         __remove_discard_cmd(sbi, dc);
3035                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3036
3037                 if (fatal_signal_pending(current))
3038                         break;
3039         }
3040
3041         blk_finish_plug(&plug);
3042         mutex_unlock(&dcc->cmd_lock);
3043
3044         return trimmed;
3045 }
3046
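/*
 * FITRIM entry point: clamp the byte range to the main area, write a
 * checkpoint under gc_lock so that prefree segments turn into discard
 * candidates, then either leave those candidates to the background
 * discard thread (when runtime discard is enabled) or issue and wait
 * for them synchronously using the FSTRIM discard policy. On success,
 * range->len is updated to the number of bytes actually trimmed.
 */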
3047 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3048 {
3049         __u64 start = F2FS_BYTES_TO_BLK(range->start);
3050         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3051         unsigned int start_segno, end_segno;
3052         block_t start_block, end_block;
3053         struct cp_control cpc;
3054         struct discard_policy dpolicy;
3055         unsigned long long trimmed = 0;
3056         int err = 0;
3057         bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3058
3059         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3060                 return -EINVAL;
3061
3062         if (end < MAIN_BLKADDR(sbi))
3063                 goto out;
3064
3065         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3066                 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3067                 return -EFSCORRUPTED;
3068         }
3069
3070         /* start/end segment number in main_area */
3071         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3072         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3073                                                 GET_SEGNO(sbi, end);
3074         if (need_align) {
3075                 start_segno = rounddown(start_segno, sbi->segs_per_sec);
3076                 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
3077         }
3078
3079         cpc.reason = CP_DISCARD;
3080         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3081         cpc.trim_start = start_segno;
3082         cpc.trim_end = end_segno;
3083
3084         if (sbi->discard_blks == 0)
3085                 goto out;
3086
3087         f2fs_down_write(&sbi->gc_lock);
3088         err = f2fs_write_checkpoint(sbi, &cpc);
3089         f2fs_up_write(&sbi->gc_lock);
3090         if (err)
3091                 goto out;
3092
3093         /*
3094          * We filed discard candidates, but we don't need to wait for all of
3095          * them here: with the runtime discard option enabled, they will be
3096          * issued during idle time anyway. Users are expected to rely on either
3097          * runtime discard or periodic fstrim, not both.
3098          */
3099         if (f2fs_realtime_discard_enable(sbi))
3100                 goto out;
3101
3102         start_block = START_BLOCK(sbi, start_segno);
3103         end_block = START_BLOCK(sbi, end_segno + 1);
3104
3105         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3106         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3107                                         start_block, end_block);
3108
3109         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3110                                         start_block, end_block);
3111 out:
3112         if (!err)
3113                 range->len = F2FS_BLK_TO_BYTES(trimmed);
3114         return err;
3115 }
3116
3117 static bool __has_curseg_space(struct f2fs_sb_info *sbi,
3118                                         struct curseg_info *curseg)
3119 {
3120         return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
3121                                                         curseg->segno);
3122 }
3123
3124 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
3125 {
3126         switch (hint) {
3127         case WRITE_LIFE_SHORT:
3128                 return CURSEG_HOT_DATA;
3129         case WRITE_LIFE_EXTREME:
3130                 return CURSEG_COLD_DATA;
3131         default:
3132                 return CURSEG_WARM_DATA;
3133         }
3134 }
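/*
 * For instance, a per-file hint set from userspace, e.g.
 *
 *         uint64_t hint = RWH_WRITE_LIFE_SHORT;
 *         fcntl(fd, F_SET_RW_HINT, &hint);
 *
 * reaches this mapping through inode->i_write_hint and steers the data
 * into the hot data log; RWH_WRITE_LIFE_EXTREME lands in cold data, and
 * any other hint falls back to warm data.
 */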
3135
3136 static int __get_segment_type_2(struct f2fs_io_info *fio)
3137 {
3138         if (fio->type == DATA)
3139                 return CURSEG_HOT_DATA;
3140         else
3141                 return CURSEG_HOT_NODE;
3142 }
3143
3144 static int __get_segment_type_4(struct f2fs_io_info *fio)
3145 {
3146         if (fio->type == DATA) {
3147                 struct inode *inode = fio->page->mapping->host;
3148
3149                 if (S_ISDIR(inode->i_mode))
3150                         return CURSEG_HOT_DATA;
3151                 else
3152                         return CURSEG_COLD_DATA;
3153         } else {
3154                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3155                         return CURSEG_WARM_NODE;
3156                 else
3157                         return CURSEG_COLD_NODE;
3158         }
3159 }
3160
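/*
 * Classify a data block by the age recorded in the extent cache: ages
 * at or below hot_data_age_threshold map to the hot data log, ages at
 * or below warm_data_age_threshold to the warm data log, and anything
 * older to the cold data log. NO_CHECK_TYPE is returned when no age is
 * cached for this offset (or the cached age is zero).
 */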
3161 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3162 {
3163         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3164         struct extent_info ei;
3165
3166         if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3167                 if (!ei.age)
3168                         return NO_CHECK_TYPE;
3169                 if (ei.age <= sbi->hot_data_age_threshold)
3170                         return CURSEG_HOT_DATA;
3171                 if (ei.age <= sbi->warm_data_age_threshold)
3172                         return CURSEG_WARM_DATA;
3173                 return CURSEG_COLD_DATA;
3174         }
3175         return NO_CHECK_TYPE;
3176 }
3177
3178 static int __get_segment_type_6(struct f2fs_io_info *fio)
3179 {
3180         if (fio->type == DATA) {
3181                 struct inode *inode = fio->page->mapping->host;
3182                 int type;
3183
3184                 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3185                         return CURSEG_COLD_DATA_PINNED;
3186
3187                 if (page_private_gcing(fio->page)) {
3188                         if (fio->sbi->am.atgc_enabled &&
3189                                 (fio->io_type == FS_DATA_IO) &&
3190                                 (fio->sbi->gc_mode != GC_URGENT_HIGH))
3191                                 return CURSEG_ALL_DATA_ATGC;
3192                         else
3193                                 return CURSEG_COLD_DATA;
3194                 }
3195                 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3196                         return CURSEG_COLD_DATA;
3197
3198                 type = __get_age_segment_type(inode, fio->page->index);
3199                 if (type != NO_CHECK_TYPE)
3200                         return type;
3201
3202                 if (file_is_hot(inode) ||
3203                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3204                                 f2fs_is_cow_file(inode))
3205                         return CURSEG_HOT_DATA;
3206                 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3207         } else {
3208                 if (IS_DNODE(fio->page))
3209                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3210                                                 CURSEG_HOT_NODE;
3211                 return CURSEG_COLD_NODE;
3212         }
3213 }
3214
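/*
 * Dispatch on the number of active logs configured at mount time
 * (2, 4 or 6) to pick the target log type, then derive the I/O
 * temperature (HOT/WARM/COLD) from that type so the write is routed to
 * the matching per-temperature write path.
 */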
3215 static int __get_segment_type(struct f2fs_io_info *fio)
3216 {
3217         int type = 0;
3218
3219         switch (F2FS_OPTION(fio->sbi).active_logs) {
3220         case 2:
3221                 type = __get_segment_type_2(fio);
3222                 break;
3223         case 4:
3224                 type = __get_segment_type_4(fio);
3225                 break;
3226         case 6:
3227                 type = __get_segment_type_6(fio);
3228                 break;
3229         default:
3230                 f2fs_bug_on(fio->sbi, true);
3231         }
3232
3233         if (IS_HOT(type))
3234                 fio->temp = HOT;
3235         else if (IS_WARM(type))
3236                 fio->temp = WARM;
3237         else
3238                 fio->temp = COLD;
3239         return type;
3240 }
3241
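/*
 * Core allocator for out-of-place writes: under curseg_lock (read),
 * curseg_mutex and sentry_lock, hand out the next free block of the log
 * @type, record the summary entry, refresh segment mtimes (carrying the
 * victim segment's mtime when called for ATGC), and update the SIT
 * entries of the new and old addresses. When the current segment fills
 * up, a new one is opened (via AT_SSR for ATGC, otherwise LFS or SSR as
 * decided by need_new_seg()). If @fio is supplied, it is queued on the
 * matching per-temperature write list for later submission.
 */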
3242 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3243                 block_t old_blkaddr, block_t *new_blkaddr,
3244                 struct f2fs_summary *sum, int type,
3245                 struct f2fs_io_info *fio)
3246 {
3247         struct sit_info *sit_i = SIT_I(sbi);
3248         struct curseg_info *curseg = CURSEG_I(sbi, type);
3249         unsigned long long old_mtime;
3250         bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3251         struct seg_entry *se = NULL;
3252
3253         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3254
3255         mutex_lock(&curseg->curseg_mutex);
3256         down_write(&sit_i->sentry_lock);
3257
3258         if (from_gc) {
3259                 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3260                 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3261                 sanity_check_seg_type(sbi, se->type);
3262                 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3263         }
3264         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3265
3266         f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
3267
3268         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3269
3270         /*
3271          * __add_sum_entry() must be called while holding the curseg_mutex,
3272          * because this function updates a summary entry in the
3273          * current summary block.
3274          */
3275         __add_sum_entry(sbi, type, sum);
3276
3277         __refresh_next_blkoff(sbi, curseg);
3278
3279         stat_inc_block_count(sbi, curseg);
3280
3281         if (from_gc) {
3282                 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3283         } else {
3284                 update_segment_mtime(sbi, old_blkaddr, 0);
3285                 old_mtime = 0;
3286         }
3287         update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3288
3289         /*
3290          * SIT information should be updated before segment allocation,
3291          * since SSR needs the latest valid block information.
3292          */
3293         update_sit_entry(sbi, *new_blkaddr, 1);
3294         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3295                 update_sit_entry(sbi, old_blkaddr, -1);
3296
3297         if (!__has_curseg_space(sbi, curseg)) {
3298                 /*
3299                  * Flush out current segment and replace it with new segment.
3300                  */
3301                 if (from_gc) {
3302                         get_atssr_segment(sbi, type, se->type,
3303                                                 AT_SSR, se->mtime);
3304                 } else {
3305                         if (need_new_seg(sbi, type))
3306                                 new_curseg(sbi, type, false);
3307                         else
3308                                 change_curseg(sbi, type);
3309                         stat_inc_seg_type(sbi, curseg);
3310                 }
3311         }
3312         /*
3313          * segment dirty status should be updated after segment allocation,
3314          * so we only need to update the status once, after the previous
3315          * segment has been closed.
3316          */
3317         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3318         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3319
3320         if (IS_DATASEG(type))
3321                 atomic64_inc(&sbi->allocated_data_blocks);
3322
3323         up_write(&sit_i->sentry_lock);
3324
3325         if (page && IS_NODESEG(type)) {
3326                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3327
3328                 f2fs_inode_chksum_set(sbi, page);
3329         }
3330
3331         if (fio) {
3332                 struct f2fs_bio_info *io;
3333
3334                 if (F2FS_IO_ALIGNED(sbi))
3335                         fio->retry = false;
3336
3337                 INIT_LIST_HEAD(&fio->list);
3338                 fio->in_list = true;
3339                 io = sbi->write_io[fio->type] + fio->temp;
3340                 spin_lock(&io->io_lock);
3341                 list_add_tail(&fio->list, &io->io_list);
3342                 spin_unlock(&io->io_lock);
3343         }
3344
3345         mutex_unlock(&curseg->curseg_mutex);
3346
3347         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3348 }
3349
3350 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3351                                         block_t blkaddr, unsigned int blkcnt)
3352 {
3353         if (!f2fs_is_multi_device(sbi))
3354                 return;
3355
3356         while (1) {
3357                 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3358                 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3359
3360                 /* update device state for fsync */
3361                 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3362
3363                 /* update device state for checkpoint */
3364                 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3365                         spin_lock(&sbi->dev_lock);
3366                         f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3367                         spin_unlock(&sbi->dev_lock);
3368                 }
3369
3370                 if (blkcnt <= blks)
3371                         break;
3372                 blkcnt -= blks;
3373                 blkaddr += blks;
3374         }
3375 }
3376
3377 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3378 {
3379         int type = __get_segment_type(fio);
3380         bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3381
3382         if (keep_order)
3383                 f2fs_down_read(&fio->sbi->io_order_lock);
3384 reallocate:
3385         f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3386                         &fio->new_blkaddr, sum, type, fio);
3387         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) {
3388                 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3389                                         fio->old_blkaddr, fio->old_blkaddr);
3390                 f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr);
3391         }
3392
3393         /* write out the dirty page to the bdev */
3394         f2fs_submit_page_write(fio);
3395         if (fio->retry) {
3396                 fio->old_blkaddr = fio->new_blkaddr;
3397                 goto reallocate;
3398         }
3399
3400         f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3401
3402         if (keep_order)
3403                 f2fs_up_read(&fio->sbi->io_order_lock);
3404 }
3405
3406 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3407                                         enum iostat_type io_type)
3408 {
3409         struct f2fs_io_info fio = {
3410                 .sbi = sbi,
3411                 .type = META,
3412                 .temp = HOT,
3413                 .op = REQ_OP_WRITE,
3414                 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3415                 .old_blkaddr = page->index,
3416                 .new_blkaddr = page->index,
3417                 .page = page,
3418                 .encrypted_page = NULL,
3419                 .in_list = false,
3420         };
3421
3422         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3423                 fio.op_flags &= ~REQ_META;
3424
3425         set_page_writeback(page);
3426         ClearPageError(page);
3427         f2fs_submit_page_write(&fio);
3428
3429         stat_inc_meta_count(sbi, page->index);
3430         f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3431 }
3432
3433 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3434 {
3435         struct f2fs_summary sum;
3436
3437         set_summary(&sum, nid, 0, 0);
3438         do_write_page(&sum, fio);
3439
3440         f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3441 }
3442
3443 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3444                                         struct f2fs_io_info *fio)
3445 {
3446         struct f2fs_sb_info *sbi = fio->sbi;
3447         struct f2fs_summary sum;
3448
3449         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3450         if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3451                 f2fs_update_age_extent_cache(dn);
3452         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3453         do_write_page(&sum, fio);
3454         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3455
3456         f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3457 }
3458
3459 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3460 {
3461         int err;
3462         struct f2fs_sb_info *sbi = fio->sbi;
3463         unsigned int segno;
3464
3465         fio->new_blkaddr = fio->old_blkaddr;
3466         /* i/o temperature is needed for passing down write hints */
3467         __get_segment_type(fio);
3468
3469         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3470
3471         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3472                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3473                 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3474                           __func__, segno);
3475                 err = -EFSCORRUPTED;
3476                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3477                 goto drop_bio;
3478         }
3479
3480         if (f2fs_cp_error(sbi)) {
3481                 err = -EIO;
3482                 goto drop_bio;
3483         }
3484
3485         if (fio->post_read)
3486                 invalidate_mapping_pages(META_MAPPING(sbi),
3487                                 fio->new_blkaddr, fio->new_blkaddr);
3488
3489         stat_inc_inplace_blocks(fio->sbi);
3490
3491         if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
3492                 err = f2fs_merge_page_bio(fio);
3493         else
3494                 err = f2fs_submit_page_bio(fio);
3495         if (!err) {
3496                 f2fs_update_device_state(fio->sbi, fio->ino,
3497                                                 fio->new_blkaddr, 1);
3498                 f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3499                                                 fio->io_type, F2FS_BLKSIZE);
3500         }
3501
3502         return err;
3503 drop_bio:
3504         if (fio->bio && *(fio->bio)) {
3505                 struct bio *bio = *(fio->bio);
3506
3507                 bio->bi_status = BLK_STS_IOERR;
3508                 bio_endio(bio);
3509                 *(fio->bio) = NULL;
3510         }
3511         return err;
3512 }
3513
3514 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3515                                                 unsigned int segno)
3516 {
3517         int i;
3518
3519         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3520                 if (CURSEG_I(sbi, i)->segno == segno)
3521                         break;
3522         }
3523         return i;
3524 }
3525
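/*
 * Rebind a block to @new_blkaddr outside the normal allocator, as done
 * by roll-forward recovery and by callers such as GC that must restore
 * a mapping directly: temporarily point the chosen data curseg at the
 * target segment, record the summary at the exact block offset, update
 * the SIT entries for the new address (subject to @recover_newaddr) and
 * the old one, and restore the original curseg position when
 * @recover_curseg is set.
 */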
3526 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3527                                 block_t old_blkaddr, block_t new_blkaddr,
3528                                 bool recover_curseg, bool recover_newaddr,
3529                                 bool from_gc)
3530 {
3531         struct sit_info *sit_i = SIT_I(sbi);
3532         struct curseg_info *curseg;
3533         unsigned int segno, old_cursegno;
3534         struct seg_entry *se;
3535         int type;
3536         unsigned short old_blkoff;
3537         unsigned char old_alloc_type;
3538
3539         segno = GET_SEGNO(sbi, new_blkaddr);
3540         se = get_seg_entry(sbi, segno);
3541         type = se->type;
3542
3543         f2fs_down_write(&SM_I(sbi)->curseg_lock);
3544
3545         if (!recover_curseg) {
3546                 /* for recovery flow */
3547                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3548                         if (old_blkaddr == NULL_ADDR)
3549                                 type = CURSEG_COLD_DATA;
3550                         else
3551                                 type = CURSEG_WARM_DATA;
3552                 }
3553         } else {
3554                 if (IS_CURSEG(sbi, segno)) {
3555                         /* se->type is volatile because of SSR allocation */
3556                         type = __f2fs_get_curseg(sbi, segno);
3557                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3558                 } else {
3559                         type = CURSEG_WARM_DATA;
3560                 }
3561         }
3562
3563         f2fs_bug_on(sbi, !IS_DATASEG(type));
3564         curseg = CURSEG_I(sbi, type);
3565
3566         mutex_lock(&curseg->curseg_mutex);
3567         down_write(&sit_i->sentry_lock);
3568
3569         old_cursegno = curseg->segno;
3570         old_blkoff = curseg->next_blkoff;
3571         old_alloc_type = curseg->alloc_type;
3572
3573         /* change the current segment */
3574         if (segno != curseg->segno) {
3575                 curseg->next_segno = segno;
3576                 change_curseg(sbi, type);
3577         }
3578
3579         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3580         __add_sum_entry(sbi, type, sum);
3581
3582         if (!recover_curseg || recover_newaddr) {
3583                 if (!from_gc)
3584                         update_segment_mtime(sbi, new_blkaddr, 0);
3585                 update_sit_entry(sbi, new_blkaddr, 1);
3586         }
3587         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3588                 invalidate_mapping_pages(META_MAPPING(sbi),
3589                                         old_blkaddr, old_blkaddr);
3590                 f2fs_invalidate_compress_page(sbi, old_blkaddr);
3591                 if (!from_gc)
3592                         update_segment_mtime(sbi, old_blkaddr, 0);
3593                 update_sit_entry(sbi, old_blkaddr, -1);
3594         }
3595
3596         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3597         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3598
3599         locate_dirty_segment(sbi, old_cursegno);
3600
3601         if (recover_curseg) {
3602                 if (old_cursegno != curseg->segno) {
3603                         curseg->next_segno = old_cursegno;
3604                         change_curseg(sbi, type);
3605                 }
3606                 curseg->next_blkoff = old_blkoff;
3607                 curseg->alloc_type = old_alloc_type;
3608         }
3609
3610         up_write(&sit_i->sentry_lock);
3611         mutex_unlock(&curseg->curseg_mutex);
3612         f2fs_up_write(&SM_I(sbi)->curseg_lock);
3613 }
3614
3615 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3616                                 block_t old_addr, block_t new_addr,
3617                                 unsigned char version, bool recover_curseg,
3618                                 bool recover_newaddr)
3619 {
3620         struct f2fs_summary sum;
3621
3622         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3623
3624         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3625                                         recover_curseg, recover_newaddr, false);
3626
3627         f2fs_update_data_blkaddr(dn, new_addr);
3628 }
3629
3630 void f2fs_wait_on_page_writeback(struct page *page,
3631                                 enum page_type type, bool ordered, bool locked)
3632 {
3633         if (PageWriteback(page)) {
3634                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3635
3636                 /* submit cached LFS IO */
3637                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3638                 /* submit cached IPU IO */
3639                 f2fs_submit_merged_ipu_write(sbi, NULL, page);
3640                 if (ordered) {
3641                         wait_on_page_writeback(page);
3642                         f2fs_bug_on(sbi, locked && PageWriteback(page));
3643                 } else {
3644                         wait_for_stable_page(page);
3645                 }
3646         }
3647 }
3648
3649 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3650 {
3651         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3652         struct page *cpage;
3653
3654         if (!f2fs_post_read_required(inode))
3655                 return;
3656
3657         if (!__is_valid_data_blkaddr(blkaddr))
3658                 return;
3659
3660         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3661         if (cpage) {
3662                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3663                 f2fs_put_page(cpage, 1);
3664         }
3665 }
3666
3667 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3668                                                                 block_t len)
3669 {
3670         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3671         block_t i;
3672
3673         if (!f2fs_post_read_required(inode))
3674                 return;
3675
3676         for (i = 0; i < len; i++)
3677                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3678
3679         invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
3680 }
3681
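/*
 * Restore the compacted summary layout written at checkpoint time: the
 * first meta page starts with the NAT journal followed by the SIT
 * journal, and the summary entries of the three data logs are packed
 * back to back, spilling into further meta pages whenever a page has no
 * room left for another entry plus the footer.
 */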
3682 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3683 {
3684         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3685         struct curseg_info *seg_i;
3686         unsigned char *kaddr;
3687         struct page *page;
3688         block_t start;
3689         int i, j, offset;
3690
3691         start = start_sum_block(sbi);
3692
3693         page = f2fs_get_meta_page(sbi, start++);
3694         if (IS_ERR(page))
3695                 return PTR_ERR(page);
3696         kaddr = (unsigned char *)page_address(page);
3697
3698         /* Step 1: restore nat cache */
3699         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3700         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3701
3702         /* Step 2: restore sit cache */
3703         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3704         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3705         offset = 2 * SUM_JOURNAL_SIZE;
3706
3707         /* Step 3: restore summary entries */
3708         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3709                 unsigned short blk_off;
3710                 unsigned int segno;
3711
3712                 seg_i = CURSEG_I(sbi, i);
3713                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3714                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3715                 seg_i->next_segno = segno;
3716                 reset_curseg(sbi, i, 0);
3717                 seg_i->alloc_type = ckpt->alloc_type[i];
3718                 seg_i->next_blkoff = blk_off;
3719
3720                 if (seg_i->alloc_type == SSR)
3721                         blk_off = sbi->blocks_per_seg;
3722
3723                 for (j = 0; j < blk_off; j++) {
3724                         struct f2fs_summary *s;
3725
3726                         s = (struct f2fs_summary *)(kaddr + offset);
3727                         seg_i->sum_blk->entries[j] = *s;
3728                         offset += SUMMARY_SIZE;
3729                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3730                                                 SUM_FOOTER_SIZE)
3731                                 continue;
3732
3733                         f2fs_put_page(page, 1);
3734                         page = NULL;
3735
3736                         page = f2fs_get_meta_page(sbi, start++);
3737                         if (IS_ERR(page))
3738                                 return PTR_ERR(page);
3739                         kaddr = (unsigned char *)page_address(page);
3740                         offset = 0;
3741                 }
3742         }
3743         f2fs_put_page(page, 1);
3744         return 0;
3745 }
3746
3747 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3748 {
3749         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3750         struct f2fs_summary_block *sum;
3751         struct curseg_info *curseg;
3752         struct page *new;
3753         unsigned short blk_off;
3754         unsigned int segno = 0;
3755         block_t blk_addr = 0;
3756         int err = 0;
3757
3758         /* get segment number and block addr */
3759         if (IS_DATASEG(type)) {
3760                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3761                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3762                                                         CURSEG_HOT_DATA]);
3763                 if (__exist_node_summaries(sbi))
3764                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
3765                 else
3766                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3767         } else {
3768                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3769                                                         CURSEG_HOT_NODE]);
3770                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3771                                                         CURSEG_HOT_NODE]);
3772                 if (__exist_node_summaries(sbi))
3773                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3774                                                         type - CURSEG_HOT_NODE);
3775                 else
3776                         blk_addr = GET_SUM_BLOCK(sbi, segno);
3777         }
3778
3779         new = f2fs_get_meta_page(sbi, blk_addr);
3780         if (IS_ERR(new))
3781                 return PTR_ERR(new);
3782         sum = (struct f2fs_summary_block *)page_address(new);
3783
3784         if (IS_NODESEG(type)) {
3785                 if (__exist_node_summaries(sbi)) {
3786                         struct f2fs_summary *ns = &sum->entries[0];
3787                         int i;
3788
3789                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3790                                 ns->version = 0;
3791                                 ns->ofs_in_node = 0;
3792                         }
3793                 } else {
3794                         err = f2fs_restore_node_summary(sbi, segno, sum);
3795                         if (err)
3796                                 goto out;
3797                 }
3798         }
3799
3800         /* set uncompleted segment to curseg */
3801         curseg = CURSEG_I(sbi, type);
3802         mutex_lock(&curseg->curseg_mutex);
3803
3804         /* update journal info */
3805         down_write(&curseg->journal_rwsem);
3806         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3807         up_write(&curseg->journal_rwsem);
3808
3809         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3810         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3811         curseg->next_segno = segno;
3812         reset_curseg(sbi, type, 0);
3813         curseg->alloc_type = ckpt->alloc_type[type];
3814         curseg->next_blkoff = blk_off;
3815         mutex_unlock(&curseg->curseg_mutex);
3816 out:
3817         f2fs_put_page(new, 1);
3818         return err;
3819 }
3820
3821 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3822 {
3823         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3824         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3825         int type = CURSEG_HOT_DATA;
3826         int err;
3827
3828         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3829                 int npages = f2fs_npages_for_summary_flush(sbi, true);
3830
3831                 if (npages >= 2)
3832                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3833                                                         META_CP, true);
3834
3835                 /* restore for compacted data summary */
3836                 err = read_compacted_summaries(sbi);
3837                 if (err)
3838                         return err;
3839                 type = CURSEG_HOT_NODE;
3840         }
3841
3842         if (__exist_node_summaries(sbi))
3843                 f2fs_ra_meta_pages(sbi,
3844                                 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
3845                                 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
3846
3847         for (; type <= CURSEG_COLD_NODE; type++) {
3848                 err = read_normal_summaries(sbi, type);
3849                 if (err)
3850                         return err;
3851         }
3852
3853         /* sanity check for summary blocks */
3854         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3855                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
3856                 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
3857                          nats_in_cursum(nat_j), sits_in_cursum(sit_j));
3858                 return -EINVAL;
3859         }
3860
3861         return 0;
3862 }
3863
3864 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3865 {
3866         struct page *page;
3867         unsigned char *kaddr;
3868         struct f2fs_summary *summary;
3869         struct curseg_info *seg_i;
3870         int written_size = 0;
3871         int i, j;
3872
3873         page = f2fs_grab_meta_page(sbi, blkaddr++);
3874         kaddr = (unsigned char *)page_address(page);
3875         memset(kaddr, 0, PAGE_SIZE);
3876
3877         /* Step 1: write nat cache */
3878         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3879         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3880         written_size += SUM_JOURNAL_SIZE;
3881
3882         /* Step 2: write sit cache */
3883         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3884         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3885         written_size += SUM_JOURNAL_SIZE;
3886
3887         /* Step 3: write summary entries */
3888         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3889                 unsigned short blkoff;
3890
3891                 seg_i = CURSEG_I(sbi, i);
3892                 if (sbi->ckpt->alloc_type[i] == SSR)
3893                         blkoff = sbi->blocks_per_seg;
3894                 else
3895                         blkoff = curseg_blkoff(sbi, i);
3896
3897                 for (j = 0; j < blkoff; j++) {
3898                         if (!page) {
3899                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
3900                                 kaddr = (unsigned char *)page_address(page);
3901                                 memset(kaddr, 0, PAGE_SIZE);
3902                                 written_size = 0;
3903                         }
3904                         summary = (struct f2fs_summary *)(kaddr + written_size);
3905                         *summary = seg_i->sum_blk->entries[j];
3906                         written_size += SUMMARY_SIZE;
3907
3908                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3909                                                         SUM_FOOTER_SIZE)
3910                                 continue;
3911
3912                         set_page_dirty(page);
3913                         f2fs_put_page(page, 1);
3914                         page = NULL;
3915                 }
3916         }
3917         if (page) {
3918                 set_page_dirty(page);
3919                 f2fs_put_page(page, 1);
3920         }
3921 }
3922
3923 static void write_normal_summaries(struct f2fs_sb_info *sbi,
3924                                         block_t blkaddr, int type)
3925 {
3926         int i, end;
3927
3928         if (IS_DATASEG(type))
3929                 end = type + NR_CURSEG_DATA_TYPE;
3930         else
3931                 end = type + NR_CURSEG_NODE_TYPE;
3932
3933         for (i = type; i < end; i++)
3934                 write_current_sum_page(sbi, i, blkaddr + (i - type));
3935 }
3936
3937 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3938 {
3939         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3940                 write_compacted_summaries(sbi, start_blk);
3941         else
3942                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3943 }
3944
3945 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3946 {
3947         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3948 }
3949
3950 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3951                                         unsigned int val, int alloc)
3952 {
3953         int i;
3954
3955         if (type == NAT_JOURNAL) {
3956                 for (i = 0; i < nats_in_cursum(journal); i++) {
3957                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3958                                 return i;
3959                 }
3960                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3961                         return update_nats_in_cursum(journal, 1);
3962         } else if (type == SIT_JOURNAL) {
3963                 for (i = 0; i < sits_in_cursum(journal); i++)
3964                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3965                                 return i;
3966                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3967                         return update_sits_in_cursum(journal, 1);
3968         }
3969         return -1;
3970 }
3971
3972 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3973                                         unsigned int segno)
3974 {
3975         return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
3976 }
3977
3978 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3979                                         unsigned int start)
3980 {
3981         struct sit_info *sit_i = SIT_I(sbi);
3982         struct page *page;
3983         pgoff_t src_off, dst_off;
3984
3985         src_off = current_sit_addr(sbi, start);
3986         dst_off = next_sit_addr(sbi, src_off);
3987
3988         page = f2fs_grab_meta_page(sbi, dst_off);
3989         seg_info_to_sit_page(sbi, page, start);
3990
3991         set_page_dirty(page);
3992         set_to_next_sit(sit_i, start);
3993
3994         return page;
3995 }
3996
3997 static struct sit_entry_set *grab_sit_entry_set(void)
3998 {
3999         struct sit_entry_set *ses =
4000                         f2fs_kmem_cache_alloc(sit_entry_set_slab,
4001                                                 GFP_NOFS, true, NULL);
4002
4003         ses->entry_cnt = 0;
4004         INIT_LIST_HEAD(&ses->set_list);
4005         return ses;
4006 }
4007
4008 static void release_sit_entry_set(struct sit_entry_set *ses)
4009 {
4010         list_del(&ses->set_list);
4011         kmem_cache_free(sit_entry_set_slab, ses);
4012 }
4013
4014 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4015                                                 struct list_head *head)
4016 {
4017         struct sit_entry_set *next = ses;
4018
4019         if (list_is_last(&ses->set_list, head))
4020                 return;
4021
4022         list_for_each_entry_continue(next, head, set_list)
4023                 if (ses->entry_cnt <= next->entry_cnt) {
4024                         list_move_tail(&ses->set_list, &next->set_list);
4025                         return;
4026                 }
4027
4028         list_move_tail(&ses->set_list, head);
4029 }
4030
4031 static void add_sit_entry(unsigned int segno, struct list_head *head)
4032 {
4033         struct sit_entry_set *ses;
4034         unsigned int start_segno = START_SEGNO(segno);
4035
4036         list_for_each_entry(ses, head, set_list) {
4037                 if (ses->start_segno == start_segno) {
4038                         ses->entry_cnt++;
4039                         adjust_sit_entry_set(ses, head);
4040                         return;
4041                 }
4042         }
4043
4044         ses = grab_sit_entry_set();
4045
4046         ses->start_segno = start_segno;
4047         ses->entry_cnt++;
4048         list_add(&ses->set_list, head);
4049 }
4050
4051 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4052 {
4053         struct f2fs_sm_info *sm_info = SM_I(sbi);
4054         struct list_head *set_list = &sm_info->sit_entry_set;
4055         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4056         unsigned int segno;
4057
4058         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4059                 add_sit_entry(segno, set_list);
4060 }
4061
4062 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4063 {
4064         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4065         struct f2fs_journal *journal = curseg->journal;
4066         int i;
4067
4068         down_write(&curseg->journal_rwsem);
4069         for (i = 0; i < sits_in_cursum(journal); i++) {
4070                 unsigned int segno;
4071                 bool dirtied;
4072
4073                 segno = le32_to_cpu(segno_in_journal(journal, i));
4074                 dirtied = __mark_sit_entry_dirty(sbi, segno);
4075
4076                 if (!dirtied)
4077                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4078         }
4079         update_sits_in_cursum(journal, -i);
4080         up_write(&curseg->journal_rwsem);
4081 }
4082
4083 /*
4084  * CP calls this function, which flushes SIT entries including sit_journal,
4085  * and moves prefree segs to free segs.
4086  */
4087 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4088 {
4089         struct sit_info *sit_i = SIT_I(sbi);
4090         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4091         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4092         struct f2fs_journal *journal = curseg->journal;
4093         struct sit_entry_set *ses, *tmp;
4094         struct list_head *head = &SM_I(sbi)->sit_entry_set;
4095         bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4096         struct seg_entry *se;
4097
4098         down_write(&sit_i->sentry_lock);
4099
4100         if (!sit_i->dirty_sentries)
4101                 goto out;
4102
4103         /*
4104          * add and account the sit entries from the dirty bitmap into the
4105          * sit entry sets temporarily
4106          */
4107         add_sits_in_set(sbi);
4108
4109         /*
4110          * if there is not enough space in the journal to store the dirty sit
4111          * entries, remove all entries from the journal, then add and account
4112          * them in the sit entry sets.
4113          */
4114         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4115                                                                 !to_journal)
4116                 remove_sits_in_journal(sbi);
4117
4118         /*
4119          * there are two steps to flush sit entries:
4120          * #1, flush sit entries to journal in current cold data summary block.
4121          * #2, flush sit entries to sit page.
4122          */
4123         list_for_each_entry_safe(ses, tmp, head, set_list) {
4124                 struct page *page = NULL;
4125                 struct f2fs_sit_block *raw_sit = NULL;
4126                 unsigned int start_segno = ses->start_segno;
4127                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4128                                                 (unsigned long)MAIN_SEGS(sbi));
4129                 unsigned int segno = start_segno;
4130
4131                 if (to_journal &&
4132                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4133                         to_journal = false;
4134
4135                 if (to_journal) {
4136                         down_write(&curseg->journal_rwsem);
4137                 } else {
4138                         page = get_next_sit_page(sbi, start_segno);
4139                         raw_sit = page_address(page);
4140                 }
4141
4142                 /* flush dirty sit entries in region of current sit set */
4143                 for_each_set_bit_from(segno, bitmap, end) {
4144                         int offset, sit_offset;
4145
4146                         se = get_seg_entry(sbi, segno);
4147 #ifdef CONFIG_F2FS_CHECK_FS
4148                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4149                                                 SIT_VBLOCK_MAP_SIZE))
4150                                 f2fs_bug_on(sbi, 1);
4151 #endif
4152
4153                         /* add discard candidates */
4154                         if (!(cpc->reason & CP_DISCARD)) {
4155                                 cpc->trim_start = segno;
4156                                 add_discard_addrs(sbi, cpc, false);
4157                         }
4158
4159                         if (to_journal) {
4160                                 offset = f2fs_lookup_journal_in_cursum(journal,
4161                                                         SIT_JOURNAL, segno, 1);
4162                                 f2fs_bug_on(sbi, offset < 0);
4163                                 segno_in_journal(journal, offset) =
4164                                                         cpu_to_le32(segno);
4165                                 seg_info_to_raw_sit(se,
4166                                         &sit_in_journal(journal, offset));
4167                                 check_block_count(sbi, segno,
4168                                         &sit_in_journal(journal, offset));
4169                         } else {
4170                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4171                                 seg_info_to_raw_sit(se,
4172                                                 &raw_sit->entries[sit_offset]);
4173                                 check_block_count(sbi, segno,
4174                                                 &raw_sit->entries[sit_offset]);
4175                         }
4176
4177                         __clear_bit(segno, bitmap);
4178                         sit_i->dirty_sentries--;
4179                         ses->entry_cnt--;
4180                 }
4181
4182                 if (to_journal)
4183                         up_write(&curseg->journal_rwsem);
4184                 else
4185                         f2fs_put_page(page, 1);
4186
4187                 f2fs_bug_on(sbi, ses->entry_cnt);
4188                 release_sit_entry_set(ses);
4189         }
4190
4191         f2fs_bug_on(sbi, !list_empty(head));
4192         f2fs_bug_on(sbi, sit_i->dirty_sentries);
4193 out:
4194         if (cpc->reason & CP_DISCARD) {
4195                 __u64 trim_start = cpc->trim_start;
4196
4197                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4198                         add_discard_addrs(sbi, cpc, false);
4199
4200                 cpc->trim_start = trim_start;
4201         }
4202         up_write(&sit_i->sentry_lock);
4203
4204         set_prefree_as_free_segments(sbi);
4205 }
4206
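/*
 * Build the in-memory SIT: the per-segment seg_entry array, the dirty
 * sentries bitmap, one packed bitmap carrying each segment's current
 * and checkpointed valid-block maps (plus a mirror copy under
 * CONFIG_F2FS_CHECK_FS and a discard map when block-unit discard is
 * used), optional per-section entries for large sections, and a private
 * copy of the on-disk SIT bitmap taken from the checkpoint pack.
 */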
4207 static int build_sit_info(struct f2fs_sb_info *sbi)
4208 {
4209         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4210         struct sit_info *sit_i;
4211         unsigned int sit_segs, start;
4212         char *src_bitmap, *bitmap;
4213         unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4214         unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4215
4216         /* allocate memory for SIT information */
4217         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4218         if (!sit_i)
4219                 return -ENOMEM;
4220
4221         SM_I(sbi)->sit_info = sit_i;
4222
4223         sit_i->sentries =
4224                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4225                                               MAIN_SEGS(sbi)),
4226                               GFP_KERNEL);
4227         if (!sit_i->sentries)
4228                 return -ENOMEM;
4229
4230         main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4231         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4232                                                                 GFP_KERNEL);
4233         if (!sit_i->dirty_sentries_bitmap)
4234                 return -ENOMEM;
4235
4236 #ifdef CONFIG_F2FS_CHECK_FS
4237         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4238 #else
4239         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4240 #endif
4241         sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4242         if (!sit_i->bitmap)
4243                 return -ENOMEM;
4244
4245         bitmap = sit_i->bitmap;
4246
4247         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4248                 sit_i->sentries[start].cur_valid_map = bitmap;
4249                 bitmap += SIT_VBLOCK_MAP_SIZE;
4250
4251                 sit_i->sentries[start].ckpt_valid_map = bitmap;
4252                 bitmap += SIT_VBLOCK_MAP_SIZE;
4253
4254 #ifdef CONFIG_F2FS_CHECK_FS
4255                 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4256                 bitmap += SIT_VBLOCK_MAP_SIZE;
4257 #endif
4258
4259                 if (discard_map) {
4260                         sit_i->sentries[start].discard_map = bitmap;
4261                         bitmap += SIT_VBLOCK_MAP_SIZE;
4262                 }
4263         }
4264
4265         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4266         if (!sit_i->tmp_map)
4267                 return -ENOMEM;
4268
4269         if (__is_large_section(sbi)) {
4270                 sit_i->sec_entries =
4271                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4272                                                       MAIN_SECS(sbi)),
4273                                       GFP_KERNEL);
4274                 if (!sit_i->sec_entries)
4275                         return -ENOMEM;
4276         }
4277
4278         /* get information related to SIT */
4279         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4280
4281         /* set up SIT bitmap from checkpoint pack */
4282         sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4283         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4284
4285         sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4286         if (!sit_i->sit_bitmap)
4287                 return -ENOMEM;
4288
4289 #ifdef CONFIG_F2FS_CHECK_FS
4290         sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4291                                         sit_bitmap_size, GFP_KERNEL);
4292         if (!sit_i->sit_bitmap_mir)
4293                 return -ENOMEM;
4294
4295         sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4296                                         main_bitmap_size, GFP_KERNEL);
4297         if (!sit_i->invalid_segmap)
4298                 return -ENOMEM;
4299 #endif
4300
4301         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4302         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4303         sit_i->written_valid_blocks = 0;
4304         sit_i->bitmap_size = sit_bitmap_size;
4305         sit_i->dirty_sentries = 0;
4306         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4307         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4308         sit_i->mounted_time = ktime_get_boottime_seconds();
4309         init_rwsem(&sit_i->sentry_lock);
4310         return 0;
4311 }
4312
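/*
 * Allocate the free segment and free section bitmaps. All bits start set
 * ("in use"); init_free_segmap() clears them later based on the SIT.
 */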
4313 static int build_free_segmap(struct f2fs_sb_info *sbi)
4314 {
4315         struct free_segmap_info *free_i;
4316         unsigned int bitmap_size, sec_bitmap_size;
4317
4318         /* allocate memory for free segmap information */
4319         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4320         if (!free_i)
4321                 return -ENOMEM;
4322
4323         SM_I(sbi)->free_info = free_i;
4324
4325         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4326         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4327         if (!free_i->free_segmap)
4328                 return -ENOMEM;
4329
4330         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4331         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4332         if (!free_i->free_secmap)
4333                 return -ENOMEM;
4334
4335         /* set all segments as dirty temporarily */
4336         memset(free_i->free_segmap, 0xff, bitmap_size);
4337         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4338
4339         /* init free segmap information */
4340         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4341         free_i->free_segments = 0;
4342         free_i->free_sections = 0;
4343         spin_lock_init(&free_i->segmap_lock);
4344         return 0;
4345 }
4346
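/*
 * Allocate the current segment (curseg) array, one entry per log type
 * including the pinned-data and ATGC logs, then restore their summary
 * blocks and journals via restore_curseg_summaries().
 */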
4347 static int build_curseg(struct f2fs_sb_info *sbi)
4348 {
4349         struct curseg_info *array;
4350         int i;
4351
4352         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4353                                         sizeof(*array)), GFP_KERNEL);
4354         if (!array)
4355                 return -ENOMEM;
4356
4357         SM_I(sbi)->curseg_array = array;
4358
4359         for (i = 0; i < NO_CHECK_TYPE; i++) {
4360                 mutex_init(&array[i].curseg_mutex);
4361                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4362                 if (!array[i].sum_blk)
4363                         return -ENOMEM;
4364                 init_rwsem(&array[i].journal_rwsem);
4365                 array[i].journal = f2fs_kzalloc(sbi,
4366                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4367                 if (!array[i].journal)
4368                         return -ENOMEM;
4369                 if (i < NR_PERSISTENT_LOG)
4370                         array[i].seg_type = CURSEG_HOT_DATA + i;
4371                 else if (i == CURSEG_COLD_DATA_PINNED)
4372                         array[i].seg_type = CURSEG_COLD_DATA;
4373                 else if (i == CURSEG_ALL_DATA_ATGC)
4374                         array[i].seg_type = CURSEG_COLD_DATA;
4375                 array[i].segno = NULL_SEGNO;
4376                 array[i].next_blkoff = 0;
4377                 array[i].inited = false;
4378         }
4379         return restore_curseg_summaries(sbi);
4380 }
4381
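/*
 * Load SIT entries from the SIT area and the SIT journal kept in the
 * cold-data curseg, rebuild the discard maps and per-section valid block
 * counts, and cross-check the totals against the checkpointed node and
 * user block counts.
 */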
4382 static int build_sit_entries(struct f2fs_sb_info *sbi)
4383 {
4384         struct sit_info *sit_i = SIT_I(sbi);
4385         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4386         struct f2fs_journal *journal = curseg->journal;
4387         struct seg_entry *se;
4388         struct f2fs_sit_entry sit;
4389         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4390         unsigned int i, start, end;
4391         unsigned int readed, start_blk = 0;
4392         int err = 0;
4393         block_t sit_valid_blocks[2] = {0, 0};
4394
4395         do {
4396                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4397                                                         META_SIT, true);
4398
4399                 start = start_blk * sit_i->sents_per_block;
4400                 end = (start_blk + readed) * sit_i->sents_per_block;
4401
4402                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4403                         struct f2fs_sit_block *sit_blk;
4404                         struct page *page;
4405
4406                         se = &sit_i->sentries[start];
4407                         page = get_current_sit_page(sbi, start);
4408                         if (IS_ERR(page))
4409                                 return PTR_ERR(page);
4410                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4411                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4412                         f2fs_put_page(page, 1);
4413
4414                         err = check_block_count(sbi, start, &sit);
4415                         if (err)
4416                                 return err;
4417                         seg_info_from_raw_sit(se, &sit);
4418
4419                         if (se->type >= NR_PERSISTENT_LOG) {
4420                                 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4421                                                         se->type, start);
4422                                 f2fs_handle_error(sbi,
4423                                                 ERROR_INCONSISTENT_SUM_TYPE);
4424                                 return -EFSCORRUPTED;
4425                         }
4426
4427                         sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4428
4429                         if (f2fs_block_unit_discard(sbi)) {
4430                                 /* build the discard map only once */
4431                                 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4432                                         memset(se->discard_map, 0xff,
4433                                                 SIT_VBLOCK_MAP_SIZE);
4434                                 } else {
4435                                         memcpy(se->discard_map,
4436                                                 se->cur_valid_map,
4437                                                 SIT_VBLOCK_MAP_SIZE);
4438                                         sbi->discard_blks +=
4439                                                 sbi->blocks_per_seg -
4440                                                 se->valid_blocks;
4441                                 }
4442                         }
4443
4444                         if (__is_large_section(sbi))
4445                                 get_sec_entry(sbi, start)->valid_blocks +=
4446                                                         se->valid_blocks;
4447                 }
4448                 start_blk += readed;
4449         } while (start_blk < sit_blk_cnt);
4450
4451         down_read(&curseg->journal_rwsem);
4452         for (i = 0; i < sits_in_cursum(journal); i++) {
4453                 unsigned int old_valid_blocks;
4454
4455                 start = le32_to_cpu(segno_in_journal(journal, i));
4456                 if (start >= MAIN_SEGS(sbi)) {
4457                         f2fs_err(sbi, "Wrong journal entry on segno %u",
4458                                  start);
4459                         err = -EFSCORRUPTED;
4460                         f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4461                         break;
4462                 }
4463
4464                 se = &sit_i->sentries[start];
4465                 sit = sit_in_journal(journal, i);
4466
4467                 old_valid_blocks = se->valid_blocks;
4468
4469                 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4470
4471                 err = check_block_count(sbi, start, &sit);
4472                 if (err)
4473                         break;
4474                 seg_info_from_raw_sit(se, &sit);
4475
4476                 if (se->type >= NR_PERSISTENT_LOG) {
4477                         f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4478                                                         se->type, start);
4479                         err = -EFSCORRUPTED;
4480                         f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4481                         break;
4482                 }
4483
4484                 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4485
4486                 if (f2fs_block_unit_discard(sbi)) {
4487                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4488                                 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4489                         } else {
4490                                 memcpy(se->discard_map, se->cur_valid_map,
4491                                                         SIT_VBLOCK_MAP_SIZE);
4492                                 sbi->discard_blks += old_valid_blocks;
4493                                 sbi->discard_blks -= se->valid_blocks;
4494                         }
4495                 }
4496
4497                 if (__is_large_section(sbi)) {
4498                         get_sec_entry(sbi, start)->valid_blocks +=
4499                                                         se->valid_blocks;
4500                         get_sec_entry(sbi, start)->valid_blocks -=
4501                                                         old_valid_blocks;
4502                 }
4503         }
4504         up_read(&curseg->journal_rwsem);
4505
4506         if (err)
4507                 return err;
4508
4509         if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4510                 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4511                          sit_valid_blocks[NODE], valid_node_count(sbi));
4512                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4513                 return -EFSCORRUPTED;
4514         }
4515
4516         if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4517                                 valid_user_blocks(sbi)) {
4518                 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4519                          sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4520                          valid_user_blocks(sbi));
4521                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4522                 return -EFSCORRUPTED;
4523         }
4524
4525         return 0;
4526 }
4527
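/*
 * Mark segments without valid blocks as free, account the written valid
 * blocks of the rest, and reserve the current segments as in use.
 */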
4528 static void init_free_segmap(struct f2fs_sb_info *sbi)
4529 {
4530         unsigned int start;
4531         int type;
4532         struct seg_entry *sentry;
4533
4534         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4535                 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4536                         continue;
4537                 sentry = get_seg_entry(sbi, start);
4538                 if (!sentry->valid_blocks)
4539                         __set_free(sbi, start);
4540                 else
4541                         SIT_I(sbi)->written_valid_blocks +=
4542                                                 sentry->valid_blocks;
4543         }
4544
4545         /* set the current segments as in use */
4546         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4547                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4548
4549                 __set_test_and_inuse(sbi, curseg_t->segno);
4550         }
4551 }
4552
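/*
 * Mark every in-use segment that is only partially valid as dirty; for
 * large sections, additionally flag partially valid sections in
 * dirty_secmap.
 */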
4553 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4554 {
4555         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4556         struct free_segmap_info *free_i = FREE_I(sbi);
4557         unsigned int segno = 0, offset = 0, secno;
4558         block_t valid_blocks, usable_blks_in_seg;
4559
4560         while (1) {
4561                 /* find dirty segment based on free segmap */
4562                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4563                 if (segno >= MAIN_SEGS(sbi))
4564                         break;
4565                 offset = segno + 1;
4566                 valid_blocks = get_valid_blocks(sbi, segno, false);
4567                 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4568                 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4569                         continue;
4570                 if (valid_blocks > usable_blks_in_seg) {
4571                         f2fs_bug_on(sbi, 1);
4572                         continue;
4573                 }
4574                 mutex_lock(&dirty_i->seglist_lock);
4575                 __locate_dirty_segment(sbi, segno, DIRTY);
4576                 mutex_unlock(&dirty_i->seglist_lock);
4577         }
4578
4579         if (!__is_large_section(sbi))
4580                 return;
4581
4582         mutex_lock(&dirty_i->seglist_lock);
4583         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4584                 valid_blocks = get_valid_blocks(sbi, segno, true);
4585                 secno = GET_SEC_FROM_SEG(sbi, segno);
4586
4587                 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
4588                         continue;
4589                 if (IS_CURSEC(sbi, secno))
4590                         continue;
4591                 set_bit(secno, dirty_i->dirty_secmap);
4592         }
4593         mutex_unlock(&dirty_i->seglist_lock);
4594 }
4595
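/* Allocate the bitmaps used for GC victim selection and section pinning. */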
4596 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4597 {
4598         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4599         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4600
4601         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4602         if (!dirty_i->victim_secmap)
4603                 return -ENOMEM;
4604
4605         dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4606         if (!dirty_i->pinned_secmap)
4607                 return -ENOMEM;
4608
4609         dirty_i->pinned_secmap_cnt = 0;
4610         dirty_i->enable_pin_section = true;
4611         return 0;
4612 }
4613
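/*
 * Allocate the dirty segment bitmaps (one per dirty type, plus a section
 * map for large sections) and populate them from the current SIT state.
 */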
4614 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4615 {
4616         struct dirty_seglist_info *dirty_i;
4617         unsigned int bitmap_size, i;
4618
4619         /* allocate memory for dirty segments list information */
4620         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4621                                                                 GFP_KERNEL);
4622         if (!dirty_i)
4623                 return -ENOMEM;
4624
4625         SM_I(sbi)->dirty_info = dirty_i;
4626         mutex_init(&dirty_i->seglist_lock);
4627
4628         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4629
4630         for (i = 0; i < NR_DIRTY_TYPE; i++) {
4631                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4632                                                                 GFP_KERNEL);
4633                 if (!dirty_i->dirty_segmap[i])
4634                         return -ENOMEM;
4635         }
4636
4637         if (__is_large_section(sbi)) {
4638                 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4639                 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
4640                                                 bitmap_size, GFP_KERNEL);
4641                 if (!dirty_i->dirty_secmap)
4642                         return -ENOMEM;
4643         }
4644
4645         init_dirty_segmap(sbi);
4646         return init_victim_secmap(sbi);
4647 }
4648
4649 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4650 {
4651         int i;
4652
4653         /*
4654          * In an LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4655          * in an LFS curseg, all blkaddrs after .next_blkoff should be unused.
4656          */
4657         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4658                 struct curseg_info *curseg = CURSEG_I(sbi, i);
4659                 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4660                 unsigned int blkofs = curseg->next_blkoff;
4661
4662                 if (f2fs_sb_has_readonly(sbi) &&
4663                         i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
4664                         continue;
4665
4666                 sanity_check_seg_type(sbi, curseg->seg_type);
4667
4668                 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
4669                         f2fs_err(sbi,
4670                                  "Current segment has invalid alloc_type:%d",
4671                                  curseg->alloc_type);
4672                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4673                         return -EFSCORRUPTED;
4674                 }
4675
4676                 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4677                         goto out;
4678
4679                 if (curseg->alloc_type == SSR)
4680                         continue;
4681
4682                 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4683                         if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4684                                 continue;
4685 out:
4686                         f2fs_err(sbi,
4687                                  "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4688                                  i, curseg->segno, curseg->alloc_type,
4689                                  curseg->next_blkoff, blkofs);
4690                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4691                         return -EFSCORRUPTED;
4692                 }
4693         }
4694         return 0;
4695 }
4696
4697 #ifdef CONFIG_BLK_DEV_ZONED
4698
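/*
 * Compare a sequential zone's write pointer against the last valid block
 * recorded in the SIT: report valid blocks found beyond the write pointer,
 * and reset the write pointer of zones that contain no valid blocks.
 */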
4699 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4700                                     struct f2fs_dev_info *fdev,
4701                                     struct blk_zone *zone)
4702 {
4703         unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
4704         block_t zone_block, wp_block, last_valid_block;
4705         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4706         int i, s, b, ret;
4707         struct seg_entry *se;
4708
4709         if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4710                 return 0;
4711
4712         wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
4713         wp_segno = GET_SEGNO(sbi, wp_block);
4714         wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4715         zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4716         zone_segno = GET_SEGNO(sbi, zone_block);
4717         zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
4718
4719         if (zone_segno >= MAIN_SEGS(sbi))
4720                 return 0;
4721
4722         /*
4723          * Skip checking the zones that cursegs point to, since
4724          * fix_curseg_write_pointer() checks them.
4725          */
4726         for (i = 0; i < NO_CHECK_TYPE; i++)
4727                 if (zone_secno == GET_SEC_FROM_SEG(sbi,
4728                                                    CURSEG_I(sbi, i)->segno))
4729                         return 0;
4730
4731         /*
4732          * Get last valid block of the zone.
4733          */
4734         last_valid_block = zone_block - 1;
4735         for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
4736                 segno = zone_segno + s;
4737                 se = get_seg_entry(sbi, segno);
4738                 for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
4739                         if (f2fs_test_bit(b, se->cur_valid_map)) {
4740                                 last_valid_block = START_BLOCK(sbi, segno) + b;
4741                                 break;
4742                         }
4743                 if (last_valid_block >= zone_block)
4744                         break;
4745         }
4746
4747         /*
4748          * If the last valid block is beyond the write pointer, report the
4749          * inconsistency. This inconsistency does not cause a write error
4750          * because the zone will not be selected for a write operation until
4751          * it gets discarded. Just report it.
4752          */
4753         if (last_valid_block >= wp_block) {
4754                 f2fs_notice(sbi, "Valid block beyond write pointer: "
4755                             "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
4756                             GET_SEGNO(sbi, last_valid_block),
4757                             GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
4758                             wp_segno, wp_blkoff);
4759                 return 0;
4760         }
4761
4762         /*
4763          * If there is no valid block in the zone and the write pointer is
4764          * not at the zone start, reset the write pointer.
4765          */
4766         if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
4767                 f2fs_notice(sbi,
4768                             "Zone without valid block has non-zero write "
4769                             "pointer. Reset the write pointer: wp[0x%x,0x%x]",
4770                             wp_segno, wp_blkoff);
4771                 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
4772                                         zone->len >> log_sectors_per_block);
4773                 if (ret) {
4774                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4775                                  fdev->path, ret);
4776                         return ret;
4777                 }
4778         }
4779
4780         return 0;
4781 }
4782
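/* Return the zoned device whose block range covers zone_blkaddr, if any. */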
4783 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
4784                                                   block_t zone_blkaddr)
4785 {
4786         int i;
4787
4788         for (i = 0; i < sbi->s_ndevs; i++) {
4789                 if (!bdev_is_zoned(FDEV(i).bdev))
4790                         continue;
4791                 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
4792                                 zone_blkaddr <= FDEV(i).end_blk))
4793                         return &FDEV(i);
4794         }
4795
4796         return NULL;
4797 }
4798
4799 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
4800                               void *data)
4801 {
4802         memcpy(data, zone, sizeof(struct blk_zone));
4803         return 0;
4804 }
4805
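/*
 * If a curseg is not aligned with its zone's write pointer, move it to a
 * freshly allocated section and make sure both the old and the newly
 * assigned zones end up in a consistent state.
 */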
4806 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
4807 {
4808         struct curseg_info *cs = CURSEG_I(sbi, type);
4809         struct f2fs_dev_info *zbd;
4810         struct blk_zone zone;
4811         unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
4812         block_t cs_zone_block, wp_block;
4813         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4814         sector_t zone_sector;
4815         int err;
4816
4817         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4818         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4819
4820         zbd = get_target_zoned_dev(sbi, cs_zone_block);
4821         if (!zbd)
4822                 return 0;
4823
4824         /* report zone for the sector the curseg points to */
4825         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4826                 << log_sectors_per_block;
4827         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4828                                   report_one_zone_cb, &zone);
4829         if (err != 1) {
4830                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4831                          zbd->path, err);
4832                 return err;
4833         }
4834
4835         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4836                 return 0;
4837
4838         wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
4839         wp_segno = GET_SEGNO(sbi, wp_block);
4840         wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4841         wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
4842
4843         if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
4844                 wp_sector_off == 0)
4845                 return 0;
4846
4847         f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
4848                     "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
4849                     type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
4850
4851         f2fs_notice(sbi, "Assign new section to curseg[%d]: "
4852                     "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
4853
4854         f2fs_allocate_new_section(sbi, type, true);
4855
4856         /* check consistency of the zone the curseg pointed to */
4857         if (check_zone_write_pointer(sbi, zbd, &zone))
4858                 return -EIO;
4859
4860         /* check newly assigned zone */
4861         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4862         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4863
4864         zbd = get_target_zoned_dev(sbi, cs_zone_block);
4865         if (!zbd)
4866                 return 0;
4867
4868         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4869                 << log_sectors_per_block;
4870         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4871                                   report_one_zone_cb, &zone);
4872         if (err != 1) {
4873                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4874                          zbd->path, err);
4875                 return err;
4876         }
4877
4878         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4879                 return 0;
4880
4881         if (zone.wp != zone.start) {
4882                 f2fs_notice(sbi,
4883                             "New zone for curseg[%d] is not yet discarded. "
4884                             "Reset the zone: curseg[0x%x,0x%x]",
4885                             type, cs->segno, cs->next_blkoff);
4886                 err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
4887                                 zone_sector >> log_sectors_per_block,
4888                                 zone.len >> log_sectors_per_block);
4889                 if (err) {
4890                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4891                                  zbd->path, err);
4892                         return err;
4893                 }
4894         }
4895
4896         return 0;
4897 }
4898
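/* Align every persistent curseg with its zone's write pointer. */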
4899 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
4900 {
4901         int i, ret;
4902
4903         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4904                 ret = fix_curseg_write_pointer(sbi, i);
4905                 if (ret)
4906                         return ret;
4907         }
4908
4909         return 0;
4910 }
4911
4912 struct check_zone_write_pointer_args {
4913         struct f2fs_sb_info *sbi;
4914         struct f2fs_dev_info *fdev;
4915 };
4916
4917 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
4918                                       void *data)
4919 {
4920         struct check_zone_write_pointer_args *args;
4921
4922         args = (struct check_zone_write_pointer_args *)data;
4923
4924         return check_zone_write_pointer(args->sbi, args->fdev, zone);
4925 }
4926
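/*
 * Check write pointer consistency for every zone on all zoned devices,
 * using check_zone_write_pointer() as the report callback.
 */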
4927 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
4928 {
4929         int i, ret;
4930         struct check_zone_write_pointer_args args;
4931
4932         for (i = 0; i < sbi->s_ndevs; i++) {
4933                 if (!bdev_is_zoned(FDEV(i).bdev))
4934                         continue;
4935
4936                 args.sbi = sbi;
4937                 args.fdev = &FDEV(i);
4938                 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
4939                                           check_zone_write_pointer_cb, &args);
4940                 if (ret < 0)
4941                         return ret;
4942         }
4943
4944         return 0;
4945 }
4946
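/*
 * A zone is treated as conventional when its device is not zoned at all,
 * or when the zone is not marked as sequential in blkz_seq.
 */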
4947 static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
4948                                                 unsigned int dev_idx)
4949 {
4950         if (!bdev_is_zoned(FDEV(dev_idx).bdev))
4951                 return true;
4952         return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
4953 }
4954
4955 /* Return the zone index in the given device */
4956 static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
4957                                         int dev_idx)
4958 {
4959         block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
4960
4961         return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
4962                                                 sbi->log_blocks_per_blkz;
4963 }
4964
4965 /*
4966  * Return the number of usable segments in a section, based on the
4967  * corresponding zone's capacity. A zone is equal to a section.
4968  */
4969 static inline unsigned int f2fs_usable_zone_segs_in_sec(
4970                 struct f2fs_sb_info *sbi, unsigned int segno)
4971 {
4972         unsigned int dev_idx, zone_idx;
4973
4974         dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
4975         zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
4976
4977         /* A conventional zone's capacity is always equal to the zone size */
4978         if (is_conv_zone(sbi, zone_idx, dev_idx))
4979                 return sbi->segs_per_sec;
4980
4981         if (!sbi->unusable_blocks_per_sec)
4982                 return sbi->segs_per_sec;
4983
4984         /* Get the number of segments beyond the zone capacity */
4985         return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >>
4986                                                 sbi->log_blocks_per_seg);
4987 }
4988
4989 /*
4990  * Return the number of usable blocks in a segment. The number of blocks
4991  * returned is always equal to the number of blocks in a segment for
4992  * segments fully contained within a sequential zone capacity or a
4993  * conventional zone. For segments partially contained in a sequential
4994  * zone capacity, the number of usable blocks up to the zone capacity
4995  * is returned. 0 is returned in all other cases.
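 *
 * For illustration only (sizes assumed, not taken from this file): with
 * 512-block segments and a zone capacity ending 256 blocks into segment N,
 * segment N-1 reports 512 usable blocks, segment N reports 256, and any
 * later segment in the zone reports 0.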
4996  */
4997 static inline unsigned int f2fs_usable_zone_blks_in_seg(
4998                         struct f2fs_sb_info *sbi, unsigned int segno)
4999 {
5000         block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5001         unsigned int zone_idx, dev_idx, secno;
5002
5003         secno = GET_SEC_FROM_SEG(sbi, segno);
5004         seg_start = START_BLOCK(sbi, segno);
5005         dev_idx = f2fs_target_device_index(sbi, seg_start);
5006         zone_idx = get_zone_idx(sbi, secno, dev_idx);
5007
5008         /*
5009          * A conventional zone's capacity is always equal to the zone size,
5010          * so the number of blocks per segment is unchanged.
5011          */
5012         if (is_conv_zone(sbi, zone_idx, dev_idx))
5013                 return sbi->blocks_per_seg;
5014
5015         if (!sbi->unusable_blocks_per_sec)
5016                 return sbi->blocks_per_seg;
5017
5018         sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5019         sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5020
5021         /*
5022          * If the segment starts before the zone capacity and spans beyond
5023          * it, the usable blocks run from the segment start to the zone
5024          * capacity. If the segment starts at or after the zone capacity,
5025          * there are no usable blocks.
5026          */
5027         if (seg_start >= sec_cap_blkaddr)
5028                 return 0;
5029         if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
5030                 return sec_cap_blkaddr - seg_start;
5031
5032         return sbi->blocks_per_seg;
5033 }
5034 #else
5035 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5036 {
5037         return 0;
5038 }
5039
5040 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5041 {
5042         return 0;
5043 }
5044
5045 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5046                                                         unsigned int segno)
5047 {
5048         return 0;
5049 }
5050
5051 static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
5052                                                         unsigned int segno)
5053 {
5054         return 0;
5055 }
5056 #endif
5057 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5058                                         unsigned int segno)
5059 {
5060         if (f2fs_sb_has_blkzoned(sbi))
5061                 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5062
5063         return sbi->blocks_per_seg;
5064 }
5065
5066 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5067                                         unsigned int segno)
5068 {
5069         if (f2fs_sb_has_blkzoned(sbi))
5070                 return f2fs_usable_zone_segs_in_sec(sbi, segno);
5071
5072         return sbi->segs_per_sec;
5073 }
5074
5075 /*
5076  * Update min, max modified time for cost-benefit GC algorithm
5077  */
5078 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5079 {
5080         struct sit_info *sit_i = SIT_I(sbi);
5081         unsigned int segno;
5082
5083         down_write(&sit_i->sentry_lock);
5084
5085         sit_i->min_mtime = ULLONG_MAX;
5086
5087         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
5088                 unsigned int i;
5089                 unsigned long long mtime = 0;
5090
5091                 for (i = 0; i < sbi->segs_per_sec; i++)
5092                         mtime += get_seg_entry(sbi, segno + i)->mtime;
5093
5094                 mtime = div_u64(mtime, sbi->segs_per_sec);
5095
5096                 if (sit_i->min_mtime > mtime)
5097                         sit_i->min_mtime = mtime;
5098         }
5099         sit_i->max_mtime = get_mtime(sbi, false);
5100         sit_i->dirty_max_mtime = 0;
5101         up_write(&sit_i->sentry_lock);
5102 }
5103
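/*
 * Build the segment manager: fill f2fs_sm_info from the superblock and
 * checkpoint, set up the flush and discard command controls, and construct
 * the SIT, free/dirty segmaps, and cursegs.
 */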
5104 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5105 {
5106         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5107         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5108         struct f2fs_sm_info *sm_info;
5109         int err;
5110
5111         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5112         if (!sm_info)
5113                 return -ENOMEM;
5114
5115         /* init sm info */
5116         sbi->sm_info = sm_info;
5117         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5118         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5119         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5120         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5121         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5122         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5123         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5124         sm_info->rec_prefree_segments = sm_info->main_segments *
5125                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5126         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5127                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5128
5129         if (!f2fs_lfs_mode(sbi))
5130                 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
5131         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5132         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5133         sm_info->min_seq_blocks = sbi->blocks_per_seg;
5134         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5135         sm_info->min_ssr_sections = reserved_sections(sbi);
5136
5137         INIT_LIST_HEAD(&sm_info->sit_entry_set);
5138
5139         init_f2fs_rwsem(&sm_info->curseg_lock);
5140
5141         if (!f2fs_readonly(sbi->sb)) {
5142                 err = f2fs_create_flush_cmd_control(sbi);
5143                 if (err)
5144                         return err;
5145         }
5146
5147         err = create_discard_cmd_control(sbi);
5148         if (err)
5149                 return err;
5150
5151         err = build_sit_info(sbi);
5152         if (err)
5153                 return err;
5154         err = build_free_segmap(sbi);
5155         if (err)
5156                 return err;
5157         err = build_curseg(sbi);
5158         if (err)
5159                 return err;
5160
5161         /* reinit free segmap based on SIT */
5162         err = build_sit_entries(sbi);
5163         if (err)
5164                 return err;
5165
5166         init_free_segmap(sbi);
5167         err = build_dirty_segmap(sbi);
5168         if (err)
5169                 return err;
5170
5171         err = sanity_check_curseg(sbi);
5172         if (err)
5173                 return err;
5174
5175         init_min_max_mtime(sbi);
5176         return 0;
5177 }
5178
5179 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5180                 enum dirty_type dirty_type)
5181 {
5182         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5183
5184         mutex_lock(&dirty_i->seglist_lock);
5185         kvfree(dirty_i->dirty_segmap[dirty_type]);
5186         dirty_i->nr_dirty[dirty_type] = 0;
5187         mutex_unlock(&dirty_i->seglist_lock);
5188 }
5189
5190 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5191 {
5192         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5193
5194         kvfree(dirty_i->pinned_secmap);
5195         kvfree(dirty_i->victim_secmap);
5196 }
5197
5198 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5199 {
5200         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5201         int i;
5202
5203         if (!dirty_i)
5204                 return;
5205
5206         /* discard pre-free/dirty segments list */
5207         for (i = 0; i < NR_DIRTY_TYPE; i++)
5208                 discard_dirty_segmap(sbi, i);
5209
5210         if (__is_large_section(sbi)) {
5211                 mutex_lock(&dirty_i->seglist_lock);
5212                 kvfree(dirty_i->dirty_secmap);
5213                 mutex_unlock(&dirty_i->seglist_lock);
5214         }
5215
5216         destroy_victim_secmap(sbi);
5217         SM_I(sbi)->dirty_info = NULL;
5218         kfree(dirty_i);
5219 }
5220
5221 static void destroy_curseg(struct f2fs_sb_info *sbi)
5222 {
5223         struct curseg_info *array = SM_I(sbi)->curseg_array;
5224         int i;
5225
5226         if (!array)
5227                 return;
5228         SM_I(sbi)->curseg_array = NULL;
5229         for (i = 0; i < NR_CURSEG_TYPE; i++) {
5230                 kfree(array[i].sum_blk);
5231                 kfree(array[i].journal);
5232         }
5233         kfree(array);
5234 }
5235
5236 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5237 {
5238         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5239
5240         if (!free_i)
5241                 return;
5242         SM_I(sbi)->free_info = NULL;
5243         kvfree(free_i->free_segmap);
5244         kvfree(free_i->free_secmap);
5245         kfree(free_i);
5246 }
5247
5248 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5249 {
5250         struct sit_info *sit_i = SIT_I(sbi);
5251
5252         if (!sit_i)
5253                 return;
5254
5255         if (sit_i->sentries)
5256                 kvfree(sit_i->bitmap);
5257         kfree(sit_i->tmp_map);
5258
5259         kvfree(sit_i->sentries);
5260         kvfree(sit_i->sec_entries);
5261         kvfree(sit_i->dirty_sentries_bitmap);
5262
5263         SM_I(sbi)->sit_info = NULL;
5264         kvfree(sit_i->sit_bitmap);
5265 #ifdef CONFIG_F2FS_CHECK_FS
5266         kvfree(sit_i->sit_bitmap_mir);
5267         kvfree(sit_i->invalid_segmap);
5268 #endif
5269         kfree(sit_i);
5270 }
5271
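/* Release everything allocated by f2fs_build_segment_manager(). */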
5272 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5273 {
5274         struct f2fs_sm_info *sm_info = SM_I(sbi);
5275
5276         if (!sm_info)
5277                 return;
5278         f2fs_destroy_flush_cmd_control(sbi, true);
5279         destroy_discard_cmd_control(sbi);
5280         destroy_dirty_segmap(sbi);
5281         destroy_curseg(sbi);
5282         destroy_free_segmap(sbi);
5283         destroy_sit_info(sbi);
5284         sbi->sm_info = NULL;
5285         kfree(sm_info);
5286 }
5287
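/*
 * Create the slab caches for discard entries, discard commands, SIT entry
 * sets, and revoke entries; on failure, destroy the ones already created.
 */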
5288 int __init f2fs_create_segment_manager_caches(void)
5289 {
5290         discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5291                         sizeof(struct discard_entry));
5292         if (!discard_entry_slab)
5293                 goto fail;
5294
5295         discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5296                         sizeof(struct discard_cmd));
5297         if (!discard_cmd_slab)
5298                 goto destroy_discard_entry;
5299
5300         sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5301                         sizeof(struct sit_entry_set));
5302         if (!sit_entry_set_slab)
5303                 goto destroy_discard_cmd;
5304
5305         revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5306                         sizeof(struct revoke_entry));
5307         if (!revoke_entry_slab)
5308                 goto destroy_sit_entry_set;
5309         return 0;
5310
5311 destroy_sit_entry_set:
5312         kmem_cache_destroy(sit_entry_set_slab);
5313 destroy_discard_cmd:
5314         kmem_cache_destroy(discard_cmd_slab);
5315 destroy_discard_entry:
5316         kmem_cache_destroy(discard_entry_slab);
5317 fail:
5318         return -ENOMEM;
5319 }
5320
5321 void f2fs_destroy_segment_manager_caches(void)
5322 {
5323         kmem_cache_destroy(sit_entry_set_slab);
5324         kmem_cache_destroy(discard_cmd_slab);
5325         kmem_cache_destroy(discard_entry_slab);
5326         kmem_cache_destroy(revoke_entry_slab);
5327 }