linux.git: drivers/lightnvm/pblk-gc.c (as of "lightnvm: pblk: redesign GC algorithm")
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <[email protected]>
 *                  Matias Bjorling <[email protected]>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include <linux/delay.h>

static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
        kfree(gc_rq->data);
        kfree(gc_rq);
}

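/*
 * Drain the GC write list: detach the whole list under w_lock, then hand
 * each request's payload to the write buffer via pblk_write_gc_to_cache().
 * Returns 1 when the list was empty so the writer kthread can go to sleep.
 */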
static int pblk_gc_write(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_gc_rq *gc_rq, *tgc_rq;
        LIST_HEAD(w_list);

        spin_lock(&gc->w_lock);
        if (list_empty(&gc->w_list)) {
                spin_unlock(&gc->w_lock);
                return 1;
        }

        list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
        gc->w_entries = 0;
        spin_unlock(&gc->w_lock);

        list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
                pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
                                gc_rq->nr_secs, gc_rq->secs_to_gc,
                                gc_rq->line, PBLK_IOTYPE_GC);

                list_del(&gc_rq->list);
                kref_put(&gc_rq->line->ref, pblk_line_put);
                pblk_gc_free_gc_rq(gc_rq);
        }

        return 0;
}

static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
        wake_up_process(gc->gc_writer_ts);
}

/*
 * Responsible for managing all memory related to a gc request. On failure
 * (or when no sectors need moving) it frees both the request and its data
 * buffer and drops the reference on the victim line.
 */
static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line = gc_rq->line;
        void *data;
        unsigned int secs_to_gc;
        int ret = 0;

        data = kmalloc(gc_rq->nr_secs * geo->sec_size, GFP_KERNEL);
        if (!data) {
                ret = -ENOMEM;
                goto free_rq;
        }

        /* Read from GC victim block */
        if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
                                                        &secs_to_gc, line)) {
                ret = -EFAULT;
                goto free_data;
        }

        /* Nothing valid left to move; drop the request silently */
        if (!secs_to_gc)
                goto free_data;

        gc_rq->data = data;
        gc_rq->secs_to_gc = secs_to_gc;

retry:
        spin_lock(&gc->w_lock);
        if (gc->w_entries >= PBLK_GC_W_QD) {
                /* Writer queue is full: kick the writer and back off */
                spin_unlock(&gc->w_lock);
                pblk_gc_writer_kick(gc);
                usleep_range(128, 256);
                goto retry;
        }
        gc->w_entries++;
        list_add_tail(&gc_rq->list, &gc->w_list);
        spin_unlock(&gc->w_lock);

        pblk_gc_writer_kick(gc);

        return 0;

free_data:
        kfree(data);
free_rq:
        kfree(gc_rq);
        kref_put(&line->ref, pblk_line_put);
        return ret;
}

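/*
 * Put a line that could not be garbage collected back on the gc group list
 * chosen by pblk_line_gc_list() for its current state.
 */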
static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list;

        spin_lock(&line->lock);
        WARN_ON(line->state != PBLK_LINESTATE_GC);
        line->state = PBLK_LINESTATE_CLOSED;
        move_list = pblk_line_gc_list(pblk, line);
        spin_unlock(&line->lock);

        if (move_list) {
                spin_lock(&l_mg->gc_lock);
                list_add_tail(&line->list, move_list);
                spin_unlock(&l_mg->gc_lock);
        }
}

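/*
 * Work item moving the valid sectors of a single gc request. It runs on the
 * line reader workqueue and releases the gc semaphore taken when the request
 * was queued, so that pblk_gc_line_prepare_ws() can queue the next one.
 */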
static void pblk_gc_line_ws(struct work_struct *work)
{
        struct pblk_line_ws *line_rq_ws = container_of(work,
                                                struct pblk_line_ws, ws);
        struct pblk *pblk = line_rq_ws->pblk;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line = line_rq_ws->line;
        struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
        int nr_secs = gc_rq->nr_secs;

        up(&gc->gc_sem);

        /* gc_rq is freed by pblk_gc_move_valid_secs() on failure, so the
         * sector count for the error message is taken beforehand.
         */
        if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
                pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
                                                line->id, *line->vsc,
                                                nr_secs);
        }

        mempool_free(line_rq_ws, pblk->line_ws_pool);
}

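/*
 * Work item preparing a victim line for collection: read the line's emeta to
 * recover the lba list, then walk the invalid bitmap and pack the remaining
 * valid sectors into gc requests of at most max_write_pgs lbas each, queueing
 * one read work item per request.
 */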
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
        struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
                                                                        ws);
        struct pblk *pblk = line_ws->pblk;
        struct pblk_line *line = line_ws->line;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_gc *gc = &pblk->gc;
        struct line_emeta *emeta_buf;
        struct pblk_line_ws *line_rq_ws;
        struct pblk_gc_rq *gc_rq;
        __le64 *lba_list;
        int sec_left, nr_secs, bit;
        int ret;

        emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
                                                                GFP_KERNEL);
        if (!emeta_buf) {
                pr_err("pblk: cannot use GC emeta\n");
                return;
        }

        ret = pblk_line_read_emeta(pblk, line, emeta_buf);
        if (ret) {
                pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
                goto fail_free_emeta;
        }

        /* If this read fails, it means that emeta is corrupted. For now, leave
         * the line untouched. TODO: Implement a recovery routine that scans and
         * moves all sectors on the line.
         */
        lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
        if (!lba_list) {
                pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
                goto fail_free_emeta;
        }

        sec_left = pblk_line_vsc(line);
        if (sec_left < 0) {
                pr_err("pblk: corrupted GC line (%d)\n", line->id);
                goto fail_free_emeta;
        }

        bit = -1;
next_rq:
        gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
        if (!gc_rq)
                goto fail_free_emeta;

        nr_secs = 0;
        do {
                bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
                                                                bit + 1);
                if (bit > line->emeta_ssec)
                        break;

                gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
        } while (nr_secs < pblk->max_write_pgs);

        if (unlikely(!nr_secs)) {
                kfree(gc_rq);
                goto out;
        }

        gc_rq->nr_secs = nr_secs;
        gc_rq->line = line;

        line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
        if (!line_rq_ws)
                goto fail_free_gc_rq;

        line_rq_ws->pblk = pblk;
        line_rq_ws->line = line;
        line_rq_ws->priv = gc_rq;

        down(&gc->gc_sem);
        kref_get(&line->ref);

        INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
        queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);

        sec_left -= nr_secs;
        if (sec_left > 0)
                goto next_rq;

out:
        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
        mempool_free(line_ws, pblk->line_ws_pool);

        kref_put(&line->ref, pblk_line_put);
        atomic_dec(&gc->inflight_gc);

        return;

fail_free_gc_rq:
        kfree(gc_rq);
fail_free_emeta:
        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
        pblk_put_line_back(pblk, line);
        kref_put(&line->ref, pblk_line_put);
        mempool_free(line_ws, pblk->line_ws_pool);
        atomic_dec(&gc->inflight_gc);

        pr_err("pblk: Failed to GC line %d\n", line->id);
}

static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line_ws *line_ws;

        pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

        line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
        if (!line_ws)
                return -ENOMEM;

        line_ws->pblk = pblk;
        line_ws->line = line;

        INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
        queue_work(gc->gc_reader_wq, &line_ws->ws);

        return 0;
}

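/*
 * Pop one victim line off the gc read list and queue its preparation.
 * Returns 1 when the list is empty so the reader kthread can go to sleep.
 */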
static int pblk_gc_read(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line;

        spin_lock(&gc->r_lock);
        if (list_empty(&gc->r_list)) {
                spin_unlock(&gc->r_lock);
                return 1;
        }

        line = list_first_entry(&gc->r_list, struct pblk_line, list);
        list_del(&line->list);
        spin_unlock(&gc->r_lock);

        pblk_gc_kick(pblk);

        if (pblk_gc_line(pblk, line))
                pr_err("pblk: failed to GC line %d\n", line->id);

        return 0;
}

static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
        wake_up_process(gc->gc_reader_ts);
}

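/* Greedy victim selection: pick the line with the fewest valid sectors */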
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
                                                 struct list_head *group_list)
{
        struct pblk_line *line, *victim;

        victim = list_first_entry(group_list, struct pblk_line, list);
        list_for_each_entry(line, group_list, list) {
                if (*line->vsc < *victim->vsc)
                        victim = line;
        }

        return victim;
}

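/*
 * GC should run while it is active and the number of free blocks is below
 * the rate-limiter's high threshold.
 */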
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
        unsigned int nr_blocks_free, nr_blocks_need;

        nr_blocks_need = pblk_rl_high_thrs(rl);
        nr_blocks_free = pblk_rl_nr_free_blks(rl);

        /* This is not critical, no need to take lock here */
        return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}

/*
 * Lines with no valid sectors will be returned to the free list immediately.
 * If GC is activated - either because the free block count is under the
 * determined threshold, or because it is being forced from user space - only
 * lines with a high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line;
        struct list_head *group_list;
        bool run_gc;
        int inflight_gc, gc_group = 0, prev_group = 0;

        do {
                spin_lock(&l_mg->gc_lock);
                if (list_empty(&l_mg->gc_full_list)) {
                        spin_unlock(&l_mg->gc_lock);
                        break;
                }

                line = list_first_entry(&l_mg->gc_full_list,
                                                        struct pblk_line, list);

                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);

                list_del(&line->list);
                spin_unlock(&l_mg->gc_lock);

                kref_put(&line->ref, pblk_line_put);
        } while (1);

        run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
        if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
                return;

next_gc_group:
        group_list = l_mg->gc_lists[gc_group++];

        do {
                spin_lock(&l_mg->gc_lock);
                if (list_empty(group_list)) {
                        spin_unlock(&l_mg->gc_lock);
                        break;
                }

                line = pblk_gc_get_victim_line(pblk, group_list);

                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);

                list_del(&line->list);
                spin_unlock(&l_mg->gc_lock);

                spin_lock(&gc->r_lock);
                list_add_tail(&line->list, &gc->r_list);
                spin_unlock(&gc->r_lock);

                inflight_gc = atomic_inc_return(&gc->inflight_gc);
                pblk_gc_reader_kick(gc);

                prev_group = 1;

                /* No need to queue up more GC lines than we can handle */
                run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
                if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
                        break;
        } while (1);

        if (!prev_group && pblk->rl.rb_state > gc_group &&
                                                gc_group < PBLK_GC_NR_LISTS)
                goto next_gc_group;
}

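/*
 * Wake up all gc kthreads and re-arm the gc timer so progress is
 * re-evaluated at least every GC_TIME_MSECS milliseconds.
 */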
void pblk_gc_kick(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        wake_up_process(gc->gc_ts);
        pblk_gc_writer_kick(gc);
        pblk_gc_reader_kick(gc);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}

static void pblk_gc_timer(unsigned long data)
{
        struct pblk *pblk = (struct pblk *)data;

        pblk_gc_kick(pblk);
}

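/*
 * The three gc kthreads below share the same pattern: do work while there is
 * any, then sleep until kicked. The main thread selects victim lines, the
 * reader thread turns them into read work items, and the writer thread moves
 * the collected data into the write buffer.
 */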
static int pblk_gc_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                pblk_gc_run(pblk);
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

static int pblk_gc_writer_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                if (!pblk_gc_write(pblk))
                        continue;
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

static int pblk_gc_reader_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                if (!pblk_gc_read(pblk))
                        continue;
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

static void pblk_gc_start(struct pblk *pblk)
{
        pblk->gc.gc_active = 1;
        pr_debug("pblk: gc start\n");
}

void pblk_gc_should_start(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        if (gc->gc_enabled && !gc->gc_active)
                pblk_gc_start(pblk);

        pblk_gc_kick(pblk);
}

/*
 * If flush_wq == 1 then no lock should be held by the caller since
 * flush_workqueue can sleep
 */
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
        pblk->gc.gc_active = 0;
        pr_debug("pblk: gc stop\n");
}

void pblk_gc_should_stop(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        if (gc->gc_active && !gc->gc_forced)
                pblk_gc_stop(pblk, 0);
}

void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
                              int *gc_active)
{
        struct pblk_gc *gc = &pblk->gc;

        spin_lock(&gc->lock);
        *gc_enabled = gc->gc_enabled;
        *gc_active = gc->gc_active;
        spin_unlock(&gc->lock);
}

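/*
 * Sysfs hook to force gc on or off. Forcing gc on also enables it; clearing
 * the force disables gc entirely until it is enabled again.
 */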
int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
        struct pblk_gc *gc = &pblk->gc;

        if (force < 0 || force > 1)
                return -EINVAL;

        spin_lock(&gc->lock);
        gc->gc_forced = force;

        if (force)
                gc->gc_enabled = 1;
        else
                gc->gc_enabled = 0;
        spin_unlock(&gc->lock);

        pblk_gc_should_start(pblk);

        return 0;
}

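/*
 * Spawn the gc kthreads, timer and workqueues. The gc semaphore bounds the
 * number of outstanding read work items, and the r_list/w_list pairs decouple
 * victim selection, valid-sector reads and cache writes from each other.
 */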
int pblk_gc_init(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        int ret;

        gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
        if (IS_ERR(gc->gc_ts)) {
                pr_err("pblk: could not allocate GC main kthread\n");
                return PTR_ERR(gc->gc_ts);
        }

        gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
                                                        "pblk-gc-writer-ts");
        if (IS_ERR(gc->gc_writer_ts)) {
                pr_err("pblk: could not allocate GC writer kthread\n");
                ret = PTR_ERR(gc->gc_writer_ts);
                goto fail_free_main_kthread;
        }

        gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
                                                        "pblk-gc-reader-ts");
        if (IS_ERR(gc->gc_reader_ts)) {
                pr_err("pblk: could not allocate GC reader kthread\n");
                ret = PTR_ERR(gc->gc_reader_ts);
                goto fail_free_writer_kthread;
        }

        setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

        gc->gc_active = 0;
        gc->gc_forced = 0;
        gc->gc_enabled = 1;
        gc->w_entries = 0;
        atomic_set(&gc->inflight_gc, 0);

        /* Workqueue that reads valid sectors from a line and submits them to
         * the GC writer to be recycled.
         */
        gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
        if (!gc->gc_line_reader_wq) {
                pr_err("pblk: could not allocate GC line reader workqueue\n");
                ret = -ENOMEM;
                goto fail_free_reader_kthread;
        }

        /* Workqueue that prepares lines for GC */
        gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
                                        WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
        if (!gc->gc_reader_wq) {
                pr_err("pblk: could not allocate GC reader workqueue\n");
                ret = -ENOMEM;
                goto fail_free_reader_line_wq;
        }

        spin_lock_init(&gc->lock);
        spin_lock_init(&gc->w_lock);
        spin_lock_init(&gc->r_lock);

        sema_init(&gc->gc_sem, 128);

        INIT_LIST_HEAD(&gc->w_list);
        INIT_LIST_HEAD(&gc->r_list);

        return 0;

fail_free_reader_line_wq:
        destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
        kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
        kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
        kthread_stop(gc->gc_ts);

        return ret;
}

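/*
 * Tear down gc: flush outstanding work, disarm the timer, deactivate gc and
 * then stop the kthreads and destroy the workqueues.
 */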
void pblk_gc_exit(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        flush_workqueue(gc->gc_reader_wq);
        flush_workqueue(gc->gc_line_reader_wq);

        del_timer(&gc->gc_timer);
        pblk_gc_stop(pblk, 1);

        if (gc->gc_ts)
                kthread_stop(gc->gc_ts);

        if (gc->gc_reader_wq)
                destroy_workqueue(gc->gc_reader_wq);

        if (gc->gc_line_reader_wq)
                destroy_workqueue(gc->gc_line_reader_wq);

        if (gc->gc_writer_ts)
                kthread_stop(gc->gc_writer_ts);

        if (gc->gc_reader_ts)
                kthread_stop(gc->gc_reader_ts);
}