1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2016 CNEX Labs
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * pblk-recovery.c - pblk's recovery path
19 #include "pblk-trace.h"
/*
 * Validate a line's end metadata (emeta) buffer.
 *
 * Recomputes the CRC of @emeta_buf with pblk_calc_emeta_crc() and compares it
 * with the CRC stored in the buffer, then checks that the header identifier
 * equals PBLK_MAGIC.
 *
 * NOTE(review): the return statements are not visible here. The caller in
 * pblk_recov_l2p() treats a non-zero result as "emeta unusable" and falls
 * back to OOB scanning - confirm the exact return values.
 */
21 int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
/* The stored CRC is little-endian; convert before comparing. */
25 crc = pblk_calc_emeta_crc(pblk, emeta_buf);
26 if (le32_to_cpu(emeta_buf->crc) != crc)
/* The header identifier must match the pblk magic value. */
29 if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
/*
 * Rebuild the L2P entries of @line from the LBA list stored in its emeta.
 *
 * Iterates over every sector address from the first one after smeta
 * (pblk_line_smeta_start() + lm->smeta_sec) up to line->emeta_ssec. Sectors
 * whose block position is set in line->blk_bitmap are tested for, entries
 * equal to ADDR_EMPTY are marked in line->invalid_bitmap, and other LBAs are
 * passed to pblk_update_map() together with their physical address. At the
 * end the nr_valid_lbas value read from emeta is compared with nr_lbas and a
 * mismatch is logged.
 *
 * NOTE(review): nr_lbas starts at 0; the statement that advances it and the
 * function's return value are not visible here - confirm. The caller in
 * pblk_recov_l2p() falls back to OOB scanning on a non-zero result.
 */
35 static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
37 struct nvm_tgt_dev *dev = pblk->dev;
38 struct nvm_geo *geo = &dev->geo;
39 struct pblk_line_meta *lm = &pblk->lm;
40 struct pblk_emeta *emeta = line->emeta;
41 struct line_emeta *emeta_buf = emeta->buf;
43 u64 data_start, data_end;
44 u64 nr_valid_lbas, nr_lbas = 0;
/* LBA list embedded in the emeta buffer; entries are little-endian 64-bit. */
47 lba_list = emeta_to_lbas(pblk, emeta_buf);
/* Data sectors start right after smeta and end where emeta begins. */
51 data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
52 data_end = line->emeta_ssec;
53 nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
55 for (i = data_start; i < data_end; i++) {
/* Translate the in-line sector offset to a device address and block position. */
59 ppa = addr_to_gen_ppa(pblk, i, line->id);
60 pos = pblk_ppa_to_pos(geo, ppa);
62 /* Do not update bad blocks */
63 if (test_bit(pos, line->blk_bitmap))
/*
 * Empty entry: mark sector i invalid and adjust line->vsc (presumably the
 * valid sector count), both under line->lock. Finding the bit already set
 * triggers a one-time warning.
 */
66 if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
67 spin_lock(&line->lock);
68 if (test_and_set_bit(i, line->invalid_bitmap))
69 WARN_ONCE(1, "pblk: rec. double invalidate:\n");
71 le32_add_cpu(line->vsc, -1);
72 spin_unlock(&line->lock);
/* Install the mapping from the recorded LBA to this physical address. */
77 pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
/* Cross-check the count recorded in emeta against what was counted here. */
81 if (nr_valid_lbas != nr_lbas)
82 pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n",
83 line->id, nr_valid_lbas, nr_lbas);
/*
 * Replay sector allocation on @line for sectors that were already written.
 *
 * Calls pblk_alloc_page() once per pblk->min_write_pgs step until
 * written_secs is covered; the values it returns are not used here.
 *
 * NOTE(review): the remainder of the parameter list is not visible;
 * written_secs is presumably the third parameter - confirm.
 */
90 static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
95 for (i = 0; i < written_secs; i += pblk->min_write_pgs)
96 pblk_alloc_page(pblk, line, pblk->min_write_pgs);
/*
 * Work out how many sectors have been written to an open line by summing the
 * write pointers (wp) of its chunks, and bring the line's allocation state
 * up to date through pblk_update_line_wp().
 *
 * NOTE(review): the body of the NVM_CHK_ST_OFFLINE branch, the update of
 * valid_chunks and the return statement are not visible here - confirm that
 * offline chunks are excluded from the sum and what value is returned.
 */
99 static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
101 struct pblk_line_meta *lm = &pblk->lm;
/* Number of blocks flagged in the line's block bitmap. */
102 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
103 u64 written_secs = 0;
104 int valid_chunks = 0;
107 for (i = 0; i < lm->blk_per_line; i++) {
108 struct nvm_chk_meta *chunk = &line->chks[i];
110 if (chunk->state & NVM_CHK_ST_OFFLINE)
113 written_secs += chunk->wp;
/* The chunks counted should equal the blocks not flagged in blk_bitmap. */
117 if (lm->blk_per_line - nr_bb != valid_chunks)
118 pblk_err(pblk, "recovery line %d is bad\n", line->id);
/* The smeta sectors are subtracted before replaying the allocation. */
120 pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);
/*
 * Bundle of pre-allocated buffers handed (by value) from
 * pblk_recov_l2p_from_oob() to pblk_recov_scan_oob().
 *
 * NOTE(review): further members may exist that are not visible here.
 */
125 struct pblk_recov_alloc {
/* Physical address list used to build the read requests. */
126 struct ppa_addr *ppa_list;
/* Per-sector metadata list; its entries carry the lba read back from media. */
127 struct pblk_sec_meta *meta_list;
/* DMA addresses that correspond to ppa_list and meta_list respectively. */
130 dma_addr_t dma_ppa_list;
131 dma_addr_t dma_meta_list;
/*
 * kref release callback for a pad request (passed to kref_put() by the pad
 * path): recovers the enclosing struct pblk_pad_rq from @ref and signals its
 * completion so that the waiter in pblk_recov_pad_line() can proceed.
 */
134 static void pblk_recov_complete(struct kref *ref)
136 struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
138 complete(&pad_rq->wait);
/*
 * Completion handler installed as rqd->end_io for padding writes issued by
 * pblk_recov_pad_line(). rqd->private carries the shared struct pblk_pad_rq.
 */
141 static void pblk_end_io_recov(struct nvm_rq *rqd)
143 struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
144 struct pblk_pad_rq *pad_rq = rqd->private;
145 struct pblk *pblk = pad_rq->pblk;
/* Pairs with the pblk_down_chunk() done on the first ppa before submission. */
147 pblk_up_chunk(pblk, ppa_list[0]);
149 pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
/* Account for the finished I/O and drop this request's reference on pad_rq. */
151 atomic_dec(&pblk->inflight_io);
152 kref_put(&pad_rq->ref, pblk_recov_complete);
/*
 * Write padding (empty) sectors to @line.
 *
 * For each request: sizes it with pblk_calc_secs(), maps a zero-filled
 * buffer into a write bio, fills the ppa list while stepping over positions
 * flagged in line->blk_bitmap, marks every padded sector as invalid and as
 * ADDR_EMPTY in both the emeta LBA list and the request's metadata, and
 * submits the request. Each submitted request holds a reference on a shared
 * struct pblk_pad_rq; after dropping the initial reference the function
 * waits (with a timeout) for the completion signalled by
 * pblk_recov_complete().
 *
 * NOTE(review): the third parameter, the labels/gotos and the return
 * statements are not visible here. left_ppas is presumably the third
 * parameter (callers pass a sector count) - confirm.
 */
155 /* pad line using line bitmap. */
156 static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
159 struct nvm_tgt_dev *dev = pblk->dev;
160 struct nvm_geo *geo = &dev->geo;
161 struct pblk_sec_meta *meta_list;
162 struct pblk_pad_rq *pad_rq;
166 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
167 u64 w_ptr = line->cur_sec;
168 int left_line_ppas, rq_ppas, rq_len;
/* Snapshot the sectors still free in the line under its lock. */
172 spin_lock(&line->lock);
173 left_line_ppas = line->left_msecs;
174 spin_unlock(&line->lock);
/* NOTE(review): no NULL check on this allocation is visible - confirm. */
176 pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
/* Zero-filled payload, sized for the largest request (max_write_pgs sectors). */
180 data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs));
/* The initial reference taken here is dropped after the last submission. */
187 init_completion(&pad_rq->wait);
188 kref_init(&pad_rq->ref);
/* A request smaller than min_write_pgs is reported as a corrupted pad line. */
191 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
192 if (rq_ppas < pblk->min_write_pgs) {
193 pblk_err(pblk, "corrupted pad line %d\n", line->id);
197 rq_len = rq_ppas * geo->csecs;
199 bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
200 PBLK_VMALLOC_META, GFP_KERNEL);
206 bio->bi_iter.bi_sector = 0; /* internal bio */
207 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
/* Internal write request; completion is handled by pblk_end_io_recov(). */
209 rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
211 ret = pblk_alloc_rqd_meta(pblk, rqd);
216 rqd->opcode = NVM_OP_PWRITE;
218 rqd->nr_ppas = rq_ppas;
219 rqd->end_io = pblk_end_io_recov;
220 rqd->private = pad_rq;
222 meta_list = rqd->meta_list;
/* i is advanced by the inner loop, min_write_pgs sectors at a time. */
224 for (i = 0; i < rqd->nr_ppas; ) {
228 w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
229 ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
230 pos = pblk_ppa_to_pos(geo, ppa);
/* Step over positions flagged in the block bitmap. */
232 while (test_bit(pos, line->blk_bitmap)) {
233 w_ptr += pblk->min_write_pgs;
234 ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
235 pos = pblk_ppa_to_pos(geo, ppa);
/*
 * Each padded sector is invalidated in the map and recorded as ADDR_EMPTY
 * in both the emeta LBA list and the request's per-sector metadata.
 */
238 for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
239 struct ppa_addr dev_ppa;
240 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
242 dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
244 pblk_map_invalidate(pblk, dev_ppa);
245 lba_list[w_ptr] = meta_list[i].lba = addr_empty;
246 rqd->ppa_list[i] = dev_ppa;
/* One reference per in-flight request; released in pblk_end_io_recov(). */
250 kref_get(&pad_rq->ref);
251 pblk_down_chunk(pblk, rqd->ppa_list[0]);
253 ret = pblk_submit_io(pblk, rqd);
/* Submission failed: undo the pblk_down_chunk() taken just above. */
255 pblk_err(pblk, "I/O submission failed: %d\n", ret);
256 pblk_up_chunk(pblk, rqd->ppa_list[0]);
/* Account for the sectors just submitted; presumably loops while both remain. */
260 left_line_ppas -= rq_ppas;
261 left_ppas -= rq_ppas;
262 if (left_ppas && left_line_ppas)
/* Drop the initial reference so the last completing request signals wait. */
265 kref_put(&pad_rq->ref, pblk_recov_complete);
267 if (!wait_for_completion_io_timeout(&pad_rq->wait,
268 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
269 pblk_err(pblk, "pad write timed out\n");
273 if (!pblk_line_is_full(line))
274 pblk_err(pblk, "corrupted padded line: %d\n", line->id);
/* Presumably part of the error-path cleanup; the label is not visible here. */
282 pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
/*
 * Number of sectors to pad after a failed recovery read: the product of the
 * geometry's mw_cunits, all_luns and ws_opt values, capped at the sectors
 * still left in @line (line->left_msecs).
 */
290 static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
292 struct nvm_tgt_dev *dev = pblk->dev;
293 struct nvm_geo *geo = &dev->geo;
294 int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt;
/* Never ask for more padding than the line can still hold. */
296 return (distance > line->left_msecs) ? line->left_msecs : distance;
/*
 * Compare the write pointers of the chunks that make up @line.
 *
 * The chunk belonging to pblk->luns[0] is looked up first, then the chunk of
 * every other LUN is compared against line_wp in both directions.
 *
 * NOTE(review): the assignments to ppa and line_wp and all return statements
 * are not visible here - confirm what each comparison returns. The caller
 * only logs a warning when the result is non-zero.
 */
299 static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
300 struct pblk_line *line)
302 struct nvm_tgt_dev *dev = pblk->dev;
303 struct nvm_geo *geo = &dev->geo;
304 struct pblk_line_meta *lm = &pblk->lm;
305 struct pblk_lun *rlun;
306 struct nvm_chk_meta *chunk;
/* Reference chunk: the one that belongs to the first LUN. */
311 rlun = &pblk->luns[0];
313 pos = pblk_ppa_to_pos(geo, ppa);
314 chunk = &line->chks[pos];
/* Remaining LUNs, starting at index 1. */
318 for (i = 1; i < lm->blk_per_line; i++) {
319 rlun = &pblk->luns[i];
321 pos = pblk_ppa_to_pos(geo, ppa);
322 chunk = &line->chks[pos];
324 if (chunk->wp > line_wp)
326 else if (chunk->wp < line_wp)
/*
 * Recover the L2P mappings of @line by reading its written sectors back and
 * taking each sector's lba from the per-sector metadata returned with the
 * read.
 *
 * The number of sectors to scan comes from pblk_sec_in_open_line(). Reads
 * are issued synchronously with the buffers supplied in @p; positions
 * flagged in line->blk_bitmap are stepped over when building the ppa list.
 * When a read fails, the line is padded by pblk_pad_distance() sectors via
 * pblk_recov_pad_line() as a best effort. Every lba read back is stored in
 * the emeta LBA list; valid ones are counted in line->nr_valid_lbas and
 * installed with pblk_update_map().
 *
 * NOTE(review): the declarations of rqd, data, paddr and padded, the
 * loop/goto structure and the return statements are not visible here.
 */
333 static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
334 struct pblk_recov_alloc p)
336 struct nvm_tgt_dev *dev = pblk->dev;
337 struct nvm_geo *geo = &dev->geo;
338 struct ppa_addr *ppa_list;
339 struct pblk_sec_meta *meta_list;
343 dma_addr_t dma_ppa_list, dma_meta_list;
/* Sectors written so far, derived from the chunks' write pointers. */
350 u64 left_ppas = pblk_sec_in_open_line(pblk, line);
352 if (pblk_line_wp_is_unbalanced(pblk, line))
353 pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
/* Unpack the caller-provided buffers. */
355 ppa_list = p.ppa_list;
356 meta_list = p.meta_list;
359 dma_ppa_list = p.dma_ppa_list;
360 dma_meta_list = p.dma_meta_list;
362 lba_list = emeta_to_lbas(pblk, line->emeta->buf);
/* The request descriptor is cleared before each use. */
365 memset(rqd, 0, pblk_g_rq_size);
/* NOTE(review): the condition guarding the min_write_pgs fallback is not visible. */
367 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
369 rq_ppas = pblk->min_write_pgs;
370 rq_len = rq_ppas * geo->csecs;
372 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
376 bio->bi_iter.bi_sector = 0; /* internal bio */
377 bio_set_op_attrs(bio, REQ_OP_READ, 0);
380 rqd->opcode = NVM_OP_PREAD;
381 rqd->meta_list = meta_list;
382 rqd->nr_ppas = rq_ppas;
383 rqd->ppa_list = ppa_list;
384 rqd->dma_ppa_list = dma_ppa_list;
385 rqd->dma_meta_list = dma_meta_list;
387 if (pblk_io_aligned(pblk, rq_ppas))
/* i is advanced by the inner loop, min_write_pgs sectors at a time. */
391 for (i = 0; i < rqd->nr_ppas; ) {
395 ppa = addr_to_gen_ppa(pblk, paddr, line->id);
396 pos = pblk_ppa_to_pos(geo, ppa);
/* Step over positions flagged in the block bitmap. */
398 while (test_bit(pos, line->blk_bitmap)) {
399 paddr += pblk->min_write_pgs;
400 ppa = addr_to_gen_ppa(pblk, paddr, line->id);
401 pos = pblk_ppa_to_pos(geo, ppa);
404 for (j = 0; j < pblk->min_write_pgs; j++, i++)
406 addr_to_gen_ppa(pblk, paddr + j, line->id);
409 ret = pblk_submit_io_sync(pblk, rqd);
411 pblk_err(pblk, "I/O submission failed: %d\n", ret);
416 atomic_dec(&pblk->inflight_io);
418 /* If a read fails, do a best effort by padding the line and retrying */
/* NOTE(review): this inner 'ret' shadows the function-level 'ret' used above. */
420 int pad_distance, ret;
423 pblk_log_read_err(pblk, rqd);
427 pad_distance = pblk_pad_distance(pblk, line);
428 ret = pblk_recov_pad_line(pblk, line, pad_distance);
/* Record every lba read back, then map only the valid ones. */
436 for (i = 0; i < rqd->nr_ppas; i++) {
437 u64 lba = le64_to_cpu(meta_list[i].lba);
439 lba_list[paddr++] = cpu_to_le64(lba);
/*
 * NOTE(review): if rl.nr_secs is the number of addressable sectors, then
 * lba == nr_secs is also out of range and this test should probably be
 * '>=' - confirm against the bound enforced by pblk_update_map().
 */
441 if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
444 line->nr_valid_lbas++;
445 pblk_update_map(pblk, lba, rqd->ppa_list[i]);
448 left_ppas -= rq_ppas;
/* Debug builds only: a padded line is expected to be full. */
452 #ifdef CONFIG_NVM_PBLK_DEBUG
453 WARN_ON(padded && !pblk_line_is_full(line));
/*
 * Allocate the buffers needed for an OOB scan of @line, run
 * pblk_recov_scan_oob(), close the line if it turned out to be full, and
 * release the buffers again.
 *
 * NOTE(review): the allocation-failure checks, the labels and the return
 * statements are not visible here; neither is a kfree() of 'data' - confirm
 * that it is released on every path.
 */
459 /* Scan line for lbas on out of bound area */
460 static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
462 struct nvm_tgt_dev *dev = pblk->dev;
463 struct nvm_geo *geo = &dev->geo;
465 struct ppa_addr *ppa_list;
466 struct pblk_sec_meta *meta_list;
467 struct pblk_recov_alloc p;
469 dma_addr_t dma_ppa_list, dma_meta_list;
/*
 * A single DMA allocation backs both lists: the ppa list sits
 * pblk_dma_meta_size bytes after the start of the metadata list, for both
 * the CPU pointer and the DMA address.
 */
472 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
476 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
477 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
/* Zeroed data buffer sized for max_write_pgs sectors of csecs bytes each. */
479 data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
/* Request descriptor taken from the r_rq_pool mempool and cleared. */
485 rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
486 memset(rqd, 0, pblk_g_rq_size);
/* Hand the buffers to the scanner in one structure. */
488 p.ppa_list = ppa_list;
489 p.meta_list = meta_list;
492 p.dma_ppa_list = dma_ppa_list;
493 p.dma_meta_list = dma_meta_list;
495 ret = pblk_recov_scan_oob(pblk, line, p);
/* NOTE(review): "form" in the message below looks like a typo for "from". */
497 pblk_err(pblk, "could not recover L2P form OOB\n");
501 if (pblk_line_is_full(line))
502 pblk_line_recov_close(pblk, line);
/* Cleanup: return the descriptor to its pool and free the DMA area. */
505 mempool_free(rqd, &pblk->r_rq_pool);
508 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
/*
 * Sorted insert of @line into @head, keyed on seq_nr.
 *
 * The walk looks for the first entry whose seq_nr is greater than the new
 * line's, and @line is then linked in immediately before entry t.
 *
 * NOTE(review): the statement that leaves the loop on a match is not visible
 * here. If no entry matches, list_for_each_entry() leaves t pointing at the
 * container of @head, so the insertion presumably lands at the tail -
 * confirm.
 */
513 /* Insert lines ordered by sequence number (seq_num) on list */
514 static void pblk_recov_line_add_ordered(struct list_head *head,
515 struct pblk_line *line)
517 struct pblk_line *t = NULL;
519 list_for_each_entry(t, head, list)
520 if (t->seq_nr > line->seq_nr)
/* Link the new line between t's predecessor and t. */
523 __list_add(&line->list, t->list.prev, &t->list);
/*
 * Locate the sector at which the end metadata (emeta) of @line starts.
 *
 * Starts from lm->sec_per_line with lm->emeta_sec[0] sectors to account for,
 * and tests whether each candidate address falls on a position that is not
 * flagged in line->blk_bitmap.
 *
 * NOTE(review): the loop construct, the updates of emeta_start/emeta_secs
 * and the return statement are not visible here - presumably the address is
 * walked backwards from the end of the line, counting only sectors on good
 * blocks; confirm.
 */
526 static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
528 struct nvm_tgt_dev *dev = pblk->dev;
529 struct nvm_geo *geo = &dev->geo;
530 struct pblk_line_meta *lm = &pblk->lm;
531 unsigned int emeta_secs;
/* Sectors of emeta still to be located, and the initial candidate address. */
536 emeta_secs = lm->emeta_sec[0];
537 emeta_start = lm->sec_per_line;
541 ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
542 pos = pblk_ppa_to_pos(geo, ppa);
/* Only positions not flagged in the block bitmap satisfy this test. */
543 if (!test_bit(pos, line->blk_bitmap))
/*
 * Check the emeta header version of a line against the versions this code
 * was built with.
 *
 * A major version different from EMETA_VERSION_MAJOR is reported as an
 * error. With CONFIG_NVM_PBLK_DEBUG, a minor version newer than
 * EMETA_VERSION_MINOR is reported as information only.
 *
 * NOTE(review): the return statements are not visible here; the caller in
 * pblk_recov_l2p() aborts recovery with -EINVAL on a non-zero result.
 */
550 static int pblk_recov_check_line_version(struct pblk *pblk,
551 struct line_emeta *emeta)
553 struct line_header *header = &emeta->header;
555 if (header->version_major != EMETA_VERSION_MAJOR) {
556 pblk_err(pblk, "line major version mismatch: %d, expected: %d\n",
557 header->version_major, EMETA_VERSION_MAJOR);
561 #ifdef CONFIG_NVM_PBLK_DEBUG
562 if (header->version_minor > EMETA_VERSION_MINOR)
563 pblk_info(pblk, "newer line minor version found: %d\n",
564 header->version_minor);
/*
 * Restore the user, pad and gc WA counters from a line's emeta.
 *
 * The counters are only read when the emeta header version is at least 0.2.
 * The values are stored in emeta as little-endian 64-bit integers and are
 * written both to the live atomic counters and to the matching *_rst_wa
 * fields of @pblk.
 */
570 static void pblk_recov_wa_counters(struct pblk *pblk,
571 struct line_emeta *emeta)
573 struct pblk_line_meta *lm = &pblk->lm;
574 struct line_header *header = &emeta->header;
575 struct wa_counters *wa = emeta_to_wa(lm, emeta);
577 /* WA counters were introduced in emeta version 0.2 */
578 if (header->version_major > 0 || header->version_minor >= 2) {
579 u64 user = le64_to_cpu(wa->user);
580 u64 pad = le64_to_cpu(wa->pad);
581 u64 gc = le64_to_cpu(wa->gc);
/* Seed the running counters with the recovered values. */
583 atomic64_set(&pblk->user_wa, user);
584 atomic64_set(&pblk->pad_wa, pad);
585 atomic64_set(&pblk->gc_wa, gc);
/* Keep a second copy of the recovered values in the *_rst_wa fields. */
587 pblk->user_rst_wa = user;
588 pblk->pad_rst_wa = pad;
589 pblk->gc_rst_wa = gc;
/*
 * Decide whether @line holds data that recovery should look at.
 *
 * Tests for a line in state PBLK_LINESTATE_BAD, looks for the first block
 * not flagged in line->blk_bitmap (tested against blk_per_line in case there
 * is none), and inspects the NVM_CHK_ST_FREE flag of the chunk that belongs
 * to that block's LUN.
 *
 * NOTE(review): the rest of the parameter list and every return statement
 * are not visible here. The caller passes (line, pblk) and tests the result
 * with '!' - confirm the value returned by each branch.
 */
593 static int pblk_line_was_written(struct pblk_line *line,
597 struct pblk_line_meta *lm = &pblk->lm;
598 struct nvm_tgt_dev *dev = pblk->dev;
599 struct nvm_geo *geo = &dev->geo;
600 struct nvm_chk_meta *chunk;
601 struct ppa_addr bppa;
604 if (line->state == PBLK_LINESTATE_BAD)
/* First block of the line that is not flagged in the block bitmap. */
607 smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
608 if (smeta_blk >= lm->blk_per_line)
/* Chunk metadata of that block, located through the LUN's base address. */
611 bppa = pblk->luns[smeta_blk].bppa;
612 chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
614 if (chunk->state & NVM_CHK_ST_FREE)
/*
 * Scan the chunks of @line for one whose state has NVM_CHK_ST_OPEN set.
 *
 * NOTE(review): the return statements are not visible here - presumably
 * true on the first open chunk and false otherwise; confirm.
 */
620 static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
622 struct pblk_line_meta *lm = &pblk->lm;
625 for (i = 0; i < lm->blk_per_line; i++)
626 if (line->chks[i].state & NVM_CHK_ST_OPEN)
/*
 * Scan-based recovery of the L2P table.
 *
 * Pass 1 reads the start metadata (smeta) of every line, validates it (CRC,
 * magic, major version, instance uuid), updates the line and line-management
 * state from the smeta header and collects the lines on a local list ordered
 * by sequence number.
 *
 * Pass 2 walks that list: open lines and lines whose emeta cannot be read or
 * validated are recovered through pblk_recov_l2p_from_oob(); otherwise the
 * WA counters and the L2P entries are restored from emeta. Full lines are
 * marked closed and moved to a GC list; a line that is not full is marked
 * open.
 *
 * Finally the metadata slot is released, a next data line is prepared, and a
 * mismatch between found and recovered lines is logged.
 *
 * Returns ERR_PTR(-EINVAL) on an incompatible smeta or emeta major version.
 *
 * NOTE(review): many control-flow statements (continue/goto/labels, counter
 * updates, the final return) are not visible here; the notes below mark the
 * places where that matters.
 */
632 struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
634 struct pblk_line_meta *lm = &pblk->lm;
635 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
636 struct pblk_line *line, *tline, *data_line = NULL;
637 struct pblk_smeta *smeta;
638 struct pblk_emeta *emeta;
639 struct line_smeta *smeta_buf;
640 int found_lines = 0, recovered_lines = 0, open_lines = 0;
643 int i, valid_uuid = 0;
644 LIST_HEAD(recov_list);
646 /* TODO: Implement FTL snapshot */
648 /* Scan recovery - takes place when FTL snapshot fails */
/* Claim the first free metadata slot and use its smeta/emeta buffers. */
649 spin_lock(&l_mg->free_lock);
650 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
651 set_bit(meta_line, &l_mg->meta_bitmap);
652 smeta = l_mg->sline_meta[meta_line];
653 emeta = l_mg->eline_meta[meta_line];
654 smeta_buf = (struct line_smeta *)smeta;
655 spin_unlock(&l_mg->free_lock);
657 /* Order data lines using their sequence number */
658 for (i = 0; i < l_mg->nr_lines; i++) {
661 line = &pblk->lines[i];
/* The same smeta buffer is reused for every line, so clear it first. */
663 memset(smeta, 0, lm->smeta_len);
/* The LUN bitmap sits directly after the line_smeta header in that buffer. */
665 line->lun_bitmap = ((void *)(smeta_buf)) +
666 sizeof(struct line_smeta);
668 if (!pblk_line_was_written(line, pblk))
671 /* Lines that cannot be read are assumed as not written here */
672 if (pblk_line_smeta_read(pblk, line))
675 crc = pblk_calc_smeta_crc(pblk, smeta_buf);
676 if (le32_to_cpu(smeta_buf->crc) != crc)
679 if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
/*
 * NOTE(review): this return happens while the meta_line bit set above is
 * still set, and lines may already sit on recov_list - confirm whether
 * cleanup is needed here.
 */
682 if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
683 pblk_err(pblk, "found incompatible line version %u\n",
684 smeta_buf->header.version_major);
685 return ERR_PTR(-EINVAL);
688 /* The first valid instance uuid is used for initialization */
690 memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
/* Lines whose uuid differs from the adopted instance uuid are ignored. */
694 if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
695 pblk_debug(pblk, "ignore line %u due to uuid mismatch\n",
700 /* Update line metadata */
701 spin_lock(&line->lock);
702 line->id = le32_to_cpu(smeta_buf->header.id);
703 line->type = le16_to_cpu(smeta_buf->header.type);
704 line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
705 spin_unlock(&line->lock);
707 /* Update general metadata */
/* Keep d_seq_nr ahead of every sequence number seen so far. */
708 spin_lock(&l_mg->free_lock);
709 if (line->seq_nr >= l_mg->d_seq_nr)
710 l_mg->d_seq_nr = line->seq_nr + 1;
711 l_mg->nr_free_lines--;
712 spin_unlock(&l_mg->free_lock);
714 if (pblk_line_recov_alloc(pblk, line))
717 pblk_recov_line_add_ordered(&recov_list, line);
/*
 * NOTE(review): smeta_buf->seq_nr is the raw little-endian value (it is
 * converted with le64_to_cpu() above) but is printed with %llu here;
 * line->seq_nr looks like the intended argument - confirm.
 */
719 pblk_debug(pblk, "recovering data line %d, seq:%llu\n",
720 line->id, smeta_buf->seq_nr);
/*
 * NOTE(review): the condition guarding the next statements is not visible -
 * presumably the path taken when no lines were found; confirm.
 */
724 pblk_setup_uuid(pblk);
726 spin_lock(&l_mg->free_lock);
727 WARN_ON_ONCE(!test_and_clear_bit(meta_line,
728 &l_mg->meta_bitmap));
729 spin_unlock(&l_mg->free_lock);
734 /* Verify closed blocks and recover this portion of L2P table*/
735 list_for_each_entry_safe(line, tline, &recov_list, list) {
738 line->emeta_ssec = pblk_line_emeta_start(pblk, line);
740 memset(line->emeta->buf, 0, lm->emeta_len[0]);
/* Open lines are recovered from per-sector metadata. */
742 if (pblk_line_is_open(pblk, line)) {
743 pblk_recov_l2p_from_oob(pblk, line);
/* Unreadable emeta: fall back to the OOB scan. */
747 if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
748 pblk_recov_l2p_from_oob(pblk, line);
/* emeta failed validation: fall back to the OOB scan. */
752 if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
753 pblk_recov_l2p_from_oob(pblk, line);
/* NOTE(review): as above, this return leaves the meta_line bit set - confirm. */
757 if (pblk_recov_check_line_version(pblk, line->emeta->buf))
758 return ERR_PTR(-EINVAL);
760 pblk_recov_wa_counters(pblk, line->emeta->buf);
762 if (pblk_recov_l2p_from_emeta(pblk, line))
763 pblk_recov_l2p_from_oob(pblk, line);
/* Full line: mark it closed and queue it on the GC list chosen for it. */
766 if (pblk_line_is_full(line)) {
767 struct list_head *move_list;
769 spin_lock(&line->lock);
770 line->state = PBLK_LINESTATE_CLOSED;
771 trace_pblk_line_state(pblk_disk_name(pblk), line->id,
773 move_list = pblk_line_gc_list(pblk, line);
774 spin_unlock(&line->lock);
776 spin_lock(&l_mg->gc_lock);
777 list_move_tail(&line->list, move_list);
778 spin_unlock(&l_mg->gc_lock);
/* The map bitmap goes back to its pool for a closed line. */
780 mempool_free(line->map_bitmap, l_mg->bitmap_pool);
781 line->map_bitmap = NULL;
/* The line is marked open; presumably the branch for a line that is not full. */
785 spin_lock(&line->lock);
786 line->state = PBLK_LINESTATE_OPEN;
787 spin_unlock(&line->lock);
789 line->emeta->mem = 0;
790 atomic_set(&line->emeta->sync, 0);
792 trace_pblk_line_state(pblk_disk_name(pblk), line->id,
/* The open line takes over the metadata slot claimed at the top. */
796 line->meta_line = meta_line;
/*
 * NOTE(review): the condition guarding this release is not visible -
 * presumably taken when no open line kept the slot; confirm.
 */
803 spin_lock(&l_mg->free_lock);
804 WARN_ON_ONCE(!test_and_clear_bit(meta_line,
805 &l_mg->meta_bitmap));
806 spin_unlock(&l_mg->free_lock);
807 pblk_line_replace_data(pblk);
809 spin_lock(&l_mg->free_lock);
810 /* Allocate next line for preparation */
811 l_mg->data_next = pblk_line_get(pblk);
812 if (l_mg->data_next) {
813 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
814 l_mg->data_next->type = PBLK_LINETYPE_DATA;
817 spin_unlock(&l_mg->free_lock);
/* NOTE(review): the guard for this erase is not visible; data_next may be NULL. */
821 pblk_line_erase(pblk, l_mg->data_next);
824 if (found_lines != recovered_lines)
825 pblk_err(pblk, "failed to recover all found lines %d/%d\n",
826 found_lines, recovered_lines);
/*
 * Pad the current data line with all the sectors it has left and then close
 * its metadata.
 *
 * NOTE(review): l_mg->data_line is dereferenced with no NULL check visible
 * here - confirm that callers guarantee a current data line. The handling
 * after a padding failure and the return statement are not visible either.
 */
834 int pblk_recov_pad(struct pblk *pblk)
836 struct pblk_line *line;
837 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
/* Read the current data line and its remaining sectors under free_lock. */
841 spin_lock(&l_mg->free_lock);
842 line = l_mg->data_line;
843 left_msecs = line->left_msecs;
844 spin_unlock(&l_mg->free_lock);
846 ret = pblk_recov_pad_line(pblk, line, left_msecs);
848 pblk_err(pblk, "tear down padding failed (%d)\n", ret);
852 pblk_line_close_meta(pblk, line);