Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.13.7.
   1/*
   2 * Copyright (C) 2016 CNEX Labs
   3 * Initial: Javier Gonzalez <javier@cnexlabs.com>
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License version
   7 * 2 as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it will be useful, but
  10 * WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12 * General Public License for more details.
  13 *
  14 * pblk-recovery.c - pblk's recovery path
  15 */
  16
  17#include "pblk.h"
  18
  19void pblk_submit_rec(struct work_struct *work)
  20{
  21	struct pblk_rec_ctx *recovery =
  22			container_of(work, struct pblk_rec_ctx, ws_rec);
  23	struct pblk *pblk = recovery->pblk;
  24	struct nvm_rq *rqd = recovery->rqd;
  25	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
  26	struct bio *bio;
  27	unsigned int nr_rec_secs;
  28	unsigned int pgs_read;
  29	int ret;
  30
  31	nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
  32								NVM_MAX_VLBA);
  33
  34	bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
  35
  36	bio->bi_iter.bi_sector = 0;
  37	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
  38	rqd->bio = bio;
  39	rqd->nr_ppas = nr_rec_secs;
  40
  41	pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
  42								nr_rec_secs);
  43	if (pgs_read != nr_rec_secs) {
  44		pr_err("pblk: could not read recovery entries\n");
  45		goto err;
  46	}
  47
  48	if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
  49		pr_err("pblk: could not setup recovery request\n");
  50		goto err;
  51	}
  52
  53#ifdef CONFIG_NVM_DEBUG
  54	atomic_long_add(nr_rec_secs, &pblk->recov_writes);
  55#endif
  56
  57	ret = pblk_submit_io(pblk, rqd);
  58	if (ret) {
  59		pr_err("pblk: I/O submission failed: %d\n", ret);
  60		goto err;
  61	}
  62
  63	mempool_free(recovery, pblk->rec_pool);
  64	return;
  65
  66err:
  67	bio_put(bio);
  68	pblk_free_rqd(pblk, rqd, PBLK_WRITE);
  69}
  70
  71int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
  72			struct pblk_rec_ctx *recovery, u64 *comp_bits,
  73			unsigned int comp)
  74{
  75	struct nvm_rq *rec_rqd;
  76	struct pblk_c_ctx *rec_ctx;
  77	int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
  78
  79	rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
  80	rec_ctx = nvm_rq_to_pdu(rec_rqd);
  81
  82	/* Copy completion bitmap, but exclude the first X completed entries */
  83	bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
  84				(unsigned long int *)comp_bits,
  85				comp, NVM_MAX_VLBA);
  86
  87	/* Save the context for the entries that need to be re-written and
  88	 * update current context with the completed entries.
  89	 */
  90	rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
  91	if (comp >= c_ctx->nr_valid) {
  92		rec_ctx->nr_valid = 0;
  93		rec_ctx->nr_padded = nr_entries - comp;
  94
  95		c_ctx->nr_padded = comp - c_ctx->nr_valid;
  96	} else {
  97		rec_ctx->nr_valid = c_ctx->nr_valid - comp;
  98		rec_ctx->nr_padded = c_ctx->nr_padded;
  99
 100		c_ctx->nr_valid = comp;
 101		c_ctx->nr_padded = 0;
 102	}
 103
 104	recovery->rqd = rec_rqd;
 105	recovery->pblk = pblk;
 106
 107	return 0;
 108}
 109
 110int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
 111{
 112	u32 crc;
 113
 114	crc = pblk_calc_emeta_crc(pblk, emeta_buf);
 115	if (le32_to_cpu(emeta_buf->crc) != crc)
 116		return 1;
 117
 118	if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
 119		return 1;
 120
 121	return 0;
 122}
 123
 124static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 125{
 126	struct nvm_tgt_dev *dev = pblk->dev;
 127	struct nvm_geo *geo = &dev->geo;
 128	struct pblk_line_meta *lm = &pblk->lm;
 129	struct pblk_emeta *emeta = line->emeta;
 130	struct line_emeta *emeta_buf = emeta->buf;
 131	__le64 *lba_list;
 132	u64 data_start, data_end;
 133	u64 nr_valid_lbas, nr_lbas = 0;
 134	u64 i;
 135
 136	lba_list = emeta_to_lbas(pblk, emeta_buf);
 137	if (!lba_list)
 138		return 1;
 139
 140	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
 141	data_end = line->emeta_ssec;
 142	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
 143
 144	for (i = data_start; i < data_end; i++) {
 145		struct ppa_addr ppa;
 146		int pos;
 147
 148		ppa = addr_to_gen_ppa(pblk, i, line->id);
 149		pos = pblk_ppa_to_pos(geo, ppa);
 150
 151		/* Do not update bad blocks */
 152		if (test_bit(pos, line->blk_bitmap))
 153			continue;
 154
 155		if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
 156			spin_lock(&line->lock);
 157			if (test_and_set_bit(i, line->invalid_bitmap))
 158				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
 159			else
 160				le32_add_cpu(line->vsc, -1);
 161			spin_unlock(&line->lock);
 162
 163			continue;
 164		}
 165
 166		pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
 167		nr_lbas++;
 168	}
 169
 170	if (nr_valid_lbas != nr_lbas)
 171		pr_err("pblk: line %d - inconsistent lba list(%llu/%llu)\n",
 172				line->id, nr_valid_lbas, nr_lbas);
 173
 174	line->left_msecs = 0;
 175
 176	return 0;
 177}
 178
 179static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
 180{
 181	struct nvm_tgt_dev *dev = pblk->dev;
 182	struct nvm_geo *geo = &dev->geo;
 183	struct pblk_line_meta *lm = &pblk->lm;
 184	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
 185
 186	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
 187				nr_bb * geo->clba;
 188}
 189
 190struct pblk_recov_alloc {
 191	struct ppa_addr *ppa_list;
 192	struct pblk_sec_meta *meta_list;
 193	struct nvm_rq *rqd;
 194	void *data;
 195	dma_addr_t dma_ppa_list;
 196	dma_addr_t dma_meta_list;
 197};
 198
 199static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
 200			       struct pblk_recov_alloc p, u64 r_ptr)
 201{
 202	struct nvm_tgt_dev *dev = pblk->dev;
 203	struct nvm_geo *geo = &dev->geo;
 204	struct ppa_addr *ppa_list;
 205	struct pblk_sec_meta *meta_list;
 206	struct nvm_rq *rqd;
 207	struct bio *bio;
 208	void *data;
 209	dma_addr_t dma_ppa_list, dma_meta_list;
 210	u64 r_ptr_int;
 211	int left_ppas;
 212	int rq_ppas, rq_len;
 213	int i, j;
 214	int ret = 0;
 215
 216	ppa_list = p.ppa_list;
 217	meta_list = p.meta_list;
 218	rqd = p.rqd;
 219	data = p.data;
 220	dma_ppa_list = p.dma_ppa_list;
 221	dma_meta_list = p.dma_meta_list;
 222
 223	left_ppas = line->cur_sec - r_ptr;
 224	if (!left_ppas)
 225		return 0;
 226
 227	r_ptr_int = r_ptr;
 228
 229next_read_rq:
 230	memset(rqd, 0, pblk_g_rq_size);
 231
 232	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 233	if (!rq_ppas)
 234		rq_ppas = pblk->min_write_pgs;
 235	rq_len = rq_ppas * geo->csecs;
 236
 237	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
 238	if (IS_ERR(bio))
 239		return PTR_ERR(bio);
 240
 241	bio->bi_iter.bi_sector = 0; /* internal bio */
 242	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 243
 244	rqd->bio = bio;
 245	rqd->opcode = NVM_OP_PREAD;
 246	rqd->meta_list = meta_list;
 247	rqd->nr_ppas = rq_ppas;
 248	rqd->ppa_list = ppa_list;
 249	rqd->dma_ppa_list = dma_ppa_list;
 250	rqd->dma_meta_list = dma_meta_list;
 251
 252	if (pblk_io_aligned(pblk, rq_ppas))
 253		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
 254	else
 255		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 256
 257	for (i = 0; i < rqd->nr_ppas; ) {
 258		struct ppa_addr ppa;
 259		int pos;
 260
 261		ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
 262		pos = pblk_ppa_to_pos(geo, ppa);
 263
 264		while (test_bit(pos, line->blk_bitmap)) {
 265			r_ptr_int += pblk->min_write_pgs;
 266			ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
 267			pos = pblk_ppa_to_pos(geo, ppa);
 268		}
 269
 270		for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
 271			rqd->ppa_list[i] =
 272				addr_to_gen_ppa(pblk, r_ptr_int, line->id);
 273	}
 274
 275	/* If read fails, more padding is needed */
 276	ret = pblk_submit_io_sync(pblk, rqd);
 277	if (ret) {
 278		pr_err("pblk: I/O submission failed: %d\n", ret);
 279		return ret;
 280	}
 281
 282	atomic_dec(&pblk->inflight_io);
 283
 284	/* At this point, the read should not fail. If it does, it is a problem
 285	 * we cannot recover from here. Need FTL log.
 286	 */
 287	if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
 288		pr_err("pblk: L2P recovery failed (%d)\n", rqd->error);
 289		return -EINTR;
 290	}
 291
 292	for (i = 0; i < rqd->nr_ppas; i++) {
 293		u64 lba = le64_to_cpu(meta_list[i].lba);
 294
 295		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
 296			continue;
 297
 298		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
 299	}
 300
 301	left_ppas -= rq_ppas;
 302	if (left_ppas > 0)
 303		goto next_read_rq;
 304
 305	return 0;
 306}
 307
 308static void pblk_recov_complete(struct kref *ref)
 309{
 310	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
 311
 312	complete(&pad_rq->wait);
 313}
 314
 315static void pblk_end_io_recov(struct nvm_rq *rqd)
 316{
 317	struct pblk_pad_rq *pad_rq = rqd->private;
 318	struct pblk *pblk = pad_rq->pblk;
 319
 320	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 321
 322	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
 323
 324	atomic_dec(&pblk->inflight_io);
 325	kref_put(&pad_rq->ref, pblk_recov_complete);
 326}
 327
 328static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 329			      int left_ppas)
 330{
 331	struct nvm_tgt_dev *dev = pblk->dev;
 332	struct nvm_geo *geo = &dev->geo;
 333	struct ppa_addr *ppa_list;
 334	struct pblk_sec_meta *meta_list;
 335	struct pblk_pad_rq *pad_rq;
 336	struct nvm_rq *rqd;
 337	struct bio *bio;
 338	void *data;
 339	dma_addr_t dma_ppa_list, dma_meta_list;
 340	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 341	u64 w_ptr = line->cur_sec;
 342	int left_line_ppas, rq_ppas, rq_len;
 343	int i, j;
 344	int ret = 0;
 345
 346	spin_lock(&line->lock);
 347	left_line_ppas = line->left_msecs;
 348	spin_unlock(&line->lock);
 349
 350	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
 351	if (!pad_rq)
 352		return -ENOMEM;
 353
 354	data = vzalloc(pblk->max_write_pgs * geo->csecs);
 355	if (!data) {
 356		ret = -ENOMEM;
 357		goto free_rq;
 358	}
 359
 360	pad_rq->pblk = pblk;
 361	init_completion(&pad_rq->wait);
 362	kref_init(&pad_rq->ref);
 363
 364next_pad_rq:
 365	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 366	if (rq_ppas < pblk->min_write_pgs) {
 367		pr_err("pblk: corrupted pad line %d\n", line->id);
 368		goto fail_free_pad;
 369	}
 370
 371	rq_len = rq_ppas * geo->csecs;
 372
 373	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
 374	if (!meta_list) {
 375		ret = -ENOMEM;
 376		goto fail_free_pad;
 377	}
 378
 379	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
 380	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
 381
 382	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
 383						PBLK_VMALLOC_META, GFP_KERNEL);
 384	if (IS_ERR(bio)) {
 385		ret = PTR_ERR(bio);
 386		goto fail_free_meta;
 387	}
 388
 389	bio->bi_iter.bi_sector = 0; /* internal bio */
 390	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 391
 392	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
 393
 394	rqd->bio = bio;
 395	rqd->opcode = NVM_OP_PWRITE;
 396	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
 397	rqd->meta_list = meta_list;
 398	rqd->nr_ppas = rq_ppas;
 399	rqd->ppa_list = ppa_list;
 400	rqd->dma_ppa_list = dma_ppa_list;
 401	rqd->dma_meta_list = dma_meta_list;
 402	rqd->end_io = pblk_end_io_recov;
 403	rqd->private = pad_rq;
 404
 405	for (i = 0; i < rqd->nr_ppas; ) {
 406		struct ppa_addr ppa;
 407		int pos;
 408
 409		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
 410		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 411		pos = pblk_ppa_to_pos(geo, ppa);
 412
 413		while (test_bit(pos, line->blk_bitmap)) {
 414			w_ptr += pblk->min_write_pgs;
 415			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 416			pos = pblk_ppa_to_pos(geo, ppa);
 417		}
 418
 419		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
 420			struct ppa_addr dev_ppa;
 421			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
 422
 423			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 424
 425			pblk_map_invalidate(pblk, dev_ppa);
 426			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
 427			rqd->ppa_list[i] = dev_ppa;
 428		}
 429	}
 430
 431	kref_get(&pad_rq->ref);
 432	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 433
 434	ret = pblk_submit_io(pblk, rqd);
 435	if (ret) {
 436		pr_err("pblk: I/O submission failed: %d\n", ret);
 437		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 438		goto fail_free_bio;
 439	}
 440
 441	left_line_ppas -= rq_ppas;
 442	left_ppas -= rq_ppas;
 443	if (left_ppas && left_line_ppas)
 444		goto next_pad_rq;
 445
 446	kref_put(&pad_rq->ref, pblk_recov_complete);
 447
 448	if (!wait_for_completion_io_timeout(&pad_rq->wait,
 449				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
 450		pr_err("pblk: pad write timed out\n");
 451		ret = -ETIME;
 452	}
 453
 454	if (!pblk_line_is_full(line))
 455		pr_err("pblk: corrupted padded line: %d\n", line->id);
 456
 457	vfree(data);
 458free_rq:
 459	kfree(pad_rq);
 460	return ret;
 461
 462fail_free_bio:
 463	bio_put(bio);
 464fail_free_meta:
 465	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
 466fail_free_pad:
 467	kfree(pad_rq);
 468	vfree(data);
 469	return ret;
 470}
 471
 472/* When this function is called, it means that not all upper pages have been
 473 * written in a page that contains valid data. In order to recover this data, we
 474 * first find the write pointer on the device, then we pad all necessary
 475 * sectors, and finally attempt to read the valid data
 476 */
 477static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
 478				   struct pblk_recov_alloc p)
 479{
 480	struct nvm_tgt_dev *dev = pblk->dev;
 481	struct nvm_geo *geo = &dev->geo;
 482	struct ppa_addr *ppa_list;
 483	struct pblk_sec_meta *meta_list;
 484	struct nvm_rq *rqd;
 485	struct bio *bio;
 486	void *data;
 487	dma_addr_t dma_ppa_list, dma_meta_list;
 488	u64 w_ptr = 0, r_ptr;
 489	int rq_ppas, rq_len;
 490	int i, j;
 491	int ret = 0;
 492	int rec_round;
 493	int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
 494
 495	ppa_list = p.ppa_list;
 496	meta_list = p.meta_list;
 497	rqd = p.rqd;
 498	data = p.data;
 499	dma_ppa_list = p.dma_ppa_list;
 500	dma_meta_list = p.dma_meta_list;
 501
 502	/* we could recover up until the line write pointer */
 503	r_ptr = line->cur_sec;
 504	rec_round = 0;
 505
 506next_rq:
 507	memset(rqd, 0, pblk_g_rq_size);
 508
 509	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 510	if (!rq_ppas)
 511		rq_ppas = pblk->min_write_pgs;
 512	rq_len = rq_ppas * geo->csecs;
 513
 514	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
 515	if (IS_ERR(bio))
 516		return PTR_ERR(bio);
 517
 518	bio->bi_iter.bi_sector = 0; /* internal bio */
 519	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 520
 521	rqd->bio = bio;
 522	rqd->opcode = NVM_OP_PREAD;
 523	rqd->meta_list = meta_list;
 524	rqd->nr_ppas = rq_ppas;
 525	rqd->ppa_list = ppa_list;
 526	rqd->dma_ppa_list = dma_ppa_list;
 527	rqd->dma_meta_list = dma_meta_list;
 528
 529	if (pblk_io_aligned(pblk, rq_ppas))
 530		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
 531	else
 532		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 533
 534	for (i = 0; i < rqd->nr_ppas; ) {
 535		struct ppa_addr ppa;
 536		int pos;
 537
 538		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
 539		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 540		pos = pblk_ppa_to_pos(geo, ppa);
 541
 542		while (test_bit(pos, line->blk_bitmap)) {
 543			w_ptr += pblk->min_write_pgs;
 544			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 545			pos = pblk_ppa_to_pos(geo, ppa);
 546		}
 547
 548		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
 549			rqd->ppa_list[i] =
 550				addr_to_gen_ppa(pblk, w_ptr, line->id);
 551	}
 552
 553	ret = pblk_submit_io_sync(pblk, rqd);
 554	if (ret) {
 555		pr_err("pblk: I/O submission failed: %d\n", ret);
 556		return ret;
 557	}
 558
 559	atomic_dec(&pblk->inflight_io);
 560
 561	/* This should not happen since the read failed during normal recovery,
 562	 * but the media works funny sometimes...
 563	 */
 564	if (!rec_round++ && !rqd->error) {
 565		rec_round = 0;
 566		for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
 567			u64 lba = le64_to_cpu(meta_list[i].lba);
 568
 569			if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
 570				continue;
 571
 572			pblk_update_map(pblk, lba, rqd->ppa_list[i]);
 573		}
 574	}
 575
 576	/* Reached the end of the written line */
 577	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
 578		int pad_secs, nr_error_bits, bit;
 579		int ret;
 580
 581		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
 582		nr_error_bits = rqd->nr_ppas - bit;
 583
 584		/* Roll back failed sectors */
 585		line->cur_sec -= nr_error_bits;
 586		line->left_msecs += nr_error_bits;
 587		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
 588
 589		pad_secs = pblk_pad_distance(pblk);
 590		if (pad_secs > line->left_msecs)
 591			pad_secs = line->left_msecs;
 592
 593		ret = pblk_recov_pad_oob(pblk, line, pad_secs);
 594		if (ret)
 595			pr_err("pblk: OOB padding failed (err:%d)\n", ret);
 596
 597		ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
 598		if (ret)
 599			pr_err("pblk: OOB read failed (err:%d)\n", ret);
 600
 601		left_ppas = 0;
 602	}
 603
 604	left_ppas -= rq_ppas;
 605	if (left_ppas > 0)
 606		goto next_rq;
 607
 608	return ret;
 609}
 610
 611static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 612			       struct pblk_recov_alloc p, int *done)
 613{
 614	struct nvm_tgt_dev *dev = pblk->dev;
 615	struct nvm_geo *geo = &dev->geo;
 616	struct ppa_addr *ppa_list;
 617	struct pblk_sec_meta *meta_list;
 618	struct nvm_rq *rqd;
 619	struct bio *bio;
 620	void *data;
 621	dma_addr_t dma_ppa_list, dma_meta_list;
 622	u64 paddr;
 623	int rq_ppas, rq_len;
 624	int i, j;
 625	int ret = 0;
 626	int left_ppas = pblk_calc_sec_in_line(pblk, line);
 627
 628	ppa_list = p.ppa_list;
 629	meta_list = p.meta_list;
 630	rqd = p.rqd;
 631	data = p.data;
 632	dma_ppa_list = p.dma_ppa_list;
 633	dma_meta_list = p.dma_meta_list;
 634
 635	*done = 1;
 636
 637next_rq:
 638	memset(rqd, 0, pblk_g_rq_size);
 639
 640	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 641	if (!rq_ppas)
 642		rq_ppas = pblk->min_write_pgs;
 643	rq_len = rq_ppas * geo->csecs;
 644
 645	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
 646	if (IS_ERR(bio))
 647		return PTR_ERR(bio);
 648
 649	bio->bi_iter.bi_sector = 0; /* internal bio */
 650	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 651
 652	rqd->bio = bio;
 653	rqd->opcode = NVM_OP_PREAD;
 654	rqd->meta_list = meta_list;
 655	rqd->nr_ppas = rq_ppas;
 656	rqd->ppa_list = ppa_list;
 657	rqd->dma_ppa_list = dma_ppa_list;
 658	rqd->dma_meta_list = dma_meta_list;
 659
 660	if (pblk_io_aligned(pblk, rq_ppas))
 661		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
 662	else
 663		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 664
 665	for (i = 0; i < rqd->nr_ppas; ) {
 666		struct ppa_addr ppa;
 667		int pos;
 668
 669		paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
 670		ppa = addr_to_gen_ppa(pblk, paddr, line->id);
 671		pos = pblk_ppa_to_pos(geo, ppa);
 672
 673		while (test_bit(pos, line->blk_bitmap)) {
 674			paddr += pblk->min_write_pgs;
 675			ppa = addr_to_gen_ppa(pblk, paddr, line->id);
 676			pos = pblk_ppa_to_pos(geo, ppa);
 677		}
 678
 679		for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
 680			rqd->ppa_list[i] =
 681				addr_to_gen_ppa(pblk, paddr, line->id);
 682	}
 683
 684	ret = pblk_submit_io_sync(pblk, rqd);
 685	if (ret) {
 686		pr_err("pblk: I/O submission failed: %d\n", ret);
 687		bio_put(bio);
 688		return ret;
 689	}
 690
 691	atomic_dec(&pblk->inflight_io);
 692
 693	/* Reached the end of the written line */
 694	if (rqd->error) {
 695		int nr_error_bits, bit;
 696
 697		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
 698		nr_error_bits = rqd->nr_ppas - bit;
 699
 700		/* Roll back failed sectors */
 701		line->cur_sec -= nr_error_bits;
 702		line->left_msecs += nr_error_bits;
 703		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
 704
 705		left_ppas = 0;
 706		rqd->nr_ppas = bit;
 707
 708		if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
 709			*done = 0;
 710	}
 711
 712	for (i = 0; i < rqd->nr_ppas; i++) {
 713		u64 lba = le64_to_cpu(meta_list[i].lba);
 714
 715		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
 716			continue;
 717
 718		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
 719	}
 720
 721	left_ppas -= rq_ppas;
 722	if (left_ppas > 0)
 723		goto next_rq;
 724
 725	return ret;
 726}
 727
 728/* Scan line for lbas on out of bound area */
 729static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
 730{
 731	struct nvm_tgt_dev *dev = pblk->dev;
 732	struct nvm_geo *geo = &dev->geo;
 733	struct nvm_rq *rqd;
 734	struct ppa_addr *ppa_list;
 735	struct pblk_sec_meta *meta_list;
 736	struct pblk_recov_alloc p;
 737	void *data;
 738	dma_addr_t dma_ppa_list, dma_meta_list;
 739	int done, ret = 0;
 740
 741	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
 742	if (!meta_list)
 743		return -ENOMEM;
 744
 745	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
 746	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
 747
 748	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
 749	if (!data) {
 750		ret = -ENOMEM;
 751		goto free_meta_list;
 752	}
 753
 754	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
 755
 756	p.ppa_list = ppa_list;
 757	p.meta_list = meta_list;
 758	p.rqd = rqd;
 759	p.data = data;
 760	p.dma_ppa_list = dma_ppa_list;
 761	p.dma_meta_list = dma_meta_list;
 762
 763	ret = pblk_recov_scan_oob(pblk, line, p, &done);
 764	if (ret) {
 765		pr_err("pblk: could not recover L2P from OOB\n");
 766		goto out;
 767	}
 768
 769	if (!done) {
 770		ret = pblk_recov_scan_all_oob(pblk, line, p);
 771		if (ret) {
 772			pr_err("pblk: could not recover L2P from OOB\n");
 773			goto out;
 774		}
 775	}
 776
 777	if (pblk_line_is_full(line))
 778		pblk_line_recov_close(pblk, line);
 779
 780out:
 781	kfree(data);
 782free_meta_list:
 783	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
 784
 785	return ret;
 786}
 787
 788/* Insert lines ordered by sequence number (seq_num) on list */
 789static void pblk_recov_line_add_ordered(struct list_head *head,
 790					struct pblk_line *line)
 791{
 792	struct pblk_line *t = NULL;
 793
 794	list_for_each_entry(t, head, list)
 795		if (t->seq_nr > line->seq_nr)
 796			break;
 797
 798	__list_add(&line->list, t->list.prev, &t->list);
 799}
 800
 801static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
 802{
 803	struct nvm_tgt_dev *dev = pblk->dev;
 804	struct nvm_geo *geo = &dev->geo;
 805	struct pblk_line_meta *lm = &pblk->lm;
 806	unsigned int emeta_secs;
 807	u64 emeta_start;
 808	struct ppa_addr ppa;
 809	int pos;
 810
 811	emeta_secs = lm->emeta_sec[0];
 812	emeta_start = lm->sec_per_line;
 813
 814	while (emeta_secs) {
 815		emeta_start--;
 816		ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
 817		pos = pblk_ppa_to_pos(geo, ppa);
 818		if (!test_bit(pos, line->blk_bitmap))
 819			emeta_secs--;
 820	}
 821
 822	return emeta_start;
 823}
 824
 825static int pblk_recov_check_line_version(struct pblk *pblk,
 826					 struct line_emeta *emeta)
 827{
 828	struct line_header *header = &emeta->header;
 829
 830	if (header->version_major != EMETA_VERSION_MAJOR) {
 831		pr_err("pblk: line major version mismatch: %d, expected: %d\n",
 832		       header->version_major, EMETA_VERSION_MAJOR);
 833		return 1;
 834	}
 835
 836#ifdef NVM_DEBUG
 837	if (header->version_minor > EMETA_VERSION_MINOR)
 838		pr_info("pblk: newer line minor version found: %d\n", line_v);
 839#endif
 840
 841	return 0;
 842}
 843
 844static void pblk_recov_wa_counters(struct pblk *pblk,
 845				   struct line_emeta *emeta)
 846{
 847	struct pblk_line_meta *lm = &pblk->lm;
 848	struct line_header *header = &emeta->header;
 849	struct wa_counters *wa = emeta_to_wa(lm, emeta);
 850
 851	/* WA counters were introduced in emeta version 0.2 */
 852	if (header->version_major > 0 || header->version_minor >= 2) {
 853		u64 user = le64_to_cpu(wa->user);
 854		u64 pad = le64_to_cpu(wa->pad);
 855		u64 gc = le64_to_cpu(wa->gc);
 856
 857		atomic64_set(&pblk->user_wa, user);
 858		atomic64_set(&pblk->pad_wa, pad);
 859		atomic64_set(&pblk->gc_wa, gc);
 860
 861		pblk->user_rst_wa = user;
 862		pblk->pad_rst_wa = pad;
 863		pblk->gc_rst_wa = gc;
 864	}
 865}
 866
 867static int pblk_line_was_written(struct pblk_line *line,
 868			    struct pblk_line_meta *lm)
 869{
 870
 871	int i;
 872	int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
 873
 874	for (i = 0; i < lm->blk_per_line; i++) {
 875		if (!(line->chks[i].state & state_mask))
 876			return 1;
 877	}
 878
 879	return 0;
 880}
 881
 882struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 883{
 884	struct pblk_line_meta *lm = &pblk->lm;
 885	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 886	struct pblk_line *line, *tline, *data_line = NULL;
 887	struct pblk_smeta *smeta;
 888	struct pblk_emeta *emeta;
 889	struct line_smeta *smeta_buf;
 890	int found_lines = 0, recovered_lines = 0, open_lines = 0;
 891	int is_next = 0;
 892	int meta_line;
 893	int i, valid_uuid = 0;
 894	LIST_HEAD(recov_list);
 895
 896	/* TODO: Implement FTL snapshot */
 897
 898	/* Scan recovery - takes place when FTL snapshot fails */
 899	spin_lock(&l_mg->free_lock);
 900	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 901	set_bit(meta_line, &l_mg->meta_bitmap);
 902	smeta = l_mg->sline_meta[meta_line];
 903	emeta = l_mg->eline_meta[meta_line];
 904	smeta_buf = (struct line_smeta *)smeta;
 905	spin_unlock(&l_mg->free_lock);
 906
 907	/* Order data lines using their sequence number */
 908	for (i = 0; i < l_mg->nr_lines; i++) {
 909		u32 crc;
 910
 911		line = &pblk->lines[i];
 912
 913		memset(smeta, 0, lm->smeta_len);
 914		line->smeta = smeta;
 915		line->lun_bitmap = ((void *)(smeta_buf)) +
 916						sizeof(struct line_smeta);
 917
 918		if (!pblk_line_was_written(line, lm))
 919			continue;
 920
 921		/* Lines that cannot be read are assumed as not written here */
 922		if (pblk_line_read_smeta(pblk, line))
 923			continue;
 924
 925		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
 926		if (le32_to_cpu(smeta_buf->crc) != crc)
 927			continue;
 928
 929		if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
 930			continue;
 931
 932		if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
 933			pr_err("pblk: found incompatible line version %u\n",
 934					smeta_buf->header.version_major);
 935			return ERR_PTR(-EINVAL);
 936		}
 937
 938		/* The first valid instance uuid is used for initialization */
 939		if (!valid_uuid) {
 940			memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
 941			valid_uuid = 1;
 942		}
 943
 944		if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
 945			pr_debug("pblk: ignore line %u due to uuid mismatch\n",
 946					i);
 947			continue;
 948		}
 949
 950		/* Update line metadata */
 951		spin_lock(&line->lock);
 952		line->id = le32_to_cpu(smeta_buf->header.id);
 953		line->type = le16_to_cpu(smeta_buf->header.type);
 954		line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
 955		spin_unlock(&line->lock);
 956
 957		/* Update general metadata */
 958		spin_lock(&l_mg->free_lock);
 959		if (line->seq_nr >= l_mg->d_seq_nr)
 960			l_mg->d_seq_nr = line->seq_nr + 1;
 961		l_mg->nr_free_lines--;
 962		spin_unlock(&l_mg->free_lock);
 963
 964		if (pblk_line_recov_alloc(pblk, line))
 965			goto out;
 966
 967		pblk_recov_line_add_ordered(&recov_list, line);
 968		found_lines++;
 969		pr_debug("pblk: recovering data line %d, seq:%llu\n",
 970						line->id, smeta_buf->seq_nr);
 971	}
 972
 973	if (!found_lines) {
 974		pblk_setup_uuid(pblk);
 975
 976		spin_lock(&l_mg->free_lock);
 977		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
 978							&l_mg->meta_bitmap));
 979		spin_unlock(&l_mg->free_lock);
 980
 981		goto out;
 982	}
 983
 984	/* Verify closed blocks and recover this portion of L2P table*/
 985	list_for_each_entry_safe(line, tline, &recov_list, list) {
 986		recovered_lines++;
 987
 988		line->emeta_ssec = pblk_line_emeta_start(pblk, line);
 989		line->emeta = emeta;
 990		memset(line->emeta->buf, 0, lm->emeta_len[0]);
 991
 992		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
 993			pblk_recov_l2p_from_oob(pblk, line);
 994			goto next;
 995		}
 996
 997		if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
 998			pblk_recov_l2p_from_oob(pblk, line);
 999			goto next;
1000		}
1001
1002		if (pblk_recov_check_line_version(pblk, line->emeta->buf))
1003			return ERR_PTR(-EINVAL);
1004
1005		pblk_recov_wa_counters(pblk, line->emeta->buf);
1006
1007		if (pblk_recov_l2p_from_emeta(pblk, line))
1008			pblk_recov_l2p_from_oob(pblk, line);
1009
1010next:
1011		if (pblk_line_is_full(line)) {
1012			struct list_head *move_list;
1013
1014			spin_lock(&line->lock);
1015			line->state = PBLK_LINESTATE_CLOSED;
1016			move_list = pblk_line_gc_list(pblk, line);
1017			spin_unlock(&line->lock);
1018
1019			spin_lock(&l_mg->gc_lock);
1020			list_move_tail(&line->list, move_list);
1021			spin_unlock(&l_mg->gc_lock);
1022
1023			kfree(line->map_bitmap);
1024			line->map_bitmap = NULL;
1025			line->smeta = NULL;
1026			line->emeta = NULL;
1027		} else {
1028			if (open_lines > 1)
1029				pr_err("pblk: failed to recover L2P\n");
1030
1031			open_lines++;
1032			line->meta_line = meta_line;
1033			data_line = line;
1034		}
1035	}
1036
1037	spin_lock(&l_mg->free_lock);
1038	if (!open_lines) {
1039		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
1040							&l_mg->meta_bitmap));
1041		pblk_line_replace_data(pblk);
1042	} else {
1043		/* Allocate next line for preparation */
1044		l_mg->data_next = pblk_line_get(pblk);
1045		if (l_mg->data_next) {
1046			l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1047			l_mg->data_next->type = PBLK_LINETYPE_DATA;
1048			is_next = 1;
1049		}
1050	}
1051	spin_unlock(&l_mg->free_lock);
1052
1053	if (is_next)
1054		pblk_line_erase(pblk, l_mg->data_next);
1055
1056out:
1057	if (found_lines != recovered_lines)
1058		pr_err("pblk: failed to recover all found lines %d/%d\n",
1059						found_lines, recovered_lines);
1060
1061	return data_line;
1062}
1063
1064/*
1065 * Pad current line
1066 */
1067int pblk_recov_pad(struct pblk *pblk)
1068{
1069	struct pblk_line *line;
1070	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1071	int left_msecs;
1072	int ret = 0;
1073
1074	spin_lock(&l_mg->free_lock);
1075	line = l_mg->data_line;
1076	left_msecs = line->left_msecs;
1077	spin_unlock(&l_mg->free_lock);
1078
1079	ret = pblk_recov_pad_oob(pblk, line, left_msecs);
1080	if (ret) {
1081		pr_err("pblk: Tear down padding failed (%d)\n", ret);
1082		return ret;
1083	}
1084
1085	pblk_line_close_meta(pblk, line);
1086	return ret;
1087}