Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.10.11.
  1// SPDX-License-Identifier: GPL-2.0
  2
  3#include "bcachefs.h"
  4#include "btree_cache.h"
  5#include "btree_io.h"
  6#include "btree_journal_iter.h"
  7#include "btree_node_scan.h"
  8#include "btree_update_interior.h"
  9#include "buckets.h"
 10#include "error.h"
 11#include "journal_io.h"
 12#include "recovery_passes.h"
 13
 14#include <linux/kthread.h>
 15#include <linux/sort.h>
 16
 17struct find_btree_nodes_worker {
 18	struct closure		*cl;
 19	struct find_btree_nodes	*f;
 20	struct bch_dev		*ca;
 21};
 22
 23static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
 24{
 25	prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ",
 26		   bch2_btree_id_str(n->btree_id), n->level, n->seq,
 27		   n->journal_seq, n->cookie);
 28	bch2_bpos_to_text(out, n->min_key);
 29	prt_str(out, "-");
 30	bch2_bpos_to_text(out, n->max_key);
 31
 32	if (n->range_updated)
 33		prt_str(out, " range updated");
 34	if (n->overwritten)
 35		prt_str(out, " overwritten");
 36
 37	for (unsigned i = 0; i < n->nr_ptrs; i++) {
 38		prt_char(out, ' ');
 39		bch2_extent_ptr_to_text(out, c, n->ptrs + i);
 40	}
 41}
 42
 43static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
 44{
 45	printbuf_indent_add(out, 2);
 46	darray_for_each(nodes, i) {
 47		found_btree_node_to_text(out, c, i);
 48		prt_newline(out);
 49	}
 50	printbuf_indent_sub(out, 2);
 51}
 52
 53static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
 54{
 55	struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);
 56
 57	set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
 58	bp->k.p			= f->max_key;
 59	bp->v.seq		= cpu_to_le64(f->cookie);
 60	bp->v.sectors_written	= 0;
 61	bp->v.flags		= 0;
 62	bp->v.sectors_written	= cpu_to_le16(f->sectors_written);
 63	bp->v.min_key		= f->min_key;
 64	SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
 65	memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
 66}
 67
 68static inline u64 bkey_journal_seq(struct bkey_s_c k)
 69{
 70	switch (k.k->type) {
 71	case KEY_TYPE_inode_v3:
 72		return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq);
 73	default:
 74		return 0;
 75	}
 76}
 77
 78static bool found_btree_node_is_readable(struct btree_trans *trans,
 79					 struct found_btree_node *f)
 80{
 81	struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
 82
 83	found_btree_node_to_key(&tmp.k, f);
 84
 85	struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false);
 86	bool ret = !IS_ERR_OR_NULL(b);
 87	if (!ret)
 88		return ret;
 89
 90	f->sectors_written = b->written;
 91	f->journal_seq = le64_to_cpu(b->data->keys.journal_seq);
 92
 93	struct bkey_s_c k;
 94	struct bkey unpacked;
 95	struct btree_node_iter iter;
 96	for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
 97		f->journal_seq = max(f->journal_seq, bkey_journal_seq(k));
 98
 99	six_unlock_read(&b->c.lock);
100
101	/*
102	 * We might update this node's range; if that happens, we need the node
103	 * to be re-read so the read path can trim keys that are no longer in
104	 * this node
105	 */
106	if (b != btree_node_root(trans->c, b))
107		bch2_btree_node_evict(trans, &tmp.k);
108	return ret;
109}
110
111static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
112{
113	const struct found_btree_node *l = _l;
114	const struct found_btree_node *r = _r;
115
116	return  cmp_int(l->btree_id,	r->btree_id) ?:
117		cmp_int(l->level,	r->level) ?:
118		cmp_int(l->cookie,	r->cookie);
119}
120
121/*
122 * Given two found btree nodes, if their sequence numbers are equal, take the
123 * one that's readable:
124 */
125static int found_btree_node_cmp_time(const struct found_btree_node *l,
126				     const struct found_btree_node *r)
127{
128	return  cmp_int(l->seq, r->seq) ?:
129		cmp_int(l->journal_seq, r->journal_seq);
130}
131
132static int found_btree_node_cmp_pos(const void *_l, const void *_r)
133{
134	const struct found_btree_node *l = _l;
135	const struct found_btree_node *r = _r;
136
137	return  cmp_int(l->btree_id,	r->btree_id) ?:
138	       -cmp_int(l->level,	r->level) ?:
139		bpos_cmp(l->min_key,	r->min_key) ?:
140	       -found_btree_node_cmp_time(l, r);
141}
142
143static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
144				struct bio *bio, struct btree_node *bn, u64 offset)
145{
146	struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
147
148	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
149	bio->bi_iter.bi_sector	= offset;
150	bch2_bio_map(bio, bn, PAGE_SIZE);
151
152	submit_bio_wait(bio);
153	if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
154			       "IO error in try_read_btree_node() at %llu: %s",
155			       offset, bch2_blk_status_to_str(bio->bi_status)))
156		return;
157
158	if (le64_to_cpu(bn->magic) != bset_magic(c))
159		return;
160
161	if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
162		struct nonce nonce = btree_nonce(&bn->keys, 0);
163		unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
164
165		bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
166	}
167
168	if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
169		return;
170
171	if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
172		return;
173
174	if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX)
175		return;
176
177	rcu_read_lock();
178	struct found_btree_node n = {
179		.btree_id	= BTREE_NODE_ID(bn),
180		.level		= BTREE_NODE_LEVEL(bn),
181		.seq		= BTREE_NODE_SEQ(bn),
182		.cookie		= le64_to_cpu(bn->keys.seq),
183		.min_key	= bn->min_key,
184		.max_key	= bn->max_key,
185		.nr_ptrs	= 1,
186		.ptrs[0].type	= 1 << BCH_EXTENT_ENTRY_ptr,
187		.ptrs[0].offset	= offset,
188		.ptrs[0].dev	= ca->dev_idx,
189		.ptrs[0].gen	= bucket_gen_get(ca, sector_to_bucket(ca, offset)),
190	};
191	rcu_read_unlock();
192
193	if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
194		mutex_lock(&f->lock);
195		if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
196			bch_err(c, "try_read_btree_node() can't handle endian conversion");
197			f->ret = -EINVAL;
198			goto unlock;
199		}
200
201		if (darray_push(&f->nodes, n))
202			f->ret = -ENOMEM;
203unlock:
204		mutex_unlock(&f->lock);
205	}
206}
207
208static int read_btree_nodes_worker(void *p)
209{
210	struct find_btree_nodes_worker *w = p;
211	struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
212	struct bch_dev *ca = w->ca;
213	void *buf = (void *) __get_free_page(GFP_KERNEL);
214	struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
215	unsigned long last_print = jiffies;
216
217	if (!buf || !bio) {
218		bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
219		w->f->ret = -ENOMEM;
220		goto err;
221	}
222
223	for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
224		for (unsigned bucket_offset = 0;
225		     bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
226		     bucket_offset += btree_sectors(c)) {
227			if (time_after(jiffies, last_print + HZ * 30)) {
228				u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
229				u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
230
231				bch_info(ca, "%s: %2u%% done", __func__,
232					 (unsigned) div64_u64(cur_sector * 100, end_sector));
233				last_print = jiffies;
234			}
235
236			u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
237
238			if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
239			    !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
240				continue;
241
242			try_read_btree_node(w->f, ca, bio, buf, sector);
243		}
244err:
245	bio_put(bio);
246	free_page((unsigned long) buf);
247	percpu_ref_get(&ca->io_ref);
248	closure_put(w->cl);
249	kfree(w);
250	return 0;
251}
252
253static int read_btree_nodes(struct find_btree_nodes *f)
254{
255	struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
256	struct closure cl;
257	int ret = 0;
258
259	closure_init_stack(&cl);
260
261	for_each_online_member(c, ca) {
262		if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
263			continue;
264
265		struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
266		struct task_struct *t;
267
268		if (!w) {
269			percpu_ref_put(&ca->io_ref);
270			ret = -ENOMEM;
271			goto err;
272		}
273
274		percpu_ref_get(&ca->io_ref);
275		closure_get(&cl);
276		w->cl		= &cl;
277		w->f		= f;
278		w->ca		= ca;
279
280		t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
281		ret = PTR_ERR_OR_ZERO(t);
282		if (ret) {
283			percpu_ref_put(&ca->io_ref);
284			closure_put(&cl);
285			f->ret = ret;
286			bch_err(c, "error starting kthread: %i", ret);
287			break;
288		}
289	}
290err:
291	closure_sync(&cl);
292	return f->ret ?: ret;
293}
294
295static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
296{
297	while (n + 1 < end &&
298	       found_btree_node_cmp_pos(n, n + 1) > 0) {
299		swap(n[0], n[1]);
300		n++;
301	}
302}
303
304static int handle_overwrites(struct bch_fs *c,
305			     struct found_btree_node *start,
306			     struct found_btree_node *end)
307{
308	struct found_btree_node *n;
309again:
310	for (n = start + 1;
311	     n < end &&
312	     n->btree_id	== start->btree_id &&
313	     n->level		== start->level &&
314	     bpos_lt(n->min_key, start->max_key);
315	     n++)  {
316		int cmp = found_btree_node_cmp_time(start, n);
317
318		if (cmp > 0) {
319			if (bpos_cmp(start->max_key, n->max_key) >= 0)
320				n->overwritten = true;
321			else {
322				n->range_updated = true;
323				n->min_key = bpos_successor(start->max_key);
324				n->range_updated = true;
325				bubble_up(n, end);
326				goto again;
327			}
328		} else if (cmp < 0) {
329			BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
330
331			start->max_key = bpos_predecessor(n->min_key);
332			start->range_updated = true;
333		} else if (n->level) {
334			n->overwritten = true;
335		} else {
336			if (bpos_cmp(start->max_key, n->max_key) >= 0)
337				n->overwritten = true;
338			else {
339				n->range_updated = true;
340				n->min_key = bpos_successor(start->max_key);
341				n->range_updated = true;
342				bubble_up(n, end);
343				goto again;
344			}
345		}
346	}
347
348	return 0;
349}
350
351int bch2_scan_for_btree_nodes(struct bch_fs *c)
352{
353	struct find_btree_nodes *f = &c->found_btree_nodes;
354	struct printbuf buf = PRINTBUF;
355	size_t dst;
356	int ret = 0;
357
358	if (f->nodes.nr)
359		return 0;
360
361	mutex_init(&f->lock);
362
363	ret = read_btree_nodes(f);
364	if (ret)
365		return ret;
366
367	if (!f->nodes.nr) {
368		bch_err(c, "%s: no btree nodes found", __func__);
369		ret = -EINVAL;
370		goto err;
371	}
372
373	if (0 && c->opts.verbose) {
374		printbuf_reset(&buf);
375		prt_printf(&buf, "%s: nodes found:\n", __func__);
376		found_btree_nodes_to_text(&buf, c, f->nodes);
377		bch2_print_string_as_lines(KERN_INFO, buf.buf);
378	}
379
380	sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
381
382	dst = 0;
383	darray_for_each(f->nodes, i) {
384		struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;
385
386		if (prev &&
387		    prev->cookie == i->cookie) {
388			if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
389				bch_err(c, "%s: found too many replicas for btree node", __func__);
390				ret = -EINVAL;
391				goto err;
392			}
393			prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
394		} else {
395			f->nodes.data[dst++] = *i;
396		}
397	}
398	f->nodes.nr = dst;
399
400	sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
401
402	if (0 && c->opts.verbose) {
403		printbuf_reset(&buf);
404		prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
405		found_btree_nodes_to_text(&buf, c, f->nodes);
406		bch2_print_string_as_lines(KERN_INFO, buf.buf);
407	}
408
409	dst = 0;
410	darray_for_each(f->nodes, i) {
411		if (i->overwritten)
412			continue;
413
414		ret = handle_overwrites(c, i, &darray_top(f->nodes));
415		if (ret)
416			goto err;
417
418		BUG_ON(i->overwritten);
419		f->nodes.data[dst++] = *i;
420	}
421	f->nodes.nr = dst;
422
423	if (c->opts.verbose) {
424		printbuf_reset(&buf);
425		prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
426		found_btree_nodes_to_text(&buf, c, f->nodes);
427		bch2_print_string_as_lines(KERN_INFO, buf.buf);
428	}
429
430	eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
431err:
432	printbuf_exit(&buf);
433	return ret;
434}
435
436static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
437{
438	const struct found_btree_node *l = _l;
439	const struct found_btree_node *r = _r;
440
441	return  cmp_int(l->btree_id,	r->btree_id) ?:
442	       -cmp_int(l->level,	r->level) ?:
443		bpos_cmp(l->max_key,	r->min_key);
444}
445
446#define for_each_found_btree_node_in_range(_f, _search, _idx)				\
447	for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr,		\
448					sizeof((_f)->nodes.data[0]),			\
449					found_btree_node_range_start_cmp, &search);	\
450	     _idx < (_f)->nodes.nr &&							\
451	     (_f)->nodes.data[_idx].btree_id == _search.btree_id &&			\
452	     (_f)->nodes.data[_idx].level == _search.level &&				\
453	     bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key);			\
454	     _idx = eytzinger0_next(_idx, (_f)->nodes.nr))
455
456bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
457{
458	struct find_btree_nodes *f = &c->found_btree_nodes;
459
460	struct found_btree_node search = {
461		.btree_id	= b->c.btree_id,
462		.level		= b->c.level,
463		.min_key	= b->data->min_key,
464		.max_key	= b->key.k.p,
465	};
466
467	for_each_found_btree_node_in_range(f, search, idx)
468		if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
469			return true;
470	return false;
471}
472
473bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
474{
475	struct found_btree_node search = {
476		.btree_id	= btree,
477		.level		= 0,
478		.min_key	= POS_MIN,
479		.max_key	= SPOS_MAX,
480	};
481
482	for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
483		return true;
484	return false;
485}
486
487int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
488			   unsigned level, struct bpos node_min, struct bpos node_max)
489{
490	if (btree_id_is_alloc(btree))
491		return 0;
492
493	struct find_btree_nodes *f = &c->found_btree_nodes;
494
495	int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
496	if (ret)
497		return ret;
498
499	if (c->opts.verbose) {
500		struct printbuf buf = PRINTBUF;
501
502		prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
503		bch2_bpos_to_text(&buf, node_min);
504		prt_str(&buf, " - ");
505		bch2_bpos_to_text(&buf, node_max);
506
507		bch_info(c, "%s(): %s", __func__, buf.buf);
508		printbuf_exit(&buf);
509	}
510
511	struct found_btree_node search = {
512		.btree_id	= btree,
513		.level		= level,
514		.min_key	= node_min,
515		.max_key	= node_max,
516	};
517
518	for_each_found_btree_node_in_range(f, search, idx) {
519		struct found_btree_node n = f->nodes.data[idx];
520
521		n.range_updated |= bpos_lt(n.min_key, node_min);
522		n.min_key = bpos_max(n.min_key, node_min);
523
524		n.range_updated |= bpos_gt(n.max_key, node_max);
525		n.max_key = bpos_min(n.max_key, node_max);
526
527		struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
528
529		found_btree_node_to_key(&tmp.k, &n);
530
531		struct printbuf buf = PRINTBUF;
532		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
533		bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
534		printbuf_exit(&buf);
535
536		BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0));
537
538		ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
539		if (ret)
540			return ret;
541	}
542
543	return 0;
544}
545
546void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
547{
548	darray_exit(&f->nodes);
549}