Linux Audio

Check our new training course

Loading...
  1/*
  2 * f2fs extent cache support
  3 *
  4 * Copyright (c) 2015 Motorola Mobility
  5 * Copyright (c) 2015 Samsung Electronics
  6 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  7 *          Chao Yu <chao2.yu@samsung.com>
  8 *
  9 * This program is free software; you can redistribute it and/or modify
 10 * it under the terms of the GNU General Public License version 2 as
 11 * published by the Free Software Foundation.
 12 */
 13
 14#include <linux/fs.h>
 15#include <linux/f2fs_fs.h>
 16
 17#include "f2fs.h"
 18#include "node.h"
 19#include <trace/events/f2fs.h>
 20
 21static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
 22							unsigned int ofs)
 23{
 24	if (cached_re) {
 25		if (cached_re->ofs <= ofs &&
 26				cached_re->ofs + cached_re->len > ofs) {
 27			return cached_re;
 28		}
 29	}
 30	return NULL;
 31}
 32
 33static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
 34							unsigned int ofs)
 35{
 36	struct rb_node *node = root->rb_node;
 37	struct rb_entry *re;
 38
 39	while (node) {
 40		re = rb_entry(node, struct rb_entry, rb_node);
 41
 42		if (ofs < re->ofs)
 43			node = node->rb_left;
 44		else if (ofs >= re->ofs + re->len)
 45			node = node->rb_right;
 46		else
 47			return re;
 48	}
 49	return NULL;
 50}
 51
 52struct rb_entry *__lookup_rb_tree(struct rb_root *root,
 53				struct rb_entry *cached_re, unsigned int ofs)
 54{
 55	struct rb_entry *re;
 56
 57	re = __lookup_rb_tree_fast(cached_re, ofs);
 58	if (!re)
 59		return __lookup_rb_tree_slow(root, ofs);
 60
 61	return re;
 62}
 63
 64struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
 65				struct rb_root *root, struct rb_node **parent,
 66				unsigned int ofs)
 67{
 68	struct rb_node **p = &root->rb_node;
 69	struct rb_entry *re;
 70
 71	while (*p) {
 72		*parent = *p;
 73		re = rb_entry(*parent, struct rb_entry, rb_node);
 74
 75		if (ofs < re->ofs)
 76			p = &(*p)->rb_left;
 77		else if (ofs >= re->ofs + re->len)
 78			p = &(*p)->rb_right;
 79		else
 80			f2fs_bug_on(sbi, 1);
 81	}
 82
 83	return p;
 84}
 85
 86/*
 87 * lookup rb entry in position of @ofs in rb-tree,
 88 * if hit, return the entry, otherwise, return NULL
 89 * @prev_ex: extent before ofs
 90 * @next_ex: extent after ofs
 91 * @insert_p: insert point for new extent at ofs
 92 * in order to simpfy the insertion after.
 93 * tree must stay unchanged between lookup and insertion.
 94 */
 95struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
 96				struct rb_entry *cached_re,
 97				unsigned int ofs,
 98				struct rb_entry **prev_entry,
 99				struct rb_entry **next_entry,
100				struct rb_node ***insert_p,
101				struct rb_node **insert_parent,
102				bool force)
103{
104	struct rb_node **pnode = &root->rb_node;
105	struct rb_node *parent = NULL, *tmp_node;
106	struct rb_entry *re = cached_re;
107
108	*insert_p = NULL;
109	*insert_parent = NULL;
110	*prev_entry = NULL;
111	*next_entry = NULL;
112
113	if (RB_EMPTY_ROOT(root))
114		return NULL;
115
116	if (re) {
117		if (re->ofs <= ofs && re->ofs + re->len > ofs)
118			goto lookup_neighbors;
119	}
120
121	while (*pnode) {
122		parent = *pnode;
123		re = rb_entry(*pnode, struct rb_entry, rb_node);
124
125		if (ofs < re->ofs)
126			pnode = &(*pnode)->rb_left;
127		else if (ofs >= re->ofs + re->len)
128			pnode = &(*pnode)->rb_right;
129		else
130			goto lookup_neighbors;
131	}
132
133	*insert_p = pnode;
134	*insert_parent = parent;
135
136	re = rb_entry(parent, struct rb_entry, rb_node);
137	tmp_node = parent;
138	if (parent && ofs > re->ofs)
139		tmp_node = rb_next(parent);
140	*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
141
142	tmp_node = parent;
143	if (parent && ofs < re->ofs)
144		tmp_node = rb_prev(parent);
145	*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
146	return NULL;
147
148lookup_neighbors:
149	if (ofs == re->ofs || force) {
150		/* lookup prev node for merging backward later */
151		tmp_node = rb_prev(&re->rb_node);
152		*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
153	}
154	if (ofs == re->ofs + re->len - 1 || force) {
155		/* lookup next node for merging frontward later */
156		tmp_node = rb_next(&re->rb_node);
157		*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
158	}
159	return re;
160}
161
162bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
163						struct rb_root *root)
164{
165#ifdef CONFIG_F2FS_CHECK_FS
166	struct rb_node *cur = rb_first(root), *next;
167	struct rb_entry *cur_re, *next_re;
168
169	if (!cur)
170		return true;
171
172	while (cur) {
173		next = rb_next(cur);
174		if (!next)
175			return true;
176
177		cur_re = rb_entry(cur, struct rb_entry, rb_node);
178		next_re = rb_entry(next, struct rb_entry, rb_node);
179
180		if (cur_re->ofs + cur_re->len > next_re->ofs) {
181			f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
182				"cur(%u, %u) next(%u, %u)",
183				cur_re->ofs, cur_re->len,
184				next_re->ofs, next_re->len);
185			return false;
186		}
187
188		cur = next;
189	}
190#endif
191	return true;
192}
193
194static struct kmem_cache *extent_tree_slab;
195static struct kmem_cache *extent_node_slab;
196
197static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
198				struct extent_tree *et, struct extent_info *ei,
199				struct rb_node *parent, struct rb_node **p)
200{
201	struct extent_node *en;
202
203	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
204	if (!en)
205		return NULL;
206
207	en->ei = *ei;
208	INIT_LIST_HEAD(&en->list);
209	en->et = et;
210
211	rb_link_node(&en->rb_node, parent, p);
212	rb_insert_color(&en->rb_node, &et->root);
213	atomic_inc(&et->node_cnt);
214	atomic_inc(&sbi->total_ext_node);
215	return en;
216}
217
218static void __detach_extent_node(struct f2fs_sb_info *sbi,
219				struct extent_tree *et, struct extent_node *en)
220{
221	rb_erase(&en->rb_node, &et->root);
222	atomic_dec(&et->node_cnt);
223	atomic_dec(&sbi->total_ext_node);
224
225	if (et->cached_en == en)
226		et->cached_en = NULL;
227	kmem_cache_free(extent_node_slab, en);
228}
229
230/*
231 * Flow to release an extent_node:
232 * 1. list_del_init
233 * 2. __detach_extent_node
234 * 3. kmem_cache_free.
235 */
236static void __release_extent_node(struct f2fs_sb_info *sbi,
237			struct extent_tree *et, struct extent_node *en)
238{
239	spin_lock(&sbi->extent_lock);
240	f2fs_bug_on(sbi, list_empty(&en->list));
241	list_del_init(&en->list);
242	spin_unlock(&sbi->extent_lock);
243
244	__detach_extent_node(sbi, et, en);
245}
246
247static struct extent_tree *__grab_extent_tree(struct inode *inode)
248{
249	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
250	struct extent_tree *et;
251	nid_t ino = inode->i_ino;
252
253	mutex_lock(&sbi->extent_tree_lock);
254	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
255	if (!et) {
256		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
257		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
258		memset(et, 0, sizeof(struct extent_tree));
259		et->ino = ino;
260		et->root = RB_ROOT;
261		et->cached_en = NULL;
262		rwlock_init(&et->lock);
263		INIT_LIST_HEAD(&et->list);
264		atomic_set(&et->node_cnt, 0);
265		atomic_inc(&sbi->total_ext_tree);
266	} else {
267		atomic_dec(&sbi->total_zombie_tree);
268		list_del_init(&et->list);
269	}
270	mutex_unlock(&sbi->extent_tree_lock);
271
272	/* never died until evict_inode */
273	F2FS_I(inode)->extent_tree = et;
274
275	return et;
276}
277
278static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
279				struct extent_tree *et, struct extent_info *ei)
280{
281	struct rb_node **p = &et->root.rb_node;
282	struct extent_node *en;
283
284	en = __attach_extent_node(sbi, et, ei, NULL, p);
285	if (!en)
286		return NULL;
287
288	et->largest = en->ei;
289	et->cached_en = en;
290	return en;
291}
292
293static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
294					struct extent_tree *et)
295{
296	struct rb_node *node, *next;
297	struct extent_node *en;
298	unsigned int count = atomic_read(&et->node_cnt);
299
300	node = rb_first(&et->root);
301	while (node) {
302		next = rb_next(node);
303		en = rb_entry(node, struct extent_node, rb_node);
304		__release_extent_node(sbi, et, en);
305		node = next;
306	}
307
308	return count - atomic_read(&et->node_cnt);
309}
310
311static void __drop_largest_extent(struct inode *inode,
312					pgoff_t fofs, unsigned int len)
313{
314	struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
315
316	if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
317		largest->len = 0;
318		f2fs_mark_inode_dirty_sync(inode, true);
319	}
320}
321
322/* return true, if inode page is changed */
323static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
324{
325	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
326	struct extent_tree *et;
327	struct extent_node *en;
328	struct extent_info ei;
329
330	if (!f2fs_may_extent_tree(inode)) {
331		/* drop largest extent */
332		if (i_ext && i_ext->len) {
333			i_ext->len = 0;
334			return true;
335		}
336		return false;
337	}
338
339	et = __grab_extent_tree(inode);
340
341	if (!i_ext || !i_ext->len)
342		return false;
343
344	get_extent_info(&ei, i_ext);
345
346	write_lock(&et->lock);
347	if (atomic_read(&et->node_cnt))
348		goto out;
349
350	en = __init_extent_tree(sbi, et, &ei);
351	if (en) {
352		spin_lock(&sbi->extent_lock);
353		list_add_tail(&en->list, &sbi->extent_list);
354		spin_unlock(&sbi->extent_lock);
355	}
356out:
357	write_unlock(&et->lock);
358	return false;
359}
360
361bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
362{
363	bool ret =  __f2fs_init_extent_tree(inode, i_ext);
364
365	if (!F2FS_I(inode)->extent_tree)
366		set_inode_flag(inode, FI_NO_EXTENT);
367
368	return ret;
369}
370
371static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
372							struct extent_info *ei)
373{
374	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
375	struct extent_tree *et = F2FS_I(inode)->extent_tree;
376	struct extent_node *en;
377	bool ret = false;
378
379	f2fs_bug_on(sbi, !et);
380
381	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
382
383	read_lock(&et->lock);
384
385	if (et->largest.fofs <= pgofs &&
386			et->largest.fofs + et->largest.len > pgofs) {
387		*ei = et->largest;
388		ret = true;
389		stat_inc_largest_node_hit(sbi);
390		goto out;
391	}
392
393	en = (struct extent_node *)__lookup_rb_tree(&et->root,
394				(struct rb_entry *)et->cached_en, pgofs);
395	if (!en)
396		goto out;
397
398	if (en == et->cached_en)
399		stat_inc_cached_node_hit(sbi);
400	else
401		stat_inc_rbtree_node_hit(sbi);
402
403	*ei = en->ei;
404	spin_lock(&sbi->extent_lock);
405	if (!list_empty(&en->list)) {
406		list_move_tail(&en->list, &sbi->extent_list);
407		et->cached_en = en;
408	}
409	spin_unlock(&sbi->extent_lock);
410	ret = true;
411out:
412	stat_inc_total_hit(sbi);
413	read_unlock(&et->lock);
414
415	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
416	return ret;
417}
418
419static struct extent_node *__try_merge_extent_node(struct inode *inode,
420				struct extent_tree *et, struct extent_info *ei,
421				struct extent_node *prev_ex,
422				struct extent_node *next_ex)
423{
424	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
425	struct extent_node *en = NULL;
426
427	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
428		prev_ex->ei.len += ei->len;
429		ei = &prev_ex->ei;
430		en = prev_ex;
431	}
432
433	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
434		next_ex->ei.fofs = ei->fofs;
435		next_ex->ei.blk = ei->blk;
436		next_ex->ei.len += ei->len;
437		if (en)
438			__release_extent_node(sbi, et, prev_ex);
439
440		en = next_ex;
441	}
442
443	if (!en)
444		return NULL;
445
446	__try_update_largest_extent(inode, et, en);
447
448	spin_lock(&sbi->extent_lock);
449	if (!list_empty(&en->list)) {
450		list_move_tail(&en->list, &sbi->extent_list);
451		et->cached_en = en;
452	}
453	spin_unlock(&sbi->extent_lock);
454	return en;
455}
456
457static struct extent_node *__insert_extent_tree(struct inode *inode,
458				struct extent_tree *et, struct extent_info *ei,
459				struct rb_node **insert_p,
460				struct rb_node *insert_parent)
461{
462	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
463	struct rb_node **p;
464	struct rb_node *parent = NULL;
465	struct extent_node *en = NULL;
466
467	if (insert_p && insert_parent) {
468		parent = insert_parent;
469		p = insert_p;
470		goto do_insert;
471	}
472
473	p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
474do_insert:
475	en = __attach_extent_node(sbi, et, ei, parent, p);
476	if (!en)
477		return NULL;
478
479	__try_update_largest_extent(inode, et, en);
480
481	/* update in global extent list */
482	spin_lock(&sbi->extent_lock);
483	list_add_tail(&en->list, &sbi->extent_list);
484	et->cached_en = en;
485	spin_unlock(&sbi->extent_lock);
486	return en;
487}
488
489static void f2fs_update_extent_tree_range(struct inode *inode,
490				pgoff_t fofs, block_t blkaddr, unsigned int len)
491{
492	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
493	struct extent_tree *et = F2FS_I(inode)->extent_tree;
494	struct extent_node *en = NULL, *en1 = NULL;
495	struct extent_node *prev_en = NULL, *next_en = NULL;
496	struct extent_info ei, dei, prev;
497	struct rb_node **insert_p = NULL, *insert_parent = NULL;
498	unsigned int end = fofs + len;
499	unsigned int pos = (unsigned int)fofs;
500
501	if (!et)
502		return;
503
504	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
505
506	write_lock(&et->lock);
507
508	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
509		write_unlock(&et->lock);
510		return;
511	}
512
513	prev = et->largest;
514	dei.len = 0;
515
516	/*
517	 * drop largest extent before lookup, in case it's already
518	 * been shrunk from extent tree
519	 */
520	__drop_largest_extent(inode, fofs, len);
521
522	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
523	en = (struct extent_node *)__lookup_rb_tree_ret(&et->root,
524					(struct rb_entry *)et->cached_en, fofs,
525					(struct rb_entry **)&prev_en,
526					(struct rb_entry **)&next_en,
527					&insert_p, &insert_parent, false);
528	if (!en)
529		en = next_en;
530
531	/* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
532	while (en && en->ei.fofs < end) {
533		unsigned int org_end;
534		int parts = 0;	/* # of parts current extent split into */
535
536		next_en = en1 = NULL;
537
538		dei = en->ei;
539		org_end = dei.fofs + dei.len;
540		f2fs_bug_on(sbi, pos >= org_end);
541
542		if (pos > dei.fofs &&	pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
543			en->ei.len = pos - en->ei.fofs;
544			prev_en = en;
545			parts = 1;
546		}
547
548		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
549			if (parts) {
550				set_extent_info(&ei, end,
551						end - dei.fofs + dei.blk,
552						org_end - end);
553				en1 = __insert_extent_tree(inode, et, &ei,
554							NULL, NULL);
555				next_en = en1;
556			} else {
557				en->ei.fofs = end;
558				en->ei.blk += end - dei.fofs;
559				en->ei.len -= end - dei.fofs;
560				next_en = en;
561			}
562			parts++;
563		}
564
565		if (!next_en) {
566			struct rb_node *node = rb_next(&en->rb_node);
567
568			next_en = rb_entry_safe(node, struct extent_node,
569						rb_node);
570		}
571
572		if (parts)
573			__try_update_largest_extent(inode, et, en);
574		else
575			__release_extent_node(sbi, et, en);
576
577		/*
578		 * if original extent is split into zero or two parts, extent
579		 * tree has been altered by deletion or insertion, therefore
580		 * invalidate pointers regard to tree.
581		 */
582		if (parts != 1) {
583			insert_p = NULL;
584			insert_parent = NULL;
585		}
586		en = next_en;
587	}
588
589	/* 3. update extent in extent cache */
590	if (blkaddr) {
591
592		set_extent_info(&ei, fofs, blkaddr, len);
593		if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en))
594			__insert_extent_tree(inode, et, &ei,
595						insert_p, insert_parent);
596
597		/* give up extent_cache, if split and small updates happen */
598		if (dei.len >= 1 &&
599				prev.len < F2FS_MIN_EXTENT_LEN &&
600				et->largest.len < F2FS_MIN_EXTENT_LEN) {
601			__drop_largest_extent(inode, 0, UINT_MAX);
602			set_inode_flag(inode, FI_NO_EXTENT);
603		}
604	}
605
606	if (is_inode_flag_set(inode, FI_NO_EXTENT))
607		__free_extent_tree(sbi, et);
608
609	write_unlock(&et->lock);
610}
611
612unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
613{
614	struct extent_tree *et, *next;
615	struct extent_node *en;
616	unsigned int node_cnt = 0, tree_cnt = 0;
617	int remained;
618
619	if (!test_opt(sbi, EXTENT_CACHE))
620		return 0;
621
622	if (!atomic_read(&sbi->total_zombie_tree))
623		goto free_node;
624
625	if (!mutex_trylock(&sbi->extent_tree_lock))
626		goto out;
627
628	/* 1. remove unreferenced extent tree */
629	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
630		if (atomic_read(&et->node_cnt)) {
631			write_lock(&et->lock);
632			node_cnt += __free_extent_tree(sbi, et);
633			write_unlock(&et->lock);
634		}
635		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
636		list_del_init(&et->list);
637		radix_tree_delete(&sbi->extent_tree_root, et->ino);
638		kmem_cache_free(extent_tree_slab, et);
639		atomic_dec(&sbi->total_ext_tree);
640		atomic_dec(&sbi->total_zombie_tree);
641		tree_cnt++;
642
643		if (node_cnt + tree_cnt >= nr_shrink)
644			goto unlock_out;
645		cond_resched();
646	}
647	mutex_unlock(&sbi->extent_tree_lock);
648
649free_node:
650	/* 2. remove LRU extent entries */
651	if (!mutex_trylock(&sbi->extent_tree_lock))
652		goto out;
653
654	remained = nr_shrink - (node_cnt + tree_cnt);
655
656	spin_lock(&sbi->extent_lock);
657	for (; remained > 0; remained--) {
658		if (list_empty(&sbi->extent_list))
659			break;
660		en = list_first_entry(&sbi->extent_list,
661					struct extent_node, list);
662		et = en->et;
663		if (!write_trylock(&et->lock)) {
664			/* refresh this extent node's position in extent list */
665			list_move_tail(&en->list, &sbi->extent_list);
666			continue;
667		}
668
669		list_del_init(&en->list);
670		spin_unlock(&sbi->extent_lock);
671
672		__detach_extent_node(sbi, et, en);
673
674		write_unlock(&et->lock);
675		node_cnt++;
676		spin_lock(&sbi->extent_lock);
677	}
678	spin_unlock(&sbi->extent_lock);
679
680unlock_out:
681	mutex_unlock(&sbi->extent_tree_lock);
682out:
683	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
684
685	return node_cnt + tree_cnt;
686}
687
688unsigned int f2fs_destroy_extent_node(struct inode *inode)
689{
690	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
691	struct extent_tree *et = F2FS_I(inode)->extent_tree;
692	unsigned int node_cnt = 0;
693
694	if (!et || !atomic_read(&et->node_cnt))
695		return 0;
696
697	write_lock(&et->lock);
698	node_cnt = __free_extent_tree(sbi, et);
699	write_unlock(&et->lock);
700
701	return node_cnt;
702}
703
704void f2fs_drop_extent_tree(struct inode *inode)
705{
706	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
707	struct extent_tree *et = F2FS_I(inode)->extent_tree;
708
709	if (!f2fs_may_extent_tree(inode))
710		return;
711
712	set_inode_flag(inode, FI_NO_EXTENT);
713
714	write_lock(&et->lock);
715	__free_extent_tree(sbi, et);
716	__drop_largest_extent(inode, 0, UINT_MAX);
717	write_unlock(&et->lock);
718}
719
720void f2fs_destroy_extent_tree(struct inode *inode)
721{
722	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
723	struct extent_tree *et = F2FS_I(inode)->extent_tree;
724	unsigned int node_cnt = 0;
725
726	if (!et)
727		return;
728
729	if (inode->i_nlink && !is_bad_inode(inode) &&
730					atomic_read(&et->node_cnt)) {
731		mutex_lock(&sbi->extent_tree_lock);
732		list_add_tail(&et->list, &sbi->zombie_list);
733		atomic_inc(&sbi->total_zombie_tree);
734		mutex_unlock(&sbi->extent_tree_lock);
735		return;
736	}
737
738	/* free all extent info belong to this extent tree */
739	node_cnt = f2fs_destroy_extent_node(inode);
740
741	/* delete extent tree entry in radix tree */
742	mutex_lock(&sbi->extent_tree_lock);
743	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
744	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
745	kmem_cache_free(extent_tree_slab, et);
746	atomic_dec(&sbi->total_ext_tree);
747	mutex_unlock(&sbi->extent_tree_lock);
748
749	F2FS_I(inode)->extent_tree = NULL;
750
751	trace_f2fs_destroy_extent_tree(inode, node_cnt);
752}
753
754bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
755					struct extent_info *ei)
756{
757	if (!f2fs_may_extent_tree(inode))
758		return false;
759
760	return f2fs_lookup_extent_tree(inode, pgofs, ei);
761}
762
763void f2fs_update_extent_cache(struct dnode_of_data *dn)
764{
765	pgoff_t fofs;
766	block_t blkaddr;
767
768	if (!f2fs_may_extent_tree(dn->inode))
769		return;
770
771	if (dn->data_blkaddr == NEW_ADDR)
772		blkaddr = NULL_ADDR;
773	else
774		blkaddr = dn->data_blkaddr;
775
776	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
777								dn->ofs_in_node;
778	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
779}
780
781void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
782				pgoff_t fofs, block_t blkaddr, unsigned int len)
783
784{
785	if (!f2fs_may_extent_tree(dn->inode))
786		return;
787
788	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
789}
790
791void init_extent_cache_info(struct f2fs_sb_info *sbi)
792{
793	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
794	mutex_init(&sbi->extent_tree_lock);
795	INIT_LIST_HEAD(&sbi->extent_list);
796	spin_lock_init(&sbi->extent_lock);
797	atomic_set(&sbi->total_ext_tree, 0);
798	INIT_LIST_HEAD(&sbi->zombie_list);
799	atomic_set(&sbi->total_zombie_tree, 0);
800	atomic_set(&sbi->total_ext_node, 0);
801}
802
803int __init create_extent_cache(void)
804{
805	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
806			sizeof(struct extent_tree));
807	if (!extent_tree_slab)
808		return -ENOMEM;
809	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
810			sizeof(struct extent_node));
811	if (!extent_node_slab) {
812		kmem_cache_destroy(extent_tree_slab);
813		return -ENOMEM;
814	}
815	return 0;
816}
817
818void destroy_extent_cache(void)
819{
820	kmem_cache_destroy(extent_node_slab);
821	kmem_cache_destroy(extent_tree_slab);
822}
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * f2fs extent cache support
   4 *
   5 * Copyright (c) 2015 Motorola Mobility
   6 * Copyright (c) 2015 Samsung Electronics
   7 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
   8 *          Chao Yu <chao2.yu@samsung.com>
   9 *
  10 * block_age-based extent cache added by:
  11 * Copyright (c) 2022 xiaomi Co., Ltd.
  12 *             http://www.xiaomi.com/
  13 */
  14
  15#include <linux/fs.h>
  16#include <linux/f2fs_fs.h>
  17
  18#include "f2fs.h"
  19#include "node.h"
  20#include <trace/events/f2fs.h>
  21
  22bool sanity_check_extent_cache(struct inode *inode)
  23{
  24	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  25	struct f2fs_inode_info *fi = F2FS_I(inode);
  26	struct extent_tree *et = fi->extent_tree[EX_READ];
  27	struct extent_info *ei;
  28
  29	if (!et)
  30		return true;
  31
  32	ei = &et->largest;
  33	if (!ei->len)
  34		return true;
  35
  36	/* Let's drop, if checkpoint got corrupted. */
  37	if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
  38		ei->len = 0;
  39		et->largest_updated = true;
  40		return true;
  41	}
  42
  43	if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
  44	    !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
  45					DATA_GENERIC_ENHANCE)) {
  46		set_sbi_flag(sbi, SBI_NEED_FSCK);
  47		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
  48			  __func__, inode->i_ino,
  49			  ei->blk, ei->fofs, ei->len);
  50		return false;
  51	}
  52	return true;
  53}
  54
  55static void __set_extent_info(struct extent_info *ei,
  56				unsigned int fofs, unsigned int len,
  57				block_t blk, bool keep_clen,
  58				unsigned long age, unsigned long last_blocks,
  59				enum extent_type type)
  60{
  61	ei->fofs = fofs;
  62	ei->len = len;
  63
  64	if (type == EX_READ) {
  65		ei->blk = blk;
  66		if (keep_clen)
  67			return;
  68#ifdef CONFIG_F2FS_FS_COMPRESSION
  69		ei->c_len = 0;
  70#endif
  71	} else if (type == EX_BLOCK_AGE) {
  72		ei->age = age;
  73		ei->last_blocks = last_blocks;
  74	}
  75}
  76
  77static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
  78{
  79	if (type == EX_READ)
  80		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
  81			S_ISREG(inode->i_mode);
  82	if (type == EX_BLOCK_AGE)
  83		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
  84			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
  85	return false;
  86}
  87
  88static bool __may_extent_tree(struct inode *inode, enum extent_type type)
  89{
  90	/*
  91	 * for recovered files during mount do not create extents
  92	 * if shrinker is not registered.
  93	 */
  94	if (list_empty(&F2FS_I_SB(inode)->s_list))
  95		return false;
  96
  97	if (!__init_may_extent_tree(inode, type))
  98		return false;
  99
 100	if (type == EX_READ) {
 101		if (is_inode_flag_set(inode, FI_NO_EXTENT))
 102			return false;
 103		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
 104				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
 105			return false;
 106	} else if (type == EX_BLOCK_AGE) {
 107		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
 108			return false;
 109		if (file_is_cold(inode))
 110			return false;
 111	}
 112	return true;
 113}
 114
 115static void __try_update_largest_extent(struct extent_tree *et,
 116						struct extent_node *en)
 117{
 118	if (et->type != EX_READ)
 119		return;
 120	if (en->ei.len <= et->largest.len)
 121		return;
 122
 123	et->largest = en->ei;
 124	et->largest_updated = true;
 125}
 126
 127static bool __is_extent_mergeable(struct extent_info *back,
 128		struct extent_info *front, enum extent_type type)
 129{
 130	if (type == EX_READ) {
 131#ifdef CONFIG_F2FS_FS_COMPRESSION
 132		if (back->c_len && back->len != back->c_len)
 133			return false;
 134		if (front->c_len && front->len != front->c_len)
 135			return false;
 136#endif
 137		return (back->fofs + back->len == front->fofs &&
 138				back->blk + back->len == front->blk);
 139	} else if (type == EX_BLOCK_AGE) {
 140		return (back->fofs + back->len == front->fofs &&
 141			abs(back->age - front->age) <= SAME_AGE_REGION &&
 142			abs(back->last_blocks - front->last_blocks) <=
 143							SAME_AGE_REGION);
 144	}
 145	return false;
 146}
 147
 148static bool __is_back_mergeable(struct extent_info *cur,
 149		struct extent_info *back, enum extent_type type)
 150{
 151	return __is_extent_mergeable(back, cur, type);
 152}
 153
 154static bool __is_front_mergeable(struct extent_info *cur,
 155		struct extent_info *front, enum extent_type type)
 156{
 157	return __is_extent_mergeable(cur, front, type);
 158}
 159
 160static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
 161			struct extent_node *cached_en, unsigned int fofs)
 162{
 163	struct rb_node *node = root->rb_root.rb_node;
 164	struct extent_node *en;
 165
 166	/* check a cached entry */
 167	if (cached_en && cached_en->ei.fofs <= fofs &&
 168			cached_en->ei.fofs + cached_en->ei.len > fofs)
 169		return cached_en;
 170
 171	/* check rb_tree */
 172	while (node) {
 173		en = rb_entry(node, struct extent_node, rb_node);
 174
 175		if (fofs < en->ei.fofs)
 176			node = node->rb_left;
 177		else if (fofs >= en->ei.fofs + en->ei.len)
 178			node = node->rb_right;
 179		else
 180			return en;
 181	}
 182	return NULL;
 183}
 184
 185/*
 186 * lookup rb entry in position of @fofs in rb-tree,
 187 * if hit, return the entry, otherwise, return NULL
 188 * @prev_ex: extent before fofs
 189 * @next_ex: extent after fofs
 190 * @insert_p: insert point for new extent at fofs
 191 * in order to simplify the insertion after.
 192 * tree must stay unchanged between lookup and insertion.
 193 */
 194static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
 195				struct extent_node *cached_en,
 196				unsigned int fofs,
 197				struct extent_node **prev_entry,
 198				struct extent_node **next_entry,
 199				struct rb_node ***insert_p,
 200				struct rb_node **insert_parent,
 201				bool *leftmost)
 202{
 203	struct rb_node **pnode = &root->rb_root.rb_node;
 204	struct rb_node *parent = NULL, *tmp_node;
 205	struct extent_node *en = cached_en;
 206
 207	*insert_p = NULL;
 208	*insert_parent = NULL;
 209	*prev_entry = NULL;
 210	*next_entry = NULL;
 211
 212	if (RB_EMPTY_ROOT(&root->rb_root))
 213		return NULL;
 214
 215	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
 216		goto lookup_neighbors;
 217
 218	*leftmost = true;
 219
 220	while (*pnode) {
 221		parent = *pnode;
 222		en = rb_entry(*pnode, struct extent_node, rb_node);
 223
 224		if (fofs < en->ei.fofs) {
 225			pnode = &(*pnode)->rb_left;
 226		} else if (fofs >= en->ei.fofs + en->ei.len) {
 227			pnode = &(*pnode)->rb_right;
 228			*leftmost = false;
 229		} else {
 230			goto lookup_neighbors;
 231		}
 232	}
 233
 234	*insert_p = pnode;
 235	*insert_parent = parent;
 236
 237	en = rb_entry(parent, struct extent_node, rb_node);
 238	tmp_node = parent;
 239	if (parent && fofs > en->ei.fofs)
 240		tmp_node = rb_next(parent);
 241	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 242
 243	tmp_node = parent;
 244	if (parent && fofs < en->ei.fofs)
 245		tmp_node = rb_prev(parent);
 246	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 247	return NULL;
 248
 249lookup_neighbors:
 250	if (fofs == en->ei.fofs) {
 251		/* lookup prev node for merging backward later */
 252		tmp_node = rb_prev(&en->rb_node);
 253		*prev_entry = rb_entry_safe(tmp_node,
 254					struct extent_node, rb_node);
 255	}
 256	if (fofs == en->ei.fofs + en->ei.len - 1) {
 257		/* lookup next node for merging frontward later */
 258		tmp_node = rb_next(&en->rb_node);
 259		*next_entry = rb_entry_safe(tmp_node,
 260					struct extent_node, rb_node);
 261	}
 262	return en;
 263}
 264
 265static struct kmem_cache *extent_tree_slab;
 266static struct kmem_cache *extent_node_slab;
 267
 268static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 269				struct extent_tree *et, struct extent_info *ei,
 270				struct rb_node *parent, struct rb_node **p,
 271				bool leftmost)
 272{
 273	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 274	struct extent_node *en;
 275
 276	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
 277	if (!en)
 278		return NULL;
 279
 280	en->ei = *ei;
 281	INIT_LIST_HEAD(&en->list);
 282	en->et = et;
 283
 284	rb_link_node(&en->rb_node, parent, p);
 285	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 286	atomic_inc(&et->node_cnt);
 287	atomic_inc(&eti->total_ext_node);
 288	return en;
 289}
 290
 291static void __detach_extent_node(struct f2fs_sb_info *sbi,
 292				struct extent_tree *et, struct extent_node *en)
 293{
 294	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 295
 296	rb_erase_cached(&en->rb_node, &et->root);
 297	atomic_dec(&et->node_cnt);
 298	atomic_dec(&eti->total_ext_node);
 299
 300	if (et->cached_en == en)
 301		et->cached_en = NULL;
 302	kmem_cache_free(extent_node_slab, en);
 303}
 304
 305/*
 306 * Flow to release an extent_node:
 307 * 1. list_del_init
 308 * 2. __detach_extent_node
 309 * 3. kmem_cache_free.
 310 */
 311static void __release_extent_node(struct f2fs_sb_info *sbi,
 312			struct extent_tree *et, struct extent_node *en)
 313{
 314	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 315
 316	spin_lock(&eti->extent_lock);
 317	f2fs_bug_on(sbi, list_empty(&en->list));
 318	list_del_init(&en->list);
 319	spin_unlock(&eti->extent_lock);
 320
 321	__detach_extent_node(sbi, et, en);
 322}
 323
 324static struct extent_tree *__grab_extent_tree(struct inode *inode,
 325						enum extent_type type)
 326{
 327	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 328	struct extent_tree_info *eti = &sbi->extent_tree[type];
 329	struct extent_tree *et;
 330	nid_t ino = inode->i_ino;
 331
 332	mutex_lock(&eti->extent_tree_lock);
 333	et = radix_tree_lookup(&eti->extent_tree_root, ino);
 334	if (!et) {
 335		et = f2fs_kmem_cache_alloc(extent_tree_slab,
 336					GFP_NOFS, true, NULL);
 337		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
 338		memset(et, 0, sizeof(struct extent_tree));
 339		et->ino = ino;
 340		et->type = type;
 341		et->root = RB_ROOT_CACHED;
 342		et->cached_en = NULL;
 343		rwlock_init(&et->lock);
 344		INIT_LIST_HEAD(&et->list);
 345		atomic_set(&et->node_cnt, 0);
 346		atomic_inc(&eti->total_ext_tree);
 347	} else {
 348		atomic_dec(&eti->total_zombie_tree);
 349		list_del_init(&et->list);
 350	}
 351	mutex_unlock(&eti->extent_tree_lock);
 352
 353	/* never died until evict_inode */
 354	F2FS_I(inode)->extent_tree[type] = et;
 355
 356	return et;
 357}
 358
 359static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
 360					struct extent_tree *et)
 361{
 362	struct rb_node *node, *next;
 363	struct extent_node *en;
 364	unsigned int count = atomic_read(&et->node_cnt);
 365
 366	node = rb_first_cached(&et->root);
 367	while (node) {
 368		next = rb_next(node);
 369		en = rb_entry(node, struct extent_node, rb_node);
 370		__release_extent_node(sbi, et, en);
 371		node = next;
 372	}
 373
 374	return count - atomic_read(&et->node_cnt);
 375}
 376
 377static void __drop_largest_extent(struct extent_tree *et,
 378					pgoff_t fofs, unsigned int len)
 379{
 380	if (fofs < et->largest.fofs + et->largest.len &&
 381			fofs + len > et->largest.fofs) {
 382		et->largest.len = 0;
 383		et->largest_updated = true;
 384	}
 385}
 386
 387void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
 388{
 389	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 390	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
 391	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
 392	struct extent_tree *et;
 393	struct extent_node *en;
 394	struct extent_info ei;
 395
 396	if (!__may_extent_tree(inode, EX_READ)) {
 397		/* drop largest read extent */
 398		if (i_ext && i_ext->len) {
 399			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 400			i_ext->len = 0;
 401			set_page_dirty(ipage);
 402		}
 403		goto out;
 404	}
 405
 406	et = __grab_extent_tree(inode, EX_READ);
 407
 408	if (!i_ext || !i_ext->len)
 409		goto out;
 410
 411	get_read_extent_info(&ei, i_ext);
 412
 413	write_lock(&et->lock);
 414	if (atomic_read(&et->node_cnt))
 415		goto unlock_out;
 416
 417	en = __attach_extent_node(sbi, et, &ei, NULL,
 418				&et->root.rb_root.rb_node, true);
 419	if (en) {
 420		et->largest = en->ei;
 421		et->cached_en = en;
 422
 423		spin_lock(&eti->extent_lock);
 424		list_add_tail(&en->list, &eti->extent_list);
 425		spin_unlock(&eti->extent_lock);
 426	}
 427unlock_out:
 428	write_unlock(&et->lock);
 429out:
 430	if (!F2FS_I(inode)->extent_tree[EX_READ])
 431		set_inode_flag(inode, FI_NO_EXTENT);
 432}
 433
 434void f2fs_init_age_extent_tree(struct inode *inode)
 435{
 436	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
 437		return;
 438	__grab_extent_tree(inode, EX_BLOCK_AGE);
 439}
 440
 441void f2fs_init_extent_tree(struct inode *inode)
 442{
 443	/* initialize read cache */
 444	if (__init_may_extent_tree(inode, EX_READ))
 445		__grab_extent_tree(inode, EX_READ);
 446
 447	/* initialize block age cache */
 448	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
 449		__grab_extent_tree(inode, EX_BLOCK_AGE);
 450}
 451
 452static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 453			struct extent_info *ei, enum extent_type type)
 454{
 455	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 456	struct extent_tree_info *eti = &sbi->extent_tree[type];
 457	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 458	struct extent_node *en;
 459	bool ret = false;
 460
 461	if (!et)
 462		return false;
 463
 464	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
 465
 466	read_lock(&et->lock);
 467
 468	if (type == EX_READ &&
 469			et->largest.fofs <= pgofs &&
 470			et->largest.fofs + et->largest.len > pgofs) {
 471		*ei = et->largest;
 472		ret = true;
 473		stat_inc_largest_node_hit(sbi);
 474		goto out;
 475	}
 476
 477	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
 478	if (!en)
 479		goto out;
 480
 481	if (en == et->cached_en)
 482		stat_inc_cached_node_hit(sbi, type);
 483	else
 484		stat_inc_rbtree_node_hit(sbi, type);
 485
 486	*ei = en->ei;
 487	spin_lock(&eti->extent_lock);
 488	if (!list_empty(&en->list)) {
 489		list_move_tail(&en->list, &eti->extent_list);
 490		et->cached_en = en;
 491	}
 492	spin_unlock(&eti->extent_lock);
 493	ret = true;
 494out:
 495	stat_inc_total_hit(sbi, type);
 496	read_unlock(&et->lock);
 497
 498	if (type == EX_READ)
 499		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
 500	else if (type == EX_BLOCK_AGE)
 501		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
 502	return ret;
 503}
 504
 505static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
 506				struct extent_tree *et, struct extent_info *ei,
 507				struct extent_node *prev_ex,
 508				struct extent_node *next_ex)
 509{
 510	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 511	struct extent_node *en = NULL;
 512
 513	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
 514		prev_ex->ei.len += ei->len;
 515		ei = &prev_ex->ei;
 516		en = prev_ex;
 517	}
 518
 519	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
 520		next_ex->ei.fofs = ei->fofs;
 521		next_ex->ei.len += ei->len;
 522		if (et->type == EX_READ)
 523			next_ex->ei.blk = ei->blk;
 524		if (en)
 525			__release_extent_node(sbi, et, prev_ex);
 526
 527		en = next_ex;
 528	}
 529
 530	if (!en)
 531		return NULL;
 532
 533	__try_update_largest_extent(et, en);
 534
 535	spin_lock(&eti->extent_lock);
 536	if (!list_empty(&en->list)) {
 537		list_move_tail(&en->list, &eti->extent_list);
 538		et->cached_en = en;
 539	}
 540	spin_unlock(&eti->extent_lock);
 541	return en;
 542}
 543
 544static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 545				struct extent_tree *et, struct extent_info *ei,
 546				struct rb_node **insert_p,
 547				struct rb_node *insert_parent,
 548				bool leftmost)
 549{
 550	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 551	struct rb_node **p = &et->root.rb_root.rb_node;
 552	struct rb_node *parent = NULL;
 553	struct extent_node *en = NULL;
 554
 555	if (insert_p && insert_parent) {
 556		parent = insert_parent;
 557		p = insert_p;
 558		goto do_insert;
 559	}
 560
 561	leftmost = true;
 562
 563	/* look up extent_node in the rb tree */
 564	while (*p) {
 565		parent = *p;
 566		en = rb_entry(parent, struct extent_node, rb_node);
 567
 568		if (ei->fofs < en->ei.fofs) {
 569			p = &(*p)->rb_left;
 570		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
 571			p = &(*p)->rb_right;
 572			leftmost = false;
 573		} else {
 574			f2fs_bug_on(sbi, 1);
 575		}
 576	}
 577
 578do_insert:
 579	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
 580	if (!en)
 581		return NULL;
 582
 583	__try_update_largest_extent(et, en);
 584
 585	/* update in global extent list */
 586	spin_lock(&eti->extent_lock);
 587	list_add_tail(&en->list, &eti->extent_list);
 588	et->cached_en = en;
 589	spin_unlock(&eti->extent_lock);
 590	return en;
 591}
 592
 593static void __update_extent_tree_range(struct inode *inode,
 594			struct extent_info *tei, enum extent_type type)
 595{
 596	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 597	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 598	struct extent_node *en = NULL, *en1 = NULL;
 599	struct extent_node *prev_en = NULL, *next_en = NULL;
 600	struct extent_info ei, dei, prev;
 601	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 602	unsigned int fofs = tei->fofs, len = tei->len;
 603	unsigned int end = fofs + len;
 604	bool updated = false;
 605	bool leftmost = false;
 606
 607	if (!et)
 608		return;
 609
 610	if (type == EX_READ)
 611		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
 612						tei->blk, 0);
 613	else if (type == EX_BLOCK_AGE)
 614		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
 615						tei->age, tei->last_blocks);
 616
 617	write_lock(&et->lock);
 618
 619	if (type == EX_READ) {
 620		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
 621			write_unlock(&et->lock);
 622			return;
 623		}
 624
 625		prev = et->largest;
 626		dei.len = 0;
 627
 628		/*
 629		 * drop largest extent before lookup, in case it's already
 630		 * been shrunk from extent tree
 631		 */
 632		__drop_largest_extent(et, fofs, len);
 633	}
 634
 635	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
 636	en = __lookup_extent_node_ret(&et->root,
 637					et->cached_en, fofs,
 638					&prev_en, &next_en,
 639					&insert_p, &insert_parent,
 640					&leftmost);
 641	if (!en)
 642		en = next_en;
 643
 644	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
 645	while (en && en->ei.fofs < end) {
 646		unsigned int org_end;
 647		int parts = 0;	/* # of parts current extent split into */
 648
 649		next_en = en1 = NULL;
 650
 651		dei = en->ei;
 652		org_end = dei.fofs + dei.len;
 653		f2fs_bug_on(sbi, fofs >= org_end);
 654
 655		if (fofs > dei.fofs && (type != EX_READ ||
 656				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
 657			en->ei.len = fofs - en->ei.fofs;
 658			prev_en = en;
 659			parts = 1;
 660		}
 661
 662		if (end < org_end && (type != EX_READ ||
 663				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
 664			if (parts) {
 665				__set_extent_info(&ei,
 666					end, org_end - end,
 667					end - dei.fofs + dei.blk, false,
 668					dei.age, dei.last_blocks,
 669					type);
 670				en1 = __insert_extent_tree(sbi, et, &ei,
 671							NULL, NULL, true);
 672				next_en = en1;
 673			} else {
 674				__set_extent_info(&en->ei,
 675					end, en->ei.len - (end - dei.fofs),
 676					en->ei.blk + (end - dei.fofs), true,
 677					dei.age, dei.last_blocks,
 678					type);
 679				next_en = en;
 680			}
 681			parts++;
 682		}
 683
 684		if (!next_en) {
 685			struct rb_node *node = rb_next(&en->rb_node);
 686
 687			next_en = rb_entry_safe(node, struct extent_node,
 688						rb_node);
 689		}
 690
 691		if (parts)
 692			__try_update_largest_extent(et, en);
 693		else
 694			__release_extent_node(sbi, et, en);
 695
 696		/*
 697		 * if original extent is split into zero or two parts, extent
 698		 * tree has been altered by deletion or insertion, therefore
 699		 * invalidate pointers regard to tree.
 700		 */
 701		if (parts != 1) {
 702			insert_p = NULL;
 703			insert_parent = NULL;
 704		}
 705		en = next_en;
 706	}
 707
 708	if (type == EX_BLOCK_AGE)
 709		goto update_age_extent_cache;
 710
 711	/* 3. update extent in read extent cache */
 712	BUG_ON(type != EX_READ);
 713
 714	if (tei->blk) {
 715		__set_extent_info(&ei, fofs, len, tei->blk, false,
 716				  0, 0, EX_READ);
 717		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 718			__insert_extent_tree(sbi, et, &ei,
 719					insert_p, insert_parent, leftmost);
 720
 721		/* give up extent_cache, if split and small updates happen */
 722		if (dei.len >= 1 &&
 723				prev.len < F2FS_MIN_EXTENT_LEN &&
 724				et->largest.len < F2FS_MIN_EXTENT_LEN) {
 725			et->largest.len = 0;
 726			et->largest_updated = true;
 727			set_inode_flag(inode, FI_NO_EXTENT);
 728		}
 729	}
 730
 731	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 732		__free_extent_tree(sbi, et);
 733
 734	if (et->largest_updated) {
 735		et->largest_updated = false;
 736		updated = true;
 737	}
 738	goto out_read_extent_cache;
 739update_age_extent_cache:
 740	if (!tei->last_blocks)
 741		goto out_read_extent_cache;
 742
 743	__set_extent_info(&ei, fofs, len, 0, false,
 744			tei->age, tei->last_blocks, EX_BLOCK_AGE);
 745	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 746		__insert_extent_tree(sbi, et, &ei,
 747					insert_p, insert_parent, leftmost);
 748out_read_extent_cache:
 749	write_unlock(&et->lock);
 750
 751	if (updated)
 752		f2fs_mark_inode_dirty_sync(inode, true);
 753}
 754
 755#ifdef CONFIG_F2FS_FS_COMPRESSION
 756void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
 757				pgoff_t fofs, block_t blkaddr, unsigned int llen,
 758				unsigned int c_len)
 759{
 760	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 761	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
 762	struct extent_node *en = NULL;
 763	struct extent_node *prev_en = NULL, *next_en = NULL;
 764	struct extent_info ei;
 765	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 766	bool leftmost = false;
 767
 768	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
 769						blkaddr, c_len);
 770
 771	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
 772	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 773		return;
 774
 775	write_lock(&et->lock);
 776
 777	en = __lookup_extent_node_ret(&et->root,
 778					et->cached_en, fofs,
 779					&prev_en, &next_en,
 780					&insert_p, &insert_parent,
 781					&leftmost);
 782	if (en)
 783		goto unlock_out;
 784
 785	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
 786	ei.c_len = c_len;
 787
 788	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 789		__insert_extent_tree(sbi, et, &ei,
 790				insert_p, insert_parent, leftmost);
 791unlock_out:
 792	write_unlock(&et->lock);
 793}
 794#endif
 795
 796static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
 797						unsigned long long new,
 798						unsigned long long old)
 799{
 800	unsigned int rem_old, rem_new;
 801	unsigned long long res;
 802	unsigned int weight = sbi->last_age_weight;
 803
 804	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
 805		+ div_u64_rem(old, 100, &rem_old) * weight;
 806
 807	if (rem_new)
 808		res += rem_new * (100 - weight) / 100;
 809	if (rem_old)
 810		res += rem_old * weight / 100;
 811
 812	return res;
 813}
 814
 815/* This returns a new age and allocated blocks in ei */
 816static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
 817						block_t blkaddr)
 818{
 819	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 820	loff_t f_size = i_size_read(inode);
 821	unsigned long long cur_blocks =
 822				atomic64_read(&sbi->allocated_data_blocks);
 823	struct extent_info tei = *ei;	/* only fofs and len are valid */
 824
 825	/*
 826	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
 827	 * file block even in seq write. So don't record age for newly last file
 828	 * block here.
 829	 */
 830	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
 831			blkaddr == NEW_ADDR)
 832		return -EINVAL;
 833
 834	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
 835		unsigned long long cur_age;
 836
 837		if (cur_blocks >= tei.last_blocks)
 838			cur_age = cur_blocks - tei.last_blocks;
 839		else
 840			/* allocated_data_blocks overflow */
 841			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
 842
 843		if (tei.age)
 844			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
 845		else
 846			ei->age = cur_age;
 847		ei->last_blocks = cur_blocks;
 848		WARN_ON(ei->age > cur_blocks);
 849		return 0;
 850	}
 851
 852	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
 853
 854	/* the data block was allocated for the first time */
 855	if (blkaddr == NEW_ADDR)
 856		goto out;
 857
 858	if (__is_valid_data_blkaddr(blkaddr) &&
 859	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
 860		f2fs_bug_on(sbi, 1);
 861		return -EINVAL;
 862	}
 863out:
 864	/*
 865	 * init block age with zero, this can happen when the block age extent
 866	 * was reclaimed due to memory constraint or system reboot
 867	 */
 868	ei->age = 0;
 869	ei->last_blocks = cur_blocks;
 870	return 0;
 871}
 872
 873static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
 874{
 875	struct extent_info ei = {};
 876
 877	if (!__may_extent_tree(dn->inode, type))
 878		return;
 879
 880	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
 881								dn->ofs_in_node;
 882	ei.len = 1;
 883
 884	if (type == EX_READ) {
 885		if (dn->data_blkaddr == NEW_ADDR)
 886			ei.blk = NULL_ADDR;
 887		else
 888			ei.blk = dn->data_blkaddr;
 889	} else if (type == EX_BLOCK_AGE) {
 890		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
 891			return;
 892	}
 893	__update_extent_tree_range(dn->inode, &ei, type);
 894}
 895
 896static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
 897					enum extent_type type)
 898{
 899	struct extent_tree_info *eti = &sbi->extent_tree[type];
 900	struct extent_tree *et, *next;
 901	struct extent_node *en;
 902	unsigned int node_cnt = 0, tree_cnt = 0;
 903	int remained;
 904
 905	if (!atomic_read(&eti->total_zombie_tree))
 906		goto free_node;
 907
 908	if (!mutex_trylock(&eti->extent_tree_lock))
 909		goto out;
 910
 911	/* 1. remove unreferenced extent tree */
 912	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
 913		if (atomic_read(&et->node_cnt)) {
 914			write_lock(&et->lock);
 915			node_cnt += __free_extent_tree(sbi, et);
 916			write_unlock(&et->lock);
 917		}
 918		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
 919		list_del_init(&et->list);
 920		radix_tree_delete(&eti->extent_tree_root, et->ino);
 921		kmem_cache_free(extent_tree_slab, et);
 922		atomic_dec(&eti->total_ext_tree);
 923		atomic_dec(&eti->total_zombie_tree);
 924		tree_cnt++;
 925
 926		if (node_cnt + tree_cnt >= nr_shrink)
 927			goto unlock_out;
 928		cond_resched();
 929	}
 930	mutex_unlock(&eti->extent_tree_lock);
 931
 932free_node:
 933	/* 2. remove LRU extent entries */
 934	if (!mutex_trylock(&eti->extent_tree_lock))
 935		goto out;
 936
 937	remained = nr_shrink - (node_cnt + tree_cnt);
 938
 939	spin_lock(&eti->extent_lock);
 940	for (; remained > 0; remained--) {
 941		if (list_empty(&eti->extent_list))
 942			break;
 943		en = list_first_entry(&eti->extent_list,
 944					struct extent_node, list);
 945		et = en->et;
 946		if (!write_trylock(&et->lock)) {
 947			/* refresh this extent node's position in extent list */
 948			list_move_tail(&en->list, &eti->extent_list);
 949			continue;
 950		}
 951
 952		list_del_init(&en->list);
 953		spin_unlock(&eti->extent_lock);
 954
 955		__detach_extent_node(sbi, et, en);
 956
 957		write_unlock(&et->lock);
 958		node_cnt++;
 959		spin_lock(&eti->extent_lock);
 960	}
 961	spin_unlock(&eti->extent_lock);
 962
 963unlock_out:
 964	mutex_unlock(&eti->extent_tree_lock);
 965out:
 966	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
 967
 968	return node_cnt + tree_cnt;
 969}
 970
 971/* read extent cache operations */
 972bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
 973				struct extent_info *ei)
 974{
 975	if (!__may_extent_tree(inode, EX_READ))
 976		return false;
 977
 978	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
 979}
 980
 981bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
 982				block_t *blkaddr)
 983{
 984	struct extent_info ei = {};
 985
 986	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
 987		return false;
 988	*blkaddr = ei.blk + index - ei.fofs;
 989	return true;
 990}
 991
 992void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
 993{
 994	return __update_extent_cache(dn, EX_READ);
 995}
 996
 997void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
 998				pgoff_t fofs, block_t blkaddr, unsigned int len)
 999{
1000	struct extent_info ei = {
1001		.fofs = fofs,
1002		.len = len,
1003		.blk = blkaddr,
1004	};
1005
1006	if (!__may_extent_tree(dn->inode, EX_READ))
1007		return;
1008
1009	__update_extent_tree_range(dn->inode, &ei, EX_READ);
1010}
1011
1012unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1013{
1014	if (!test_opt(sbi, READ_EXTENT_CACHE))
1015		return 0;
1016
1017	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1018}
1019
1020/* block age extent cache operations */
1021bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1022				struct extent_info *ei)
1023{
1024	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1025		return false;
1026
1027	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1028}
1029
1030void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1031{
1032	return __update_extent_cache(dn, EX_BLOCK_AGE);
1033}
1034
1035void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1036				pgoff_t fofs, unsigned int len)
1037{
1038	struct extent_info ei = {
1039		.fofs = fofs,
1040		.len = len,
1041	};
1042
1043	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1044		return;
1045
1046	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1047}
1048
1049unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1050{
1051	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1052		return 0;
1053
1054	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1055}
1056
1057static unsigned int __destroy_extent_node(struct inode *inode,
1058					enum extent_type type)
1059{
1060	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1061	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1062	unsigned int node_cnt = 0;
1063
1064	if (!et || !atomic_read(&et->node_cnt))
1065		return 0;
1066
1067	write_lock(&et->lock);
1068	node_cnt = __free_extent_tree(sbi, et);
1069	write_unlock(&et->lock);
1070
1071	return node_cnt;
1072}
1073
1074void f2fs_destroy_extent_node(struct inode *inode)
1075{
1076	__destroy_extent_node(inode, EX_READ);
1077	__destroy_extent_node(inode, EX_BLOCK_AGE);
1078}
1079
1080static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1081{
1082	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1083	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1084	bool updated = false;
1085
1086	if (!__may_extent_tree(inode, type))
1087		return;
1088
1089	write_lock(&et->lock);
1090	__free_extent_tree(sbi, et);
1091	if (type == EX_READ) {
1092		set_inode_flag(inode, FI_NO_EXTENT);
1093		if (et->largest.len) {
1094			et->largest.len = 0;
1095			updated = true;
1096		}
1097	}
1098	write_unlock(&et->lock);
1099	if (updated)
1100		f2fs_mark_inode_dirty_sync(inode, true);
1101}
1102
1103void f2fs_drop_extent_tree(struct inode *inode)
1104{
1105	__drop_extent_tree(inode, EX_READ);
1106	__drop_extent_tree(inode, EX_BLOCK_AGE);
1107}
1108
1109static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1110{
1111	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1112	struct extent_tree_info *eti = &sbi->extent_tree[type];
1113	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1114	unsigned int node_cnt = 0;
1115
1116	if (!et)
1117		return;
1118
1119	if (inode->i_nlink && !is_bad_inode(inode) &&
1120					atomic_read(&et->node_cnt)) {
1121		mutex_lock(&eti->extent_tree_lock);
1122		list_add_tail(&et->list, &eti->zombie_list);
1123		atomic_inc(&eti->total_zombie_tree);
1124		mutex_unlock(&eti->extent_tree_lock);
1125		return;
1126	}
1127
1128	/* free all extent info belong to this extent tree */
1129	node_cnt = __destroy_extent_node(inode, type);
1130
1131	/* delete extent tree entry in radix tree */
1132	mutex_lock(&eti->extent_tree_lock);
1133	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1134	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1135	kmem_cache_free(extent_tree_slab, et);
1136	atomic_dec(&eti->total_ext_tree);
1137	mutex_unlock(&eti->extent_tree_lock);
1138
1139	F2FS_I(inode)->extent_tree[type] = NULL;
1140
1141	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1142}
1143
1144void f2fs_destroy_extent_tree(struct inode *inode)
1145{
1146	__destroy_extent_tree(inode, EX_READ);
1147	__destroy_extent_tree(inode, EX_BLOCK_AGE);
1148}
1149
1150static void __init_extent_tree_info(struct extent_tree_info *eti)
1151{
1152	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1153	mutex_init(&eti->extent_tree_lock);
1154	INIT_LIST_HEAD(&eti->extent_list);
1155	spin_lock_init(&eti->extent_lock);
1156	atomic_set(&eti->total_ext_tree, 0);
1157	INIT_LIST_HEAD(&eti->zombie_list);
1158	atomic_set(&eti->total_zombie_tree, 0);
1159	atomic_set(&eti->total_ext_node, 0);
1160}
1161
1162void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1163{
1164	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1165	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1166
1167	/* initialize for block age extents */
1168	atomic64_set(&sbi->allocated_data_blocks, 0);
1169	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1170	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1171	sbi->last_age_weight = LAST_AGE_WEIGHT;
1172}
1173
1174int __init f2fs_create_extent_cache(void)
1175{
1176	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1177			sizeof(struct extent_tree));
1178	if (!extent_tree_slab)
1179		return -ENOMEM;
1180	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1181			sizeof(struct extent_node));
1182	if (!extent_node_slab) {
1183		kmem_cache_destroy(extent_tree_slab);
1184		return -ENOMEM;
1185	}
1186	return 0;
1187}
1188
1189void f2fs_destroy_extent_cache(void)
1190{
1191	kmem_cache_destroy(extent_node_slab);
1192	kmem_cache_destroy(extent_tree_slab);
1193}