fs/f2fs/extent_cache.c (v4.10.11)
 
  1/*
  2 * f2fs extent cache support
  3 *
  4 * Copyright (c) 2015 Motorola Mobility
  5 * Copyright (c) 2015 Samsung Electronics
  6 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  7 *          Chao Yu <chao2.yu@samsung.com>
  8 *
  9 * This program is free software; you can redistribute it and/or modify
 10 * it under the terms of the GNU General Public License version 2 as
 11 * published by the Free Software Foundation.
 12 */
 13
 14#include <linux/fs.h>
 15#include <linux/f2fs_fs.h>
 16
 17#include "f2fs.h"
 18#include "node.h"
 19#include <trace/events/f2fs.h>
 20
 21static struct kmem_cache *extent_tree_slab;
 22static struct kmem_cache *extent_node_slab;
 23
 24static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 25				struct extent_tree *et, struct extent_info *ei,
 26				struct rb_node *parent, struct rb_node **p)
 27{
 28	struct extent_node *en;
 29
 30	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
 31	if (!en)
 32		return NULL;
 33
 34	en->ei = *ei;
 35	INIT_LIST_HEAD(&en->list);
 36	en->et = et;
 37
 38	rb_link_node(&en->rb_node, parent, p);
 39	rb_insert_color(&en->rb_node, &et->root);
 40	atomic_inc(&et->node_cnt);
 41	atomic_inc(&sbi->total_ext_node);
 42	return en;
 43}
 44
 45static void __detach_extent_node(struct f2fs_sb_info *sbi,
 46				struct extent_tree *et, struct extent_node *en)
 47{
 48	rb_erase(&en->rb_node, &et->root);
 49	atomic_dec(&et->node_cnt);
 50	atomic_dec(&sbi->total_ext_node);
 51
 52	if (et->cached_en == en)
 53		et->cached_en = NULL;
 54	kmem_cache_free(extent_node_slab, en);
 55}
 56
 57/*
 58 * Flow to release an extent_node:
 59 * 1. list_del_init
 60 * 2. __detach_extent_node
 61 * 3. kmem_cache_free.
 62 */
 63static void __release_extent_node(struct f2fs_sb_info *sbi,
 64			struct extent_tree *et, struct extent_node *en)
 65{
 66	spin_lock(&sbi->extent_lock);
 67	f2fs_bug_on(sbi, list_empty(&en->list));
 68	list_del_init(&en->list);
 69	spin_unlock(&sbi->extent_lock);
 70
 71	__detach_extent_node(sbi, et, en);
 72}
 73
 74static struct extent_tree *__grab_extent_tree(struct inode *inode)
 75{
 76	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 77	struct extent_tree *et;
 78	nid_t ino = inode->i_ino;
 79
 80	down_write(&sbi->extent_tree_lock);
 81	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
 82	if (!et) {
 83		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
 84		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
 85		memset(et, 0, sizeof(struct extent_tree));
 86		et->ino = ino;
 87		et->root = RB_ROOT;
 88		et->cached_en = NULL;
 89		rwlock_init(&et->lock);
 90		INIT_LIST_HEAD(&et->list);
 91		atomic_set(&et->node_cnt, 0);
 92		atomic_inc(&sbi->total_ext_tree);
 93	} else {
 94		atomic_dec(&sbi->total_zombie_tree);
 95		list_del_init(&et->list);
 96	}
 97	up_write(&sbi->extent_tree_lock);
 98
 99	/* never died until evict_inode */
100	F2FS_I(inode)->extent_tree = et;
101
102	return et;
103}
104
105static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
106				struct extent_tree *et, unsigned int fofs)
107{
108	struct rb_node *node = et->root.rb_node;
109	struct extent_node *en = et->cached_en;
110
111	if (en) {
112		struct extent_info *cei = &en->ei;
113
114		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
115			stat_inc_cached_node_hit(sbi);
116			return en;
117		}
118	}
119
120	while (node) {
121		en = rb_entry(node, struct extent_node, rb_node);
122
123		if (fofs < en->ei.fofs) {
124			node = node->rb_left;
125		} else if (fofs >= en->ei.fofs + en->ei.len) {
126			node = node->rb_right;
127		} else {
128			stat_inc_rbtree_node_hit(sbi);
129			return en;
130		}
131	}
132	return NULL;
133}
134
135static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
136				struct extent_tree *et, struct extent_info *ei)
137{
138	struct rb_node **p = &et->root.rb_node;
139	struct extent_node *en;
140
141	en = __attach_extent_node(sbi, et, ei, NULL, p);
142	if (!en)
143		return NULL;
144
145	et->largest = en->ei;
146	et->cached_en = en;
147	return en;
148}
149
150static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
151					struct extent_tree *et)
152{
153	struct rb_node *node, *next;
154	struct extent_node *en;
155	unsigned int count = atomic_read(&et->node_cnt);
156
157	node = rb_first(&et->root);
158	while (node) {
159		next = rb_next(node);
160		en = rb_entry(node, struct extent_node, rb_node);
161		__release_extent_node(sbi, et, en);
162		node = next;
163	}
164
165	return count - atomic_read(&et->node_cnt);
166}
167
168static void __drop_largest_extent(struct inode *inode,
169					pgoff_t fofs, unsigned int len)
170{
171	struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
172
173	if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
174		largest->len = 0;
175		f2fs_mark_inode_dirty_sync(inode, true);
176	}
177}
178
179/* return true, if inode page is changed */
180bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
181{
182	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
183	struct extent_tree *et;
184	struct extent_node *en;
185	struct extent_info ei;
186
187	if (!f2fs_may_extent_tree(inode)) {
188		/* drop largest extent */
189		if (i_ext && i_ext->len) {
190			i_ext->len = 0;
191			return true;
192		}
193		return false;
194	}
195
196	et = __grab_extent_tree(inode);
197
198	if (!i_ext || !i_ext->len)
199		return false;
200
201	get_extent_info(&ei, i_ext);
202
203	write_lock(&et->lock);
204	if (atomic_read(&et->node_cnt))
205		goto out;
206
207	en = __init_extent_tree(sbi, et, &ei);
208	if (en) {
209		spin_lock(&sbi->extent_lock);
210		list_add_tail(&en->list, &sbi->extent_list);
211		spin_unlock(&sbi->extent_lock);
212	}
213out:
214	write_unlock(&et->lock);
215	return false;
216}
217
218static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
219							struct extent_info *ei)
220{
221	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
222	struct extent_tree *et = F2FS_I(inode)->extent_tree;
223	struct extent_node *en;
224	bool ret = false;
225
226	f2fs_bug_on(sbi, !et);
227
228	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
229
230	read_lock(&et->lock);
231
232	if (et->largest.fofs <= pgofs &&
233			et->largest.fofs + et->largest.len > pgofs) {
234		*ei = et->largest;
235		ret = true;
236		stat_inc_largest_node_hit(sbi);
237		goto out;
238	}
239
240	en = __lookup_extent_tree(sbi, et, pgofs);
241	if (en) {
242		*ei = en->ei;
243		spin_lock(&sbi->extent_lock);
244		if (!list_empty(&en->list)) {
245			list_move_tail(&en->list, &sbi->extent_list);
246			et->cached_en = en;
247		}
248		spin_unlock(&sbi->extent_lock);
249		ret = true;
250	}
251out:
252	stat_inc_total_hit(sbi);
253	read_unlock(&et->lock);
254
255	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
256	return ret;
257}
258
259
260/*
261 * lookup extent at @fofs, if hit, return the extent
262 * if not, return NULL and
263 * @prev_ex: extent before fofs
264 * @next_ex: extent after fofs
265 * @insert_p: insert point for new extent at fofs
 266 * in order to simplify the insertion after.
267 * tree must stay unchanged between lookup and insertion.
268 */
269static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
270				unsigned int fofs,
271				struct extent_node **prev_ex,
272				struct extent_node **next_ex,
273				struct rb_node ***insert_p,
274				struct rb_node **insert_parent)
275{
276	struct rb_node **pnode = &et->root.rb_node;
277	struct rb_node *parent = NULL, *tmp_node;
278	struct extent_node *en = et->cached_en;
279
280	*insert_p = NULL;
281	*insert_parent = NULL;
282	*prev_ex = NULL;
283	*next_ex = NULL;
284
285	if (RB_EMPTY_ROOT(&et->root))
286		return NULL;
287
288	if (en) {
289		struct extent_info *cei = &en->ei;
290
291		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
292			goto lookup_neighbors;
293	}
294
295	while (*pnode) {
296		parent = *pnode;
297		en = rb_entry(*pnode, struct extent_node, rb_node);
298
299		if (fofs < en->ei.fofs)
300			pnode = &(*pnode)->rb_left;
301		else if (fofs >= en->ei.fofs + en->ei.len)
302			pnode = &(*pnode)->rb_right;
303		else
304			goto lookup_neighbors;
305	}
306
307	*insert_p = pnode;
308	*insert_parent = parent;
309
310	en = rb_entry(parent, struct extent_node, rb_node);
311	tmp_node = parent;
312	if (parent && fofs > en->ei.fofs)
313		tmp_node = rb_next(parent);
314	*next_ex = tmp_node ?
315		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
316
317	tmp_node = parent;
318	if (parent && fofs < en->ei.fofs)
319		tmp_node = rb_prev(parent);
320	*prev_ex = tmp_node ?
321		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
322	return NULL;
323
324lookup_neighbors:
325	if (fofs == en->ei.fofs) {
326		/* lookup prev node for merging backward later */
327		tmp_node = rb_prev(&en->rb_node);
328		*prev_ex = tmp_node ?
329			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
330	}
331	if (fofs == en->ei.fofs + en->ei.len - 1) {
332		/* lookup next node for merging frontward later */
333		tmp_node = rb_next(&en->rb_node);
334		*next_ex = tmp_node ?
335			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
336	}
337	return en;
338}
339
340static struct extent_node *__try_merge_extent_node(struct inode *inode,
341				struct extent_tree *et, struct extent_info *ei,
342				struct extent_node *prev_ex,
343				struct extent_node *next_ex)
344{
345	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
346	struct extent_node *en = NULL;
347
348	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
349		prev_ex->ei.len += ei->len;
350		ei = &prev_ex->ei;
351		en = prev_ex;
352	}
353
354	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
355		next_ex->ei.fofs = ei->fofs;
356		next_ex->ei.blk = ei->blk;
357		next_ex->ei.len += ei->len;
358		if (en)
359			__release_extent_node(sbi, et, prev_ex);
360
361		en = next_ex;
362	}
363
364	if (!en)
365		return NULL;
366
367	__try_update_largest_extent(inode, et, en);
368
369	spin_lock(&sbi->extent_lock);
370	if (!list_empty(&en->list)) {
371		list_move_tail(&en->list, &sbi->extent_list);
372		et->cached_en = en;
373	}
374	spin_unlock(&sbi->extent_lock);
375	return en;
376}
377
378static struct extent_node *__insert_extent_tree(struct inode *inode,
379				struct extent_tree *et, struct extent_info *ei,
380				struct rb_node **insert_p,
381				struct rb_node *insert_parent)
382{
383	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
384	struct rb_node **p = &et->root.rb_node;
385	struct rb_node *parent = NULL;
386	struct extent_node *en = NULL;
387
388	if (insert_p && insert_parent) {
389		parent = insert_parent;
390		p = insert_p;
391		goto do_insert;
392	}
393
394	while (*p) {
395		parent = *p;
396		en = rb_entry(parent, struct extent_node, rb_node);
397
398		if (ei->fofs < en->ei.fofs)
399			p = &(*p)->rb_left;
400		else if (ei->fofs >= en->ei.fofs + en->ei.len)
401			p = &(*p)->rb_right;
402		else
403			f2fs_bug_on(sbi, 1);
404	}
405do_insert:
406	en = __attach_extent_node(sbi, et, ei, parent, p);
407	if (!en)
408		return NULL;
409
410	__try_update_largest_extent(inode, et, en);
411
412	/* update in global extent list */
413	spin_lock(&sbi->extent_lock);
414	list_add_tail(&en->list, &sbi->extent_list);
415	et->cached_en = en;
416	spin_unlock(&sbi->extent_lock);
417	return en;
418}
419
420static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
421				pgoff_t fofs, block_t blkaddr, unsigned int len)
422{
423	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
424	struct extent_tree *et = F2FS_I(inode)->extent_tree;
425	struct extent_node *en = NULL, *en1 = NULL;
426	struct extent_node *prev_en = NULL, *next_en = NULL;
427	struct extent_info ei, dei, prev;
428	struct rb_node **insert_p = NULL, *insert_parent = NULL;
429	unsigned int end = fofs + len;
430	unsigned int pos = (unsigned int)fofs;
431
432	if (!et)
433		return false;
434
435	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
436
437	write_lock(&et->lock);
438
439	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
440		write_unlock(&et->lock);
441		return false;
442	}
443
444	prev = et->largest;
445	dei.len = 0;
446
447	/*
448	 * drop largest extent before lookup, in case it's already
449	 * been shrunk from extent tree
450	 */
451	__drop_largest_extent(inode, fofs, len);
452
453	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
454	en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
455					&insert_p, &insert_parent);
456	if (!en)
457		en = next_en;
458
 459	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
460	while (en && en->ei.fofs < end) {
461		unsigned int org_end;
462		int parts = 0;	/* # of parts current extent split into */
463
464		next_en = en1 = NULL;
465
466		dei = en->ei;
467		org_end = dei.fofs + dei.len;
468		f2fs_bug_on(sbi, pos >= org_end);
469
470		if (pos > dei.fofs &&	pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
471			en->ei.len = pos - en->ei.fofs;
472			prev_en = en;
473			parts = 1;
474		}
475
476		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
477			if (parts) {
478				set_extent_info(&ei, end,
479						end - dei.fofs + dei.blk,
480						org_end - end);
481				en1 = __insert_extent_tree(inode, et, &ei,
482							NULL, NULL);
483				next_en = en1;
484			} else {
485				en->ei.fofs = end;
486				en->ei.blk += end - dei.fofs;
487				en->ei.len -= end - dei.fofs;
488				next_en = en;
489			}
490			parts++;
491		}
492
493		if (!next_en) {
494			struct rb_node *node = rb_next(&en->rb_node);
495
496			next_en = node ?
497				rb_entry(node, struct extent_node, rb_node)
498				: NULL;
499		}
500
501		if (parts)
502			__try_update_largest_extent(inode, et, en);
503		else
504			__release_extent_node(sbi, et, en);
505
506		/*
507		 * if original extent is split into zero or two parts, extent
508		 * tree has been altered by deletion or insertion, therefore
509		 * invalidate pointers regard to tree.
510		 */
511		if (parts != 1) {
512			insert_p = NULL;
513			insert_parent = NULL;
514		}
515		en = next_en;
516	}
517
518	/* 3. update extent in extent cache */
519	if (blkaddr) {
520
521		set_extent_info(&ei, fofs, blkaddr, len);
522		if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en))
523			__insert_extent_tree(inode, et, &ei,
524						insert_p, insert_parent);
525
526		/* give up extent_cache, if split and small updates happen */
527		if (dei.len >= 1 &&
528				prev.len < F2FS_MIN_EXTENT_LEN &&
529				et->largest.len < F2FS_MIN_EXTENT_LEN) {
530			__drop_largest_extent(inode, 0, UINT_MAX);
531			set_inode_flag(inode, FI_NO_EXTENT);
532		}
533	}
534
535	if (is_inode_flag_set(inode, FI_NO_EXTENT))
536		__free_extent_tree(sbi, et);
537
538	write_unlock(&et->lock);
539
540	return !__is_extent_same(&prev, &et->largest);
541}
542
543unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
544{
545	struct extent_tree *et, *next;
546	struct extent_node *en;
547	unsigned int node_cnt = 0, tree_cnt = 0;
548	int remained;
549
550	if (!test_opt(sbi, EXTENT_CACHE))
551		return 0;
552
553	if (!atomic_read(&sbi->total_zombie_tree))
554		goto free_node;
555
556	if (!down_write_trylock(&sbi->extent_tree_lock))
557		goto out;
558
559	/* 1. remove unreferenced extent tree */
560	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
561		if (atomic_read(&et->node_cnt)) {
562			write_lock(&et->lock);
563			node_cnt += __free_extent_tree(sbi, et);
564			write_unlock(&et->lock);
565		}
566		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
567		list_del_init(&et->list);
568		radix_tree_delete(&sbi->extent_tree_root, et->ino);
569		kmem_cache_free(extent_tree_slab, et);
570		atomic_dec(&sbi->total_ext_tree);
571		atomic_dec(&sbi->total_zombie_tree);
572		tree_cnt++;
573
574		if (node_cnt + tree_cnt >= nr_shrink)
575			goto unlock_out;
576		cond_resched();
577	}
578	up_write(&sbi->extent_tree_lock);
579
580free_node:
581	/* 2. remove LRU extent entries */
582	if (!down_write_trylock(&sbi->extent_tree_lock))
583		goto out;
584
585	remained = nr_shrink - (node_cnt + tree_cnt);
586
587	spin_lock(&sbi->extent_lock);
588	for (; remained > 0; remained--) {
589		if (list_empty(&sbi->extent_list))
590			break;
591		en = list_first_entry(&sbi->extent_list,
592					struct extent_node, list);
593		et = en->et;
594		if (!write_trylock(&et->lock)) {
595			/* refresh this extent node's position in extent list */
596			list_move_tail(&en->list, &sbi->extent_list);
597			continue;
598		}
599
600		list_del_init(&en->list);
601		spin_unlock(&sbi->extent_lock);
602
603		__detach_extent_node(sbi, et, en);
604
605		write_unlock(&et->lock);
606		node_cnt++;
607		spin_lock(&sbi->extent_lock);
608	}
609	spin_unlock(&sbi->extent_lock);
610
611unlock_out:
612	up_write(&sbi->extent_tree_lock);
613out:
614	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
615
616	return node_cnt + tree_cnt;
617}
618
619unsigned int f2fs_destroy_extent_node(struct inode *inode)
620{
621	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
622	struct extent_tree *et = F2FS_I(inode)->extent_tree;
623	unsigned int node_cnt = 0;
624
625	if (!et || !atomic_read(&et->node_cnt))
626		return 0;
627
628	write_lock(&et->lock);
629	node_cnt = __free_extent_tree(sbi, et);
630	write_unlock(&et->lock);
631
632	return node_cnt;
633}
634
635void f2fs_drop_extent_tree(struct inode *inode)
636{
637	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
638	struct extent_tree *et = F2FS_I(inode)->extent_tree;
639
640	set_inode_flag(inode, FI_NO_EXTENT);
641
642	write_lock(&et->lock);
643	__free_extent_tree(sbi, et);
644	__drop_largest_extent(inode, 0, UINT_MAX);
645	write_unlock(&et->lock);
646}
647
648void f2fs_destroy_extent_tree(struct inode *inode)
649{
650	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
651	struct extent_tree *et = F2FS_I(inode)->extent_tree;
652	unsigned int node_cnt = 0;
653
654	if (!et)
655		return;
656
657	if (inode->i_nlink && !is_bad_inode(inode) &&
658					atomic_read(&et->node_cnt)) {
659		down_write(&sbi->extent_tree_lock);
660		list_add_tail(&et->list, &sbi->zombie_list);
661		atomic_inc(&sbi->total_zombie_tree);
662		up_write(&sbi->extent_tree_lock);
663		return;
664	}
665
666	/* free all extent info belong to this extent tree */
667	node_cnt = f2fs_destroy_extent_node(inode);
668
669	/* delete extent tree entry in radix tree */
670	down_write(&sbi->extent_tree_lock);
671	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
672	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
673	kmem_cache_free(extent_tree_slab, et);
674	atomic_dec(&sbi->total_ext_tree);
675	up_write(&sbi->extent_tree_lock);
676
677	F2FS_I(inode)->extent_tree = NULL;
678
679	trace_f2fs_destroy_extent_tree(inode, node_cnt);
680}
681
682bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
683					struct extent_info *ei)
684{
685	if (!f2fs_may_extent_tree(inode))
686		return false;
687
688	return f2fs_lookup_extent_tree(inode, pgofs, ei);
689}
690
691void f2fs_update_extent_cache(struct dnode_of_data *dn)
692{
693	pgoff_t fofs;
694	block_t blkaddr;
695
696	if (!f2fs_may_extent_tree(dn->inode))
697		return;
698
699	if (dn->data_blkaddr == NEW_ADDR)
700		blkaddr = NULL_ADDR;
701	else
702		blkaddr = dn->data_blkaddr;
703
704	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
705								dn->ofs_in_node;
706	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
707}
708
709void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
710				pgoff_t fofs, block_t blkaddr, unsigned int len)
711
712{
713	if (!f2fs_may_extent_tree(dn->inode))
714		return;
715
716	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
717}
718
719void init_extent_cache_info(struct f2fs_sb_info *sbi)
720{
721	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
722	init_rwsem(&sbi->extent_tree_lock);
723	INIT_LIST_HEAD(&sbi->extent_list);
724	spin_lock_init(&sbi->extent_lock);
725	atomic_set(&sbi->total_ext_tree, 0);
726	INIT_LIST_HEAD(&sbi->zombie_list);
727	atomic_set(&sbi->total_zombie_tree, 0);
728	atomic_set(&sbi->total_ext_node, 0);
729}
730
731int __init create_extent_cache(void)
732{
733	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
734			sizeof(struct extent_tree));
735	if (!extent_tree_slab)
736		return -ENOMEM;
737	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
738			sizeof(struct extent_node));
739	if (!extent_node_slab) {
740		kmem_cache_destroy(extent_tree_slab);
741		return -ENOMEM;
742	}
743	return 0;
744}
745
746void destroy_extent_cache(void)
747{
748	kmem_cache_destroy(extent_node_slab);
749	kmem_cache_destroy(extent_tree_slab);
750}
fs/f2fs/extent_cache.c (v6.8)
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * f2fs extent cache support
   4 *
   5 * Copyright (c) 2015 Motorola Mobility
   6 * Copyright (c) 2015 Samsung Electronics
   7 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
   8 *          Chao Yu <chao2.yu@samsung.com>
   9 *
  10 * block_age-based extent cache added by:
  11 * Copyright (c) 2022 xiaomi Co., Ltd.
  12 *             http://www.xiaomi.com/
  13 */
  14
  15#include <linux/fs.h>
  16#include <linux/f2fs_fs.h>
  17
  18#include "f2fs.h"
  19#include "node.h"
  20#include <trace/events/f2fs.h>
  21
  22bool sanity_check_extent_cache(struct inode *inode)
  23{
  24	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  25	struct f2fs_inode_info *fi = F2FS_I(inode);
  26	struct extent_tree *et = fi->extent_tree[EX_READ];
  27	struct extent_info *ei;
  28
  29	if (!et)
  30		return true;
  31
  32	ei = &et->largest;
  33	if (!ei->len)
  34		return true;
  35
  36	/* Let's drop, if checkpoint got corrupted. */
  37	if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
  38		ei->len = 0;
  39		et->largest_updated = true;
  40		return true;
  41	}
  42
  43	if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
  44	    !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
  45					DATA_GENERIC_ENHANCE)) {
  46		set_sbi_flag(sbi, SBI_NEED_FSCK);
  47		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
  48			  __func__, inode->i_ino,
  49			  ei->blk, ei->fofs, ei->len);
  50		return false;
  51	}
  52	return true;
  53}
  54
  55static void __set_extent_info(struct extent_info *ei,
  56				unsigned int fofs, unsigned int len,
  57				block_t blk, bool keep_clen,
  58				unsigned long age, unsigned long last_blocks,
  59				enum extent_type type)
  60{
  61	ei->fofs = fofs;
  62	ei->len = len;
  63
  64	if (type == EX_READ) {
  65		ei->blk = blk;
  66		if (keep_clen)
  67			return;
  68#ifdef CONFIG_F2FS_FS_COMPRESSION
  69		ei->c_len = 0;
  70#endif
  71	} else if (type == EX_BLOCK_AGE) {
  72		ei->age = age;
  73		ei->last_blocks = last_blocks;
  74	}
  75}
  76
  77static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
  78{
  79	if (type == EX_READ)
  80		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
  81			S_ISREG(inode->i_mode);
  82	if (type == EX_BLOCK_AGE)
  83		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
  84			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
  85	return false;
  86}
  87
  88static bool __may_extent_tree(struct inode *inode, enum extent_type type)
  89{
  90	/*
  91	 * for recovered files during mount do not create extents
  92	 * if shrinker is not registered.
  93	 */
  94	if (list_empty(&F2FS_I_SB(inode)->s_list))
  95		return false;
  96
  97	if (!__init_may_extent_tree(inode, type))
  98		return false;
  99
 100	if (type == EX_READ) {
 101		if (is_inode_flag_set(inode, FI_NO_EXTENT))
 102			return false;
 103		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
 104				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
 105			return false;
 106	} else if (type == EX_BLOCK_AGE) {
 107		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
 108			return false;
 109		if (file_is_cold(inode))
 110			return false;
 111	}
 112	return true;
 113}
 114
 115static void __try_update_largest_extent(struct extent_tree *et,
 116						struct extent_node *en)
 117{
 118	if (et->type != EX_READ)
 119		return;
 120	if (en->ei.len <= et->largest.len)
 121		return;
 122
 123	et->largest = en->ei;
 124	et->largest_updated = true;
 125}
 126
 127static bool __is_extent_mergeable(struct extent_info *back,
 128		struct extent_info *front, enum extent_type type)
 129{
 130	if (type == EX_READ) {
 131#ifdef CONFIG_F2FS_FS_COMPRESSION
 132		if (back->c_len && back->len != back->c_len)
 133			return false;
 134		if (front->c_len && front->len != front->c_len)
 135			return false;
 136#endif
 137		return (back->fofs + back->len == front->fofs &&
 138				back->blk + back->len == front->blk);
 139	} else if (type == EX_BLOCK_AGE) {
 140		return (back->fofs + back->len == front->fofs &&
 141			abs(back->age - front->age) <= SAME_AGE_REGION &&
 142			abs(back->last_blocks - front->last_blocks) <=
 143							SAME_AGE_REGION);
 144	}
 145	return false;
 146}
 147
 148static bool __is_back_mergeable(struct extent_info *cur,
 149		struct extent_info *back, enum extent_type type)
 150{
 151	return __is_extent_mergeable(back, cur, type);
 152}
 153
 154static bool __is_front_mergeable(struct extent_info *cur,
 155		struct extent_info *front, enum extent_type type)
 156{
 157	return __is_extent_mergeable(cur, front, type);
 158}
 159
 160static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
 161			struct extent_node *cached_en, unsigned int fofs)
 162{
 163	struct rb_node *node = root->rb_root.rb_node;
 164	struct extent_node *en;
 165
 166	/* check a cached entry */
 167	if (cached_en && cached_en->ei.fofs <= fofs &&
 168			cached_en->ei.fofs + cached_en->ei.len > fofs)
 169		return cached_en;
 170
 171	/* check rb_tree */
 172	while (node) {
 173		en = rb_entry(node, struct extent_node, rb_node);
 174
 175		if (fofs < en->ei.fofs)
 176			node = node->rb_left;
 177		else if (fofs >= en->ei.fofs + en->ei.len)
 178			node = node->rb_right;
 179		else
 180			return en;
 181	}
 182	return NULL;
 183}
 184
 185/*
 186 * lookup rb entry in position of @fofs in rb-tree,
 187 * if hit, return the entry, otherwise, return NULL
 188 * @prev_ex: extent before fofs
 189 * @next_ex: extent after fofs
 190 * @insert_p: insert point for new extent at fofs
 191 * in order to simplify the insertion after.
 192 * tree must stay unchanged between lookup and insertion.
 193 */
 194static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
 195				struct extent_node *cached_en,
 196				unsigned int fofs,
 197				struct extent_node **prev_entry,
 198				struct extent_node **next_entry,
 199				struct rb_node ***insert_p,
 200				struct rb_node **insert_parent,
 201				bool *leftmost)
 202{
 203	struct rb_node **pnode = &root->rb_root.rb_node;
 204	struct rb_node *parent = NULL, *tmp_node;
 205	struct extent_node *en = cached_en;
 206
 207	*insert_p = NULL;
 208	*insert_parent = NULL;
 209	*prev_entry = NULL;
 210	*next_entry = NULL;
 211
 212	if (RB_EMPTY_ROOT(&root->rb_root))
 213		return NULL;
 214
 215	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
 216		goto lookup_neighbors;
 217
 218	*leftmost = true;
 219
 220	while (*pnode) {
 221		parent = *pnode;
 222		en = rb_entry(*pnode, struct extent_node, rb_node);
 223
 224		if (fofs < en->ei.fofs) {
 225			pnode = &(*pnode)->rb_left;
 226		} else if (fofs >= en->ei.fofs + en->ei.len) {
 227			pnode = &(*pnode)->rb_right;
 228			*leftmost = false;
 229		} else {
 230			goto lookup_neighbors;
 231		}
 232	}
 233
 234	*insert_p = pnode;
 235	*insert_parent = parent;
 236
 237	en = rb_entry(parent, struct extent_node, rb_node);
 238	tmp_node = parent;
 239	if (parent && fofs > en->ei.fofs)
 240		tmp_node = rb_next(parent);
 241	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 242
 243	tmp_node = parent;
 244	if (parent && fofs < en->ei.fofs)
 245		tmp_node = rb_prev(parent);
 246	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 247	return NULL;
 248
 249lookup_neighbors:
 250	if (fofs == en->ei.fofs) {
 251		/* lookup prev node for merging backward later */
 252		tmp_node = rb_prev(&en->rb_node);
 253		*prev_entry = rb_entry_safe(tmp_node,
 254					struct extent_node, rb_node);
 255	}
 256	if (fofs == en->ei.fofs + en->ei.len - 1) {
 257		/* lookup next node for merging frontward later */
 258		tmp_node = rb_next(&en->rb_node);
 259		*next_entry = rb_entry_safe(tmp_node,
 260					struct extent_node, rb_node);
 261	}
 262	return en;
 263}
 264
 265static struct kmem_cache *extent_tree_slab;
 266static struct kmem_cache *extent_node_slab;
 267
 268static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 269				struct extent_tree *et, struct extent_info *ei,
 270				struct rb_node *parent, struct rb_node **p,
 271				bool leftmost)
 272{
 273	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 274	struct extent_node *en;
 275
 276	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
 277	if (!en)
 278		return NULL;
 279
 280	en->ei = *ei;
 281	INIT_LIST_HEAD(&en->list);
 282	en->et = et;
 283
 284	rb_link_node(&en->rb_node, parent, p);
 285	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 286	atomic_inc(&et->node_cnt);
 287	atomic_inc(&eti->total_ext_node);
 288	return en;
 289}
 290
 291static void __detach_extent_node(struct f2fs_sb_info *sbi,
 292				struct extent_tree *et, struct extent_node *en)
 293{
 294	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 295
 296	rb_erase_cached(&en->rb_node, &et->root);
 297	atomic_dec(&et->node_cnt);
 298	atomic_dec(&eti->total_ext_node);
 299
 300	if (et->cached_en == en)
 301		et->cached_en = NULL;
 302	kmem_cache_free(extent_node_slab, en);
 303}
 304
 305/*
 306 * Flow to release an extent_node:
 307 * 1. list_del_init
 308 * 2. __detach_extent_node
 309 * 3. kmem_cache_free.
 310 */
 311static void __release_extent_node(struct f2fs_sb_info *sbi,
 312			struct extent_tree *et, struct extent_node *en)
 313{
 314	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 315
 316	spin_lock(&eti->extent_lock);
 317	f2fs_bug_on(sbi, list_empty(&en->list));
 318	list_del_init(&en->list);
 319	spin_unlock(&eti->extent_lock);
 320
 321	__detach_extent_node(sbi, et, en);
 322}
 323
 324static struct extent_tree *__grab_extent_tree(struct inode *inode,
 325						enum extent_type type)
 326{
 327	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 328	struct extent_tree_info *eti = &sbi->extent_tree[type];
 329	struct extent_tree *et;
 330	nid_t ino = inode->i_ino;
 331
 332	mutex_lock(&eti->extent_tree_lock);
 333	et = radix_tree_lookup(&eti->extent_tree_root, ino);
 334	if (!et) {
 335		et = f2fs_kmem_cache_alloc(extent_tree_slab,
 336					GFP_NOFS, true, NULL);
 337		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
 338		memset(et, 0, sizeof(struct extent_tree));
 339		et->ino = ino;
 340		et->type = type;
 341		et->root = RB_ROOT_CACHED;
 342		et->cached_en = NULL;
 343		rwlock_init(&et->lock);
 344		INIT_LIST_HEAD(&et->list);
 345		atomic_set(&et->node_cnt, 0);
 346		atomic_inc(&eti->total_ext_tree);
 347	} else {
 348		atomic_dec(&eti->total_zombie_tree);
 349		list_del_init(&et->list);
 350	}
 351	mutex_unlock(&eti->extent_tree_lock);
 352
 353	/* never died until evict_inode */
 354	F2FS_I(inode)->extent_tree[type] = et;
 355
 356	return et;
 357}
 358
 359static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
 360					struct extent_tree *et)
 361{
 362	struct rb_node *node, *next;
 363	struct extent_node *en;
 364	unsigned int count = atomic_read(&et->node_cnt);
 365
 366	node = rb_first_cached(&et->root);
 367	while (node) {
 368		next = rb_next(node);
 369		en = rb_entry(node, struct extent_node, rb_node);
 370		__release_extent_node(sbi, et, en);
 371		node = next;
 372	}
 373
 374	return count - atomic_read(&et->node_cnt);
 375}
 376
 377static void __drop_largest_extent(struct extent_tree *et,
 378					pgoff_t fofs, unsigned int len)
 379{
 380	if (fofs < et->largest.fofs + et->largest.len &&
 381			fofs + len > et->largest.fofs) {
 382		et->largest.len = 0;
 383		et->largest_updated = true;
 384	}
 385}
 386
 387void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
 388{
 389	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 390	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
 391	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
 392	struct extent_tree *et;
 393	struct extent_node *en;
 394	struct extent_info ei;
 395
 396	if (!__may_extent_tree(inode, EX_READ)) {
 397		/* drop largest read extent */
 398		if (i_ext && i_ext->len) {
 399			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 400			i_ext->len = 0;
 401			set_page_dirty(ipage);
 402		}
 403		goto out;
 404	}
 405
 406	et = __grab_extent_tree(inode, EX_READ);
 407
 408	if (!i_ext || !i_ext->len)
 409		goto out;
 410
 411	get_read_extent_info(&ei, i_ext);
 412
 413	write_lock(&et->lock);
 414	if (atomic_read(&et->node_cnt))
 415		goto unlock_out;
 416
 417	en = __attach_extent_node(sbi, et, &ei, NULL,
 418				&et->root.rb_root.rb_node, true);
 419	if (en) {
 420		et->largest = en->ei;
 421		et->cached_en = en;
 422
 423		spin_lock(&eti->extent_lock);
 424		list_add_tail(&en->list, &eti->extent_list);
 425		spin_unlock(&eti->extent_lock);
 426	}
 427unlock_out:
 428	write_unlock(&et->lock);
 429out:
 430	if (!F2FS_I(inode)->extent_tree[EX_READ])
 431		set_inode_flag(inode, FI_NO_EXTENT);
 432}
 433
 434void f2fs_init_age_extent_tree(struct inode *inode)
 435{
 436	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
 437		return;
 438	__grab_extent_tree(inode, EX_BLOCK_AGE);
 439}
 440
 441void f2fs_init_extent_tree(struct inode *inode)
 442{
 443	/* initialize read cache */
 444	if (__init_may_extent_tree(inode, EX_READ))
 445		__grab_extent_tree(inode, EX_READ);
 446
 447	/* initialize block age cache */
 448	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
 449		__grab_extent_tree(inode, EX_BLOCK_AGE);
 450}
 451
 452static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 453			struct extent_info *ei, enum extent_type type)
 454{
 455	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 456	struct extent_tree_info *eti = &sbi->extent_tree[type];
 457	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 458	struct extent_node *en;
 459	bool ret = false;
 460
 461	if (!et)
 462		return false;
 463
 464	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
 465
 466	read_lock(&et->lock);
 467
 468	if (type == EX_READ &&
 469			et->largest.fofs <= pgofs &&
 470			et->largest.fofs + et->largest.len > pgofs) {
 471		*ei = et->largest;
 472		ret = true;
 473		stat_inc_largest_node_hit(sbi);
 474		goto out;
 475	}
 476
 477	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
 478	if (!en)
 479		goto out;
 480
 481	if (en == et->cached_en)
 482		stat_inc_cached_node_hit(sbi, type);
 483	else
 484		stat_inc_rbtree_node_hit(sbi, type);
 485
 486	*ei = en->ei;
 487	spin_lock(&eti->extent_lock);
 488	if (!list_empty(&en->list)) {
 489		list_move_tail(&en->list, &eti->extent_list);
 490		et->cached_en = en;
 491	}
 492	spin_unlock(&eti->extent_lock);
 493	ret = true;
 494out:
 495	stat_inc_total_hit(sbi, type);
 496	read_unlock(&et->lock);
 497
 498	if (type == EX_READ)
 499		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
 500	else if (type == EX_BLOCK_AGE)
 501		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
 502	return ret;
 503}
 504
 505static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
 506				struct extent_tree *et, struct extent_info *ei,
 507				struct extent_node *prev_ex,
 508				struct extent_node *next_ex)
 509{
 510	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 511	struct extent_node *en = NULL;
 512
 513	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
 514		prev_ex->ei.len += ei->len;
 515		ei = &prev_ex->ei;
 516		en = prev_ex;
 517	}
 518
 519	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
 520		next_ex->ei.fofs = ei->fofs;
 521		next_ex->ei.len += ei->len;
 522		if (et->type == EX_READ)
 523			next_ex->ei.blk = ei->blk;
 524		if (en)
 525			__release_extent_node(sbi, et, prev_ex);
 526
 527		en = next_ex;
 528	}
 529
 530	if (!en)
 531		return NULL;
 532
 533	__try_update_largest_extent(et, en);
 534
 535	spin_lock(&eti->extent_lock);
 536	if (!list_empty(&en->list)) {
 537		list_move_tail(&en->list, &eti->extent_list);
 538		et->cached_en = en;
 539	}
 540	spin_unlock(&eti->extent_lock);
 541	return en;
 542}
 543
 544static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 545				struct extent_tree *et, struct extent_info *ei,
 546				struct rb_node **insert_p,
 547				struct rb_node *insert_parent,
 548				bool leftmost)
 549{
 550	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 551	struct rb_node **p = &et->root.rb_root.rb_node;
 552	struct rb_node *parent = NULL;
 553	struct extent_node *en = NULL;
 554
 555	if (insert_p && insert_parent) {
 556		parent = insert_parent;
 557		p = insert_p;
 558		goto do_insert;
 559	}
 560
 561	leftmost = true;
 562
 563	/* look up extent_node in the rb tree */
 564	while (*p) {
 565		parent = *p;
 566		en = rb_entry(parent, struct extent_node, rb_node);
 567
 568		if (ei->fofs < en->ei.fofs) {
 569			p = &(*p)->rb_left;
 570		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
 571			p = &(*p)->rb_right;
 572			leftmost = false;
 573		} else {
 574			f2fs_bug_on(sbi, 1);
 575		}
 576	}
 577
 578do_insert:
 579	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
 580	if (!en)
 581		return NULL;
 582
 583	__try_update_largest_extent(et, en);
 584
 585	/* update in global extent list */
 586	spin_lock(&eti->extent_lock);
 587	list_add_tail(&en->list, &eti->extent_list);
 588	et->cached_en = en;
 589	spin_unlock(&eti->extent_lock);
 590	return en;
 591}
 592
 593static void __update_extent_tree_range(struct inode *inode,
 594			struct extent_info *tei, enum extent_type type)
 595{
 596	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 597	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 598	struct extent_node *en = NULL, *en1 = NULL;
 599	struct extent_node *prev_en = NULL, *next_en = NULL;
 600	struct extent_info ei, dei, prev;
 601	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 602	unsigned int fofs = tei->fofs, len = tei->len;
 603	unsigned int end = fofs + len;
 604	bool updated = false;
 605	bool leftmost = false;
 606
 607	if (!et)
 608		return;
 609
 610	if (type == EX_READ)
 611		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
 612						tei->blk, 0);
 613	else if (type == EX_BLOCK_AGE)
 614		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
 615						tei->age, tei->last_blocks);
 616
 617	write_lock(&et->lock);
 618
 619	if (type == EX_READ) {
 620		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
 621			write_unlock(&et->lock);
 622			return;
 623		}
 624
 625		prev = et->largest;
 626		dei.len = 0;
 627
 628		/*
 629		 * drop largest extent before lookup, in case it's already
 630		 * been shrunk from extent tree
 631		 */
 632		__drop_largest_extent(et, fofs, len);
 633	}
 634
 635	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
 636	en = __lookup_extent_node_ret(&et->root,
 637					et->cached_en, fofs,
 638					&prev_en, &next_en,
 639					&insert_p, &insert_parent,
 640					&leftmost);
 641	if (!en)
 642		en = next_en;
 643
 644	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
 645	while (en && en->ei.fofs < end) {
 646		unsigned int org_end;
 647		int parts = 0;	/* # of parts current extent split into */
 648
 649		next_en = en1 = NULL;
 650
 651		dei = en->ei;
 652		org_end = dei.fofs + dei.len;
 653		f2fs_bug_on(sbi, fofs >= org_end);
 654
 655		if (fofs > dei.fofs && (type != EX_READ ||
 656				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
 657			en->ei.len = fofs - en->ei.fofs;
 658			prev_en = en;
 659			parts = 1;
 660		}
 661
 662		if (end < org_end && (type != EX_READ ||
 663				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
 664			if (parts) {
 665				__set_extent_info(&ei,
 666					end, org_end - end,
 667					end - dei.fofs + dei.blk, false,
 668					dei.age, dei.last_blocks,
 669					type);
 670				en1 = __insert_extent_tree(sbi, et, &ei,
 671							NULL, NULL, true);
 672				next_en = en1;
 673			} else {
 674				__set_extent_info(&en->ei,
 675					end, en->ei.len - (end - dei.fofs),
 676					en->ei.blk + (end - dei.fofs), true,
 677					dei.age, dei.last_blocks,
 678					type);
 679				next_en = en;
 680			}
 681			parts++;
 682		}
 683
 684		if (!next_en) {
 685			struct rb_node *node = rb_next(&en->rb_node);
 686
 687			next_en = rb_entry_safe(node, struct extent_node,
 688						rb_node);
 689		}
 690
 691		if (parts)
 692			__try_update_largest_extent(et, en);
 693		else
 694			__release_extent_node(sbi, et, en);
 695
 696		/*
 697		 * if original extent is split into zero or two parts, extent
 698		 * tree has been altered by deletion or insertion, therefore
 699		 * invalidate pointers regard to tree.
 700		 */
 701		if (parts != 1) {
 702			insert_p = NULL;
 703			insert_parent = NULL;
 704		}
 705		en = next_en;
 706	}
 707
 708	if (type == EX_BLOCK_AGE)
 709		goto update_age_extent_cache;
 710
 711	/* 3. update extent in read extent cache */
 712	BUG_ON(type != EX_READ);
 713
 714	if (tei->blk) {
 715		__set_extent_info(&ei, fofs, len, tei->blk, false,
 716				  0, 0, EX_READ);
 717		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 718			__insert_extent_tree(sbi, et, &ei,
 719					insert_p, insert_parent, leftmost);
 720
 721		/* give up extent_cache, if split and small updates happen */
 722		if (dei.len >= 1 &&
 723				prev.len < F2FS_MIN_EXTENT_LEN &&
 724				et->largest.len < F2FS_MIN_EXTENT_LEN) {
 725			et->largest.len = 0;
 726			et->largest_updated = true;
 727			set_inode_flag(inode, FI_NO_EXTENT);
 728		}
 729	}
 730
 731	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 732		__free_extent_tree(sbi, et);
 733
 734	if (et->largest_updated) {
 735		et->largest_updated = false;
 736		updated = true;
 737	}
 738	goto out_read_extent_cache;
 739update_age_extent_cache:
 740	if (!tei->last_blocks)
 741		goto out_read_extent_cache;
 742
 743	__set_extent_info(&ei, fofs, len, 0, false,
 744			tei->age, tei->last_blocks, EX_BLOCK_AGE);
 745	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 746		__insert_extent_tree(sbi, et, &ei,
 747					insert_p, insert_parent, leftmost);
 748out_read_extent_cache:
 749	write_unlock(&et->lock);
 750
 751	if (updated)
 752		f2fs_mark_inode_dirty_sync(inode, true);
 753}
 754
 755#ifdef CONFIG_F2FS_FS_COMPRESSION
 756void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
 757				pgoff_t fofs, block_t blkaddr, unsigned int llen,
 758				unsigned int c_len)
 759{
 760	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 761	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
 762	struct extent_node *en = NULL;
 763	struct extent_node *prev_en = NULL, *next_en = NULL;
 764	struct extent_info ei;
 765	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 766	bool leftmost = false;
 767
 768	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
 769						blkaddr, c_len);
 770
 771	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
 772	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 773		return;
 774
 775	write_lock(&et->lock);
 776
 777	en = __lookup_extent_node_ret(&et->root,
 778					et->cached_en, fofs,
 779					&prev_en, &next_en,
 780					&insert_p, &insert_parent,
 781					&leftmost);
 782	if (en)
 783		goto unlock_out;
 784
 785	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
 786	ei.c_len = c_len;
 787
 788	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 789		__insert_extent_tree(sbi, et, &ei,
 790				insert_p, insert_parent, leftmost);
 791unlock_out:
 792	write_unlock(&et->lock);
 793}
 794#endif
 795
 796static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
 797						unsigned long long new,
 798						unsigned long long old)
 799{
 800	unsigned int rem_old, rem_new;
 801	unsigned long long res;
 802	unsigned int weight = sbi->last_age_weight;
 803
 804	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
 805		+ div_u64_rem(old, 100, &rem_old) * weight;
 806
 807	if (rem_new)
 808		res += rem_new * (100 - weight) / 100;
 809	if (rem_old)
 810		res += rem_old * weight / 100;
 811
 812	return res;
 813}
 814
 815/* This returns a new age and allocated blocks in ei */
 816static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
 817						block_t blkaddr)
 818{
 819	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 820	loff_t f_size = i_size_read(inode);
 821	unsigned long long cur_blocks =
 822				atomic64_read(&sbi->allocated_data_blocks);
 823	struct extent_info tei = *ei;	/* only fofs and len are valid */
 824
 825	/*
 826	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
 827	 * file block even in seq write. So don't record age for newly last file
 828	 * block here.
 829	 */
 830	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
 831			blkaddr == NEW_ADDR)
 832		return -EINVAL;
 833
 834	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
 835		unsigned long long cur_age;
 836
 837		if (cur_blocks >= tei.last_blocks)
 838			cur_age = cur_blocks - tei.last_blocks;
 839		else
 840			/* allocated_data_blocks overflow */
 841			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
 842
 843		if (tei.age)
 844			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
 845		else
 846			ei->age = cur_age;
 847		ei->last_blocks = cur_blocks;
 848		WARN_ON(ei->age > cur_blocks);
 849		return 0;
 850	}
 851
 852	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
 853
 854	/* the data block was allocated for the first time */
 855	if (blkaddr == NEW_ADDR)
 856		goto out;
 857
 858	if (__is_valid_data_blkaddr(blkaddr) &&
 859	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
 860		f2fs_bug_on(sbi, 1);
 861		return -EINVAL;
 862	}
 863out:
 864	/*
 865	 * init block age with zero, this can happen when the block age extent
 866	 * was reclaimed due to memory constraint or system reboot
 867	 */
 868	ei->age = 0;
 869	ei->last_blocks = cur_blocks;
 870	return 0;
 871}
 872
 873static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
 874{
 875	struct extent_info ei = {};
 876
 877	if (!__may_extent_tree(dn->inode, type))
 878		return;
 879
 880	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
 881								dn->ofs_in_node;
 882	ei.len = 1;
 883
 884	if (type == EX_READ) {
 885		if (dn->data_blkaddr == NEW_ADDR)
 886			ei.blk = NULL_ADDR;
 887		else
 888			ei.blk = dn->data_blkaddr;
 889	} else if (type == EX_BLOCK_AGE) {
 890		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
 891			return;
 892	}
 893	__update_extent_tree_range(dn->inode, &ei, type);
 894}
 895
 896static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
 897					enum extent_type type)
 898{
 899	struct extent_tree_info *eti = &sbi->extent_tree[type];
 900	struct extent_tree *et, *next;
 901	struct extent_node *en;
 902	unsigned int node_cnt = 0, tree_cnt = 0;
 903	int remained;
 904
 905	if (!atomic_read(&eti->total_zombie_tree))
 906		goto free_node;
 907
 908	if (!mutex_trylock(&eti->extent_tree_lock))
 909		goto out;
 910
 911	/* 1. remove unreferenced extent tree */
 912	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
 913		if (atomic_read(&et->node_cnt)) {
 914			write_lock(&et->lock);
 915			node_cnt += __free_extent_tree(sbi, et);
 916			write_unlock(&et->lock);
 917		}
 918		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
 919		list_del_init(&et->list);
 920		radix_tree_delete(&eti->extent_tree_root, et->ino);
 921		kmem_cache_free(extent_tree_slab, et);
 922		atomic_dec(&eti->total_ext_tree);
 923		atomic_dec(&eti->total_zombie_tree);
 924		tree_cnt++;
 925
 926		if (node_cnt + tree_cnt >= nr_shrink)
 927			goto unlock_out;
 928		cond_resched();
 929	}
 930	mutex_unlock(&eti->extent_tree_lock);
 931
 932free_node:
 933	/* 2. remove LRU extent entries */
 934	if (!mutex_trylock(&eti->extent_tree_lock))
 935		goto out;
 936
 937	remained = nr_shrink - (node_cnt + tree_cnt);
 938
 939	spin_lock(&eti->extent_lock);
 940	for (; remained > 0; remained--) {
 941		if (list_empty(&eti->extent_list))
 942			break;
 943		en = list_first_entry(&eti->extent_list,
 944					struct extent_node, list);
 945		et = en->et;
 946		if (!write_trylock(&et->lock)) {
 947			/* refresh this extent node's position in extent list */
 948			list_move_tail(&en->list, &eti->extent_list);
 949			continue;
 950		}
 951
 952		list_del_init(&en->list);
 953		spin_unlock(&eti->extent_lock);
 954
 955		__detach_extent_node(sbi, et, en);
 956
 957		write_unlock(&et->lock);
 958		node_cnt++;
 959		spin_lock(&eti->extent_lock);
 960	}
 961	spin_unlock(&eti->extent_lock);
 962
 963unlock_out:
 964	mutex_unlock(&eti->extent_tree_lock);
 965out:
 966	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
 967
 968	return node_cnt + tree_cnt;
 969}
 970
 971/* read extent cache operations */
 972bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
 973				struct extent_info *ei)
 974{
 975	if (!__may_extent_tree(inode, EX_READ))
 976		return false;
 977
 978	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
 979}
 980
 981bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
 982				block_t *blkaddr)
 983{
 984	struct extent_info ei = {};
 985
 986	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
 987		return false;
 988	*blkaddr = ei.blk + index - ei.fofs;
 989	return true;
 990}
 991
 992void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
 993{
 994	return __update_extent_cache(dn, EX_READ);
 995}
 996
 997void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
 998				pgoff_t fofs, block_t blkaddr, unsigned int len)
 999{
1000	struct extent_info ei = {
1001		.fofs = fofs,
1002		.len = len,
1003		.blk = blkaddr,
1004	};
1005
1006	if (!__may_extent_tree(dn->inode, EX_READ))
1007		return;
1008
1009	__update_extent_tree_range(dn->inode, &ei, EX_READ);
1010}
1011
1012unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1013{
1014	if (!test_opt(sbi, READ_EXTENT_CACHE))
1015		return 0;
1016
1017	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1018}
1019
1020/* block age extent cache operations */
1021bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1022				struct extent_info *ei)
1023{
1024	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1025		return false;
1026
1027	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1028}
1029
1030void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1031{
1032	return __update_extent_cache(dn, EX_BLOCK_AGE);
1033}
1034
1035void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1036				pgoff_t fofs, unsigned int len)
1037{
1038	struct extent_info ei = {
1039		.fofs = fofs,
1040		.len = len,
1041	};
1042
1043	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1044		return;
1045
1046	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1047}
1048
1049unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1050{
1051	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1052		return 0;
1053
1054	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1055}
1056
1057static unsigned int __destroy_extent_node(struct inode *inode,
1058					enum extent_type type)
1059{
1060	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1061	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1062	unsigned int node_cnt = 0;
1063
1064	if (!et || !atomic_read(&et->node_cnt))
1065		return 0;
1066
1067	write_lock(&et->lock);
1068	node_cnt = __free_extent_tree(sbi, et);
1069	write_unlock(&et->lock);
1070
1071	return node_cnt;
1072}
1073
1074void f2fs_destroy_extent_node(struct inode *inode)
1075{
1076	__destroy_extent_node(inode, EX_READ);
1077	__destroy_extent_node(inode, EX_BLOCK_AGE);
1078}
1079
1080static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1081{
1082	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1083	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1084	bool updated = false;
1085
1086	if (!__may_extent_tree(inode, type))
1087		return;
1088
1089	write_lock(&et->lock);
1090	__free_extent_tree(sbi, et);
1091	if (type == EX_READ) {
1092		set_inode_flag(inode, FI_NO_EXTENT);
1093		if (et->largest.len) {
1094			et->largest.len = 0;
1095			updated = true;
1096		}
1097	}
1098	write_unlock(&et->lock);
1099	if (updated)
1100		f2fs_mark_inode_dirty_sync(inode, true);
1101}
1102
1103void f2fs_drop_extent_tree(struct inode *inode)
1104{
1105	__drop_extent_tree(inode, EX_READ);
1106	__drop_extent_tree(inode, EX_BLOCK_AGE);
1107}
1108
1109static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1110{
1111	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1112	struct extent_tree_info *eti = &sbi->extent_tree[type];
1113	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1114	unsigned int node_cnt = 0;
1115
1116	if (!et)
1117		return;
1118
1119	if (inode->i_nlink && !is_bad_inode(inode) &&
1120					atomic_read(&et->node_cnt)) {
1121		mutex_lock(&eti->extent_tree_lock);
1122		list_add_tail(&et->list, &eti->zombie_list);
1123		atomic_inc(&eti->total_zombie_tree);
1124		mutex_unlock(&eti->extent_tree_lock);
1125		return;
1126	}
1127
1128	/* free all extent info belong to this extent tree */
1129	node_cnt = __destroy_extent_node(inode, type);
1130
1131	/* delete extent tree entry in radix tree */
1132	mutex_lock(&eti->extent_tree_lock);
1133	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1134	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1135	kmem_cache_free(extent_tree_slab, et);
1136	atomic_dec(&eti->total_ext_tree);
1137	mutex_unlock(&eti->extent_tree_lock);
1138
1139	F2FS_I(inode)->extent_tree[type] = NULL;
1140
1141	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1142}
1143
1144void f2fs_destroy_extent_tree(struct inode *inode)
1145{
1146	__destroy_extent_tree(inode, EX_READ);
1147	__destroy_extent_tree(inode, EX_BLOCK_AGE);
1148}
1149
1150static void __init_extent_tree_info(struct extent_tree_info *eti)
1151{
1152	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1153	mutex_init(&eti->extent_tree_lock);
1154	INIT_LIST_HEAD(&eti->extent_list);
1155	spin_lock_init(&eti->extent_lock);
1156	atomic_set(&eti->total_ext_tree, 0);
1157	INIT_LIST_HEAD(&eti->zombie_list);
1158	atomic_set(&eti->total_zombie_tree, 0);
1159	atomic_set(&eti->total_ext_node, 0);
1160}
1161
1162void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1163{
1164	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1165	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1166
1167	/* initialize for block age extents */
1168	atomic64_set(&sbi->allocated_data_blocks, 0);
1169	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1170	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1171	sbi->last_age_weight = LAST_AGE_WEIGHT;
1172}
1173
1174int __init f2fs_create_extent_cache(void)
1175{
1176	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1177			sizeof(struct extent_tree));
1178	if (!extent_tree_slab)
1179		return -ENOMEM;
1180	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1181			sizeof(struct extent_node));
1182	if (!extent_node_slab) {
1183		kmem_cache_destroy(extent_tree_slab);
1184		return -ENOMEM;
1185	}
1186	return 0;
1187}
1188
1189void f2fs_destroy_extent_cache(void)
1190{
1191	kmem_cache_destroy(extent_node_slab);
1192	kmem_cache_destroy(extent_tree_slab);
1193}