Linux Audio

Check our new training course

Loading...
v4.6
 
  1/*
  2 * f2fs extent cache support
  3 *
  4 * Copyright (c) 2015 Motorola Mobility
  5 * Copyright (c) 2015 Samsung Electronics
  6 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  7 *          Chao Yu <chao2.yu@samsung.com>
  8 *
  9 * This program is free software; you can redistribute it and/or modify
 10 * it under the terms of the GNU General Public License version 2 as
 11 * published by the Free Software Foundation.
 12 */
 13
 14#include <linux/fs.h>
 15#include <linux/f2fs_fs.h>
 16
 17#include "f2fs.h"
 18#include "node.h"
 19#include <trace/events/f2fs.h>
 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 21static struct kmem_cache *extent_tree_slab;
 22static struct kmem_cache *extent_node_slab;
 23
 24static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 25				struct extent_tree *et, struct extent_info *ei,
 26				struct rb_node *parent, struct rb_node **p)
 
 27{
 
 28	struct extent_node *en;
 29
 30	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
 31	if (!en)
 32		return NULL;
 33
 34	en->ei = *ei;
 35	INIT_LIST_HEAD(&en->list);
 36	en->et = et;
 37
 38	rb_link_node(&en->rb_node, parent, p);
 39	rb_insert_color(&en->rb_node, &et->root);
 40	atomic_inc(&et->node_cnt);
 41	atomic_inc(&sbi->total_ext_node);
 42	return en;
 43}
 44
 45static void __detach_extent_node(struct f2fs_sb_info *sbi,
 46				struct extent_tree *et, struct extent_node *en)
 47{
 48	rb_erase(&en->rb_node, &et->root);
 
 
 49	atomic_dec(&et->node_cnt);
 50	atomic_dec(&sbi->total_ext_node);
 51
 52	if (et->cached_en == en)
 53		et->cached_en = NULL;
 54	kmem_cache_free(extent_node_slab, en);
 55}
 56
 57/*
 58 * Flow to release an extent_node:
 59 * 1. list_del_init
 60 * 2. __detach_extent_node
 61 * 3. kmem_cache_free.
 62 */
 63static void __release_extent_node(struct f2fs_sb_info *sbi,
 64			struct extent_tree *et, struct extent_node *en)
 65{
 66	spin_lock(&sbi->extent_lock);
 
 
 67	f2fs_bug_on(sbi, list_empty(&en->list));
 68	list_del_init(&en->list);
 69	spin_unlock(&sbi->extent_lock);
 70
 71	__detach_extent_node(sbi, et, en);
 72}
 73
 74static struct extent_tree *__grab_extent_tree(struct inode *inode)
 
 75{
 76	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
 77	struct extent_tree *et;
 78	nid_t ino = inode->i_ino;
 79
 80	down_write(&sbi->extent_tree_lock);
 81	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
 82	if (!et) {
 83		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
 84		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
 
 85		memset(et, 0, sizeof(struct extent_tree));
 86		et->ino = ino;
 87		et->root = RB_ROOT;
 
 88		et->cached_en = NULL;
 89		rwlock_init(&et->lock);
 90		INIT_LIST_HEAD(&et->list);
 91		atomic_set(&et->node_cnt, 0);
 92		atomic_inc(&sbi->total_ext_tree);
 93	} else {
 94		atomic_dec(&sbi->total_zombie_tree);
 95		list_del_init(&et->list);
 96	}
 97	up_write(&sbi->extent_tree_lock);
 98
 99	/* never died until evict_inode */
100	F2FS_I(inode)->extent_tree = et;
101
102	return et;
103}
104
105static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
106				struct extent_tree *et, unsigned int fofs)
107{
108	struct rb_node *node = et->root.rb_node;
109	struct extent_node *en = et->cached_en;
110
111	if (en) {
112		struct extent_info *cei = &en->ei;
113
114		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
115			stat_inc_cached_node_hit(sbi);
116			return en;
117		}
118	}
119
120	while (node) {
121		en = rb_entry(node, struct extent_node, rb_node);
122
123		if (fofs < en->ei.fofs) {
124			node = node->rb_left;
125		} else if (fofs >= en->ei.fofs + en->ei.len) {
126			node = node->rb_right;
127		} else {
128			stat_inc_rbtree_node_hit(sbi);
129			return en;
130		}
131	}
132	return NULL;
133}
134
135static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
136				struct extent_tree *et, struct extent_info *ei)
137{
138	struct rb_node **p = &et->root.rb_node;
139	struct extent_node *en;
140
141	en = __attach_extent_node(sbi, et, ei, NULL, p);
142	if (!en)
143		return NULL;
144
145	et->largest = en->ei;
146	et->cached_en = en;
147	return en;
148}
149
150static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
151					struct extent_tree *et)
152{
153	struct rb_node *node, *next;
154	struct extent_node *en;
155	unsigned int count = atomic_read(&et->node_cnt);
156
157	node = rb_first(&et->root);
158	while (node) {
159		next = rb_next(node);
160		en = rb_entry(node, struct extent_node, rb_node);
161		__release_extent_node(sbi, et, en);
162		node = next;
163	}
164
165	return count - atomic_read(&et->node_cnt);
166}
167
168static void __drop_largest_extent(struct inode *inode,
169					pgoff_t fofs, unsigned int len)
170{
171	struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
172
173	if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs)
174		largest->len = 0;
 
175}
176
177/* return true, if inode page is changed */
178bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
179{
180	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
 
181	struct extent_tree *et;
182	struct extent_node *en;
183	struct extent_info ei;
184
185	if (!f2fs_may_extent_tree(inode)) {
186		/* drop largest extent */
187		if (i_ext && i_ext->len) {
 
188			i_ext->len = 0;
189			return true;
190		}
191		return false;
192	}
193
194	et = __grab_extent_tree(inode);
195
196	if (!i_ext || !i_ext->len)
197		return false;
198
199	set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
200		le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
201
202	write_lock(&et->lock);
203	if (atomic_read(&et->node_cnt))
204		goto out;
205
206	en = __init_extent_tree(sbi, et, &ei);
 
207	if (en) {
208		spin_lock(&sbi->extent_lock);
209		list_add_tail(&en->list, &sbi->extent_list);
210		spin_unlock(&sbi->extent_lock);
 
 
 
211	}
212out:
213	write_unlock(&et->lock);
214	return false;
 
 
215}
216
217static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
218							struct extent_info *ei)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219{
220	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
221	struct extent_tree *et = F2FS_I(inode)->extent_tree;
 
222	struct extent_node *en;
223	bool ret = false;
224
225	f2fs_bug_on(sbi, !et);
 
226
227	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
228
229	read_lock(&et->lock);
230
231	if (et->largest.fofs <= pgofs &&
 
232			et->largest.fofs + et->largest.len > pgofs) {
233		*ei = et->largest;
234		ret = true;
235		stat_inc_largest_node_hit(sbi);
236		goto out;
237	}
238
239	en = __lookup_extent_tree(sbi, et, pgofs);
240	if (en) {
241		*ei = en->ei;
242		spin_lock(&sbi->extent_lock);
243		if (!list_empty(&en->list)) {
244			list_move_tail(&en->list, &sbi->extent_list);
245			et->cached_en = en;
246		}
247		spin_unlock(&sbi->extent_lock);
248		ret = true;
 
 
 
 
249	}
 
 
250out:
251	stat_inc_total_hit(sbi);
252	read_unlock(&et->lock);
253
254	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
 
 
 
255	return ret;
256}
257
258
259/*
260 * lookup extent at @fofs, if hit, return the extent
261 * if not, return NULL and
262 * @prev_ex: extent before fofs
263 * @next_ex: extent after fofs
264 * @insert_p: insert point for new extent at fofs
265 * in order to simpfy the insertion after.
266 * tree must stay unchanged between lookup and insertion.
267 */
268static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
269				unsigned int fofs,
270				struct extent_node **prev_ex,
271				struct extent_node **next_ex,
272				struct rb_node ***insert_p,
273				struct rb_node **insert_parent)
274{
275	struct rb_node **pnode = &et->root.rb_node;
276	struct rb_node *parent = NULL, *tmp_node;
277	struct extent_node *en = et->cached_en;
278
279	*insert_p = NULL;
280	*insert_parent = NULL;
281	*prev_ex = NULL;
282	*next_ex = NULL;
283
284	if (RB_EMPTY_ROOT(&et->root))
285		return NULL;
286
287	if (en) {
288		struct extent_info *cei = &en->ei;
289
290		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
291			goto lookup_neighbors;
292	}
293
294	while (*pnode) {
295		parent = *pnode;
296		en = rb_entry(*pnode, struct extent_node, rb_node);
297
298		if (fofs < en->ei.fofs)
299			pnode = &(*pnode)->rb_left;
300		else if (fofs >= en->ei.fofs + en->ei.len)
301			pnode = &(*pnode)->rb_right;
302		else
303			goto lookup_neighbors;
304	}
305
306	*insert_p = pnode;
307	*insert_parent = parent;
308
309	en = rb_entry(parent, struct extent_node, rb_node);
310	tmp_node = parent;
311	if (parent && fofs > en->ei.fofs)
312		tmp_node = rb_next(parent);
313	*next_ex = tmp_node ?
314		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
315
316	tmp_node = parent;
317	if (parent && fofs < en->ei.fofs)
318		tmp_node = rb_prev(parent);
319	*prev_ex = tmp_node ?
320		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
321	return NULL;
322
323lookup_neighbors:
324	if (fofs == en->ei.fofs) {
325		/* lookup prev node for merging backward later */
326		tmp_node = rb_prev(&en->rb_node);
327		*prev_ex = tmp_node ?
328			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
329	}
330	if (fofs == en->ei.fofs + en->ei.len - 1) {
331		/* lookup next node for merging frontward later */
332		tmp_node = rb_next(&en->rb_node);
333		*next_ex = tmp_node ?
334			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
335	}
336	return en;
337}
338
339static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
340				struct extent_tree *et, struct extent_info *ei,
341				struct extent_node *prev_ex,
342				struct extent_node *next_ex)
343{
 
344	struct extent_node *en = NULL;
345
346	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
347		prev_ex->ei.len += ei->len;
348		ei = &prev_ex->ei;
349		en = prev_ex;
350	}
351
352	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
353		if (en)
354			__release_extent_node(sbi, et, prev_ex);
355		next_ex->ei.fofs = ei->fofs;
356		next_ex->ei.blk = ei->blk;
357		next_ex->ei.len += ei->len;
 
 
 
 
 
358		en = next_ex;
359	}
360
361	if (!en)
362		return NULL;
363
364	__try_update_largest_extent(et, en);
365
366	spin_lock(&sbi->extent_lock);
367	if (!list_empty(&en->list)) {
368		list_move_tail(&en->list, &sbi->extent_list);
369		et->cached_en = en;
370	}
371	spin_unlock(&sbi->extent_lock);
372	return en;
373}
374
375static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
376				struct extent_tree *et, struct extent_info *ei,
377				struct rb_node **insert_p,
378				struct rb_node *insert_parent)
 
379{
380	struct rb_node **p = &et->root.rb_node;
 
381	struct rb_node *parent = NULL;
382	struct extent_node *en = NULL;
383
384	if (insert_p && insert_parent) {
385		parent = insert_parent;
386		p = insert_p;
387		goto do_insert;
388	}
389
 
 
 
390	while (*p) {
391		parent = *p;
392		en = rb_entry(parent, struct extent_node, rb_node);
393
394		if (ei->fofs < en->ei.fofs)
395			p = &(*p)->rb_left;
396		else if (ei->fofs >= en->ei.fofs + en->ei.len)
397			p = &(*p)->rb_right;
398		else
 
399			f2fs_bug_on(sbi, 1);
 
400	}
 
401do_insert:
402	en = __attach_extent_node(sbi, et, ei, parent, p);
403	if (!en)
404		return NULL;
405
406	__try_update_largest_extent(et, en);
407
408	/* update in global extent list */
409	spin_lock(&sbi->extent_lock);
410	list_add_tail(&en->list, &sbi->extent_list);
411	et->cached_en = en;
412	spin_unlock(&sbi->extent_lock);
413	return en;
414}
415
416static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
417				pgoff_t fofs, block_t blkaddr, unsigned int len)
418{
419	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
420	struct extent_tree *et = F2FS_I(inode)->extent_tree;
421	struct extent_node *en = NULL, *en1 = NULL;
422	struct extent_node *prev_en = NULL, *next_en = NULL;
423	struct extent_info ei, dei, prev;
424	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 
425	unsigned int end = fofs + len;
426	unsigned int pos = (unsigned int)fofs;
 
427
428	if (!et)
429		return false;
430
431	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
 
 
 
 
 
432
433	write_lock(&et->lock);
434
435	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
436		write_unlock(&et->lock);
437		return false;
438	}
 
439
440	prev = et->largest;
441	dei.len = 0;
442
443	/*
444	 * drop largest extent before lookup, in case it's already
445	 * been shrunk from extent tree
446	 */
447	__drop_largest_extent(inode, fofs, len);
 
448
449	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
450	en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
451					&insert_p, &insert_parent);
 
 
 
452	if (!en)
453		en = next_en;
454
455	/* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
456	while (en && en->ei.fofs < end) {
457		unsigned int org_end;
458		int parts = 0;	/* # of parts current extent split into */
459
460		next_en = en1 = NULL;
461
462		dei = en->ei;
463		org_end = dei.fofs + dei.len;
464		f2fs_bug_on(sbi, pos >= org_end);
465
466		if (pos > dei.fofs &&	pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
467			en->ei.len = pos - en->ei.fofs;
 
468			prev_en = en;
469			parts = 1;
470		}
471
472		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
 
473			if (parts) {
474				set_extent_info(&ei, end,
475						end - dei.fofs + dei.blk,
476						org_end - end);
 
 
477				en1 = __insert_extent_tree(sbi, et, &ei,
478							NULL, NULL);
479				next_en = en1;
480			} else {
481				en->ei.fofs = end;
482				en->ei.blk += end - dei.fofs;
483				en->ei.len -= end - dei.fofs;
 
 
484				next_en = en;
485			}
486			parts++;
487		}
488
489		if (!next_en) {
490			struct rb_node *node = rb_next(&en->rb_node);
491
492			next_en = node ?
493				rb_entry(node, struct extent_node, rb_node)
494				: NULL;
495		}
496
497		if (parts)
498			__try_update_largest_extent(et, en);
499		else
500			__release_extent_node(sbi, et, en);
501
502		/*
503		 * if original extent is split into zero or two parts, extent
504		 * tree has been altered by deletion or insertion, therefore
505		 * invalidate pointers regard to tree.
506		 */
507		if (parts != 1) {
508			insert_p = NULL;
509			insert_parent = NULL;
510		}
511		en = next_en;
512	}
513
514	/* 3. update extent in extent cache */
515	if (blkaddr) {
516
517		set_extent_info(&ei, fofs, blkaddr, len);
 
 
 
 
 
518		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
519			__insert_extent_tree(sbi, et, &ei,
520						insert_p, insert_parent);
521
522		/* give up extent_cache, if split and small updates happen */
523		if (dei.len >= 1 &&
524				prev.len < F2FS_MIN_EXTENT_LEN &&
525				et->largest.len < F2FS_MIN_EXTENT_LEN) {
526			et->largest.len = 0;
527			set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
 
528		}
529	}
530
531	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
532		__free_extent_tree(sbi, et);
533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534	write_unlock(&et->lock);
 
 
535
536	return !__is_extent_same(&prev, &et->largest);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537}
538
539unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 
 
540{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541	struct extent_tree *et, *next;
542	struct extent_node *en;
543	unsigned int node_cnt = 0, tree_cnt = 0;
544	int remained;
545
546	if (!test_opt(sbi, EXTENT_CACHE))
547		return 0;
548
549	if (!atomic_read(&sbi->total_zombie_tree))
550		goto free_node;
551
552	if (!down_write_trylock(&sbi->extent_tree_lock))
553		goto out;
554
555	/* 1. remove unreferenced extent tree */
556	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
557		if (atomic_read(&et->node_cnt)) {
558			write_lock(&et->lock);
559			node_cnt += __free_extent_tree(sbi, et);
560			write_unlock(&et->lock);
561		}
562		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
563		list_del_init(&et->list);
564		radix_tree_delete(&sbi->extent_tree_root, et->ino);
565		kmem_cache_free(extent_tree_slab, et);
566		atomic_dec(&sbi->total_ext_tree);
567		atomic_dec(&sbi->total_zombie_tree);
568		tree_cnt++;
569
570		if (node_cnt + tree_cnt >= nr_shrink)
571			goto unlock_out;
572		cond_resched();
573	}
574	up_write(&sbi->extent_tree_lock);
575
576free_node:
577	/* 2. remove LRU extent entries */
578	if (!down_write_trylock(&sbi->extent_tree_lock))
579		goto out;
580
581	remained = nr_shrink - (node_cnt + tree_cnt);
582
583	spin_lock(&sbi->extent_lock);
584	for (; remained > 0; remained--) {
585		if (list_empty(&sbi->extent_list))
586			break;
587		en = list_first_entry(&sbi->extent_list,
588					struct extent_node, list);
589		et = en->et;
590		if (!write_trylock(&et->lock)) {
591			/* refresh this extent node's position in extent list */
592			list_move_tail(&en->list, &sbi->extent_list);
593			continue;
594		}
595
596		list_del_init(&en->list);
597		spin_unlock(&sbi->extent_lock);
598
599		__detach_extent_node(sbi, et, en);
600
601		write_unlock(&et->lock);
602		node_cnt++;
603		spin_lock(&sbi->extent_lock);
604	}
605	spin_unlock(&sbi->extent_lock);
606
607unlock_out:
608	up_write(&sbi->extent_tree_lock);
609out:
610	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
611
612	return node_cnt + tree_cnt;
613}
614
615unsigned int f2fs_destroy_extent_node(struct inode *inode)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616{
617	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
618	struct extent_tree *et = F2FS_I(inode)->extent_tree;
619	unsigned int node_cnt = 0;
620
621	if (!et || !atomic_read(&et->node_cnt))
622		return 0;
623
624	write_lock(&et->lock);
625	node_cnt = __free_extent_tree(sbi, et);
626	write_unlock(&et->lock);
627
628	return node_cnt;
629}
630
631void f2fs_destroy_extent_tree(struct inode *inode)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632{
633	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
634	struct extent_tree *et = F2FS_I(inode)->extent_tree;
 
635	unsigned int node_cnt = 0;
636
637	if (!et)
638		return;
639
640	if (inode->i_nlink && !is_bad_inode(inode) &&
641					atomic_read(&et->node_cnt)) {
642		down_write(&sbi->extent_tree_lock);
643		list_add_tail(&et->list, &sbi->zombie_list);
644		atomic_inc(&sbi->total_zombie_tree);
645		up_write(&sbi->extent_tree_lock);
646		return;
647	}
648
649	/* free all extent info belong to this extent tree */
650	node_cnt = f2fs_destroy_extent_node(inode);
651
652	/* delete extent tree entry in radix tree */
653	down_write(&sbi->extent_tree_lock);
654	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
655	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
656	kmem_cache_free(extent_tree_slab, et);
657	atomic_dec(&sbi->total_ext_tree);
658	up_write(&sbi->extent_tree_lock);
659
660	F2FS_I(inode)->extent_tree = NULL;
661
662	trace_f2fs_destroy_extent_tree(inode, node_cnt);
663}
664
665bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
666					struct extent_info *ei)
667{
668	if (!f2fs_may_extent_tree(inode))
669		return false;
670
671	return f2fs_lookup_extent_tree(inode, pgofs, ei);
672}
673
674void f2fs_update_extent_cache(struct dnode_of_data *dn)
675{
676	pgoff_t fofs;
677	block_t blkaddr;
678
679	if (!f2fs_may_extent_tree(dn->inode))
680		return;
681
682	if (dn->data_blkaddr == NEW_ADDR)
683		blkaddr = NULL_ADDR;
684	else
685		blkaddr = dn->data_blkaddr;
686
687	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
688								dn->ofs_in_node;
689
690	if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1))
691		sync_inode_page(dn);
692}
693
694void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
695				pgoff_t fofs, block_t blkaddr, unsigned int len)
696
697{
698	if (!f2fs_may_extent_tree(dn->inode))
699		return;
700
701	if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len))
702		sync_inode_page(dn);
703}
704
705void init_extent_cache_info(struct f2fs_sb_info *sbi)
706{
707	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
708	init_rwsem(&sbi->extent_tree_lock);
709	INIT_LIST_HEAD(&sbi->extent_list);
710	spin_lock_init(&sbi->extent_lock);
711	atomic_set(&sbi->total_ext_tree, 0);
712	INIT_LIST_HEAD(&sbi->zombie_list);
713	atomic_set(&sbi->total_zombie_tree, 0);
714	atomic_set(&sbi->total_ext_node, 0);
715}
716
717int __init create_extent_cache(void)
718{
719	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
720			sizeof(struct extent_tree));
721	if (!extent_tree_slab)
722		return -ENOMEM;
723	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
724			sizeof(struct extent_node));
725	if (!extent_node_slab) {
726		kmem_cache_destroy(extent_tree_slab);
727		return -ENOMEM;
728	}
729	return 0;
730}
731
732void destroy_extent_cache(void)
733{
734	kmem_cache_destroy(extent_node_slab);
735	kmem_cache_destroy(extent_tree_slab);
736}
v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * f2fs extent cache support
   4 *
   5 * Copyright (c) 2015 Motorola Mobility
   6 * Copyright (c) 2015 Samsung Electronics
   7 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
   8 *          Chao Yu <chao2.yu@samsung.com>
   9 *
  10 * block_age-based extent cache added by:
  11 * Copyright (c) 2022 xiaomi Co., Ltd.
  12 *             http://www.xiaomi.com/
  13 */
  14
  15#include <linux/fs.h>
  16#include <linux/f2fs_fs.h>
  17
  18#include "f2fs.h"
  19#include "node.h"
  20#include <trace/events/f2fs.h>
  21
  22bool sanity_check_extent_cache(struct inode *inode)
  23{
  24	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  25	struct f2fs_inode_info *fi = F2FS_I(inode);
  26	struct extent_tree *et = fi->extent_tree[EX_READ];
  27	struct extent_info *ei;
  28
  29	if (!et)
  30		return true;
  31
  32	ei = &et->largest;
  33	if (!ei->len)
  34		return true;
  35
  36	/* Let's drop, if checkpoint got corrupted. */
  37	if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
  38		ei->len = 0;
  39		et->largest_updated = true;
  40		return true;
  41	}
  42
  43	if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
  44	    !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
  45					DATA_GENERIC_ENHANCE)) {
  46		set_sbi_flag(sbi, SBI_NEED_FSCK);
  47		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
  48			  __func__, inode->i_ino,
  49			  ei->blk, ei->fofs, ei->len);
  50		return false;
  51	}
  52	return true;
  53}
  54
  55static void __set_extent_info(struct extent_info *ei,
  56				unsigned int fofs, unsigned int len,
  57				block_t blk, bool keep_clen,
  58				unsigned long age, unsigned long last_blocks,
  59				enum extent_type type)
  60{
  61	ei->fofs = fofs;
  62	ei->len = len;
  63
  64	if (type == EX_READ) {
  65		ei->blk = blk;
  66		if (keep_clen)
  67			return;
  68#ifdef CONFIG_F2FS_FS_COMPRESSION
  69		ei->c_len = 0;
  70#endif
  71	} else if (type == EX_BLOCK_AGE) {
  72		ei->age = age;
  73		ei->last_blocks = last_blocks;
  74	}
  75}
  76
  77static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
  78{
  79	if (type == EX_READ)
  80		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
  81			S_ISREG(inode->i_mode);
  82	if (type == EX_BLOCK_AGE)
  83		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
  84			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
  85	return false;
  86}
  87
  88static bool __may_extent_tree(struct inode *inode, enum extent_type type)
  89{
  90	/*
  91	 * for recovered files during mount do not create extents
  92	 * if shrinker is not registered.
  93	 */
  94	if (list_empty(&F2FS_I_SB(inode)->s_list))
  95		return false;
  96
  97	if (!__init_may_extent_tree(inode, type))
  98		return false;
  99
 100	if (type == EX_READ) {
 101		if (is_inode_flag_set(inode, FI_NO_EXTENT))
 102			return false;
 103		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
 104				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
 105			return false;
 106	} else if (type == EX_BLOCK_AGE) {
 107		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
 108			return false;
 109		if (file_is_cold(inode))
 110			return false;
 111	}
 112	return true;
 113}
 114
 115static void __try_update_largest_extent(struct extent_tree *et,
 116						struct extent_node *en)
 117{
 118	if (et->type != EX_READ)
 119		return;
 120	if (en->ei.len <= et->largest.len)
 121		return;
 122
 123	et->largest = en->ei;
 124	et->largest_updated = true;
 125}
 126
 127static bool __is_extent_mergeable(struct extent_info *back,
 128		struct extent_info *front, enum extent_type type)
 129{
 130	if (type == EX_READ) {
 131#ifdef CONFIG_F2FS_FS_COMPRESSION
 132		if (back->c_len && back->len != back->c_len)
 133			return false;
 134		if (front->c_len && front->len != front->c_len)
 135			return false;
 136#endif
 137		return (back->fofs + back->len == front->fofs &&
 138				back->blk + back->len == front->blk);
 139	} else if (type == EX_BLOCK_AGE) {
 140		return (back->fofs + back->len == front->fofs &&
 141			abs(back->age - front->age) <= SAME_AGE_REGION &&
 142			abs(back->last_blocks - front->last_blocks) <=
 143							SAME_AGE_REGION);
 144	}
 145	return false;
 146}
 147
 148static bool __is_back_mergeable(struct extent_info *cur,
 149		struct extent_info *back, enum extent_type type)
 150{
 151	return __is_extent_mergeable(back, cur, type);
 152}
 153
 154static bool __is_front_mergeable(struct extent_info *cur,
 155		struct extent_info *front, enum extent_type type)
 156{
 157	return __is_extent_mergeable(cur, front, type);
 158}
 159
 160static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
 161			struct extent_node *cached_en, unsigned int fofs)
 162{
 163	struct rb_node *node = root->rb_root.rb_node;
 164	struct extent_node *en;
 165
 166	/* check a cached entry */
 167	if (cached_en && cached_en->ei.fofs <= fofs &&
 168			cached_en->ei.fofs + cached_en->ei.len > fofs)
 169		return cached_en;
 170
 171	/* check rb_tree */
 172	while (node) {
 173		en = rb_entry(node, struct extent_node, rb_node);
 174
 175		if (fofs < en->ei.fofs)
 176			node = node->rb_left;
 177		else if (fofs >= en->ei.fofs + en->ei.len)
 178			node = node->rb_right;
 179		else
 180			return en;
 181	}
 182	return NULL;
 183}
 184
 185/*
 186 * lookup rb entry in position of @fofs in rb-tree,
 187 * if hit, return the entry, otherwise, return NULL
 188 * @prev_ex: extent before fofs
 189 * @next_ex: extent after fofs
 190 * @insert_p: insert point for new extent at fofs
 191 * in order to simplify the insertion after.
 192 * tree must stay unchanged between lookup and insertion.
 193 */
 194static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
 195				struct extent_node *cached_en,
 196				unsigned int fofs,
 197				struct extent_node **prev_entry,
 198				struct extent_node **next_entry,
 199				struct rb_node ***insert_p,
 200				struct rb_node **insert_parent,
 201				bool *leftmost)
 202{
 203	struct rb_node **pnode = &root->rb_root.rb_node;
 204	struct rb_node *parent = NULL, *tmp_node;
 205	struct extent_node *en = cached_en;
 206
 207	*insert_p = NULL;
 208	*insert_parent = NULL;
 209	*prev_entry = NULL;
 210	*next_entry = NULL;
 211
 212	if (RB_EMPTY_ROOT(&root->rb_root))
 213		return NULL;
 214
 215	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
 216		goto lookup_neighbors;
 217
 218	*leftmost = true;
 219
 220	while (*pnode) {
 221		parent = *pnode;
 222		en = rb_entry(*pnode, struct extent_node, rb_node);
 223
 224		if (fofs < en->ei.fofs) {
 225			pnode = &(*pnode)->rb_left;
 226		} else if (fofs >= en->ei.fofs + en->ei.len) {
 227			pnode = &(*pnode)->rb_right;
 228			*leftmost = false;
 229		} else {
 230			goto lookup_neighbors;
 231		}
 232	}
 233
 234	*insert_p = pnode;
 235	*insert_parent = parent;
 236
 237	en = rb_entry(parent, struct extent_node, rb_node);
 238	tmp_node = parent;
 239	if (parent && fofs > en->ei.fofs)
 240		tmp_node = rb_next(parent);
 241	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 242
 243	tmp_node = parent;
 244	if (parent && fofs < en->ei.fofs)
 245		tmp_node = rb_prev(parent);
 246	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 247	return NULL;
 248
 249lookup_neighbors:
 250	if (fofs == en->ei.fofs) {
 251		/* lookup prev node for merging backward later */
 252		tmp_node = rb_prev(&en->rb_node);
 253		*prev_entry = rb_entry_safe(tmp_node,
 254					struct extent_node, rb_node);
 255	}
 256	if (fofs == en->ei.fofs + en->ei.len - 1) {
 257		/* lookup next node for merging frontward later */
 258		tmp_node = rb_next(&en->rb_node);
 259		*next_entry = rb_entry_safe(tmp_node,
 260					struct extent_node, rb_node);
 261	}
 262	return en;
 263}
 264
 265static struct kmem_cache *extent_tree_slab;
 266static struct kmem_cache *extent_node_slab;
 267
 268static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 269				struct extent_tree *et, struct extent_info *ei,
 270				struct rb_node *parent, struct rb_node **p,
 271				bool leftmost)
 272{
 273	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 274	struct extent_node *en;
 275
 276	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
 277	if (!en)
 278		return NULL;
 279
 280	en->ei = *ei;
 281	INIT_LIST_HEAD(&en->list);
 282	en->et = et;
 283
 284	rb_link_node(&en->rb_node, parent, p);
 285	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 286	atomic_inc(&et->node_cnt);
 287	atomic_inc(&eti->total_ext_node);
 288	return en;
 289}
 290
 291static void __detach_extent_node(struct f2fs_sb_info *sbi,
 292				struct extent_tree *et, struct extent_node *en)
 293{
 294	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 295
 296	rb_erase_cached(&en->rb_node, &et->root);
 297	atomic_dec(&et->node_cnt);
 298	atomic_dec(&eti->total_ext_node);
 299
 300	if (et->cached_en == en)
 301		et->cached_en = NULL;
 302	kmem_cache_free(extent_node_slab, en);
 303}
 304
 305/*
 306 * Flow to release an extent_node:
 307 * 1. list_del_init
 308 * 2. __detach_extent_node
 309 * 3. kmem_cache_free.
 310 */
 311static void __release_extent_node(struct f2fs_sb_info *sbi,
 312			struct extent_tree *et, struct extent_node *en)
 313{
 314	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 315
 316	spin_lock(&eti->extent_lock);
 317	f2fs_bug_on(sbi, list_empty(&en->list));
 318	list_del_init(&en->list);
 319	spin_unlock(&eti->extent_lock);
 320
 321	__detach_extent_node(sbi, et, en);
 322}
 323
 324static struct extent_tree *__grab_extent_tree(struct inode *inode,
 325						enum extent_type type)
 326{
 327	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 328	struct extent_tree_info *eti = &sbi->extent_tree[type];
 329	struct extent_tree *et;
 330	nid_t ino = inode->i_ino;
 331
 332	mutex_lock(&eti->extent_tree_lock);
 333	et = radix_tree_lookup(&eti->extent_tree_root, ino);
 334	if (!et) {
 335		et = f2fs_kmem_cache_alloc(extent_tree_slab,
 336					GFP_NOFS, true, NULL);
 337		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
 338		memset(et, 0, sizeof(struct extent_tree));
 339		et->ino = ino;
 340		et->type = type;
 341		et->root = RB_ROOT_CACHED;
 342		et->cached_en = NULL;
 343		rwlock_init(&et->lock);
 344		INIT_LIST_HEAD(&et->list);
 345		atomic_set(&et->node_cnt, 0);
 346		atomic_inc(&eti->total_ext_tree);
 347	} else {
 348		atomic_dec(&eti->total_zombie_tree);
 349		list_del_init(&et->list);
 350	}
 351	mutex_unlock(&eti->extent_tree_lock);
 352
 353	/* never died until evict_inode */
 354	F2FS_I(inode)->extent_tree[type] = et;
 355
 356	return et;
 357}
 358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 359static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
 360					struct extent_tree *et)
 361{
 362	struct rb_node *node, *next;
 363	struct extent_node *en;
 364	unsigned int count = atomic_read(&et->node_cnt);
 365
 366	node = rb_first_cached(&et->root);
 367	while (node) {
 368		next = rb_next(node);
 369		en = rb_entry(node, struct extent_node, rb_node);
 370		__release_extent_node(sbi, et, en);
 371		node = next;
 372	}
 373
 374	return count - atomic_read(&et->node_cnt);
 375}
 376
 377static void __drop_largest_extent(struct extent_tree *et,
 378					pgoff_t fofs, unsigned int len)
 379{
 380	if (fofs < et->largest.fofs + et->largest.len &&
 381			fofs + len > et->largest.fofs) {
 382		et->largest.len = 0;
 383		et->largest_updated = true;
 384	}
 385}
 386
 387void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
 
 388{
 389	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 390	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
 391	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
 392	struct extent_tree *et;
 393	struct extent_node *en;
 394	struct extent_info ei;
 395
 396	if (!__may_extent_tree(inode, EX_READ)) {
 397		/* drop largest read extent */
 398		if (i_ext && i_ext->len) {
 399			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 400			i_ext->len = 0;
 401			set_page_dirty(ipage);
 402		}
 403		goto out;
 404	}
 405
 406	et = __grab_extent_tree(inode, EX_READ);
 407
 408	if (!i_ext || !i_ext->len)
 409		goto out;
 410
 411	get_read_extent_info(&ei, i_ext);
 
 412
 413	write_lock(&et->lock);
 414	if (atomic_read(&et->node_cnt))
 415		goto unlock_out;
 416
 417	en = __attach_extent_node(sbi, et, &ei, NULL,
 418				&et->root.rb_root.rb_node, true);
 419	if (en) {
 420		et->largest = en->ei;
 421		et->cached_en = en;
 422
 423		spin_lock(&eti->extent_lock);
 424		list_add_tail(&en->list, &eti->extent_list);
 425		spin_unlock(&eti->extent_lock);
 426	}
 427unlock_out:
 428	write_unlock(&et->lock);
 429out:
 430	if (!F2FS_I(inode)->extent_tree[EX_READ])
 431		set_inode_flag(inode, FI_NO_EXTENT);
 432}
 433
 434void f2fs_init_age_extent_tree(struct inode *inode)
 435{
 436	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
 437		return;
 438	__grab_extent_tree(inode, EX_BLOCK_AGE);
 439}
 440
 441void f2fs_init_extent_tree(struct inode *inode)
 442{
 443	/* initialize read cache */
 444	if (__init_may_extent_tree(inode, EX_READ))
 445		__grab_extent_tree(inode, EX_READ);
 446
 447	/* initialize block age cache */
 448	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
 449		__grab_extent_tree(inode, EX_BLOCK_AGE);
 450}
 451
 452static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 453			struct extent_info *ei, enum extent_type type)
 454{
 455	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 456	struct extent_tree_info *eti = &sbi->extent_tree[type];
 457	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 458	struct extent_node *en;
 459	bool ret = false;
 460
 461	if (!et)
 462		return false;
 463
 464	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
 465
 466	read_lock(&et->lock);
 467
 468	if (type == EX_READ &&
 469			et->largest.fofs <= pgofs &&
 470			et->largest.fofs + et->largest.len > pgofs) {
 471		*ei = et->largest;
 472		ret = true;
 473		stat_inc_largest_node_hit(sbi);
 474		goto out;
 475	}
 476
 477	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
 478	if (!en)
 479		goto out;
 480
 481	if (en == et->cached_en)
 482		stat_inc_cached_node_hit(sbi, type);
 483	else
 484		stat_inc_rbtree_node_hit(sbi, type);
 485
 486	*ei = en->ei;
 487	spin_lock(&eti->extent_lock);
 488	if (!list_empty(&en->list)) {
 489		list_move_tail(&en->list, &eti->extent_list);
 490		et->cached_en = en;
 491	}
 492	spin_unlock(&eti->extent_lock);
 493	ret = true;
 494out:
 495	stat_inc_total_hit(sbi, type);
 496	read_unlock(&et->lock);
 497
 498	if (type == EX_READ)
 499		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
 500	else if (type == EX_BLOCK_AGE)
 501		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
 502	return ret;
 503}
 504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 505static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
 506				struct extent_tree *et, struct extent_info *ei,
 507				struct extent_node *prev_ex,
 508				struct extent_node *next_ex)
 509{
 510	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 511	struct extent_node *en = NULL;
 512
 513	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
 514		prev_ex->ei.len += ei->len;
 515		ei = &prev_ex->ei;
 516		en = prev_ex;
 517	}
 518
 519	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
 
 
 520		next_ex->ei.fofs = ei->fofs;
 
 521		next_ex->ei.len += ei->len;
 522		if (et->type == EX_READ)
 523			next_ex->ei.blk = ei->blk;
 524		if (en)
 525			__release_extent_node(sbi, et, prev_ex);
 526
 527		en = next_ex;
 528	}
 529
 530	if (!en)
 531		return NULL;
 532
 533	__try_update_largest_extent(et, en);
 534
 535	spin_lock(&eti->extent_lock);
 536	if (!list_empty(&en->list)) {
 537		list_move_tail(&en->list, &eti->extent_list);
 538		et->cached_en = en;
 539	}
 540	spin_unlock(&eti->extent_lock);
 541	return en;
 542}
 543
 544static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 545				struct extent_tree *et, struct extent_info *ei,
 546				struct rb_node **insert_p,
 547				struct rb_node *insert_parent,
 548				bool leftmost)
 549{
 550	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 551	struct rb_node **p = &et->root.rb_root.rb_node;
 552	struct rb_node *parent = NULL;
 553	struct extent_node *en = NULL;
 554
 555	if (insert_p && insert_parent) {
 556		parent = insert_parent;
 557		p = insert_p;
 558		goto do_insert;
 559	}
 560
 561	leftmost = true;
 562
 563	/* look up extent_node in the rb tree */
 564	while (*p) {
 565		parent = *p;
 566		en = rb_entry(parent, struct extent_node, rb_node);
 567
 568		if (ei->fofs < en->ei.fofs) {
 569			p = &(*p)->rb_left;
 570		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
 571			p = &(*p)->rb_right;
 572			leftmost = false;
 573		} else {
 574			f2fs_bug_on(sbi, 1);
 575		}
 576	}
 577
 578do_insert:
 579	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
 580	if (!en)
 581		return NULL;
 582
 583	__try_update_largest_extent(et, en);
 584
 585	/* update in global extent list */
 586	spin_lock(&eti->extent_lock);
 587	list_add_tail(&en->list, &eti->extent_list);
 588	et->cached_en = en;
 589	spin_unlock(&eti->extent_lock);
 590	return en;
 591}
 592
/*
 * Replace whatever the extent tree of @type records for the range
 * [tei->fofs, tei->fofs + tei->len) of @inode with the mapping in @tei.
 *
 * Under the tree's write lock this:
 *  1. looks up the first node touching the range,
 *  2. trims, splits or releases every node overlapping the range,
 *  3. for EX_READ, if tei->blk is non-zero, merges or inserts the new
 *     extent, and may give up on caching for the inode (FI_NO_EXTENT);
 *     for EX_BLOCK_AGE, if tei->last_blocks is non-zero, merges or inserts
 *     the new age extent.
 *
 * Does nothing when the inode has no tree of @type, or (EX_READ only) when
 * FI_NO_EXTENT is already set.  The inode is marked dirty after unlocking
 * if the largest extent was flagged as updated on the EX_READ path.
 */
static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;	/* first offset past the range */
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		/*
		 * Remember the largest extent as it was before this update and
		 * start with an empty dei; both feed the give-up check in
		 * step 3 (dei only becomes non-empty if the loop in step 2
		 * visits a node).
		 */
		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	/* nothing covers fofs itself: start from the following node */
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		/* dei: copy of the node's extent before it is modified */
		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		/*
		 * Keep the piece in front of the range by shortening the node
		 * in place; for read extents only if that piece is at least
		 * F2FS_MIN_EXTENT_LEN long.
		 */
		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		/*
		 * Keep the piece behind the range under the same length rule:
		 * as a newly inserted node if the front piece already uses
		 * this node, otherwise by rewriting this node in place.
		 */
		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		/* no tail piece (or its insertion failed): move to rb_next */
		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		/* a node with nothing left of it is released */
		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * if original extent is split into zero or two parts, extent
		 * tree has been altered by deletion or insertion, therefore
		 * invalidate pointers regard to tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	/* a zero tei->blk only invalidates the range; nothing is added */
	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				  0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	/* caching was just given up on: empty the tree */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	/* consume the flag; the inode is dirtied once the lock is dropped */
	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	/* a zero tei->last_blocks only invalidates the range */
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}
 754
 755#ifdef CONFIG_F2FS_FS_COMPRESSION
 756void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
 757				pgoff_t fofs, block_t blkaddr, unsigned int llen,
 758				unsigned int c_len)
 759{
 760	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 761	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
 762	struct extent_node *en = NULL;
 763	struct extent_node *prev_en = NULL, *next_en = NULL;
 764	struct extent_info ei;
 765	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 766	bool leftmost = false;
 767
 768	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
 769						blkaddr, c_len);
 770
 771	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
 772	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 773		return;
 774
 775	write_lock(&et->lock);
 776
 777	en = __lookup_extent_node_ret(&et->root,
 778					et->cached_en, fofs,
 779					&prev_en, &next_en,
 780					&insert_p, &insert_parent,
 781					&leftmost);
 782	if (en)
 783		goto unlock_out;
 784
 785	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
 786	ei.c_len = c_len;
 787
 788	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 789		__insert_extent_tree(sbi, et, &ei,
 790				insert_p, insert_parent, leftmost);
 791unlock_out:
 792	write_unlock(&et->lock);
 793}
 794#endif
 795
 796static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
 797						unsigned long long new,
 798						unsigned long long old)
 799{
 800	unsigned int rem_old, rem_new;
 801	unsigned long long res;
 802	unsigned int weight = sbi->last_age_weight;
 803
 804	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
 805		+ div_u64_rem(old, 100, &rem_old) * weight;
 806
 807	if (rem_new)
 808		res += rem_new * (100 - weight) / 100;
 809	if (rem_old)
 810		res += rem_old * weight / 100;
 811
 812	return res;
 813}
 814
 815/* This returns a new age and allocated blocks in ei */
 816static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
 817						block_t blkaddr)
 818{
 819	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 820	loff_t f_size = i_size_read(inode);
 821	unsigned long long cur_blocks =
 822				atomic64_read(&sbi->allocated_data_blocks);
 823	struct extent_info tei = *ei;	/* only fofs and len are valid */
 824
 825	/*
 826	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
 827	 * file block even in seq write. So don't record age for newly last file
 828	 * block here.
 829	 */
 830	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
 831			blkaddr == NEW_ADDR)
 832		return -EINVAL;
 833
 834	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
 835		unsigned long long cur_age;
 836
 837		if (cur_blocks >= tei.last_blocks)
 838			cur_age = cur_blocks - tei.last_blocks;
 839		else
 840			/* allocated_data_blocks overflow */
 841			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
 842
 843		if (tei.age)
 844			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
 845		else
 846			ei->age = cur_age;
 847		ei->last_blocks = cur_blocks;
 848		WARN_ON(ei->age > cur_blocks);
 849		return 0;
 850	}
 851
 852	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
 853
 854	/* the data block was allocated for the first time */
 855	if (blkaddr == NEW_ADDR)
 856		goto out;
 857
 858	if (__is_valid_data_blkaddr(blkaddr) &&
 859	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
 860		f2fs_bug_on(sbi, 1);
 861		return -EINVAL;
 862	}
 863out:
 864	/*
 865	 * init block age with zero, this can happen when the block age extent
 866	 * was reclaimed due to memory constraint or system reboot
 867	 */
 868	ei->age = 0;
 869	ei->last_blocks = cur_blocks;
 870	return 0;
 871}
 872
 873static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
 874{
 875	struct extent_info ei = {};
 876
 877	if (!__may_extent_tree(dn->inode, type))
 878		return;
 879
 880	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
 881								dn->ofs_in_node;
 882	ei.len = 1;
 883
 884	if (type == EX_READ) {
 885		if (dn->data_blkaddr == NEW_ADDR)
 886			ei.blk = NULL_ADDR;
 887		else
 888			ei.blk = dn->data_blkaddr;
 889	} else if (type == EX_BLOCK_AGE) {
 890		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
 891			return;
 892	}
 893	__update_extent_tree_range(dn->inode, &ei, type);
 894}
 895
/*
 * Reclaim up to @nr_shrink objects from the extent cache of @type.
 *
 * Phase 1 frees zombie trees together with their nodes; phase 2 frees
 * individual extent nodes from the head of the global extent list.  All
 * sleeping or contended locks are taken with trylock, so the function backs
 * off instead of waiting.
 *
 * Returns the number of extent nodes plus extent trees that were freed.
 */
static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
					enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	/* no zombie trees: go straight to phase 2 */
	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent tree */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		/* budget used up: leave with the mutex still held */
		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* what is left of the budget after phase 1 */
	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	/* every pass uses one unit of budget, including skipped nodes */
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		/*
		 * Unlink under extent_lock, then drop that lock before
		 * detaching the node; the tree's write lock is still held.
		 */
		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}
 970
 971/* read extent cache operations */
 972bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
 973				struct extent_info *ei)
 974{
 975	if (!__may_extent_tree(inode, EX_READ))
 976		return false;
 977
 978	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
 979}
 980
 981bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
 982				block_t *blkaddr)
 983{
 984	struct extent_info ei = {};
 985
 986	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
 987		return false;
 988	*blkaddr = ei.blk + index - ei.fofs;
 989	return true;
 990}
 991
 992void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
 993{
 994	return __update_extent_cache(dn, EX_READ);
 995}
 996
 997void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
 998				pgoff_t fofs, block_t blkaddr, unsigned int len)
 999{
1000	struct extent_info ei = {
1001		.fofs = fofs,
1002		.len = len,
1003		.blk = blkaddr,
1004	};
1005
1006	if (!__may_extent_tree(dn->inode, EX_READ))
1007		return;
1008
1009	__update_extent_tree_range(dn->inode, &ei, EX_READ);
1010}
1011
1012unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1013{
1014	if (!test_opt(sbi, READ_EXTENT_CACHE))
1015		return 0;
1016
1017	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1018}
1019
1020/* block age extent cache operations */
1021bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1022				struct extent_info *ei)
1023{
1024	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1025		return false;
1026
1027	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1028}
1029
1030void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1031{
1032	return __update_extent_cache(dn, EX_BLOCK_AGE);
1033}
1034
1035void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1036				pgoff_t fofs, unsigned int len)
1037{
1038	struct extent_info ei = {
1039		.fofs = fofs,
1040		.len = len,
1041	};
1042
1043	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1044		return;
1045
1046	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1047}
1048
1049unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1050{
1051	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1052		return 0;
1053
1054	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1055}
1056
1057static unsigned int __destroy_extent_node(struct inode *inode,
1058					enum extent_type type)
1059{
1060	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1061	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1062	unsigned int node_cnt = 0;
1063
1064	if (!et || !atomic_read(&et->node_cnt))
1065		return 0;
1066
1067	write_lock(&et->lock);
1068	node_cnt = __free_extent_tree(sbi, et);
1069	write_unlock(&et->lock);
1070
1071	return node_cnt;
1072}
1073
1074void f2fs_destroy_extent_node(struct inode *inode)
1075{
1076	__destroy_extent_node(inode, EX_READ);
1077	__destroy_extent_node(inode, EX_BLOCK_AGE);
1078}
1079
1080static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1081{
1082	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1083	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1084	bool updated = false;
1085
1086	if (!__may_extent_tree(inode, type))
1087		return;
1088
1089	write_lock(&et->lock);
1090	__free_extent_tree(sbi, et);
1091	if (type == EX_READ) {
1092		set_inode_flag(inode, FI_NO_EXTENT);
1093		if (et->largest.len) {
1094			et->largest.len = 0;
1095			updated = true;
1096		}
1097	}
1098	write_unlock(&et->lock);
1099	if (updated)
1100		f2fs_mark_inode_dirty_sync(inode, true);
1101}
1102
1103void f2fs_drop_extent_tree(struct inode *inode)
1104{
1105	__drop_extent_tree(inode, EX_READ);
1106	__drop_extent_tree(inode, EX_BLOCK_AGE);
1107}
1108
1109static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1110{
1111	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1112	struct extent_tree_info *eti = &sbi->extent_tree[type];
1113	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1114	unsigned int node_cnt = 0;
1115
1116	if (!et)
1117		return;
1118
1119	if (inode->i_nlink && !is_bad_inode(inode) &&
1120					atomic_read(&et->node_cnt)) {
1121		mutex_lock(&eti->extent_tree_lock);
1122		list_add_tail(&et->list, &eti->zombie_list);
1123		atomic_inc(&eti->total_zombie_tree);
1124		mutex_unlock(&eti->extent_tree_lock);
1125		return;
1126	}
1127
1128	/* free all extent info belong to this extent tree */
1129	node_cnt = __destroy_extent_node(inode, type);
1130
1131	/* delete extent tree entry in radix tree */
1132	mutex_lock(&eti->extent_tree_lock);
1133	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1134	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1135	kmem_cache_free(extent_tree_slab, et);
1136	atomic_dec(&eti->total_ext_tree);
1137	mutex_unlock(&eti->extent_tree_lock);
1138
1139	F2FS_I(inode)->extent_tree[type] = NULL;
1140
1141	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1142}
1143
1144void f2fs_destroy_extent_tree(struct inode *inode)
 
1145{
1146	__destroy_extent_tree(inode, EX_READ);
1147	__destroy_extent_tree(inode, EX_BLOCK_AGE);
 
 
1148}
1149
1150static void __init_extent_tree_info(struct extent_tree_info *eti)
1151{
1152	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1153	mutex_init(&eti->extent_tree_lock);
1154	INIT_LIST_HEAD(&eti->extent_list);
1155	spin_lock_init(&eti->extent_lock);
1156	atomic_set(&eti->total_ext_tree, 0);
1157	INIT_LIST_HEAD(&eti->zombie_list);
1158	atomic_set(&eti->total_zombie_tree, 0);
1159	atomic_set(&eti->total_ext_node, 0);
 
 
 
 
 
 
 
 
1160}
1161
1162void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
 
 
1163{
1164	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1165	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1166
1167	/* initialize for block age extents */
1168	atomic64_set(&sbi->allocated_data_blocks, 0);
1169	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1170	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1171	sbi->last_age_weight = LAST_AGE_WEIGHT;
 
 
 
 
 
 
 
 
 
1172}
1173
1174int __init f2fs_create_extent_cache(void)
1175{
1176	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1177			sizeof(struct extent_tree));
1178	if (!extent_tree_slab)
1179		return -ENOMEM;
1180	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1181			sizeof(struct extent_node));
1182	if (!extent_node_slab) {
1183		kmem_cache_destroy(extent_tree_slab);
1184		return -ENOMEM;
1185	}
1186	return 0;
1187}
1188
1189void f2fs_destroy_extent_cache(void)
1190{
1191	kmem_cache_destroy(extent_node_slab);
1192	kmem_cache_destroy(extent_tree_slab);
1193}