Linux Audio

Check our new training course

Loading...
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * f2fs extent cache support
   4 *
   5 * Copyright (c) 2015 Motorola Mobility
   6 * Copyright (c) 2015 Samsung Electronics
   7 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
   8 *          Chao Yu <chao2.yu@samsung.com>
   9 *
  10 * block_age-based extent cache added by:
  11 * Copyright (c) 2022 xiaomi Co., Ltd.
  12 *             http://www.xiaomi.com/
  13 */
  14
  15#include <linux/fs.h>
  16#include <linux/f2fs_fs.h>
  17
  18#include "f2fs.h"
  19#include "node.h"
  20#include <trace/events/f2fs.h>
  21
  22bool sanity_check_extent_cache(struct inode *inode)
  23{
  24	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  25	struct f2fs_inode_info *fi = F2FS_I(inode);
  26	struct extent_tree *et = fi->extent_tree[EX_READ];
  27	struct extent_info *ei;
  28
  29	if (!et)
  30		return true;
  31
  32	ei = &et->largest;
  33	if (!ei->len)
  34		return true;
  35
  36	/* Let's drop, if checkpoint got corrupted. */
  37	if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
  38		ei->len = 0;
  39		et->largest_updated = true;
  40		return true;
  41	}
  42
  43	if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
  44	    !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
  45					DATA_GENERIC_ENHANCE)) {
  46		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
  47			  __func__, inode->i_ino,
  48			  ei->blk, ei->fofs, ei->len);
  49		return false;
  50	}
  51	return true;
  52}
  53
  54static void __set_extent_info(struct extent_info *ei,
  55				unsigned int fofs, unsigned int len,
  56				block_t blk, bool keep_clen,
  57				unsigned long age, unsigned long last_blocks,
  58				enum extent_type type)
  59{
  60	ei->fofs = fofs;
  61	ei->len = len;
  62
  63	if (type == EX_READ) {
  64		ei->blk = blk;
  65		if (keep_clen)
  66			return;
  67#ifdef CONFIG_F2FS_FS_COMPRESSION
  68		ei->c_len = 0;
  69#endif
  70	} else if (type == EX_BLOCK_AGE) {
  71		ei->age = age;
  72		ei->last_blocks = last_blocks;
  73	}
  74}
  75
  76static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
  77{
  78	if (type == EX_READ)
  79		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
  80			S_ISREG(inode->i_mode);
  81	if (type == EX_BLOCK_AGE)
  82		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
  83			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
  84	return false;
  85}
  86
  87static bool __may_extent_tree(struct inode *inode, enum extent_type type)
  88{
  89	/*
  90	 * for recovered files during mount do not create extents
  91	 * if shrinker is not registered.
  92	 */
  93	if (list_empty(&F2FS_I_SB(inode)->s_list))
  94		return false;
  95
  96	if (!__init_may_extent_tree(inode, type))
  97		return false;
  98
  99	if (type == EX_READ) {
 100		if (is_inode_flag_set(inode, FI_NO_EXTENT))
 101			return false;
 102		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
 103				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
 104			return false;
 105	} else if (type == EX_BLOCK_AGE) {
 106		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
 107			return false;
 108		if (file_is_cold(inode))
 109			return false;
 110	}
 111	return true;
 112}
 113
 114static void __try_update_largest_extent(struct extent_tree *et,
 115						struct extent_node *en)
 116{
 117	if (et->type != EX_READ)
 118		return;
 119	if (en->ei.len <= et->largest.len)
 120		return;
 121
 122	et->largest = en->ei;
 123	et->largest_updated = true;
 124}
 125
 126static bool __is_extent_mergeable(struct extent_info *back,
 127		struct extent_info *front, enum extent_type type)
 128{
 129	if (type == EX_READ) {
 130#ifdef CONFIG_F2FS_FS_COMPRESSION
 131		if (back->c_len && back->len != back->c_len)
 132			return false;
 133		if (front->c_len && front->len != front->c_len)
 134			return false;
 135#endif
 136		return (back->fofs + back->len == front->fofs &&
 137				back->blk + back->len == front->blk);
 138	} else if (type == EX_BLOCK_AGE) {
 139		return (back->fofs + back->len == front->fofs &&
 140			abs(back->age - front->age) <= SAME_AGE_REGION &&
 141			abs(back->last_blocks - front->last_blocks) <=
 142							SAME_AGE_REGION);
 143	}
 144	return false;
 145}
 146
 147static bool __is_back_mergeable(struct extent_info *cur,
 148		struct extent_info *back, enum extent_type type)
 149{
 150	return __is_extent_mergeable(back, cur, type);
 151}
 152
 153static bool __is_front_mergeable(struct extent_info *cur,
 154		struct extent_info *front, enum extent_type type)
 155{
 156	return __is_extent_mergeable(cur, front, type);
 157}
 158
 159static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
 160			struct extent_node *cached_en, unsigned int fofs)
 161{
 162	struct rb_node *node = root->rb_root.rb_node;
 163	struct extent_node *en;
 164
 165	/* check a cached entry */
 166	if (cached_en && cached_en->ei.fofs <= fofs &&
 167			cached_en->ei.fofs + cached_en->ei.len > fofs)
 168		return cached_en;
 169
 170	/* check rb_tree */
 171	while (node) {
 172		en = rb_entry(node, struct extent_node, rb_node);
 173
 174		if (fofs < en->ei.fofs)
 175			node = node->rb_left;
 176		else if (fofs >= en->ei.fofs + en->ei.len)
 177			node = node->rb_right;
 178		else
 179			return en;
 180	}
 181	return NULL;
 182}
 183
 184/*
 185 * lookup rb entry in position of @fofs in rb-tree,
 186 * if hit, return the entry, otherwise, return NULL
 187 * @prev_ex: extent before fofs
 188 * @next_ex: extent after fofs
 189 * @insert_p: insert point for new extent at fofs
 190 * in order to simplify the insertion after.
 191 * tree must stay unchanged between lookup and insertion.
 192 */
 193static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
 194				struct extent_node *cached_en,
 195				unsigned int fofs,
 196				struct extent_node **prev_entry,
 197				struct extent_node **next_entry,
 198				struct rb_node ***insert_p,
 199				struct rb_node **insert_parent,
 200				bool *leftmost)
 201{
 202	struct rb_node **pnode = &root->rb_root.rb_node;
 203	struct rb_node *parent = NULL, *tmp_node;
 204	struct extent_node *en = cached_en;
 205
 206	*insert_p = NULL;
 207	*insert_parent = NULL;
 208	*prev_entry = NULL;
 209	*next_entry = NULL;
 210
 211	if (RB_EMPTY_ROOT(&root->rb_root))
 212		return NULL;
 213
 214	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
 215		goto lookup_neighbors;
 216
 217	*leftmost = true;
 218
 219	while (*pnode) {
 220		parent = *pnode;
 221		en = rb_entry(*pnode, struct extent_node, rb_node);
 222
 223		if (fofs < en->ei.fofs) {
 224			pnode = &(*pnode)->rb_left;
 225		} else if (fofs >= en->ei.fofs + en->ei.len) {
 226			pnode = &(*pnode)->rb_right;
 227			*leftmost = false;
 228		} else {
 229			goto lookup_neighbors;
 230		}
 231	}
 232
 233	*insert_p = pnode;
 234	*insert_parent = parent;
 235
 236	en = rb_entry(parent, struct extent_node, rb_node);
 237	tmp_node = parent;
 238	if (parent && fofs > en->ei.fofs)
 239		tmp_node = rb_next(parent);
 240	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 241
 242	tmp_node = parent;
 243	if (parent && fofs < en->ei.fofs)
 244		tmp_node = rb_prev(parent);
 245	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 246	return NULL;
 247
 248lookup_neighbors:
 249	if (fofs == en->ei.fofs) {
 250		/* lookup prev node for merging backward later */
 251		tmp_node = rb_prev(&en->rb_node);
 252		*prev_entry = rb_entry_safe(tmp_node,
 253					struct extent_node, rb_node);
 254	}
 255	if (fofs == en->ei.fofs + en->ei.len - 1) {
 256		/* lookup next node for merging frontward later */
 257		tmp_node = rb_next(&en->rb_node);
 258		*next_entry = rb_entry_safe(tmp_node,
 259					struct extent_node, rb_node);
 260	}
 261	return en;
 262}
 263
 264static struct kmem_cache *extent_tree_slab;
 265static struct kmem_cache *extent_node_slab;
 266
 267static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 268				struct extent_tree *et, struct extent_info *ei,
 269				struct rb_node *parent, struct rb_node **p,
 270				bool leftmost)
 271{
 272	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 273	struct extent_node *en;
 274
 275	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
 276	if (!en)
 277		return NULL;
 278
 279	en->ei = *ei;
 280	INIT_LIST_HEAD(&en->list);
 281	en->et = et;
 282
 283	rb_link_node(&en->rb_node, parent, p);
 284	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 285	atomic_inc(&et->node_cnt);
 286	atomic_inc(&eti->total_ext_node);
 287	return en;
 288}
 289
 290static void __detach_extent_node(struct f2fs_sb_info *sbi,
 291				struct extent_tree *et, struct extent_node *en)
 292{
 293	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 294
 295	rb_erase_cached(&en->rb_node, &et->root);
 296	atomic_dec(&et->node_cnt);
 297	atomic_dec(&eti->total_ext_node);
 298
 299	if (et->cached_en == en)
 300		et->cached_en = NULL;
 301	kmem_cache_free(extent_node_slab, en);
 302}
 303
 304/*
 305 * Flow to release an extent_node:
 306 * 1. list_del_init
 307 * 2. __detach_extent_node
 308 * 3. kmem_cache_free.
 309 */
 310static void __release_extent_node(struct f2fs_sb_info *sbi,
 311			struct extent_tree *et, struct extent_node *en)
 312{
 313	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 314
 315	spin_lock(&eti->extent_lock);
 316	f2fs_bug_on(sbi, list_empty(&en->list));
 317	list_del_init(&en->list);
 318	spin_unlock(&eti->extent_lock);
 319
 320	__detach_extent_node(sbi, et, en);
 321}
 322
 323static struct extent_tree *__grab_extent_tree(struct inode *inode,
 324						enum extent_type type)
 325{
 326	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 327	struct extent_tree_info *eti = &sbi->extent_tree[type];
 328	struct extent_tree *et;
 329	nid_t ino = inode->i_ino;
 330
 331	mutex_lock(&eti->extent_tree_lock);
 332	et = radix_tree_lookup(&eti->extent_tree_root, ino);
 333	if (!et) {
 334		et = f2fs_kmem_cache_alloc(extent_tree_slab,
 335					GFP_NOFS, true, NULL);
 336		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
 337		memset(et, 0, sizeof(struct extent_tree));
 338		et->ino = ino;
 339		et->type = type;
 340		et->root = RB_ROOT_CACHED;
 341		et->cached_en = NULL;
 342		rwlock_init(&et->lock);
 343		INIT_LIST_HEAD(&et->list);
 344		atomic_set(&et->node_cnt, 0);
 345		atomic_inc(&eti->total_ext_tree);
 346	} else {
 347		atomic_dec(&eti->total_zombie_tree);
 348		list_del_init(&et->list);
 349	}
 350	mutex_unlock(&eti->extent_tree_lock);
 351
 352	/* never died until evict_inode */
 353	F2FS_I(inode)->extent_tree[type] = et;
 354
 355	return et;
 356}
 357
 358static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
 359					struct extent_tree *et)
 360{
 361	struct rb_node *node, *next;
 362	struct extent_node *en;
 363	unsigned int count = atomic_read(&et->node_cnt);
 364
 365	node = rb_first_cached(&et->root);
 366	while (node) {
 367		next = rb_next(node);
 368		en = rb_entry(node, struct extent_node, rb_node);
 369		__release_extent_node(sbi, et, en);
 370		node = next;
 371	}
 372
 373	return count - atomic_read(&et->node_cnt);
 374}
 375
 376static void __drop_largest_extent(struct extent_tree *et,
 377					pgoff_t fofs, unsigned int len)
 378{
 379	if (fofs < et->largest.fofs + et->largest.len &&
 380			fofs + len > et->largest.fofs) {
 381		et->largest.len = 0;
 382		et->largest_updated = true;
 383	}
 384}
 385
 386void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
 387{
 388	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 389	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
 390	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
 391	struct extent_tree *et;
 392	struct extent_node *en;
 393	struct extent_info ei;
 394
 395	if (!__may_extent_tree(inode, EX_READ)) {
 396		/* drop largest read extent */
 397		if (i_ext && i_ext->len) {
 398			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 399			i_ext->len = 0;
 400			set_page_dirty(ipage);
 401		}
 402		goto out;
 403	}
 404
 405	et = __grab_extent_tree(inode, EX_READ);
 406
 407	if (!i_ext || !i_ext->len)
 408		goto out;
 409
 410	get_read_extent_info(&ei, i_ext);
 411
 412	write_lock(&et->lock);
 413	if (atomic_read(&et->node_cnt))
 414		goto unlock_out;
 415
 416	en = __attach_extent_node(sbi, et, &ei, NULL,
 417				&et->root.rb_root.rb_node, true);
 418	if (en) {
 419		et->largest = en->ei;
 420		et->cached_en = en;
 421
 422		spin_lock(&eti->extent_lock);
 423		list_add_tail(&en->list, &eti->extent_list);
 424		spin_unlock(&eti->extent_lock);
 425	}
 426unlock_out:
 427	write_unlock(&et->lock);
 428out:
 429	if (!F2FS_I(inode)->extent_tree[EX_READ])
 430		set_inode_flag(inode, FI_NO_EXTENT);
 431}
 432
 433void f2fs_init_age_extent_tree(struct inode *inode)
 434{
 435	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
 436		return;
 437	__grab_extent_tree(inode, EX_BLOCK_AGE);
 438}
 439
 440void f2fs_init_extent_tree(struct inode *inode)
 441{
 442	/* initialize read cache */
 443	if (__init_may_extent_tree(inode, EX_READ))
 444		__grab_extent_tree(inode, EX_READ);
 445
 446	/* initialize block age cache */
 447	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
 448		__grab_extent_tree(inode, EX_BLOCK_AGE);
 449}
 450
 451static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 452			struct extent_info *ei, enum extent_type type)
 453{
 454	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 455	struct extent_tree_info *eti = &sbi->extent_tree[type];
 456	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 457	struct extent_node *en;
 458	bool ret = false;
 459
 460	if (!et)
 461		return false;
 462
 463	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
 464
 465	read_lock(&et->lock);
 466
 467	if (type == EX_READ &&
 468			et->largest.fofs <= pgofs &&
 469			et->largest.fofs + et->largest.len > pgofs) {
 470		*ei = et->largest;
 471		ret = true;
 472		stat_inc_largest_node_hit(sbi);
 473		goto out;
 474	}
 475
 476	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
 477	if (!en)
 478		goto out;
 479
 480	if (en == et->cached_en)
 481		stat_inc_cached_node_hit(sbi, type);
 482	else
 483		stat_inc_rbtree_node_hit(sbi, type);
 484
 485	*ei = en->ei;
 486	spin_lock(&eti->extent_lock);
 487	if (!list_empty(&en->list)) {
 488		list_move_tail(&en->list, &eti->extent_list);
 489		et->cached_en = en;
 490	}
 491	spin_unlock(&eti->extent_lock);
 492	ret = true;
 493out:
 494	stat_inc_total_hit(sbi, type);
 495	read_unlock(&et->lock);
 496
 497	if (type == EX_READ)
 498		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
 499	else if (type == EX_BLOCK_AGE)
 500		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
 501	return ret;
 502}
 503
 504static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
 505				struct extent_tree *et, struct extent_info *ei,
 506				struct extent_node *prev_ex,
 507				struct extent_node *next_ex)
 508{
 509	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 510	struct extent_node *en = NULL;
 511
 512	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
 513		prev_ex->ei.len += ei->len;
 514		ei = &prev_ex->ei;
 515		en = prev_ex;
 516	}
 517
 518	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
 519		next_ex->ei.fofs = ei->fofs;
 520		next_ex->ei.len += ei->len;
 521		if (et->type == EX_READ)
 522			next_ex->ei.blk = ei->blk;
 523		if (en)
 524			__release_extent_node(sbi, et, prev_ex);
 525
 526		en = next_ex;
 527	}
 528
 529	if (!en)
 530		return NULL;
 531
 532	__try_update_largest_extent(et, en);
 533
 534	spin_lock(&eti->extent_lock);
 535	if (!list_empty(&en->list)) {
 536		list_move_tail(&en->list, &eti->extent_list);
 537		et->cached_en = en;
 538	}
 539	spin_unlock(&eti->extent_lock);
 540	return en;
 541}
 542
 543static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 544				struct extent_tree *et, struct extent_info *ei,
 545				struct rb_node **insert_p,
 546				struct rb_node *insert_parent,
 547				bool leftmost)
 548{
 549	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 550	struct rb_node **p = &et->root.rb_root.rb_node;
 551	struct rb_node *parent = NULL;
 552	struct extent_node *en = NULL;
 553
 554	if (insert_p && insert_parent) {
 555		parent = insert_parent;
 556		p = insert_p;
 557		goto do_insert;
 558	}
 559
 560	leftmost = true;
 561
 562	/* look up extent_node in the rb tree */
 563	while (*p) {
 564		parent = *p;
 565		en = rb_entry(parent, struct extent_node, rb_node);
 566
 567		if (ei->fofs < en->ei.fofs) {
 568			p = &(*p)->rb_left;
 569		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
 570			p = &(*p)->rb_right;
 571			leftmost = false;
 572		} else {
 573			f2fs_bug_on(sbi, 1);
 574		}
 575	}
 576
 577do_insert:
 578	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
 579	if (!en)
 580		return NULL;
 581
 582	__try_update_largest_extent(et, en);
 583
 584	/* update in global extent list */
 585	spin_lock(&eti->extent_lock);
 586	list_add_tail(&en->list, &eti->extent_list);
 587	et->cached_en = en;
 588	spin_unlock(&eti->extent_lock);
 589	return en;
 590}
 591
 592static void __update_extent_tree_range(struct inode *inode,
 593			struct extent_info *tei, enum extent_type type)
 594{
 595	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 596	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 597	struct extent_node *en = NULL, *en1 = NULL;
 598	struct extent_node *prev_en = NULL, *next_en = NULL;
 599	struct extent_info ei, dei, prev;
 600	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 601	unsigned int fofs = tei->fofs, len = tei->len;
 602	unsigned int end = fofs + len;
 603	bool updated = false;
 604	bool leftmost = false;
 605
 606	if (!et)
 607		return;
 608
 609	if (type == EX_READ)
 610		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
 611						tei->blk, 0);
 612	else if (type == EX_BLOCK_AGE)
 613		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
 614						tei->age, tei->last_blocks);
 615
 616	write_lock(&et->lock);
 617
 618	if (type == EX_READ) {
 619		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
 620			write_unlock(&et->lock);
 621			return;
 622		}
 623
 624		prev = et->largest;
 625		dei.len = 0;
 626
 627		/*
 628		 * drop largest extent before lookup, in case it's already
 629		 * been shrunk from extent tree
 630		 */
 631		__drop_largest_extent(et, fofs, len);
 632	}
 633
 634	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
 635	en = __lookup_extent_node_ret(&et->root,
 636					et->cached_en, fofs,
 637					&prev_en, &next_en,
 638					&insert_p, &insert_parent,
 639					&leftmost);
 640	if (!en)
 641		en = next_en;
 642
 643	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
 644	while (en && en->ei.fofs < end) {
 645		unsigned int org_end;
 646		int parts = 0;	/* # of parts current extent split into */
 647
 648		next_en = en1 = NULL;
 649
 650		dei = en->ei;
 651		org_end = dei.fofs + dei.len;
 652		f2fs_bug_on(sbi, fofs >= org_end);
 653
 654		if (fofs > dei.fofs && (type != EX_READ ||
 655				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
 656			en->ei.len = fofs - en->ei.fofs;
 657			prev_en = en;
 658			parts = 1;
 659		}
 660
 661		if (end < org_end && (type != EX_READ ||
 662				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
 663			if (parts) {
 664				__set_extent_info(&ei,
 665					end, org_end - end,
 666					end - dei.fofs + dei.blk, false,
 667					dei.age, dei.last_blocks,
 668					type);
 669				en1 = __insert_extent_tree(sbi, et, &ei,
 670							NULL, NULL, true);
 671				next_en = en1;
 672			} else {
 673				__set_extent_info(&en->ei,
 674					end, en->ei.len - (end - dei.fofs),
 675					en->ei.blk + (end - dei.fofs), true,
 676					dei.age, dei.last_blocks,
 677					type);
 678				next_en = en;
 679			}
 680			parts++;
 681		}
 682
 683		if (!next_en) {
 684			struct rb_node *node = rb_next(&en->rb_node);
 685
 686			next_en = rb_entry_safe(node, struct extent_node,
 687						rb_node);
 688		}
 689
 690		if (parts)
 691			__try_update_largest_extent(et, en);
 692		else
 693			__release_extent_node(sbi, et, en);
 694
 695		/*
 696		 * if original extent is split into zero or two parts, extent
 697		 * tree has been altered by deletion or insertion, therefore
 698		 * invalidate pointers regard to tree.
 699		 */
 700		if (parts != 1) {
 701			insert_p = NULL;
 702			insert_parent = NULL;
 703		}
 704		en = next_en;
 705	}
 706
 707	if (type == EX_BLOCK_AGE)
 708		goto update_age_extent_cache;
 709
 710	/* 3. update extent in read extent cache */
 711	BUG_ON(type != EX_READ);
 712
 713	if (tei->blk) {
 714		__set_extent_info(&ei, fofs, len, tei->blk, false,
 715				  0, 0, EX_READ);
 716		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 717			__insert_extent_tree(sbi, et, &ei,
 718					insert_p, insert_parent, leftmost);
 719
 720		/* give up extent_cache, if split and small updates happen */
 721		if (dei.len >= 1 &&
 722				prev.len < F2FS_MIN_EXTENT_LEN &&
 723				et->largest.len < F2FS_MIN_EXTENT_LEN) {
 724			et->largest.len = 0;
 725			et->largest_updated = true;
 726			set_inode_flag(inode, FI_NO_EXTENT);
 727		}
 728	}
 729
 730	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 731		__free_extent_tree(sbi, et);
 732
 733	if (et->largest_updated) {
 734		et->largest_updated = false;
 735		updated = true;
 736	}
 737	goto out_read_extent_cache;
 738update_age_extent_cache:
 739	if (!tei->last_blocks)
 740		goto out_read_extent_cache;
 741
 742	__set_extent_info(&ei, fofs, len, 0, false,
 743			tei->age, tei->last_blocks, EX_BLOCK_AGE);
 744	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 745		__insert_extent_tree(sbi, et, &ei,
 746					insert_p, insert_parent, leftmost);
 747out_read_extent_cache:
 748	write_unlock(&et->lock);
 749
 750	if (updated)
 751		f2fs_mark_inode_dirty_sync(inode, true);
 752}
 753
 754#ifdef CONFIG_F2FS_FS_COMPRESSION
 755void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
 756				pgoff_t fofs, block_t blkaddr, unsigned int llen,
 757				unsigned int c_len)
 758{
 759	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 760	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
 761	struct extent_node *en = NULL;
 762	struct extent_node *prev_en = NULL, *next_en = NULL;
 763	struct extent_info ei;
 764	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 765	bool leftmost = false;
 766
 767	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
 768						blkaddr, c_len);
 769
 770	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
 771	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 772		return;
 773
 774	write_lock(&et->lock);
 775
 776	en = __lookup_extent_node_ret(&et->root,
 777					et->cached_en, fofs,
 778					&prev_en, &next_en,
 779					&insert_p, &insert_parent,
 780					&leftmost);
 781	if (en)
 782		goto unlock_out;
 783
 784	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
 785	ei.c_len = c_len;
 786
 787	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 788		__insert_extent_tree(sbi, et, &ei,
 789				insert_p, insert_parent, leftmost);
 790unlock_out:
 791	write_unlock(&et->lock);
 792}
 793#endif
 794
 795static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
 796						unsigned long long new,
 797						unsigned long long old)
 798{
 799	unsigned int rem_old, rem_new;
 800	unsigned long long res;
 801	unsigned int weight = sbi->last_age_weight;
 802
 803	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
 804		+ div_u64_rem(old, 100, &rem_old) * weight;
 805
 806	if (rem_new)
 807		res += rem_new * (100 - weight) / 100;
 808	if (rem_old)
 809		res += rem_old * weight / 100;
 810
 811	return res;
 812}
 813
 814/* This returns a new age and allocated blocks in ei */
 815static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
 816						block_t blkaddr)
 817{
 818	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 819	loff_t f_size = i_size_read(inode);
 820	unsigned long long cur_blocks =
 821				atomic64_read(&sbi->allocated_data_blocks);
 822	struct extent_info tei = *ei;	/* only fofs and len are valid */
 823
 824	/*
 825	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
 826	 * file block even in seq write. So don't record age for newly last file
 827	 * block here.
 828	 */
 829	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
 830			blkaddr == NEW_ADDR)
 831		return -EINVAL;
 832
 833	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
 834		unsigned long long cur_age;
 835
 836		if (cur_blocks >= tei.last_blocks)
 837			cur_age = cur_blocks - tei.last_blocks;
 838		else
 839			/* allocated_data_blocks overflow */
 840			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
 841
 842		if (tei.age)
 843			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
 844		else
 845			ei->age = cur_age;
 846		ei->last_blocks = cur_blocks;
 847		WARN_ON(ei->age > cur_blocks);
 848		return 0;
 849	}
 850
 851	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
 852
 853	/* the data block was allocated for the first time */
 854	if (blkaddr == NEW_ADDR)
 855		goto out;
 856
 857	if (__is_valid_data_blkaddr(blkaddr) &&
 858	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
 859		return -EINVAL;
 860out:
 861	/*
 862	 * init block age with zero, this can happen when the block age extent
 863	 * was reclaimed due to memory constraint or system reboot
 864	 */
 865	ei->age = 0;
 866	ei->last_blocks = cur_blocks;
 867	return 0;
 868}
 869
 870static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
 871{
 872	struct extent_info ei = {};
 873
 874	if (!__may_extent_tree(dn->inode, type))
 875		return;
 876
 877	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
 878								dn->ofs_in_node;
 879	ei.len = 1;
 880
 881	if (type == EX_READ) {
 882		if (dn->data_blkaddr == NEW_ADDR)
 883			ei.blk = NULL_ADDR;
 884		else
 885			ei.blk = dn->data_blkaddr;
 886	} else if (type == EX_BLOCK_AGE) {
 887		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
 888			return;
 889	}
 890	__update_extent_tree_range(dn->inode, &ei, type);
 891}
 892
 893static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
 894					enum extent_type type)
 895{
 896	struct extent_tree_info *eti = &sbi->extent_tree[type];
 897	struct extent_tree *et, *next;
 898	struct extent_node *en;
 899	unsigned int node_cnt = 0, tree_cnt = 0;
 900	int remained;
 901
 902	if (!atomic_read(&eti->total_zombie_tree))
 903		goto free_node;
 904
 905	if (!mutex_trylock(&eti->extent_tree_lock))
 906		goto out;
 907
 908	/* 1. remove unreferenced extent tree */
 909	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
 910		if (atomic_read(&et->node_cnt)) {
 911			write_lock(&et->lock);
 912			node_cnt += __free_extent_tree(sbi, et);
 913			write_unlock(&et->lock);
 914		}
 915		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
 916		list_del_init(&et->list);
 917		radix_tree_delete(&eti->extent_tree_root, et->ino);
 918		kmem_cache_free(extent_tree_slab, et);
 919		atomic_dec(&eti->total_ext_tree);
 920		atomic_dec(&eti->total_zombie_tree);
 921		tree_cnt++;
 922
 923		if (node_cnt + tree_cnt >= nr_shrink)
 924			goto unlock_out;
 925		cond_resched();
 926	}
 927	mutex_unlock(&eti->extent_tree_lock);
 928
 929free_node:
 930	/* 2. remove LRU extent entries */
 931	if (!mutex_trylock(&eti->extent_tree_lock))
 932		goto out;
 933
 934	remained = nr_shrink - (node_cnt + tree_cnt);
 935
 936	spin_lock(&eti->extent_lock);
 937	for (; remained > 0; remained--) {
 938		if (list_empty(&eti->extent_list))
 939			break;
 940		en = list_first_entry(&eti->extent_list,
 941					struct extent_node, list);
 942		et = en->et;
 943		if (!write_trylock(&et->lock)) {
 944			/* refresh this extent node's position in extent list */
 945			list_move_tail(&en->list, &eti->extent_list);
 946			continue;
 947		}
 948
 949		list_del_init(&en->list);
 950		spin_unlock(&eti->extent_lock);
 951
 952		__detach_extent_node(sbi, et, en);
 953
 954		write_unlock(&et->lock);
 955		node_cnt++;
 956		spin_lock(&eti->extent_lock);
 957	}
 958	spin_unlock(&eti->extent_lock);
 959
 960unlock_out:
 961	mutex_unlock(&eti->extent_tree_lock);
 962out:
 963	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
 964
 965	return node_cnt + tree_cnt;
 966}
 967
 968/* read extent cache operations */
 969bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
 970				struct extent_info *ei)
 971{
 972	if (!__may_extent_tree(inode, EX_READ))
 973		return false;
 974
 975	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
 976}
 977
 978bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
 979				block_t *blkaddr)
 980{
 981	struct extent_info ei = {};
 982
 983	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
 984		return false;
 985	*blkaddr = ei.blk + index - ei.fofs;
 986	return true;
 987}
 988
 989void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
 990{
 991	return __update_extent_cache(dn, EX_READ);
 992}
 993
 994void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
 995				pgoff_t fofs, block_t blkaddr, unsigned int len)
 996{
 997	struct extent_info ei = {
 998		.fofs = fofs,
 999		.len = len,
1000		.blk = blkaddr,
1001	};
1002
1003	if (!__may_extent_tree(dn->inode, EX_READ))
1004		return;
1005
1006	__update_extent_tree_range(dn->inode, &ei, EX_READ);
1007}
1008
1009unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1010{
1011	if (!test_opt(sbi, READ_EXTENT_CACHE))
1012		return 0;
1013
1014	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1015}
1016
1017/* block age extent cache operations */
1018bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1019				struct extent_info *ei)
1020{
1021	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1022		return false;
1023
1024	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1025}
1026
1027void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1028{
1029	return __update_extent_cache(dn, EX_BLOCK_AGE);
1030}
1031
1032void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1033				pgoff_t fofs, unsigned int len)
1034{
1035	struct extent_info ei = {
1036		.fofs = fofs,
1037		.len = len,
1038	};
1039
1040	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1041		return;
1042
1043	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1044}
1045
1046unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1047{
1048	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1049		return 0;
1050
1051	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1052}
1053
1054static unsigned int __destroy_extent_node(struct inode *inode,
1055					enum extent_type type)
1056{
1057	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1058	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1059	unsigned int node_cnt = 0;
1060
1061	if (!et || !atomic_read(&et->node_cnt))
1062		return 0;
1063
1064	write_lock(&et->lock);
1065	node_cnt = __free_extent_tree(sbi, et);
1066	write_unlock(&et->lock);
1067
1068	return node_cnt;
1069}
1070
1071void f2fs_destroy_extent_node(struct inode *inode)
1072{
1073	__destroy_extent_node(inode, EX_READ);
1074	__destroy_extent_node(inode, EX_BLOCK_AGE);
1075}
1076
1077static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1078{
1079	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1080	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1081	bool updated = false;
1082
1083	if (!__may_extent_tree(inode, type))
1084		return;
1085
1086	write_lock(&et->lock);
1087	__free_extent_tree(sbi, et);
1088	if (type == EX_READ) {
1089		set_inode_flag(inode, FI_NO_EXTENT);
1090		if (et->largest.len) {
1091			et->largest.len = 0;
1092			updated = true;
1093		}
1094	}
1095	write_unlock(&et->lock);
1096	if (updated)
1097		f2fs_mark_inode_dirty_sync(inode, true);
1098}
1099
1100void f2fs_drop_extent_tree(struct inode *inode)
1101{
1102	__drop_extent_tree(inode, EX_READ);
1103	__drop_extent_tree(inode, EX_BLOCK_AGE);
1104}
1105
1106static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1107{
1108	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1109	struct extent_tree_info *eti = &sbi->extent_tree[type];
1110	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1111	unsigned int node_cnt = 0;
1112
1113	if (!et)
1114		return;
1115
1116	if (inode->i_nlink && !is_bad_inode(inode) &&
1117					atomic_read(&et->node_cnt)) {
1118		mutex_lock(&eti->extent_tree_lock);
1119		list_add_tail(&et->list, &eti->zombie_list);
1120		atomic_inc(&eti->total_zombie_tree);
1121		mutex_unlock(&eti->extent_tree_lock);
1122		return;
1123	}
1124
1125	/* free all extent info belong to this extent tree */
1126	node_cnt = __destroy_extent_node(inode, type);
1127
1128	/* delete extent tree entry in radix tree */
1129	mutex_lock(&eti->extent_tree_lock);
1130	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1131	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1132	kmem_cache_free(extent_tree_slab, et);
1133	atomic_dec(&eti->total_ext_tree);
1134	mutex_unlock(&eti->extent_tree_lock);
1135
1136	F2FS_I(inode)->extent_tree[type] = NULL;
1137
1138	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1139}
1140
1141void f2fs_destroy_extent_tree(struct inode *inode)
1142{
1143	__destroy_extent_tree(inode, EX_READ);
1144	__destroy_extent_tree(inode, EX_BLOCK_AGE);
1145}
1146
1147static void __init_extent_tree_info(struct extent_tree_info *eti)
1148{
1149	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1150	mutex_init(&eti->extent_tree_lock);
1151	INIT_LIST_HEAD(&eti->extent_list);
1152	spin_lock_init(&eti->extent_lock);
1153	atomic_set(&eti->total_ext_tree, 0);
1154	INIT_LIST_HEAD(&eti->zombie_list);
1155	atomic_set(&eti->total_zombie_tree, 0);
1156	atomic_set(&eti->total_ext_node, 0);
1157}
1158
1159void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1160{
1161	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1162	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1163
1164	/* initialize for block age extents */
1165	atomic64_set(&sbi->allocated_data_blocks, 0);
1166	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1167	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1168	sbi->last_age_weight = LAST_AGE_WEIGHT;
1169}
1170
1171int __init f2fs_create_extent_cache(void)
1172{
1173	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1174			sizeof(struct extent_tree));
1175	if (!extent_tree_slab)
1176		return -ENOMEM;
1177	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1178			sizeof(struct extent_node));
1179	if (!extent_node_slab) {
1180		kmem_cache_destroy(extent_tree_slab);
1181		return -ENOMEM;
1182	}
1183	return 0;
1184}
1185
1186void f2fs_destroy_extent_cache(void)
1187{
1188	kmem_cache_destroy(extent_node_slab);
1189	kmem_cache_destroy(extent_tree_slab);
1190}
  1/*
  2 * f2fs extent cache support
  3 *
  4 * Copyright (c) 2015 Motorola Mobility
  5 * Copyright (c) 2015 Samsung Electronics
  6 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  7 *          Chao Yu <chao2.yu@samsung.com>
  8 *
  9 * This program is free software; you can redistribute it and/or modify
 10 * it under the terms of the GNU General Public License version 2 as
 11 * published by the Free Software Foundation.
 12 */
 13
 14#include <linux/fs.h>
 15#include <linux/f2fs_fs.h>
 16
 17#include "f2fs.h"
 18#include "node.h"
 19#include <trace/events/f2fs.h>
 20
 21static struct kmem_cache *extent_tree_slab;
 22static struct kmem_cache *extent_node_slab;
 23
 24static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 25				struct extent_tree *et, struct extent_info *ei,
 26				struct rb_node *parent, struct rb_node **p)
 27{
 28	struct extent_node *en;
 29
 30	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
 31	if (!en)
 32		return NULL;
 33
 34	en->ei = *ei;
 35	INIT_LIST_HEAD(&en->list);
 36	en->et = et;
 37
 38	rb_link_node(&en->rb_node, parent, p);
 39	rb_insert_color(&en->rb_node, &et->root);
 40	atomic_inc(&et->node_cnt);
 41	atomic_inc(&sbi->total_ext_node);
 42	return en;
 43}
 44
 45static void __detach_extent_node(struct f2fs_sb_info *sbi,
 46				struct extent_tree *et, struct extent_node *en)
 47{
 48	rb_erase(&en->rb_node, &et->root);
 49	atomic_dec(&et->node_cnt);
 50	atomic_dec(&sbi->total_ext_node);
 51
 52	if (et->cached_en == en)
 53		et->cached_en = NULL;
 54	kmem_cache_free(extent_node_slab, en);
 55}
 56
 57/*
 58 * Flow to release an extent_node:
 59 * 1. list_del_init
 60 * 2. __detach_extent_node
 61 * 3. kmem_cache_free.
 62 */
 63static void __release_extent_node(struct f2fs_sb_info *sbi,
 64			struct extent_tree *et, struct extent_node *en)
 65{
 66	spin_lock(&sbi->extent_lock);
 67	f2fs_bug_on(sbi, list_empty(&en->list));
 68	list_del_init(&en->list);
 69	spin_unlock(&sbi->extent_lock);
 70
 71	__detach_extent_node(sbi, et, en);
 72}
 73
 74static struct extent_tree *__grab_extent_tree(struct inode *inode)
 75{
 76	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 77	struct extent_tree *et;
 78	nid_t ino = inode->i_ino;
 79
 80	down_write(&sbi->extent_tree_lock);
 81	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
 82	if (!et) {
 83		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
 84		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
 85		memset(et, 0, sizeof(struct extent_tree));
 86		et->ino = ino;
 87		et->root = RB_ROOT;
 88		et->cached_en = NULL;
 89		rwlock_init(&et->lock);
 90		INIT_LIST_HEAD(&et->list);
 91		atomic_set(&et->node_cnt, 0);
 92		atomic_inc(&sbi->total_ext_tree);
 93	} else {
 94		atomic_dec(&sbi->total_zombie_tree);
 95		list_del_init(&et->list);
 96	}
 97	up_write(&sbi->extent_tree_lock);
 98
 99	/* never died until evict_inode */
100	F2FS_I(inode)->extent_tree = et;
101
102	return et;
103}
104
105static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
106				struct extent_tree *et, unsigned int fofs)
107{
108	struct rb_node *node = et->root.rb_node;
109	struct extent_node *en = et->cached_en;
110
111	if (en) {
112		struct extent_info *cei = &en->ei;
113
114		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
115			stat_inc_cached_node_hit(sbi);
116			return en;
117		}
118	}
119
120	while (node) {
121		en = rb_entry(node, struct extent_node, rb_node);
122
123		if (fofs < en->ei.fofs) {
124			node = node->rb_left;
125		} else if (fofs >= en->ei.fofs + en->ei.len) {
126			node = node->rb_right;
127		} else {
128			stat_inc_rbtree_node_hit(sbi);
129			return en;
130		}
131	}
132	return NULL;
133}
134
135static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
136				struct extent_tree *et, struct extent_info *ei)
137{
138	struct rb_node **p = &et->root.rb_node;
139	struct extent_node *en;
140
141	en = __attach_extent_node(sbi, et, ei, NULL, p);
142	if (!en)
143		return NULL;
144
145	et->largest = en->ei;
146	et->cached_en = en;
147	return en;
148}
149
150static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
151					struct extent_tree *et)
152{
153	struct rb_node *node, *next;
154	struct extent_node *en;
155	unsigned int count = atomic_read(&et->node_cnt);
156
157	node = rb_first(&et->root);
158	while (node) {
159		next = rb_next(node);
160		en = rb_entry(node, struct extent_node, rb_node);
161		__release_extent_node(sbi, et, en);
162		node = next;
163	}
164
165	return count - atomic_read(&et->node_cnt);
166}
167
168static void __drop_largest_extent(struct inode *inode,
169					pgoff_t fofs, unsigned int len)
170{
171	struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
172
173	if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs)
174		largest->len = 0;
175}
176
177/* return true, if inode page is changed */
178bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
179{
180	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
181	struct extent_tree *et;
182	struct extent_node *en;
183	struct extent_info ei;
184
185	if (!f2fs_may_extent_tree(inode)) {
186		/* drop largest extent */
187		if (i_ext && i_ext->len) {
188			i_ext->len = 0;
189			return true;
190		}
191		return false;
192	}
193
194	et = __grab_extent_tree(inode);
195
196	if (!i_ext || !i_ext->len)
197		return false;
198
199	set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
200		le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
201
202	write_lock(&et->lock);
203	if (atomic_read(&et->node_cnt))
204		goto out;
205
206	en = __init_extent_tree(sbi, et, &ei);
207	if (en) {
208		spin_lock(&sbi->extent_lock);
209		list_add_tail(&en->list, &sbi->extent_list);
210		spin_unlock(&sbi->extent_lock);
211	}
212out:
213	write_unlock(&et->lock);
214	return false;
215}
216
217static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
218							struct extent_info *ei)
219{
220	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
221	struct extent_tree *et = F2FS_I(inode)->extent_tree;
222	struct extent_node *en;
223	bool ret = false;
224
225	f2fs_bug_on(sbi, !et);
226
227	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
228
229	read_lock(&et->lock);
230
231	if (et->largest.fofs <= pgofs &&
232			et->largest.fofs + et->largest.len > pgofs) {
233		*ei = et->largest;
234		ret = true;
235		stat_inc_largest_node_hit(sbi);
236		goto out;
237	}
238
239	en = __lookup_extent_tree(sbi, et, pgofs);
240	if (en) {
241		*ei = en->ei;
242		spin_lock(&sbi->extent_lock);
243		if (!list_empty(&en->list)) {
244			list_move_tail(&en->list, &sbi->extent_list);
245			et->cached_en = en;
246		}
247		spin_unlock(&sbi->extent_lock);
248		ret = true;
249	}
250out:
251	stat_inc_total_hit(sbi);
252	read_unlock(&et->lock);
253
254	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
255	return ret;
256}
257
258
259/*
260 * lookup extent at @fofs, if hit, return the extent
261 * if not, return NULL and
262 * @prev_ex: extent before fofs
263 * @next_ex: extent after fofs
264 * @insert_p: insert point for new extent at fofs
265 * in order to simpfy the insertion after.
266 * tree must stay unchanged between lookup and insertion.
267 */
268static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
269				unsigned int fofs,
270				struct extent_node **prev_ex,
271				struct extent_node **next_ex,
272				struct rb_node ***insert_p,
273				struct rb_node **insert_parent)
274{
275	struct rb_node **pnode = &et->root.rb_node;
276	struct rb_node *parent = NULL, *tmp_node;
277	struct extent_node *en = et->cached_en;
278
279	*insert_p = NULL;
280	*insert_parent = NULL;
281	*prev_ex = NULL;
282	*next_ex = NULL;
283
284	if (RB_EMPTY_ROOT(&et->root))
285		return NULL;
286
287	if (en) {
288		struct extent_info *cei = &en->ei;
289
290		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
291			goto lookup_neighbors;
292	}
293
294	while (*pnode) {
295		parent = *pnode;
296		en = rb_entry(*pnode, struct extent_node, rb_node);
297
298		if (fofs < en->ei.fofs)
299			pnode = &(*pnode)->rb_left;
300		else if (fofs >= en->ei.fofs + en->ei.len)
301			pnode = &(*pnode)->rb_right;
302		else
303			goto lookup_neighbors;
304	}
305
306	*insert_p = pnode;
307	*insert_parent = parent;
308
309	en = rb_entry(parent, struct extent_node, rb_node);
310	tmp_node = parent;
311	if (parent && fofs > en->ei.fofs)
312		tmp_node = rb_next(parent);
313	*next_ex = tmp_node ?
314		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
315
316	tmp_node = parent;
317	if (parent && fofs < en->ei.fofs)
318		tmp_node = rb_prev(parent);
319	*prev_ex = tmp_node ?
320		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
321	return NULL;
322
323lookup_neighbors:
324	if (fofs == en->ei.fofs) {
325		/* lookup prev node for merging backward later */
326		tmp_node = rb_prev(&en->rb_node);
327		*prev_ex = tmp_node ?
328			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
329	}
330	if (fofs == en->ei.fofs + en->ei.len - 1) {
331		/* lookup next node for merging frontward later */
332		tmp_node = rb_next(&en->rb_node);
333		*next_ex = tmp_node ?
334			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
335	}
336	return en;
337}
338
339static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
340				struct extent_tree *et, struct extent_info *ei,
341				struct extent_node *prev_ex,
342				struct extent_node *next_ex)
343{
344	struct extent_node *en = NULL;
345
346	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
347		prev_ex->ei.len += ei->len;
348		ei = &prev_ex->ei;
349		en = prev_ex;
350	}
351
352	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
353		if (en)
354			__release_extent_node(sbi, et, prev_ex);
355		next_ex->ei.fofs = ei->fofs;
356		next_ex->ei.blk = ei->blk;
357		next_ex->ei.len += ei->len;
358		en = next_ex;
359	}
360
361	if (!en)
362		return NULL;
363
364	__try_update_largest_extent(et, en);
365
366	spin_lock(&sbi->extent_lock);
367	if (!list_empty(&en->list)) {
368		list_move_tail(&en->list, &sbi->extent_list);
369		et->cached_en = en;
370	}
371	spin_unlock(&sbi->extent_lock);
372	return en;
373}
374
375static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
376				struct extent_tree *et, struct extent_info *ei,
377				struct rb_node **insert_p,
378				struct rb_node *insert_parent)
379{
380	struct rb_node **p = &et->root.rb_node;
381	struct rb_node *parent = NULL;
382	struct extent_node *en = NULL;
383
384	if (insert_p && insert_parent) {
385		parent = insert_parent;
386		p = insert_p;
387		goto do_insert;
388	}
389
390	while (*p) {
391		parent = *p;
392		en = rb_entry(parent, struct extent_node, rb_node);
393
394		if (ei->fofs < en->ei.fofs)
395			p = &(*p)->rb_left;
396		else if (ei->fofs >= en->ei.fofs + en->ei.len)
397			p = &(*p)->rb_right;
398		else
399			f2fs_bug_on(sbi, 1);
400	}
401do_insert:
402	en = __attach_extent_node(sbi, et, ei, parent, p);
403	if (!en)
404		return NULL;
405
406	__try_update_largest_extent(et, en);
407
408	/* update in global extent list */
409	spin_lock(&sbi->extent_lock);
410	list_add_tail(&en->list, &sbi->extent_list);
411	et->cached_en = en;
412	spin_unlock(&sbi->extent_lock);
413	return en;
414}
415
416static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
417				pgoff_t fofs, block_t blkaddr, unsigned int len)
418{
419	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
420	struct extent_tree *et = F2FS_I(inode)->extent_tree;
421	struct extent_node *en = NULL, *en1 = NULL;
422	struct extent_node *prev_en = NULL, *next_en = NULL;
423	struct extent_info ei, dei, prev;
424	struct rb_node **insert_p = NULL, *insert_parent = NULL;
425	unsigned int end = fofs + len;
426	unsigned int pos = (unsigned int)fofs;
427
428	if (!et)
429		return false;
430
431	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
432
433	write_lock(&et->lock);
434
435	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
436		write_unlock(&et->lock);
437		return false;
438	}
439
440	prev = et->largest;
441	dei.len = 0;
442
443	/*
444	 * drop largest extent before lookup, in case it's already
445	 * been shrunk from extent tree
446	 */
447	__drop_largest_extent(inode, fofs, len);
448
449	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
450	en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
451					&insert_p, &insert_parent);
452	if (!en)
453		en = next_en;
454
455	/* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
456	while (en && en->ei.fofs < end) {
457		unsigned int org_end;
458		int parts = 0;	/* # of parts current extent split into */
459
460		next_en = en1 = NULL;
461
462		dei = en->ei;
463		org_end = dei.fofs + dei.len;
464		f2fs_bug_on(sbi, pos >= org_end);
465
466		if (pos > dei.fofs &&	pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
467			en->ei.len = pos - en->ei.fofs;
468			prev_en = en;
469			parts = 1;
470		}
471
472		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
473			if (parts) {
474				set_extent_info(&ei, end,
475						end - dei.fofs + dei.blk,
476						org_end - end);
477				en1 = __insert_extent_tree(sbi, et, &ei,
478							NULL, NULL);
479				next_en = en1;
480			} else {
481				en->ei.fofs = end;
482				en->ei.blk += end - dei.fofs;
483				en->ei.len -= end - dei.fofs;
484				next_en = en;
485			}
486			parts++;
487		}
488
489		if (!next_en) {
490			struct rb_node *node = rb_next(&en->rb_node);
491
492			next_en = node ?
493				rb_entry(node, struct extent_node, rb_node)
494				: NULL;
495		}
496
497		if (parts)
498			__try_update_largest_extent(et, en);
499		else
500			__release_extent_node(sbi, et, en);
501
502		/*
503		 * if original extent is split into zero or two parts, extent
504		 * tree has been altered by deletion or insertion, therefore
505		 * invalidate pointers regard to tree.
506		 */
507		if (parts != 1) {
508			insert_p = NULL;
509			insert_parent = NULL;
510		}
511		en = next_en;
512	}
513
514	/* 3. update extent in extent cache */
515	if (blkaddr) {
516
517		set_extent_info(&ei, fofs, blkaddr, len);
518		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
519			__insert_extent_tree(sbi, et, &ei,
520						insert_p, insert_parent);
521
522		/* give up extent_cache, if split and small updates happen */
523		if (dei.len >= 1 &&
524				prev.len < F2FS_MIN_EXTENT_LEN &&
525				et->largest.len < F2FS_MIN_EXTENT_LEN) {
526			et->largest.len = 0;
527			set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
528		}
529	}
530
531	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
532		__free_extent_tree(sbi, et);
533
534	write_unlock(&et->lock);
535
536	return !__is_extent_same(&prev, &et->largest);
537}
538
539unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
540{
541	struct extent_tree *et, *next;
542	struct extent_node *en;
543	unsigned int node_cnt = 0, tree_cnt = 0;
544	int remained;
545
546	if (!test_opt(sbi, EXTENT_CACHE))
547		return 0;
548
549	if (!atomic_read(&sbi->total_zombie_tree))
550		goto free_node;
551
552	if (!down_write_trylock(&sbi->extent_tree_lock))
553		goto out;
554
555	/* 1. remove unreferenced extent tree */
556	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
557		if (atomic_read(&et->node_cnt)) {
558			write_lock(&et->lock);
559			node_cnt += __free_extent_tree(sbi, et);
560			write_unlock(&et->lock);
561		}
562		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
563		list_del_init(&et->list);
564		radix_tree_delete(&sbi->extent_tree_root, et->ino);
565		kmem_cache_free(extent_tree_slab, et);
566		atomic_dec(&sbi->total_ext_tree);
567		atomic_dec(&sbi->total_zombie_tree);
568		tree_cnt++;
569
570		if (node_cnt + tree_cnt >= nr_shrink)
571			goto unlock_out;
572		cond_resched();
573	}
574	up_write(&sbi->extent_tree_lock);
575
576free_node:
577	/* 2. remove LRU extent entries */
578	if (!down_write_trylock(&sbi->extent_tree_lock))
579		goto out;
580
581	remained = nr_shrink - (node_cnt + tree_cnt);
582
583	spin_lock(&sbi->extent_lock);
584	for (; remained > 0; remained--) {
585		if (list_empty(&sbi->extent_list))
586			break;
587		en = list_first_entry(&sbi->extent_list,
588					struct extent_node, list);
589		et = en->et;
590		if (!write_trylock(&et->lock)) {
591			/* refresh this extent node's position in extent list */
592			list_move_tail(&en->list, &sbi->extent_list);
593			continue;
594		}
595
596		list_del_init(&en->list);
597		spin_unlock(&sbi->extent_lock);
598
599		__detach_extent_node(sbi, et, en);
600
601		write_unlock(&et->lock);
602		node_cnt++;
603		spin_lock(&sbi->extent_lock);
604	}
605	spin_unlock(&sbi->extent_lock);
606
607unlock_out:
608	up_write(&sbi->extent_tree_lock);
609out:
610	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
611
612	return node_cnt + tree_cnt;
613}
614
615unsigned int f2fs_destroy_extent_node(struct inode *inode)
616{
617	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
618	struct extent_tree *et = F2FS_I(inode)->extent_tree;
619	unsigned int node_cnt = 0;
620
621	if (!et || !atomic_read(&et->node_cnt))
622		return 0;
623
624	write_lock(&et->lock);
625	node_cnt = __free_extent_tree(sbi, et);
626	write_unlock(&et->lock);
627
628	return node_cnt;
629}
630
631void f2fs_destroy_extent_tree(struct inode *inode)
632{
633	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
634	struct extent_tree *et = F2FS_I(inode)->extent_tree;
635	unsigned int node_cnt = 0;
636
637	if (!et)
638		return;
639
640	if (inode->i_nlink && !is_bad_inode(inode) &&
641					atomic_read(&et->node_cnt)) {
642		down_write(&sbi->extent_tree_lock);
643		list_add_tail(&et->list, &sbi->zombie_list);
644		atomic_inc(&sbi->total_zombie_tree);
645		up_write(&sbi->extent_tree_lock);
646		return;
647	}
648
649	/* free all extent info belong to this extent tree */
650	node_cnt = f2fs_destroy_extent_node(inode);
651
652	/* delete extent tree entry in radix tree */
653	down_write(&sbi->extent_tree_lock);
654	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
655	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
656	kmem_cache_free(extent_tree_slab, et);
657	atomic_dec(&sbi->total_ext_tree);
658	up_write(&sbi->extent_tree_lock);
659
660	F2FS_I(inode)->extent_tree = NULL;
661
662	trace_f2fs_destroy_extent_tree(inode, node_cnt);
663}
664
665bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
666					struct extent_info *ei)
667{
668	if (!f2fs_may_extent_tree(inode))
669		return false;
670
671	return f2fs_lookup_extent_tree(inode, pgofs, ei);
672}
673
674void f2fs_update_extent_cache(struct dnode_of_data *dn)
675{
676	pgoff_t fofs;
677	block_t blkaddr;
678
679	if (!f2fs_may_extent_tree(dn->inode))
680		return;
681
682	if (dn->data_blkaddr == NEW_ADDR)
683		blkaddr = NULL_ADDR;
684	else
685		blkaddr = dn->data_blkaddr;
686
687	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
688								dn->ofs_in_node;
689
690	if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1))
691		sync_inode_page(dn);
692}
693
694void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
695				pgoff_t fofs, block_t blkaddr, unsigned int len)
696
697{
698	if (!f2fs_may_extent_tree(dn->inode))
699		return;
700
701	if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len))
702		sync_inode_page(dn);
703}
704
705void init_extent_cache_info(struct f2fs_sb_info *sbi)
706{
707	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
708	init_rwsem(&sbi->extent_tree_lock);
709	INIT_LIST_HEAD(&sbi->extent_list);
710	spin_lock_init(&sbi->extent_lock);
711	atomic_set(&sbi->total_ext_tree, 0);
712	INIT_LIST_HEAD(&sbi->zombie_list);
713	atomic_set(&sbi->total_zombie_tree, 0);
714	atomic_set(&sbi->total_ext_node, 0);
715}
716
717int __init create_extent_cache(void)
718{
719	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
720			sizeof(struct extent_tree));
721	if (!extent_tree_slab)
722		return -ENOMEM;
723	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
724			sizeof(struct extent_node));
725	if (!extent_node_slab) {
726		kmem_cache_destroy(extent_tree_slab);
727		return -ENOMEM;
728	}
729	return 0;
730}
731
732void destroy_extent_cache(void)
733{
734	kmem_cache_destroy(extent_node_slab);
735	kmem_cache_destroy(extent_tree_slab);
736}