Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.5.6.
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * f2fs extent cache support
   4 *
   5 * Copyright (c) 2015 Motorola Mobility
   6 * Copyright (c) 2015 Samsung Electronics
   7 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
   8 *          Chao Yu <chao2.yu@samsung.com>
   9 *
  10 * block_age-based extent cache added by:
  11 * Copyright (c) 2022 xiaomi Co., Ltd.
  12 *             http://www.xiaomi.com/
  13 */
  14
  15#include <linux/fs.h>
  16#include <linux/f2fs_fs.h>
  17
  18#include "f2fs.h"
  19#include "node.h"
  20#include <trace/events/f2fs.h>
  21
  22static void __set_extent_info(struct extent_info *ei,
  23				unsigned int fofs, unsigned int len,
  24				block_t blk, bool keep_clen,
  25				unsigned long age, unsigned long last_blocks,
  26				enum extent_type type)
  27{
  28	ei->fofs = fofs;
  29	ei->len = len;
  30
  31	if (type == EX_READ) {
  32		ei->blk = blk;
  33		if (keep_clen)
  34			return;
  35#ifdef CONFIG_F2FS_FS_COMPRESSION
  36		ei->c_len = 0;
  37#endif
  38	} else if (type == EX_BLOCK_AGE) {
  39		ei->age = age;
  40		ei->last_blocks = last_blocks;
  41	}
  42}
  43
  44static bool __may_read_extent_tree(struct inode *inode)
  45{
  46	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  47
  48	if (!test_opt(sbi, READ_EXTENT_CACHE))
  49		return false;
  50	if (is_inode_flag_set(inode, FI_NO_EXTENT))
  51		return false;
  52	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
  53			 !f2fs_sb_has_readonly(sbi))
  54		return false;
  55	return S_ISREG(inode->i_mode);
  56}
  57
  58static bool __may_age_extent_tree(struct inode *inode)
  59{
  60	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  61
  62	if (!test_opt(sbi, AGE_EXTENT_CACHE))
  63		return false;
  64	/* don't cache block age info for cold file */
  65	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
  66		return false;
  67	if (file_is_cold(inode))
  68		return false;
  69
  70	return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
  71}
  72
  73static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
  74{
  75	if (type == EX_READ)
  76		return __may_read_extent_tree(inode);
  77	else if (type == EX_BLOCK_AGE)
  78		return __may_age_extent_tree(inode);
  79	return false;
  80}
  81
  82static bool __may_extent_tree(struct inode *inode, enum extent_type type)
  83{
  84	/*
  85	 * for recovered files during mount do not create extents
  86	 * if shrinker is not registered.
  87	 */
  88	if (list_empty(&F2FS_I_SB(inode)->s_list))
  89		return false;
  90
  91	return __init_may_extent_tree(inode, type);
  92}
  93
  94static void __try_update_largest_extent(struct extent_tree *et,
  95						struct extent_node *en)
  96{
  97	if (et->type != EX_READ)
  98		return;
  99	if (en->ei.len <= et->largest.len)
 100		return;
 101
 102	et->largest = en->ei;
 103	et->largest_updated = true;
 104}
 105
 106static bool __is_extent_mergeable(struct extent_info *back,
 107		struct extent_info *front, enum extent_type type)
 108{
 109	if (type == EX_READ) {
 110#ifdef CONFIG_F2FS_FS_COMPRESSION
 111		if (back->c_len && back->len != back->c_len)
 112			return false;
 113		if (front->c_len && front->len != front->c_len)
 114			return false;
 115#endif
 116		return (back->fofs + back->len == front->fofs &&
 117				back->blk + back->len == front->blk);
 118	} else if (type == EX_BLOCK_AGE) {
 119		return (back->fofs + back->len == front->fofs &&
 120			abs(back->age - front->age) <= SAME_AGE_REGION &&
 121			abs(back->last_blocks - front->last_blocks) <=
 122							SAME_AGE_REGION);
 123	}
 124	return false;
 125}
 126
 127static bool __is_back_mergeable(struct extent_info *cur,
 128		struct extent_info *back, enum extent_type type)
 129{
 130	return __is_extent_mergeable(back, cur, type);
 131}
 132
 133static bool __is_front_mergeable(struct extent_info *cur,
 134		struct extent_info *front, enum extent_type type)
 135{
 136	return __is_extent_mergeable(cur, front, type);
 137}
 138
 139static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
 140							unsigned int ofs)
 141{
 142	if (cached_re) {
 143		if (cached_re->ofs <= ofs &&
 144				cached_re->ofs + cached_re->len > ofs) {
 145			return cached_re;
 146		}
 147	}
 148	return NULL;
 149}
 150
 151static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
 152							unsigned int ofs)
 153{
 154	struct rb_node *node = root->rb_root.rb_node;
 155	struct rb_entry *re;
 156
 157	while (node) {
 158		re = rb_entry(node, struct rb_entry, rb_node);
 159
 160		if (ofs < re->ofs)
 161			node = node->rb_left;
 162		else if (ofs >= re->ofs + re->len)
 163			node = node->rb_right;
 164		else
 165			return re;
 166	}
 167	return NULL;
 168}
 169
 170struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
 171				struct rb_entry *cached_re, unsigned int ofs)
 172{
 173	struct rb_entry *re;
 174
 175	re = __lookup_rb_tree_fast(cached_re, ofs);
 176	if (!re)
 177		return __lookup_rb_tree_slow(root, ofs);
 178
 179	return re;
 180}
 181
 182struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
 183					struct rb_root_cached *root,
 184					struct rb_node **parent,
 185					unsigned long long key, bool *leftmost)
 186{
 187	struct rb_node **p = &root->rb_root.rb_node;
 188	struct rb_entry *re;
 189
 190	while (*p) {
 191		*parent = *p;
 192		re = rb_entry(*parent, struct rb_entry, rb_node);
 193
 194		if (key < re->key) {
 195			p = &(*p)->rb_left;
 196		} else {
 197			p = &(*p)->rb_right;
 198			*leftmost = false;
 199		}
 200	}
 201
 202	return p;
 203}
 204
 205struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
 206				struct rb_root_cached *root,
 207				struct rb_node **parent,
 208				unsigned int ofs, bool *leftmost)
 209{
 210	struct rb_node **p = &root->rb_root.rb_node;
 211	struct rb_entry *re;
 212
 213	while (*p) {
 214		*parent = *p;
 215		re = rb_entry(*parent, struct rb_entry, rb_node);
 216
 217		if (ofs < re->ofs) {
 218			p = &(*p)->rb_left;
 219		} else if (ofs >= re->ofs + re->len) {
 220			p = &(*p)->rb_right;
 221			*leftmost = false;
 222		} else {
 223			f2fs_bug_on(sbi, 1);
 224		}
 225	}
 226
 227	return p;
 228}
 229
 230/*
 231 * lookup rb entry in position of @ofs in rb-tree,
 232 * if hit, return the entry, otherwise, return NULL
 233 * @prev_ex: extent before ofs
 234 * @next_ex: extent after ofs
 235 * @insert_p: insert point for new extent at ofs
 236 * in order to simpfy the insertion after.
 237 * tree must stay unchanged between lookup and insertion.
 238 */
 239struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 240				struct rb_entry *cached_re,
 241				unsigned int ofs,
 242				struct rb_entry **prev_entry,
 243				struct rb_entry **next_entry,
 244				struct rb_node ***insert_p,
 245				struct rb_node **insert_parent,
 246				bool force, bool *leftmost)
 247{
 248	struct rb_node **pnode = &root->rb_root.rb_node;
 249	struct rb_node *parent = NULL, *tmp_node;
 250	struct rb_entry *re = cached_re;
 251
 252	*insert_p = NULL;
 253	*insert_parent = NULL;
 254	*prev_entry = NULL;
 255	*next_entry = NULL;
 256
 257	if (RB_EMPTY_ROOT(&root->rb_root))
 258		return NULL;
 259
 260	if (re) {
 261		if (re->ofs <= ofs && re->ofs + re->len > ofs)
 262			goto lookup_neighbors;
 263	}
 264
 265	if (leftmost)
 266		*leftmost = true;
 267
 268	while (*pnode) {
 269		parent = *pnode;
 270		re = rb_entry(*pnode, struct rb_entry, rb_node);
 271
 272		if (ofs < re->ofs) {
 273			pnode = &(*pnode)->rb_left;
 274		} else if (ofs >= re->ofs + re->len) {
 275			pnode = &(*pnode)->rb_right;
 276			if (leftmost)
 277				*leftmost = false;
 278		} else {
 279			goto lookup_neighbors;
 280		}
 281	}
 282
 283	*insert_p = pnode;
 284	*insert_parent = parent;
 285
 286	re = rb_entry(parent, struct rb_entry, rb_node);
 287	tmp_node = parent;
 288	if (parent && ofs > re->ofs)
 289		tmp_node = rb_next(parent);
 290	*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
 291
 292	tmp_node = parent;
 293	if (parent && ofs < re->ofs)
 294		tmp_node = rb_prev(parent);
 295	*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
 296	return NULL;
 297
 298lookup_neighbors:
 299	if (ofs == re->ofs || force) {
 300		/* lookup prev node for merging backward later */
 301		tmp_node = rb_prev(&re->rb_node);
 302		*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
 303	}
 304	if (ofs == re->ofs + re->len - 1 || force) {
 305		/* lookup next node for merging frontward later */
 306		tmp_node = rb_next(&re->rb_node);
 307		*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
 308	}
 309	return re;
 310}
 311
 312bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
 313				struct rb_root_cached *root, bool check_key)
 314{
 315#ifdef CONFIG_F2FS_CHECK_FS
 316	struct rb_node *cur = rb_first_cached(root), *next;
 317	struct rb_entry *cur_re, *next_re;
 318
 319	if (!cur)
 320		return true;
 321
 322	while (cur) {
 323		next = rb_next(cur);
 324		if (!next)
 325			return true;
 326
 327		cur_re = rb_entry(cur, struct rb_entry, rb_node);
 328		next_re = rb_entry(next, struct rb_entry, rb_node);
 329
 330		if (check_key) {
 331			if (cur_re->key > next_re->key) {
 332				f2fs_info(sbi, "inconsistent rbtree, "
 333					"cur(%llu) next(%llu)",
 334					cur_re->key, next_re->key);
 335				return false;
 336			}
 337			goto next;
 338		}
 339
 340		if (cur_re->ofs + cur_re->len > next_re->ofs) {
 341			f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
 342				  cur_re->ofs, cur_re->len,
 343				  next_re->ofs, next_re->len);
 344			return false;
 345		}
 346next:
 347		cur = next;
 348	}
 349#endif
 350	return true;
 351}
 352
 353static struct kmem_cache *extent_tree_slab;
 354static struct kmem_cache *extent_node_slab;
 355
 356static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 357				struct extent_tree *et, struct extent_info *ei,
 358				struct rb_node *parent, struct rb_node **p,
 359				bool leftmost)
 360{
 361	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 362	struct extent_node *en;
 363
 364	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
 365	if (!en)
 366		return NULL;
 367
 368	en->ei = *ei;
 369	INIT_LIST_HEAD(&en->list);
 370	en->et = et;
 371
 372	rb_link_node(&en->rb_node, parent, p);
 373	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 374	atomic_inc(&et->node_cnt);
 375	atomic_inc(&eti->total_ext_node);
 376	return en;
 377}
 378
 379static void __detach_extent_node(struct f2fs_sb_info *sbi,
 380				struct extent_tree *et, struct extent_node *en)
 381{
 382	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 383
 384	rb_erase_cached(&en->rb_node, &et->root);
 385	atomic_dec(&et->node_cnt);
 386	atomic_dec(&eti->total_ext_node);
 387
 388	if (et->cached_en == en)
 389		et->cached_en = NULL;
 390	kmem_cache_free(extent_node_slab, en);
 391}
 392
 393/*
 394 * Flow to release an extent_node:
 395 * 1. list_del_init
 396 * 2. __detach_extent_node
 397 * 3. kmem_cache_free.
 398 */
 399static void __release_extent_node(struct f2fs_sb_info *sbi,
 400			struct extent_tree *et, struct extent_node *en)
 401{
 402	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 403
 404	spin_lock(&eti->extent_lock);
 405	f2fs_bug_on(sbi, list_empty(&en->list));
 406	list_del_init(&en->list);
 407	spin_unlock(&eti->extent_lock);
 408
 409	__detach_extent_node(sbi, et, en);
 410}
 411
 412static struct extent_tree *__grab_extent_tree(struct inode *inode,
 413						enum extent_type type)
 414{
 415	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 416	struct extent_tree_info *eti = &sbi->extent_tree[type];
 417	struct extent_tree *et;
 418	nid_t ino = inode->i_ino;
 419
 420	mutex_lock(&eti->extent_tree_lock);
 421	et = radix_tree_lookup(&eti->extent_tree_root, ino);
 422	if (!et) {
 423		et = f2fs_kmem_cache_alloc(extent_tree_slab,
 424					GFP_NOFS, true, NULL);
 425		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
 426		memset(et, 0, sizeof(struct extent_tree));
 427		et->ino = ino;
 428		et->type = type;
 429		et->root = RB_ROOT_CACHED;
 430		et->cached_en = NULL;
 431		rwlock_init(&et->lock);
 432		INIT_LIST_HEAD(&et->list);
 433		atomic_set(&et->node_cnt, 0);
 434		atomic_inc(&eti->total_ext_tree);
 435	} else {
 436		atomic_dec(&eti->total_zombie_tree);
 437		list_del_init(&et->list);
 438	}
 439	mutex_unlock(&eti->extent_tree_lock);
 440
 441	/* never died until evict_inode */
 442	F2FS_I(inode)->extent_tree[type] = et;
 443
 444	return et;
 445}
 446
 447static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
 448					struct extent_tree *et)
 449{
 450	struct rb_node *node, *next;
 451	struct extent_node *en;
 452	unsigned int count = atomic_read(&et->node_cnt);
 453
 454	node = rb_first_cached(&et->root);
 455	while (node) {
 456		next = rb_next(node);
 457		en = rb_entry(node, struct extent_node, rb_node);
 458		__release_extent_node(sbi, et, en);
 459		node = next;
 460	}
 461
 462	return count - atomic_read(&et->node_cnt);
 463}
 464
 465static void __drop_largest_extent(struct extent_tree *et,
 466					pgoff_t fofs, unsigned int len)
 467{
 468	if (fofs < et->largest.fofs + et->largest.len &&
 469			fofs + len > et->largest.fofs) {
 470		et->largest.len = 0;
 471		et->largest_updated = true;
 472	}
 473}
 474
 475void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
 476{
 477	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 478	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
 479	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
 480	struct extent_tree *et;
 481	struct extent_node *en;
 482	struct extent_info ei;
 483
 484	if (!__may_extent_tree(inode, EX_READ)) {
 485		/* drop largest read extent */
 486		if (i_ext && i_ext->len) {
 487			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 488			i_ext->len = 0;
 489			set_page_dirty(ipage);
 490		}
 491		goto out;
 492	}
 493
 494	et = __grab_extent_tree(inode, EX_READ);
 495
 496	if (!i_ext || !i_ext->len)
 497		goto out;
 498
 499	get_read_extent_info(&ei, i_ext);
 500
 501	write_lock(&et->lock);
 502	if (atomic_read(&et->node_cnt))
 503		goto unlock_out;
 504
 505	en = __attach_extent_node(sbi, et, &ei, NULL,
 506				&et->root.rb_root.rb_node, true);
 507	if (en) {
 508		et->largest = en->ei;
 509		et->cached_en = en;
 510
 511		spin_lock(&eti->extent_lock);
 512		list_add_tail(&en->list, &eti->extent_list);
 513		spin_unlock(&eti->extent_lock);
 514	}
 515unlock_out:
 516	write_unlock(&et->lock);
 517out:
 518	if (!F2FS_I(inode)->extent_tree[EX_READ])
 519		set_inode_flag(inode, FI_NO_EXTENT);
 520}
 521
 522void f2fs_init_age_extent_tree(struct inode *inode)
 523{
 524	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
 525		return;
 526	__grab_extent_tree(inode, EX_BLOCK_AGE);
 527}
 528
 529void f2fs_init_extent_tree(struct inode *inode)
 530{
 531	/* initialize read cache */
 532	if (__init_may_extent_tree(inode, EX_READ))
 533		__grab_extent_tree(inode, EX_READ);
 534
 535	/* initialize block age cache */
 536	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
 537		__grab_extent_tree(inode, EX_BLOCK_AGE);
 538}
 539
 540static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 541			struct extent_info *ei, enum extent_type type)
 542{
 543	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 544	struct extent_tree_info *eti = &sbi->extent_tree[type];
 545	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 546	struct extent_node *en;
 547	bool ret = false;
 548
 549	if (!et)
 550		return false;
 551
 552	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
 553
 554	read_lock(&et->lock);
 555
 556	if (type == EX_READ &&
 557			et->largest.fofs <= pgofs &&
 558			et->largest.fofs + et->largest.len > pgofs) {
 559		*ei = et->largest;
 560		ret = true;
 561		stat_inc_largest_node_hit(sbi);
 562		goto out;
 563	}
 564
 565	en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root,
 566				(struct rb_entry *)et->cached_en, pgofs);
 567	if (!en)
 568		goto out;
 569
 570	if (en == et->cached_en)
 571		stat_inc_cached_node_hit(sbi, type);
 572	else
 573		stat_inc_rbtree_node_hit(sbi, type);
 574
 575	*ei = en->ei;
 576	spin_lock(&eti->extent_lock);
 577	if (!list_empty(&en->list)) {
 578		list_move_tail(&en->list, &eti->extent_list);
 579		et->cached_en = en;
 580	}
 581	spin_unlock(&eti->extent_lock);
 582	ret = true;
 583out:
 584	stat_inc_total_hit(sbi, type);
 585	read_unlock(&et->lock);
 586
 587	if (type == EX_READ)
 588		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
 589	else if (type == EX_BLOCK_AGE)
 590		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
 591	return ret;
 592}
 593
 594static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
 595				struct extent_tree *et, struct extent_info *ei,
 596				struct extent_node *prev_ex,
 597				struct extent_node *next_ex)
 598{
 599	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 600	struct extent_node *en = NULL;
 601
 602	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
 603		prev_ex->ei.len += ei->len;
 604		ei = &prev_ex->ei;
 605		en = prev_ex;
 606	}
 607
 608	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
 609		next_ex->ei.fofs = ei->fofs;
 610		next_ex->ei.len += ei->len;
 611		if (et->type == EX_READ)
 612			next_ex->ei.blk = ei->blk;
 613		if (en)
 614			__release_extent_node(sbi, et, prev_ex);
 615
 616		en = next_ex;
 617	}
 618
 619	if (!en)
 620		return NULL;
 621
 622	__try_update_largest_extent(et, en);
 623
 624	spin_lock(&eti->extent_lock);
 625	if (!list_empty(&en->list)) {
 626		list_move_tail(&en->list, &eti->extent_list);
 627		et->cached_en = en;
 628	}
 629	spin_unlock(&eti->extent_lock);
 630	return en;
 631}
 632
 633static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 634				struct extent_tree *et, struct extent_info *ei,
 635				struct rb_node **insert_p,
 636				struct rb_node *insert_parent,
 637				bool leftmost)
 638{
 639	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 640	struct rb_node **p;
 641	struct rb_node *parent = NULL;
 642	struct extent_node *en = NULL;
 643
 644	if (insert_p && insert_parent) {
 645		parent = insert_parent;
 646		p = insert_p;
 647		goto do_insert;
 648	}
 649
 650	leftmost = true;
 651
 652	p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
 653						ei->fofs, &leftmost);
 654do_insert:
 655	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
 656	if (!en)
 657		return NULL;
 658
 659	__try_update_largest_extent(et, en);
 660
 661	/* update in global extent list */
 662	spin_lock(&eti->extent_lock);
 663	list_add_tail(&en->list, &eti->extent_list);
 664	et->cached_en = en;
 665	spin_unlock(&eti->extent_lock);
 666	return en;
 667}
 668
 669static void __update_extent_tree_range(struct inode *inode,
 670			struct extent_info *tei, enum extent_type type)
 671{
 672	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 673	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 674	struct extent_node *en = NULL, *en1 = NULL;
 675	struct extent_node *prev_en = NULL, *next_en = NULL;
 676	struct extent_info ei, dei, prev;
 677	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 678	unsigned int fofs = tei->fofs, len = tei->len;
 679	unsigned int end = fofs + len;
 680	bool updated = false;
 681	bool leftmost = false;
 682
 683	if (!et)
 684		return;
 685
 686	if (type == EX_READ)
 687		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
 688						tei->blk, 0);
 689	else if (type == EX_BLOCK_AGE)
 690		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
 691						tei->age, tei->last_blocks);
 692
 693	write_lock(&et->lock);
 694
 695	if (type == EX_READ) {
 696		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
 697			write_unlock(&et->lock);
 698			return;
 699		}
 700
 701		prev = et->largest;
 702		dei.len = 0;
 703
 704		/*
 705		 * drop largest extent before lookup, in case it's already
 706		 * been shrunk from extent tree
 707		 */
 708		__drop_largest_extent(et, fofs, len);
 709	}
 710
 711	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
 712	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
 713					(struct rb_entry *)et->cached_en, fofs,
 714					(struct rb_entry **)&prev_en,
 715					(struct rb_entry **)&next_en,
 716					&insert_p, &insert_parent, false,
 717					&leftmost);
 718	if (!en)
 719		en = next_en;
 720
 721	/* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
 722	while (en && en->ei.fofs < end) {
 723		unsigned int org_end;
 724		int parts = 0;	/* # of parts current extent split into */
 725
 726		next_en = en1 = NULL;
 727
 728		dei = en->ei;
 729		org_end = dei.fofs + dei.len;
 730		f2fs_bug_on(sbi, fofs >= org_end);
 731
 732		if (fofs > dei.fofs && (type != EX_READ ||
 733				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
 734			en->ei.len = fofs - en->ei.fofs;
 735			prev_en = en;
 736			parts = 1;
 737		}
 738
 739		if (end < org_end && (type != EX_READ ||
 740				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
 741			if (parts) {
 742				__set_extent_info(&ei,
 743					end, org_end - end,
 744					end - dei.fofs + dei.blk, false,
 745					dei.age, dei.last_blocks,
 746					type);
 747				en1 = __insert_extent_tree(sbi, et, &ei,
 748							NULL, NULL, true);
 749				next_en = en1;
 750			} else {
 751				__set_extent_info(&en->ei,
 752					end, en->ei.len - (end - dei.fofs),
 753					en->ei.blk + (end - dei.fofs), true,
 754					dei.age, dei.last_blocks,
 755					type);
 756				next_en = en;
 757			}
 758			parts++;
 759		}
 760
 761		if (!next_en) {
 762			struct rb_node *node = rb_next(&en->rb_node);
 763
 764			next_en = rb_entry_safe(node, struct extent_node,
 765						rb_node);
 766		}
 767
 768		if (parts)
 769			__try_update_largest_extent(et, en);
 770		else
 771			__release_extent_node(sbi, et, en);
 772
 773		/*
 774		 * if original extent is split into zero or two parts, extent
 775		 * tree has been altered by deletion or insertion, therefore
 776		 * invalidate pointers regard to tree.
 777		 */
 778		if (parts != 1) {
 779			insert_p = NULL;
 780			insert_parent = NULL;
 781		}
 782		en = next_en;
 783	}
 784
 785	if (type == EX_BLOCK_AGE)
 786		goto update_age_extent_cache;
 787
 788	/* 3. update extent in read extent cache */
 789	BUG_ON(type != EX_READ);
 790
 791	if (tei->blk) {
 792		__set_extent_info(&ei, fofs, len, tei->blk, false,
 793				  0, 0, EX_READ);
 794		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 795			__insert_extent_tree(sbi, et, &ei,
 796					insert_p, insert_parent, leftmost);
 797
 798		/* give up extent_cache, if split and small updates happen */
 799		if (dei.len >= 1 &&
 800				prev.len < F2FS_MIN_EXTENT_LEN &&
 801				et->largest.len < F2FS_MIN_EXTENT_LEN) {
 802			et->largest.len = 0;
 803			et->largest_updated = true;
 804			set_inode_flag(inode, FI_NO_EXTENT);
 805		}
 806	}
 807
 808	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 809		__free_extent_tree(sbi, et);
 810
 811	if (et->largest_updated) {
 812		et->largest_updated = false;
 813		updated = true;
 814	}
 815	goto out_read_extent_cache;
 816update_age_extent_cache:
 817	if (!tei->last_blocks)
 818		goto out_read_extent_cache;
 819
 820	__set_extent_info(&ei, fofs, len, 0, false,
 821			tei->age, tei->last_blocks, EX_BLOCK_AGE);
 822	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 823		__insert_extent_tree(sbi, et, &ei,
 824					insert_p, insert_parent, leftmost);
 825out_read_extent_cache:
 826	write_unlock(&et->lock);
 827
 828	if (updated)
 829		f2fs_mark_inode_dirty_sync(inode, true);
 830}
 831
 832#ifdef CONFIG_F2FS_FS_COMPRESSION
 833void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
 834				pgoff_t fofs, block_t blkaddr, unsigned int llen,
 835				unsigned int c_len)
 836{
 837	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 838	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
 839	struct extent_node *en = NULL;
 840	struct extent_node *prev_en = NULL, *next_en = NULL;
 841	struct extent_info ei;
 842	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 843	bool leftmost = false;
 844
 845	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
 846						blkaddr, c_len);
 847
 848	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
 849	if (is_inode_flag_set(inode, FI_NO_EXTENT))
 850		return;
 851
 852	write_lock(&et->lock);
 853
 854	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
 855				(struct rb_entry *)et->cached_en, fofs,
 856				(struct rb_entry **)&prev_en,
 857				(struct rb_entry **)&next_en,
 858				&insert_p, &insert_parent, false,
 859				&leftmost);
 860	if (en)
 861		goto unlock_out;
 862
 863	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
 864	ei.c_len = c_len;
 865
 866	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 867		__insert_extent_tree(sbi, et, &ei,
 868				insert_p, insert_parent, leftmost);
 869unlock_out:
 870	write_unlock(&et->lock);
 871}
 872#endif
 873
 874static unsigned long long __calculate_block_age(unsigned long long new,
 875						unsigned long long old)
 876{
 877	unsigned long long diff;
 878
 879	diff = (new >= old) ? new - (new - old) : new + (old - new);
 880
 881	return div_u64(diff * LAST_AGE_WEIGHT, 100);
 882}
 883
 884/* This returns a new age and allocated blocks in ei */
 885static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
 886						block_t blkaddr)
 887{
 888	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 889	loff_t f_size = i_size_read(inode);
 890	unsigned long long cur_blocks =
 891				atomic64_read(&sbi->allocated_data_blocks);
 892	struct extent_info tei = *ei;	/* only fofs and len are valid */
 893
 894	/*
 895	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
 896	 * file block even in seq write. So don't record age for newly last file
 897	 * block here.
 898	 */
 899	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
 900			blkaddr == NEW_ADDR)
 901		return -EINVAL;
 902
 903	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
 904		unsigned long long cur_age;
 905
 906		if (cur_blocks >= tei.last_blocks)
 907			cur_age = cur_blocks - tei.last_blocks;
 908		else
 909			/* allocated_data_blocks overflow */
 910			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
 911
 912		if (tei.age)
 913			ei->age = __calculate_block_age(cur_age, tei.age);
 914		else
 915			ei->age = cur_age;
 916		ei->last_blocks = cur_blocks;
 917		WARN_ON(ei->age > cur_blocks);
 918		return 0;
 919	}
 920
 921	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
 922
 923	/* the data block was allocated for the first time */
 924	if (blkaddr == NEW_ADDR)
 925		goto out;
 926
 927	if (__is_valid_data_blkaddr(blkaddr) &&
 928	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
 929		f2fs_bug_on(sbi, 1);
 930		return -EINVAL;
 931	}
 932out:
 933	/*
 934	 * init block age with zero, this can happen when the block age extent
 935	 * was reclaimed due to memory constraint or system reboot
 936	 */
 937	ei->age = 0;
 938	ei->last_blocks = cur_blocks;
 939	return 0;
 940}
 941
 942static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
 943{
 944	struct extent_info ei = {};
 945
 946	if (!__may_extent_tree(dn->inode, type))
 947		return;
 948
 949	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
 950								dn->ofs_in_node;
 951	ei.len = 1;
 952
 953	if (type == EX_READ) {
 954		if (dn->data_blkaddr == NEW_ADDR)
 955			ei.blk = NULL_ADDR;
 956		else
 957			ei.blk = dn->data_blkaddr;
 958	} else if (type == EX_BLOCK_AGE) {
 959		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
 960			return;
 961	}
 962	__update_extent_tree_range(dn->inode, &ei, type);
 963}
 964
 965static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
 966					enum extent_type type)
 967{
 968	struct extent_tree_info *eti = &sbi->extent_tree[type];
 969	struct extent_tree *et, *next;
 970	struct extent_node *en;
 971	unsigned int node_cnt = 0, tree_cnt = 0;
 972	int remained;
 973
 974	if (!atomic_read(&eti->total_zombie_tree))
 975		goto free_node;
 976
 977	if (!mutex_trylock(&eti->extent_tree_lock))
 978		goto out;
 979
 980	/* 1. remove unreferenced extent tree */
 981	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
 982		if (atomic_read(&et->node_cnt)) {
 983			write_lock(&et->lock);
 984			node_cnt += __free_extent_tree(sbi, et);
 985			write_unlock(&et->lock);
 986		}
 987		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
 988		list_del_init(&et->list);
 989		radix_tree_delete(&eti->extent_tree_root, et->ino);
 990		kmem_cache_free(extent_tree_slab, et);
 991		atomic_dec(&eti->total_ext_tree);
 992		atomic_dec(&eti->total_zombie_tree);
 993		tree_cnt++;
 994
 995		if (node_cnt + tree_cnt >= nr_shrink)
 996			goto unlock_out;
 997		cond_resched();
 998	}
 999	mutex_unlock(&eti->extent_tree_lock);
1000
1001free_node:
1002	/* 2. remove LRU extent entries */
1003	if (!mutex_trylock(&eti->extent_tree_lock))
1004		goto out;
1005
1006	remained = nr_shrink - (node_cnt + tree_cnt);
1007
1008	spin_lock(&eti->extent_lock);
1009	for (; remained > 0; remained--) {
1010		if (list_empty(&eti->extent_list))
1011			break;
1012		en = list_first_entry(&eti->extent_list,
1013					struct extent_node, list);
1014		et = en->et;
1015		if (!write_trylock(&et->lock)) {
1016			/* refresh this extent node's position in extent list */
1017			list_move_tail(&en->list, &eti->extent_list);
1018			continue;
1019		}
1020
1021		list_del_init(&en->list);
1022		spin_unlock(&eti->extent_lock);
1023
1024		__detach_extent_node(sbi, et, en);
1025
1026		write_unlock(&et->lock);
1027		node_cnt++;
1028		spin_lock(&eti->extent_lock);
1029	}
1030	spin_unlock(&eti->extent_lock);
1031
1032unlock_out:
1033	mutex_unlock(&eti->extent_tree_lock);
1034out:
1035	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
1036
1037	return node_cnt + tree_cnt;
1038}
1039
1040/* read extent cache operations */
1041bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
1042				struct extent_info *ei)
1043{
1044	if (!__may_extent_tree(inode, EX_READ))
1045		return false;
1046
1047	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
1048}
1049
1050void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
1051{
1052	return __update_extent_cache(dn, EX_READ);
1053}
1054
1055void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
1056				pgoff_t fofs, block_t blkaddr, unsigned int len)
1057{
1058	struct extent_info ei = {
1059		.fofs = fofs,
1060		.len = len,
1061		.blk = blkaddr,
1062	};
1063
1064	if (!__may_extent_tree(dn->inode, EX_READ))
1065		return;
1066
1067	__update_extent_tree_range(dn->inode, &ei, EX_READ);
1068}
1069
1070unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1071{
1072	if (!test_opt(sbi, READ_EXTENT_CACHE))
1073		return 0;
1074
1075	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1076}
1077
1078/* block age extent cache operations */
1079bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1080				struct extent_info *ei)
1081{
1082	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1083		return false;
1084
1085	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1086}
1087
1088void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1089{
1090	return __update_extent_cache(dn, EX_BLOCK_AGE);
1091}
1092
1093void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1094				pgoff_t fofs, unsigned int len)
1095{
1096	struct extent_info ei = {
1097		.fofs = fofs,
1098		.len = len,
1099	};
1100
1101	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1102		return;
1103
1104	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1105}
1106
1107unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1108{
1109	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1110		return 0;
1111
1112	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1113}
1114
1115static unsigned int __destroy_extent_node(struct inode *inode,
1116					enum extent_type type)
1117{
1118	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1119	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1120	unsigned int node_cnt = 0;
1121
1122	if (!et || !atomic_read(&et->node_cnt))
1123		return 0;
1124
1125	write_lock(&et->lock);
1126	node_cnt = __free_extent_tree(sbi, et);
1127	write_unlock(&et->lock);
1128
1129	return node_cnt;
1130}
1131
1132void f2fs_destroy_extent_node(struct inode *inode)
1133{
1134	__destroy_extent_node(inode, EX_READ);
1135	__destroy_extent_node(inode, EX_BLOCK_AGE);
1136}
1137
1138static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1139{
1140	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1141	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1142	bool updated = false;
1143
1144	if (!__may_extent_tree(inode, type))
1145		return;
1146
1147	write_lock(&et->lock);
1148	__free_extent_tree(sbi, et);
1149	if (type == EX_READ) {
1150		set_inode_flag(inode, FI_NO_EXTENT);
1151		if (et->largest.len) {
1152			et->largest.len = 0;
1153			updated = true;
1154		}
1155	}
1156	write_unlock(&et->lock);
1157	if (updated)
1158		f2fs_mark_inode_dirty_sync(inode, true);
1159}
1160
1161void f2fs_drop_extent_tree(struct inode *inode)
1162{
1163	__drop_extent_tree(inode, EX_READ);
1164	__drop_extent_tree(inode, EX_BLOCK_AGE);
1165}
1166
1167static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1168{
1169	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1170	struct extent_tree_info *eti = &sbi->extent_tree[type];
1171	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1172	unsigned int node_cnt = 0;
1173
1174	if (!et)
1175		return;
1176
1177	if (inode->i_nlink && !is_bad_inode(inode) &&
1178					atomic_read(&et->node_cnt)) {
1179		mutex_lock(&eti->extent_tree_lock);
1180		list_add_tail(&et->list, &eti->zombie_list);
1181		atomic_inc(&eti->total_zombie_tree);
1182		mutex_unlock(&eti->extent_tree_lock);
1183		return;
1184	}
1185
1186	/* free all extent info belong to this extent tree */
1187	node_cnt = __destroy_extent_node(inode, type);
1188
1189	/* delete extent tree entry in radix tree */
1190	mutex_lock(&eti->extent_tree_lock);
1191	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1192	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1193	kmem_cache_free(extent_tree_slab, et);
1194	atomic_dec(&eti->total_ext_tree);
1195	mutex_unlock(&eti->extent_tree_lock);
1196
1197	F2FS_I(inode)->extent_tree[type] = NULL;
1198
1199	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1200}
1201
1202void f2fs_destroy_extent_tree(struct inode *inode)
1203{
1204	__destroy_extent_tree(inode, EX_READ);
1205	__destroy_extent_tree(inode, EX_BLOCK_AGE);
1206}
1207
1208static void __init_extent_tree_info(struct extent_tree_info *eti)
1209{
1210	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1211	mutex_init(&eti->extent_tree_lock);
1212	INIT_LIST_HEAD(&eti->extent_list);
1213	spin_lock_init(&eti->extent_lock);
1214	atomic_set(&eti->total_ext_tree, 0);
1215	INIT_LIST_HEAD(&eti->zombie_list);
1216	atomic_set(&eti->total_zombie_tree, 0);
1217	atomic_set(&eti->total_ext_node, 0);
1218}
1219
1220void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1221{
1222	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1223	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1224
1225	/* initialize for block age extents */
1226	atomic64_set(&sbi->allocated_data_blocks, 0);
1227	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1228	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1229}
1230
1231int __init f2fs_create_extent_cache(void)
1232{
1233	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1234			sizeof(struct extent_tree));
1235	if (!extent_tree_slab)
1236		return -ENOMEM;
1237	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1238			sizeof(struct extent_node));
1239	if (!extent_node_slab) {
1240		kmem_cache_destroy(extent_tree_slab);
1241		return -ENOMEM;
1242	}
1243	return 0;
1244}
1245
1246void f2fs_destroy_extent_cache(void)
1247{
1248	kmem_cache_destroy(extent_node_slab);
1249	kmem_cache_destroy(extent_tree_slab);
1250}