v3.5.6
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#include <linux/slab.h>
  11#include <linux/spinlock.h>
  12#include <linux/completion.h>
  13#include <linux/buffer_head.h>
  14#include <linux/fs.h>
  15#include <linux/gfs2_ondisk.h>
  16#include <linux/prefetch.h>
  17#include <linux/blkdev.h>
  18#include <linux/rbtree.h>
  19
  20#include "gfs2.h"
  21#include "incore.h"
  22#include "glock.h"
  23#include "glops.h"
  24#include "lops.h"
  25#include "meta_io.h"
  26#include "quota.h"
  27#include "rgrp.h"
  28#include "super.h"
  29#include "trans.h"
  30#include "util.h"
  31#include "log.h"
  32#include "inode.h"
  33#include "trace_gfs2.h"
  34
  35#define BFITNOENT ((u32)~0)
  36#define NO_BLOCK ((u64)~0)
  37
  38#if BITS_PER_LONG == 32
  39#define LBITMASK   (0x55555555UL)
  40#define LBITSKIP55 (0x55555555UL)
  41#define LBITSKIP00 (0x00000000UL)
  42#else
  43#define LBITMASK   (0x5555555555555555UL)
  44#define LBITSKIP55 (0x5555555555555555UL)
  45#define LBITSKIP00 (0x0000000000000000UL)
  46#endif
  47
  48/*
  49 * These routines are used by the resource group routines (rgrp.c)
  50 * to keep track of block allocation.  Each block is represented by two
  51 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  52 *
  53 * 0 = Free
  54 * 1 = Used (not metadata)
  55 * 2 = Unlinked (still in use) inode
  56 * 3 = Used (metadata)
  57 */
  58
  59static const char valid_change[16] = {
  60	        /* current */
  61	/* n */ 0, 1, 1, 1,
  62	/* e */ 1, 0, 0, 0,
  63	/* w */ 0, 0, 0, 1,
  64	        1, 0, 0, 0
  65};
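As an aside, here is a small user-space sketch (not part of this file; the byte value and demo_* names are made up) of how one byte of bitmap data packs four 2-bit block states, and how valid_change is indexed with new_state * 4 + cur_state:

#include <stdio.h>

/* Mirrors GFS2_NBBY (4 blocks per byte), GFS2_BIT_SIZE (2 bits per block)
 * and GFS2_BIT_MASK (0x3). */
#define DEMO_NBBY     4
#define DEMO_BIT_SIZE 2
#define DEMO_BIT_MASK 0x3

static const char *demo_state[] = { "free", "used", "unlinked", "used (metadata)" };

int main(void)
{
	unsigned char byte = 0xE4;	/* 0b11100100: blocks 0..3 in states 0,1,2,3 */
	unsigned int blk;

	for (blk = 0; blk < DEMO_NBBY; blk++) {
		unsigned int bit = (blk % DEMO_NBBY) * DEMO_BIT_SIZE;
		unsigned int state = (byte >> bit) & DEMO_BIT_MASK;
		printf("block %u: state %u (%s)\n", blk, state, demo_state[state]);
	}
	/* valid_change[new_state * 4 + cur_state]: e.g. allocating a free block
	 * (1 * 4 + 0) yields 1 (allowed), while freeing an already free block
	 * (0 * 4 + 0) yields 0 and is treated as corruption. */
	return 0;
}

The same indexing is what gfs2_setbit() below relies on when it flips only the differing bits with *byte1 ^= (cur_state ^ new_state) << bit.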
  66
  67static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
  68			unsigned char old_state,
  69			struct gfs2_bitmap **rbi);
  70
  71/**
  72 * gfs2_setbit - Set a bit in the bitmaps
  73 * @rgd: the resource group descriptor
  74 * @buf2: the clone buffer that holds the bitmaps
  75 * @bi: the bitmap structure
  76 * @block: the block to set
  77 * @new_state: the new state of the block
  78 *
  79 */
  80
  81static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
  82			       struct gfs2_bitmap *bi, u32 block,
  83			       unsigned char new_state)
  84{
  85	unsigned char *byte1, *byte2, *end, cur_state;
  86	unsigned int buflen = bi->bi_len;
  87	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
  88
  89	byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY);
  90	end = bi->bi_bh->b_data + bi->bi_offset + buflen;
  91
  92	BUG_ON(byte1 >= end);
  93
  94	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
  95
  96	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
  97		printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
  98		       "new_state=%d\n",
  99		       (unsigned long long)block, cur_state, new_state);
 100		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
 101		       (unsigned long long)rgd->rd_addr,
 102		       (unsigned long)bi->bi_start);
 103		printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
 104		       (unsigned long)bi->bi_offset,
 105		       (unsigned long)bi->bi_len);
 106		dump_stack();
 107		gfs2_consist_rgrpd(rgd);
 108		return;
 109	}
 110	*byte1 ^= (cur_state ^ new_state) << bit;
 111
 112	if (buf2) {
 113		byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY);
 114		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
 115		*byte2 ^= (cur_state ^ new_state) << bit;
 116	}
 117}
 118
 119/**
 120 * gfs2_testbit - test a bit in the bitmaps
 121 * @rgd: the resource group descriptor
 122 * @buffer: the buffer that holds the bitmaps
 123 * @buflen: the length (in bytes) of the buffer
 124 * @block: the block to read
 125 *
 126 */
 127
 128static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
 129					 const unsigned char *buffer,
 130					 unsigned int buflen, u32 block)
 131{
 132	const unsigned char *byte, *end;
 133	unsigned char cur_state;
 134	unsigned int bit;
 135
 136	byte = buffer + (block / GFS2_NBBY);
 137	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
 138	end = buffer + buflen;
 139
 140	gfs2_assert(rgd->rd_sbd, byte < end);
 141
 142	cur_state = (*byte >> bit) & GFS2_BIT_MASK;
 143
 144	return cur_state;
 145}
 146
 147/**
 148 * gfs2_bit_search - search bitmap data for entries in a given state
 149 * @ptr: Pointer to bitmap data
 150 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 151 * @state: The state we are searching for
 152 *
 153 * We xor the bitmap data with a pattern which is the bitwise opposite
 154 * of what we are looking for; this gives rise to a pattern of ones
 155 * wherever there is a match. Since we have two bits per entry, we
 156 * take this pattern, shift it down by one place and then and it with
 157 * the original. All the even bit positions (0,2,4, etc) then represent
 158 * successful matches, so we mask with 0x55555..... to remove the unwanted
 159 * odd bit positions.
 160 *
 161 * This allows searching of a whole u64 at once (32 blocks) with a
 162 * single test (on 64 bit arches).
 163 */
 164
 165static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 166{
 167	u64 tmp;
 168	static const u64 search[] = {
 169		[0] = 0xffffffffffffffffULL,
 170		[1] = 0xaaaaaaaaaaaaaaaaULL,
 171		[2] = 0x5555555555555555ULL,
 172		[3] = 0x0000000000000000ULL,
 173	};
 174	tmp = le64_to_cpu(*ptr) ^ search[state];
 175	tmp &= (tmp >> 1);
 176	tmp &= mask;
 177	return tmp;
 178}
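To see the trick from the comment above on a single word, here is a standalone user-space sketch (the bitmap word is hypothetical and the demo_* names are not from the kernel): the XOR turns matching 2-bit entries into 0b11, ANDing with the value shifted right by one keeps a 1 only where both bits matched, and the 0x5555... mask leaves one marker bit per entry.

#include <stdio.h>
#include <stdint.h>

/* Same per-state XOR patterns as the search[] table above. */
static const uint64_t demo_search[] = {
	0xffffffffffffffffULL,	/* state 0: free */
	0xaaaaaaaaaaaaaaaaULL,	/* state 1: used */
	0x5555555555555555ULL,	/* state 2: unlinked */
	0x0000000000000000ULL,	/* state 3: used (metadata) */
};

static uint64_t demo_bit_search(uint64_t word, uint64_t mask, unsigned state)
{
	uint64_t tmp = word ^ demo_search[state];
	tmp &= tmp >> 1;	/* both bits of an entry must match */
	tmp &= mask;		/* keep only the even (entry-aligned) positions */
	return tmp;
}

int main(void)
{
	uint64_t word = 0x5ULL;	/* entries 0 and 1 "used", everything else "free" */
	uint64_t hits = demo_bit_search(word, 0x5555555555555555ULL, 0);

	if (hits)	/* lowest set bit / 2 gives the first matching entry */
		printf("first free entry: %u\n",
		       (unsigned)(__builtin_ctzll(hits) / 2));
	return 0;
}

This prints "first free entry: 2"; gfs2_bitfit() below does the same division by two after __ffs64(), and reads the on-disk __le64 words through le64_to_cpu() first.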
 179
 180/**
 181 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 182 *       a block in a given allocation state.
 183 * @buf: the buffer that holds the bitmaps
 184 * @len: the length (in bytes) of the buffer
 185 * @goal: start search at this block's bit-pair (within @buffer)
 186 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 187 *
 188 * Scope of @goal and returned block number is only within this bitmap buffer,
 189 * not entire rgrp or filesystem.  @buf will be offset from the actual
 190 * beginning of a bitmap block buffer, skipping any header structures, but
 191 * headers are always a multiple of 64 bits long so that the buffer is
 192 * always aligned to a 64 bit boundary.
 193 *
 194 * The size of the buffer is in bytes, but it is assumed that it is
 195 * always ok to read a complete multiple of 64 bits at the end
 196 * of the block in case the end is not aligned to a natural boundary.
 197 *
 198 * Return: the block number (bitmap buffer scope) that was found
 199 */
 200
 201static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 202		       u32 goal, u8 state)
 203{
 204	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
 205	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
 206	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
 207	u64 tmp;
 208	u64 mask = 0x5555555555555555ULL;
 209	u32 bit;
 210
 211	BUG_ON(state > 3);
 212
 213	/* Mask off bits we don't care about at the start of the search */
 214	mask <<= spoint;
 215	tmp = gfs2_bit_search(ptr, mask, state);
 216	ptr++;
 217	while(tmp == 0 && ptr < end) {
 218		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
 219		ptr++;
 220	}
 221	/* Mask off any bits which are more than len bytes from the start */
 222	if (ptr == end && (len & (sizeof(u64) - 1)))
 223		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
 224	/* Didn't find anything, so return */
 225	if (tmp == 0)
 226		return BFITNOENT;
 227	ptr--;
 228	bit = __ffs64(tmp);
 229	bit /= 2;	/* two bits per entry in the bitmap */
 230	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
 231}
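The scope rules described above can be checked with a naive byte-by-byte equivalent (a user-space sketch only; the real function scans a u64 at a time): the goal and the returned value are both entry numbers within this one bitmap buffer, and BFITNOENT means nothing was found.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define DEMO_BFITNOENT ((uint32_t)~0)

/* Naive linear scan over 2-bit entries; semantically what gfs2_bitfit()
 * returns, without the word-at-a-time optimisation. */
static uint32_t demo_bitfit(const uint8_t *buf, unsigned len, uint32_t goal,
			    unsigned state)
{
	uint32_t nblocks = len * 4;

	for (uint32_t b = goal; b < nblocks; b++) {
		unsigned bit = (b % 4) * 2;

		if (((buf[b / 4] >> bit) & 0x3) == state)
			return b;
	}
	return DEMO_BFITNOENT;
}

int main(void)
{
	uint8_t bitmap[8];

	memset(bitmap, 0x55, sizeof(bitmap));	/* every entry "used" (01) */
	bitmap[3] = 0x51;			/* entry 13 left free (00) */

	printf("first free at/after entry 5: %u\n",
	       demo_bitfit(bitmap, sizeof(bitmap), 5, 0));	/* prints 13 */
	return 0;
}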
 232
 233/**
 234 * gfs2_bitcount - count the number of bits in a certain state
 235 * @rgd: the resource group descriptor
 236 * @buffer: the buffer that holds the bitmaps
 237 * @buflen: the length (in bytes) of the buffer
 238 * @state: the state of the block we're looking for
 239 *
 240 * Returns: The number of bits
 241 */
 242
 243static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 244			 unsigned int buflen, u8 state)
 245{
 246	const u8 *byte = buffer;
 247	const u8 *end = buffer + buflen;
 248	const u8 state1 = state << 2;
 249	const u8 state2 = state << 4;
 250	const u8 state3 = state << 6;
 251	u32 count = 0;
 252
 253	for (; byte < end; byte++) {
 254		if (((*byte) & 0x03) == state)
 255			count++;
 256		if (((*byte) & 0x0C) == state1)
 257			count++;
 258		if (((*byte) & 0x30) == state2)
 259			count++;
 260		if (((*byte) & 0xC0) == state3)
 261			count++;
 262	}
 263
 264	return count;
 265}
 266
 267/**
 268 * gfs2_rgrp_verify - Verify that a resource group is consistent
 269 * @rgd: the rgrp
 270 *
 271 */
 272
 273void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 274{
 275	struct gfs2_sbd *sdp = rgd->rd_sbd;
 276	struct gfs2_bitmap *bi = NULL;
 277	u32 length = rgd->rd_length;
 278	u32 count[4], tmp;
 279	int buf, x;
 280
 281	memset(count, 0, 4 * sizeof(u32));
 282
 283	/* Count # blocks in each of 4 possible allocation states */
 284	for (buf = 0; buf < length; buf++) {
 285		bi = rgd->rd_bits + buf;
 286		for (x = 0; x < 4; x++)
 287			count[x] += gfs2_bitcount(rgd,
 288						  bi->bi_bh->b_data +
 289						  bi->bi_offset,
 290						  bi->bi_len, x);
 291	}
 292
 293	if (count[0] != rgd->rd_free) {
 294		if (gfs2_consist_rgrpd(rgd))
 295			fs_err(sdp, "free data mismatch:  %u != %u\n",
 296			       count[0], rgd->rd_free);
 297		return;
 298	}
 299
 300	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 301	if (count[1] != tmp) {
 302		if (gfs2_consist_rgrpd(rgd))
 303			fs_err(sdp, "used data mismatch:  %u != %u\n",
 304			       count[1], tmp);
 305		return;
 306	}
 307
 308	if (count[2] + count[3] != rgd->rd_dinodes) {
 309		if (gfs2_consist_rgrpd(rgd))
 310			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 311			       count[2] + count[3], rgd->rd_dinodes);
 312		return;
 313	}
 314}
 315
 316static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 317{
 318	u64 first = rgd->rd_data0;
 319	u64 last = first + rgd->rd_data;
 320	return first <= block && block < last;
 321}
 322
 323/**
 324 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 325 * @sdp: The GFS2 superblock
 326 * @blk: The data block number
 327 * @exact: True if this needs to be an exact match
 328 *
 329 * Returns: The resource group, or NULL if not found
 330 */
 331
 332struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
 333{
 334	struct rb_node *n, *next;
 335	struct gfs2_rgrpd *cur;
 336
 337	spin_lock(&sdp->sd_rindex_spin);
 338	n = sdp->sd_rindex_tree.rb_node;
 339	while (n) {
 340		cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
 341		next = NULL;
 342		if (blk < cur->rd_addr)
 343			next = n->rb_left;
 344		else if (blk >= cur->rd_data0 + cur->rd_data)
 345			next = n->rb_right;
 346		if (next == NULL) {
 347			spin_unlock(&sdp->sd_rindex_spin);
 348			if (exact) {
 349				if (blk < cur->rd_addr)
 350					return NULL;
 351				if (blk >= cur->rd_data0 + cur->rd_data)
 352					return NULL;
 353			}
 354			return cur;
 355		}
 356		n = next;
 357	}
 358	spin_unlock(&sdp->sd_rindex_spin);
 359
 360	return NULL;
 361}
 362
 363/**
 364 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 365 * @sdp: The GFS2 superblock
 366 *
 367 * Returns: The first rgrp in the filesystem
 368 */
 369
 370struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 371{
 372	const struct rb_node *n;
 373	struct gfs2_rgrpd *rgd;
 374
 375	spin_lock(&sdp->sd_rindex_spin);
 376	n = rb_first(&sdp->sd_rindex_tree);
 377	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 378	spin_unlock(&sdp->sd_rindex_spin);
 379
 380	return rgd;
 381}
 382
 383/**
 384 * gfs2_rgrpd_get_next - get the next RG
 385 * @rgd: the resource group descriptor
 386 *
 387 * Returns: The next rgrp
 388 */
 389
 390struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 391{
 392	struct gfs2_sbd *sdp = rgd->rd_sbd;
 393	const struct rb_node *n;
 394
 395	spin_lock(&sdp->sd_rindex_spin);
 396	n = rb_next(&rgd->rd_node);
 397	if (n == NULL)
 398		n = rb_first(&sdp->sd_rindex_tree);
 399
 400	if (unlikely(&rgd->rd_node == n)) {
 401		spin_unlock(&sdp->sd_rindex_spin);
 402		return NULL;
 403	}
 404	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 405	spin_unlock(&sdp->sd_rindex_spin);
 406	return rgd;
 407}
 408
 409void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 410{
 411	int x;
 412
 413	for (x = 0; x < rgd->rd_length; x++) {
 414		struct gfs2_bitmap *bi = rgd->rd_bits + x;
 415		kfree(bi->bi_clone);
 416		bi->bi_clone = NULL;
 417	}
 418}
 419
 420void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 421{
 422	struct rb_node *n;
 423	struct gfs2_rgrpd *rgd;
 424	struct gfs2_glock *gl;
 425
 426	while ((n = rb_first(&sdp->sd_rindex_tree))) {
 427		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
 428		gl = rgd->rd_gl;
 429
 430		rb_erase(n, &sdp->sd_rindex_tree);
 431
 432		if (gl) {
 433			spin_lock(&gl->gl_spin);
 434			gl->gl_object = NULL;
 435			spin_unlock(&gl->gl_spin);
 436			gfs2_glock_add_to_lru(gl);
 437			gfs2_glock_put(gl);
 438		}
 439
 440		gfs2_free_clones(rgd);
 441		kfree(rgd->rd_bits);
 442		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 443	}
 444}
 445
 446static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
 447{
 448	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
 449	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
 450	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
 451	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
 452	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
 453}
 454
 455/**
 456 * compute_bitstructs - Compute the bitmap sizes
 457 * @rgd: The resource group descriptor
 458 *
 459 * Calculates bitmap descriptors, one for each block that contains bitmap data
 460 *
 461 * Returns: errno
 462 */
 463
 464static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 465{
 466	struct gfs2_sbd *sdp = rgd->rd_sbd;
 467	struct gfs2_bitmap *bi;
 468	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 469	u32 bytes_left, bytes;
 470	int x;
 471
 472	if (!length)
 473		return -EINVAL;
 474
 475	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
 476	if (!rgd->rd_bits)
 477		return -ENOMEM;
 478
 479	bytes_left = rgd->rd_bitbytes;
 480
 481	for (x = 0; x < length; x++) {
 482		bi = rgd->rd_bits + x;
 483
 484		bi->bi_flags = 0;
 485		/* small rgrp; bitmap stored completely in header block */
 486		if (length == 1) {
 487			bytes = bytes_left;
 488			bi->bi_offset = sizeof(struct gfs2_rgrp);
 489			bi->bi_start = 0;
 490			bi->bi_len = bytes;
 491		/* header block */
 492		} else if (x == 0) {
 493			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
 494			bi->bi_offset = sizeof(struct gfs2_rgrp);
 495			bi->bi_start = 0;
 496			bi->bi_len = bytes;
 497		/* last block */
 498		} else if (x + 1 == length) {
 499			bytes = bytes_left;
 500			bi->bi_offset = sizeof(struct gfs2_meta_header);
 501			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 502			bi->bi_len = bytes;
 503		/* other blocks */
 504		} else {
 505			bytes = sdp->sd_sb.sb_bsize -
 506				sizeof(struct gfs2_meta_header);
 507			bi->bi_offset = sizeof(struct gfs2_meta_header);
 508			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 509			bi->bi_len = bytes;
 510		}
 511
 512		bytes_left -= bytes;
 513	}
 514
 515	if (bytes_left) {
 516		gfs2_consist_rgrpd(rgd);
 517		return -EIO;
 518	}
 519	bi = rgd->rd_bits + (length - 1);
 520	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
 521		if (gfs2_consist_rgrpd(rgd)) {
 522			gfs2_rindex_print(rgd);
 523			fs_err(sdp, "start=%u len=%u offset=%u\n",
 524			       bi->bi_start, bi->bi_len, bi->bi_offset);
 525		}
 526		return -EIO;
 527	}
 528
 529	return 0;
 530}
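To make the layout concrete, the following user-space sketch walks the same four cases with made-up numbers (a 4096-byte block size, and header sizes of 128 and 24 bytes standing in for sizeof(struct gfs2_rgrp) and sizeof(struct gfs2_meta_header); the real values come from gfs2_ondisk.h):

#include <stdio.h>
#include <stdint.h>

#define DEMO_BSIZE    4096u
#define DEMO_RGRP_HDR  128u	/* assumed sizeof(struct gfs2_rgrp) */
#define DEMO_META_HDR   24u	/* assumed sizeof(struct gfs2_meta_header) */

int main(void)
{
	uint32_t length = 3;		/* rgrp header block + two bitmap blocks */
	uint32_t bitbytes = 9000;	/* hypothetical rd_bitbytes */
	uint32_t bytes_left = bitbytes, bytes, x;

	for (x = 0; x < length; x++) {
		uint32_t offset, start;

		if (length == 1) {		/* whole bitmap fits in the header block */
			bytes = bytes_left;
			offset = DEMO_RGRP_HDR;
			start = 0;
		} else if (x == 0) {		/* header block */
			bytes = DEMO_BSIZE - DEMO_RGRP_HDR;
			offset = DEMO_RGRP_HDR;
			start = 0;
		} else if (x + 1 == length) {	/* last bitmap block */
			bytes = bytes_left;
			offset = DEMO_META_HDR;
			start = bitbytes - bytes_left;
		} else {			/* middle bitmap blocks */
			bytes = DEMO_BSIZE - DEMO_META_HDR;
			offset = DEMO_META_HDR;
			start = bitbytes - bytes_left;
		}
		printf("block %u: bi_offset=%u bi_start=%u bi_len=%u\n",
		       x, offset, start, bytes);
		bytes_left -= bytes;
	}
	return 0;
}

With these numbers the last block ends at bi_start + bi_len = 9000 bytes, i.e. 9000 * GFS2_NBBY = 36000 block states, which is exactly the rd_data consistency check performed above.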
 531
 532/**
 533 * gfs2_ri_total - Total up the file system space, according to the rindex.
 534 * @sdp: the filesystem
 535 *
 536 */
 537u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 538{
 539	u64 total_data = 0;	
 540	struct inode *inode = sdp->sd_rindex;
 541	struct gfs2_inode *ip = GFS2_I(inode);
 542	char buf[sizeof(struct gfs2_rindex)];
 543	int error, rgrps;
 544
 545	for (rgrps = 0;; rgrps++) {
 546		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 547
 548		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 549			break;
 550		error = gfs2_internal_read(ip, buf, &pos,
 551					   sizeof(struct gfs2_rindex));
 552		if (error != sizeof(struct gfs2_rindex))
 553			break;
 554		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 555	}
 556	return total_data;
 557}
 558
 559static int rgd_insert(struct gfs2_rgrpd *rgd)
 560{
 561	struct gfs2_sbd *sdp = rgd->rd_sbd;
 562	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
 563
 564	/* Figure out where to put new node */
 565	while (*newn) {
 566		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
 567						  rd_node);
 568
 569		parent = *newn;
 570		if (rgd->rd_addr < cur->rd_addr)
 571			newn = &((*newn)->rb_left);
 572		else if (rgd->rd_addr > cur->rd_addr)
 573			newn = &((*newn)->rb_right);
 574		else
 575			return -EEXIST;
 576	}
 577
 578	rb_link_node(&rgd->rd_node, parent, newn);
 579	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
 580	sdp->sd_rgrps++;
 581	return 0;
 582}
 583
 584/**
 585 * read_rindex_entry - Pull in a new resource index entry from the disk
 586 * @ip: Pointer to the rindex inode
 587 *
 588 * Returns: 0 on success, > 0 on EOF, error code otherwise
 589 */
 590
 591static int read_rindex_entry(struct gfs2_inode *ip)
 592{
 593	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 594	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
 595	struct gfs2_rindex buf;
 596	int error;
 597	struct gfs2_rgrpd *rgd;
 598
 599	if (pos >= i_size_read(&ip->i_inode))
 600		return 1;
 601
 602	error = gfs2_internal_read(ip, (char *)&buf, &pos,
 603				   sizeof(struct gfs2_rindex));
 604
 605	if (error != sizeof(struct gfs2_rindex))
 606		return (error == 0) ? 1 : error;
 607
 608	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
 609	error = -ENOMEM;
 610	if (!rgd)
 611		return error;
 612
 613	rgd->rd_sbd = sdp;
 614	rgd->rd_addr = be64_to_cpu(buf.ri_addr);
 615	rgd->rd_length = be32_to_cpu(buf.ri_length);
 616	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 617	rgd->rd_data = be32_to_cpu(buf.ri_data);
 618	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
 619
 620	error = compute_bitstructs(rgd);
 621	if (error)
 622		goto fail;
 623
 624	error = gfs2_glock_get(sdp, rgd->rd_addr,
 625			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
 626	if (error)
 627		goto fail;
 628
 629	rgd->rd_gl->gl_object = rgd;
 630	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
 631	if (rgd->rd_data > sdp->sd_max_rg_data)
 632		sdp->sd_max_rg_data = rgd->rd_data;
 633	spin_lock(&sdp->sd_rindex_spin);
 634	error = rgd_insert(rgd);
 635	spin_unlock(&sdp->sd_rindex_spin);
 636	if (!error)
 637		return 0;
 638
 639	error = 0; /* someone else read in the rgrp; free it and ignore it */
 640	gfs2_glock_put(rgd->rd_gl);
 641
 642fail:
 643	kfree(rgd->rd_bits);
 644	kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 645	return error;
 646}
 647
 648/**
 649 * gfs2_ri_update - Pull in a new resource index from the disk
 650 * @ip: pointer to the rindex inode
 651 *
 652 * Returns: 0 on successful update, error code otherwise
 653 */
 654
 655static int gfs2_ri_update(struct gfs2_inode *ip)
 656{
 657	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 658	int error;
 659
 660	do {
 661		error = read_rindex_entry(ip);
 662	} while (error == 0);
 663
 664	if (error < 0)
 665		return error;
 666
 667	sdp->sd_rindex_uptodate = 1;
 668	return 0;
 669}
 670
 671/**
 672 * gfs2_rindex_update - Update the rindex if required
 673 * @sdp: The GFS2 superblock
 674 *
 675 * We grab a lock on the rindex inode to make sure that it doesn't
 676 * change whilst we are performing an operation. We keep this lock
 677 * for quite long periods of time compared to other locks. This
 678 * doesn't matter, since it is shared and it is very, very rarely
 679 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 680 *
 681 * This makes sure that we're using the latest copy of the resource index
 682 * special file, which might have been updated if someone expanded the
 683 * filesystem (via gfs2_grow utility), which adds new resource groups.
 684 *
 685 * Returns: 0 on success, error code otherwise
 686 */
 687
 688int gfs2_rindex_update(struct gfs2_sbd *sdp)
 689{
 690	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
 691	struct gfs2_glock *gl = ip->i_gl;
 692	struct gfs2_holder ri_gh;
 693	int error = 0;
 694	int unlock_required = 0;
 695
 696	/* Read new copy from disk if we don't have the latest */
 697	if (!sdp->sd_rindex_uptodate) {
 698		if (!gfs2_glock_is_locked_by_me(gl)) {
 699			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
 700			if (error)
 701				return error;
 702			unlock_required = 1;
 703		}
 704		if (!sdp->sd_rindex_uptodate)
 705			error = gfs2_ri_update(ip);
 706		if (unlock_required)
 707			gfs2_glock_dq_uninit(&ri_gh);
 708	}
 709
 710	return error;
 711}
 712
 713static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 714{
 715	const struct gfs2_rgrp *str = buf;
 716	u32 rg_flags;
 717
 718	rg_flags = be32_to_cpu(str->rg_flags);
 719	rg_flags &= ~GFS2_RDF_MASK;
 720	rgd->rd_flags &= GFS2_RDF_MASK;
 721	rgd->rd_flags |= rg_flags;
 722	rgd->rd_free = be32_to_cpu(str->rg_free);
 723	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
 724	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
 725}
 726
 727static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 728{
 729	struct gfs2_rgrp *str = buf;
 730
 731	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
 732	str->rg_free = cpu_to_be32(rgd->rd_free);
 733	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
 734	str->__pad = cpu_to_be32(0);
 735	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
 736	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 737}
 738
 739/**
 740 * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
 741 * @gh: The glock holder for the resource group
 742 *
 743 * Read in all of a Resource Group's header and bitmap blocks.
 744 * Caller must eventually call gfs2_rgrp_go_unlock() to free the bitmaps.
 745 *
 746 * Returns: errno
 747 */
 748
 749int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
 750{
 751	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
 752	struct gfs2_sbd *sdp = rgd->rd_sbd;
 753	struct gfs2_glock *gl = rgd->rd_gl;
 754	unsigned int length = rgd->rd_length;
 755	struct gfs2_bitmap *bi;
 756	unsigned int x, y;
 757	int error;
 758
 759	for (x = 0; x < length; x++) {
 760		bi = rgd->rd_bits + x;
 761		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
 762		if (error)
 763			goto fail;
 764	}
 765
 766	for (y = length; y--;) {
 767		bi = rgd->rd_bits + y;
 768		error = gfs2_meta_wait(sdp, bi->bi_bh);
 769		if (error)
 770			goto fail;
 771		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
 772					      GFS2_METATYPE_RG)) {
 773			error = -EIO;
 774			goto fail;
 775		}
 776	}
 777
 778	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
 779		for (x = 0; x < length; x++)
 780			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
 781		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
 782		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 783		rgd->rd_free_clone = rgd->rd_free;
 784	}
 785
 786	return 0;
 787
 788fail:
 789	while (x--) {
 790		bi = rgd->rd_bits + x;
 791		brelse(bi->bi_bh);
 792		bi->bi_bh = NULL;
 793		gfs2_assert_warn(sdp, !bi->bi_clone);
 794	}
 795
 796	return error;
 797}
 798
 799/**
 800 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_go_lock()
 801 * @gh: The glock holder for the resource group
 802 *
 803 */
 804
 805void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
 806{
 807	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
 808	int x, length = rgd->rd_length;
 809
 810	for (x = 0; x < length; x++) {
 811		struct gfs2_bitmap *bi = rgd->rd_bits + x;
 812		brelse(bi->bi_bh);
 813		bi->bi_bh = NULL;
 814	}
 815
 816}
 817
 818int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 819			     struct buffer_head *bh,
 820			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
 821{
 822	struct super_block *sb = sdp->sd_vfs;
 823	struct block_device *bdev = sb->s_bdev;
 824	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
 825					   bdev_logical_block_size(sb->s_bdev);
 826	u64 blk;
 827	sector_t start = 0;
 828	sector_t nr_sects = 0;
 829	int rv;
 830	unsigned int x;
 831	u32 trimmed = 0;
 832	u8 diff;
 833
 834	for (x = 0; x < bi->bi_len; x++) {
 835		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
 836		clone += bi->bi_offset;
 837		clone += x;
 838		if (bh) {
 839			const u8 *orig = bh->b_data + bi->bi_offset + x;
 840			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
 841		} else {
 842			diff = ~(*clone | (*clone >> 1));
 843		}
 844		diff &= 0x55;
 845		if (diff == 0)
 846			continue;
 847		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
 848		blk *= sects_per_blk; /* convert to sectors */
 849		while(diff) {
 850			if (diff & 1) {
 851				if (nr_sects == 0)
 852					goto start_new_extent;
 853				if ((start + nr_sects) != blk) {
 854					if (nr_sects >= minlen) {
 855						rv = blkdev_issue_discard(bdev,
 856							start, nr_sects,
 857							GFP_NOFS, 0);
 858						if (rv)
 859							goto fail;
 860						trimmed += nr_sects;
 861					}
 862					nr_sects = 0;
 863start_new_extent:
 864					start = blk;
 865				}
 866				nr_sects += sects_per_blk;
 867			}
 868			diff >>= 2;
 869			blk += sects_per_blk;
 870		}
 871	}
 872	if (nr_sects >= minlen) {
 873		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
 874		if (rv)
 875			goto fail;
 876		trimmed += nr_sects;
 877	}
 878	if (ptrimmed)
 879		*ptrimmed = trimmed;
 880	return 0;
 881
 882fail:
 883	if (sdp->sd_args.ar_discard)
 884		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
 885	sdp->sd_args.ar_discard = 0;
 886	return -EIO;
 887}
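The per-byte diff computed above can be tried in isolation (a user-space sketch with made-up byte values): a bit ends up set at the even position of every 2-bit entry that is free (00) in the bitmap being written out but was still in use in the clone, i.e. a block freed since the clone was taken; when no comparison buffer is passed, as in the fitrim case, every free block qualifies.

#include <stdio.h>

int main(void)
{
	/* One byte covers four blocks, two bits each, least significant pair first. */
	unsigned char orig  = 0x44;	/* block0=00 free, block1=01 used, block2=00 free, block3=01 used */
	unsigned char clone = 0x45;	/* block0=01 used, block1=01 used, block2=00 free, block3=01 used */
	unsigned char diff;
	int blk;

	diff = ~(orig | (orig >> 1)) & (clone | (clone >> 1));
	diff &= 0x55;			/* one marker bit per 2-bit entry */

	for (blk = 0; blk < 4; blk++)
		if (diff & (1 << (2 * blk)))
			printf("block %d freed since the clone was taken\n", blk);
	return 0;
}

Only block 0 is reported: block 2 is free in both copies, so it was already free before the clone was taken and is not a new discard candidate on this path.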
 888
 889/**
 890 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
 891 * @filp: Any file on the filesystem
 892 * @argp: Pointer to the arguments (also used to pass result)
 893 *
 894 * Returns: 0 on success, otherwise error code
 895 */
 896
 897int gfs2_fitrim(struct file *filp, void __user *argp)
 898{
 899	struct inode *inode = filp->f_dentry->d_inode;
 900	struct gfs2_sbd *sdp = GFS2_SB(inode);
 901	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
 902	struct buffer_head *bh;
 903	struct gfs2_rgrpd *rgd;
 904	struct gfs2_rgrpd *rgd_end;
 905	struct gfs2_holder gh;
 906	struct fstrim_range r;
 907	int ret = 0;
 908	u64 amt;
 909	u64 trimmed = 0;
 910	unsigned int x;
 911
 912	if (!capable(CAP_SYS_ADMIN))
 913		return -EPERM;
 914
 915	if (!blk_queue_discard(q))
 916		return -EOPNOTSUPP;
 917
 918	if (argp == NULL) {
 919		r.start = 0;
 920		r.len = ULLONG_MAX;
 921		r.minlen = 0;
 922	} else if (copy_from_user(&r, argp, sizeof(r)))
 923		return -EFAULT;
 924
 925	ret = gfs2_rindex_update(sdp);
 926	if (ret)
 927		return ret;
 928
 929	rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
 930	rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
 931
 932	while (1) {
 933
 934		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
 935		if (ret)
 936			goto out;
 937
 938		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
 939			/* Trim each bitmap in the rgrp */
 940			for (x = 0; x < rgd->rd_length; x++) {
 941				struct gfs2_bitmap *bi = rgd->rd_bits + x;
 942				ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
 943				if (ret) {
 944					gfs2_glock_dq_uninit(&gh);
 945					goto out;
 946				}
 947				trimmed += amt;
 948			}
 949
 950			/* Mark rgrp as having been trimmed */
 951			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
 952			if (ret == 0) {
 953				bh = rgd->rd_bits[0].bi_bh;
 954				rgd->rd_flags |= GFS2_RGF_TRIMMED;
 955				gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
 956				gfs2_rgrp_out(rgd, bh->b_data);
 957				gfs2_trans_end(sdp);
 958			}
 959		}
 960		gfs2_glock_dq_uninit(&gh);
 961
 962		if (rgd == rgd_end)
 963			break;
 964
 965		rgd = gfs2_rgrpd_get_next(rgd);
 966	}
 967
 968out:
 969	r.len = trimmed << 9;
 970	if (argp && copy_to_user(argp, &r, sizeof(r)))
 971		return -EFAULT;
 972
 973	return ret;
 974}
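For context, this implements the generic FITRIM ioctl, so it can be driven from user space like any other filesystem that supports trimming; a minimal caller might look like the sketch below (the mount point is only an example, and error handling is kept to the bare minimum).

#include <stdio.h>
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range r = {
		.start  = 0,
		.len    = ULLONG_MAX,	/* trim the whole filesystem */
		.minlen = 0,
	};
	int fd = open("/mnt/gfs2", O_RDONLY);	/* any file or dir on the fs */

	if (fd < 0 || ioctl(fd, FITRIM, &r) < 0) {
		perror("FITRIM");
		return 1;
	}
	printf("trimmed %llu bytes\n", (unsigned long long)r.len);
	close(fd);
	return 0;
}

On return the kernel reports the number of bytes discarded back through r.len, which is what the r.len = trimmed << 9 line above feeds.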
 975
 976/**
 977 * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
 978 * @ip: the incore GFS2 inode structure
 979 *
 980 * Returns: the struct gfs2_qadata
 981 */
 982
 983struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
 984{
 985	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 986	int error;
 987	BUG_ON(ip->i_qadata != NULL);
 988	ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
 989	error = gfs2_rindex_update(sdp);
 990	if (error)
 991		fs_warn(sdp, "rindex update returns %d\n", error);
 992	return ip->i_qadata;
 993}
 994
 995/**
 996 * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode
 997 * @ip: the incore GFS2 inode structure
 998 *
 999 * Returns: 0 on success or -ENOMEM on failure
1000 */
1001
1002static int gfs2_blkrsv_get(struct gfs2_inode *ip)
1003{
1004	BUG_ON(ip->i_res != NULL);
1005	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
1006	if (!ip->i_res)
1007		return -ENOMEM;
1008	return 0;
1009}
1010
1011/**
1012 * try_rgrp_fit - See if a given reservation will fit in a given RG
1013 * @rgd: the RG data
1014 * @ip: the inode
1015 *
1016 * If there's room for the requested blocks to be allocated from the RG, it fits.
1017 *
1018 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
1019 */
1020
1021static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
1022{
1023	const struct gfs2_blkreserv *rs = ip->i_res;
1024
1025	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
1026		return 0;
1027	if (rgd->rd_free_clone >= rs->rs_requested)
1028		return 1;
1029	return 0;
1030}
1031
1032static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
1033{
1034	return (bi->bi_start * GFS2_NBBY) + blk;
1035}
1036
1037/**
1038 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
1039 * @rgd: The rgrp
1040 * @last_unlinked: block address of the last dinode we unlinked
1041 * @skip: block address we should explicitly not unlink
1042 *
1043 * Returns nothing; any unlinked inode that is found has its deletion
1044 * queued via the delete workqueue.
1045 */
1046
1047static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1048{
1049	u32 goal = 0, block;
1050	u64 no_addr;
1051	struct gfs2_sbd *sdp = rgd->rd_sbd;
1052	struct gfs2_glock *gl;
1053	struct gfs2_inode *ip;
1054	int error;
1055	int found = 0;
1056	struct gfs2_bitmap *bi;
1057
1058	while (goal < rgd->rd_data) {
1059		down_write(&sdp->sd_log_flush_lock);
1060		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi);
1061		up_write(&sdp->sd_log_flush_lock);
1062		if (block == BFITNOENT)
1063			break;
1064
1065		block = gfs2_bi2rgd_blk(bi, block);
1066		/* rgblk_search can return a block < goal, so we need to
1067		   keep it marching forward. */
1068		no_addr = block + rgd->rd_data0;
1069		goal = max(block + 1, goal + 1);
1070		if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
1071			continue;
1072		if (no_addr == skip)
1073			continue;
1074		*last_unlinked = no_addr;
1075
1076		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
1077		if (error)
1078			continue;
1079
1080		/* If the inode is already in cache, we can ignore it here
1081		 * because the existing inode disposal code will deal with
1082		 * it when all refs have gone away. Accessing gl_object like
1083		 * this is not safe in general. Here it is ok because we do
1084		 * not dereference the pointer, and we only need an approx
1085		 * answer to whether it is NULL or not.
1086		 */
1087		ip = gl->gl_object;
1088
1089		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1090			gfs2_glock_put(gl);
1091		else
1092			found++;
1093
1094		/* Limit reclaim to sensible number of tasks */
1095		if (found > NR_CPUS)
1096			return;
1097	}
1098
1099	rgd->rd_flags &= ~GFS2_RDF_CHECK;
1100	return;
1101}
1102
1103/**
1104 * get_local_rgrp - Choose and lock a rgrp for allocation
1105 * @ip: the inode to reserve space for
1106 * @last_unlinked: the last unlinked block
1107 *
1108 * Try to acquire rgrp in way which avoids contending with others.
1109 *
1110 * Returns: errno
1111 */
1112
1113static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1114{
1115	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1116	struct gfs2_rgrpd *rgd, *begin = NULL;
1117	struct gfs2_blkreserv *rs = ip->i_res;
1118	int error, rg_locked, flags = LM_FLAG_TRY;
1119	int loops = 0;
1120
1121	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
1122		rgd = begin = ip->i_rgd;
1123	else
1124		rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1125
1126	if (rgd == NULL)
1127		return -EBADSLT;
1128
1129	while (loops < 3) {
1130		rg_locked = 0;
1131
1132		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1133			rg_locked = 1;
1134			error = 0;
1135		} else {
1136			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1137						   flags, &rs->rs_rgd_gh);
1138		}
1139		switch (error) {
1140		case 0:
1141			if (try_rgrp_fit(rgd, ip)) {
1142				ip->i_rgd = rgd;
1143				return 0;
1144			}
1145			if (rgd->rd_flags & GFS2_RDF_CHECK)
1146				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1147			if (!rg_locked)
1148				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1149			/* fall through */
1150		case GLR_TRYFAILED:
1151			rgd = gfs2_rgrpd_get_next(rgd);
1152			if (rgd == begin) {
1153				flags = 0;
1154				loops++;
1155			}
1156			break;
1157		default:
1158			return error;
1159		}
1160	}
1161
1162	return -ENOSPC;
1163}
1164
1165static void gfs2_blkrsv_put(struct gfs2_inode *ip)
1166{
1167	BUG_ON(ip->i_res == NULL);
1168	kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
1169	ip->i_res = NULL;
1170}
1171
1172/**
1173 * gfs2_inplace_reserve - Reserve space in the filesystem
1174 * @ip: the inode to reserve space for
1175 * @requested: the number of blocks to be reserved
1176 *
1177 * Returns: errno
1178 */
1179
1180int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1181{
1182	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1183	struct gfs2_blkreserv *rs;
1184	int error;
1185	u64 last_unlinked = NO_BLOCK;
1186	int tries = 0;
1187
1188	error = gfs2_blkrsv_get(ip);
1189	if (error)
1190		return error;
1191
1192	rs = ip->i_res;
1193	rs->rs_requested = requested;
1194	if (gfs2_assert_warn(sdp, requested)) {
1195		error = -EINVAL;
1196		goto out;
1197	}
1198
1199	do {
1200		error = get_local_rgrp(ip, &last_unlinked);
1201		if (error != -ENOSPC)
1202			break;
1203		/* Check that fs hasn't grown if writing to rindex */
1204		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
1205			error = gfs2_ri_update(ip);
1206			if (error)
1207				break;
1208			continue;
1209		}
1210		/* Flushing the log may release space */
1211		gfs2_log_flush(sdp, NULL);
1212	} while (tries++ < 3);
1213
1214out:
1215	if (error)
1216		gfs2_blkrsv_put(ip);
1217	return error;
1218}
1219
1220/**
1221 * gfs2_inplace_release - release an inplace reservation
1222 * @ip: the inode the reservation was taken out on
1223 *
1224 * Release a reservation made by gfs2_inplace_reserve().
1225 */
1226
1227void gfs2_inplace_release(struct gfs2_inode *ip)
1228{
1229	struct gfs2_blkreserv *rs = ip->i_res;
1230
1231	if (rs->rs_rgd_gh.gh_gl)
1232		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1233	gfs2_blkrsv_put(ip);
1234}
1235
1236/**
1237 * gfs2_get_block_type - Check a block in a RG is of given type
1238 * @rgd: the resource group holding the block
1239 * @block: the block number
1240 *
1241 * Returns: The block type (GFS2_BLKST_*)
1242 */
1243
1244static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1245{
1246	struct gfs2_bitmap *bi = NULL;
1247	u32 length, rgrp_block, buf_block;
1248	unsigned int buf;
1249	unsigned char type;
1250
1251	length = rgd->rd_length;
1252	rgrp_block = block - rgd->rd_data0;
1253
1254	for (buf = 0; buf < length; buf++) {
1255		bi = rgd->rd_bits + buf;
1256		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1257			break;
1258	}
1259
1260	gfs2_assert(rgd->rd_sbd, buf < length);
1261	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
1262
1263	type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1264			   bi->bi_len, buf_block);
1265
1266	return type;
1267}
1268
1269/**
1270 * rgblk_search - find a block in @state
1271 * @rgd: the resource group descriptor
1272 * @goal: the goal block within the RG (start here to search for avail block)
1273 * @state: GFS2_BLKST_XXX the before-allocation state to find
1274 * @rbi: address of the pointer to the bitmap containing the block found
1275 *
1276 * Walk rgrp's bitmap to find bits that represent a block in @state.
1277 *
1278 * This function never fails, because we wouldn't call it unless we
1279 * know (from reservation results, etc.) that a block is available.
1280 *
1281 * Scope of @goal is just within rgrp, not the whole filesystem.
1282 * Scope of @returned block is just within bitmap, not the whole filesystem.
1283 *
1284 * Returns: the block number found relative to the bitmap rbi
1285 */
1286
1287static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
1288			struct gfs2_bitmap **rbi)
1289{
1290	struct gfs2_bitmap *bi = NULL;
1291	const u32 length = rgd->rd_length;
1292	u32 biblk = BFITNOENT;
1293	unsigned int buf, x;
1294	const u8 *buffer = NULL;
1295
1296	*rbi = NULL;
1297	/* Find bitmap block that contains bits for goal block */
1298	for (buf = 0; buf < length; buf++) {
1299		bi = rgd->rd_bits + buf;
1300		/* Convert scope of "goal" from rgrp-wide to within found bit block */
1301		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
1302			goal -= bi->bi_start * GFS2_NBBY;
1303			goto do_search;
1304		}
1305	}
1306	buf = 0;
1307	goal = 0;
1308
1309do_search:
1310	/* Search (up to entire) bitmap in this rgrp for allocatable block.
1311	   "x <= length", instead of "x < length", because we typically start
1312	   the search in the middle of a bit block, but if we can't find an
1313   allocatable block anywhere else, we want to be able to wrap around and
1314	   search in the first part of our first-searched bit block.  */
1315	for (x = 0; x <= length; x++) {
1316		bi = rgd->rd_bits + buf;
1317
1318		if (test_bit(GBF_FULL, &bi->bi_flags) &&
1319		    (state == GFS2_BLKST_FREE))
1320			goto skip;
1321
1322		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1323		   bitmaps, so we must search the originals for that. */
1324		buffer = bi->bi_bh->b_data + bi->bi_offset;
1325		WARN_ON(!buffer_uptodate(bi->bi_bh));
1326		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1327			buffer = bi->bi_clone + bi->bi_offset;
1328
1329		biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1330		if (biblk != BFITNOENT)
1331			break;
1332
1333		if ((goal == 0) && (state == GFS2_BLKST_FREE))
1334			set_bit(GBF_FULL, &bi->bi_flags);
1335
1336		/* Try next bitmap block (wrap back to rgrp header if at end) */
1337skip:
1338		buf++;
1339		buf %= length;
1340		goal = 0;
1341	}
1342
1343	if (biblk != BFITNOENT)
1344		*rbi = bi;
1345
1346	return biblk;
1347}
1348
1349/**
1350 * gfs2_alloc_extent - allocate an extent from a given bitmap
1351 * @rgd: the resource group descriptor
1352 * @bi: the bitmap within the rgrp
1353 * @blk: the block within the bitmap
1354 * @dinode: TRUE if the first block we allocate is for a dinode
1355 * @n: The extent length
1356 *
1357 * Add the found bitmap buffer to the transaction.
1358 * Set the found bits to @new_state to change block's allocation state.
1359 * Returns: starting block number of the extent (fs scope)
1360 */
1361static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1362			     u32 blk, bool dinode, unsigned int *n)
1363{
1364	const unsigned int elen = *n;
1365	u32 goal;
1366	const u8 *buffer = NULL;
1367
1368	*n = 0;
1369	buffer = bi->bi_bh->b_data + bi->bi_offset;
1370	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1371	gfs2_setbit(rgd, bi->bi_clone, bi, blk,
1372		    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1373	(*n)++;
1374	goal = blk;
1375	while (*n < elen) {
1376		goal++;
1377		if (goal >= (bi->bi_len * GFS2_NBBY))
1378			break;
1379		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1380		    GFS2_BLKST_FREE)
1381			break;
1382		gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED);
1383		(*n)++;
1384	}
1385	blk = gfs2_bi2rgd_blk(bi, blk);
1386	rgd->rd_last_alloc = blk + *n - 1;
1387	return rgd->rd_data0 + blk;
1388}
1389
1390/**
1391 * rgblk_free - Change alloc state of given block(s)
1392 * @sdp: the filesystem
1393 * @bstart: the start of a run of blocks to free
1394 * @blen: the length of the block run (all must lie within ONE RG!)
1395 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1396 *
1397 * Returns:  Resource group containing the block(s)
1398 */
1399
1400static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1401				     u32 blen, unsigned char new_state)
1402{
1403	struct gfs2_rgrpd *rgd;
1404	struct gfs2_bitmap *bi = NULL;
1405	u32 length, rgrp_blk, buf_blk;
1406	unsigned int buf;
1407
1408	rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1409	if (!rgd) {
1410		if (gfs2_consist(sdp))
1411			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1412		return NULL;
1413	}
1414
1415	length = rgd->rd_length;
1416
1417	rgrp_blk = bstart - rgd->rd_data0;
1418
1419	while (blen--) {
1420		for (buf = 0; buf < length; buf++) {
1421			bi = rgd->rd_bits + buf;
1422			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1423				break;
1424		}
1425
1426		gfs2_assert(rgd->rd_sbd, buf < length);
1427
1428		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1429		rgrp_blk++;
1430
1431		if (!bi->bi_clone) {
1432			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1433					       GFP_NOFS | __GFP_NOFAIL);
1434			memcpy(bi->bi_clone + bi->bi_offset,
1435			       bi->bi_bh->b_data + bi->bi_offset,
1436			       bi->bi_len);
1437		}
1438		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1439		gfs2_setbit(rgd, NULL, bi, buf_blk, new_state);
1440	}
1441
1442	return rgd;
1443}
1444
1445/**
1446 * gfs2_rgrp_dump - print out an rgrp
1447 * @seq: The iterator
1448 * @gl: The glock in question
1449 *
1450 */
1451
1452int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
1453{
1454	const struct gfs2_rgrpd *rgd = gl->gl_object;
1455	if (rgd == NULL)
1456		return 0;
1457	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
1458		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
1459		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
1460	return 0;
1461}
1462
1463static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1464{
1465	struct gfs2_sbd *sdp = rgd->rd_sbd;
1466	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
1467		(unsigned long long)rgd->rd_addr);
1468	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
1469	gfs2_rgrp_dump(NULL, rgd->rd_gl);
1470	rgd->rd_flags |= GFS2_RDF_ERROR;
1471}
1472
1473/**
1474 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
1475 * @ip: the inode to allocate the block for
1476 * @bn: Used to return the starting block number
1477 * @nblocks: requested number of blocks/extent length (value/result)
1478 * @dinode: 1 if we're allocating a dinode block, else 0
1479 * @generation: the generation number of the inode
1480 *
1481 * Returns: 0 or error
1482 */
1483
1484int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1485		      bool dinode, u64 *generation)
1486{
1487	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1488	struct buffer_head *dibh;
1489	struct gfs2_rgrpd *rgd;
1490	unsigned int ndata;
1491	u32 goal, blk; /* block, within the rgrp scope */
1492	u64 block; /* block, within the file system scope */
1493	int error;
1494	struct gfs2_bitmap *bi;
1495
1496	/* Only happens if there is a bug in gfs2, return something distinctive
1497	 * to ensure that it is noticed.
1498	 */
1499	if (ip->i_res == NULL)
1500		return -ECANCELED;
1501
1502	rgd = ip->i_rgd;
1503
1504	if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
1505		goal = ip->i_goal - rgd->rd_data0;
1506	else
1507		goal = rgd->rd_last_alloc;
1508
1509	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
1510
1511	/* Since all blocks are reserved in advance, this shouldn't happen */
1512	if (blk == BFITNOENT)
1513		goto rgrp_error;
1514
1515	block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
1516	ndata = *nblocks;
1517	if (dinode)
1518		ndata--;
1519
1520	if (!dinode) {
1521		ip->i_goal = block + ndata - 1;
1522		error = gfs2_meta_inode_buffer(ip, &dibh);
1523		if (error == 0) {
1524			struct gfs2_dinode *di =
1525				(struct gfs2_dinode *)dibh->b_data;
1526			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1527			di->di_goal_meta = di->di_goal_data =
1528				cpu_to_be64(ip->i_goal);
1529			brelse(dibh);
1530		}
1531	}
1532	if (rgd->rd_free < *nblocks)
1533		goto rgrp_error;
1534
1535	rgd->rd_free -= *nblocks;
1536	if (dinode) {
1537		rgd->rd_dinodes++;
1538		*generation = rgd->rd_igeneration++;
1539		if (*generation == 0)
1540			*generation = rgd->rd_igeneration++;
1541	}
1542
1543	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1544	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1545
1546	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
1547	if (dinode)
1548		gfs2_trans_add_unrevoke(sdp, block, 1);
1549
1550	/*
1551	 * This needs reviewing to see why we cannot do the quota change
1552	 * at this point in the dinode case.
1553	 */
1554	if (ndata)
1555		gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
1556				  ip->i_inode.i_gid);
1557
1558	rgd->rd_free_clone -= *nblocks;
1559	trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
1560			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1561	*bn = block;
1562	return 0;
1563
1564rgrp_error:
1565	gfs2_rgrp_error(rgd);
1566	return -EIO;
1567}
1568
1569/**
1570 * __gfs2_free_blocks - free a contiguous run of block(s)
1571 * @ip: the inode these blocks are being freed from
1572 * @bstart: first block of a run of contiguous blocks
1573 * @blen: the length of the block run
1574 * @meta: 1 if the blocks represent metadata
1575 *
1576 */
1577
1578void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1579{
1580	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1581	struct gfs2_rgrpd *rgd;
1582
1583	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1584	if (!rgd)
1585		return;
1586	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
1587	rgd->rd_free += blen;
1588	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
1589	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1590	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1591
1592	/* Directories keep their data in the metadata address space */
1593	if (meta || ip->i_depth)
1594		gfs2_meta_wipe(ip, bstart, blen);
1595}
1596
1597/**
1598 * gfs2_free_meta - free a contiguous run of metadata block(s)
1599 * @ip: the inode these blocks are being freed from
1600 * @bstart: first block of a run of contiguous blocks
1601 * @blen: the length of the block run
1602 *
1603 */
1604
1605void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1606{
1607	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1608
1609	__gfs2_free_blocks(ip, bstart, blen, 1);
1610	gfs2_statfs_change(sdp, 0, +blen, 0);
1611	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1612}
1613
1614void gfs2_unlink_di(struct inode *inode)
1615{
1616	struct gfs2_inode *ip = GFS2_I(inode);
1617	struct gfs2_sbd *sdp = GFS2_SB(inode);
1618	struct gfs2_rgrpd *rgd;
1619	u64 blkno = ip->i_no_addr;
1620
1621	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1622	if (!rgd)
1623		return;
1624	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
1625	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1626	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1627}
1628
1629static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1630{
1631	struct gfs2_sbd *sdp = rgd->rd_sbd;
1632	struct gfs2_rgrpd *tmp_rgd;
1633
1634	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
1635	if (!tmp_rgd)
1636		return;
1637	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
1638
1639	if (!rgd->rd_dinodes)
1640		gfs2_consist_rgrpd(rgd);
1641	rgd->rd_dinodes--;
1642	rgd->rd_free++;
1643
1644	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1645	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1646
1647	gfs2_statfs_change(sdp, 0, +1, -1);
1648}
1649
1650
1651void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1652{
1653	gfs2_free_uninit_di(rgd, ip->i_no_addr);
1654	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
1655	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1656	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
1657}
1658
1659/**
1660 * gfs2_check_blk_type - Check the type of a block
1661 * @sdp: The superblock
1662 * @no_addr: The block number to check
1663 * @type: The block type we are looking for
1664 *
1665 * Returns: 0 if the block type matches the expected type
1666 *          -ESTALE if it doesn't match
1667 *          or -ve errno if something went wrong while checking
1668 */
1669
1670int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1671{
1672	struct gfs2_rgrpd *rgd;
1673	struct gfs2_holder rgd_gh;
1674	int error = -EINVAL;
1675
1676	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
1677	if (!rgd)
1678		goto fail;
1679
1680	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
1681	if (error)
1682		goto fail;
1683
1684	if (gfs2_get_block_type(rgd, no_addr) != type)
1685		error = -ESTALE;
1686
1687	gfs2_glock_dq_uninit(&rgd_gh);
1688fail:
1689	return error;
1690}
1691
1692/**
1693 * gfs2_rlist_add - add a RG to a list of RGs
1694 * @ip: the inode
1695 * @rlist: the list of resource groups
1696 * @block: the block
1697 *
1698 * Figure out what RG a block belongs to and add that RG to the list
1699 *
1700 * FIXME: Don't use NOFAIL
1701 *
1702 */
1703
1704void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
1705		    u64 block)
1706{
1707	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1708	struct gfs2_rgrpd *rgd;
1709	struct gfs2_rgrpd **tmp;
1710	unsigned int new_space;
1711	unsigned int x;
1712
1713	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1714		return;
1715
1716	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
1717		rgd = ip->i_rgd;
1718	else
1719		rgd = gfs2_blk2rgrpd(sdp, block, 1);
1720	if (!rgd) {
1721		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
1722		return;
1723	}
1724	ip->i_rgd = rgd;
1725
1726	for (x = 0; x < rlist->rl_rgrps; x++)
1727		if (rlist->rl_rgd[x] == rgd)
1728			return;
1729
1730	if (rlist->rl_rgrps == rlist->rl_space) {
1731		new_space = rlist->rl_space + 10;
1732
1733		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1734			      GFP_NOFS | __GFP_NOFAIL);
1735
1736		if (rlist->rl_rgd) {
1737			memcpy(tmp, rlist->rl_rgd,
1738			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1739			kfree(rlist->rl_rgd);
1740		}
1741
1742		rlist->rl_space = new_space;
1743		rlist->rl_rgd = tmp;
1744	}
1745
1746	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1747}
1748
1749/**
1750 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1751 *      and initialize an array of glock holders for them
1752 * @rlist: the list of resource groups
1753 * @state: the lock state to acquire the RG lock in
1754 *
1755 * FIXME: Don't use NOFAIL
1756 *
1757 */
1758
1759void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
1760{
1761	unsigned int x;
1762
1763	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1764				GFP_NOFS | __GFP_NOFAIL);
1765	for (x = 0; x < rlist->rl_rgrps; x++)
1766		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1767				state, 0,
1768				&rlist->rl_ghs[x]);
1769}
1770
1771/**
1772 * gfs2_rlist_free - free a resource group list
1773 * @list: the list of resource groups
1774 *
1775 */
1776
1777void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1778{
1779	unsigned int x;
1780
1781	kfree(rlist->rl_rgd);
1782
1783	if (rlist->rl_ghs) {
1784		for (x = 0; x < rlist->rl_rgrps; x++)
1785			gfs2_holder_uninit(&rlist->rl_ghs[x]);
1786		kfree(rlist->rl_ghs);
1787	}
1788}
1789
v3.1
   1/*
   2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
   4 *
   5 * This copyrighted material is made available to anyone wishing to use,
   6 * modify, copy, or redistribute it subject to the terms and conditions
   7 * of the GNU General Public License version 2.
   8 */
   9
  10#include <linux/slab.h>
  11#include <linux/spinlock.h>
  12#include <linux/completion.h>
  13#include <linux/buffer_head.h>
  14#include <linux/fs.h>
  15#include <linux/gfs2_ondisk.h>
  16#include <linux/prefetch.h>
  17#include <linux/blkdev.h>
 
  18
  19#include "gfs2.h"
  20#include "incore.h"
  21#include "glock.h"
  22#include "glops.h"
  23#include "lops.h"
  24#include "meta_io.h"
  25#include "quota.h"
  26#include "rgrp.h"
  27#include "super.h"
  28#include "trans.h"
  29#include "util.h"
  30#include "log.h"
  31#include "inode.h"
  32#include "trace_gfs2.h"
  33
  34#define BFITNOENT ((u32)~0)
  35#define NO_BLOCK ((u64)~0)
  36
  37#if BITS_PER_LONG == 32
  38#define LBITMASK   (0x55555555UL)
  39#define LBITSKIP55 (0x55555555UL)
  40#define LBITSKIP00 (0x00000000UL)
  41#else
  42#define LBITMASK   (0x5555555555555555UL)
  43#define LBITSKIP55 (0x5555555555555555UL)
  44#define LBITSKIP00 (0x0000000000000000UL)
  45#endif
  46
  47/*
  48 * These routines are used by the resource group routines (rgrp.c)
  49 * to keep track of block allocation.  Each block is represented by two
  50 * bits.  So, each byte represents GFS2_NBBY (i.e. 4) blocks.
  51 *
  52 * 0 = Free
  53 * 1 = Used (not metadata)
  54 * 2 = Unlinked (still in use) inode
  55 * 3 = Used (metadata)
  56 */
  57
  58static const char valid_change[16] = {
  59	        /* current */
  60	/* n */ 0, 1, 1, 1,
  61	/* e */ 1, 0, 0, 0,
  62	/* w */ 0, 0, 0, 1,
  63	        1, 0, 0, 0
  64};
  65
  66static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
  67                        unsigned char old_state, unsigned char new_state,
  68			unsigned int *n);
  69
  70/**
  71 * gfs2_setbit - Set a bit in the bitmaps
  72 * @buffer: the buffer that holds the bitmaps
  73 * @buflen: the length (in bytes) of the buffer
 
  74 * @block: the block to set
  75 * @new_state: the new state of the block
  76 *
  77 */
  78
  79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
  80			       unsigned char *buf2, unsigned int offset,
  81			       struct gfs2_bitmap *bi, u32 block,
  82			       unsigned char new_state)
  83{
  84	unsigned char *byte1, *byte2, *end, cur_state;
  85	unsigned int buflen = bi->bi_len;
  86	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
  87
  88	byte1 = buf1 + offset + (block / GFS2_NBBY);
  89	end = buf1 + offset + buflen;
  90
  91	BUG_ON(byte1 >= end);
  92
  93	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
  94
  95	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
  96		printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
  97		       "new_state=%d\n",
  98		       (unsigned long long)block, cur_state, new_state);
  99		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
 100		       (unsigned long long)rgd->rd_addr,
 101		       (unsigned long)bi->bi_start);
 102		printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
 103		       (unsigned long)bi->bi_offset,
 104		       (unsigned long)bi->bi_len);
 105		dump_stack();
 106		gfs2_consist_rgrpd(rgd);
 107		return;
 108	}
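     	/*
     	 * XOR-ing with (cur_state ^ new_state) << bit rewrites only the two
     	 * bits belonging to this block: e.g. changing GFS2_BLKST_USED (01)
     	 * to GFS2_BLKST_FREE (00) flips just one bit of the pair and leaves
     	 * the other bit-pairs in the byte untouched.
     	 */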
 109	*byte1 ^= (cur_state ^ new_state) << bit;
 110
 111	if (buf2) {
 112		byte2 = buf2 + offset + (block / GFS2_NBBY);
 113		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
 114		*byte2 ^= (cur_state ^ new_state) << bit;
 115	}
 116}
 117
 118/**
 119 * gfs2_testbit - test a bit in the bitmaps
 120 * @buffer: the buffer that holds the bitmaps
 121 * @buflen: the length (in bytes) of the buffer
 122 * @block: the block to read
 123 *
 124 */
 125
 126static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
 127					 const unsigned char *buffer,
 128					 unsigned int buflen, u32 block)
 129{
 130	const unsigned char *byte, *end;
 131	unsigned char cur_state;
 132	unsigned int bit;
 133
 134	byte = buffer + (block / GFS2_NBBY);
 135	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
 136	end = buffer + buflen;
 137
 138	gfs2_assert(rgd->rd_sbd, byte < end);
 139
 140	cur_state = (*byte >> bit) & GFS2_BIT_MASK;
 141
 142	return cur_state;
 143}
 144
 145/**
 146 * gfs2_bit_search
 147 * @ptr: Pointer to bitmap data
 148 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
 149 * @state: The state we are searching for
 150 *
  151 * We xor the bitmap data with a pattern which is the bitwise opposite
  152 * of what we are looking for; this gives rise to a pattern of ones
  153 * wherever there is a match. Since we have two bits per entry, we
  154 * take this pattern, shift it down by one place and then AND it with
 155 * the original. All the even bit positions (0,2,4, etc) then represent
 156 * successful matches, so we mask with 0x55555..... to remove the unwanted
 157 * odd bit positions.
 158 *
 159 * This allows searching of a whole u64 at once (32 blocks) with a
 160 * single test (on 64 bit arches).
 161 */
 162
 163static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 164{
 165	u64 tmp;
 166	static const u64 search[] = {
 167		[0] = 0xffffffffffffffffULL,
 168		[1] = 0xaaaaaaaaaaaaaaaaULL,
 169		[2] = 0x5555555555555555ULL,
 170		[3] = 0x0000000000000000ULL,
 171	};
 172	tmp = le64_to_cpu(*ptr) ^ search[state];
 173	tmp &= (tmp >> 1);
 174	tmp &= mask;
 175	return tmp;
 176}
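     /*
      * Worked example, using a single byte for brevity (bit-pairs are stored
      * least significant pair first): the byte 0xe4 (binary 11 10 01 00)
      * holds blocks 0..3 in states 0, 1, 2 and 3.  Searching for
      * GFS2_BLKST_USED (1) XORs with 0xaa giving 0x4e; ANDing that with
      * itself shifted right by one gives 0x06, and masking with 0x55 leaves
      * 0x04, a single set bit at position 2, i.e. block 1.
      */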
 177
 178/**
 179 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 180 *       a block in a given allocation state.
 181 * @buffer: the buffer that holds the bitmaps
 182 * @len: the length (in bytes) of the buffer
 183 * @goal: start search at this block's bit-pair (within @buffer)
 184 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 185 *
 186 * Scope of @goal and returned block number is only within this bitmap buffer,
 187 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 188 * beginning of a bitmap block buffer, skipping any header structures, but
 189 * headers are always a multiple of 64 bits long so that the buffer is
 190 * always aligned to a 64 bit boundary.
 191 *
  192 * The size of the buffer is in bytes, but it is assumed that it is
  193 * always ok to read a complete multiple of 64 bits at the end
  194 * of the block in case the end is not aligned to a natural boundary.
 195 *
 196 * Return: the block number (bitmap buffer scope) that was found
 197 */
 198
 199static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 200		       u32 goal, u8 state)
 201{
 202	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
 203	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
 204	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
 205	u64 tmp;
 206	u64 mask = 0x5555555555555555ULL;
 207	u32 bit;
 208
 209	BUG_ON(state > 3);
 210
 211	/* Mask off bits we don't care about at the start of the search */
 212	mask <<= spoint;
 213	tmp = gfs2_bit_search(ptr, mask, state);
 214	ptr++;
 215	while(tmp == 0 && ptr < end) {
 216		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
 217		ptr++;
 218	}
 219	/* Mask off any bits which are more than len bytes from the start */
 220	if (ptr == end && (len & (sizeof(u64) - 1)))
 221		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
 222	/* Didn't find anything, so return */
 223	if (tmp == 0)
 224		return BFITNOENT;
 225	ptr--;
 226	bit = __ffs64(tmp);
 227	bit /= 2;	/* two bits per entry in the bitmap */
 228	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
 229}
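     /*
      * Example of how @goal picks the starting point above: goal = 37 gives
      * ptr = buf + (37 >> 5) 64-bit words, i.e. the word covering blocks
      * 32..63, and spoint = (37 << 1) & 63 = 10, so the initial mask clears
      * the five low bit-pairs and blocks 32..36 are skipped in the first
      * word examined.
      */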
 230
 231/**
 232 * gfs2_bitcount - count the number of bits in a certain state
 233 * @buffer: the buffer that holds the bitmaps
 234 * @buflen: the length (in bytes) of the buffer
 235 * @state: the state of the block we're looking for
 236 *
 237 * Returns: The number of bits
 238 */
 239
 240static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
 241			 unsigned int buflen, u8 state)
 242{
 243	const u8 *byte = buffer;
 244	const u8 *end = buffer + buflen;
 245	const u8 state1 = state << 2;
 246	const u8 state2 = state << 4;
 247	const u8 state3 = state << 6;
 248	u32 count = 0;
 249
 250	for (; byte < end; byte++) {
 251		if (((*byte) & 0x03) == state)
 252			count++;
 253		if (((*byte) & 0x0C) == state1)
 254			count++;
 255		if (((*byte) & 0x30) == state2)
 256			count++;
 257		if (((*byte) & 0xC0) == state3)
 258			count++;
 259	}
 260
 261	return count;
 262}
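     /*
      * Example: with state == GFS2_BLKST_DINODE (3) the masks tested above
      * are 0x03, 0x0c, 0x30 and 0xc0, one per bit-pair, so a byte of 0xc3
      * (blocks in states 3, 0, 0, 3) contributes 2 to the count.
      */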
 263
 264/**
 265 * gfs2_rgrp_verify - Verify that a resource group is consistent
  266 * @rgd: the rgrp to verify
  267 *
 268 *
 269 */
 270
 271void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 272{
 273	struct gfs2_sbd *sdp = rgd->rd_sbd;
 274	struct gfs2_bitmap *bi = NULL;
 275	u32 length = rgd->rd_length;
 276	u32 count[4], tmp;
 277	int buf, x;
 278
 279	memset(count, 0, 4 * sizeof(u32));
 280
 281	/* Count # blocks in each of 4 possible allocation states */
 282	for (buf = 0; buf < length; buf++) {
 283		bi = rgd->rd_bits + buf;
 284		for (x = 0; x < 4; x++)
 285			count[x] += gfs2_bitcount(rgd,
 286						  bi->bi_bh->b_data +
 287						  bi->bi_offset,
 288						  bi->bi_len, x);
 289	}
 290
 291	if (count[0] != rgd->rd_free) {
 292		if (gfs2_consist_rgrpd(rgd))
 293			fs_err(sdp, "free data mismatch:  %u != %u\n",
 294			       count[0], rgd->rd_free);
 295		return;
 296	}
 297
 298	tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
 299	if (count[1] != tmp) {
 300		if (gfs2_consist_rgrpd(rgd))
 301			fs_err(sdp, "used data mismatch:  %u != %u\n",
 302			       count[1], tmp);
 303		return;
 304	}
 305
 306	if (count[2] + count[3] != rgd->rd_dinodes) {
 307		if (gfs2_consist_rgrpd(rgd))
 308			fs_err(sdp, "used metadata mismatch:  %u != %u\n",
 309			       count[2] + count[3], rgd->rd_dinodes);
 310		return;
 311	}
 312}
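     /*
      * The three checks above mirror how the allocators below maintain the
      * counters: rd_free tracks GFS2_BLKST_FREE blocks, rd_dinodes counts
      * blocks in GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED, and the remaining
      * rd_data - rd_free - rd_dinodes blocks must be GFS2_BLKST_USED.
      */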
 313
 314static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 315{
 316	u64 first = rgd->rd_data0;
 317	u64 last = first + rgd->rd_data;
 318	return first <= block && block < last;
 319}
 320
 321/**
 322 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
 323 * @sdp: The GFS2 superblock
  324 * @blk: The data block number
  325 *
 326 * Returns: The resource group, or NULL if not found
 327 */
 328
 329struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
 330{
 331	struct gfs2_rgrpd *rgd;
 332
 333	spin_lock(&sdp->sd_rindex_spin);
 334
 335	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
 336		if (rgrp_contains_block(rgd, blk)) {
 337			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
 338			spin_unlock(&sdp->sd_rindex_spin);
 339			return rgd;
 340		}
 341	}
 342
 343	spin_unlock(&sdp->sd_rindex_spin);
 344
 345	return NULL;
 346}
 347
 348/**
 349 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
 350 * @sdp: The GFS2 superblock
 351 *
 352 * Returns: The first rgrp in the filesystem
 353 */
 354
 355struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
 356{
 357	gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
 358	return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
 359}
 360
 361/**
 362 * gfs2_rgrpd_get_next - get the next RG
 363 * @rgd: A RG
 364 *
 365 * Returns: The next rgrp
 366 */
 367
 368struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
 369{
 370	if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
 371		return NULL;
 372	return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
 373}
 374
 375static void clear_rgrpdi(struct gfs2_sbd *sdp)
 376{
 377	struct list_head *head;
 378	struct gfs2_rgrpd *rgd;
 379	struct gfs2_glock *gl;
 380
 381	spin_lock(&sdp->sd_rindex_spin);
 382	sdp->sd_rindex_forward = NULL;
 383	spin_unlock(&sdp->sd_rindex_spin);
 384
 385	head = &sdp->sd_rindex_list;
 386	while (!list_empty(head)) {
 387		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
 388		gl = rgd->rd_gl;
 389
 390		list_del(&rgd->rd_list);
 391		list_del(&rgd->rd_list_mru);
 392
 393		if (gl) {
 394			gl->gl_object = NULL;
 395			gfs2_glock_add_to_lru(gl);
 396			gfs2_glock_put(gl);
 397		}
 398
 399		kfree(rgd->rd_bits);
 400		kmem_cache_free(gfs2_rgrpd_cachep, rgd);
 401	}
 402}
 403
 404void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 405{
 406	mutex_lock(&sdp->sd_rindex_mutex);
 407	clear_rgrpdi(sdp);
 408	mutex_unlock(&sdp->sd_rindex_mutex);
 409}
 410
 411static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
 412{
 413	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
 414	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
 415	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
 416	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
 417	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
 418}
 419
 420/**
  421 * compute_bitstructs - Compute the bitmap sizes
 422 * @rgd: The resource group descriptor
 423 *
 424 * Calculates bitmap descriptors, one for each block that contains bitmap data
 425 *
 426 * Returns: errno
 427 */
 428
 429static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 430{
 431	struct gfs2_sbd *sdp = rgd->rd_sbd;
 432	struct gfs2_bitmap *bi;
 433	u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
 434	u32 bytes_left, bytes;
 435	int x;
 436
 437	if (!length)
 438		return -EINVAL;
 439
 440	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS);
 441	if (!rgd->rd_bits)
 442		return -ENOMEM;
 443
 444	bytes_left = rgd->rd_bitbytes;
 445
 446	for (x = 0; x < length; x++) {
 447		bi = rgd->rd_bits + x;
 448
 449		bi->bi_flags = 0;
 450		/* small rgrp; bitmap stored completely in header block */
 451		if (length == 1) {
 452			bytes = bytes_left;
 453			bi->bi_offset = sizeof(struct gfs2_rgrp);
 454			bi->bi_start = 0;
 455			bi->bi_len = bytes;
 456		/* header block */
 457		} else if (x == 0) {
 458			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
 459			bi->bi_offset = sizeof(struct gfs2_rgrp);
 460			bi->bi_start = 0;
 461			bi->bi_len = bytes;
 462		/* last block */
 463		} else if (x + 1 == length) {
 464			bytes = bytes_left;
 465			bi->bi_offset = sizeof(struct gfs2_meta_header);
 466			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 467			bi->bi_len = bytes;
 468		/* other blocks */
 469		} else {
 470			bytes = sdp->sd_sb.sb_bsize -
 471				sizeof(struct gfs2_meta_header);
 472			bi->bi_offset = sizeof(struct gfs2_meta_header);
 473			bi->bi_start = rgd->rd_bitbytes - bytes_left;
 474			bi->bi_len = bytes;
 475		}
 476
 477		bytes_left -= bytes;
 478	}
 479
 480	if (bytes_left) {
 481		gfs2_consist_rgrpd(rgd);
 482		return -EIO;
 483	}
 484	bi = rgd->rd_bits + (length - 1);
 485	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
 486		if (gfs2_consist_rgrpd(rgd)) {
 487			gfs2_rindex_print(rgd);
 488			fs_err(sdp, "start=%u len=%u offset=%u\n",
 489			       bi->bi_start, bi->bi_len, bi->bi_offset);
 490		}
 491		return -EIO;
 492	}
 493
 494	return 0;
 495}
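     /*
      * Each bitmap byte covers GFS2_NBBY (4) blocks, so with block size
      * sb_bsize the header block maps
      * (sb_bsize - sizeof(struct gfs2_rgrp)) * GFS2_NBBY blocks and each
      * additional bitmap block maps
      * (sb_bsize - sizeof(struct gfs2_meta_header)) * GFS2_NBBY blocks.
      */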
 496
 497/**
 498 * gfs2_ri_total - Total up the file system space, according to the rindex.
 499 *
 500 */
 501u64 gfs2_ri_total(struct gfs2_sbd *sdp)
 502{
 503	u64 total_data = 0;	
 504	struct inode *inode = sdp->sd_rindex;
 505	struct gfs2_inode *ip = GFS2_I(inode);
 506	char buf[sizeof(struct gfs2_rindex)];
 507	struct file_ra_state ra_state;
 508	int error, rgrps;
 509
 510	mutex_lock(&sdp->sd_rindex_mutex);
 511	file_ra_state_init(&ra_state, inode->i_mapping);
 512	for (rgrps = 0;; rgrps++) {
 513		loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 514
 515		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
 516			break;
 517		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
 518					   sizeof(struct gfs2_rindex));
 519		if (error != sizeof(struct gfs2_rindex))
 520			break;
 521		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
 522	}
 523	mutex_unlock(&sdp->sd_rindex_mutex);
 524	return total_data;
 525}
 526
 527static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
 528{
 529	const struct gfs2_rindex *str = buf;
 530
 531	rgd->rd_addr = be64_to_cpu(str->ri_addr);
 532	rgd->rd_length = be32_to_cpu(str->ri_length);
 533	rgd->rd_data0 = be64_to_cpu(str->ri_data0);
 534	rgd->rd_data = be32_to_cpu(str->ri_data);
 535	rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
 536}
 537
 538/**
 539 * read_rindex_entry - Pull in a new resource index entry from the disk
  540 * @ip: Pointer to the rindex inode; @ra_state: readahead state for the read
 541 *
 542 * Returns: 0 on success, error code otherwise
 543 */
 544
 545static int read_rindex_entry(struct gfs2_inode *ip,
 546			     struct file_ra_state *ra_state)
 547{
 548	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 549	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
 550	char buf[sizeof(struct gfs2_rindex)];
 551	int error;
 552	struct gfs2_rgrpd *rgd;
 553
 554	error = gfs2_internal_read(ip, ra_state, buf, &pos,
 555				   sizeof(struct gfs2_rindex));
 556	if (!error)
 557		return 0;
 558	if (error != sizeof(struct gfs2_rindex)) {
 559		if (error > 0)
 560			error = -EIO;
 561		return error;
 562	}
 563
 564	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
 565	error = -ENOMEM;
 566	if (!rgd)
 567		return error;
 568
 569	mutex_init(&rgd->rd_mutex);
 570	lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
 571	rgd->rd_sbd = sdp;
 572
 573	list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
 574	list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
 575
 576	gfs2_rindex_in(rgd, buf);
 577	error = compute_bitstructs(rgd);
 578	if (error)
 579		return error;
 580
 581	error = gfs2_glock_get(sdp, rgd->rd_addr,
 582			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
 583	if (error)
 584		return error;
 585
 586	rgd->rd_gl->gl_object = rgd;
 587	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
 588	return error;
 589}
 590
 591/**
 592 * gfs2_ri_update - Pull in a new resource index from the disk
 593 * @ip: pointer to the rindex inode
 594 *
 595 * Returns: 0 on successful update, error code otherwise
 596 */
 597
 598int gfs2_ri_update(struct gfs2_inode *ip)
 599{
 600	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 601	struct inode *inode = &ip->i_inode;
 602	struct file_ra_state ra_state;
 603	u64 rgrp_count = i_size_read(inode);
 604	struct gfs2_rgrpd *rgd;
 605	unsigned int max_data = 0;
 606	int error;
 607
 608	do_div(rgrp_count, sizeof(struct gfs2_rindex));
 609	clear_rgrpdi(sdp);
 610
 611	file_ra_state_init(&ra_state, inode->i_mapping);
 612	for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
 613		error = read_rindex_entry(ip, &ra_state);
 614		if (error) {
 615			clear_rgrpdi(sdp);
 616			return error;
 617		}
 618	}
 619
 620	list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
 621		if (rgd->rd_data > max_data)
 622			max_data = rgd->rd_data;
 623	sdp->sd_max_rg_data = max_data;
 624	sdp->sd_rindex_uptodate = 1;
 625	return 0;
 626}
 627
 628/**
 629 * gfs2_rindex_hold - Grab a lock on the rindex
 630 * @sdp: The GFS2 superblock
 631 * @ri_gh: the glock holder
 632 *
 633 * We grab a lock on the rindex inode to make sure that it doesn't
 634 * change whilst we are performing an operation. We keep this lock
 635 * for quite long periods of time compared to other locks. This
 636 * doesn't matter, since it is shared and it is very, very rarely
 637 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 638 *
 639 * This makes sure that we're using the latest copy of the resource index
 640 * special file, which might have been updated if someone expanded the
 641 * filesystem (via gfs2_grow utility), which adds new resource groups.
 642 *
 643 * Returns: 0 on success, error code otherwise
 644 */
 645
 646int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
 647{
 648	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
 649	struct gfs2_glock *gl = ip->i_gl;
 650	int error;
 651
 652	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
 653	if (error)
 654		return error;
 655
 656	/* Read new copy from disk if we don't have the latest */
 657	if (!sdp->sd_rindex_uptodate) {
 658		mutex_lock(&sdp->sd_rindex_mutex);
 659		if (!sdp->sd_rindex_uptodate) {
 660			error = gfs2_ri_update(ip);
 661			if (error)
 662				gfs2_glock_dq_uninit(ri_gh);
 663		}
 664		mutex_unlock(&sdp->sd_rindex_mutex);
 665	}
 666
 667	return error;
 668}
 669
 670static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
 671{
 672	const struct gfs2_rgrp *str = buf;
 673	u32 rg_flags;
 674
 675	rg_flags = be32_to_cpu(str->rg_flags);
 676	rg_flags &= ~GFS2_RDF_MASK;
 677	rgd->rd_flags &= GFS2_RDF_MASK;
 678	rgd->rd_flags |= rg_flags;
 679	rgd->rd_free = be32_to_cpu(str->rg_free);
 680	rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
 681	rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
 682}
 683
 684static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 685{
 686	struct gfs2_rgrp *str = buf;
 687
 688	str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
 689	str->rg_free = cpu_to_be32(rgd->rd_free);
 690	str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
 691	str->__pad = cpu_to_be32(0);
 692	str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
 693	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 694}
 695
 696/**
 697 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 698 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 699 *
 700 * Read in all of a Resource Group's header and bitmap blocks.
  701 * Caller must eventually call gfs2_rgrp_bh_put() to free the bitmaps.
 702 *
 703 * Returns: errno
 704 */
 705
 706int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 707{
 708	struct gfs2_sbd *sdp = rgd->rd_sbd;
 709	struct gfs2_glock *gl = rgd->rd_gl;
 710	unsigned int length = rgd->rd_length;
 711	struct gfs2_bitmap *bi;
 712	unsigned int x, y;
 713	int error;
 714
 715	mutex_lock(&rgd->rd_mutex);
 716
 717	spin_lock(&sdp->sd_rindex_spin);
 718	if (rgd->rd_bh_count) {
 719		rgd->rd_bh_count++;
 720		spin_unlock(&sdp->sd_rindex_spin);
 721		mutex_unlock(&rgd->rd_mutex);
 722		return 0;
 723	}
 724	spin_unlock(&sdp->sd_rindex_spin);
 725
 726	for (x = 0; x < length; x++) {
 727		bi = rgd->rd_bits + x;
 728		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
 729		if (error)
 730			goto fail;
 731	}
 732
 733	for (y = length; y--;) {
 734		bi = rgd->rd_bits + y;
 735		error = gfs2_meta_wait(sdp, bi->bi_bh);
 736		if (error)
 737			goto fail;
 738		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
 739					      GFS2_METATYPE_RG)) {
 740			error = -EIO;
 741			goto fail;
 742		}
 743	}
 744
 745	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
 746		for (x = 0; x < length; x++)
 747			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
 748		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
 749		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 750	}
 751
 752	spin_lock(&sdp->sd_rindex_spin);
 753	rgd->rd_free_clone = rgd->rd_free;
 754	rgd->rd_bh_count++;
 755	spin_unlock(&sdp->sd_rindex_spin);
 756
 757	mutex_unlock(&rgd->rd_mutex);
 758
 759	return 0;
 760
 761fail:
 762	while (x--) {
 763		bi = rgd->rd_bits + x;
 764		brelse(bi->bi_bh);
 765		bi->bi_bh = NULL;
 766		gfs2_assert_warn(sdp, !bi->bi_clone);
 767	}
 768	mutex_unlock(&rgd->rd_mutex);
 769
 770	return error;
 771}
 772
 773void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
 774{
 775	struct gfs2_sbd *sdp = rgd->rd_sbd;
 776
 777	spin_lock(&sdp->sd_rindex_spin);
 778	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
 779	rgd->rd_bh_count++;
 780	spin_unlock(&sdp->sd_rindex_spin);
 781}
 782
 783/**
 784 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 785 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 786 *
 787 */
 788
 789void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 790{
 791	struct gfs2_sbd *sdp = rgd->rd_sbd;
 792	int x, length = rgd->rd_length;
 793
 794	spin_lock(&sdp->sd_rindex_spin);
 795	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
 796	if (--rgd->rd_bh_count) {
 797		spin_unlock(&sdp->sd_rindex_spin);
 798		return;
 799	}
 800
 801	for (x = 0; x < length; x++) {
 802		struct gfs2_bitmap *bi = rgd->rd_bits + x;
 803		kfree(bi->bi_clone);
 804		bi->bi_clone = NULL;
 805		brelse(bi->bi_bh);
 806		bi->bi_bh = NULL;
 807	}
 808
 809	spin_unlock(&sdp->sd_rindex_spin);
 810}
 811
 812static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 813				    const struct gfs2_bitmap *bi)
 814{
 815	struct super_block *sb = sdp->sd_vfs;
 816	struct block_device *bdev = sb->s_bdev;
 817	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
 818					   bdev_logical_block_size(sb->s_bdev);
 819	u64 blk;
 820	sector_t start = 0;
 821	sector_t nr_sects = 0;
 822	int rv;
 823	unsigned int x;
 824
 825	for (x = 0; x < bi->bi_len; x++) {
 826		const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
 827		const u8 *clone = bi->bi_clone + bi->bi_offset + x;
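     		/*
     		 * (x | x >> 1) puts a 1 in the low bit of every non-free
     		 * (non-zero) bit-pair, so diff ends up with a 1, in the even
     		 * bit positions once masked with 0x55 below, exactly where
     		 * the current bitmap says free but the clone still says in
     		 * use: the blocks freed since the clone was taken, which are
     		 * the candidates to discard.
     		 */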
 828		u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
 829		diff &= 0x55;
 830		if (diff == 0)
 831			continue;
 832		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
 833		blk *= sects_per_blk; /* convert to sectors */
 834		while(diff) {
 835			if (diff & 1) {
 836				if (nr_sects == 0)
 837					goto start_new_extent;
 838				if ((start + nr_sects) != blk) {
 839					rv = blkdev_issue_discard(bdev, start,
 840							    nr_sects, GFP_NOFS,
 841							    0);
 842					if (rv)
 843						goto fail;
 844					nr_sects = 0;
 845start_new_extent:
 846					start = blk;
 847				}
 848				nr_sects += sects_per_blk;
 849			}
 850			diff >>= 2;
 851			blk += sects_per_blk;
 852		}
 853	}
 854	if (nr_sects) {
 855		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
 856		if (rv)
 857			goto fail;
 858	}
 859	return;
 860fail:
 861	fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
 862	sdp->sd_args.ar_discard = 0;
 863}
 864
 865void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 866{
 867	struct gfs2_sbd *sdp = rgd->rd_sbd;
 868	unsigned int length = rgd->rd_length;
 869	unsigned int x;
 870
 871	for (x = 0; x < length; x++) {
 872		struct gfs2_bitmap *bi = rgd->rd_bits + x;
 873		if (!bi->bi_clone)
 874			continue;
 875		if (sdp->sd_args.ar_discard)
 876			gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
 877		clear_bit(GBF_FULL, &bi->bi_flags);
 878		memcpy(bi->bi_clone + bi->bi_offset,
 879		       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
 880	}
 881
 882	spin_lock(&sdp->sd_rindex_spin);
 883	rgd->rd_free_clone = rgd->rd_free;
 884	spin_unlock(&sdp->sd_rindex_spin);
 885}
 886
 887/**
 888 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
 889 * @ip: the incore GFS2 inode structure
 890 *
 891 * Returns: the struct gfs2_alloc
 892 */
 893
 894struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
 895{
 896	BUG_ON(ip->i_alloc != NULL);
 897	ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS);
 898	return ip->i_alloc;
 899}
 900
 901/**
 902 * try_rgrp_fit - See if a given reservation will fit in a given RG
 903 * @rgd: the RG data
 904 * @al: the struct gfs2_alloc structure describing the reservation
 905 *
 906 * If there's room for the requested blocks to be allocated from the RG:
 907 *   Sets the $al_rgd field in @al.
 908 *
 909 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
 910 */
 911
 912static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
 913{
 914	struct gfs2_sbd *sdp = rgd->rd_sbd;
 915	int ret = 0;
 916
 917	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
 918		return 0;
 919
 920	spin_lock(&sdp->sd_rindex_spin);
 921	if (rgd->rd_free_clone >= al->al_requested) {
 922		al->al_rgd = rgd;
 923		ret = 1;
 924	}
 925	spin_unlock(&sdp->sd_rindex_spin);
 926
 927	return ret;
 928}
 929
 930/**
 931 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 932 * @rgd: The rgrp
  933 * @last_unlinked: the last unlinked block address found so far (updated here)
  934 * @skip: a block address to ignore (the allocating inode's own address)
  935 *
 936 */
 937
 938static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
 939{
 940	u32 goal = 0, block;
 941	u64 no_addr;
 942	struct gfs2_sbd *sdp = rgd->rd_sbd;
 943	unsigned int n;
 944	struct gfs2_glock *gl;
 945	struct gfs2_inode *ip;
 946	int error;
 947	int found = 0;
 948
 949	while (goal < rgd->rd_data) {
 950		down_write(&sdp->sd_log_flush_lock);
 951		n = 1;
 952		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
 953				     GFS2_BLKST_UNLINKED, &n);
 954		up_write(&sdp->sd_log_flush_lock);
 955		if (block == BFITNOENT)
 956			break;
 957		/* rgblk_search can return a block < goal, so we need to
 958		   keep it marching forward. */
 959		no_addr = block + rgd->rd_data0;
 960		goal = max(block + 1, goal + 1);
 961		if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
 962			continue;
 963		if (no_addr == skip)
 964			continue;
 965		*last_unlinked = no_addr;
 966
 967		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
 968		if (error)
 969			continue;
 970
 971		/* If the inode is already in cache, we can ignore it here
 972		 * because the existing inode disposal code will deal with
 973		 * it when all refs have gone away. Accessing gl_object like
 974		 * this is not safe in general. Here it is ok because we do
 975		 * not dereference the pointer, and we only need an approx
 976		 * answer to whether it is NULL or not.
 977		 */
 978		ip = gl->gl_object;
 979
 980		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
 981			gfs2_glock_put(gl);
 982		else
 983			found++;
 984
 985		/* Limit reclaim to sensible number of tasks */
 986		if (found > NR_CPUS)
 987			return;
 988	}
 989
 990	rgd->rd_flags &= ~GFS2_RDF_CHECK;
 991	return;
 992}
 993
 994/**
 995 * recent_rgrp_next - get next RG from "recent" list
 996 * @cur_rgd: current rgrp
 997 *
 998 * Returns: The next rgrp in the recent list
 999 */
1000
1001static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd)
1002{
1003	struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
1004	struct list_head *head;
1005	struct gfs2_rgrpd *rgd;
1006
1007	spin_lock(&sdp->sd_rindex_spin);
1008	head = &sdp->sd_rindex_mru_list;
1009	if (unlikely(cur_rgd->rd_list_mru.next == head)) {
1010		spin_unlock(&sdp->sd_rindex_spin);
1011		return NULL;
1012	}
1013	rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru);
1014	spin_unlock(&sdp->sd_rindex_spin);
1015	return rgd;
1016}
1017
1018/**
1019 * forward_rgrp_get - get an rgrp to try next from full list
1020 * @sdp: The GFS2 superblock
1021 *
1022 * Returns: The rgrp to try next
1023 */
1024
1025static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
1026{
1027	struct gfs2_rgrpd *rgd;
1028	unsigned int journals = gfs2_jindex_size(sdp);
1029	unsigned int rg = 0, x;
1030
1031	spin_lock(&sdp->sd_rindex_spin);
1032
1033	rgd = sdp->sd_rindex_forward;
1034	if (!rgd) {
1035		if (sdp->sd_rgrps >= journals)
1036			rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
1037
1038		for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg;
1039		     x++, rgd = gfs2_rgrpd_get_next(rgd))
1040			/* Do Nothing */;
1041
1042		sdp->sd_rindex_forward = rgd;
1043	}
1044
1045	spin_unlock(&sdp->sd_rindex_spin);
1046
1047	return rgd;
1048}
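     /*
      * The starting rgrp computed above spreads nodes across the filesystem:
      * for example, with 128 resource groups and 4 journals, the node using
      * journal 2 begins its forward scan at rgrp 128 * 2 / 4 = 64, which
      * helps keep cluster nodes out of each other's resource groups.
      */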
1049
1050/**
1051 * forward_rgrp_set - set the forward rgrp pointer
1052 * @sdp: the filesystem
1053 * @rgd: The new forward rgrp
1054 *
1055 */
1056
1057static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
1058{
1059	spin_lock(&sdp->sd_rindex_spin);
1060	sdp->sd_rindex_forward = rgd;
1061	spin_unlock(&sdp->sd_rindex_spin);
1062}
1063
1064/**
1065 * get_local_rgrp - Choose and lock a rgrp for allocation
1066 * @ip: the inode to reserve space for
 1067 * @last_unlinked: the last unlinked block address found so far (in/out)
 1068 *
 1069 * Try to acquire an rgrp in a way which avoids contending with others.
1070 *
1071 * Returns: errno
1072 */
1073
1074static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1075{
1076	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1077	struct gfs2_rgrpd *rgd, *begin = NULL;
1078	struct gfs2_alloc *al = ip->i_alloc;
1079	int flags = LM_FLAG_TRY;
1080	int skipped = 0;
1081	int loops = 0;
1082	int error, rg_locked;
1083
1084	rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
1085
1086	while (rgd) {
1087		rg_locked = 0;
1088
1089		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1090			rg_locked = 1;
1091			error = 0;
1092		} else {
1093			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1094						   LM_FLAG_TRY, &al->al_rgd_gh);
1095		}
1096		switch (error) {
1097		case 0:
1098			if (try_rgrp_fit(rgd, al))
1099				goto out;
1100			if (rgd->rd_flags & GFS2_RDF_CHECK)
1101				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1102			if (!rg_locked)
1103				gfs2_glock_dq_uninit(&al->al_rgd_gh);
1104			/* fall through */
1105		case GLR_TRYFAILED:
1106			rgd = recent_rgrp_next(rgd);
1107			break;
1108
1109		default:
1110			return error;
1111		}
1112	}
1113
1114	/* Go through full list of rgrps */
1115
1116	begin = rgd = forward_rgrp_get(sdp);
1117
1118	for (;;) {
1119		rg_locked = 0;
1120
1121		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1122			rg_locked = 1;
1123			error = 0;
1124		} else {
1125			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
1126						   &al->al_rgd_gh);
1127		}
1128		switch (error) {
1129		case 0:
1130			if (try_rgrp_fit(rgd, al))
1131				goto out;
1132			if (rgd->rd_flags & GFS2_RDF_CHECK)
1133				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1134			if (!rg_locked)
1135				gfs2_glock_dq_uninit(&al->al_rgd_gh);
1136			break;
1137
1138		case GLR_TRYFAILED:
1139			skipped++;
1140			break;
1141
1142		default:
1143			return error;
1144		}
1145
1146		rgd = gfs2_rgrpd_get_next(rgd);
1147		if (!rgd)
1148			rgd = gfs2_rgrpd_get_first(sdp);
1149
1150		if (rgd == begin) {
1151			if (++loops >= 3)
1152				return -ENOSPC;
1153			if (!skipped)
1154				loops++;
1155			flags = 0;
1156			if (loops == 2)
1157				gfs2_log_flush(sdp, NULL);
1158		}
1159	}
1160
1161out:
1162	if (begin) {
1163		spin_lock(&sdp->sd_rindex_spin);
1164		list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
1165		spin_unlock(&sdp->sd_rindex_spin);
1166		rgd = gfs2_rgrpd_get_next(rgd);
1167		if (!rgd)
1168			rgd = gfs2_rgrpd_get_first(sdp);
1169		forward_rgrp_set(sdp, rgd);
1170	}
1171
1172	return 0;
1173}
1174
1175/**
1176 * gfs2_inplace_reserve_i - Reserve space in the filesystem
1177 * @ip: the inode to reserve space for
1178 *
1179 * Returns: errno
1180 */
1181
1182int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
1183			   char *file, unsigned int line)
1184{
1185	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1186	struct gfs2_alloc *al = ip->i_alloc;
1187	int error = 0;
1188	u64 last_unlinked = NO_BLOCK;
1189	int tries = 0;
1190
1191	if (gfs2_assert_warn(sdp, al->al_requested))
1192		return -EINVAL;
1193
1194	if (hold_rindex) {
1195		/* We need to hold the rindex unless the inode we're using is
1196		   the rindex itself, in which case it's already held. */
1197		if (ip != GFS2_I(sdp->sd_rindex))
1198			error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1199		else if (!sdp->sd_rgrps) /* We may not have the rindex read
1200					    in, so: */
1201			error = gfs2_ri_update(ip);
1202		if (error)
1203			return error;
1204	}
1205
1206try_again:
1207	do {
1208		error = get_local_rgrp(ip, &last_unlinked);
1209		/* If there is no space, flushing the log may release some */
1210		if (error) {
1211			if (ip == GFS2_I(sdp->sd_rindex) &&
1212			    !sdp->sd_rindex_uptodate) {
1213				error = gfs2_ri_update(ip);
1214				if (error)
1215					return error;
1216				goto try_again;
1217			}
1218			gfs2_log_flush(sdp, NULL);
1219		}
1220	} while (error && tries++ < 3);
1221
1222	if (error) {
1223		if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
1224			gfs2_glock_dq_uninit(&al->al_ri_gh);
1225		return error;
1226	}
1227
1228	/* no error, so we have the rgrp set in the inode's allocation. */
1229	al->al_file = file;
1230	al->al_line = line;
1231
1232	return 0;
1233}
1234
1235/**
1236 * gfs2_inplace_release - release an inplace reservation
1237 * @ip: the inode the reservation was taken out on
1238 *
1239 * Release a reservation made by gfs2_inplace_reserve().
1240 */
1241
1242void gfs2_inplace_release(struct gfs2_inode *ip)
1243{
1244	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1245	struct gfs2_alloc *al = ip->i_alloc;
1246
1247	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
1248		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
1249			     "al_file = %s, al_line = %u\n",
1250		             al->al_alloced, al->al_requested, al->al_file,
1251			     al->al_line);
1252
1253	al->al_rgd = NULL;
1254	if (al->al_rgd_gh.gh_gl)
1255		gfs2_glock_dq_uninit(&al->al_rgd_gh);
1256	if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl)
1257		gfs2_glock_dq_uninit(&al->al_ri_gh);
1258}
1259
1260/**
1261 * gfs2_get_block_type - Check a block in a RG is of given type
1262 * @rgd: the resource group holding the block
1263 * @block: the block number
1264 *
1265 * Returns: The block type (GFS2_BLKST_*)
1266 */
1267
1268static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1269{
1270	struct gfs2_bitmap *bi = NULL;
1271	u32 length, rgrp_block, buf_block;
1272	unsigned int buf;
1273	unsigned char type;
1274
1275	length = rgd->rd_length;
1276	rgrp_block = block - rgd->rd_data0;
1277
1278	for (buf = 0; buf < length; buf++) {
1279		bi = rgd->rd_bits + buf;
1280		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1281			break;
1282	}
1283
1284	gfs2_assert(rgd->rd_sbd, buf < length);
1285	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
1286
1287	type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1288			   bi->bi_len, buf_block);
1289
1290	return type;
1291}
1292
1293/**
1294 * rgblk_search - find a block in @old_state, change allocation
1295 *           state to @new_state
1296 * @rgd: the resource group descriptor
1297 * @goal: the goal block within the RG (start here to search for avail block)
1298 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
1299 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1300 * @n: The extent length
1301 *
1302 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
1303 * Add the found bitmap buffer to the transaction.
1304 * Set the found bits to @new_state to change block's allocation state.
1305 *
1306 * This function never fails, because we wouldn't call it unless we
1307 * know (from reservation results, etc.) that a block is available.
1308 *
1309 * Scope of @goal and returned block is just within rgrp, not the whole
1310 * filesystem.
1311 *
1312 * Returns:  the block number allocated
1313 */
1314
1315static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1316			unsigned char old_state, unsigned char new_state,
1317			unsigned int *n)
1318{
1319	struct gfs2_bitmap *bi = NULL;
1320	const u32 length = rgd->rd_length;
1321	u32 blk = BFITNOENT;
1322	unsigned int buf, x;
1323	const unsigned int elen = *n;
1324	const u8 *buffer = NULL;
1325
1326	*n = 0;
1327	/* Find bitmap block that contains bits for goal block */
1328	for (buf = 0; buf < length; buf++) {
1329		bi = rgd->rd_bits + buf;
1330		/* Convert scope of "goal" from rgrp-wide to within found bit block */
1331		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
1332			goal -= bi->bi_start * GFS2_NBBY;
1333			goto do_search;
1334		}
1335	}
1336	buf = 0;
1337	goal = 0;
1338
1339do_search:
1340	/* Search (up to entire) bitmap in this rgrp for allocatable block.
1341	   "x <= length", instead of "x < length", because we typically start
1342	   the search in the middle of a bit block, but if we can't find an
 1343	   allocatable block anywhere else, we want to be able to wrap around and
1344	   search in the first part of our first-searched bit block.  */
1345	for (x = 0; x <= length; x++) {
1346		bi = rgd->rd_bits + buf;
1347
1348		if (test_bit(GBF_FULL, &bi->bi_flags) &&
1349		    (old_state == GFS2_BLKST_FREE))
1350			goto skip;
1351
1352		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1353		   bitmaps, so we must search the originals for that. */
1354		buffer = bi->bi_bh->b_data + bi->bi_offset;
1355		if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1356			buffer = bi->bi_clone + bi->bi_offset;
1357
1358		blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state);
1359		if (blk != BFITNOENT)
1360			break;
1361
1362		if ((goal == 0) && (old_state == GFS2_BLKST_FREE))
1363			set_bit(GBF_FULL, &bi->bi_flags);
1364
1365		/* Try next bitmap block (wrap back to rgrp header if at end) */
1366skip:
1367		buf++;
1368		buf %= length;
1369		goal = 0;
1370	}
1371
1372	if (blk == BFITNOENT)
1373		return blk;
1374	*n = 1;
1375	if (old_state == new_state)
1376		goto out;
1377
1378	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1379	gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1380		    bi, blk, new_state);
1381	goal = blk;
1382	while (*n < elen) {
1383		goal++;
1384		if (goal >= (bi->bi_len * GFS2_NBBY))
1385			break;
1386		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1387		    GFS2_BLKST_FREE)
1388			break;
1389		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1390			    bi, goal, new_state);
1391		(*n)++;
1392	}
1393out:
1394	return (bi->bi_start * GFS2_NBBY) + blk;
1395}
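     /*
      * Callers such as gfs2_alloc_block() below pass the wanted extent
      * length in *n and get back the number of blocks actually switched to
      * @new_state, starting at the returned rgrp-relative block; the extent
      * is only extended while the following blocks are still
      * GFS2_BLKST_FREE and lie within the same bitmap block.
      */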
1396
1397/**
1398 * rgblk_free - Change alloc state of given block(s)
1399 * @sdp: the filesystem
1400 * @bstart: the start of a run of blocks to free
1401 * @blen: the length of the block run (all must lie within ONE RG!)
1402 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1403 *
1404 * Returns:  Resource group containing the block(s)
1405 */
1406
1407static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1408				     u32 blen, unsigned char new_state)
1409{
1410	struct gfs2_rgrpd *rgd;
1411	struct gfs2_bitmap *bi = NULL;
1412	u32 length, rgrp_blk, buf_blk;
1413	unsigned int buf;
1414
1415	rgd = gfs2_blk2rgrpd(sdp, bstart);
1416	if (!rgd) {
1417		if (gfs2_consist(sdp))
1418			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1419		return NULL;
1420	}
1421
1422	length = rgd->rd_length;
1423
1424	rgrp_blk = bstart - rgd->rd_data0;
1425
1426	while (blen--) {
1427		for (buf = 0; buf < length; buf++) {
1428			bi = rgd->rd_bits + buf;
1429			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1430				break;
1431		}
1432
1433		gfs2_assert(rgd->rd_sbd, buf < length);
1434
1435		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1436		rgrp_blk++;
1437
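     		/*
     		 * The clone, once created, preserves the bitmap contents as
     		 * they were before this change: gfs2_setbit() is called below
     		 * with a NULL clone buffer, so only the real bitmap is
     		 * updated.  rgblk_search() allocates from the clone, which
     		 * stops just-freed blocks from being reused before
     		 * gfs2_rgrp_repolish_clones() copies the real bitmap back.
     		 */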
1438		if (!bi->bi_clone) {
1439			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1440					       GFP_NOFS | __GFP_NOFAIL);
1441			memcpy(bi->bi_clone + bi->bi_offset,
1442			       bi->bi_bh->b_data + bi->bi_offset,
1443			       bi->bi_len);
1444		}
1445		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1446		gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1447			    bi, buf_blk, new_state);
1448	}
1449
1450	return rgd;
1451}
1452
1453/**
1454 * gfs2_rgrp_dump - print out an rgrp
1455 * @seq: The iterator
1456 * @gl: The glock in question
1457 *
1458 */
1459
1460int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
1461{
1462	const struct gfs2_rgrpd *rgd = gl->gl_object;
1463	if (rgd == NULL)
1464		return 0;
1465	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
1466		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
1467		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
1468	return 0;
1469}
1470
1471static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1472{
1473	struct gfs2_sbd *sdp = rgd->rd_sbd;
1474	fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
1475		(unsigned long long)rgd->rd_addr);
1476	fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
1477	gfs2_rgrp_dump(NULL, rgd->rd_gl);
1478	rgd->rd_flags |= GFS2_RDF_ERROR;
1479}
1480
1481/**
1482 * gfs2_alloc_block - Allocate one or more blocks
1483 * @ip: the inode to allocate the block for
1484 * @bn: Used to return the starting block number
1485 * @n: requested number of blocks/extent length (value/result)
1486 *
1487 * Returns: 0 or error
1488 */
1489
1490int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
1491{
1492	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1493	struct buffer_head *dibh;
1494	struct gfs2_alloc *al = ip->i_alloc;
1495	struct gfs2_rgrpd *rgd;
1496	u32 goal, blk;
1497	u64 block;
1498	int error;
1499
1500	/* Only happens if there is a bug in gfs2, return something distinctive
1501	 * to ensure that it is noticed.
1502	 */
1503	if (al == NULL)
1504		return -ECANCELED;
1505
1506	rgd = al->al_rgd;
1507
1508	if (rgrp_contains_block(rgd, ip->i_goal))
1509		goal = ip->i_goal - rgd->rd_data0;
1510	else
1511		goal = rgd->rd_last_alloc;
1512
1513	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
1514
1515	/* Since all blocks are reserved in advance, this shouldn't happen */
1516	if (blk == BFITNOENT)
1517		goto rgrp_error;
1518
1519	rgd->rd_last_alloc = blk;
1520	block = rgd->rd_data0 + blk;
1521	ip->i_goal = block;
1522	error = gfs2_meta_inode_buffer(ip, &dibh);
1523	if (error == 0) {
1524		struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
1525		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1526		di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
1527		brelse(dibh);
1528	}
1529	if (rgd->rd_free < *n)
1530		goto rgrp_error;
1531
1532	rgd->rd_free -= *n;
1533
1534	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1535	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1536
1537	al->al_alloced += *n;
1538
1539	gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
1540	gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
1541
1542	spin_lock(&sdp->sd_rindex_spin);
1543	rgd->rd_free_clone -= *n;
1544	spin_unlock(&sdp->sd_rindex_spin);
1545	trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
1546	*bn = block;
1547	return 0;
1548
1549rgrp_error:
1550	gfs2_rgrp_error(rgd);
1551	return -EIO;
1552}
1553
1554/**
1555 * gfs2_alloc_di - Allocate a dinode
1556 * @dip: the directory that the inode is going in
1557 * @bn: the block number which is allocated
1558 * @generation: the generation number of the inode
1559 *
1560 * Returns: 0 on success or error
1561 */
1562
1563int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
1564{
1565	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1566	struct gfs2_alloc *al = dip->i_alloc;
1567	struct gfs2_rgrpd *rgd = al->al_rgd;
1568	u32 blk;
1569	u64 block;
1570	unsigned int n = 1;
1571
1572	blk = rgblk_search(rgd, rgd->rd_last_alloc,
1573			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
1574
1575	/* Since all blocks are reserved in advance, this shouldn't happen */
1576	if (blk == BFITNOENT)
1577		goto rgrp_error;
1578
1579	rgd->rd_last_alloc = blk;
1580	block = rgd->rd_data0 + blk;
1581	if (rgd->rd_free == 0)
1582		goto rgrp_error;
1583
1584	rgd->rd_free--;
1585	rgd->rd_dinodes++;
1586	*generation = rgd->rd_igeneration++;
1587	if (*generation == 0)
1588		*generation = rgd->rd_igeneration++;
1589	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1590	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1591
1592	al->al_alloced++;
1593
1594	gfs2_statfs_change(sdp, 0, -1, +1);
1595	gfs2_trans_add_unrevoke(sdp, block, 1);
1596
1597	spin_lock(&sdp->sd_rindex_spin);
1598	rgd->rd_free_clone--;
1599	spin_unlock(&sdp->sd_rindex_spin);
1600	trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
1601	*bn = block;
1602	return 0;
1603
1604rgrp_error:
1605	gfs2_rgrp_error(rgd);
1606	return -EIO;
1607}
1608
1609/**
1610 * __gfs2_free_blocks - free a contiguous run of block(s)
1611 * @ip: the inode these blocks are being freed from
1612 * @bstart: first block of a run of contiguous blocks
1613 * @blen: the length of the block run
1614 * @meta: 1 if the blocks represent metadata
1615 *
1616 */
1617
1618void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1619{
1620	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1621	struct gfs2_rgrpd *rgd;
1622
1623	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1624	if (!rgd)
1625		return;
1626	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1627	rgd->rd_free += blen;
1628
1629	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1630	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1631
1632	gfs2_trans_add_rg(rgd);
1633
1634	/* Directories keep their data in the metadata address space */
1635	if (meta || ip->i_depth)
1636		gfs2_meta_wipe(ip, bstart, blen);
1637}
1638
1639/**
 1640 * gfs2_free_meta - free a contiguous run of metadata block(s)
1641 * @ip: the inode these blocks are being freed from
1642 * @bstart: first block of a run of contiguous blocks
1643 * @blen: the length of the block run
1644 *
1645 */
1646
1647void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1648{
1649	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1650
1651	__gfs2_free_blocks(ip, bstart, blen, 1);
1652	gfs2_statfs_change(sdp, 0, +blen, 0);
1653	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1654}
1655
1656void gfs2_unlink_di(struct inode *inode)
1657{
1658	struct gfs2_inode *ip = GFS2_I(inode);
1659	struct gfs2_sbd *sdp = GFS2_SB(inode);
1660	struct gfs2_rgrpd *rgd;
1661	u64 blkno = ip->i_no_addr;
1662
1663	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1664	if (!rgd)
1665		return;
1666	trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
1667	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1668	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1669	gfs2_trans_add_rg(rgd);
1670}
1671
1672static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1673{
1674	struct gfs2_sbd *sdp = rgd->rd_sbd;
1675	struct gfs2_rgrpd *tmp_rgd;
1676
1677	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
1678	if (!tmp_rgd)
1679		return;
1680	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
1681
1682	if (!rgd->rd_dinodes)
1683		gfs2_consist_rgrpd(rgd);
1684	rgd->rd_dinodes--;
1685	rgd->rd_free++;
1686
1687	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1688	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1689
1690	gfs2_statfs_change(sdp, 0, +1, -1);
1691	gfs2_trans_add_rg(rgd);
1692}
1693
1694
1695void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1696{
1697	gfs2_free_uninit_di(rgd, ip->i_no_addr);
1698	trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
1699	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1700	gfs2_meta_wipe(ip, ip->i_no_addr, 1);
1701}
1702
1703/**
1704 * gfs2_check_blk_type - Check the type of a block
1705 * @sdp: The superblock
1706 * @no_addr: The block number to check
1707 * @type: The block type we are looking for
1708 *
1709 * Returns: 0 if the block type matches the expected type
1710 *          -ESTALE if it doesn't match
1711 *          or -ve errno if something went wrong while checking
1712 */
1713
1714int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1715{
1716	struct gfs2_rgrpd *rgd;
1717	struct gfs2_holder ri_gh, rgd_gh;
1718	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1719	int ri_locked = 0;
1720	int error;
1721
1722	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
1723		error = gfs2_rindex_hold(sdp, &ri_gh);
1724		if (error)
1725			goto fail;
1726		ri_locked = 1;
1727	}
1728
1729	error = -EINVAL;
1730	rgd = gfs2_blk2rgrpd(sdp, no_addr);
1731	if (!rgd)
1732		goto fail_rindex;
1733
1734	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
1735	if (error)
1736		goto fail_rindex;
1737
1738	if (gfs2_get_block_type(rgd, no_addr) != type)
1739		error = -ESTALE;
1740
1741	gfs2_glock_dq_uninit(&rgd_gh);
1742fail_rindex:
1743	if (ri_locked)
1744		gfs2_glock_dq_uninit(&ri_gh);
1745fail:
1746	return error;
1747}
1748
1749/**
1750 * gfs2_rlist_add - add a RG to a list of RGs
1751 * @sdp: the filesystem
1752 * @rlist: the list of resource groups
1753 * @block: the block
1754 *
1755 * Figure out what RG a block belongs to and add that RG to the list
1756 *
1757 * FIXME: Don't use NOFAIL
1758 *
1759 */
1760
1761void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1762		    u64 block)
1763{
1764	struct gfs2_rgrpd *rgd;
1765	struct gfs2_rgrpd **tmp;
1766	unsigned int new_space;
1767	unsigned int x;
1768
1769	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1770		return;
1771
1772	rgd = gfs2_blk2rgrpd(sdp, block);
1773	if (!rgd) {
1774		if (gfs2_consist(sdp))
1775			fs_err(sdp, "block = %llu\n", (unsigned long long)block);
1776		return;
1777	}
1778
1779	for (x = 0; x < rlist->rl_rgrps; x++)
1780		if (rlist->rl_rgd[x] == rgd)
1781			return;
1782
1783	if (rlist->rl_rgrps == rlist->rl_space) {
1784		new_space = rlist->rl_space + 10;
1785
1786		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1787			      GFP_NOFS | __GFP_NOFAIL);
1788
1789		if (rlist->rl_rgd) {
1790			memcpy(tmp, rlist->rl_rgd,
1791			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1792			kfree(rlist->rl_rgd);
1793		}
1794
1795		rlist->rl_space = new_space;
1796		rlist->rl_rgd = tmp;
1797	}
1798
1799	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1800}
1801
1802/**
1803 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1804 *      and initialize an array of glock holders for them
1805 * @rlist: the list of resource groups
1806 * @state: the lock state to acquire the RG lock in
 1807 *
1808 *
1809 * FIXME: Don't use NOFAIL
1810 *
1811 */
1812
1813void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
1814{
1815	unsigned int x;
1816
1817	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1818				GFP_NOFS | __GFP_NOFAIL);
1819	for (x = 0; x < rlist->rl_rgrps; x++)
1820		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1821				state, 0,
1822				&rlist->rl_ghs[x]);
1823}
1824
1825/**
1826 * gfs2_rlist_free - free a resource group list
 1827 * @rlist: the list of resource groups
1828 *
1829 */
1830
1831void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1832{
1833	unsigned int x;
1834
1835	kfree(rlist->rl_rgd);
1836
1837	if (rlist->rl_ghs) {
1838		for (x = 0; x < rlist->rl_rgrps; x++)
1839			gfs2_holder_uninit(&rlist->rl_ghs[x]);
1840		kfree(rlist->rl_ghs);
1841	}
1842}
1843