Linux Audio

Check our new training course

Loading...
v3.1
  1/*
  2 * Copyright (C) 2007 Oracle.  All rights reserved.
  3 *
  4 * This program is free software; you can redistribute it and/or
  5 * modify it under the terms of the GNU General Public
  6 * License v2 as published by the Free Software Foundation.
  7 *
  8 * This program is distributed in the hope that it will be useful,
  9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 11 * General Public License for more details.
 12 *
 13 * You should have received a copy of the GNU General Public
 14 * License along with this program; if not, write to the
 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 02111-1307, USA.
 17 */
 18
 19#include <linux/slab.h>
 20#include <linux/blkdev.h>
 21#include <linux/writeback.h>
 22#include <linux/pagevec.h>
 23#include "ctree.h"
 24#include "transaction.h"
 25#include "btrfs_inode.h"
 26#include "extent_io.h"
 
 
 
 
 27
 28static u64 entry_end(struct btrfs_ordered_extent *entry)
 29{
 30	if (entry->file_offset + entry->len < entry->file_offset)
 31		return (u64)-1;
 32	return entry->file_offset + entry->len;
 33}
 34
 35/* returns NULL if the insertion worked, or it returns the node it did find
 36 * in the tree
 37 */
 38static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
 39				   struct rb_node *node)
 40{
 41	struct rb_node **p = &root->rb_node;
 42	struct rb_node *parent = NULL;
 43	struct btrfs_ordered_extent *entry;
 44
 45	while (*p) {
 46		parent = *p;
 47		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
 48
 49		if (file_offset < entry->file_offset)
 50			p = &(*p)->rb_left;
 51		else if (file_offset >= entry_end(entry))
 52			p = &(*p)->rb_right;
 53		else
 54			return parent;
 55	}
 56
 57	rb_link_node(node, parent, p);
 58	rb_insert_color(node, root);
 59	return NULL;
 60}
 61
 
 
 
 
 
 
 
 
 62/*
 63 * look for a given offset in the tree, and if it can't be found return the
 64 * first lesser offset
 65 */
 66static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
 67				     struct rb_node **prev_ret)
 68{
 69	struct rb_node *n = root->rb_node;
 70	struct rb_node *prev = NULL;
 71	struct rb_node *test;
 72	struct btrfs_ordered_extent *entry;
 73	struct btrfs_ordered_extent *prev_entry = NULL;
 74
 75	while (n) {
 76		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
 77		prev = n;
 78		prev_entry = entry;
 79
 80		if (file_offset < entry->file_offset)
 81			n = n->rb_left;
 82		else if (file_offset >= entry_end(entry))
 83			n = n->rb_right;
 84		else
 85			return n;
 86	}
 87	if (!prev_ret)
 88		return NULL;
 89
 90	while (prev && file_offset >= entry_end(prev_entry)) {
 91		test = rb_next(prev);
 92		if (!test)
 93			break;
 94		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
 95				      rb_node);
 96		if (file_offset < entry_end(prev_entry))
 97			break;
 98
 99		prev = test;
100	}
101	if (prev)
102		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
103				      rb_node);
104	while (prev && file_offset < entry_end(prev_entry)) {
105		test = rb_prev(prev);
106		if (!test)
107			break;
108		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
109				      rb_node);
110		prev = test;
111	}
112	*prev_ret = prev;
113	return NULL;
114}
115
116/*
117 * helper to check if a given offset is inside a given entry
118 */
119static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
120{
121	if (file_offset < entry->file_offset ||
122	    entry->file_offset + entry->len <= file_offset)
123		return 0;
124	return 1;
125}
126
127static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
128			  u64 len)
129{
130	if (file_offset + len <= entry->file_offset ||
131	    entry->file_offset + entry->len <= file_offset)
132		return 0;
133	return 1;
134}
135
136/*
137 * look find the first ordered struct that has this offset, otherwise
138 * the first one less than this offset
139 */
140static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
141					  u64 file_offset)
142{
143	struct rb_root *root = &tree->tree;
144	struct rb_node *prev = NULL;
145	struct rb_node *ret;
146	struct btrfs_ordered_extent *entry;
147
148	if (tree->last) {
149		entry = rb_entry(tree->last, struct btrfs_ordered_extent,
150				 rb_node);
151		if (offset_in_entry(entry, file_offset))
152			return tree->last;
153	}
154	ret = __tree_search(root, file_offset, &prev);
155	if (!ret)
156		ret = prev;
157	if (ret)
158		tree->last = ret;
159	return ret;
160}
161
162/* allocate and add a new ordered_extent into the per-inode tree.
163 * file_offset is the logical offset in the file
164 *
165 * start is the disk block number of an extent already reserved in the
166 * extent allocation tree
167 *
168 * len is the length of the extent
169 *
170 * The tree is given a single reference on the ordered extent that was
171 * inserted.
172 */
173static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
174				      u64 start, u64 len, u64 disk_len,
175				      int type, int dio, int compress_type)
176{
 
 
177	struct btrfs_ordered_inode_tree *tree;
178	struct rb_node *node;
179	struct btrfs_ordered_extent *entry;
180
181	tree = &BTRFS_I(inode)->ordered_tree;
182	entry = kzalloc(sizeof(*entry), GFP_NOFS);
183	if (!entry)
184		return -ENOMEM;
185
186	entry->file_offset = file_offset;
187	entry->start = start;
188	entry->len = len;
189	entry->disk_len = disk_len;
190	entry->bytes_left = len;
191	entry->inode = inode;
192	entry->compress_type = compress_type;
 
193	if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
194		set_bit(type, &entry->flags);
195
196	if (dio)
197		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
198
199	/* one ref for the tree */
200	atomic_set(&entry->refs, 1);
201	init_waitqueue_head(&entry->wait);
202	INIT_LIST_HEAD(&entry->list);
203	INIT_LIST_HEAD(&entry->root_extent_list);
 
 
 
 
204
205	trace_btrfs_ordered_extent_add(inode, entry);
206
207	spin_lock(&tree->lock);
208	node = tree_insert(&tree->tree, file_offset,
209			   &entry->rb_node);
210	BUG_ON(node);
211	spin_unlock(&tree->lock);
 
212
213	spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
214	list_add_tail(&entry->root_extent_list,
215		      &BTRFS_I(inode)->root->fs_info->ordered_extents);
216	spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
 
 
 
 
 
 
 
217
218	BUG_ON(node);
219	return 0;
220}
221
222int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
223			     u64 start, u64 len, u64 disk_len, int type)
224{
225	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
226					  disk_len, type, 0,
227					  BTRFS_COMPRESS_NONE);
228}
229
230int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
231				 u64 start, u64 len, u64 disk_len, int type)
232{
233	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
234					  disk_len, type, 1,
235					  BTRFS_COMPRESS_NONE);
236}
237
238int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
239				      u64 start, u64 len, u64 disk_len,
240				      int type, int compress_type)
241{
242	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
243					  disk_len, type, 0,
244					  compress_type);
245}
246
247/*
248 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
249 * when an ordered extent is finished.  If the list covers more than one
250 * ordered extent, it is split across multiples.
251 */
252int btrfs_add_ordered_sum(struct inode *inode,
253			  struct btrfs_ordered_extent *entry,
254			  struct btrfs_ordered_sum *sum)
255{
256	struct btrfs_ordered_inode_tree *tree;
257
258	tree = &BTRFS_I(inode)->ordered_tree;
259	spin_lock(&tree->lock);
260	list_add_tail(&sum->list, &entry->list);
261	spin_unlock(&tree->lock);
262	return 0;
263}
264
265/*
266 * this is used to account for finished IO across a given range
267 * of the file.  The IO may span ordered extents.  If
268 * a given ordered_extent is completely done, 1 is returned, otherwise
269 * 0.
270 *
271 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
272 * to make sure this function only returns 1 once for a given ordered extent.
273 *
274 * file_offset is updated to one byte past the range that is recorded as
275 * complete.  This allows you to walk forward in the file.
276 */
277int btrfs_dec_test_first_ordered_pending(struct inode *inode,
278				   struct btrfs_ordered_extent **cached,
279				   u64 *file_offset, u64 io_size)
280{
 
281	struct btrfs_ordered_inode_tree *tree;
282	struct rb_node *node;
283	struct btrfs_ordered_extent *entry = NULL;
284	int ret;
 
285	u64 dec_end;
286	u64 dec_start;
287	u64 to_dec;
288
289	tree = &BTRFS_I(inode)->ordered_tree;
290	spin_lock(&tree->lock);
291	node = tree_search(tree, *file_offset);
292	if (!node) {
293		ret = 1;
294		goto out;
295	}
296
297	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
298	if (!offset_in_entry(entry, *file_offset)) {
299		ret = 1;
300		goto out;
301	}
302
303	dec_start = max(*file_offset, entry->file_offset);
304	dec_end = min(*file_offset + io_size, entry->file_offset +
305		      entry->len);
306	*file_offset = dec_end;
307	if (dec_start > dec_end) {
308		printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
309		       (unsigned long long)dec_start,
310		       (unsigned long long)dec_end);
311	}
312	to_dec = dec_end - dec_start;
313	if (to_dec > entry->bytes_left) {
314		printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
315		       (unsigned long long)entry->bytes_left,
316		       (unsigned long long)to_dec);
317	}
318	entry->bytes_left -= to_dec;
319	if (entry->bytes_left == 0)
 
 
 
320		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
321	else
 
 
 
 
 
322		ret = 1;
 
323out:
324	if (!ret && cached && entry) {
325		*cached = entry;
326		atomic_inc(&entry->refs);
327	}
328	spin_unlock(&tree->lock);
329	return ret == 0;
330}
331
332/*
333 * this is used to account for finished IO across a given range
334 * of the file.  The IO should not span ordered extents.  If
335 * a given ordered_extent is completely done, 1 is returned, otherwise
336 * 0.
337 *
338 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
339 * to make sure this function only returns 1 once for a given ordered extent.
340 */
341int btrfs_dec_test_ordered_pending(struct inode *inode,
342				   struct btrfs_ordered_extent **cached,
343				   u64 file_offset, u64 io_size)
344{
345	struct btrfs_ordered_inode_tree *tree;
346	struct rb_node *node;
347	struct btrfs_ordered_extent *entry = NULL;
 
348	int ret;
349
350	tree = &BTRFS_I(inode)->ordered_tree;
351	spin_lock(&tree->lock);
 
 
 
 
 
352	node = tree_search(tree, file_offset);
353	if (!node) {
354		ret = 1;
355		goto out;
356	}
357
358	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
 
359	if (!offset_in_entry(entry, file_offset)) {
360		ret = 1;
361		goto out;
362	}
363
364	if (io_size > entry->bytes_left) {
365		printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
366		       (unsigned long long)entry->bytes_left,
367		       (unsigned long long)io_size);
368	}
369	entry->bytes_left -= io_size;
370	if (entry->bytes_left == 0)
 
 
 
371		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
372	else
 
 
 
 
 
373		ret = 1;
 
374out:
375	if (!ret && cached && entry) {
376		*cached = entry;
377		atomic_inc(&entry->refs);
378	}
379	spin_unlock(&tree->lock);
380	return ret == 0;
381}
382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383/*
384 * used to drop a reference on an ordered extent.  This will free
385 * the extent if the last reference is dropped
386 */
387int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
388{
389	struct list_head *cur;
390	struct btrfs_ordered_sum *sum;
391
392	trace_btrfs_ordered_extent_put(entry->inode, entry);
393
394	if (atomic_dec_and_test(&entry->refs)) {
 
 
 
 
 
 
395		while (!list_empty(&entry->list)) {
396			cur = entry->list.next;
397			sum = list_entry(cur, struct btrfs_ordered_sum, list);
398			list_del(&sum->list);
399			kfree(sum);
400		}
401		kfree(entry);
402	}
403	return 0;
404}
405
406/*
407 * remove an ordered extent from the tree.  No references are dropped
408 * and you must wake_up entry->wait.  You must hold the tree lock
409 * while you call this function.
410 */
411static int __btrfs_remove_ordered_extent(struct inode *inode,
412				struct btrfs_ordered_extent *entry)
413{
 
414	struct btrfs_ordered_inode_tree *tree;
415	struct btrfs_root *root = BTRFS_I(inode)->root;
416	struct rb_node *node;
 
417
418	tree = &BTRFS_I(inode)->ordered_tree;
 
419	node = &entry->rb_node;
420	rb_erase(node, &tree->tree);
421	tree->last = NULL;
 
 
422	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
 
 
 
 
 
 
 
 
 
 
423
424	spin_lock(&root->fs_info->ordered_extent_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425	list_del_init(&entry->root_extent_list);
 
426
427	trace_btrfs_ordered_extent_remove(inode, entry);
428
429	/*
430	 * we have no more ordered extents for this inode and
431	 * no dirty pages.  We can safely remove it from the
432	 * list of ordered extents
433	 */
434	if (RB_EMPTY_ROOT(&tree->tree) &&
435	    !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
436		list_del_init(&BTRFS_I(inode)->ordered_operations);
437	}
438	spin_unlock(&root->fs_info->ordered_extent_lock);
439
440	return 0;
441}
442
443/*
444 * remove an ordered extent from the tree.  No references are dropped
445 * but any waiters are woken.
446 */
447int btrfs_remove_ordered_extent(struct inode *inode,
448				struct btrfs_ordered_extent *entry)
449{
450	struct btrfs_ordered_inode_tree *tree;
451	int ret;
452
453	tree = &BTRFS_I(inode)->ordered_tree;
454	spin_lock(&tree->lock);
455	ret = __btrfs_remove_ordered_extent(inode, entry);
456	spin_unlock(&tree->lock);
457	wake_up(&entry->wait);
458
459	return ret;
 
 
460}
461
462/*
463 * wait for all the ordered extents in a root.  This is done when balancing
464 * space between drives.
465 */
466int btrfs_wait_ordered_extents(struct btrfs_root *root,
467			       int nocow_only, int delay_iput)
468{
469	struct list_head splice;
470	struct list_head *cur;
471	struct btrfs_ordered_extent *ordered;
472	struct inode *inode;
473
474	INIT_LIST_HEAD(&splice);
475
476	spin_lock(&root->fs_info->ordered_extent_lock);
477	list_splice_init(&root->fs_info->ordered_extents, &splice);
478	while (!list_empty(&splice)) {
479		cur = splice.next;
480		ordered = list_entry(cur, struct btrfs_ordered_extent,
481				     root_extent_list);
482		if (nocow_only &&
483		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
484		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
485			list_move(&ordered->root_extent_list,
486				  &root->fs_info->ordered_extents);
487			cond_resched_lock(&root->fs_info->ordered_extent_lock);
488			continue;
489		}
490
491		list_del_init(&ordered->root_extent_list);
 
492		atomic_inc(&ordered->refs);
 
493
494		/*
495		 * the inode may be getting freed (in sys_unlink path).
496		 */
497		inode = igrab(ordered->inode);
498
499		spin_unlock(&root->fs_info->ordered_extent_lock);
500
501		if (inode) {
502			btrfs_start_ordered_extent(inode, ordered, 1);
503			btrfs_put_ordered_extent(ordered);
504			if (delay_iput)
505				btrfs_add_delayed_iput(inode);
506			else
507				iput(inode);
508		} else {
509			btrfs_put_ordered_extent(ordered);
510		}
511
512		spin_lock(&root->fs_info->ordered_extent_lock);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513	}
514	spin_unlock(&root->fs_info->ordered_extent_lock);
515	return 0;
 
516}
517
518/*
519 * this is used during transaction commit to write all the inodes
520 * added to the ordered operation list.  These files must be fully on
521 * disk before the transaction commits.
522 *
523 * we have two modes here, one is to just start the IO via filemap_flush
524 * and the other is to wait for all the io.  When we wait, we have an
525 * extra check to make sure the ordered operation list really is empty
526 * before we return
527 */
528int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
529{
530	struct btrfs_inode *btrfs_inode;
531	struct inode *inode;
532	struct list_head splice;
 
 
533
534	INIT_LIST_HEAD(&splice);
535
536	mutex_lock(&root->fs_info->ordered_operations_mutex);
537	spin_lock(&root->fs_info->ordered_extent_lock);
538again:
539	list_splice_init(&root->fs_info->ordered_operations, &splice);
540
541	while (!list_empty(&splice)) {
542		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
543				   ordered_operations);
544
545		inode = &btrfs_inode->vfs_inode;
546
547		list_del_init(&btrfs_inode->ordered_operations);
548
549		/*
550		 * the inode may be getting freed (in sys_unlink path).
551		 */
552		inode = igrab(inode);
553
554		if (!wait && inode) {
555			list_add_tail(&BTRFS_I(inode)->ordered_operations,
556			      &root->fs_info->ordered_operations);
557		}
558		spin_unlock(&root->fs_info->ordered_extent_lock);
559
560		if (inode) {
561			if (wait)
562				btrfs_wait_ordered_range(inode, 0, (u64)-1);
563			else
564				filemap_flush(inode->i_mapping);
565			btrfs_add_delayed_iput(inode);
566		}
567
568		cond_resched();
569		spin_lock(&root->fs_info->ordered_extent_lock);
570	}
571	if (wait && !list_empty(&root->fs_info->ordered_operations))
572		goto again;
573
574	spin_unlock(&root->fs_info->ordered_extent_lock);
575	mutex_unlock(&root->fs_info->ordered_operations_mutex);
576
577	return 0;
578}
579
580/*
581 * Used to start IO or wait for a given ordered extent to finish.
582 *
583 * If wait is one, this effectively waits on page writeback for all the pages
584 * in the extent, and it waits on the io completion code to insert
585 * metadata into the btree corresponding to the extent
586 */
587void btrfs_start_ordered_extent(struct inode *inode,
588				       struct btrfs_ordered_extent *entry,
589				       int wait)
590{
591	u64 start = entry->file_offset;
592	u64 end = start + entry->len - 1;
593
594	trace_btrfs_ordered_extent_start(inode, entry);
595
596	/*
597	 * pages in the range can be dirty, clean or writeback.  We
598	 * start IO on any dirty ones so the wait doesn't stall waiting
599	 * for pdflush to find them
600	 */
601	if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
602		filemap_fdatawrite_range(inode->i_mapping, start, end);
603	if (wait) {
604		wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
605						 &entry->flags));
606	}
607}
608
609/*
610 * Used to wait on ordered extents across a large range of bytes.
611 */
612int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
613{
 
 
614	u64 end;
615	u64 orig_end;
616	struct btrfs_ordered_extent *ordered;
617	int found;
618
619	if (start + len < start) {
620		orig_end = INT_LIMIT(loff_t);
621	} else {
622		orig_end = start + len - 1;
623		if (orig_end > INT_LIMIT(loff_t))
624			orig_end = INT_LIMIT(loff_t);
625	}
626again:
627	/* start IO across the range first to instantiate any delalloc
628	 * extents
629	 */
630	filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
 
 
631
632	/* The compression code will leave pages locked but return from
633	 * writepage without setting the page writeback.  Starting again
634	 * with WB_SYNC_ALL will end up waiting for the IO to actually start.
 
 
 
635	 */
636	filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
637
638	filemap_fdatawait_range(inode->i_mapping, start, orig_end);
639
640	end = orig_end;
641	found = 0;
642	while (1) {
643		ordered = btrfs_lookup_first_ordered_extent(inode, end);
644		if (!ordered)
645			break;
646		if (ordered->file_offset > orig_end) {
647			btrfs_put_ordered_extent(ordered);
648			break;
649		}
650		if (ordered->file_offset + ordered->len < start) {
651			btrfs_put_ordered_extent(ordered);
652			break;
653		}
654		found++;
655		btrfs_start_ordered_extent(inode, ordered, 1);
656		end = ordered->file_offset;
 
 
657		btrfs_put_ordered_extent(ordered);
658		if (end == 0 || end == start)
659			break;
660		end--;
661	}
662	if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
663			   EXTENT_DELALLOC, 0, NULL)) {
664		schedule_timeout(1);
665		goto again;
666	}
667	return 0;
668}
669
670/*
671 * find an ordered extent corresponding to file_offset.  return NULL if
672 * nothing is found, otherwise take a reference on the extent and return it
673 */
674struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
675							 u64 file_offset)
676{
677	struct btrfs_ordered_inode_tree *tree;
678	struct rb_node *node;
679	struct btrfs_ordered_extent *entry = NULL;
680
681	tree = &BTRFS_I(inode)->ordered_tree;
682	spin_lock(&tree->lock);
683	node = tree_search(tree, file_offset);
684	if (!node)
685		goto out;
686
687	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
688	if (!offset_in_entry(entry, file_offset))
689		entry = NULL;
690	if (entry)
691		atomic_inc(&entry->refs);
692out:
693	spin_unlock(&tree->lock);
694	return entry;
695}
696
697/* Since the DIO code tries to lock a wide area we need to look for any ordered
698 * extents that exist in the range, rather than just the start of the range.
699 */
700struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
701							u64 file_offset,
702							u64 len)
703{
704	struct btrfs_ordered_inode_tree *tree;
705	struct rb_node *node;
706	struct btrfs_ordered_extent *entry = NULL;
707
708	tree = &BTRFS_I(inode)->ordered_tree;
709	spin_lock(&tree->lock);
710	node = tree_search(tree, file_offset);
711	if (!node) {
712		node = tree_search(tree, file_offset + len);
713		if (!node)
714			goto out;
715	}
716
717	while (1) {
718		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
719		if (range_overlaps(entry, file_offset, len))
720			break;
721
722		if (entry->file_offset >= file_offset + len) {
723			entry = NULL;
724			break;
725		}
726		entry = NULL;
727		node = rb_next(node);
728		if (!node)
729			break;
730	}
731out:
732	if (entry)
733		atomic_inc(&entry->refs);
734	spin_unlock(&tree->lock);
735	return entry;
736}
737
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738/*
739 * lookup and return any extent before 'file_offset'.  NULL is returned
740 * if none is found
741 */
742struct btrfs_ordered_extent *
743btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
744{
745	struct btrfs_ordered_inode_tree *tree;
746	struct rb_node *node;
747	struct btrfs_ordered_extent *entry = NULL;
748
749	tree = &BTRFS_I(inode)->ordered_tree;
750	spin_lock(&tree->lock);
751	node = tree_search(tree, file_offset);
752	if (!node)
753		goto out;
754
755	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
756	atomic_inc(&entry->refs);
757out:
758	spin_unlock(&tree->lock);
759	return entry;
760}
761
762/*
763 * After an extent is done, call this to conditionally update the on disk
764 * i_size.  i_size is updated to cover any fully written part of the file.
765 */
766int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
767				struct btrfs_ordered_extent *ordered)
768{
769	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
770	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
771	u64 disk_i_size;
772	u64 new_i_size;
773	u64 i_size_test;
774	u64 i_size = i_size_read(inode);
775	struct rb_node *node;
776	struct rb_node *prev = NULL;
777	struct btrfs_ordered_extent *test;
778	int ret = 1;
 
779
780	if (ordered)
 
781		offset = entry_end(ordered);
782	else
783		offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
784
785	spin_lock(&tree->lock);
 
 
 
786	disk_i_size = BTRFS_I(inode)->disk_i_size;
787
788	/* truncate file */
789	if (disk_i_size > i_size) {
790		BTRFS_I(inode)->disk_i_size = i_size;
791		ret = 0;
792		goto out;
793	}
794
795	/*
796	 * if the disk i_size is already at the inode->i_size, or
797	 * this ordered extent is inside the disk i_size, we're done
798	 */
799	if (disk_i_size == i_size || offset <= disk_i_size) {
800		goto out;
801	}
802
803	/*
804	 * we can't update the disk_isize if there are delalloc bytes
805	 * between disk_i_size and  this ordered extent
806	 */
807	if (test_range_bit(io_tree, disk_i_size, offset - 1,
808			   EXTENT_DELALLOC, 0, NULL)) {
809		goto out;
810	}
811	/*
812	 * walk backward from this ordered extent to disk_i_size.
813	 * if we find an ordered extent then we can't update disk i_size
814	 * yet
815	 */
816	if (ordered) {
817		node = rb_prev(&ordered->rb_node);
818	} else {
819		prev = tree_search(tree, offset);
820		/*
821		 * we insert file extents without involving ordered struct,
822		 * so there should be no ordered struct cover this offset
823		 */
824		if (prev) {
825			test = rb_entry(prev, struct btrfs_ordered_extent,
826					rb_node);
827			BUG_ON(offset_in_entry(test, offset));
828		}
829		node = prev;
830	}
831	while (node) {
832		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
 
 
 
 
833		if (test->file_offset + test->len <= disk_i_size)
834			break;
835		if (test->file_offset >= i_size)
836			break;
837		if (test->file_offset >= disk_i_size)
 
 
 
 
 
 
 
 
 
 
 
 
838			goto out;
839		node = rb_prev(node);
840	}
841	new_i_size = min_t(u64, offset, i_size);
842
843	/*
844	 * at this point, we know we can safely update i_size to at least
845	 * the offset from this ordered extent.  But, we need to
846	 * walk forward and see if ios from higher up in the file have
847	 * finished.
848	 */
849	if (ordered) {
850		node = rb_next(&ordered->rb_node);
851	} else {
852		if (prev)
853			node = rb_next(prev);
854		else
855			node = rb_first(&tree->tree);
856	}
857	i_size_test = 0;
858	if (node) {
859		/*
860		 * do we have an area where IO might have finished
861		 * between our ordered extent and the next one.
862		 */
863		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
864		if (test->file_offset > offset)
865			i_size_test = test->file_offset;
866	} else {
867		i_size_test = i_size;
868	}
869
870	/*
871	 * i_size_test is the end of a region after this ordered
872	 * extent where there are no ordered extents.  As long as there
873	 * are no delalloc bytes in this area, it is safe to update
874	 * disk_i_size to the end of the region.
875	 */
876	if (i_size_test > offset &&
877	    !test_range_bit(io_tree, offset, i_size_test - 1,
878			    EXTENT_DELALLOC, 0, NULL)) {
879		new_i_size = min_t(u64, i_size_test, i_size);
880	}
881	BTRFS_I(inode)->disk_i_size = new_i_size;
882	ret = 0;
883out:
884	/*
885	 * we need to remove the ordered extent with the tree lock held
886	 * so that other people calling this function don't find our fully
887	 * processed ordered entry and skip updating the i_size
 
 
888	 */
889	if (ordered)
890		__btrfs_remove_ordered_extent(inode, ordered);
891	spin_unlock(&tree->lock);
892	if (ordered)
893		wake_up(&ordered->wait);
894	return ret;
895}
896
897/*
898 * search the ordered extents for one corresponding to 'offset' and
899 * try to find a checksum.  This is used because we allow pages to
900 * be reclaimed before their checksum is actually put into the btree
901 */
902int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
903			   u32 *sum)
904{
905	struct btrfs_ordered_sum *ordered_sum;
906	struct btrfs_sector_sum *sector_sums;
907	struct btrfs_ordered_extent *ordered;
908	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
909	unsigned long num_sectors;
910	unsigned long i;
911	u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
912	int ret = 1;
913
914	ordered = btrfs_lookup_ordered_extent(inode, offset);
915	if (!ordered)
916		return 1;
917
918	spin_lock(&tree->lock);
919	list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
920		if (disk_bytenr >= ordered_sum->bytenr) {
921			num_sectors = ordered_sum->len / sectorsize;
922			sector_sums = ordered_sum->sums;
923			for (i = 0; i < num_sectors; i++) {
924				if (sector_sums[i].bytenr == disk_bytenr) {
925					*sum = sector_sums[i].sum;
926					ret = 0;
927					goto out;
928				}
929			}
 
 
 
 
930		}
931	}
932out:
933	spin_unlock(&tree->lock);
934	btrfs_put_ordered_extent(ordered);
935	return ret;
936}
937
938
939/*
940 * add a given inode to the list of inodes that must be fully on
941 * disk before a transaction commit finishes.
942 *
943 * This basically gives us the ext3 style data=ordered mode, and it is mostly
944 * used to make sure renamed files are fully on disk.
945 *
946 * It is a noop if the inode is already fully on disk.
947 *
948 * If trans is not null, we'll do a friendly check for a transaction that
949 * is already flushing things and force the IO down ourselves.
950 */
951int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
952				struct btrfs_root *root,
953				struct inode *inode)
954{
955	u64 last_mod;
956
957	last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
958
959	/*
960	 * if this file hasn't been changed since the last transaction
961	 * commit, we can safely return without doing anything
962	 */
963	if (last_mod < root->fs_info->last_trans_committed)
964		return 0;
965
966	/*
967	 * the transaction is already committing.  Just start the IO and
968	 * don't bother with all of this list nonsense
969	 */
970	if (trans && root->fs_info->running_transaction->blocked) {
971		btrfs_wait_ordered_range(inode, 0, (u64)-1);
972		return 0;
973	}
974
975	spin_lock(&root->fs_info->ordered_extent_lock);
976	if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
977		list_add_tail(&BTRFS_I(inode)->ordered_operations,
978			      &root->fs_info->ordered_operations);
979	}
980	spin_unlock(&root->fs_info->ordered_extent_lock);
981
982	return 0;
 
 
 
 
 
983}
v4.10.11
   1/*
   2 * Copyright (C) 2007 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   16 * Boston, MA 02111-1307, USA.
  17 */
  18
  19#include <linux/slab.h>
  20#include <linux/blkdev.h>
  21#include <linux/writeback.h>
  22#include <linux/pagevec.h>
  23#include "ctree.h"
  24#include "transaction.h"
  25#include "btrfs_inode.h"
  26#include "extent_io.h"
  27#include "disk-io.h"
  28#include "compression.h"
  29
  30static struct kmem_cache *btrfs_ordered_extent_cache;
  31
  32static u64 entry_end(struct btrfs_ordered_extent *entry)
  33{
  34	if (entry->file_offset + entry->len < entry->file_offset)
  35		return (u64)-1;
  36	return entry->file_offset + entry->len;
  37}
  38
  39/* returns NULL if the insertion worked, or it returns the node it did find
  40 * in the tree
  41 */
  42static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
  43				   struct rb_node *node)
  44{
  45	struct rb_node **p = &root->rb_node;
  46	struct rb_node *parent = NULL;
  47	struct btrfs_ordered_extent *entry;
  48
  49	while (*p) {
  50		parent = *p;
  51		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
  52
  53		if (file_offset < entry->file_offset)
  54			p = &(*p)->rb_left;
  55		else if (file_offset >= entry_end(entry))
  56			p = &(*p)->rb_right;
  57		else
  58			return parent;
  59	}
  60
  61	rb_link_node(node, parent, p);
  62	rb_insert_color(node, root);
  63	return NULL;
  64}
  65
  66static void ordered_data_tree_panic(struct inode *inode, int errno,
  67					       u64 offset)
  68{
  69	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
  70	btrfs_panic(fs_info, errno,
  71		    "Inconsistency in ordered tree at offset %llu", offset);
  72}
  73
  74/*
  75 * look for a given offset in the tree, and if it can't be found return the
  76 * first lesser offset
  77 */
  78static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
  79				     struct rb_node **prev_ret)
  80{
  81	struct rb_node *n = root->rb_node;
  82	struct rb_node *prev = NULL;
  83	struct rb_node *test;
  84	struct btrfs_ordered_extent *entry;
  85	struct btrfs_ordered_extent *prev_entry = NULL;
  86
  87	while (n) {
  88		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
  89		prev = n;
  90		prev_entry = entry;
  91
  92		if (file_offset < entry->file_offset)
  93			n = n->rb_left;
  94		else if (file_offset >= entry_end(entry))
  95			n = n->rb_right;
  96		else
  97			return n;
  98	}
  99	if (!prev_ret)
 100		return NULL;
 101
 102	while (prev && file_offset >= entry_end(prev_entry)) {
 103		test = rb_next(prev);
 104		if (!test)
 105			break;
 106		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
 107				      rb_node);
 108		if (file_offset < entry_end(prev_entry))
 109			break;
 110
 111		prev = test;
 112	}
 113	if (prev)
 114		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
 115				      rb_node);
 116	while (prev && file_offset < entry_end(prev_entry)) {
 117		test = rb_prev(prev);
 118		if (!test)
 119			break;
 120		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
 121				      rb_node);
 122		prev = test;
 123	}
 124	*prev_ret = prev;
 125	return NULL;
 126}
 127
 128/*
 129 * helper to check if a given offset is inside a given entry
 130 */
 131static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
 132{
 133	if (file_offset < entry->file_offset ||
 134	    entry->file_offset + entry->len <= file_offset)
 135		return 0;
 136	return 1;
 137}
 138
 139static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
 140			  u64 len)
 141{
 142	if (file_offset + len <= entry->file_offset ||
 143	    entry->file_offset + entry->len <= file_offset)
 144		return 0;
 145	return 1;
 146}
 147
 148/*
 149 * look find the first ordered struct that has this offset, otherwise
 150 * the first one less than this offset
 151 */
 152static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 153					  u64 file_offset)
 154{
 155	struct rb_root *root = &tree->tree;
 156	struct rb_node *prev = NULL;
 157	struct rb_node *ret;
 158	struct btrfs_ordered_extent *entry;
 159
 160	if (tree->last) {
 161		entry = rb_entry(tree->last, struct btrfs_ordered_extent,
 162				 rb_node);
 163		if (offset_in_entry(entry, file_offset))
 164			return tree->last;
 165	}
 166	ret = __tree_search(root, file_offset, &prev);
 167	if (!ret)
 168		ret = prev;
 169	if (ret)
 170		tree->last = ret;
 171	return ret;
 172}
 173
 174/* allocate and add a new ordered_extent into the per-inode tree.
 175 * file_offset is the logical offset in the file
 176 *
 177 * start is the disk block number of an extent already reserved in the
 178 * extent allocation tree
 179 *
 180 * len is the length of the extent
 181 *
 182 * The tree is given a single reference on the ordered extent that was
 183 * inserted.
 184 */
 185static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 186				      u64 start, u64 len, u64 disk_len,
 187				      int type, int dio, int compress_type)
 188{
 189	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 190	struct btrfs_root *root = BTRFS_I(inode)->root;
 191	struct btrfs_ordered_inode_tree *tree;
 192	struct rb_node *node;
 193	struct btrfs_ordered_extent *entry;
 194
 195	tree = &BTRFS_I(inode)->ordered_tree;
 196	entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
 197	if (!entry)
 198		return -ENOMEM;
 199
 200	entry->file_offset = file_offset;
 201	entry->start = start;
 202	entry->len = len;
 203	entry->disk_len = disk_len;
 204	entry->bytes_left = len;
 205	entry->inode = igrab(inode);
 206	entry->compress_type = compress_type;
 207	entry->truncated_len = (u64)-1;
 208	if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
 209		set_bit(type, &entry->flags);
 210
 211	if (dio)
 212		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
 213
 214	/* one ref for the tree */
 215	atomic_set(&entry->refs, 1);
 216	init_waitqueue_head(&entry->wait);
 217	INIT_LIST_HEAD(&entry->list);
 218	INIT_LIST_HEAD(&entry->root_extent_list);
 219	INIT_LIST_HEAD(&entry->work_list);
 220	init_completion(&entry->completion);
 221	INIT_LIST_HEAD(&entry->log_list);
 222	INIT_LIST_HEAD(&entry->trans_list);
 223
 224	trace_btrfs_ordered_extent_add(inode, entry);
 225
 226	spin_lock_irq(&tree->lock);
 227	node = tree_insert(&tree->tree, file_offset,
 228			   &entry->rb_node);
 229	if (node)
 230		ordered_data_tree_panic(inode, -EEXIST, file_offset);
 231	spin_unlock_irq(&tree->lock);
 232
 233	spin_lock(&root->ordered_extent_lock);
 234	list_add_tail(&entry->root_extent_list,
 235		      &root->ordered_extents);
 236	root->nr_ordered_extents++;
 237	if (root->nr_ordered_extents == 1) {
 238		spin_lock(&fs_info->ordered_root_lock);
 239		BUG_ON(!list_empty(&root->ordered_root));
 240		list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
 241		spin_unlock(&fs_info->ordered_root_lock);
 242	}
 243	spin_unlock(&root->ordered_extent_lock);
 244
 
 245	return 0;
 246}
 247
 248int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 249			     u64 start, u64 len, u64 disk_len, int type)
 250{
 251	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
 252					  disk_len, type, 0,
 253					  BTRFS_COMPRESS_NONE);
 254}
 255
 256int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
 257				 u64 start, u64 len, u64 disk_len, int type)
 258{
 259	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
 260					  disk_len, type, 1,
 261					  BTRFS_COMPRESS_NONE);
 262}
 263
 264int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
 265				      u64 start, u64 len, u64 disk_len,
 266				      int type, int compress_type)
 267{
 268	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
 269					  disk_len, type, 0,
 270					  compress_type);
 271}
 272
 273/*
 274 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
 275 * when an ordered extent is finished.  If the list covers more than one
 276 * ordered extent, it is split across multiples.
 277 */
 278void btrfs_add_ordered_sum(struct inode *inode,
 279			   struct btrfs_ordered_extent *entry,
 280			   struct btrfs_ordered_sum *sum)
 281{
 282	struct btrfs_ordered_inode_tree *tree;
 283
 284	tree = &BTRFS_I(inode)->ordered_tree;
 285	spin_lock_irq(&tree->lock);
 286	list_add_tail(&sum->list, &entry->list);
 287	spin_unlock_irq(&tree->lock);
 
 288}
 289
 290/*
 291 * this is used to account for finished IO across a given range
 292 * of the file.  The IO may span ordered extents.  If
 293 * a given ordered_extent is completely done, 1 is returned, otherwise
 294 * 0.
 295 *
 296 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
 297 * to make sure this function only returns 1 once for a given ordered extent.
 298 *
 299 * file_offset is updated to one byte past the range that is recorded as
 300 * complete.  This allows you to walk forward in the file.
 301 */
 302int btrfs_dec_test_first_ordered_pending(struct inode *inode,
 303				   struct btrfs_ordered_extent **cached,
 304				   u64 *file_offset, u64 io_size, int uptodate)
 305{
 306	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 307	struct btrfs_ordered_inode_tree *tree;
 308	struct rb_node *node;
 309	struct btrfs_ordered_extent *entry = NULL;
 310	int ret;
 311	unsigned long flags;
 312	u64 dec_end;
 313	u64 dec_start;
 314	u64 to_dec;
 315
 316	tree = &BTRFS_I(inode)->ordered_tree;
 317	spin_lock_irqsave(&tree->lock, flags);
 318	node = tree_search(tree, *file_offset);
 319	if (!node) {
 320		ret = 1;
 321		goto out;
 322	}
 323
 324	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
 325	if (!offset_in_entry(entry, *file_offset)) {
 326		ret = 1;
 327		goto out;
 328	}
 329
 330	dec_start = max(*file_offset, entry->file_offset);
 331	dec_end = min(*file_offset + io_size, entry->file_offset +
 332		      entry->len);
 333	*file_offset = dec_end;
 334	if (dec_start > dec_end) {
 335		btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu",
 336			   dec_start, dec_end);
 
 337	}
 338	to_dec = dec_end - dec_start;
 339	if (to_dec > entry->bytes_left) {
 340		btrfs_crit(fs_info,
 341			   "bad ordered accounting left %llu size %llu",
 342			   entry->bytes_left, to_dec);
 343	}
 344	entry->bytes_left -= to_dec;
 345	if (!uptodate)
 346		set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
 347
 348	if (entry->bytes_left == 0) {
 349		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
 350		/*
 351		 * Implicit memory barrier after test_and_set_bit
 352		 */
 353		if (waitqueue_active(&entry->wait))
 354			wake_up(&entry->wait);
 355	} else {
 356		ret = 1;
 357	}
 358out:
 359	if (!ret && cached && entry) {
 360		*cached = entry;
 361		atomic_inc(&entry->refs);
 362	}
 363	spin_unlock_irqrestore(&tree->lock, flags);
 364	return ret == 0;
 365}
 366
 367/*
 368 * this is used to account for finished IO across a given range
 369 * of the file.  The IO should not span ordered extents.  If
 370 * a given ordered_extent is completely done, 1 is returned, otherwise
 371 * 0.
 372 *
 373 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
 374 * to make sure this function only returns 1 once for a given ordered extent.
 375 */
 376int btrfs_dec_test_ordered_pending(struct inode *inode,
 377				   struct btrfs_ordered_extent **cached,
 378				   u64 file_offset, u64 io_size, int uptodate)
 379{
 380	struct btrfs_ordered_inode_tree *tree;
 381	struct rb_node *node;
 382	struct btrfs_ordered_extent *entry = NULL;
 383	unsigned long flags;
 384	int ret;
 385
 386	tree = &BTRFS_I(inode)->ordered_tree;
 387	spin_lock_irqsave(&tree->lock, flags);
 388	if (cached && *cached) {
 389		entry = *cached;
 390		goto have_entry;
 391	}
 392
 393	node = tree_search(tree, file_offset);
 394	if (!node) {
 395		ret = 1;
 396		goto out;
 397	}
 398
 399	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
 400have_entry:
 401	if (!offset_in_entry(entry, file_offset)) {
 402		ret = 1;
 403		goto out;
 404	}
 405
 406	if (io_size > entry->bytes_left) {
 407		btrfs_crit(BTRFS_I(inode)->root->fs_info,
 408			   "bad ordered accounting left %llu size %llu",
 409		       entry->bytes_left, io_size);
 410	}
 411	entry->bytes_left -= io_size;
 412	if (!uptodate)
 413		set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
 414
 415	if (entry->bytes_left == 0) {
 416		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
 417		/*
 418		 * Implicit memory barrier after test_and_set_bit
 419		 */
 420		if (waitqueue_active(&entry->wait))
 421			wake_up(&entry->wait);
 422	} else {
 423		ret = 1;
 424	}
 425out:
 426	if (!ret && cached && entry) {
 427		*cached = entry;
 428		atomic_inc(&entry->refs);
 429	}
 430	spin_unlock_irqrestore(&tree->lock, flags);
 431	return ret == 0;
 432}
 433
 434/* Needs to either be called under a log transaction or the log_mutex */
 435void btrfs_get_logged_extents(struct inode *inode,
 436			      struct list_head *logged_list,
 437			      const loff_t start,
 438			      const loff_t end)
 439{
 440	struct btrfs_ordered_inode_tree *tree;
 441	struct btrfs_ordered_extent *ordered;
 442	struct rb_node *n;
 443	struct rb_node *prev;
 444
 445	tree = &BTRFS_I(inode)->ordered_tree;
 446	spin_lock_irq(&tree->lock);
 447	n = __tree_search(&tree->tree, end, &prev);
 448	if (!n)
 449		n = prev;
 450	for (; n; n = rb_prev(n)) {
 451		ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
 452		if (ordered->file_offset > end)
 453			continue;
 454		if (entry_end(ordered) <= start)
 455			break;
 456		if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
 457			continue;
 458		list_add(&ordered->log_list, logged_list);
 459		atomic_inc(&ordered->refs);
 460	}
 461	spin_unlock_irq(&tree->lock);
 462}
 463
 464void btrfs_put_logged_extents(struct list_head *logged_list)
 465{
 466	struct btrfs_ordered_extent *ordered;
 467
 468	while (!list_empty(logged_list)) {
 469		ordered = list_first_entry(logged_list,
 470					   struct btrfs_ordered_extent,
 471					   log_list);
 472		list_del_init(&ordered->log_list);
 473		btrfs_put_ordered_extent(ordered);
 474	}
 475}
 476
 477void btrfs_submit_logged_extents(struct list_head *logged_list,
 478				 struct btrfs_root *log)
 479{
 480	int index = log->log_transid % 2;
 481
 482	spin_lock_irq(&log->log_extents_lock[index]);
 483	list_splice_tail(logged_list, &log->logged_list[index]);
 484	spin_unlock_irq(&log->log_extents_lock[index]);
 485}
 486
 487void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 488			       struct btrfs_root *log, u64 transid)
 489{
 490	struct btrfs_ordered_extent *ordered;
 491	int index = transid % 2;
 492
 493	spin_lock_irq(&log->log_extents_lock[index]);
 494	while (!list_empty(&log->logged_list[index])) {
 495		struct inode *inode;
 496		ordered = list_first_entry(&log->logged_list[index],
 497					   struct btrfs_ordered_extent,
 498					   log_list);
 499		list_del_init(&ordered->log_list);
 500		inode = ordered->inode;
 501		spin_unlock_irq(&log->log_extents_lock[index]);
 502
 503		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
 504		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
 505			u64 start = ordered->file_offset;
 506			u64 end = ordered->file_offset + ordered->len - 1;
 507
 508			WARN_ON(!inode);
 509			filemap_fdatawrite_range(inode->i_mapping, start, end);
 510		}
 511		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
 512						   &ordered->flags));
 513
 514		/*
 515		 * In order to keep us from losing our ordered extent
 516		 * information when committing the transaction we have to make
 517		 * sure that any logged extents are completed when we go to
 518		 * commit the transaction.  To do this we simply increase the
 519		 * current transactions pending_ordered counter and decrement it
 520		 * when the ordered extent completes.
 521		 */
 522		if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
 523			struct btrfs_ordered_inode_tree *tree;
 524
 525			tree = &BTRFS_I(inode)->ordered_tree;
 526			spin_lock_irq(&tree->lock);
 527			if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
 528				set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
 529				atomic_inc(&trans->transaction->pending_ordered);
 530			}
 531			spin_unlock_irq(&tree->lock);
 532		}
 533		btrfs_put_ordered_extent(ordered);
 534		spin_lock_irq(&log->log_extents_lock[index]);
 535	}
 536	spin_unlock_irq(&log->log_extents_lock[index]);
 537}
 538
 539void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
 540{
 541	struct btrfs_ordered_extent *ordered;
 542	int index = transid % 2;
 543
 544	spin_lock_irq(&log->log_extents_lock[index]);
 545	while (!list_empty(&log->logged_list[index])) {
 546		ordered = list_first_entry(&log->logged_list[index],
 547					   struct btrfs_ordered_extent,
 548					   log_list);
 549		list_del_init(&ordered->log_list);
 550		spin_unlock_irq(&log->log_extents_lock[index]);
 551		btrfs_put_ordered_extent(ordered);
 552		spin_lock_irq(&log->log_extents_lock[index]);
 553	}
 554	spin_unlock_irq(&log->log_extents_lock[index]);
 555}
 556
 557/*
 558 * used to drop a reference on an ordered extent.  This will free
 559 * the extent if the last reference is dropped
 560 */
 561void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 562{
 563	struct list_head *cur;
 564	struct btrfs_ordered_sum *sum;
 565
 566	trace_btrfs_ordered_extent_put(entry->inode, entry);
 567
 568	if (atomic_dec_and_test(&entry->refs)) {
 569		ASSERT(list_empty(&entry->log_list));
 570		ASSERT(list_empty(&entry->trans_list));
 571		ASSERT(list_empty(&entry->root_extent_list));
 572		ASSERT(RB_EMPTY_NODE(&entry->rb_node));
 573		if (entry->inode)
 574			btrfs_add_delayed_iput(entry->inode);
 575		while (!list_empty(&entry->list)) {
 576			cur = entry->list.next;
 577			sum = list_entry(cur, struct btrfs_ordered_sum, list);
 578			list_del(&sum->list);
 579			kfree(sum);
 580		}
 581		kmem_cache_free(btrfs_ordered_extent_cache, entry);
 582	}
 
 583}
 584
 585/*
 586 * remove an ordered extent from the tree.  No references are dropped
 587 * and waiters are woken up.
 
 588 */
 589void btrfs_remove_ordered_extent(struct inode *inode,
 590				 struct btrfs_ordered_extent *entry)
 591{
 592	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 593	struct btrfs_ordered_inode_tree *tree;
 594	struct btrfs_root *root = BTRFS_I(inode)->root;
 595	struct rb_node *node;
 596	bool dec_pending_ordered = false;
 597
 598	tree = &BTRFS_I(inode)->ordered_tree;
 599	spin_lock_irq(&tree->lock);
 600	node = &entry->rb_node;
 601	rb_erase(node, &tree->tree);
 602	RB_CLEAR_NODE(node);
 603	if (tree->last == node)
 604		tree->last = NULL;
 605	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
 606	if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
 607		dec_pending_ordered = true;
 608	spin_unlock_irq(&tree->lock);
 609
 610	/*
 611	 * The current running transaction is waiting on us, we need to let it
 612	 * know that we're complete and wake it up.
 613	 */
 614	if (dec_pending_ordered) {
 615		struct btrfs_transaction *trans;
 616
 617		/*
 618		 * The checks for trans are just a formality, it should be set,
 619		 * but if it isn't we don't want to deref/assert under the spin
 620		 * lock, so be nice and check if trans is set, but ASSERT() so
 621		 * if it isn't set a developer will notice.
 622		 */
 623		spin_lock(&fs_info->trans_lock);
 624		trans = fs_info->running_transaction;
 625		if (trans)
 626			atomic_inc(&trans->use_count);
 627		spin_unlock(&fs_info->trans_lock);
 628
 629		ASSERT(trans);
 630		if (trans) {
 631			if (atomic_dec_and_test(&trans->pending_ordered))
 632				wake_up(&trans->pending_wait);
 633			btrfs_put_transaction(trans);
 634		}
 635	}
 636
 637	spin_lock(&root->ordered_extent_lock);
 638	list_del_init(&entry->root_extent_list);
 639	root->nr_ordered_extents--;
 640
 641	trace_btrfs_ordered_extent_remove(inode, entry);
 642
 643	if (!root->nr_ordered_extents) {
 644		spin_lock(&fs_info->ordered_root_lock);
 645		BUG_ON(list_empty(&root->ordered_root));
 646		list_del_init(&root->ordered_root);
 647		spin_unlock(&fs_info->ordered_root_lock);
 
 
 
 648	}
 649	spin_unlock(&root->ordered_extent_lock);
 650	wake_up(&entry->wait);
 
 651}
 652
 653static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
 
 
 
 
 
 654{
 655	struct btrfs_ordered_extent *ordered;
 
 
 
 
 
 
 
 656
 657	ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
 658	btrfs_start_ordered_extent(ordered->inode, ordered, 1);
 659	complete(&ordered->completion);
 660}
 661
 662/*
 663 * wait for all the ordered extents in a root.  This is done when balancing
 664 * space between drives.
 665 */
 666int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
 667			       const u64 range_start, const u64 range_len)
 668{
 669	struct btrfs_fs_info *fs_info = root->fs_info;
 670	LIST_HEAD(splice);
 671	LIST_HEAD(skipped);
 672	LIST_HEAD(works);
 673	struct btrfs_ordered_extent *ordered, *next;
 674	int count = 0;
 675	const u64 range_end = range_start + range_len;
 676
 677	mutex_lock(&root->ordered_extent_mutex);
 678	spin_lock(&root->ordered_extent_lock);
 679	list_splice_init(&root->ordered_extents, &splice);
 680	while (!list_empty(&splice) && nr) {
 681		ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
 682					   root_extent_list);
 683
 684		if (range_end <= ordered->start ||
 685		    ordered->start + ordered->disk_len <= range_start) {
 686			list_move_tail(&ordered->root_extent_list, &skipped);
 687			cond_resched_lock(&root->ordered_extent_lock);
 688			continue;
 689		}
 690
 691		list_move_tail(&ordered->root_extent_list,
 692			       &root->ordered_extents);
 693		atomic_inc(&ordered->refs);
 694		spin_unlock(&root->ordered_extent_lock);
 695
 696		btrfs_init_work(&ordered->flush_work,
 697				btrfs_flush_delalloc_helper,
 698				btrfs_run_ordered_extent_work, NULL, NULL);
 699		list_add_tail(&ordered->work_list, &works);
 700		btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
 
 
 
 
 
 
 
 
 
 
 
 
 701
 702		cond_resched();
 703		spin_lock(&root->ordered_extent_lock);
 704		if (nr != -1)
 705			nr--;
 706		count++;
 707	}
 708	list_splice_tail(&skipped, &root->ordered_extents);
 709	list_splice_tail(&splice, &root->ordered_extents);
 710	spin_unlock(&root->ordered_extent_lock);
 711
 712	list_for_each_entry_safe(ordered, next, &works, work_list) {
 713		list_del_init(&ordered->work_list);
 714		wait_for_completion(&ordered->completion);
 715		btrfs_put_ordered_extent(ordered);
 716		cond_resched();
 717	}
 718	mutex_unlock(&root->ordered_extent_mutex);
 719
 720	return count;
 721}
 722
 723int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
 724			      const u64 range_start, const u64 range_len)
 
 
 
 
 
 
 
 
 
 725{
 726	struct btrfs_root *root;
 
 727	struct list_head splice;
 728	int done;
 729	int total_done = 0;
 730
 731	INIT_LIST_HEAD(&splice);
 732
 733	mutex_lock(&fs_info->ordered_operations_mutex);
 734	spin_lock(&fs_info->ordered_root_lock);
 735	list_splice_init(&fs_info->ordered_roots, &splice);
 736	while (!list_empty(&splice) && nr) {
 737		root = list_first_entry(&splice, struct btrfs_root,
 738					ordered_root);
 739		root = btrfs_grab_fs_root(root);
 740		BUG_ON(!root);
 741		list_move_tail(&root->ordered_root,
 742			       &fs_info->ordered_roots);
 743		spin_unlock(&fs_info->ordered_root_lock);
 744
 745		done = btrfs_wait_ordered_extents(root, nr,
 746						  range_start, range_len);
 747		btrfs_put_fs_root(root);
 748		total_done += done;
 749
 750		spin_lock(&fs_info->ordered_root_lock);
 751		if (nr != -1) {
 752			nr -= done;
 753			WARN_ON(nr < 0);
 
 
 
 
 
 
 
 
 
 754		}
 
 
 
 755	}
 756	list_splice_tail(&splice, &fs_info->ordered_roots);
 757	spin_unlock(&fs_info->ordered_root_lock);
 758	mutex_unlock(&fs_info->ordered_operations_mutex);
 
 
 759
 760	return total_done;
 761}
 762
 763/*
 764 * Used to start IO or wait for a given ordered extent to finish.
 765 *
 766 * If wait is one, this effectively waits on page writeback for all the pages
 767 * in the extent, and it waits on the io completion code to insert
 768 * metadata into the btree corresponding to the extent
 769 */
 770void btrfs_start_ordered_extent(struct inode *inode,
 771				       struct btrfs_ordered_extent *entry,
 772				       int wait)
 773{
 774	u64 start = entry->file_offset;
 775	u64 end = start + entry->len - 1;
 776
 777	trace_btrfs_ordered_extent_start(inode, entry);
 778
 779	/*
 780	 * pages in the range can be dirty, clean or writeback.  We
 781	 * start IO on any dirty ones so the wait doesn't stall waiting
 782	 * for the flusher thread to find them
 783	 */
 784	if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
 785		filemap_fdatawrite_range(inode->i_mapping, start, end);
 786	if (wait) {
 787		wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
 788						 &entry->flags));
 789	}
 790}
 791
 792/*
 793 * Used to wait on ordered extents across a large range of bytes.
 794 */
 795int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 796{
 797	int ret = 0;
 798	int ret_wb = 0;
 799	u64 end;
 800	u64 orig_end;
 801	struct btrfs_ordered_extent *ordered;
 
 802
 803	if (start + len < start) {
 804		orig_end = INT_LIMIT(loff_t);
 805	} else {
 806		orig_end = start + len - 1;
 807		if (orig_end > INT_LIMIT(loff_t))
 808			orig_end = INT_LIMIT(loff_t);
 809	}
 810
 811	/* start IO across the range first to instantiate any delalloc
 812	 * extents
 813	 */
 814	ret = btrfs_fdatawrite_range(inode, start, orig_end);
 815	if (ret)
 816		return ret;
 817
 818	/*
 819	 * If we have a writeback error don't return immediately. Wait first
 820	 * for any ordered extents that haven't completed yet. This is to make
 821	 * sure no one can dirty the same page ranges and call writepages()
 822	 * before the ordered extents complete - to avoid failures (-EEXIST)
 823	 * when adding the new ordered extents to the ordered tree.
 824	 */
 825	ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
 
 826
 827	end = orig_end;
 
 828	while (1) {
 829		ordered = btrfs_lookup_first_ordered_extent(inode, end);
 830		if (!ordered)
 831			break;
 832		if (ordered->file_offset > orig_end) {
 833			btrfs_put_ordered_extent(ordered);
 834			break;
 835		}
 836		if (ordered->file_offset + ordered->len <= start) {
 837			btrfs_put_ordered_extent(ordered);
 838			break;
 839		}
 
 840		btrfs_start_ordered_extent(inode, ordered, 1);
 841		end = ordered->file_offset;
 842		if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
 843			ret = -EIO;
 844		btrfs_put_ordered_extent(ordered);
 845		if (ret || end == 0 || end == start)
 846			break;
 847		end--;
 848	}
 849	return ret_wb ? ret_wb : ret;
 
 
 
 
 
 850}
 851
 852/*
 853 * find an ordered extent corresponding to file_offset.  return NULL if
 854 * nothing is found, otherwise take a reference on the extent and return it
 855 */
 856struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
 857							 u64 file_offset)
 858{
 859	struct btrfs_ordered_inode_tree *tree;
 860	struct rb_node *node;
 861	struct btrfs_ordered_extent *entry = NULL;
 862
 863	tree = &BTRFS_I(inode)->ordered_tree;
 864	spin_lock_irq(&tree->lock);
 865	node = tree_search(tree, file_offset);
 866	if (!node)
 867		goto out;
 868
 869	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
 870	if (!offset_in_entry(entry, file_offset))
 871		entry = NULL;
 872	if (entry)
 873		atomic_inc(&entry->refs);
 874out:
 875	spin_unlock_irq(&tree->lock);
 876	return entry;
 877}
 878
 879/* Since the DIO code tries to lock a wide area we need to look for any ordered
 880 * extents that exist in the range, rather than just the start of the range.
 881 */
 882struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
 883							u64 file_offset,
 884							u64 len)
 885{
 886	struct btrfs_ordered_inode_tree *tree;
 887	struct rb_node *node;
 888	struct btrfs_ordered_extent *entry = NULL;
 889
 890	tree = &BTRFS_I(inode)->ordered_tree;
 891	spin_lock_irq(&tree->lock);
 892	node = tree_search(tree, file_offset);
 893	if (!node) {
 894		node = tree_search(tree, file_offset + len);
 895		if (!node)
 896			goto out;
 897	}
 898
 899	while (1) {
 900		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
 901		if (range_overlaps(entry, file_offset, len))
 902			break;
 903
 904		if (entry->file_offset >= file_offset + len) {
 905			entry = NULL;
 906			break;
 907		}
 908		entry = NULL;
 909		node = rb_next(node);
 910		if (!node)
 911			break;
 912	}
 913out:
 914	if (entry)
 915		atomic_inc(&entry->refs);
 916	spin_unlock_irq(&tree->lock);
 917	return entry;
 918}
 919
 920bool btrfs_have_ordered_extents_in_range(struct inode *inode,
 921					 u64 file_offset,
 922					 u64 len)
 923{
 924	struct btrfs_ordered_extent *oe;
 925
 926	oe = btrfs_lookup_ordered_range(inode, file_offset, len);
 927	if (oe) {
 928		btrfs_put_ordered_extent(oe);
 929		return true;
 930	}
 931	return false;
 932}
 933
 934/*
 935 * lookup and return any extent before 'file_offset'.  NULL is returned
 936 * if none is found
 937 */
 938struct btrfs_ordered_extent *
 939btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
 940{
 941	struct btrfs_ordered_inode_tree *tree;
 942	struct rb_node *node;
 943	struct btrfs_ordered_extent *entry = NULL;
 944
 945	tree = &BTRFS_I(inode)->ordered_tree;
 946	spin_lock_irq(&tree->lock);
 947	node = tree_search(tree, file_offset);
 948	if (!node)
 949		goto out;
 950
 951	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
 952	atomic_inc(&entry->refs);
 953out:
 954	spin_unlock_irq(&tree->lock);
 955	return entry;
 956}
 957
 958/*
 959 * After an extent is done, call this to conditionally update the on disk
 960 * i_size.  i_size is updated to cover any fully written part of the file.
 961 */
 962int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 963				struct btrfs_ordered_extent *ordered)
 964{
 965	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
 
 966	u64 disk_i_size;
 967	u64 new_i_size;
 
 968	u64 i_size = i_size_read(inode);
 969	struct rb_node *node;
 970	struct rb_node *prev = NULL;
 971	struct btrfs_ordered_extent *test;
 972	int ret = 1;
 973	u64 orig_offset = offset;
 974
 975	spin_lock_irq(&tree->lock);
 976	if (ordered) {
 977		offset = entry_end(ordered);
 978		if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
 979			offset = min(offset,
 980				     ordered->file_offset +
 981				     ordered->truncated_len);
 982	} else {
 983		offset = ALIGN(offset, btrfs_inode_sectorsize(inode));
 984	}
 985	disk_i_size = BTRFS_I(inode)->disk_i_size;
 986
 987	/* truncate file */
 988	if (disk_i_size > i_size) {
 989		BTRFS_I(inode)->disk_i_size = orig_offset;
 990		ret = 0;
 991		goto out;
 992	}
 993
 994	/*
 995	 * if the disk i_size is already at the inode->i_size, or
 996	 * this ordered extent is inside the disk i_size, we're done
 997	 */
 998	if (disk_i_size == i_size)
 999		goto out;
 
1000
1001	/*
1002	 * We still need to update disk_i_size if outstanding_isize is greater
1003	 * than disk_i_size.
1004	 */
1005	if (offset <= disk_i_size &&
1006	    (!ordered || ordered->outstanding_isize <= disk_i_size))
1007		goto out;
1008
1009	/*
1010	 * walk backward from this ordered extent to disk_i_size.
1011	 * if we find an ordered extent then we can't update disk i_size
1012	 * yet
1013	 */
1014	if (ordered) {
1015		node = rb_prev(&ordered->rb_node);
1016	} else {
1017		prev = tree_search(tree, offset);
1018		/*
1019		 * we insert file extents without involving ordered struct,
1020		 * so there should be no ordered struct cover this offset
1021		 */
1022		if (prev) {
1023			test = rb_entry(prev, struct btrfs_ordered_extent,
1024					rb_node);
1025			BUG_ON(offset_in_entry(test, offset));
1026		}
1027		node = prev;
1028	}
1029	for (; node; node = rb_prev(node)) {
1030		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
1031
1032		/* We treat this entry as if it doesn't exist */
1033		if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
1034			continue;
1035		if (test->file_offset + test->len <= disk_i_size)
1036			break;
1037		if (test->file_offset >= i_size)
1038			break;
1039		if (entry_end(test) > disk_i_size) {
1040			/*
1041			 * we don't update disk_i_size now, so record this
1042			 * undealt i_size. Or we will not know the real
1043			 * i_size.
1044			 */
1045			if (test->outstanding_isize < offset)
1046				test->outstanding_isize = offset;
1047			if (ordered &&
1048			    ordered->outstanding_isize >
1049			    test->outstanding_isize)
1050				test->outstanding_isize =
1051						ordered->outstanding_isize;
1052			goto out;
1053		}
1054	}
1055	new_i_size = min_t(u64, offset, i_size);
1056
1057	/*
1058	 * Some ordered extents may completed before the current one, and
1059	 * we hold the real i_size in ->outstanding_isize.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1060	 */
1061	if (ordered && ordered->outstanding_isize > new_i_size)
1062		new_i_size = min_t(u64, ordered->outstanding_isize, i_size);
 
 
 
1063	BTRFS_I(inode)->disk_i_size = new_i_size;
1064	ret = 0;
1065out:
1066	/*
1067	 * We need to do this because we can't remove ordered extents until
1068	 * after the i_disk_size has been updated and then the inode has been
1069	 * updated to reflect the change, so we need to tell anybody who finds
1070	 * this ordered extent that we've already done all the real work, we
1071	 * just haven't completed all the other work.
1072	 */
1073	if (ordered)
1074		set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
1075	spin_unlock_irq(&tree->lock);
 
 
1076	return ret;
1077}
1078
1079/*
1080 * search the ordered extents for one corresponding to 'offset' and
1081 * try to find a checksum.  This is used because we allow pages to
1082 * be reclaimed before their checksum is actually put into the btree
1083 */
1084int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
1085			   u32 *sum, int len)
1086{
1087	struct btrfs_ordered_sum *ordered_sum;
 
1088	struct btrfs_ordered_extent *ordered;
1089	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
1090	unsigned long num_sectors;
1091	unsigned long i;
1092	u32 sectorsize = btrfs_inode_sectorsize(inode);
1093	int index = 0;
1094
1095	ordered = btrfs_lookup_ordered_extent(inode, offset);
1096	if (!ordered)
1097		return 0;
1098
1099	spin_lock_irq(&tree->lock);
1100	list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
1101		if (disk_bytenr >= ordered_sum->bytenr &&
1102		    disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
1103			i = (disk_bytenr - ordered_sum->bytenr) >>
1104			    inode->i_sb->s_blocksize_bits;
1105			num_sectors = ordered_sum->len >>
1106				      inode->i_sb->s_blocksize_bits;
1107			num_sectors = min_t(int, len - index, num_sectors - i);
1108			memcpy(sum + index, ordered_sum->sums + i,
1109			       num_sectors);
1110
1111			index += (int)num_sectors;
1112			if (index == len)
1113				goto out;
1114			disk_bytenr += num_sectors * sectorsize;
1115		}
1116	}
1117out:
1118	spin_unlock_irq(&tree->lock);
1119	btrfs_put_ordered_extent(ordered);
1120	return index;
1121}
1122
1123int __init ordered_data_init(void)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1124{
1125	btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
1126				     sizeof(struct btrfs_ordered_extent), 0,
1127				     SLAB_MEM_SPREAD,
1128				     NULL);
1129	if (!btrfs_ordered_extent_cache)
1130		return -ENOMEM;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1131
1132	return 0;
1133}
1134
1135void ordered_data_exit(void)
1136{
1137	kmem_cache_destroy(btrfs_ordered_extent_cache);
1138}