/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/time.h>
#include <linux/fs.h>
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
#include <linux/exportfs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/bio.h>

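/*
 * A note on "tails": reiserfs can store a small file, or the last partial
 * block of a larger file, directly in the tree as one or more direct
 * items instead of in an unformatted data block referenced by an
 * indirect item.  Much of this file deals with reading such tails into
 * pages and with converting them to unformatted nodes (direct ->
 * indirect) when they must be written or mapped for i/o.
 */
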
int reiserfs_commit_write(struct file *f, struct page *page,
			  unsigned from, unsigned to);

void reiserfs_evict_inode(struct inode *inode)
{
	/*
	 * We need blocks for transaction + (user+group) quota
	 * update (possibly delete)
	 */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 2 +
	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
	struct reiserfs_transaction_handle th;
	int err;

	if (!inode->i_nlink && !is_bad_inode(inode))
		dquot_initialize(inode);

	truncate_inode_pages_final(&inode->i_data);
	if (inode->i_nlink)
		goto no_delete;

	/*
	 * An objectid of 0 happens when we abort creating a new inode
	 * for some reason like lack of space...
	 * This also handles the bad_inode case.
	 */
	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {

		reiserfs_delete_xattrs(inode);

		reiserfs_write_lock(inode->i_sb);

		if (journal_begin(&th, inode->i_sb, jbegin_count))
			goto out;
		reiserfs_update_inode_transaction(inode);

		reiserfs_discard_prealloc(&th, inode);

		err = reiserfs_delete_object(&th, inode);

		/*
		 * Do quota update inside a transaction for journaled quotas.
		 * We must do that after delete_object so that quota updates
		 * go into the same transaction as stat data deletion
		 */
		if (!err) {
			int depth = reiserfs_write_unlock_nested(inode->i_sb);
			dquot_free_inode(inode);
			reiserfs_write_lock_nested(inode->i_sb, depth);
		}

		if (journal_end(&th))
			goto out;

		/*
		 * check return value from reiserfs_delete_object after
		 * ending the transaction
		 */
		if (err)
			goto out;

		/*
		 * all items of the file are deleted, so we can remove
		 * the "save" link.
		 * we can't do anything about an error here
		 */
		remove_save_link(inode, 0 /* not truncate */);
out:
		reiserfs_write_unlock(inode->i_sb);
	} else {
		/* no object items are in the tree */
		;
	}

	/* note this must go after the journal_end to prevent deadlock */
	clear_inode(inode);

	dquot_drop(inode);
	inode->i_blocks = 0;
	return;

no_delete:
	clear_inode(inode);
	dquot_drop(inode);
}

static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
			  __u32 objectid, loff_t offset, int type, int length)
{
	key->version = version;

	key->on_disk_key.k_dir_id = dirid;
	key->on_disk_key.k_objectid = objectid;
	set_cpu_key_k_offset(key, offset);
	set_cpu_key_k_type(key, type);
	key->key_length = length;
}

/*
 * take the base of inode_key (dirid, objectid), which always comes from
 * the inode, and the version from the inode; set the offset and type of
 * the key
 */
void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
		  int type, int length)
{
	_make_cpu_key(key, get_inode_item_key_version(inode),
		      le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
		      le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
		      length);
}
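
/*
 * Note that reiserfs item keys address file bodies with 1-based byte
 * offsets, so byte N of a file lives at key offset N + 1.  For example,
 * logical block 'block' starts at key offset block * s_blocksize + 1,
 * which is exactly how _get_block_create_0() below builds its search key.
 */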

/* when key is NULL, the short key (dir_id, objectid) is left untouched */
inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
			      int version,
			      loff_t offset, int type, int length,
			      int entry_count /*or ih_free_space */ )
{
	if (key) {
		ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
		ih->ih_key.k_objectid =
		    cpu_to_le32(key->on_disk_key.k_objectid);
	}
	put_ih_version(ih, version);
	set_le_ih_k_offset(ih, offset);
	set_le_ih_k_type(ih, type);
	put_ih_item_len(ih, length);
	/* set_ih_free_space (ih, 0); */
	/*
	 * for directory items it is the entry count; for direct and stat
	 * data items - 0xffff; for indirect items - 0
	 */
	put_ih_entry_count(ih, entry_count);
}

/*
 * FIXME: we might cache recently accessed indirect item
 * Ugh.  Not too eager for that....
 * I cut the code until such time as I see a convincing argument (benchmark).
 * I don't want a bloated inode struct..., and I don't like code complexity....
 */

/*
 * cutting the code is fine, since it really isn't in use yet and is easy
 * to add back in.  But, Vladimir has a really good idea here.  Think
 * about what happens for reading a file.  For each page,
 * the VFS layer calls reiserfs_readpage, which searches the tree to find
 * an indirect item.  This indirect item has X number of pointers, where
 * X is a big number if we've done the block allocation right.  But,
 * we only use one or two of these pointers during each call to readpage,
 * needlessly researching again later on.
 *
 * The size of the cache could be dynamic based on the size of the file.
 *
 * I'd also like to see us cache the location of the stat data item, since
 * we are needlessly researching for that frequently.
 *
 * --chris
 */

/*
 * If this page has a file tail in it, and
 * it was read in by get_block_create_0, the page data is valid,
 * but tail is still sitting in a direct item, and we can't write to
 * it.  So, look through this page, and check all the mapped buffers
 * to make sure they have valid block numbers.  Any that don't need
 * to be unmapped, so that __block_write_begin will correctly call
 * reiserfs_get_block to convert the tail into an unformatted node
 */
static inline void fix_tail_page_for_writing(struct page *page)
{
	struct buffer_head *head, *next, *bh;

	if (page && page_has_buffers(page)) {
		head = page_buffers(page);
		bh = head;
		do {
			next = bh->b_this_page;
			if (buffer_mapped(bh) && bh->b_blocknr == 0) {
				reiserfs_unmap_buffer(bh);
			}
			bh = next;
		} while (bh != head);
	}
}

/*
 * reiserfs_get_block does not need to allocate a block if one has been
 * allocated already or if a non-hole position has been found in the
 * indirect item
 */
static inline int allocation_needed(int retval, b_blocknr_t allocated,
				    struct item_head *ih,
				    __le32 * item, int pos_in_item)
{
	if (allocated)
		return 0;
	if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
	    get_block_num(item, pos_in_item))
		return 0;
	return 1;
}

static inline int indirect_item_found(int retval, struct item_head *ih)
{
	return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
}

static inline void set_block_dev_mapped(struct buffer_head *bh,
					b_blocknr_t block, struct inode *inode)
{
	map_bh(bh, inode->i_sb, block);
}

/*
 * files created with the old (3.5) key format cannot be larger
 * than 2 GB
 */
static int file_capable(struct inode *inode, sector_t block)
{
	/* it is a new-format file */
	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||
	    /* old file, but 'block' is inside of 2gb */
	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))
		return 1;

	return 0;
}
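
/*
 * For example, with a 4 KiB block size (s_blocksize_bits == 12) the
 * check above allows blocks 0 .. (1 << 19) - 1, i.e. file offsets
 * strictly below 2^31 bytes, matching the 2 GB limit of 3.5 keys.
 */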

static int restart_transaction(struct reiserfs_transaction_handle *th,
			       struct inode *inode, struct treepath *path)
{
	struct super_block *s = th->t_super;
	int err;

	BUG_ON(!th->t_trans_id);
	BUG_ON(!th->t_refcount);

	pathrelse(path);

	/* we cannot restart while nested */
	if (th->t_refcount > 1) {
		return 0;
	}
	reiserfs_update_sd(th, inode);
	err = journal_end(th);
	if (!err) {
		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
		if (!err)
			reiserfs_update_inode_transaction(inode);
	}
	return err;
}

/*
 * Called by reiserfs_get_block when create == 0.  Returns the block number
 * for the 'block'-th logical block of the file.  When it hits a direct
 * item it either returns 0 (when called from bmap, which cannot map tails)
 * or reads the direct item into a piece of the page (bh_result).
 */
static int _get_block_create_0(struct inode *inode, sector_t block,
			       struct buffer_head *bh_result, int args)
{
	INITIALIZE_PATH(path);
	struct cpu_key key;
	struct buffer_head *bh;
	struct item_head *ih, tmp_ih;
	b_blocknr_t blocknr;
	char *p = NULL;
	int chars;
	int ret;
	int result;
	int done = 0;
	unsigned long offset;

	/* prepare the key to look for the 'block'-th block of file */
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
		     3);

	result = search_for_position_by_key(inode->i_sb, &key, &path);
	if (result != POSITION_FOUND) {
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		if (result == IO_ERROR)
			return -EIO;
		/*
		 * We do not return -ENOENT if there is a hole but the page is
		 * uptodate, because it means that there is some mmapped data
		 * associated with it that is yet to be written to disk.
		 */
		if ((args & GET_BLOCK_NO_HOLE)
		    && !PageUptodate(bh_result->b_page)) {
			return -ENOENT;
		}
		return 0;
	}

	bh = get_last_bh(&path);
	ih = tp_item_head(&path);
	if (is_indirect_le_ih(ih)) {
		__le32 *ind_item = (__le32 *) ih_item_body(bh, ih);

		/*
		 * FIXME: here we could cache the indirect item or part of it
		 * in the inode to avoid search_by_key in case of subsequent
		 * access to the file
		 */
		blocknr = get_block_num(ind_item, path.pos_in_item);
		ret = 0;
		if (blocknr) {
			map_bh(bh_result, inode->i_sb, blocknr);
			if (path.pos_in_item ==
			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				set_buffer_boundary(bh_result);
			}
		} else
			/*
			 * We do not return -ENOENT if there is a hole but
			 * the page is uptodate, because it means that there
			 * is some mmapped data associated with it that is
			 * yet to be written to disk.
			 */
			if ((args & GET_BLOCK_NO_HOLE)
			    && !PageUptodate(bh_result->b_page)) {
				ret = -ENOENT;
			}

		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return ret;
	}
	/* requested data are in direct item(s) */
	if (!(args & GET_BLOCK_READ_DIRECT)) {
		/*
		 * we are called by bmap.  FIXME: we cannot map a block of a
		 * file when it is stored in direct item(s)
		 */
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return -ENOENT;
	}

	/*
	 * if we've got a direct item, and the buffer or page was uptodate,
	 * we don't want to pull data off disk again.  skip to the
	 * end, where we map the buffer and return
	 */
	if (buffer_uptodate(bh_result)) {
		goto finished;
	} else
		/*
		 * grab_tail_page can trigger calls to reiserfs_get_block on
		 * up to date pages without any buffers.  If the page is up
		 * to date, we don't want to read old data off disk.  Set the
		 * up to date bit on the buffer instead and jump to the end
		 */
	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
		set_buffer_uptodate(bh_result);
		goto finished;
	}
	/* read file tail into part of page */
	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_SIZE - 1);
	copy_item_head(&tmp_ih, ih);

	/*
	 * we only want to kmap if we are reading the tail into the page.
	 * this is not the common case, so we don't kmap until we are
	 * sure we need to.  But, this means the item might move if
	 * kmap schedules
	 */
	if (!p)
		p = (char *)kmap(bh_result->b_page);

	p += offset;
	memset(p, 0, inode->i_sb->s_blocksize);
	do {
		if (!is_direct_le_ih(ih)) {
			BUG();
		}
		/*
		 * make sure we don't read more bytes than actually exist in
		 * the file.  This can happen in odd cases where i_size isn't
		 * correct, and when direct item padding results in a few
		 * extra bytes at the end of the direct item
		 */
		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
			break;
		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
			chars =
			    inode->i_size - (le_ih_k_offset(ih) - 1) -
			    path.pos_in_item;
			done = 1;
		} else {
			chars = ih_item_len(ih) - path.pos_in_item;
		}
		memcpy(p, ih_item_body(bh, ih) + path.pos_in_item, chars);

		if (done)
			break;

		p += chars;

		/*
		 * we are done if the direct item just read is not the last
		 * item of the node.  FIXME: we could try to check the right
		 * delimiting key to see whether the direct item continues in
		 * the right neighbor, or rely on i_size
		 */
		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			break;

		/* update key to look for the next piece */
		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
		result = search_for_position_by_key(inode->i_sb, &key, &path);
		if (result != POSITION_FOUND)
			/* i/o error most likely */
			break;
		bh = get_last_bh(&path);
		ih = tp_item_head(&path);
	} while (1);

	flush_dcache_page(bh_result->b_page);
	kunmap(bh_result->b_page);

finished:
	pathrelse(&path);

	if (result == IO_ERROR)
		return -EIO;

	/*
	 * this buffer has valid data, but isn't valid for io.  mapping it to
	 * block #0 tells the rest of reiserfs it just has a tail in it
	 */
	map_bh(bh_result, inode->i_sb, 0);
	set_buffer_uptodate(bh_result);
	return 0;
}

/*
 * this is called to create the file map, so _get_block_create_0 will not
 * read a direct item
 */
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	return 0;
}

/*
 * special version of get_block that is only used by grab_tail_page right
 * now.  It is sent to __block_write_begin, and when you try to get a
 * block past the end of the file (or a block from a hole) it returns
 * -ENOENT instead of a valid buffer.  __block_write_begin expects to
 * be able to do i/o on the buffers returned, unless an error value
 * is also returned.
 *
 * So, this allows __block_write_begin to be used for reading a single block
 * in a page, where it does not produce a valid page for holes or past the
 * end of the file.  This turns out to be exactly what we need for reading
 * tails for conversion.
 *
 * The point of the wrapper is forcing a certain value for create, even
 * though the VFS layer is calling this function with create == 1.  If you
 * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
 * don't use this function.
 */
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}

/*
 * This is a special helper for reiserfs_get_block in case we are executing
 * a direct_IO request.
 */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	bh_result->b_page = NULL;

	/*
	 * We set the b_size before reiserfs_get_block call since it is
	 * referenced in convert_tail_for_hole() that may be called from
	 * reiserfs_get_block()
	 */
	bh_result->b_size = i_blocksize(inode);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		goto out;

	/* don't allow direct io onto tail pages */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/*
		 * make sure future calls to the direct io funcs for this
		 * offset in the file fail by unmapping the buffer
		 */
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}

	/*
	 * Possible unpacked tail.  Flush the data before pages have
	 * disappeared
	 */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int err;

		reiserfs_write_lock(inode->i_sb);

		err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;

		reiserfs_write_unlock(inode->i_sb);

		if (err < 0)
			ret = err;
	}
out:
	return ret;
}

/*
 * helper function for when reiserfs_get_block is called for a hole
 * but the file tail is still in a direct item
 * bh_result is the buffer head for the hole
 * tail_offset is the offset of the start of the tail in the file
 *
 * This calls prepare_write, which will start a new transaction.
 * You should not be in a transaction, or have any paths held, when you
 * call this.
 */
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;
	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_SHIFT;
	/*
	 * hole_page can be NULL in case of direct_io; we are sure
	 * that we cannot get here if we write with O_DIRECT into a tail page
	 */
	if (!hole_page || index != hole_page->index) {
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		tail_page = hole_page;
	}

	/*
	 * we don't have to make sure the conversion did not happen while
	 * we were locking the page because anyone that could convert
	 * must first take i_mutex.
	 *
	 * We must fix the tail page for writing because it might have buffers
	 * that are mapped, but have a block number of 0.  This indicates tail
	 * data that has been read directly into the page, and
	 * __block_write_begin won't trigger a get_block in this case.
	 */
	fix_tail_page_for_writing(tail_page);
	retval = __reiserfs_write_begin(tail_page, tail_start,
					tail_end - tail_start);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

unlock:
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		put_page(tail_page);
	}
out:
	return retval;
}

static inline int _allocate_block(struct reiserfs_transaction_handle *th,
				  sector_t block,
				  struct inode *inode,
				  b_blocknr_t * allocated_block_nr,
				  struct treepath *path, int flags)
{
	BUG_ON(!th->t_trans_id);

#ifdef REISERFS_PREALLOCATE
	if (!(flags & GET_BLOCK_NO_IMUX)) {
		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
						  path, block);
	}
#endif
	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
					 block);
}

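/*
 * The 'create' argument is a bitmask of the GET_BLOCK_* flags defined in
 * reiserfs.h.  As used in this file:
 *   GET_BLOCK_CREATE      - allocate any missing blocks
 *   GET_BLOCK_NO_HOLE     - return -ENOENT instead of mapping a hole
 *                           (used by grab_tail_page)
 *   GET_BLOCK_READ_DIRECT - read a tail stored in direct item(s) into
 *                           the page instead of failing
 *   GET_BLOCK_NO_DANGLE   - do not leave a transaction running on return
 *   GET_BLOCK_NO_IMUX     - i_mutex is not held, so skip preallocation
 *                           (see _allocate_block above)
 */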
int reiserfs_get_block(struct inode *inode, sector_t block,
		       struct buffer_head *bh_result, int create)
{
	int repeat, retval = 0;
	/* b_blocknr_t is (unsigned) 32 bit int */
	b_blocknr_t allocated_block_nr = 0;
	INITIALIZE_PATH(path);
	int pos_in_item;
	struct cpu_key key;
	struct buffer_head *bh, *unbh = NULL;
	struct item_head *ih, tmp_ih;
	__le32 *item;
	int done;
	int fs_gen;
	struct reiserfs_transaction_handle *th = NULL;
	/*
	 * space reserved in transaction batch:
	 * . 3 balancings in direct->indirect conversion
	 * . 1 block involved into reiserfs_update_sd()
	 * XXX in practically impossible worst case direct2indirect()
	 * can incur (much) more than 3 balancings.
	 * quota update for user, group
	 */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
	int version;
	int dangle = 1;
	loff_t new_offset =
	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;

	reiserfs_write_lock(inode->i_sb);
	version = get_inode_item_key_version(inode);

	if (!file_capable(inode, block)) {
		reiserfs_write_unlock(inode->i_sb);
		return -EFBIG;
	}

	/*
	 * if !create, we aren't changing the FS, so we don't need to
	 * log anything, so we don't need to start a transaction
	 */
	if (!(create & GET_BLOCK_CREATE)) {
		int ret;
		/* find the 'block'-th logical block of the file */
		ret = _get_block_create_0(inode, block, bh_result,
					  create | GET_BLOCK_READ_DIRECT);
		reiserfs_write_unlock(inode->i_sb);
		return ret;
	}

	/*
	 * if we're already in a transaction, make sure to close
	 * any new transactions we start in this func
	 */
	if ((create & GET_BLOCK_NO_DANGLE) ||
	    reiserfs_transaction_running(inode->i_sb))
		dangle = 0;

	/*
	 * If the file is of such a size that it might have a tail, and
	 * tails are enabled, we should mark it as possibly needing
	 * tail packing on close
	 */
	if ((have_large_tails(inode->i_sb)
	     && inode->i_size < i_block_size(inode) * 4)
	    || (have_small_tails(inode->i_sb)
		&& inode->i_size < i_block_size(inode)))
		REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;

	/* set the key of the first byte in the 'block'-th block of file */
	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
start_trans:
		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
		if (!th) {
			retval = -ENOMEM;
			goto failure;
		}
		reiserfs_update_inode_transaction(inode);
	}
research:

	retval = search_for_position_by_key(inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
		retval = -EIO;
		goto failure;
	}

	bh = get_last_bh(&path);
	ih = tp_item_head(&path);
	item = tp_item_body(&path);
	pos_in_item = path.pos_in_item;

	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	if (allocation_needed
	    (retval, allocated_block_nr, ih, item, pos_in_item)) {
		/* we have to allocate a block for the unformatted node */
		if (!th) {
			pathrelse(&path);
			goto start_trans;
		}

		repeat =
		    _allocate_block(th, block, inode, &allocated_block_nr,
				    &path, create);

		/*
		 * restart the transaction to give the journal a chance to
		 * free some blocks.  This releases the path, so we have to
		 * go back to research if we succeed on the second try
		 */
		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
			repeat =
			    _allocate_block(th, block, inode,
					    &allocated_block_nr, NULL, create);

			if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
				goto research;
			}
			if (repeat == QUOTA_EXCEEDED)
				retval = -EDQUOT;
			else
				retval = -ENOSPC;
			goto failure;
		}

		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}

	if (indirect_item_found(retval, ih)) {
		b_blocknr_t unfm_ptr;
		/*
		 * 'block'-th block is in the file already (there is a
		 * corresponding cell in some indirect item).  But it may be
		 * a zero unformatted node pointer (hole)
		 */
		unfm_ptr = get_block_num(item, pos_in_item);
		if (unfm_ptr == 0) {
			/* use allocated block to plug the hole */
			reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
			if (fs_changed(fs_gen, inode->i_sb)
			    && item_moved(&tmp_ih, &path)) {
				reiserfs_restore_prepared_buffer(inode->i_sb,
								 bh);
				goto research;
			}
			set_buffer_new(bh_result);
			if (buffer_dirty(bh_result)
			    && reiserfs_data_ordered(inode->i_sb))
				reiserfs_add_ordered_list(inode, bh_result);
			put_block_num(item, pos_in_item, allocated_block_nr);
			unfm_ptr = allocated_block_nr;
			journal_mark_dirty(th, bh);
			reiserfs_update_sd(th, inode);
		}
		set_block_dev_mapped(bh_result, unfm_ptr, inode);
		pathrelse(&path);
		retval = 0;
		if (!dangle && th)
			retval = reiserfs_end_persistent_transaction(th);

		reiserfs_write_unlock(inode->i_sb);

		/*
		 * the item was found, so new blocks were not added to the
		 * file.  There is no need to make sure the inode is updated
		 * with this transaction
		 */
		return retval;
	}

	if (!th) {
		pathrelse(&path);
		goto start_trans;
	}

	/*
	 * desired position is not found or is in the direct item.  We have
	 * to append the file with holes up to the 'block'-th block,
	 * converting direct items to indirect ones if necessary
	 */
	done = 0;
	do {
		if (is_statdata_le_ih(ih)) {
			__le32 unp = 0;
			struct cpu_key tmp_key;

			/* indirect item has to be inserted */
			make_le_item_head(&tmp_ih, &key, version, 1,
					  TYPE_INDIRECT, UNFM_P_SIZE,
					  0 /* free_space */ );

			/*
			 * we are going to add 'block'-th block to the file.
			 * Use allocated block for that
			 */
			if (cpu_key_k_offset(&key) == 1) {
				unp = cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			}
			tmp_key = key;	/* ;) */
			set_cpu_key_k_offset(&tmp_key, 1);
			PATH_LAST_POSITION(&path)++;

			retval =
			    reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
						 inode, (char *)&unp);
			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				/*
				 * retval == -ENOSPC, -EDQUOT, -EIO
				 * or -EEXIST
				 */
				goto failure;
			}
		} else if (is_direct_le_ih(ih)) {
			/* direct item has to be converted */
			loff_t tail_offset;

			tail_offset =
			    ((le_ih_k_offset(ih) -
			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;

			/*
			 * the direct item we just found fits into the block
			 * we have to map.  Convert it into an unformatted
			 * node: use bh_result for the conversion
			 */
			if (tail_offset == cpu_key_k_offset(&key)) {
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				unbh = bh_result;
				done = 1;
			} else {
				/*
				 * we have to pad the file tail stored in
				 * direct item(s) up to block size and convert
				 * it to an unformatted node.  FIXME: this
				 * should also get into the page cache
				 */

				pathrelse(&path);
				/*
				 * ugly, but we can only end the transaction
				 * if we aren't nested
				 */
				BUG_ON(!th->t_refcount);
				if (th->t_refcount == 1) {
					retval =
					    reiserfs_end_persistent_transaction
					    (th);
					th = NULL;
					if (retval)
						goto failure;
				}

				retval =
				    convert_tail_for_hole(inode, bh_result,
							  tail_offset);
				if (retval) {
					if (retval != -ENOSPC)
						reiserfs_error(inode->i_sb,
							       "clm-6004",
							       "convert tail failed "
							       "inode %lu, error %d",
							       inode->i_ino,
							       retval);
					if (allocated_block_nr) {
						/*
						 * the bitmap, the super,
						 * and the stat data == 3
						 */
						if (!th)
							th = reiserfs_persistent_transaction(inode->i_sb, 3);
						if (th)
							reiserfs_free_block(th,
									    inode,
									    allocated_block_nr,
									    1);
					}
					goto failure;
				}
				goto research;
			}
			retval =
			    direct2indirect(th, inode, &path, unbh,
					    tail_offset);
			if (retval) {
				reiserfs_unmap_buffer(unbh);
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			/*
			 * it is important that set_buffer_uptodate is done
			 * after the direct2indirect.  The buffer might
			 * contain valid data newer than the data on disk
			 * (read by readpage, changed, and then sent here by
			 * writepage).  direct2indirect needs to know if unbh
			 * was already up to date, so it can decide if the
			 * data in unbh needs to be replaced with data from
			 * the disk
			 */
			set_buffer_uptodate(unbh);

			/*
			 * unbh->b_page == NULL in case of DIRECT_IO request,
			 * this means the buffer will disappear shortly, so it
			 * should not be added to any list
			 */
			if (unbh->b_page) {
				/*
				 * we've converted the tail, so we must
				 * flush unbh before the transaction commits
				 */
				reiserfs_add_tail_list(inode, unbh);

				/*
				 * mark it dirty now to prevent commit_write
				 * from adding this buffer to the inode's
				 * dirty buffer list
				 */
				/*
				 * AKPM: changed __mark_buffer_dirty to
				 * mark_buffer_dirty().  It's still atomic,
				 * but it sets the page dirty too, which makes
				 * it eligible for writeback at any time by the
				 * VM (which was also the case with
				 * __mark_buffer_dirty())
				 */
				mark_buffer_dirty(unbh);
			}
		} else {
			/*
			 * append indirect item with holes if needed; when
			 * appending a pointer to the 'block'-th block, use
			 * the block which is already allocated
			 */
			struct cpu_key tmp_key;
			/*
			 * We use this in case we need to allocate
			 * only one block, which is a fastpath
			 */
			unp_t unf_single = 0;
			unp_t *un;
			__u64 max_to_insert =
			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
			    UNFM_P_SIZE;
			__u64 blocks_needed;

			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
			       "vs-804: invalid position for append");
			/*
			 * indirect item has to be appended,
			 * set up key of that position
			 * (key type is unimportant)
			 */
			make_cpu_key(&tmp_key, inode,
				     le_key_k_offset(version,
						     &ih->ih_key) +
				     op_bytes_number(ih,
						     inode->i_sb->s_blocksize),
				     TYPE_INDIRECT, 3);

			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
			       "green-805: invalid offset");
			blocks_needed =
			    1 +
			    ((cpu_key_k_offset(&key) -
			      cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
			     s_blocksize_bits);

			if (blocks_needed == 1) {
				un = &unf_single;
			} else {
				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
				if (!un) {
					un = &unf_single;
					blocks_needed = 1;
					max_to_insert = 0;
				}
			}
			if (blocks_needed <= max_to_insert) {
				/*
				 * we are going to add the target block to
				 * the file.  Use the allocated block for that
				 */
				un[blocks_needed - 1] =
				    cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			} else {
				/* paste hole to the indirect item */
				/*
				 * If kzalloc failed, max_to_insert becomes
				 * zero and it means we only have space for
				 * one block
				 */
				blocks_needed =
				    max_to_insert ? max_to_insert : 1;
			}
			retval =
			    reiserfs_paste_into_item(th, &path, &tmp_key, inode,
						     (char *)un,
						     UNFM_P_SIZE *
						     blocks_needed);

			if (blocks_needed != 1)
				kfree(un);

			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			if (!done) {
				/*
				 * We need to mark the new file size in case
				 * this function will be interrupted/aborted
				 * later on.  And we may do this only for
				 * holes.
				 */
				inode->i_size +=
				    inode->i_sb->s_blocksize * blocks_needed;
			}
		}

		if (done == 1)
			break;

		/*
		 * this loop could log more blocks than we had originally
		 * asked for.  So, we have to allow the transaction to end
		 * if it is too big or too full.  Update the inode so things
		 * are consistent if we crash before the function returns.
		 * Release the path so that anybody waiting on the path before
		 * ending their transaction will be able to continue.
		 */
		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
		}
		/*
		 * inserting indirect pointers for a hole can take a
		 * long time.  reschedule if needed and also release the write
		 * lock for others.
		 */
		reiserfs_cond_resched(inode->i_sb);

		retval = search_for_position_by_key(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			retval = -EIO;
			goto failure;
		}
		if (retval == POSITION_FOUND) {
			reiserfs_warning(inode->i_sb, "vs-825",
					 "%K should not be found", &key);
			retval = -EEXIST;
			if (allocated_block_nr)
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
			pathrelse(&path);
			goto failure;
		}
		bh = get_last_bh(&path);
		ih = tp_item_head(&path);
		item = tp_item_body(&path);
		pos_in_item = path.pos_in_item;
	} while (1);

	retval = 0;

failure:
	if (th && (!dangle || (retval && !th->t_trans_id))) {
		int err;
		if (th->t_trans_id)
			reiserfs_update_sd(th, inode);
		err = reiserfs_end_persistent_transaction(th);
		if (err)
			retval = err;
	}

	reiserfs_write_unlock(inode->i_sb);
	reiserfs_check_path(&path);
	return retval;
}

static int
reiserfs_readpages(struct file *file, struct address_space *mapping,
		   struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
}

/*
 * Compute the real number of bytes used by a file.
 * The following three functions can go away when we have enough space in
 * the stat item
 */
static int real_space_diff(struct inode *inode, int sd_size)
{
	int bytes;
	loff_t blocksize = inode->i_sb->s_blocksize;

	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
		return sd_size;

	/*
	 * End of file is also in a full block with an indirect reference,
	 * so round up to the next block.
	 *
	 * there is just no way to know if the tail is actually packed
	 * on the file, so we have to assume it isn't.  When we pack the
	 * tail, we add 4 bytes to pretend there really is an unformatted
	 * node pointer
	 */
	bytes =
	    ((inode->i_size +
	      (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
	    sd_size;
	return bytes;
}
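
/*
 * For example, a regular file with i_size == 5000 on a 4 KiB-block fs
 * rounds up to 2 blocks, so real_space_diff() returns
 * 2 * UNFM_P_SIZE + sd_size: one unformatted node pointer per block,
 * plus the stat data item itself.
 */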

static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
					int sd_size)
{
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		return inode->i_size +
		    (loff_t) (real_space_diff(inode, sd_size));
	}
	return ((loff_t) real_space_diff(inode, sd_size)) +
	    (((loff_t) blocks) << 9);
}

/* Compute number of blocks used by file in ReiserFS counting */
static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
{
	loff_t bytes = inode_get_bytes(inode);
	loff_t real_space = real_space_diff(inode, sd_size);

	/* keeps fsck and non-quota versions of reiserfs happy */
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		bytes += (loff_t) 511;
	}

	/*
	 * files from before the quota patch might have i_blocks such that
	 * bytes < real_space.  Deal with that here to prevent it from
	 * going negative.
	 */
	if (bytes < real_space)
		return 0;
	return (bytes - real_space) >> 9;
}

/*
 * BAD: new directories have stat data of new type and all other items
 * of old type.  The version stored in the inode describes the body items,
 * so in update_stat_data we cannot rely on the inode, but have to check
 * the item version directly
 */

/* called by read_locked_inode */
static void init_inode(struct inode *inode, struct treepath *path)
{
	struct buffer_head *bh;
	struct item_head *ih;
	__u32 rdev;

	bh = PATH_PLAST_BUFFER(path);
	ih = tp_item_head(path);

	copy_key(INODE_PKEY(inode), &ih->ih_key);

	INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list);
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	reiserfs_init_xattr_rwsem(inode);

	if (stat_data_v1(ih)) {
		struct stat_data_v1 *sd =
		    (struct stat_data_v1 *)ih_item_body(bh, ih);
		unsigned long blocks;

		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		set_inode_sd_version(inode, STAT_DATA_V1);
		inode->i_mode = sd_v1_mode(sd);
		set_nlink(inode, sd_v1_nlink(sd));
		i_uid_write(inode, sd_v1_uid(sd));
		i_gid_write(inode, sd_v1_gid(sd));
		inode->i_size = sd_v1_size(sd);
		inode->i_atime.tv_sec = sd_v1_atime(sd);
		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
		inode->i_atime.tv_nsec = 0;
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;

		inode->i_blocks = sd_v1_blocks(sd);
		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		blocks = (inode->i_size + 511) >> 9;
		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);

		/*
		 * there was a bug in <= 3.5.23 where i_blocks could take
		 * negative values.  Starting from 3.5.17 this value could
		 * even be stored in the stat data.  For such files we set
		 * i_blocks based on the file size.  Just 2 notes: this can
		 * be wrong for sparse files, and the on-disk value will only
		 * be updated if the file's inode ever changes
		 */
		if (inode->i_blocks > blocks) {
			inode->i_blocks = blocks;
		}

		rdev = sd_v1_rdev(sd);
		REISERFS_I(inode)->i_first_direct_byte =
		    sd_v1_first_direct_byte(sd);

		/*
		 * an early bug in the quota code can give us an odd
		 * number for the block count.  This is incorrect, fix it here.
		 */
		if (inode->i_blocks & 1) {
			inode->i_blocks++;
		}
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V1_SIZE));
		/*
		 * nopack is initially zero for v1 objects.  For v2 objects,
		 * nopack is initialised from sd_attrs
		 */
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	} else {
		/*
		 * new stat data found, but object may have old items
		 * (directories and symlinks)
		 */
		struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih);

		inode->i_mode = sd_v2_mode(sd);
		set_nlink(inode, sd_v2_nlink(sd));
		i_uid_write(inode, sd_v2_uid(sd));
		inode->i_size = sd_v2_size(sd);
		i_gid_write(inode, sd_v2_gid(sd));
		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
		inode->i_atime.tv_sec = sd_v2_atime(sd);
		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;
		inode->i_atime.tv_nsec = 0;
		inode->i_blocks = sd_v2_blocks(sd);
		rdev = sd_v2_rdev(sd);
		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
			inode->i_generation =
			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		else
			inode->i_generation = sd_v2_generation(sd);

		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		else
			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
		REISERFS_I(inode)->i_first_direct_byte = 0;
		set_inode_sd_version(inode, STAT_DATA_V2);
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V2_SIZE));
		/*
		 * read persistent inode attributes from sd and initialise
		 * generic inode flags from them
		 */
		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
	}

	pathrelse(path);
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &reiserfs_file_inode_operations;
		inode->i_fop = &reiserfs_file_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &reiserfs_dir_inode_operations;
		inode->i_fop = &reiserfs_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &reiserfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else {
		inode->i_blocks = 0;
		inode->i_op = &reiserfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
	}
}

/* update new stat data with inode fields */
static void inode2sd(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data *sd_v2 = (struct stat_data *)sd;

	set_sd_v2_mode(sd_v2, inode->i_mode);
	set_sd_v2_nlink(sd_v2, inode->i_nlink);
	set_sd_v2_uid(sd_v2, i_uid_read(inode));
	set_sd_v2_size(sd_v2, size);
	set_sd_v2_gid(sd_v2, i_gid_read(inode));
	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
	else
		set_sd_v2_generation(sd_v2, inode->i_generation);
	set_sd_v2_attrs(sd_v2, REISERFS_I(inode)->i_attrs);
}

/* used to copy inode's fields to old stat data */
static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;

	set_sd_v1_mode(sd_v1, inode->i_mode);
	set_sd_v1_uid(sd_v1, i_uid_read(inode));
	set_sd_v1_gid(sd_v1, i_gid_read(inode));
	set_sd_v1_nlink(sd_v1, inode->i_nlink);
	set_sd_v1_size(sd_v1, size);
	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
	else
		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));

	/* Sigh. i_first_direct_byte is back */
	set_sd_v1_first_direct_byte(sd_v1,
				    REISERFS_I(inode)->i_first_direct_byte);
}

/*
 * NOTE, you must prepare the buffer head before sending it here,
 * and then log it after the call
 */
static void update_stat_data(struct treepath *path, struct inode *inode,
			     loff_t size)
{
	struct buffer_head *bh;
	struct item_head *ih;

	bh = PATH_PLAST_BUFFER(path);
	ih = tp_item_head(path);

	if (!is_statdata_le_ih(ih))
		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
			       INODE_PKEY(inode), ih);

	/* path points to old stat data */
	if (stat_data_v1(ih)) {
		inode2sd_v1(ih_item_body(bh, ih), inode, size);
	} else {
		inode2sd(ih_item_body(bh, ih), inode, size);
	}

	return;
}

void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct cpu_key key;
	INITIALIZE_PATH(path);
	struct buffer_head *bh;
	int fs_gen;
	struct item_head *ih, tmp_ih;
	int retval;

	BUG_ON(!th->t_trans_id);

	/* key type is unimportant */
	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);

	for (;;) {
		int pos;
		/* look for the object's stat data */
		retval = search_item(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			reiserfs_error(inode->i_sb, "vs-13050",
				       "i/o failure occurred trying to "
				       "update %K stat data", &key);
			return;
		}
		if (retval == ITEM_NOT_FOUND) {
			pos = PATH_LAST_POSITION(&path);
			pathrelse(&path);
			if (inode->i_nlink == 0) {
				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
				return;
			}
			reiserfs_warning(inode->i_sb, "vs-13060",
					 "stat data of object %k (nlink == %d) "
					 "not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/*
		 * sigh, prepare_for_journal might schedule.  When it
		 * schedules the FS might change.  We have to detect that,
		 * and loop back to the search if the stat data item has moved
		 */
		bh = get_last_bh(&path);
		ih = tp_item_head(&path);
		copy_item_head(&tmp_ih, ih);
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);

		/* Stat_data item has been moved after scheduling. */
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;
		}
		break;
	}
	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, bh);
	pathrelse(&path);
	return;
}

/*
 * reiserfs_read_locked_inode is called to read the inode off disk, and it
 * does a make_bad_inode when things go wrong.  But, we need to make sure
 * and clear the key in the private portion of the inode, otherwise a
 * corresponding iput might try to delete whatever object the inode last
 * represented.
 */
static void reiserfs_make_bad_inode(struct inode *inode)
{
	memset(INODE_PKEY(inode), 0, KEY_SIZE);
	make_bad_inode(inode);
}

/*
 * initially this function was derived from minix or ext2's analog and
 * evolved as the prototype did
 */
int reiserfs_init_locked_inode(struct inode *inode, void *p)
{
	struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
	inode->i_ino = args->objectid;
	INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
	return 0;
}

/*
 * looks for stat data in the tree, and fills in the fields of the
 * in-core inode from the stat data found
 */
void reiserfs_read_locked_inode(struct inode *inode,
				struct reiserfs_iget_args *args)
{
	INITIALIZE_PATH(path_to_sd);
	struct cpu_key key;
	unsigned long dirino;
	int retval;

	dirino = args->dirid;

	/*
	 * set version 1; version 2 could be used too, because the stat data
	 * key is the same in both versions
	 */
	key.version = KEY_FORMAT_3_5;
	key.on_disk_key.k_dir_id = dirino;
	key.on_disk_key.k_objectid = inode->i_ino;
	key.on_disk_key.k_offset = 0;
	key.on_disk_key.k_type = 0;

	/* look for the object's stat data */
	retval = search_item(inode->i_sb, &key, &path_to_sd);
	if (retval == IO_ERROR) {
		reiserfs_error(inode->i_sb, "vs-13070",
			       "i/o failure occurred trying to find "
			       "stat data of %K", &key);
		reiserfs_make_bad_inode(inode);
		return;
	}

	/* a stale NFS handle can trigger this without it being an error */
	if (retval != ITEM_FOUND) {
		pathrelse(&path_to_sd);
		reiserfs_make_bad_inode(inode);
		clear_nlink(inode);
		return;
	}

	init_inode(inode, &path_to_sd);

	/*
	 * It is possible that knfsd is trying to access an inode of a file
	 * that is being removed from the disk by some other thread.  As we
	 * update sd on unlink, all that is required is to check for nlink
	 * here.  This bug was first found by Sizif when debugging
	 * SquidNG/Butterfly, forgotten, and found again after Philippe
	 * Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
	 *
	 * A more logical fix would require changes in fs/inode.c:iput() to
	 * remove the inode from the hash-table _after_ the fs has cleaned
	 * disk stuff up, and in iget() to return NULL if an I_FREEING inode
	 * is found in the hash-table.
	 */

	/*
	 * Currently there is one place where it's ok to meet an inode with
	 * nlink == 0: processing of open-unlinked and half-truncated files
	 * during mount (fs/reiserfs/super.c:finish_unfinished()).
	 */
	if ((inode->i_nlink == 0) &&
	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
		reiserfs_warning(inode->i_sb, "vs-13075",
				 "dead inode read from disk %K. "
				 "This is likely to be a race with knfsd. Ignore",
				 &key);
		reiserfs_make_bad_inode(inode);
	}

	/* init_inode should have released the path */
	reiserfs_check_path(&path_to_sd);

	/*
	 * Stat data v1 doesn't support ACLs.
	 */
	if (get_inode_sd_version(inode) == STAT_DATA_V1)
		cache_no_acl(inode);
}

/*
 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
 *
 * @inode: inode from hash table to check
 * @opaque: "cookie" passed to iget5_locked().  This is &reiserfs_iget_args.
 *
 * This function is called by iget5_locked() to distinguish reiserfs inodes
 * having the same inode numbers.  Such inodes can only exist due to some
 * error condition.  One of them should be bad.  Inodes with identical
 * inode numbers (objectids) are distinguished by parent directory ids.
 */
int reiserfs_find_actor(struct inode *inode, void *opaque)
{
	struct reiserfs_iget_args *args;

	args = opaque;
	/* args is already in CPU order */
	return (inode->i_ino == args->objectid) &&
	    (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
}

struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
{
	struct inode *inode;
	struct reiserfs_iget_args args;
	int depth;

	args.objectid = key->on_disk_key.k_objectid;
	args.dirid = key->on_disk_key.k_dir_id;
	depth = reiserfs_write_unlock_nested(s);
	inode = iget5_locked(s, key->on_disk_key.k_objectid,
			     reiserfs_find_actor, reiserfs_init_locked_inode,
			     (void *)(&args));
	reiserfs_write_lock_nested(s, depth);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (inode->i_state & I_NEW) {
		reiserfs_read_locked_inode(inode, &args);
		unlock_new_inode(inode);
	}

	if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
		/* either due to i/o error or a stale NFS handle */
		iput(inode);
		inode = NULL;
	}
	return inode;
}

static struct dentry *reiserfs_get_dentry(struct super_block *sb,
					  u32 objectid, u32 dir_id,
					  u32 generation)
{
	struct cpu_key key;
	struct inode *inode;

	key.on_disk_key.k_objectid = objectid;
	key.on_disk_key.k_dir_id = dir_id;
	reiserfs_write_lock(sb);
	inode = reiserfs_iget(sb, &key);
	if (inode && !IS_ERR(inode) && generation != 0 &&
	    generation != inode->i_generation) {
		iput(inode);
		inode = NULL;
	}
	reiserfs_write_unlock(sb);

	return d_obtain_alias(inode);
}

struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
				     int fh_len, int fh_type)
{
	/*
	 * fhtype happens to reflect the number of u32s encoded.
	 * due to a bug in earlier code, fhtype might indicate there
	 * are more u32s than actually fit.
	 * so if fhtype seems to be more than len, reduce fhtype.
	 * Valid types are:
	 *   2 - objectid + dir_id - legacy support
	 *   3 - objectid + dir_id + generation
	 *   4 - objectid + dir_id + objectid and dirid of parent - legacy
	 *   5 - objectid + dir_id + generation + objectid and dirid of parent
	 *   6 - as above plus generation of directory
	 * 6 does not fit in NFSv2 handles
	 */
	if (fh_type > fh_len) {
		if (fh_type != 6 || fh_len != 5)
			reiserfs_warning(sb, "reiserfs-13077",
					 "nfsd/reiserfs, fhtype=%d, len=%d - odd",
					 fh_type, fh_len);
		fh_type = fh_len;
	}
	if (fh_len < 2)
		return NULL;

	return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1],
				   (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0);
}

struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
				     int fh_len, int fh_type)
{
	if (fh_type > fh_len)
		fh_type = fh_len;
	if (fh_type < 4)
		return NULL;

	return reiserfs_get_dentry(sb,
				   (fh_type >= 5) ? fid->raw[3] : fid->raw[2],
				   (fh_type >= 5) ? fid->raw[4] : fid->raw[3],
				   (fh_type == 6) ? fid->raw[5] : 0);
}

int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
		       struct inode *parent)
{
	int maxlen = *lenp;

	if (parent && (maxlen < 5)) {
		*lenp = 5;
		return FILEID_INVALID;
	} else if (maxlen < 3) {
		*lenp = 3;
		return FILEID_INVALID;
	}

	data[0] = inode->i_ino;
	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
	data[2] = inode->i_generation;
	*lenp = 3;
	if (parent) {
		data[3] = parent->i_ino;
		data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id);
		*lenp = 5;
		if (maxlen >= 6) {
			data[5] = parent->i_generation;
			*lenp = 6;
		}
	}
	return *lenp;
}
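
/*
 * Resulting handle layout (cf. the fh_to_dentry comment above): a 3-u32
 * handle is { objectid, dir_id, generation }; a 5-u32 handle appends
 * { parent objectid, parent dir_id }; a 6-u32 handle adds the parent's
 * generation as well.
 */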

/*
 * looks for stat data, then copies fields to it, marks the buffer
 * containing stat data as dirty
 */
/*
 * reiserfs inodes are never really dirty, since the dirty inode call
 * always logs them.  This call allows the VFS inode marking routines
 * to properly mark inodes for datasync and such, but only actually
 * does something when called for a synchronous update.
 */
int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct reiserfs_transaction_handle th;
	int jbegin_count = 1;

	if (sb_rdonly(inode->i_sb))
		return -EROFS;
	/*
	 * memory pressure can sometimes initiate write_inode calls with
	 * sync == 1.  These cases are just when the system needs ram, not
	 * when the inode needs to reach disk for safety, and they can
	 * safely be ignored because the altered inode has already been
	 * logged.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
		reiserfs_write_lock(inode->i_sb);
		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
			reiserfs_update_sd(&th, inode);
			journal_end_sync(&th);
		}
		reiserfs_write_unlock(inode->i_sb);
	}
	return 0;
}

/*
 * stat data of the new object is already inserted; this inserts the item
 * containing the "." and ".." entries
 */
static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
				  struct inode *inode,
				  struct item_head *ih, struct treepath *path,
				  struct inode *dir)
{
	struct super_block *sb = th->t_super;
	char empty_dir[EMPTY_DIR_SIZE];
	char *body = empty_dir;
	struct cpu_key key;
	int retval;

	BUG_ON(!th->t_trans_id);

	_make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
		      le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
		      TYPE_DIRENTRY, 3 /*key length */ );

	/*
	 * compose the item head for the new item.  Directories consist of
	 * items of old type (ITEM_VERSION_1).  Do not set the key (second
	 * arg is 0), that is done by reiserfs_new_inode
	 */
	if (old_format_only(sb)) {
		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
				  TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);

		make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
				       ih->ih_key.k_objectid,
				       INODE_PKEY(dir)->k_dir_id,
				       INODE_PKEY(dir)->k_objectid);
	} else {
		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
				  TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);

		make_empty_dir_item(body, ih->ih_key.k_dir_id,
				    ih->ih_key.k_objectid,
				    INODE_PKEY(dir)->k_dir_id,
				    INODE_PKEY(dir)->k_objectid);
	}

	/* look for a place in the tree for the new item */
	retval = search_item(sb, &key, path);
	if (retval == IO_ERROR) {
		reiserfs_error(sb, "vs-13080",
			       "i/o failure occurred creating new directory");
		return -EIO;
	}
	if (retval == ITEM_FOUND) {
		pathrelse(path);
		reiserfs_warning(sb, "vs-13070",
				 "object with this key exists (%k)",
				 &(ih->ih_key));
		return -EEXIST;
	}

	/* insert item, that is empty directory item */
	return reiserfs_insert_item(th, path, &key, ih, inode, body);
}
1861
1862/*
1863 * stat data of object has been inserted, this inserts the item
1864 * containing the body of symlink
1865 */
1866static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th,
1867 struct inode *inode,
1868 struct item_head *ih,
1869 struct treepath *path, const char *symname,
1870 int item_len)
1871{
1872 struct super_block *sb = th->t_super;
1873 struct cpu_key key;
1874 int retval;
1875
1876 BUG_ON(!th->t_trans_id);
1877
1878 _make_cpu_key(&key, KEY_FORMAT_3_5,
1879 le32_to_cpu(ih->ih_key.k_dir_id),
1880 le32_to_cpu(ih->ih_key.k_objectid),
1881 1, TYPE_DIRECT, 3 /*key length */ );
1882
1883 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1884 0 /*free_space */ );
1885
1886 /* look for place in the tree for new item */
1887 retval = search_item(sb, &key, path);
1888 if (retval == IO_ERROR) {
1889 reiserfs_error(sb, "vs-13080",
1890 "i/o failure occurred creating new symlink");
1891 return -EIO;
1892 }
1893 if (retval == ITEM_FOUND) {
1894 pathrelse(path);
1895 reiserfs_warning(sb, "vs-13080",
1896 "object with this key exists (%k)",
1897 &(ih->ih_key));
1898 return -EEXIST;
1899 }
1900
1901 /* insert the item, i.e. the body of the symlink */
1902 return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1903}
1904
1905/*
1906 * inserts the stat data into the tree, and then calls
1907 * reiserfs_new_directory (to insert ".", ".." item if new object is
1908 * directory) or reiserfs_new_symlink (to insert symlink body if new
1909 * object is symlink) or nothing (if new object is regular file)
1910 *
1911 * NOTE! uid and gid must already be set in the inode. If we return
1912 * non-zero due to an error, we have to drop the quota previously allocated
1913 * for the fresh inode. This can only be done outside a transaction, so
1914 * if we return non-zero, we also end the transaction.
1915 *
1916 * @th: active transaction handle
1917 * @dir: parent directory for new inode
1918 * @mode: mode of new inode
1919 * @symname: symlink contents if inode is symlink
1920 * @i_size: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for
1921 * symlinks
1922 * @inode: inode to be filled
1923 * @security: optional security context to associate with this inode
1924 */
1925int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1926 struct inode *dir, umode_t mode, const char *symname,
1927 /* 0 for regular, EMPTY_DIR_SIZE for dirs,
1928 strlen(symname) for symlinks */
1929 loff_t i_size, struct dentry *dentry,
1930 struct inode *inode,
1931 struct reiserfs_security_handle *security)
1932{
1933 struct super_block *sb = dir->i_sb;
1934 struct reiserfs_iget_args args;
1935 INITIALIZE_PATH(path_to_key);
1936 struct cpu_key key;
1937 struct item_head ih;
1938 struct stat_data sd;
1939 int retval;
1940 int err;
1941 int depth;
1942
1943 BUG_ON(!th->t_trans_id);
1944
1945 depth = reiserfs_write_unlock_nested(sb);
1946 err = dquot_alloc_inode(inode);
1947 reiserfs_write_lock_nested(sb, depth);
1948 if (err)
1949 goto out_end_trans;
1950 if (!dir->i_nlink) {
1951 err = -EPERM;
1952 goto out_bad_inode;
1953 }
1954
1955 /* item head of new item */
1956 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1957 ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1958 if (!ih.ih_key.k_objectid) {
1959 err = -ENOMEM;
1960 goto out_bad_inode;
1961 }
1962 args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1963 if (old_format_only(sb))
1964 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1965 TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1966 else
1967 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1968 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1969 memcpy(INODE_PKEY(inode), &ih.ih_key, KEY_SIZE);
1970 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1971
1972 depth = reiserfs_write_unlock_nested(inode->i_sb);
1973 err = insert_inode_locked4(inode, args.objectid,
1974 reiserfs_find_actor, &args);
1975 reiserfs_write_lock_nested(inode->i_sb, depth);
1976 if (err) {
1977 err = -EINVAL;
1978 goto out_bad_inode;
1979 }
1980
1981 if (old_format_only(sb))
1982 /*
1983 * not a perfect generation count, as object ids can be reused,
1984 * but this is as good as reiserfs can do right now.
1985 * note that the private part of inode isn't filled in yet,
1986 * we have to use the directory.
1987 */
1988 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1989 else
1990#if defined( USE_INODE_GENERATION_COUNTER )
1991 inode->i_generation =
1992 le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1993#else
1994 inode->i_generation = ++event;
1995#endif
1996
1997 /* fill stat data */
1998 set_nlink(inode, (S_ISDIR(mode) ? 2 : 1));
1999
2000 /* uid and gid must already be set by the caller for quota init */
2001
2002 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
2003 inode->i_size = i_size;
2004 inode->i_blocks = 0;
2005 inode->i_bytes = 0;
2006 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
2007 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
2008
2009 INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list);
2010 REISERFS_I(inode)->i_flags = 0;
2011 REISERFS_I(inode)->i_prealloc_block = 0;
2012 REISERFS_I(inode)->i_prealloc_count = 0;
2013 REISERFS_I(inode)->i_trans_id = 0;
2014 REISERFS_I(inode)->i_jl = NULL;
2015 REISERFS_I(inode)->i_attrs =
2016 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
2017 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
2018 reiserfs_init_xattr_rwsem(inode);
2019
2020 /* key to search for correct place for new stat data */
2021 _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
2022 le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
2023 TYPE_STAT_DATA, 3 /*key length */ );
2024
2025 /* find proper place for inserting of stat data */
2026 retval = search_item(sb, &key, &path_to_key);
2027 if (retval == IO_ERROR) {
2028 err = -EIO;
2029 goto out_bad_inode;
2030 }
2031 if (retval == ITEM_FOUND) {
2032 pathrelse(&path_to_key);
2033 err = -EEXIST;
2034 goto out_bad_inode;
2035 }
2036 if (old_format_only(sb)) {
2037 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
2038 if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) {
2039 pathrelse(&path_to_key);
2040 err = -EINVAL;
2041 goto out_bad_inode;
2042 }
2043 inode2sd_v1(&sd, inode, inode->i_size);
2044 } else {
2045 inode2sd(&sd, inode, inode->i_size);
2046 }
2047 /*
2048 * store in the in-core inode the key of the stat data and the
2049 * version all object items will have (directory items keep the
2050 * old offset format, other new objects consist of new items)
2051 */
2052 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
2053 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
2054 else
2055 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
2056 if (old_format_only(sb))
2057 set_inode_sd_version(inode, STAT_DATA_V1);
2058 else
2059 set_inode_sd_version(inode, STAT_DATA_V2);
2060
2061 /* insert the stat data into the tree */
2062#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2063 if (REISERFS_I(dir)->new_packing_locality)
2064 th->displace_new_blocks = 1;
2065#endif
2066 retval =
2067 reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
2068 (char *)(&sd));
2069 if (retval) {
2070 err = retval;
2071 reiserfs_check_path(&path_to_key);
2072 goto out_bad_inode;
2073 }
2074#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2075 if (!th->displace_new_blocks)
2076 REISERFS_I(dir)->new_packing_locality = 0;
2077#endif
2078 if (S_ISDIR(mode)) {
2079 /* insert item with "." and ".." */
2080 retval =
2081 reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
2082 }
2083
2084 if (S_ISLNK(mode)) {
2085 /* insert body of symlink */
2086 if (!old_format_only(sb))
2087 i_size = ROUND_UP(i_size);
2088 retval =
2089 reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
2090 i_size);
2091 }
2092 if (retval) {
2093 err = retval;
2094 reiserfs_check_path(&path_to_key);
2095 journal_end(th);
2096 goto out_inserted_sd;
2097 }
2098
2099 if (reiserfs_posixacl(inode->i_sb)) {
2100 reiserfs_write_unlock(inode->i_sb);
2101 retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
2102 reiserfs_write_lock(inode->i_sb);
2103 if (retval) {
2104 err = retval;
2105 reiserfs_check_path(&path_to_key);
2106 journal_end(th);
2107 goto out_inserted_sd;
2108 }
2109 } else if (inode->i_sb->s_flags & SB_POSIXACL) {
2110 reiserfs_warning(inode->i_sb, "jdm-13090",
2111 "ACLs aren't enabled in the fs, "
2112 "but vfs thinks they are!");
2113 } else if (IS_PRIVATE(dir))
2114 inode->i_flags |= S_PRIVATE;
2115
2116 if (security->name) {
2117 reiserfs_write_unlock(inode->i_sb);
2118 retval = reiserfs_security_write(th, inode, security);
2119 reiserfs_write_lock(inode->i_sb);
2120 if (retval) {
2121 err = retval;
2122 reiserfs_check_path(&path_to_key);
2123 retval = journal_end(th);
2124 if (retval)
2125 err = retval;
2126 goto out_inserted_sd;
2127 }
2128 }
2129
2130 reiserfs_update_sd(th, inode);
2131 reiserfs_check_path(&path_to_key);
2132
2133 return 0;
2134
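/*
 * error paths: the labels below deliberately fall through in order.
 * out_bad_inode releases the quota inode reference, out_end_trans
 * closes the caller's transaction and drops the quota structures, and
 * out_inserted_sd unhashes and releases the half-built inode.
 */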
2135out_bad_inode:
2136 /* Invalidate the object, nothing was inserted yet */
2137 INODE_PKEY(inode)->k_objectid = 0;
2138
2139 /* Quota change must be inside a transaction for journaling */
2140 depth = reiserfs_write_unlock_nested(inode->i_sb);
2141 dquot_free_inode(inode);
2142 reiserfs_write_lock_nested(inode->i_sb, depth);
2143
2144out_end_trans:
2145 journal_end(th);
2146 /*
2147 * dquot_drop() would need more journal credits inside the
2148 * transaction, so it's better to call it outside, after journal_end()
2149 */
2150 depth = reiserfs_write_unlock_nested(inode->i_sb);
2151 dquot_drop(inode);
2152 reiserfs_write_lock_nested(inode->i_sb, depth);
2153 inode->i_flags |= S_NOQUOTA;
2154 make_bad_inode(inode);
2155
2156out_inserted_sd:
2157 clear_nlink(inode);
2158 th->t_trans_id = 0; /* so the caller can't use this handle later */
2159 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
2160 iput(inode);
2161 return err;
2162}
2163
2164/*
2165 * finds the tail page in the page cache,
2166 * reads the last block in.
2167 *
2168 * On success, page_result is set to a locked, pinned page, and bh_result
2169 * is set to an up to date buffer for the last block in the file. returns 0.
2170 *
2171 * tail conversion is not done, so bh_result might not be valid for writing;
2172 * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
2173 * trying to write the block.
2174 *
2175 * on failure, nonzero is returned, page_result and bh_result are untouched.
2176 */
2177static int grab_tail_page(struct inode *inode,
2178 struct page **page_result,
2179 struct buffer_head **bh_result)
2180{
2181
2182 /*
2183 * we want the page with the last byte in the file,
2184 * not the page that will hold the next byte for appending
2185 */
2186 unsigned long index = (inode->i_size - 1) >> PAGE_SHIFT;
2187 unsigned long pos = 0;
2188 unsigned long start = 0;
2189 unsigned long blocksize = inode->i_sb->s_blocksize;
2190 unsigned long offset = (inode->i_size) & (PAGE_SIZE - 1);
2191 struct buffer_head *bh;
2192 struct buffer_head *head;
2193 struct page *page;
2194 int error;
2195
2196 /*
2197 * we know that we are only called with inode->i_size > 0.
2198 * we also know that a file tail can never be as big as a block.
2199 * If i_size % blocksize == 0, our file is currently block aligned
2200 * and it won't need converting or zeroing after a truncate.
2201 */
2202 if ((offset & (blocksize - 1)) == 0) {
2203 return -ENOENT;
2204 }
2205 page = grab_cache_page(inode->i_mapping, index);
2206 error = -ENOMEM;
2207 if (!page) {
2208 goto out;
2209 }
2210 /* offset within the page at which the last block of the file starts */
2211 start = (offset / blocksize) * blocksize;
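	/*
	 * e.g. with 1k blocks and the file ending at byte 2600 of this
	 * page: offset = 2600, start = (2600 / 1024) * 1024 = 2048
	 */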
2212
2213 error = __block_write_begin(page, start, offset - start,
2214 reiserfs_get_block_create_0);
2215 if (error)
2216 goto unlock;
2217
2218 head = page_buffers(page);
2219 bh = head;
2220 do {
2221 if (pos >= start) {
2222 break;
2223 }
2224 bh = bh->b_this_page;
2225 pos += blocksize;
2226 } while (bh != head);
2227
2228 if (!buffer_uptodate(bh)) {
2229 /*
2230 * note, this should never happen, __block_write_begin should
2231 * be taking care of this for us. If the buffer isn't up to
2232 * date, I've screwed up the code to find the buffer, or the
2233 * code to call __block_write_begin
2234 */
2235 reiserfs_error(inode->i_sb, "clm-6000",
2236 "error reading block %lu", bh->b_blocknr);
2237 error = -EIO;
2238 goto unlock;
2239 }
2240 *bh_result = bh;
2241 *page_result = page;
2242
2243out:
2244 return error;
2245
2246unlock:
2247 unlock_page(page);
2248 put_page(page);
2249 return error;
2250}
2251
2252/*
2253 * vfs version of truncate file. Must NOT be called with
2254 * a transaction already started.
2255 *
2256 * some code taken from block_truncate_page
2257 */
2258int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2259{
2260 struct reiserfs_transaction_handle th;
2261 /* we want the offset for the first byte after the end of the file */
2262 unsigned long offset = inode->i_size & (PAGE_SIZE - 1);
2263 unsigned blocksize = inode->i_sb->s_blocksize;
2264 unsigned length;
2265 struct page *page = NULL;
2266 int error;
2267 struct buffer_head *bh = NULL;
2268 int err2;
2269
2270 reiserfs_write_lock(inode->i_sb);
2271
2272 if (inode->i_size > 0) {
2273 error = grab_tail_page(inode, &page, &bh);
2274 if (error) {
2275 /*
2276 * -ENOENT means we truncated past the end of the
2277 * file, and get_block_create_0 could not find a
2278 * block to read in, which is ok.
2279 */
2280 if (error != -ENOENT)
2281 reiserfs_error(inode->i_sb, "clm-6001",
2282 "grab_tail_page failed %d",
2283 error);
2284 page = NULL;
2285 bh = NULL;
2286 }
2287 }
2288
2289 /*
2290 * so, if page != NULL, we have a buffer head for the offset at
2291 * the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2292 * then we have an unformatted node. Otherwise, we have a direct item,
2293 * and no zeroing is required on disk. We zero after the truncate,
2294 * because the truncate might pack the item anyway
2295 * (it will unmap bh if it packs).
2296 *
2297 * it is enough to reserve space in transaction for 2 balancings:
2298 * one for "save" link adding and another for the first
2299 * cut_from_item. 1 is for update_sd
2300 */
2301 error = journal_begin(&th, inode->i_sb,
2302 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2303 if (error)
2304 goto out;
2305 reiserfs_update_inode_transaction(inode);
2306 if (update_timestamps)
2307 /*
2308 * we are doing real truncate: if the system crashes
2309 * before the last transaction of truncating gets committed
2310 * - on reboot the file either appears truncated properly
2311 * or not truncated at all
2312 */
2313 add_save_link(&th, inode, 1);
2314 err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
2315 error = journal_end(&th);
2316 if (error)
2317 goto out;
2318
2319 /* check reiserfs_do_truncate after ending the transaction */
2320 if (err2) {
2321 error = err2;
2322 goto out;
2323 }
2324
2325 if (update_timestamps) {
2326 error = remove_save_link(inode, 1 /* truncate */);
2327 if (error)
2328 goto out;
2329 }
2330
2331 if (page) {
2332 length = offset & (blocksize - 1);
2333 /* if we are not on a block boundary */
2334 if (length) {
2335 length = blocksize - length;
2336 zero_user(page, offset, length);
2337 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2338 mark_buffer_dirty(bh);
2339 }
2340 }
2341 unlock_page(page);
2342 put_page(page);
2343 }
2344
2345 reiserfs_write_unlock(inode->i_sb);
2346
2347 return 0;
2348out:
2349 if (page) {
2350 unlock_page(page);
2351 put_page(page);
2352 }
2353
2354 reiserfs_write_unlock(inode->i_sb);
2355
2356 return error;
2357}
2358
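/*
 * map 'block' of the page for writepage: bytes that live in an
 * unformatted node get bh_result mapped to that block; bytes that live
 * in a direct item (a file tail) are copied from the page back into the
 * item under a transaction, and bh_result is mapped to block 0 to say
 * "tail, nothing to submit"
 */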
2359static int map_block_for_writepage(struct inode *inode,
2360 struct buffer_head *bh_result,
2361 unsigned long block)
2362{
2363 struct reiserfs_transaction_handle th;
2364 int fs_gen;
2365 struct item_head tmp_ih;
2366 struct item_head *ih;
2367 struct buffer_head *bh;
2368 __le32 *item;
2369 struct cpu_key key;
2370 INITIALIZE_PATH(path);
2371 int pos_in_item;
2372 int jbegin_count = JOURNAL_PER_BALANCE_CNT;
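	/* reiserfs item keys count file bytes starting at 1, hence the +1 */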
2373 loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
2374 int retval;
2375 int use_get_block = 0;
2376 int bytes_copied = 0;
2377 int copy_size;
2378 int trans_running = 0;
2379
2380 /*
2381 * catch places below that try to log something without
2382 * starting a trans
2383 */
2384 th.t_trans_id = 0;
2385
2386 if (!buffer_uptodate(bh_result)) {
2387 return -EIO;
2388 }
2389
2390 kmap(bh_result->b_page);
2391start_over:
2392 reiserfs_write_lock(inode->i_sb);
2393 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2394
2395research:
2396 retval = search_for_position_by_key(inode->i_sb, &key, &path);
2397 if (retval != POSITION_FOUND) {
2398 use_get_block = 1;
2399 goto out;
2400 }
2401
2402 bh = get_last_bh(&path);
2403 ih = tp_item_head(&path);
2404 item = tp_item_body(&path);
2405 pos_in_item = path.pos_in_item;
2406
2407 /* we've found an unformatted node */
2408 if (indirect_item_found(retval, ih)) {
2409 if (bytes_copied > 0) {
2410 reiserfs_warning(inode->i_sb, "clm-6002",
2411 "bytes_copied %d", bytes_copied);
2412 }
2413 if (!get_block_num(item, pos_in_item)) {
2414 /* crap, we are writing to a hole */
2415 use_get_block = 1;
2416 goto out;
2417 }
2418 set_block_dev_mapped(bh_result,
2419 get_block_num(item, pos_in_item), inode);
2420 } else if (is_direct_le_ih(ih)) {
2421 char *p;
2422 p = page_address(bh_result->b_page);
2423 p += (byte_offset - 1) & (PAGE_SIZE - 1);
2424 copy_size = ih_item_len(ih) - pos_in_item;
2425
2426 fs_gen = get_generation(inode->i_sb);
2427 copy_item_head(&tmp_ih, ih);
2428
2429 if (!trans_running) {
2430 /* vs-3050 is gone, no need to drop the path */
2431 retval = journal_begin(&th, inode->i_sb, jbegin_count);
2432 if (retval)
2433 goto out;
2434 reiserfs_update_inode_transaction(inode);
2435 trans_running = 1;
2436 if (fs_changed(fs_gen, inode->i_sb)
2437 && item_moved(&tmp_ih, &path)) {
2438 reiserfs_restore_prepared_buffer(inode->i_sb,
2439 bh);
2440 goto research;
2441 }
2442 }
2443
2444 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2445
2446 if (fs_changed(fs_gen, inode->i_sb)
2447 && item_moved(&tmp_ih, &path)) {
2448 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2449 goto research;
2450 }
2451
2452 memcpy(ih_item_body(bh, ih) + pos_in_item, p + bytes_copied,
2453 copy_size);
2454
2455 journal_mark_dirty(&th, bh);
2456 bytes_copied += copy_size;
2457 set_block_dev_mapped(bh_result, 0, inode);
2458
2459 /* are there still bytes left? */
2460 if (bytes_copied < bh_result->b_size &&
2461 (byte_offset + bytes_copied) < inode->i_size) {
2462 set_cpu_key_k_offset(&key,
2463 cpu_key_k_offset(&key) +
2464 copy_size);
2465 goto research;
2466 }
2467 } else {
2468 reiserfs_warning(inode->i_sb, "clm-6003",
2469 "bad item inode %lu", inode->i_ino);
2470 retval = -EIO;
2471 goto out;
2472 }
2473 retval = 0;
2474
2475out:
2476 pathrelse(&path);
2477 if (trans_running) {
2478 int err = journal_end(&th);
2479 if (err)
2480 retval = err;
2481 trans_running = 0;
2482 }
2483 reiserfs_write_unlock(inode->i_sb);
2484
2485 /* this is where we fill in holes in the file. */
2486 if (use_get_block) {
2487 retval = reiserfs_get_block(inode, block, bh_result,
2488 GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2489 | GET_BLOCK_NO_DANGLE);
2490 if (!retval) {
2491 if (!buffer_mapped(bh_result)
2492 || bh_result->b_blocknr == 0) {
2493 /* get_block failed to find a mapped unformatted node. */
2494 use_get_block = 0;
2495 goto start_over;
2496 }
2497 }
2498 }
2499 kunmap(bh_result->b_page);
2500
2501 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2502 /*
2503 * we've copied data from the page into the direct item, so the
2504 * buffer in the page is now clean, mark it to reflect that.
2505 */
2506 lock_buffer(bh_result);
2507 clear_buffer_dirty(bh_result);
2508 unlock_buffer(bh_result);
2509 }
2510 return retval;
2511}
2512
2513/*
2514 * mason@suse.com: updated in 2.5.54 to follow the same general io
2515 * start/recovery path as __block_write_full_page, along with special
2516 * code to handle reiserfs tails.
2517 */
2518static int reiserfs_write_full_page(struct page *page,
2519 struct writeback_control *wbc)
2520{
2521 struct inode *inode = page->mapping->host;
2522 unsigned long end_index = inode->i_size >> PAGE_SHIFT;
2523 int error = 0;
2524 unsigned long block;
2525 sector_t last_block;
2526 struct buffer_head *head, *bh;
2527 int partial = 0;
2528 int nr = 0;
2529 int checked = PageChecked(page);
2530 struct reiserfs_transaction_handle th;
2531 struct super_block *s = inode->i_sb;
2532 int bh_per_page = PAGE_SIZE / s->s_blocksize;
2533 th.t_trans_id = 0;
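	/*
	 * PageChecked means the data in this page must be journaled (the
	 * file has data logging enabled); such buffers are logged inside a
	 * transaction below instead of being submitted directly
	 */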
2534
2535 /* no logging allowed when nonblocking or from PF_MEMALLOC */
2536 if (checked && (current->flags & PF_MEMALLOC)) {
2537 redirty_page_for_writepage(wbc, page);
2538 unlock_page(page);
2539 return 0;
2540 }
2541
2542 /*
2543 * The page dirty bit is cleared before writepage is called, which
2544 * means we have to tell create_empty_buffers to make dirty buffers.
2545 * The page really should be up to date at this point, so tossing
2546 * in the BH_Uptodate is just a sanity check.
2547 */
2548 if (!page_has_buffers(page)) {
2549 create_empty_buffers(page, s->s_blocksize,
2550 (1 << BH_Dirty) | (1 << BH_Uptodate));
2551 }
2552 head = page_buffers(page);
2553
2554 /*
2555 * last page in the file, zero out any contents past the
2556 * last byte in the file
2557 */
2558 if (page->index >= end_index) {
2559 unsigned last_offset;
2560
2561 last_offset = inode->i_size & (PAGE_SIZE - 1);
2562 /* no file contents in this page */
2563 if (page->index >= end_index + 1 || !last_offset) {
2564 unlock_page(page);
2565 return 0;
2566 }
2567 zero_user_segment(page, last_offset, PAGE_SIZE);
2568 }
2569 bh = head;
2570 block = page->index << (PAGE_SHIFT - s->s_blocksize_bits);
2571 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
2572 /* first map all the buffers, logging any direct items we find */
2573 do {
2574 if (block > last_block) {
2575 /*
2576 * This can happen when the block size is less than
2577 * the page size. The corresponding bytes in the page
2578 * were zero filled above
2579 */
2580 clear_buffer_dirty(bh);
2581 set_buffer_uptodate(bh);
2582 } else if ((checked || buffer_dirty(bh)) &&
2583 (!buffer_mapped(bh) || (buffer_mapped(bh)
2584 && bh->b_blocknr ==
2585 0))) {
2586 /*
2587 * not mapped yet, or it points to a direct item, search
2588 * the btree for the mapping info, and log any direct
2589 * items found
2590 */
2591 if ((error = map_block_for_writepage(inode, bh, block))) {
2592 goto fail;
2593 }
2594 }
2595 bh = bh->b_this_page;
2596 block++;
2597 } while (bh != head);
2598
2599 /*
2600 * we start the transaction after map_block_for_writepage,
2601 * because it can create holes in the file (an unbounded operation).
2602 * starting it here, we can make a reliable estimate for how many
2603 * blocks we're going to log
2604 */
2605 if (checked) {
2606 ClearPageChecked(page);
2607 reiserfs_write_lock(s);
2608 error = journal_begin(&th, s, bh_per_page + 1);
2609 if (error) {
2610 reiserfs_write_unlock(s);
2611 goto fail;
2612 }
2613 reiserfs_update_inode_transaction(inode);
2614 }
2615 /* now go through and lock any dirty buffers on the page */
2616 do {
2617 get_bh(bh);
2618 if (!buffer_mapped(bh))
2619 continue;
2620 if (buffer_mapped(bh) && bh->b_blocknr == 0)
2621 continue;
2622
2623 if (checked) {
2624 reiserfs_prepare_for_journal(s, bh, 1);
2625 journal_mark_dirty(&th, bh);
2626 continue;
2627 }
2628 /*
2629 * from this point on, we know the buffer is mapped to a
2630 * real block and not a direct item
2631 */
2632 if (wbc->sync_mode != WB_SYNC_NONE) {
2633 lock_buffer(bh);
2634 } else {
2635 if (!trylock_buffer(bh)) {
2636 redirty_page_for_writepage(wbc, page);
2637 continue;
2638 }
2639 }
2640 if (test_clear_buffer_dirty(bh)) {
2641 mark_buffer_async_write(bh);
2642 } else {
2643 unlock_buffer(bh);
2644 }
2645 } while ((bh = bh->b_this_page) != head);
2646
2647 if (checked) {
2648 error = journal_end(&th);
2649 reiserfs_write_unlock(s);
2650 if (error)
2651 goto fail;
2652 }
2653 BUG_ON(PageWriteback(page));
2654 set_page_writeback(page);
2655 unlock_page(page);
2656
2657 /*
2658 * since any buffer might be the only dirty buffer on the page,
2659 * the first submit_bh can bring the page out of writeback.
2660 * be careful with the buffers.
2661 */
2662 do {
2663 struct buffer_head *next = bh->b_this_page;
2664 if (buffer_async_write(bh)) {
2665 submit_bh(REQ_OP_WRITE, 0, bh);
2666 nr++;
2667 }
2668 put_bh(bh);
2669 bh = next;
2670 } while (bh != head);
2671
2672 error = 0;
2673done:
2674 if (nr == 0) {
2675 /*
2676 * if this page only had a direct item, it is very possible for
2677 * no io to be required without there being an error. Or,
2678 * someone else could have locked the buffers and sent them
2679 * down the pipe without locking the page
2680 */
2681 bh = head;
2682 do {
2683 if (!buffer_uptodate(bh)) {
2684 partial = 1;
2685 break;
2686 }
2687 bh = bh->b_this_page;
2688 } while (bh != head);
2689 if (!partial)
2690 SetPageUptodate(page);
2691 end_page_writeback(page);
2692 }
2693 return error;
2694
2695fail:
2696 /*
2697 * catches various errors, we need to make sure any valid dirty blocks
2698 * get to the media. The page is currently locked and not marked for
2699 * writeback
2700 */
2701 ClearPageUptodate(page);
2702 bh = head;
2703 do {
2704 get_bh(bh);
2705 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2706 lock_buffer(bh);
2707 mark_buffer_async_write(bh);
2708 } else {
2709 /*
2710 * clear any dirty bits that might have come from
2711 * getting attached to a dirty page
2712 */
2713 clear_buffer_dirty(bh);
2714 }
2715 bh = bh->b_this_page;
2716 } while (bh != head);
2717 SetPageError(page);
2718 BUG_ON(PageWriteback(page));
2719 set_page_writeback(page);
2720 unlock_page(page);
2721 do {
2722 struct buffer_head *next = bh->b_this_page;
2723 if (buffer_async_write(bh)) {
2724 clear_buffer_dirty(bh);
2725 submit_bh(REQ_OP_WRITE, 0, bh);
2726 nr++;
2727 }
2728 put_bh(bh);
2729 bh = next;
2730 } while (bh != head);
2731 goto done;
2732}
2733
2734static int reiserfs_readpage(struct file *f, struct page *page)
2735{
2736 return block_read_full_page(page, reiserfs_get_block);
2737}
2738
2739static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2740{
2741 struct inode *inode = page->mapping->host;
2742 reiserfs_wait_on_write_block(inode->i_sb);
2743 return reiserfs_write_full_page(page, wbc);
2744}
2745
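/*
 * called when a write instantiated blocks past i_size and then failed:
 * toss the pages beyond i_size and truncate the blocks back off, without
 * touching timestamps (second arg 0)
 */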
2746static void reiserfs_truncate_failed_write(struct inode *inode)
2747{
2748 truncate_inode_pages(inode->i_mapping, inode->i_size);
2749 reiserfs_truncate_file(inode, 0);
2750}
2751
2752static int reiserfs_write_begin(struct file *file,
2753 struct address_space *mapping,
2754 loff_t pos, unsigned len, unsigned flags,
2755 struct page **pagep, void **fsdata)
2756{
2757 struct inode *inode;
2758 struct page *page;
2759 pgoff_t index;
2760 int ret;
2761 int old_ref = 0;
2762
2763 inode = mapping->host;
2764 *fsdata = NULL;
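	/*
	 * an expanding truncate (AOP_FLAG_CONT_EXPAND) that lands exactly on
	 * a block boundary gets pos nudged forward one byte; the flag is
	 * stashed in fsdata so reiserfs_write_end can apply the same bump
	 * before comparing pos against i_size
	 */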
2765 if (flags & AOP_FLAG_CONT_EXPAND &&
2766 (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
2767 pos++;
2768 *fsdata = (void *)(unsigned long)flags;
2769 }
2770
2771 index = pos >> PAGE_SHIFT;
2772 page = grab_cache_page_write_begin(mapping, index, flags);
2773 if (!page)
2774 return -ENOMEM;
2775 *pagep = page;
2776
2777 reiserfs_wait_on_write_block(inode->i_sb);
2778 fix_tail_page_for_writing(page);
2779 if (reiserfs_transaction_running(inode->i_sb)) {
2780 struct reiserfs_transaction_handle *th;
2781 th = (struct reiserfs_transaction_handle *)current->
2782 journal_info;
2783 BUG_ON(!th->t_refcount);
2784 BUG_ON(!th->t_trans_id);
2785 old_ref = th->t_refcount;
2786 th->t_refcount++;
2787 }
2788 ret = __block_write_begin(page, pos, len, reiserfs_get_block);
2789 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2790 struct reiserfs_transaction_handle *th = current->journal_info;
2791 /*
2792 * this gets a little ugly. If reiserfs_get_block returned an
2793 * error and left a transaction running, we've got to close
2794 * it, and we've got to free the handle if it was a persistent
2795 * transaction.
2796 *
2797 * But, if we had nested into an existing transaction, we need
2798 * to just drop the ref count on the handle.
2799 *
2800 * If old_ref == 0, the transaction is from reiserfs_get_block,
2801 * and it was a persistent trans. Otherwise, it was nested
2802 * above.
2803 */
2804 if (th->t_refcount > old_ref) {
2805 if (old_ref)
2806 th->t_refcount--;
2807 else {
2808 int err;
2809 reiserfs_write_lock(inode->i_sb);
2810 err = reiserfs_end_persistent_transaction(th);
2811 reiserfs_write_unlock(inode->i_sb);
2812 if (err)
2813 ret = err;
2814 }
2815 }
2816 }
2817 if (ret) {
2818 unlock_page(page);
2819 put_page(page);
2820 /* Truncate allocated blocks */
2821 reiserfs_truncate_failed_write(inode);
2822 }
2823 return ret;
2824}
2825
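/*
 * like reiserfs_write_begin, but for internal callers that already hold
 * the locked page and the write lock (dropped around the blocking wait)
 */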
2826int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
2827{
2828 struct inode *inode = page->mapping->host;
2829 int ret;
2830 int old_ref = 0;
2831 int depth;
2832
2833 depth = reiserfs_write_unlock_nested(inode->i_sb);
2834 reiserfs_wait_on_write_block(inode->i_sb);
2835 reiserfs_write_lock_nested(inode->i_sb, depth);
2836
2837 fix_tail_page_for_writing(page);
2838 if (reiserfs_transaction_running(inode->i_sb)) {
2839 struct reiserfs_transaction_handle *th;
2840 th = (struct reiserfs_transaction_handle *)current->
2841 journal_info;
2842 BUG_ON(!th->t_refcount);
2843 BUG_ON(!th->t_trans_id);
2844 old_ref = th->t_refcount;
2845 th->t_refcount++;
2846 }
2847
2848 ret = __block_write_begin(page, from, len, reiserfs_get_block);
2849 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2850 struct reiserfs_transaction_handle *th = current->journal_info;
2851 /*
2852 * this gets a little ugly. If reiserfs_get_block returned an
2853 * error and left a transaction running, we've got to close
2854 * it, and we've got to free the handle if it was a persistent
2855 * transaction.
2856 *
2857 * But, if we had nested into an existing transaction, we need
2858 * to just drop the ref count on the handle.
2859 *
2860 * If old_ref == 0, the transaction is from reiserfs_get_block,
2861 * and it was a persistent trans. Otherwise, it was nested
2862 * above.
2863 */
2864 if (th->t_refcount > old_ref) {
2865 if (old_ref)
2866 th->t_refcount--;
2867 else {
2868 int err;
2869 reiserfs_write_lock(inode->i_sb);
2870 err = reiserfs_end_persistent_transaction(th);
2871 reiserfs_write_unlock(inode->i_sb);
2872 if (err)
2873 ret = err;
2874 }
2875 }
2876 }
2877 return ret;
2878
2879}
2880
2881static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2882{
2883 return generic_block_bmap(as, block, reiserfs_bmap);
2884}
2885
2886static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2887 loff_t pos, unsigned len, unsigned copied,
2888 struct page *page, void *fsdata)
2889{
2890 struct inode *inode = page->mapping->host;
2891 int ret = 0;
2892 int update_sd = 0;
2893 struct reiserfs_transaction_handle *th;
2894 unsigned start;
2895 bool locked = false;
2896
2897 if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2898 pos++;
2899
2900 reiserfs_wait_on_write_block(inode->i_sb);
2901 if (reiserfs_transaction_running(inode->i_sb))
2902 th = current->journal_info;
2903 else
2904 th = NULL;
2905
2906 start = pos & (PAGE_SIZE - 1);
2907 if (unlikely(copied < len)) {
2908 if (!PageUptodate(page))
2909 copied = 0;
2910
2911 page_zero_new_buffers(page, start + copied, start + len);
2912 }
2913 flush_dcache_page(page);
2914
2915 reiserfs_commit_page(inode, page, start, start + copied);
2916
2917 /*
2918 * generic_commit_write does this for us, but does not update the
2919 * transaction tracking stuff when the size changes. So, we have
2920 * to do the i_size updates here.
2921 */
2922 if (pos + copied > inode->i_size) {
2923 struct reiserfs_transaction_handle myth;
2924 reiserfs_write_lock(inode->i_sb);
2925 locked = true;
2926 /*
2927 * If the file has grown beyond the boundary where it
2928 * can have a tail, unmark it as needing tail
2929 * packing
2930 */
2931 if ((have_large_tails(inode->i_sb)
2932 && inode->i_size > i_block_size(inode) * 4)
2933 || (have_small_tails(inode->i_sb)
2934 && inode->i_size > i_block_size(inode)))
2935 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2936
2937 ret = journal_begin(&myth, inode->i_sb, 1);
2938 if (ret)
2939 goto journal_error;
2940
2941 reiserfs_update_inode_transaction(inode);
2942 inode->i_size = pos + copied;
2943 /*
2944 * this will just nest into our transaction. It's important
2945 * to use mark_inode_dirty so the inode gets pushed around on
2946 * the dirty lists, and so that O_SYNC works as expected
2947 */
2948 mark_inode_dirty(inode);
2949 reiserfs_update_sd(&myth, inode);
2950 update_sd = 1;
2951 ret = journal_end(&myth);
2952 if (ret)
2953 goto journal_error;
2954 }
2955 if (th) {
2956 if (!locked) {
2957 reiserfs_write_lock(inode->i_sb);
2958 locked = true;
2959 }
2960 if (!update_sd)
2961 mark_inode_dirty(inode);
2962 ret = reiserfs_end_persistent_transaction(th);
2963 if (ret)
2964 goto out;
2965 }
2966
2967out:
2968 if (locked)
2969 reiserfs_write_unlock(inode->i_sb);
2970 unlock_page(page);
2971 put_page(page);
2972
2973 if (pos + len > inode->i_size)
2974 reiserfs_truncate_failed_write(inode);
2975
2976 return ret == 0 ? copied : ret;
2977
2978journal_error:
2979 reiserfs_write_unlock(inode->i_sb);
2980 locked = false;
2981 if (th) {
2982 if (!update_sd)
2983 reiserfs_update_sd(th, inode);
2984 ret = reiserfs_end_persistent_transaction(th);
2985 }
2986 goto out;
2987}
2988
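/*
 * legacy commit path, kept for internal callers (e.g. the xattr and
 * ioctl unpack paths) that prepare pages with __reiserfs_write_begin;
 * it mirrors the i_size bookkeeping done in reiserfs_write_end above
 */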
2989int reiserfs_commit_write(struct file *f, struct page *page,
2990 unsigned from, unsigned to)
2991{
2992 struct inode *inode = page->mapping->host;
2993 loff_t pos = ((loff_t) page->index << PAGE_SHIFT) + to;
2994 int ret = 0;
2995 int update_sd = 0;
2996 struct reiserfs_transaction_handle *th = NULL;
2997 int depth;
2998
2999 depth = reiserfs_write_unlock_nested(inode->i_sb);
3000 reiserfs_wait_on_write_block(inode->i_sb);
3001 reiserfs_write_lock_nested(inode->i_sb, depth);
3002
3003 if (reiserfs_transaction_running(inode->i_sb)) {
3004 th = current->journal_info;
3005 }
3006 reiserfs_commit_page(inode, page, from, to);
3007
3008 /*
3009 * generic_commit_write does this for us, but does not update the
3010 * transaction tracking stuff when the size changes. So, we have
3011 * to do the i_size updates here.
3012 */
3013 if (pos > inode->i_size) {
3014 struct reiserfs_transaction_handle myth;
3015 /*
3016 * If the file has grown beyond the boundary where it
3017 * can have a tail, unmark it as needing tail
3018 * packing
3019 */
3020 if ((have_large_tails(inode->i_sb)
3021 && inode->i_size > i_block_size(inode) * 4)
3022 || (have_small_tails(inode->i_sb)
3023 && inode->i_size > i_block_size(inode)))
3024 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
3025
3026 ret = journal_begin(&myth, inode->i_sb, 1);
3027 if (ret)
3028 goto journal_error;
3029
3030 reiserfs_update_inode_transaction(inode);
3031 inode->i_size = pos;
3032 /*
3033 * this will just nest into our transaction. It's important
3034 * to use mark_inode_dirty so the inode gets pushed around
3035 * on the dirty lists, and so that O_SYNC works as expected
3036 */
3037 mark_inode_dirty(inode);
3038 reiserfs_update_sd(&myth, inode);
3039 update_sd = 1;
3040 ret = journal_end(&myth);
3041 if (ret)
3042 goto journal_error;
3043 }
3044 if (th) {
3045 if (!update_sd)
3046 mark_inode_dirty(inode);
3047 ret = reiserfs_end_persistent_transaction(th);
3048 if (ret)
3049 goto out;
3050 }
3051
3052out:
3053 return ret;
3054
3055journal_error:
3056 if (th) {
3057 if (!update_sd)
3058 reiserfs_update_sd(th, inode);
3059 ret = reiserfs_end_persistent_transaction(th);
3060 }
3061
3062 return ret;
3063}
3064
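/* translate on-disk REISERFS_*_FL attribute bits into in-core S_* flags */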
3065void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
3066{
3067 if (reiserfs_attrs(inode->i_sb)) {
3068 if (sd_attrs & REISERFS_SYNC_FL)
3069 inode->i_flags |= S_SYNC;
3070 else
3071 inode->i_flags &= ~S_SYNC;
3072 if (sd_attrs & REISERFS_IMMUTABLE_FL)
3073 inode->i_flags |= S_IMMUTABLE;
3074 else
3075 inode->i_flags &= ~S_IMMUTABLE;
3076 if (sd_attrs & REISERFS_APPEND_FL)
3077 inode->i_flags |= S_APPEND;
3078 else
3079 inode->i_flags &= ~S_APPEND;
3080 if (sd_attrs & REISERFS_NOATIME_FL)
3081 inode->i_flags |= S_NOATIME;
3082 else
3083 inode->i_flags &= ~S_NOATIME;
3084 if (sd_attrs & REISERFS_NOTAIL_FL)
3085 REISERFS_I(inode)->i_flags |= i_nopack_mask;
3086 else
3087 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
3088 }
3089}
3090
3091/*
3092 * decide if this buffer needs to stay around for data logging or ordered
3093 * write purposes
3094 */
3095static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
3096{
3097 int ret = 1;
3098 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
3099
3100 lock_buffer(bh);
3101 spin_lock(&j->j_dirty_buffers_lock);
3102 if (!buffer_mapped(bh)) {
3103 goto free_jh;
3104 }
3105 /*
3106 * the page is locked, and the only places that log a data buffer
3107 * also lock the page.
3108 */
3109 if (reiserfs_file_data_log(inode)) {
3110 /*
3111 * very conservative, leave the buffer pinned if
3112 * anyone might need it.
3113 */
3114 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
3115 ret = 0;
3116 }
3117 } else if (buffer_dirty(bh)) {
3118 struct reiserfs_journal_list *jl;
3119 struct reiserfs_jh *jh = bh->b_private;
3120
3121 /*
3122 * why is this safe?
3123 * reiserfs_setattr updates i_size in the on disk
3124 * stat data before allowing vmtruncate to be called.
3125 *
3126 * If buffer was put onto the ordered list for this
3127 * transaction, we know for sure either this transaction
3128 * or an older one already has updated i_size on disk,
3129 * and this ordered data won't be referenced in the file
3130 * if we crash.
3131 *
3132 * if the buffer was put onto the ordered list for an older
3133 * transaction, we need to leave it around
3134 */
3135 if (jh && (jl = jh->jl)
3136 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
3137 ret = 0;
3138 }
3139free_jh:
3140 if (ret && bh->b_private) {
3141 reiserfs_free_jh(bh);
3142 }
3143 spin_unlock(&j->j_dirty_buffers_lock);
3144 unlock_buffer(bh);
3145 return ret;
3146}
3147
3148/* clm -- taken from fs/buffer.c:block_invalidate_page */
3149static void reiserfs_invalidatepage(struct page *page, unsigned int offset,
3150 unsigned int length)
3151{
3152 struct buffer_head *head, *bh, *next;
3153 struct inode *inode = page->mapping->host;
3154 unsigned int curr_off = 0;
3155 unsigned int stop = offset + length;
3156 int partial_page = (offset || length < PAGE_SIZE);
3157 int ret = 1;
3158
3159 BUG_ON(!PageLocked(page));
3160
3161 if (!partial_page)
3162 ClearPageChecked(page);
3163
3164 if (!page_has_buffers(page))
3165 goto out;
3166
3167 head = page_buffers(page);
3168 bh = head;
3169 do {
3170 unsigned int next_off = curr_off + bh->b_size;
3171 next = bh->b_this_page;
3172
3173 if (next_off > stop)
3174 goto out;
3175
3176 /*
3177 * is this block fully invalidated?
3178 */
3179 if (offset <= curr_off) {
3180 if (invalidatepage_can_drop(inode, bh))
3181 reiserfs_unmap_buffer(bh);
3182 else
3183 ret = 0;
3184 }
3185 curr_off = next_off;
3186 bh = next;
3187 } while (bh != head);
3188
3189 /*
3190 * We release buffers only if the entire page is being invalidated.
3191 * The get_block cached value has been unconditionally invalidated,
3192 * so real IO is not possible anymore.
3193 */
3194 if (!partial_page && ret) {
3195 ret = try_to_release_page(page, 0);
3196 /* maybe should BUG_ON(!ret); - neilb */
3197 }
3198out:
3199 return;
3200}
3201
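/*
 * with data logging enabled, a freshly dirtied page is marked Checked so
 * reiserfs_write_full_page knows its buffers must go through the journal
 */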
3202static int reiserfs_set_page_dirty(struct page *page)
3203{
3204 struct inode *inode = page->mapping->host;
3205 if (reiserfs_file_data_log(inode)) {
3206 SetPageChecked(page);
3207 return __set_page_dirty_nobuffers(page);
3208 }
3209 return __set_page_dirty_buffers(page);
3210}
3211
3212/*
3213 * Returns 1 if the page's buffers were dropped. The page is locked.
3214 *
3215 * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
3216 * in the buffers at page_buffers(page).
3217 *
3218 * even in -o notail mode, we can't be sure an old mount without -o notail
3219 * didn't create files with tails.
3220 */
3221static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3222{
3223 struct inode *inode = page->mapping->host;
3224 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
3225 struct buffer_head *head;
3226 struct buffer_head *bh;
3227 int ret = 1;
3228
3229 WARN_ON(PageChecked(page));
3230 spin_lock(&j->j_dirty_buffers_lock);
3231 head = page_buffers(page);
3232 bh = head;
3233 do {
3234 if (bh->b_private) {
3235 if (!buffer_dirty(bh) && !buffer_locked(bh)) {
3236 reiserfs_free_jh(bh);
3237 } else {
3238 ret = 0;
3239 break;
3240 }
3241 }
3242 bh = bh->b_this_page;
3243 } while (bh != head);
3244 if (ret)
3245 ret = try_to_free_buffers(page);
3246 spin_unlock(&j->j_dirty_buffers_lock);
3247 return ret;
3248}
3249
3250/*
3251 * We thank Mingming Cao for helping us understand in great detail what
3252 * to do in this section of the code.
3253 */
3254static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
3255{
3256 struct file *file = iocb->ki_filp;
3257 struct inode *inode = file->f_mapping->host;
3258 size_t count = iov_iter_count(iter);
3259 ssize_t ret;
3260
3261 ret = blockdev_direct_IO(iocb, inode, iter,
3262 reiserfs_get_blocks_direct_io);
3263
3264 /*
3265 * In case of error extending write may have instantiated a few
3266 * blocks outside i_size. Trim these off again.
3267 */
3268 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
3269 loff_t isize = i_size_read(inode);
3270 loff_t end = iocb->ki_pos + count;
3271
3272 if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
3273 truncate_setsize(inode, isize);
3274 reiserfs_vfs_truncate_file(inode);
3275 }
3276 }
3277
3278 return ret;
3279}
3280
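/*
 * ->setattr: size changes go through the reiserfs truncate path (with a
 * save link for crash safety), uid/gid changes transfer quota inside a
 * transaction, and the remaining attributes are applied with setattr_copy
 */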
3281int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3282{
3283 struct inode *inode = d_inode(dentry);
3284 unsigned int ia_valid;
3285 int error;
3286
3287 error = setattr_prepare(dentry, attr);
3288 if (error)
3289 return error;
3290
3291 /* must be turned off for recursive notify_change calls */
3292 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3293
3294 if (is_quota_modification(inode, attr)) {
3295 error = dquot_initialize(inode);
3296 if (error)
3297 return error;
3298 }
3299 reiserfs_write_lock(inode->i_sb);
3300 if (attr->ia_valid & ATTR_SIZE) {
3301 /*
3302 * version 2 items will be caught by the s_maxbytes check
3303 * done for us in inode_newsize_ok()
3304 */
3305 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
3306 attr->ia_size > MAX_NON_LFS) {
3307 reiserfs_write_unlock(inode->i_sb);
3308 error = -EFBIG;
3309 goto out;
3310 }
3311
3312 inode_dio_wait(inode);
3313
3314 /* fill in hole pointers in the expanding truncate case. */
3315 if (attr->ia_size > inode->i_size) {
3316 error = generic_cont_expand_simple(inode, attr->ia_size);
3317 if (REISERFS_I(inode)->i_prealloc_count > 0) {
3318 int err;
3319 struct reiserfs_transaction_handle th;
3320 /* we're changing at most 2 bitmaps, inode + super */
3321 err = journal_begin(&th, inode->i_sb, 4);
3322 if (!err) {
3323 reiserfs_discard_prealloc(&th, inode);
3324 err = journal_end(&th);
3325 }
3326 if (err)
3327 error = err;
3328 }
3329 if (error) {
3330 reiserfs_write_unlock(inode->i_sb);
3331 goto out;
3332 }
3333 /*
3334 * file size is changed, ctime and mtime are
3335 * to be updated
3336 */
3337 attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
3338 }
3339 }
3340 reiserfs_write_unlock(inode->i_sb);
3341
3342 if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) ||
3343 ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) &&
3344 (get_inode_sd_version(inode) == STAT_DATA_V1)) {
3345 /* stat data of format v3.5 has 16 bit uid and gid */
3346 error = -EINVAL;
3347 goto out;
3348 }
3349
3350 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
3351 (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
3352 struct reiserfs_transaction_handle th;
3353 int jbegin_count =
3354 2 *
3355 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
3356 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
3357 2;
3358
3359 error = reiserfs_chown_xattrs(inode, attr);
3360
3361 if (error)
3362 return error;
3363
3364 /*
3365 * (user+group)*(old+new) quota structures, plus the
3366 * inode and super block writes
3367 */
3368 reiserfs_write_lock(inode->i_sb);
3369 error = journal_begin(&th, inode->i_sb, jbegin_count);
3370 reiserfs_write_unlock(inode->i_sb);
3371 if (error)
3372 goto out;
3373 error = dquot_transfer(inode, attr);
3374 reiserfs_write_lock(inode->i_sb);
3375 if (error) {
3376 journal_end(&th);
3377 reiserfs_write_unlock(inode->i_sb);
3378 goto out;
3379 }
3380
3381 /*
3382 * Update corresponding info in inode so that everything
3383 * is in one transaction
3384 */
3385 if (attr->ia_valid & ATTR_UID)
3386 inode->i_uid = attr->ia_uid;
3387 if (attr->ia_valid & ATTR_GID)
3388 inode->i_gid = attr->ia_gid;
3389 mark_inode_dirty(inode);
3390 error = journal_end(&th);
3391 reiserfs_write_unlock(inode->i_sb);
3392 if (error)
3393 goto out;
3394 }
3395
3396 if ((attr->ia_valid & ATTR_SIZE) &&
3397 attr->ia_size != i_size_read(inode)) {
3398 error = inode_newsize_ok(inode, attr->ia_size);
3399 if (!error) {
3400 /*
3401 * Could race against reiserfs_file_release
3402 * if called from NFS, so take tailpack mutex.
3403 */
3404 mutex_lock(&REISERFS_I(inode)->tailpack);
3405 truncate_setsize(inode, attr->ia_size);
3406 reiserfs_truncate_file(inode, 1);
3407 mutex_unlock(&REISERFS_I(inode)->tailpack);
3408 }
3409 }
3410
3411 if (!error) {
3412 setattr_copy(inode, attr);
3413 mark_inode_dirty(inode);
3414 }
3415
3416 if (!error && reiserfs_posixacl(inode->i_sb)) {
3417 if (attr->ia_valid & ATTR_MODE)
3418 error = reiserfs_acl_chmod(inode);
3419 }
3420
3421out:
3422 return error;
3423}
3424
3425const struct address_space_operations reiserfs_address_space_operations = {
3426 .writepage = reiserfs_writepage,
3427 .readpage = reiserfs_readpage,
3428 .readpages = reiserfs_readpages,
3429 .releasepage = reiserfs_releasepage,
3430 .invalidatepage = reiserfs_invalidatepage,
3431 .write_begin = reiserfs_write_begin,
3432 .write_end = reiserfs_write_end,
3433 .bmap = reiserfs_aop_bmap,
3434 .direct_IO = reiserfs_direct_IO,
3435 .set_page_dirty = reiserfs_set_page_dirty,
3436};
1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5#include <linux/time.h>
6#include <linux/fs.h>
7#include "reiserfs.h"
8#include "acl.h"
9#include "xattr.h"
10#include <linux/exportfs.h>
11#include <linux/pagemap.h>
12#include <linux/highmem.h>
13#include <linux/slab.h>
14#include <asm/uaccess.h>
15#include <asm/unaligned.h>
16#include <linux/buffer_head.h>
17#include <linux/mpage.h>
18#include <linux/writeback.h>
19#include <linux/quotaops.h>
20#include <linux/swap.h>
21#include <linux/aio.h>
22
23int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to);
25
26void reiserfs_evict_inode(struct inode *inode)
27{
28 /* We need blocks for transaction + (user+group) quota update (possibly delete) */
29 int jbegin_count =
30 JOURNAL_PER_BALANCE_CNT * 2 +
31 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
32 struct reiserfs_transaction_handle th;
33 int err;
34
35 if (!inode->i_nlink && !is_bad_inode(inode))
36 dquot_initialize(inode);
37
38 truncate_inode_pages_final(&inode->i_data);
39 if (inode->i_nlink)
40 goto no_delete;
41
42 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
43 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
44
45 reiserfs_delete_xattrs(inode);
46
47 reiserfs_write_lock(inode->i_sb);
48
49 if (journal_begin(&th, inode->i_sb, jbegin_count))
50 goto out;
51 reiserfs_update_inode_transaction(inode);
52
53 reiserfs_discard_prealloc(&th, inode);
54
55 err = reiserfs_delete_object(&th, inode);
56
57 /* Do quota update inside a transaction for journaled quotas. We must do that
58 * after delete_object so that quota updates go into the same transaction as
59 * stat data deletion */
60 if (!err) {
61 int depth = reiserfs_write_unlock_nested(inode->i_sb);
62 dquot_free_inode(inode);
63 reiserfs_write_lock_nested(inode->i_sb, depth);
64 }
65
66 if (journal_end(&th, inode->i_sb, jbegin_count))
67 goto out;
68
69 /* check return value from reiserfs_delete_object after
70 * ending the transaction
71 */
72 if (err)
73 goto out;
74
75 /* all items of file are deleted, so we can remove "save" link */
76 remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything
77 * about an error here */
78out:
79 reiserfs_write_unlock(inode->i_sb);
80 } else {
81 /* no object items are in the tree */
82 ;
83 }
84 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
85 dquot_drop(inode);
86 inode->i_blocks = 0;
87 return;
88
89no_delete:
90 clear_inode(inode);
91 dquot_drop(inode);
92}
93
94static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
95 __u32 objectid, loff_t offset, int type, int length)
96{
97 key->version = version;
98
99 key->on_disk_key.k_dir_id = dirid;
100 key->on_disk_key.k_objectid = objectid;
101 set_cpu_key_k_offset(key, offset);
102 set_cpu_key_k_type(key, type);
103 key->key_length = length;
104}
105
106/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
107 offset and type of key */
108void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
109 int type, int length)
110{
111 _make_cpu_key(key, get_inode_item_key_version(inode),
112 le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
113 le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
114 length);
115}
116
117//
118// when key is 0, do not set version and short key
119//
120inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
121 int version,
122 loff_t offset, int type, int length,
123 int entry_count /*or ih_free_space */ )
124{
125 if (key) {
126 ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
127 ih->ih_key.k_objectid =
128 cpu_to_le32(key->on_disk_key.k_objectid);
129 }
130 put_ih_version(ih, version);
131 set_le_ih_k_offset(ih, offset);
132 set_le_ih_k_type(ih, type);
133 put_ih_item_len(ih, length);
134 /* set_ih_free_space (ih, 0); */
135 // for directory items it is entry count, for directs and stat
136 // datas - 0xffff, for indirects - 0
137 put_ih_entry_count(ih, entry_count);
138}
139
140//
141// FIXME: we might cache recently accessed indirect item
142
143// Ugh. Not too eager for that....
144// I cut the code until such time as I see a convincing argument (benchmark).
145// I don't want a bloated inode struct..., and I don't like code complexity....
146
147/* cutting the code is fine, since it really isn't in use yet and is easy
148** to add back in. But, Vladimir has a really good idea here. Think
149** about what happens for reading a file. For each page,
150** The VFS layer calls reiserfs_readpage, who searches the tree to find
151** an indirect item. This indirect item has X number of pointers, where
152** X is a big number if we've done the block allocation right. But,
153** we only use one or two of these pointers during each call to readpage,
154** needlessly researching again later on.
155**
156** The size of the cache could be dynamic based on the size of the file.
157**
158** I'd also like to see us cache the location the stat data item, since
159** we are needlessly researching for that frequently.
160**
161** --chris
162*/
163
164/* If this page has a file tail in it, and
165** it was read in by get_block_create_0, the page data is valid,
166** but tail is still sitting in a direct item, and we can't write to
167** it. So, look through this page, and check all the mapped buffers
168** to make sure they have valid block numbers. Any that don't need
169** to be unmapped, so that __block_write_begin will correctly call
170** reiserfs_get_block to convert the tail into an unformatted node
171*/
172static inline void fix_tail_page_for_writing(struct page *page)
173{
174 struct buffer_head *head, *next, *bh;
175
176 if (page && page_has_buffers(page)) {
177 head = page_buffers(page);
178 bh = head;
179 do {
180 next = bh->b_this_page;
181 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
182 reiserfs_unmap_buffer(bh);
183 }
184 bh = next;
185 } while (bh != head);
186 }
187}
188
189/* reiserfs_get_block does not need to allocate a block only if it has been
190 done already or non-hole position has been found in the indirect item */
191static inline int allocation_needed(int retval, b_blocknr_t allocated,
192 struct item_head *ih,
193 __le32 * item, int pos_in_item)
194{
195 if (allocated)
196 return 0;
197 if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
198 get_block_num(item, pos_in_item))
199 return 0;
200 return 1;
201}
202
203static inline int indirect_item_found(int retval, struct item_head *ih)
204{
205 return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
206}
207
208static inline void set_block_dev_mapped(struct buffer_head *bh,
209 b_blocknr_t block, struct inode *inode)
210{
211 map_bh(bh, inode->i_sb, block);
212}
213
214//
215// files which were created in the earlier version can not be longer,
216// than 2 gb
217//
218static int file_capable(struct inode *inode, sector_t block)
219{
220 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file.
221 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
222 return 1;
223
224 return 0;
225}
226
227static int restart_transaction(struct reiserfs_transaction_handle *th,
228 struct inode *inode, struct treepath *path)
229{
230 struct super_block *s = th->t_super;
231 int len = th->t_blocks_allocated;
232 int err;
233
234 BUG_ON(!th->t_trans_id);
235 BUG_ON(!th->t_refcount);
236
237 pathrelse(path);
238
239 /* we cannot restart while nested */
240 if (th->t_refcount > 1) {
241 return 0;
242 }
243 reiserfs_update_sd(th, inode);
244 err = journal_end(th, s, len);
245 if (!err) {
246 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
247 if (!err)
248 reiserfs_update_inode_transaction(inode);
249 }
250 return err;
251}
252
253// it is called by get_block when create == 0. Returns block number
254// for 'block'-th logical block of file. When it hits direct item it
255// returns 0 (being called from bmap) or read direct item into piece
256// of page (bh_result)
257
258// Please improve the english/clarity in the comment above, as it is
259// hard to understand.
260
261static int _get_block_create_0(struct inode *inode, sector_t block,
262 struct buffer_head *bh_result, int args)
263{
264 INITIALIZE_PATH(path);
265 struct cpu_key key;
266 struct buffer_head *bh;
267 struct item_head *ih, tmp_ih;
268 b_blocknr_t blocknr;
269 char *p = NULL;
270 int chars;
271 int ret;
272 int result;
273 int done = 0;
274 unsigned long offset;
275
276 // prepare the key to look for the 'block'-th block of file
277 make_cpu_key(&key, inode,
278 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
279 3);
280
281 result = search_for_position_by_key(inode->i_sb, &key, &path);
282 if (result != POSITION_FOUND) {
283 pathrelse(&path);
284 if (p)
285 kunmap(bh_result->b_page);
286 if (result == IO_ERROR)
287 return -EIO;
288 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
289 // That there is some MMAPED data associated with it that is yet to be written to disk.
290 if ((args & GET_BLOCK_NO_HOLE)
291 && !PageUptodate(bh_result->b_page)) {
292 return -ENOENT;
293 }
294 return 0;
295 }
296 //
297 bh = get_last_bh(&path);
298 ih = get_ih(&path);
299 if (is_indirect_le_ih(ih)) {
300 __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);
301
302 /* FIXME: here we could cache indirect item or part of it in
303 the inode to avoid search_by_key in case of subsequent
304 access to file */
305 blocknr = get_block_num(ind_item, path.pos_in_item);
306 ret = 0;
307 if (blocknr) {
308 map_bh(bh_result, inode->i_sb, blocknr);
309 if (path.pos_in_item ==
310 ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
311 set_buffer_boundary(bh_result);
312 }
313 } else
314 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
315 // That there is some MMAPED data associated with it that is yet to be written to disk.
316 if ((args & GET_BLOCK_NO_HOLE)
317 && !PageUptodate(bh_result->b_page)) {
318 ret = -ENOENT;
319 }
320
321 pathrelse(&path);
322 if (p)
323 kunmap(bh_result->b_page);
324 return ret;
325 }
326 // requested data are in direct item(s)
327 if (!(args & GET_BLOCK_READ_DIRECT)) {
328 // we are called by bmap. FIXME: we can not map block of file
329 // when it is stored in direct item(s)
330 pathrelse(&path);
331 if (p)
332 kunmap(bh_result->b_page);
333 return -ENOENT;
334 }
335
336 /* if we've got a direct item, and the buffer or page was uptodate,
337 ** we don't want to pull data off disk again. skip to the
338 ** end, where we map the buffer and return
339 */
340 if (buffer_uptodate(bh_result)) {
341 goto finished;
342 } else
343 /*
344 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
345 ** pages without any buffers. If the page is up to date, we don't want
346 ** read old data off disk. Set the up to date bit on the buffer instead
347 ** and jump to the end
348 */
349 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
350 set_buffer_uptodate(bh_result);
351 goto finished;
352 }
353 // read file tail into part of page
354 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
355 copy_item_head(&tmp_ih, ih);
356
357 /* we only want to kmap if we are reading the tail into the page.
358 ** this is not the common case, so we don't kmap until we are
359 ** sure we need to. But, this means the item might move if
360 ** kmap schedules
361 */
362 if (!p)
363 p = (char *)kmap(bh_result->b_page);
364
365 p += offset;
366 memset(p, 0, inode->i_sb->s_blocksize);
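	/*
	 * Walk the direct item(s) that hold the tail, copying each piece
	 * into the mapped page until we reach i_size or run out of tail.
	 */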
367 do {
368 if (!is_direct_le_ih(ih)) {
369 BUG();
370 }
371 /* make sure we don't read more bytes than actually exist in
372 ** the file. This can happen in odd cases where i_size isn't
373 ** correct, and when direct item padding results in a few
374 ** extra bytes at the end of the direct item
375 */
376 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
377 break;
378 if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
379 chars =
380 inode->i_size - (le_ih_k_offset(ih) - 1) -
381 path.pos_in_item;
382 done = 1;
383 } else {
384 chars = ih_item_len(ih) - path.pos_in_item;
385 }
386 memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);
387
388 if (done)
389 break;
390
391 p += chars;
392
393 if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			/* we are done if the direct item we read is not the
			   last item of the node. FIXME: we could check the
			   right delimiting key to see whether the direct
			   item continues in the right neighbor, or rely on
			   i_size */
398 break;
399
400 // update key to look for the next piece
401 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
402 result = search_for_position_by_key(inode->i_sb, &key, &path);
403 if (result != POSITION_FOUND)
404 // i/o error most likely
405 break;
406 bh = get_last_bh(&path);
407 ih = get_ih(&path);
408 } while (1);
409
410 flush_dcache_page(bh_result->b_page);
411 kunmap(bh_result->b_page);
412
413 finished:
414 pathrelse(&path);
415
416 if (result == IO_ERROR)
417 return -EIO;
418
419 /* this buffer has valid data, but isn't valid for io. mapping it to
420 * block #0 tells the rest of reiserfs it just has a tail in it
421 */
422 map_bh(bh_result, inode->i_sb, 0);
423 set_buffer_uptodate(bh_result);
424 return 0;
425}
426
/* this is called to create the file's block map, so _get_block_create_0
   will not read a direct item */
429static int reiserfs_bmap(struct inode *inode, sector_t block,
430 struct buffer_head *bh_result, int create)
431{
432 if (!file_capable(inode, block))
433 return -EFBIG;
434
435 reiserfs_write_lock(inode->i_sb);
436 /* do not read the direct item */
437 _get_block_create_0(inode, block, bh_result, 0);
438 reiserfs_write_unlock(inode->i_sb);
439 return 0;
440}
441
/* special version of get_block that is only used by grab_tail_page right
** now. It is sent to __block_write_begin, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer. __block_write_begin expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows __block_write_begin to be used for reading a single
** block in a page, without producing a valid page for holes or for
** blocks past the end of the file. This turns out to be exactly what
** we need for reading tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1. If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
459static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
460 struct buffer_head *bh_result,
461 int create)
462{
463 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
464}
465
/* This is a special helper for reiserfs_get_block when we are executing
   a direct I/O request. */
468static int reiserfs_get_blocks_direct_io(struct inode *inode,
469 sector_t iblock,
470 struct buffer_head *bh_result,
471 int create)
472{
473 int ret;
474
475 bh_result->b_page = NULL;
476
477 /* We set the b_size before reiserfs_get_block call since it is
478 referenced in convert_tail_for_hole() that may be called from
479 reiserfs_get_block() */
480 bh_result->b_size = (1 << inode->i_blkbits);
481
482 ret = reiserfs_get_block(inode, iblock, bh_result,
483 create | GET_BLOCK_NO_DANGLE);
484 if (ret)
485 goto out;
486
487 /* don't allow direct io onto tail pages */
488 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
489 /* make sure future calls to the direct io funcs for this offset
490 ** in the file fail by unmapping the buffer
491 */
492 clear_buffer_mapped(bh_result);
493 ret = -EINVAL;
494 }
495 /* Possible unpacked tail. Flush the data before pages have
496 disappeared */
497 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
498 int err;
499
500 reiserfs_write_lock(inode->i_sb);
501
502 err = reiserfs_commit_for_inode(inode);
503 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
504
505 reiserfs_write_unlock(inode->i_sb);
506
507 if (err < 0)
508 ret = err;
509 }
510 out:
511 return ret;
512}
513
514/*
515** helper function for when reiserfs_get_block is called for a hole
516** but the file tail is still in a direct item
517** bh_result is the buffer head for the hole
518** tail_offset is the offset of the start of the tail in the file
519**
520** This calls prepare_write, which will start a new transaction
521** you should not be in a transaction, or have any paths held when you
522** call this.
523*/
524static int convert_tail_for_hole(struct inode *inode,
525 struct buffer_head *bh_result,
526 loff_t tail_offset)
527{
528 unsigned long index;
529 unsigned long tail_end;
530 unsigned long tail_start;
531 struct page *tail_page;
532 struct page *hole_page = bh_result->b_page;
533 int retval = 0;
534
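	/*
	 * Item key offsets are 1-based, so a tail that begins exactly on a
	 * block boundary has an offset of (block start + 1).
	 */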
535 if ((tail_offset & (bh_result->b_size - 1)) != 1)
536 return -EIO;
537
538 /* always try to read until the end of the block */
539 tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
540 tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
541
542 index = tail_offset >> PAGE_CACHE_SHIFT;
543 /* hole_page can be zero in case of direct_io, we are sure
544 that we cannot get here if we write with O_DIRECT into
545 tail page */
546 if (!hole_page || index != hole_page->index) {
547 tail_page = grab_cache_page(inode->i_mapping, index);
548 retval = -ENOMEM;
549 if (!tail_page) {
550 goto out;
551 }
552 } else {
553 tail_page = hole_page;
554 }
555
556 /* we don't have to make sure the conversion did not happen while
557 ** we were locking the page because anyone that could convert
558 ** must first take i_mutex.
559 **
560 ** We must fix the tail page for writing because it might have buffers
561 ** that are mapped, but have a block number of 0. This indicates tail
562 ** data that has been read directly into the page, and
563 ** __block_write_begin won't trigger a get_block in this case.
564 */
565 fix_tail_page_for_writing(tail_page);
566 retval = __reiserfs_write_begin(tail_page, tail_start,
567 tail_end - tail_start);
568 if (retval)
569 goto unlock;
570
571 /* tail conversion might change the data in the page */
572 flush_dcache_page(tail_page);
573
574 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);
575
576 unlock:
577 if (tail_page != hole_page) {
578 unlock_page(tail_page);
579 page_cache_release(tail_page);
580 }
581 out:
582 return retval;
583}
584
585static inline int _allocate_block(struct reiserfs_transaction_handle *th,
586 sector_t block,
587 struct inode *inode,
588 b_blocknr_t * allocated_block_nr,
589 struct treepath *path, int flags)
590{
591 BUG_ON(!th->t_trans_id);
592
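	/*
	 * GET_BLOCK_NO_IMUX means i_mutex is not held by the caller, in
	 * which case we skip preallocation and do a plain allocation.
	 */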
593#ifdef REISERFS_PREALLOCATE
594 if (!(flags & GET_BLOCK_NO_IMUX)) {
595 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
596 path, block);
597 }
598#endif
599 return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
600 block);
601}
602
603int reiserfs_get_block(struct inode *inode, sector_t block,
604 struct buffer_head *bh_result, int create)
605{
606 int repeat, retval = 0;
607 b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int
608 INITIALIZE_PATH(path);
609 int pos_in_item;
610 struct cpu_key key;
611 struct buffer_head *bh, *unbh = NULL;
612 struct item_head *ih, tmp_ih;
613 __le32 *item;
614 int done;
615 int fs_gen;
616 struct reiserfs_transaction_handle *th = NULL;
	/* space reserved in transaction batch:
	   . 3 balancings in direct->indirect conversion
	   . 1 block involved in reiserfs_update_sd()
	   . quota update for user and group
	   XXX in the practically impossible worst case direct2indirect()
	   can incur (much) more than 3 balancings. */
623 int jbegin_count =
624 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
625 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
626 int version;
627 int dangle = 1;
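	/* key offsets are 1-based byte offsets, hence the +1 below */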
628 loff_t new_offset =
629 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
630
631 reiserfs_write_lock(inode->i_sb);
632 version = get_inode_item_key_version(inode);
633
634 if (!file_capable(inode, block)) {
635 reiserfs_write_unlock(inode->i_sb);
636 return -EFBIG;
637 }
638
639 /* if !create, we aren't changing the FS, so we don't need to
640 ** log anything, so we don't need to start a transaction
641 */
642 if (!(create & GET_BLOCK_CREATE)) {
643 int ret;
644 /* find number of block-th logical block of the file */
645 ret = _get_block_create_0(inode, block, bh_result,
646 create | GET_BLOCK_READ_DIRECT);
647 reiserfs_write_unlock(inode->i_sb);
648 return ret;
649 }
650 /*
651 * if we're already in a transaction, make sure to close
652 * any new transactions we start in this func
653 */
654 if ((create & GET_BLOCK_NO_DANGLE) ||
655 reiserfs_transaction_running(inode->i_sb))
656 dangle = 0;
657
	/* If the file is small enough that it might have a tail, and tails
	** are enabled, mark it as possibly needing tail packing on close
	*/
661 if ((have_large_tails(inode->i_sb)
662 && inode->i_size < i_block_size(inode) * 4)
663 || (have_small_tails(inode->i_sb)
664 && inode->i_size < i_block_size(inode)))
665 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
666
667 /* set the key of the first byte in the 'block'-th block of file */
668 make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
669 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
670 start_trans:
671 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
672 if (!th) {
673 retval = -ENOMEM;
674 goto failure;
675 }
676 reiserfs_update_inode_transaction(inode);
677 }
678 research:
679
680 retval = search_for_position_by_key(inode->i_sb, &key, &path);
681 if (retval == IO_ERROR) {
682 retval = -EIO;
683 goto failure;
684 }
685
686 bh = get_last_bh(&path);
687 ih = get_ih(&path);
688 item = get_item(&path);
689 pos_in_item = path.pos_in_item;
690
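	/* remember the tree generation so we can detect whether it changed
	   while we blocked or scheduled */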
691 fs_gen = get_generation(inode->i_sb);
692 copy_item_head(&tmp_ih, ih);
693
694 if (allocation_needed
695 (retval, allocated_block_nr, ih, item, pos_in_item)) {
696 /* we have to allocate block for the unformatted node */
697 if (!th) {
698 pathrelse(&path);
699 goto start_trans;
700 }
701
702 repeat =
703 _allocate_block(th, block, inode, &allocated_block_nr,
704 &path, create);
705
706 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
707 /* restart the transaction to give the journal a chance to free
708 ** some blocks. releases the path, so we have to go back to
709 ** research if we succeed on the second try
710 */
711 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
712 retval = restart_transaction(th, inode, &path);
713 if (retval)
714 goto failure;
715 repeat =
716 _allocate_block(th, block, inode,
717 &allocated_block_nr, NULL, create);
718
719 if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
720 goto research;
721 }
722 if (repeat == QUOTA_EXCEEDED)
723 retval = -EDQUOT;
724 else
725 retval = -ENOSPC;
726 goto failure;
727 }
728
729 if (fs_changed(fs_gen, inode->i_sb)
730 && item_moved(&tmp_ih, &path)) {
731 goto research;
732 }
733 }
734
735 if (indirect_item_found(retval, ih)) {
736 b_blocknr_t unfm_ptr;
737 /* 'block'-th block is in the file already (there is
738 corresponding cell in some indirect item). But it may be
739 zero unformatted node pointer (hole) */
740 unfm_ptr = get_block_num(item, pos_in_item);
741 if (unfm_ptr == 0) {
742 /* use allocated block to plug the hole */
743 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
744 if (fs_changed(fs_gen, inode->i_sb)
745 && item_moved(&tmp_ih, &path)) {
746 reiserfs_restore_prepared_buffer(inode->i_sb,
747 bh);
748 goto research;
749 }
750 set_buffer_new(bh_result);
751 if (buffer_dirty(bh_result)
752 && reiserfs_data_ordered(inode->i_sb))
753 reiserfs_add_ordered_list(inode, bh_result);
754 put_block_num(item, pos_in_item, allocated_block_nr);
755 unfm_ptr = allocated_block_nr;
756 journal_mark_dirty(th, inode->i_sb, bh);
757 reiserfs_update_sd(th, inode);
758 }
759 set_block_dev_mapped(bh_result, unfm_ptr, inode);
760 pathrelse(&path);
761 retval = 0;
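		/* end the transaction only if we started it in this call */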
762 if (!dangle && th)
763 retval = reiserfs_end_persistent_transaction(th);
764
765 reiserfs_write_unlock(inode->i_sb);
766
	/* the item was found, so new blocks were not added to the file;
	** there is no need to make sure the inode is updated with this
	** transaction
	*/
771 return retval;
772 }
773
774 if (!th) {
775 pathrelse(&path);
776 goto start_trans;
777 }
778
779 /* desired position is not found or is in the direct item. We have
780 to append file with holes up to 'block'-th block converting
781 direct items to indirect one if necessary */
782 done = 0;
783 do {
784 if (is_statdata_le_ih(ih)) {
785 __le32 unp = 0;
786 struct cpu_key tmp_key;
787
788 /* indirect item has to be inserted */
789 make_le_item_head(&tmp_ih, &key, version, 1,
790 TYPE_INDIRECT, UNFM_P_SIZE,
791 0 /* free_space */ );
792
793 if (cpu_key_k_offset(&key) == 1) {
794 /* we are going to add 'block'-th block to the file. Use
795 allocated block for that */
796 unp = cpu_to_le32(allocated_block_nr);
797 set_block_dev_mapped(bh_result,
798 allocated_block_nr, inode);
799 set_buffer_new(bh_result);
800 done = 1;
801 }
802 tmp_key = key; // ;)
803 set_cpu_key_k_offset(&tmp_key, 1);
804 PATH_LAST_POSITION(&path)++;
805
806 retval =
807 reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
808 inode, (char *)&unp);
809 if (retval) {
810 reiserfs_free_block(th, inode,
811 allocated_block_nr, 1);
812 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
813 }
814 //mark_tail_converted (inode);
815 } else if (is_direct_le_ih(ih)) {
816 /* direct item has to be converted */
817 loff_t tail_offset;
818
819 tail_offset =
820 ((le_ih_k_offset(ih) -
821 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
822 if (tail_offset == cpu_key_k_offset(&key)) {
823 /* direct item we just found fits into block we have
824 to map. Convert it into unformatted node: use
825 bh_result for the conversion */
826 set_block_dev_mapped(bh_result,
827 allocated_block_nr, inode);
828 unbh = bh_result;
829 done = 1;
830 } else {
				/* we have to pad the file tail stored in
				   direct item(s) up to block size and
				   convert it to an unformatted node.
				   FIXME: this should also get into the
				   page cache */
834
835 pathrelse(&path);
836 /*
837 * ugly, but we can only end the transaction if
838 * we aren't nested
839 */
840 BUG_ON(!th->t_refcount);
841 if (th->t_refcount == 1) {
842 retval =
843 reiserfs_end_persistent_transaction
844 (th);
845 th = NULL;
846 if (retval)
847 goto failure;
848 }
849
850 retval =
851 convert_tail_for_hole(inode, bh_result,
852 tail_offset);
853 if (retval) {
854 if (retval != -ENOSPC)
855 reiserfs_error(inode->i_sb,
856 "clm-6004",
857 "convert tail failed "
858 "inode %lu, error %d",
859 inode->i_ino,
860 retval);
861 if (allocated_block_nr) {
862 /* the bitmap, the super, and the stat data == 3 */
863 if (!th)
864 th = reiserfs_persistent_transaction(inode->i_sb, 3);
865 if (th)
866 reiserfs_free_block(th,
867 inode,
868 allocated_block_nr,
869 1);
870 }
871 goto failure;
872 }
873 goto research;
874 }
875 retval =
876 direct2indirect(th, inode, &path, unbh,
877 tail_offset);
878 if (retval) {
879 reiserfs_unmap_buffer(unbh);
880 reiserfs_free_block(th, inode,
881 allocated_block_nr, 1);
882 goto failure;
883 }
884 /* it is important the set_buffer_uptodate is done after
885 ** the direct2indirect. The buffer might contain valid
886 ** data newer than the data on disk (read by readpage, changed,
887 ** and then sent here by writepage). direct2indirect needs
888 ** to know if unbh was already up to date, so it can decide
889 ** if the data in unbh needs to be replaced with data from
890 ** the disk
891 */
892 set_buffer_uptodate(unbh);
893
			/* unbh->b_page == NULL in case of a DIRECT_IO
			   request; the buffer will disappear shortly, so it
			   should not be added to the tail list */
897 if (unbh->b_page) {
898 /* we've converted the tail, so we must
899 ** flush unbh before the transaction commits
900 */
901 reiserfs_add_tail_list(inode, unbh);
902
903 /* mark it dirty now to prevent commit_write from adding
904 ** this buffer to the inode's dirty buffer list
905 */
906 /*
907 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
908 * It's still atomic, but it sets the page dirty too,
909 * which makes it eligible for writeback at any time by the
910 * VM (which was also the case with __mark_buffer_dirty())
911 */
912 mark_buffer_dirty(unbh);
913 }
914 } else {
915 /* append indirect item with holes if needed, when appending
916 pointer to 'block'-th block use block, which is already
917 allocated */
918 struct cpu_key tmp_key;
			/* used when we need to allocate only one block,
			   which is the fast path */
			unp_t unf_single = 0;
921 unp_t *un;
922 __u64 max_to_insert =
923 MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
924 UNFM_P_SIZE;
925 __u64 blocks_needed;
926
927 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
928 "vs-804: invalid position for append");
929 /* indirect item has to be appended, set up key of that position */
930 make_cpu_key(&tmp_key, inode,
931 le_key_k_offset(version,
932 &(ih->ih_key)) +
933 op_bytes_number(ih,
934 inode->i_sb->s_blocksize),
935 //pos_in_item * inode->i_sb->s_blocksize,
936 TYPE_INDIRECT, 3); // key type is unimportant
937
938 RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
939 "green-805: invalid offset");
940 blocks_needed =
941 1 +
942 ((cpu_key_k_offset(&key) -
943 cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
944 s_blocksize_bits);
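			/* pointers needed to reach the target block,
			   counting the intervening holes */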
945
946 if (blocks_needed == 1) {
947 un = &unf_single;
948 } else {
949 un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
950 if (!un) {
951 un = &unf_single;
952 blocks_needed = 1;
953 max_to_insert = 0;
954 }
955 }
956 if (blocks_needed <= max_to_insert) {
957 /* we are going to add target block to the file. Use allocated
958 block for that */
959 un[blocks_needed - 1] =
960 cpu_to_le32(allocated_block_nr);
961 set_block_dev_mapped(bh_result,
962 allocated_block_nr, inode);
963 set_buffer_new(bh_result);
964 done = 1;
965 } else {
				/* paste hole to the indirect item */
				/* if kzalloc failed, max_to_insert becomes
				   zero and we only have space for one block */
969 blocks_needed =
970 max_to_insert ? max_to_insert : 1;
971 }
972 retval =
973 reiserfs_paste_into_item(th, &path, &tmp_key, inode,
974 (char *)un,
975 UNFM_P_SIZE *
976 blocks_needed);
977
978 if (blocks_needed != 1)
979 kfree(un);
980
981 if (retval) {
982 reiserfs_free_block(th, inode,
983 allocated_block_nr, 1);
984 goto failure;
985 }
986 if (!done) {
				/* We need to mark the new file size in case
				   this function is interrupted/aborted later
				   on. We may only do this for holes. */
990 inode->i_size +=
991 inode->i_sb->s_blocksize * blocks_needed;
992 }
993 }
994
995 if (done == 1)
996 break;
997
998 /* this loop could log more blocks than we had originally asked
999 ** for. So, we have to allow the transaction to end if it is
1000 ** too big or too full. Update the inode so things are
1001 ** consistent if we crash before the function returns
1002 **
1003 ** release the path so that anybody waiting on the path before
1004 ** ending their transaction will be able to continue.
1005 */
1006 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1007 retval = restart_transaction(th, inode, &path);
1008 if (retval)
1009 goto failure;
1010 }
1011 /*
1012 * inserting indirect pointers for a hole can take a
1013 * long time. reschedule if needed and also release the write
1014 * lock for others.
1015 */
1016 reiserfs_cond_resched(inode->i_sb);
1017
1018 retval = search_for_position_by_key(inode->i_sb, &key, &path);
1019 if (retval == IO_ERROR) {
1020 retval = -EIO;
1021 goto failure;
1022 }
1023 if (retval == POSITION_FOUND) {
1024 reiserfs_warning(inode->i_sb, "vs-825",
1025 "%K should not be found", &key);
1026 retval = -EEXIST;
1027 if (allocated_block_nr)
1028 reiserfs_free_block(th, inode,
1029 allocated_block_nr, 1);
1030 pathrelse(&path);
1031 goto failure;
1032 }
1033 bh = get_last_bh(&path);
1034 ih = get_ih(&path);
1035 item = get_item(&path);
1036 pos_in_item = path.pos_in_item;
1037 } while (1);
1038
1039 retval = 0;
1040
1041 failure:
1042 if (th && (!dangle || (retval && !th->t_trans_id))) {
1043 int err;
1044 if (th->t_trans_id)
1045 reiserfs_update_sd(th, inode);
1046 err = reiserfs_end_persistent_transaction(th);
1047 if (err)
1048 retval = err;
1049 }
1050
1051 reiserfs_write_unlock(inode->i_sb);
1052 reiserfs_check_path(&path);
1053 return retval;
1054}
1055
1056static int
1057reiserfs_readpages(struct file *file, struct address_space *mapping,
1058 struct list_head *pages, unsigned nr_pages)
1059{
1060 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
1061}
1062
/* Compute the real number of bytes used by a file.
 * The following three functions can go away once there is enough space
 * in the stat item.
 */
1066static int real_space_diff(struct inode *inode, int sd_size)
1067{
1068 int bytes;
1069 loff_t blocksize = inode->i_sb->s_blocksize;
1070
1071 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
1072 return sd_size;
1073
1074 /* End of file is also in full block with indirect reference, so round
1075 ** up to the next block.
1076 **
1077 ** there is just no way to know if the tail is actually packed
1078 ** on the file, so we have to assume it isn't. When we pack the
1079 ** tail, we add 4 bytes to pretend there really is an unformatted
1080 ** node pointer
1081 */
1082 bytes =
1083 ((inode->i_size +
1084 (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
1085 sd_size;
1086 return bytes;
1087}
1088
1089static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
1090 int sd_size)
1091{
1092 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1093 return inode->i_size +
1094 (loff_t) (real_space_diff(inode, sd_size));
1095 }
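	/* i_blocks counts 512-byte sectors, hence the shift by 9 */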
1096 return ((loff_t) real_space_diff(inode, sd_size)) +
1097 (((loff_t) blocks) << 9);
1098}
1099
1100/* Compute number of blocks used by file in ReiserFS counting */
1101static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1102{
1103 loff_t bytes = inode_get_bytes(inode);
1104 loff_t real_space = real_space_diff(inode, sd_size);
1105
1106 /* keeps fsck and non-quota versions of reiserfs happy */
1107 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1108 bytes += (loff_t) 511;
1109 }
1110
	/* files from before the quota patch might have i_blocks such that
	** bytes < real_space. Deal with that here to prevent it from
	** going negative.
	*/
1115 if (bytes < real_space)
1116 return 0;
1117 return (bytes - real_space) >> 9;
1118}
1119
/*
 * BAD: new directories have stat data of the new type while all their
 * other items are of the old type. The version stored in the inode
 * describes the body items, so in update_stat_data we cannot rely on
 * the inode and have to check the item version directly.
 */
1126
1127// called by read_locked_inode
1128static void init_inode(struct inode *inode, struct treepath *path)
1129{
1130 struct buffer_head *bh;
1131 struct item_head *ih;
1132 __u32 rdev;
1133 //int version = ITEM_VERSION_1;
1134
1135 bh = PATH_PLAST_BUFFER(path);
1136 ih = PATH_PITEM_HEAD(path);
1137
1138 copy_key(INODE_PKEY(inode), &(ih->ih_key));
1139
1140 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1141 REISERFS_I(inode)->i_flags = 0;
1142 REISERFS_I(inode)->i_prealloc_block = 0;
1143 REISERFS_I(inode)->i_prealloc_count = 0;
1144 REISERFS_I(inode)->i_trans_id = 0;
1145 REISERFS_I(inode)->i_jl = NULL;
1146 reiserfs_init_xattr_rwsem(inode);
1147
1148 if (stat_data_v1(ih)) {
1149 struct stat_data_v1 *sd =
1150 (struct stat_data_v1 *)B_I_PITEM(bh, ih);
1151 unsigned long blocks;
1152
1153 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1154 set_inode_sd_version(inode, STAT_DATA_V1);
1155 inode->i_mode = sd_v1_mode(sd);
1156 set_nlink(inode, sd_v1_nlink(sd));
1157 i_uid_write(inode, sd_v1_uid(sd));
1158 i_gid_write(inode, sd_v1_gid(sd));
1159 inode->i_size = sd_v1_size(sd);
1160 inode->i_atime.tv_sec = sd_v1_atime(sd);
1161 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
1162 inode->i_ctime.tv_sec = sd_v1_ctime(sd);
1163 inode->i_atime.tv_nsec = 0;
1164 inode->i_ctime.tv_nsec = 0;
1165 inode->i_mtime.tv_nsec = 0;
1166
1167 inode->i_blocks = sd_v1_blocks(sd);
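		/* v1 stat data has no generation field; borrow the parent
		   directory id instead */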
1168 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1169 blocks = (inode->i_size + 511) >> 9;
1170 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
1171 if (inode->i_blocks > blocks) {
			/* there was a bug in <=3.5.23 where i_blocks could
			   take negative values. Starting from 3.5.17 this
			   value could even be stored in the stat data. For
			   such files we set i_blocks based on file size.
			   Two notes: this can be wrong for sparse files,
			   and the on-disk value will only be updated if the
			   file's inode ever changes. */
1177 inode->i_blocks = blocks;
1178 }
1179
1180 rdev = sd_v1_rdev(sd);
1181 REISERFS_I(inode)->i_first_direct_byte =
1182 sd_v1_first_direct_byte(sd);
1183 /* an early bug in the quota code can give us an odd number for the
1184 ** block count. This is incorrect, fix it here.
1185 */
1186 if (inode->i_blocks & 1) {
1187 inode->i_blocks++;
1188 }
1189 inode_set_bytes(inode,
1190 to_real_used_space(inode, inode->i_blocks,
1191 SD_V1_SIZE));
1192 /* nopack is initially zero for v1 objects. For v2 objects,
1193 nopack is initialised from sd_attrs */
1194 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1195 } else {
1196 // new stat data found, but object may have old items
1197 // (directories and symlinks)
1198 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1199
1200 inode->i_mode = sd_v2_mode(sd);
1201 set_nlink(inode, sd_v2_nlink(sd));
1202 i_uid_write(inode, sd_v2_uid(sd));
1203 inode->i_size = sd_v2_size(sd);
1204 i_gid_write(inode, sd_v2_gid(sd));
1205 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1206 inode->i_atime.tv_sec = sd_v2_atime(sd);
1207 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1208 inode->i_ctime.tv_nsec = 0;
1209 inode->i_mtime.tv_nsec = 0;
1210 inode->i_atime.tv_nsec = 0;
1211 inode->i_blocks = sd_v2_blocks(sd);
1212 rdev = sd_v2_rdev(sd);
1213 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1214 inode->i_generation =
1215 le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1216 else
1217 inode->i_generation = sd_v2_generation(sd);
1218
1219 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1220 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1221 else
1222 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1223 REISERFS_I(inode)->i_first_direct_byte = 0;
1224 set_inode_sd_version(inode, STAT_DATA_V2);
1225 inode_set_bytes(inode,
1226 to_real_used_space(inode, inode->i_blocks,
1227 SD_V2_SIZE));
1228 /* read persistent inode attributes from sd and initialise
1229 generic inode flags from them */
1230 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1231 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
1232 }
1233
1234 pathrelse(path);
1235 if (S_ISREG(inode->i_mode)) {
1236 inode->i_op = &reiserfs_file_inode_operations;
1237 inode->i_fop = &reiserfs_file_operations;
1238 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1239 } else if (S_ISDIR(inode->i_mode)) {
1240 inode->i_op = &reiserfs_dir_inode_operations;
1241 inode->i_fop = &reiserfs_dir_operations;
1242 } else if (S_ISLNK(inode->i_mode)) {
1243 inode->i_op = &reiserfs_symlink_inode_operations;
1244 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1245 } else {
1246 inode->i_blocks = 0;
1247 inode->i_op = &reiserfs_special_inode_operations;
1248 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
1249 }
1250}
1251
1252// update new stat data with inode fields
1253static void inode2sd(void *sd, struct inode *inode, loff_t size)
1254{
1255 struct stat_data *sd_v2 = (struct stat_data *)sd;
1256 __u16 flags;
1257
1258 set_sd_v2_mode(sd_v2, inode->i_mode);
1259 set_sd_v2_nlink(sd_v2, inode->i_nlink);
1260 set_sd_v2_uid(sd_v2, i_uid_read(inode));
1261 set_sd_v2_size(sd_v2, size);
1262 set_sd_v2_gid(sd_v2, i_gid_read(inode));
1263 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
1264 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
1265 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
1266 set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
1267 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1268 set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
1269 else
1270 set_sd_v2_generation(sd_v2, inode->i_generation);
1271 flags = REISERFS_I(inode)->i_attrs;
1272 i_attrs_to_sd_attrs(inode, &flags);
1273 set_sd_v2_attrs(sd_v2, flags);
1274}
1275
1276// used to copy inode's fields to old stat data
1277static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1278{
1279 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
1280
1281 set_sd_v1_mode(sd_v1, inode->i_mode);
1282 set_sd_v1_uid(sd_v1, i_uid_read(inode));
1283 set_sd_v1_gid(sd_v1, i_gid_read(inode));
1284 set_sd_v1_nlink(sd_v1, inode->i_nlink);
1285 set_sd_v1_size(sd_v1, size);
1286 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
1287 set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
1288 set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
1289
1290 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1291 set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
1292 else
1293 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1294
1295 // Sigh. i_first_direct_byte is back
1296 set_sd_v1_first_direct_byte(sd_v1,
1297 REISERFS_I(inode)->i_first_direct_byte);
1298}
1299
1300/* NOTE, you must prepare the buffer head before sending it here,
1301** and then log it after the call
1302*/
1303static void update_stat_data(struct treepath *path, struct inode *inode,
1304 loff_t size)
1305{
1306 struct buffer_head *bh;
1307 struct item_head *ih;
1308
1309 bh = PATH_PLAST_BUFFER(path);
1310 ih = PATH_PITEM_HEAD(path);
1311
1312 if (!is_statdata_le_ih(ih))
1313 reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
1314 INODE_PKEY(inode), ih);
1315
1316 if (stat_data_v1(ih)) {
1317 // path points to old stat data
1318 inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
1319 } else {
1320 inode2sd(B_I_PITEM(bh, ih), inode, size);
1321 }
1322
1323 return;
1324}
1325
1326void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1327 struct inode *inode, loff_t size)
1328{
1329 struct cpu_key key;
1330 INITIALIZE_PATH(path);
1331 struct buffer_head *bh;
1332 int fs_gen;
1333 struct item_head *ih, tmp_ih;
1334 int retval;
1335
1336 BUG_ON(!th->t_trans_id);
1337
1338 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant
1339
1340 for (;;) {
1341 int pos;
1342 /* look for the object's stat data */
1343 retval = search_item(inode->i_sb, &key, &path);
1344 if (retval == IO_ERROR) {
1345 reiserfs_error(inode->i_sb, "vs-13050",
1346 "i/o failure occurred trying to "
1347 "update %K stat data", &key);
1348 return;
1349 }
1350 if (retval == ITEM_NOT_FOUND) {
1351 pos = PATH_LAST_POSITION(&path);
1352 pathrelse(&path);
1353 if (inode->i_nlink == 0) {
1354 /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
1355 return;
1356 }
1357 reiserfs_warning(inode->i_sb, "vs-13060",
1358 "stat data of object %k (nlink == %d) "
1359 "not found (pos %d)",
1360 INODE_PKEY(inode), inode->i_nlink,
1361 pos);
1362 reiserfs_check_path(&path);
1363 return;
1364 }
1365
1366 /* sigh, prepare_for_journal might schedule. When it schedules the
1367 ** FS might change. We have to detect that, and loop back to the
1368 ** search if the stat data item has moved
1369 */
1370 bh = get_last_bh(&path);
1371 ih = get_ih(&path);
1372 copy_item_head(&tmp_ih, ih);
1373 fs_gen = get_generation(inode->i_sb);
1374 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
1375 if (fs_changed(fs_gen, inode->i_sb)
1376 && item_moved(&tmp_ih, &path)) {
1377 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
1378 continue; /* Stat_data item has been moved after scheduling. */
1379 }
1380 break;
1381 }
1382 update_stat_data(&path, inode, size);
1383 journal_mark_dirty(th, th->t_super, bh);
1384 pathrelse(&path);
1385 return;
1386}
1387
1388/* reiserfs_read_locked_inode is called to read the inode off disk, and it
1389** does a make_bad_inode when things go wrong. But, we need to make sure
1390** and clear the key in the private portion of the inode, otherwise a
1391** corresponding iput might try to delete whatever object the inode last
1392** represented.
1393*/
1394static void reiserfs_make_bad_inode(struct inode *inode)
1395{
1396 memset(INODE_PKEY(inode), 0, KEY_SIZE);
1397 make_bad_inode(inode);
1398}
1399
1400//
1401// initially this function was derived from minix or ext2's analog and
1402// evolved as the prototype did
1403//
1404
1405int reiserfs_init_locked_inode(struct inode *inode, void *p)
1406{
1407 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
1408 inode->i_ino = args->objectid;
1409 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
1410 return 0;
1411}
1412
1413/* looks for stat data in the tree, and fills up the fields of in-core
1414 inode stat data fields */
1415void reiserfs_read_locked_inode(struct inode *inode,
1416 struct reiserfs_iget_args *args)
1417{
1418 INITIALIZE_PATH(path_to_sd);
1419 struct cpu_key key;
1420 unsigned long dirino;
1421 int retval;
1422
1423 dirino = args->dirid;
1424
1425 /* set version 1, version 2 could be used too, because stat data
1426 key is the same in both versions */
1427 key.version = KEY_FORMAT_3_5;
1428 key.on_disk_key.k_dir_id = dirino;
1429 key.on_disk_key.k_objectid = inode->i_ino;
1430 key.on_disk_key.k_offset = 0;
1431 key.on_disk_key.k_type = 0;
1432
1433 /* look for the object's stat data */
1434 retval = search_item(inode->i_sb, &key, &path_to_sd);
1435 if (retval == IO_ERROR) {
1436 reiserfs_error(inode->i_sb, "vs-13070",
1437 "i/o failure occurred trying to find "
1438 "stat data of %K", &key);
1439 reiserfs_make_bad_inode(inode);
1440 return;
1441 }
1442 if (retval != ITEM_FOUND) {
1443 /* a stale NFS handle can trigger this without it being an error */
1444 pathrelse(&path_to_sd);
1445 reiserfs_make_bad_inode(inode);
1446 clear_nlink(inode);
1447 return;
1448 }
1449
1450 init_inode(inode, &path_to_sd);
1451
1452 /* It is possible that knfsd is trying to access inode of a file
1453 that is being removed from the disk by some other thread. As we
1454 update sd on unlink all that is required is to check for nlink
1455 here. This bug was first found by Sizif when debugging
1456 SquidNG/Butterfly, forgotten, and found again after Philippe
1457 Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
1458
1459 More logical fix would require changes in fs/inode.c:iput() to
1460 remove inode from hash-table _after_ fs cleaned disk stuff up and
1461 in iget() to return NULL if I_FREEING inode is found in
1462 hash-table. */
1463 /* Currently there is one place where it's ok to meet inode with
1464 nlink==0: processing of open-unlinked and half-truncated files
1465 during mount (fs/reiserfs/super.c:finish_unfinished()). */
1466 if ((inode->i_nlink == 0) &&
1467 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
1468 reiserfs_warning(inode->i_sb, "vs-13075",
1469 "dead inode read from disk %K. "
1470 "This is likely to be race with knfsd. Ignore",
1471 &key);
1472 reiserfs_make_bad_inode(inode);
1473 }
1474
	reiserfs_check_path(&path_to_sd);	/* init_inode should have released the path */
1476
1477 /*
1478 * Stat data v1 doesn't support ACLs.
1479 */
1480 if (get_inode_sd_version(inode) == STAT_DATA_V1)
1481 cache_no_acl(inode);
1482}
1483
1484/**
1485 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
1486 *
1487 * @inode: inode from hash table to check
1488 * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
1489 *
1490 * This function is called by iget5_locked() to distinguish reiserfs inodes
1491 * having the same inode numbers. Such inodes can only exist due to some
1492 * error condition. One of them should be bad. Inodes with identical
1493 * inode numbers (objectids) are distinguished by parent directory ids.
1494 *
1495 */
1496int reiserfs_find_actor(struct inode *inode, void *opaque)
1497{
1498 struct reiserfs_iget_args *args;
1499
1500 args = opaque;
1501 /* args is already in CPU order */
1502 return (inode->i_ino == args->objectid) &&
1503 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
1504}
1505
1506struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
1507{
1508 struct inode *inode;
1509 struct reiserfs_iget_args args;
1510 int depth;
1511
1512 args.objectid = key->on_disk_key.k_objectid;
1513 args.dirid = key->on_disk_key.k_dir_id;
1514 depth = reiserfs_write_unlock_nested(s);
1515 inode = iget5_locked(s, key->on_disk_key.k_objectid,
1516 reiserfs_find_actor, reiserfs_init_locked_inode,
1517 (void *)(&args));
1518 reiserfs_write_lock_nested(s, depth);
1519 if (!inode)
1520 return ERR_PTR(-ENOMEM);
1521
1522 if (inode->i_state & I_NEW) {
1523 reiserfs_read_locked_inode(inode, &args);
1524 unlock_new_inode(inode);
1525 }
1526
1527 if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
1528 /* either due to i/o error or a stale NFS handle */
1529 iput(inode);
1530 inode = NULL;
1531 }
1532 return inode;
1533}
1534
1535static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1536 u32 objectid, u32 dir_id, u32 generation)
1537
1538{
1539 struct cpu_key key;
1540 struct inode *inode;
1541
1542 key.on_disk_key.k_objectid = objectid;
1543 key.on_disk_key.k_dir_id = dir_id;
1544 reiserfs_write_lock(sb);
1545 inode = reiserfs_iget(sb, &key);
1546 if (inode && !IS_ERR(inode) && generation != 0 &&
1547 generation != inode->i_generation) {
1548 iput(inode);
1549 inode = NULL;
1550 }
1551 reiserfs_write_unlock(sb);
1552
1553 return d_obtain_alias(inode);
1554}
1555
1556struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
1557 int fh_len, int fh_type)
1558{
	/* fhtype happens to reflect the number of u32s encoded.
	 * Due to a bug in earlier code, fhtype might indicate there
	 * are more u32s than actually fit.
	 * So if fhtype seems to be more than len, reduce fhtype.
	 * Valid types are:
	 *   2 - objectid + dir_id - legacy support
	 *   3 - objectid + dir_id + generation
	 *   4 - objectid + dir_id + objectid and dirid of parent - legacy
	 *   5 - objectid + dir_id + generation + objectid and dirid of parent
	 *   6 - as above plus generation of directory
	 * 6 does not fit in NFSv2 handles
	 */
1571 if (fh_type > fh_len) {
1572 if (fh_type != 6 || fh_len != 5)
1573 reiserfs_warning(sb, "reiserfs-13077",
1574 "nfsd/reiserfs, fhtype=%d, len=%d - odd",
1575 fh_type, fh_len);
1576 fh_type = fh_len;
1577 }
1578 if (fh_len < 2)
1579 return NULL;
1580
1581 return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1],
1582 (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0);
1583}
1584
1585struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
1586 int fh_len, int fh_type)
1587{
1588 if (fh_type > fh_len)
1589 fh_type = fh_len;
1590 if (fh_type < 4)
1591 return NULL;
1592
1593 return reiserfs_get_dentry(sb,
1594 (fh_type >= 5) ? fid->raw[3] : fid->raw[2],
1595 (fh_type >= 5) ? fid->raw[4] : fid->raw[3],
1596 (fh_type == 6) ? fid->raw[5] : 0);
1597}
1598
1599int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
1600 struct inode *parent)
1601{
1602 int maxlen = *lenp;
1603
1604 if (parent && (maxlen < 5)) {
1605 *lenp = 5;
1606 return FILEID_INVALID;
1607 } else if (maxlen < 3) {
1608 *lenp = 3;
1609 return FILEID_INVALID;
1610 }
1611
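	/* this layout matches the fh_type values decoded in
	   reiserfs_fh_to_dentry() and reiserfs_fh_to_parent() above */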
1612 data[0] = inode->i_ino;
1613 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1614 data[2] = inode->i_generation;
1615 *lenp = 3;
1616 if (parent) {
1617 data[3] = parent->i_ino;
1618 data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id);
1619 *lenp = 5;
1620 if (maxlen >= 6) {
1621 data[5] = parent->i_generation;
1622 *lenp = 6;
1623 }
1624 }
1625 return *lenp;
1626}
1627
1628/* looks for stat data, then copies fields to it, marks the buffer
1629 containing stat data as dirty */
1630/* reiserfs inodes are never really dirty, since the dirty inode call
1631** always logs them. This call allows the VFS inode marking routines
1632** to properly mark inodes for datasync and such, but only actually
1633** does something when called for a synchronous update.
1634*/
1635int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1636{
1637 struct reiserfs_transaction_handle th;
1638 int jbegin_count = 1;
1639
1640 if (inode->i_sb->s_flags & MS_RDONLY)
1641 return -EROFS;
1642 /* memory pressure can sometimes initiate write_inode calls with sync == 1,
1643 ** these cases are just when the system needs ram, not when the
1644 ** inode needs to reach disk for safety, and they can safely be
1645 ** ignored because the altered inode has already been logged.
1646 */
1647 if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
1648 reiserfs_write_lock(inode->i_sb);
1649 if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
1650 reiserfs_update_sd(&th, inode);
1651 journal_end_sync(&th, inode->i_sb, jbegin_count);
1652 }
1653 reiserfs_write_unlock(inode->i_sb);
1654 }
1655 return 0;
1656}
1657
1658/* stat data of new object is inserted already, this inserts the item
1659 containing "." and ".." entries */
1660static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1661 struct inode *inode,
1662 struct item_head *ih, struct treepath *path,
1663 struct inode *dir)
1664{
1665 struct super_block *sb = th->t_super;
1666 char empty_dir[EMPTY_DIR_SIZE];
1667 char *body = empty_dir;
1668 struct cpu_key key;
1669 int retval;
1670
1671 BUG_ON(!th->t_trans_id);
1672
1673 _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
1674 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1675 TYPE_DIRENTRY, 3 /*key length */ );
1676
1677 /* compose item head for new item. Directories consist of items of
1678 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
1679 is done by reiserfs_new_inode */
1680 if (old_format_only(sb)) {
1681 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1682 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1683
1684 make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
1685 ih->ih_key.k_objectid,
1686 INODE_PKEY(dir)->k_dir_id,
1687 INODE_PKEY(dir)->k_objectid);
1688 } else {
1689 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1690 TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1691
1692 make_empty_dir_item(body, ih->ih_key.k_dir_id,
1693 ih->ih_key.k_objectid,
1694 INODE_PKEY(dir)->k_dir_id,
1695 INODE_PKEY(dir)->k_objectid);
1696 }
1697
1698 /* look for place in the tree for new item */
1699 retval = search_item(sb, &key, path);
1700 if (retval == IO_ERROR) {
1701 reiserfs_error(sb, "vs-13080",
1702 "i/o failure occurred creating new directory");
1703 return -EIO;
1704 }
1705 if (retval == ITEM_FOUND) {
1706 pathrelse(path);
1707 reiserfs_warning(sb, "vs-13070",
1708 "object with this key exists (%k)",
1709 &(ih->ih_key));
1710 return -EEXIST;
1711 }
1712
1713 /* insert item, that is empty directory item */
1714 return reiserfs_insert_item(th, path, &key, ih, inode, body);
1715}
1716
1717/* stat data of object has been inserted, this inserts the item
1718 containing the body of symlink */
1719static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */
1720 struct item_head *ih,
1721 struct treepath *path, const char *symname,
1722 int item_len)
1723{
1724 struct super_block *sb = th->t_super;
1725 struct cpu_key key;
1726 int retval;
1727
1728 BUG_ON(!th->t_trans_id);
1729
1730 _make_cpu_key(&key, KEY_FORMAT_3_5,
1731 le32_to_cpu(ih->ih_key.k_dir_id),
1732 le32_to_cpu(ih->ih_key.k_objectid),
1733 1, TYPE_DIRECT, 3 /*key length */ );
1734
1735 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1736 0 /*free_space */ );
1737
1738 /* look for place in the tree for new item */
1739 retval = search_item(sb, &key, path);
1740 if (retval == IO_ERROR) {
1741 reiserfs_error(sb, "vs-13080",
1742 "i/o failure occurred creating new symlink");
1743 return -EIO;
1744 }
1745 if (retval == ITEM_FOUND) {
1746 pathrelse(path);
1747 reiserfs_warning(sb, "vs-13080",
1748 "object with this key exists (%k)",
1749 &(ih->ih_key));
1750 return -EEXIST;
1751 }
1752
1753 /* insert item, that is body of symlink */
1754 return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1755}
1756
1757/* inserts the stat data into the tree, and then calls
1758 reiserfs_new_directory (to insert ".", ".." item if new object is
1759 directory) or reiserfs_new_symlink (to insert symlink body if new
1760 object is symlink) or nothing (if new object is regular file)
1761
1762 NOTE! uid and gid must already be set in the inode. If we return
1763 non-zero due to an error, we have to drop the quota previously allocated
1764 for the fresh inode. This can only be done outside a transaction, so
1765 if we return non-zero, we also end the transaction. */
1766int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1767 struct inode *dir, umode_t mode, const char *symname,
		       /* 0 for regular, EMPTY_DIR_SIZE for dirs,
		          strlen(symname) for symlinks */
1770 loff_t i_size, struct dentry *dentry,
1771 struct inode *inode,
1772 struct reiserfs_security_handle *security)
1773{
1774 struct super_block *sb = dir->i_sb;
1775 struct reiserfs_iget_args args;
1776 INITIALIZE_PATH(path_to_key);
1777 struct cpu_key key;
1778 struct item_head ih;
1779 struct stat_data sd;
1780 int retval;
1781 int err;
1782 int depth;
1783
1784 BUG_ON(!th->t_trans_id);
1785
1786 depth = reiserfs_write_unlock_nested(sb);
1787 err = dquot_alloc_inode(inode);
1788 reiserfs_write_lock_nested(sb, depth);
1789 if (err)
1790 goto out_end_trans;
1791 if (!dir->i_nlink) {
1792 err = -EPERM;
1793 goto out_bad_inode;
1794 }
1795
1796 /* item head of new item */
1797 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1798 ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1799 if (!ih.ih_key.k_objectid) {
1800 err = -ENOMEM;
1801 goto out_bad_inode;
1802 }
1803 args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1804 if (old_format_only(sb))
1805 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1806 TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1807 else
1808 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1809 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1810 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1811 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1812
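	/* hash the inode now so concurrent lookups find it marked I_NEW
	   and wait until we have finished setting it up */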
1813 depth = reiserfs_write_unlock_nested(inode->i_sb);
1814 err = insert_inode_locked4(inode, args.objectid,
1815 reiserfs_find_actor, &args);
1816 reiserfs_write_lock_nested(inode->i_sb, depth);
1817 if (err) {
1818 err = -EINVAL;
1819 goto out_bad_inode;
1820 }
1821
1822 if (old_format_only(sb))
1823 /* not a perfect generation count, as object ids can be reused, but
1824 ** this is as good as reiserfs can do right now.
1825 ** note that the private part of inode isn't filled in yet, we have
1826 ** to use the directory.
1827 */
1828 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1829 else
1830#if defined( USE_INODE_GENERATION_COUNTER )
1831 inode->i_generation =
1832 le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1833#else
1834 inode->i_generation = ++event;
1835#endif
1836
1837 /* fill stat data */
1838 set_nlink(inode, (S_ISDIR(mode) ? 2 : 1));
1839
1840 /* uid and gid must already be set by the caller for quota init */
1841
1842 /* symlink cannot be immutable or append only, right? */
1843 if (S_ISLNK(inode->i_mode))
1844 inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
1845
1846 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
1847 inode->i_size = i_size;
1848 inode->i_blocks = 0;
1849 inode->i_bytes = 0;
1850 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1851 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
1852
1853 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1854 REISERFS_I(inode)->i_flags = 0;
1855 REISERFS_I(inode)->i_prealloc_block = 0;
1856 REISERFS_I(inode)->i_prealloc_count = 0;
1857 REISERFS_I(inode)->i_trans_id = 0;
1858 REISERFS_I(inode)->i_jl = NULL;
1859 REISERFS_I(inode)->i_attrs =
1860 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1861 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1862 reiserfs_init_xattr_rwsem(inode);
1863
1864 /* key to search for correct place for new stat data */
1865 _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
1866 le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
1867 TYPE_STAT_DATA, 3 /*key length */ );
1868
1869 /* find proper place for inserting of stat data */
1870 retval = search_item(sb, &key, &path_to_key);
1871 if (retval == IO_ERROR) {
1872 err = -EIO;
1873 goto out_bad_inode;
1874 }
1875 if (retval == ITEM_FOUND) {
1876 pathrelse(&path_to_key);
1877 err = -EEXIST;
1878 goto out_bad_inode;
1879 }
1880 if (old_format_only(sb)) {
1881 if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) {
1882 pathrelse(&path_to_key);
1883 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1884 err = -EINVAL;
1885 goto out_bad_inode;
1886 }
1887 inode2sd_v1(&sd, inode, inode->i_size);
1888 } else {
1889 inode2sd(&sd, inode, inode->i_size);
1890 }
1891 // store in in-core inode the key of stat data and version all
1892 // object items will have (directory items will have old offset
1893 // format, other new objects will consist of new items)
1894 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1895 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1896 else
1897 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1898 if (old_format_only(sb))
1899 set_inode_sd_version(inode, STAT_DATA_V1);
1900 else
1901 set_inode_sd_version(inode, STAT_DATA_V2);
1902
1903 /* insert the stat data into the tree */
1904#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1905 if (REISERFS_I(dir)->new_packing_locality)
1906 th->displace_new_blocks = 1;
1907#endif
1908 retval =
1909 reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
1910 (char *)(&sd));
1911 if (retval) {
1912 err = retval;
1913 reiserfs_check_path(&path_to_key);
1914 goto out_bad_inode;
1915 }
1916#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1917 if (!th->displace_new_blocks)
1918 REISERFS_I(dir)->new_packing_locality = 0;
1919#endif
1920 if (S_ISDIR(mode)) {
1921 /* insert item with "." and ".." */
1922 retval =
1923 reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
1924 }
1925
1926 if (S_ISLNK(mode)) {
1927 /* insert body of symlink */
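		/* the new format rounds the symlink body length up
		   before the item is inserted */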
1928 if (!old_format_only(sb))
1929 i_size = ROUND_UP(i_size);
1930 retval =
1931 reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
1932 i_size);
1933 }
1934 if (retval) {
1935 err = retval;
1936 reiserfs_check_path(&path_to_key);
1937 journal_end(th, th->t_super, th->t_blocks_allocated);
1938 goto out_inserted_sd;
1939 }
1940
1941 if (reiserfs_posixacl(inode->i_sb)) {
1942 reiserfs_write_unlock(inode->i_sb);
1943 retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
1944 reiserfs_write_lock(inode->i_sb);
1945 if (retval) {
1946 err = retval;
1947 reiserfs_check_path(&path_to_key);
1948 journal_end(th, th->t_super, th->t_blocks_allocated);
1949 goto out_inserted_sd;
1950 }
1951 } else if (inode->i_sb->s_flags & MS_POSIXACL) {
1952 reiserfs_warning(inode->i_sb, "jdm-13090",
1953 "ACLs aren't enabled in the fs, "
1954 "but vfs thinks they are!");
1955 } else if (IS_PRIVATE(dir))
1956 inode->i_flags |= S_PRIVATE;
1957
1958 if (security->name) {
1959 reiserfs_write_unlock(inode->i_sb);
1960 retval = reiserfs_security_write(th, inode, security);
1961 reiserfs_write_lock(inode->i_sb);
1962 if (retval) {
1963 err = retval;
1964 reiserfs_check_path(&path_to_key);
1965 retval = journal_end(th, th->t_super,
1966 th->t_blocks_allocated);
1967 if (retval)
1968 err = retval;
1969 goto out_inserted_sd;
1970 }
1971 }
1972
1973 reiserfs_update_sd(th, inode);
1974 reiserfs_check_path(&path_to_key);
1975
1976 return 0;
1977
/* it looks like you can easily compress these two goto targets into
 * one. Keeping it like this doesn't actually hurt anything, and they
 * are placeholders for what the quota code actually needs.
 */
1982 out_bad_inode:
1983 /* Invalidate the object, nothing was inserted yet */
1984 INODE_PKEY(inode)->k_objectid = 0;
1985
1986 /* Quota change must be inside a transaction for journaling */
1987 depth = reiserfs_write_unlock_nested(inode->i_sb);
1988 dquot_free_inode(inode);
1989 reiserfs_write_lock_nested(inode->i_sb, depth);
1990
1991 out_end_trans:
1992 journal_end(th, th->t_super, th->t_blocks_allocated);
1993 /* Drop can be outside and it needs more credits so it's better to have it outside */
1994 depth = reiserfs_write_unlock_nested(inode->i_sb);
1995 dquot_drop(inode);
1996 reiserfs_write_lock_nested(inode->i_sb, depth);
1997 inode->i_flags |= S_NOQUOTA;
1998 make_bad_inode(inode);
1999
2000 out_inserted_sd:
2001 clear_nlink(inode);
2002 th->t_trans_id = 0; /* so the caller can't use this handle later */
2003 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
2004 iput(inode);
2005 return err;
2006}
2007
2008/*
2009** finds the tail page in the page cache,
2010** reads the last block in.
2011**
2012** On success, page_result is set to a locked, pinned page, and bh_result
2013** is set to an up to date buffer for the last block in the file. returns 0.
2014**
** tail conversion is not done, so bh_result might not be valid for
** writing; check buffer_mapped(bh_result) and bh_result->b_blocknr != 0
** before trying to write the block.
2018**
2019** on failure, nonzero is returned, page_result and bh_result are untouched.
2020*/
2021static int grab_tail_page(struct inode *inode,
2022 struct page **page_result,
2023 struct buffer_head **bh_result)
2024{
2025
2026 /* we want the page with the last byte in the file,
2027 ** not the page that will hold the next byte for appending
2028 */
2029 unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
2030 unsigned long pos = 0;
2031 unsigned long start = 0;
2032 unsigned long blocksize = inode->i_sb->s_blocksize;
2033 unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
2034 struct buffer_head *bh;
2035 struct buffer_head *head;
2036 struct page *page;
2037 int error;
2038
2039 /* we know that we are only called with inode->i_size > 0.
2040 ** we also know that a file tail can never be as big as a block
2041 ** If i_size % blocksize == 0, our file is currently block aligned
2042 ** and it won't need converting or zeroing after a truncate.
2043 */
2044 if ((offset & (blocksize - 1)) == 0) {
2045 return -ENOENT;
2046 }
2047 page = grab_cache_page(inode->i_mapping, index);
2048 error = -ENOMEM;
2049 if (!page) {
2050 goto out;
2051 }
2052 /* start within the page of the last block in the file */
2053 start = (offset / blocksize) * blocksize;
2054
2055 error = __block_write_begin(page, start, offset - start,
2056 reiserfs_get_block_create_0);
2057 if (error)
2058 goto unlock;
2059
2060 head = page_buffers(page);
2061 bh = head;
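	/* walk the page's buffer ring to the buffer covering 'start' */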
2062 do {
2063 if (pos >= start) {
2064 break;
2065 }
2066 bh = bh->b_this_page;
2067 pos += blocksize;
2068 } while (bh != head);
2069
2070 if (!buffer_uptodate(bh)) {
2071 /* note, this should never happen; prepare_write should
2072 ** be taking care of this for us. If the buffer isn't up to date,
2073 ** I've screwed up either the code to find the buffer or the code
2074 ** to call prepare_write.
2075 */
2076 reiserfs_error(inode->i_sb, "clm-6000",
2077 "error reading block %lu", bh->b_blocknr);
2078 error = -EIO;
2079 goto unlock;
2080 }
2081 *bh_result = bh;
2082 *page_result = page;
2083
2084 out:
2085 return error;
2086
2087 unlock:
2088 unlock_page(page);
2089 page_cache_release(page);
2090 return error;
2091}
2092
2093/*
2094** vfs version of truncate file. Must NOT be called with
2095** a transaction already started.
2096**
2097** some code taken from block_truncate_page
2098*/
2099int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2100{
2101 struct reiserfs_transaction_handle th;
2102 /* we want the offset for the first byte after the end of the file */
2103 unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2104 unsigned blocksize = inode->i_sb->s_blocksize;
2105 unsigned length;
2106 struct page *page = NULL;
2107 int error;
2108 struct buffer_head *bh = NULL;
2109 int err2;
2110
2111 reiserfs_write_lock(inode->i_sb);
2112
2113 if (inode->i_size > 0) {
2114 error = grab_tail_page(inode, &page, &bh);
2115 if (error) {
2116 /* -ENOENT means we truncated past the end of the file,
2117 * and get_block_create_0 could not find a block to read in,
2118 * which is ok. */
2119 if (error != -ENOENT)
2120 reiserfs_error(inode->i_sb, "clm-6001",
2121 "grab_tail_page failed %d",
2122 error);
2123 page = NULL;
2124 bh = NULL;
2125 }
2126 }
2127
2128 /* so, if page != NULL, we have a buffer head for the offset at
2129 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2130 ** then we have an unformatted node. Otherwise, we have a direct item,
2131 ** and no zeroing is required on disk. We zero after the truncate,
2132 ** because the truncate might pack the item anyway
2133 ** (it will unmap bh if it packs).
2134 */
2135 /* it is enough to reserve space in the transaction for 2 balancings:
2136 one for adding the "save" link and another for the first
2137 cut_from_item. The extra 1 is for update_sd */
2138 error = journal_begin(&th, inode->i_sb,
2139 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2140 if (error)
2141 goto out;
2142 reiserfs_update_inode_transaction(inode);
2143 if (update_timestamps)
2144 /* we are doing a real truncate: if the system crashes before the
2145 last transaction of the truncate is committed, then on reboot the
2146 file will appear either properly truncated or not truncated at all */
2147 add_save_link(&th, inode, 1);
2148 err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
2149 error = journal_end(&th);
2151 if (error)
2152 goto out;
2153
2154 /* check reiserfs_do_truncate after ending the transaction */
2155 if (err2) {
2156 error = err2;
2157 goto out;
2158 }
2159
2160 if (update_timestamps) {
2161 error = remove_save_link(inode, 1 /* truncate */);
2162 if (error)
2163 goto out;
2164 }
2165
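/*
 * Example of the zeroing below, assuming 4K pages, 1K blocks and
 * i_size == 5000: offset == 904, length == 904 & 1023 == 904, so we
 * zero blocksize - length == 120 bytes starting at page offset 904,
 * i.e. the remainder of the last block past the new EOF.
 */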
2166 if (page) {
2167 length = offset & (blocksize - 1);
2168 /* if we are not on a block boundary */
2169 if (length) {
2170 length = blocksize - length;
2171 zero_user(page, offset, length);
2172 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2173 mark_buffer_dirty(bh);
2174 }
2175 }
2176 unlock_page(page);
2177 page_cache_release(page);
2178 }
2179
2180 reiserfs_write_unlock(inode->i_sb);
2181
2182 return 0;
2183 out:
2184 if (page) {
2185 unlock_page(page);
2186 page_cache_release(page);
2187 }
2188
2189 reiserfs_write_unlock(inode->i_sb);
2190
2191 return error;
2192}
2193
2194static int map_block_for_writepage(struct inode *inode,
2195 struct buffer_head *bh_result,
2196 unsigned long block)
2197{
2198 struct reiserfs_transaction_handle th;
2199 int fs_gen;
2200 struct item_head tmp_ih;
2201 struct item_head *ih;
2202 struct buffer_head *bh;
2203 __le32 *item;
2204 struct cpu_key key;
2205 INITIALIZE_PATH(path);
2206 int pos_in_item;
2207 int jbegin_count = JOURNAL_PER_BALANCE_CNT;
2208 loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
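/*
 * Item keys use 1-based byte offsets, hence the +1 above: e.g. with
 * a 4K blocksize, block 3 starts at key offset 3 * 4096 + 1 == 12289.
 */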
2209 int retval;
2210 int use_get_block = 0;
2211 int bytes_copied = 0;
2212 int copy_size;
2213 int trans_running = 0;
2214
2215 /* catch places below that try to log something without starting a trans */
2216 th.t_trans_id = 0;
2217
2218 if (!buffer_uptodate(bh_result)) {
2219 return -EIO;
2220 }
2221
2222 kmap(bh_result->b_page);
2223 start_over:
2224 reiserfs_write_lock(inode->i_sb);
2225 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2226
2227 research:
2228 retval = search_for_position_by_key(inode->i_sb, &key, &path);
2229 if (retval != POSITION_FOUND) {
2230 use_get_block = 1;
2231 goto out;
2232 }
2233
2234 bh = get_last_bh(&path);
2235 ih = get_ih(&path);
2236 item = get_item(&path);
2237 pos_in_item = path.pos_in_item;
2238
2239 /* we've found an unformatted node */
2240 if (indirect_item_found(retval, ih)) {
2241 if (bytes_copied > 0) {
2242 reiserfs_warning(inode->i_sb, "clm-6002",
2243 "bytes_copied %d", bytes_copied);
2244 }
2245 if (!get_block_num(item, pos_in_item)) {
2246 /* crap, we are writing to a hole */
2247 use_get_block = 1;
2248 goto out;
2249 }
2250 set_block_dev_mapped(bh_result,
2251 get_block_num(item, pos_in_item), inode);
2252 } else if (is_direct_le_ih(ih)) {
2253 char *p;
2254 p = page_address(bh_result->b_page);
2255 p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
2256 copy_size = ih_item_len(ih) - pos_in_item;
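/*
 * A tail can be spread over more than one direct item (e.g. when it
 * straddles a leaf node boundary), so we may loop back to research
 * below and keep copying until bytes_copied covers the buffer or we
 * reach i_size.
 */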
2257
2258 fs_gen = get_generation(inode->i_sb);
2259 copy_item_head(&tmp_ih, ih);
2260
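/*
 * journal_begin() below may block, and the tree can change while we
 * wait; fs_gen and tmp_ih sampled above let us detect that. If the
 * fs changed and our item moved, restore the buffer and research the
 * key from scratch.
 */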
2261 if (!trans_running) {
2262 /* vs-3050 is gone, no need to drop the path */
2263 retval = journal_begin(&th, inode->i_sb, jbegin_count);
2264 if (retval)
2265 goto out;
2266 reiserfs_update_inode_transaction(inode);
2267 trans_running = 1;
2268 if (fs_changed(fs_gen, inode->i_sb)
2269 && item_moved(&tmp_ih, &path)) {
2270 reiserfs_restore_prepared_buffer(inode->i_sb,
2271 bh);
2272 goto research;
2273 }
2274 }
2275
2276 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2277
2278 if (fs_changed(fs_gen, inode->i_sb)
2279 && item_moved(&tmp_ih, &path)) {
2280 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2281 goto research;
2282 }
2283
2284 memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
2285 copy_size);
2286
2287 journal_mark_dirty(&th, bh);
2288 bytes_copied += copy_size;
2289 set_block_dev_mapped(bh_result, 0, inode);
2290
2291 /* are there still bytes left? */
2292 if (bytes_copied < bh_result->b_size &&
2293 (byte_offset + bytes_copied) < inode->i_size) {
2294 set_cpu_key_k_offset(&key,
2295 cpu_key_k_offset(&key) +
2296 copy_size);
2297 goto research;
2298 }
2299 } else {
2300 reiserfs_warning(inode->i_sb, "clm-6003",
2301 "bad item inode %lu", inode->i_ino);
2302 retval = -EIO;
2303 goto out;
2304 }
2305 retval = 0;
2306
2307 out:
2308 pathrelse(&path);
2309 if (trans_running) {
2310 int err = journal_end(&th);
2311 if (err)
2312 retval = err;
2313 trans_running = 0;
2314 }
2315 reiserfs_write_unlock(inode->i_sb);
2316
2317 /* this is where we fill in holes in the file. */
2318 if (use_get_block) {
2319 retval = reiserfs_get_block(inode, block, bh_result,
2320 GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2321 | GET_BLOCK_NO_DANGLE);
2322 if (!retval) {
2323 if (!buffer_mapped(bh_result)
2324 || bh_result->b_blocknr == 0) {
2325 /* get_block failed to find a mapped unformatted node. */
2326 use_get_block = 0;
2327 goto start_over;
2328 }
2329 }
2330 }
2331 kunmap(bh_result->b_page);
2332
2333 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2334 /* we've copied data from the page into the direct item, so the
2335 * buffer in the page is now clean, mark it to reflect that.
2336 */
2337 lock_buffer(bh_result);
2338 clear_buffer_dirty(bh_result);
2339 unlock_buffer(bh_result);
2340 }
2341 return retval;
2342}
2343
2344/*
2345 * mason@suse.com: updated in 2.5.54 to follow the same general io
2346 * start/recovery path as __block_write_full_page, along with special
2347 * code to handle reiserfs tails.
2348 */
2349static int reiserfs_write_full_page(struct page *page,
2350 struct writeback_control *wbc)
2351{
2352 struct inode *inode = page->mapping->host;
2353 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2354 int error = 0;
2355 unsigned long block;
2356 sector_t last_block;
2357 struct buffer_head *head, *bh;
2358 int partial = 0;
2359 int nr = 0;
2360 int checked = PageChecked(page);
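/* PageChecked is set by reiserfs_set_page_dirty() (below) when the
 * page's data must be journaled; such pages are fully logged here. */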
2361 struct reiserfs_transaction_handle th;
2362 struct super_block *s = inode->i_sb;
2363 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2364 th.t_trans_id = 0;
2365
2366 /* no logging allowed when nonblocking or from PF_MEMALLOC */
2367 if (checked && (current->flags & PF_MEMALLOC)) {
2368 redirty_page_for_writepage(wbc, page);
2369 unlock_page(page);
2370 return 0;
2371 }
2372
2373 /* The page dirty bit is cleared before writepage is called, which
2374 * means we have to tell create_empty_buffers to make dirty buffers.
2375 * The page really should be up to date at this point, so tossing
2376 * in the BH_Uptodate is just a sanity check.
2377 */
2378 if (!page_has_buffers(page)) {
2379 create_empty_buffers(page, s->s_blocksize,
2380 (1 << BH_Dirty) | (1 << BH_Uptodate));
2381 }
2382 head = page_buffers(page);
2383
2384 /* last page in the file, zero out any contents past the
2385 ** last byte in the file
2386 */
2387 if (page->index >= end_index) {
2388 unsigned last_offset;
2389
2390 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2391 /* no file contents in this page */
2392 if (page->index >= end_index + 1 || !last_offset) {
2393 unlock_page(page);
2394 return 0;
2395 }
2396 zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
2397 }
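/*
 * Example, assuming 4K pages, 1K blocks and i_size == 5000:
 * end_index == 1, so for page index 1 last_offset == 904 and bytes
 * 904..4095 were zeroed above; below, block == 1 << (12 - 10) == 4
 * and last_block == 4999 >> 10 == 4, so only the page's first buffer
 * still lies inside the file.
 */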
2398 bh = head;
2399 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2400 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
2401 /* first map all the buffers, logging any direct items we find */
2402 do {
2403 if (block > last_block) {
2404 /*
2405 * This can happen when the block size is less than
2406 * the page size. The corresponding bytes in the page
2407 * were zero filled above
2408 */
2409 clear_buffer_dirty(bh);
2410 set_buffer_uptodate(bh);
2411 } else if ((checked || buffer_dirty(bh)) &&
2412 (!buffer_mapped(bh) || bh->b_blocknr == 0)) {
2415 /* not mapped yet, or it points to a direct item, search
2416 * the btree for the mapping info, and log any direct
2417 * items found
2418 */
2419 if ((error = map_block_for_writepage(inode, bh, block))) {
2420 goto fail;
2421 }
2422 }
2423 bh = bh->b_this_page;
2424 block++;
2425 } while (bh != head);
2426
2427 /*
2428 * we start the transaction after map_block_for_writepage,
2429 * because it can create holes in the file (an unbounded operation).
2430 * starting it here, we can make a reliable estimate for how many
2431 * blocks we're going to log
2432 */
2433 if (checked) {
2434 ClearPageChecked(page);
2435 reiserfs_write_lock(s);
2436 error = journal_begin(&th, s, bh_per_page + 1);
2437 if (error) {
2438 reiserfs_write_unlock(s);
2439 goto fail;
2440 }
2441 reiserfs_update_inode_transaction(inode);
2442 }
2443 /* now go through and lock any dirty buffers on the page */
2444 do {
2445 get_bh(bh);
2446 if (!buffer_mapped(bh))
2447 continue;
2448 if (bh->b_blocknr == 0)
2449 continue;
2450
2451 if (checked) {
2452 reiserfs_prepare_for_journal(s, bh, 1);
2453 journal_mark_dirty(&th, bh);
2454 continue;
2455 }
2456 /* from this point on, we know the buffer is mapped to a
2457 * real block and not a direct item
2458 */
2459 if (wbc->sync_mode != WB_SYNC_NONE) {
2460 lock_buffer(bh);
2461 } else {
2462 if (!trylock_buffer(bh)) {
2463 redirty_page_for_writepage(wbc, page);
2464 continue;
2465 }
2466 }
2467 if (test_clear_buffer_dirty(bh)) {
2468 mark_buffer_async_write(bh);
2469 } else {
2470 unlock_buffer(bh);
2471 }
2472 } while ((bh = bh->b_this_page) != head);
2473
2474 if (checked) {
2475 error = journal_end(&th);
2476 reiserfs_write_unlock(s);
2477 if (error)
2478 goto fail;
2479 }
2480 BUG_ON(PageWriteback(page));
2481 set_page_writeback(page);
2482 unlock_page(page);
2483
2484 /*
2485 * since any buffer might be the only dirty buffer on the page,
2486 * the first submit_bh can bring the page out of writeback.
2487 * be careful with the buffers.
2488 */
2489 do {
2490 struct buffer_head *next = bh->b_this_page;
2491 if (buffer_async_write(bh)) {
2492 submit_bh(WRITE, bh);
2493 nr++;
2494 }
2495 put_bh(bh);
2496 bh = next;
2497 } while (bh != head);
2498
2499 error = 0;
2500 done:
2501 if (nr == 0) {
2502 /*
2503 * if this page only had a direct item, it is very possible for
2504 * no io to be required without there being an error. Or,
2505 * someone else could have locked the buffers and sent them down
2506 * the pipe without locking the page
2507 */
2508 bh = head;
2509 do {
2510 if (!buffer_uptodate(bh)) {
2511 partial = 1;
2512 break;
2513 }
2514 bh = bh->b_this_page;
2515 } while (bh != head);
2516 if (!partial)
2517 SetPageUptodate(page);
2518 end_page_writeback(page);
2519 }
2520 return error;
2521
2522 fail:
2523 /* catches various errors, we need to make sure any valid dirty blocks
2524 * get to the media. The page is currently locked and not marked for
2525 * writeback
2526 */
2527 ClearPageUptodate(page);
2528 bh = head;
2529 do {
2530 get_bh(bh);
2531 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2532 lock_buffer(bh);
2533 mark_buffer_async_write(bh);
2534 } else {
2535 /*
2536 * clear any dirty bits that might have come from getting
2537 * attached to a dirty page
2538 */
2539 clear_buffer_dirty(bh);
2540 }
2541 bh = bh->b_this_page;
2542 } while (bh != head);
2543 SetPageError(page);
2544 BUG_ON(PageWriteback(page));
2545 set_page_writeback(page);
2546 unlock_page(page);
2547 do {
2548 struct buffer_head *next = bh->b_this_page;
2549 if (buffer_async_write(bh)) {
2550 clear_buffer_dirty(bh);
2551 submit_bh(WRITE, bh);
2552 nr++;
2553 }
2554 put_bh(bh);
2555 bh = next;
2556 } while (bh != head);
2557 goto done;
2558}
2559
2560static int reiserfs_readpage(struct file *f, struct page *page)
2561{
2562 return block_read_full_page(page, reiserfs_get_block);
2563}
2564
2565static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2566{
2567 struct inode *inode = page->mapping->host;
2568 reiserfs_wait_on_write_block(inode->i_sb);
2569 return reiserfs_write_full_page(page, wbc);
2570}
2571
2572static void reiserfs_truncate_failed_write(struct inode *inode)
2573{
2574 truncate_inode_pages(inode->i_mapping, inode->i_size);
2575 reiserfs_truncate_file(inode, 0);
2576}
2577
2578static int reiserfs_write_begin(struct file *file,
2579 struct address_space *mapping,
2580 loff_t pos, unsigned len, unsigned flags,
2581 struct page **pagep, void **fsdata)
2582{
2583 struct inode *inode;
2584 struct page *page;
2585 pgoff_t index;
2586 int ret;
2587 int old_ref = 0;
2588
2589 inode = mapping->host;
2590 *fsdata = NULL;
2591 if (flags & AOP_FLAG_CONT_EXPAND &&
2592 (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
2593 pos++;
2594 *fsdata = (void *)(unsigned long)flags;
2595 }
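/*
 * The AOP_FLAG_CONT_EXPAND case above comes from
 * generic_cont_expand_simple() during an expanding truncate (see
 * reiserfs_setattr() below). Nudging a block-aligned pos into the
 * new block apparently keeps reiserfs_get_block() doing the right
 * thing for tails; the flag is stashed in *fsdata so that
 * reiserfs_write_end() can undo the bump.
 */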
2596
2597 index = pos >> PAGE_CACHE_SHIFT;
2598 page = grab_cache_page_write_begin(mapping, index, flags);
2599 if (!page)
2600 return -ENOMEM;
2601 *pagep = page;
2602
2603 reiserfs_wait_on_write_block(inode->i_sb);
2604 fix_tail_page_for_writing(page);
2605 if (reiserfs_transaction_running(inode->i_sb)) {
2606 struct reiserfs_transaction_handle *th;
2607 th = (struct reiserfs_transaction_handle *)
2608 current->journal_info;
2609 BUG_ON(!th->t_refcount);
2610 BUG_ON(!th->t_trans_id);
2611 old_ref = th->t_refcount;
2612 th->t_refcount++;
2613 }
2614 ret = __block_write_begin(page, pos, len, reiserfs_get_block);
2615 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2616 struct reiserfs_transaction_handle *th = current->journal_info;
2617 /* this gets a little ugly. If reiserfs_get_block returned an
2618 * error and left a transaction running, we've got to close it,
2619 * and we've got to free the handle if it was a persistent transaction.
2620 *
2621 * But, if we had nested into an existing transaction, we need
2622 * to just drop the ref count on the handle.
2623 *
2624 * If old_ref == 0, the transaction is from reiserfs_get_block,
2625 * and it was a persistent trans. Otherwise, it was nested above.
2626 */
2627 if (th->t_refcount > old_ref) {
2628 if (old_ref)
2629 th->t_refcount--;
2630 else {
2631 int err;
2632 reiserfs_write_lock(inode->i_sb);
2633 err = reiserfs_end_persistent_transaction(th);
2634 reiserfs_write_unlock(inode->i_sb);
2635 if (err)
2636 ret = err;
2637 }
2638 }
2639 }
2640 if (ret) {
2641 unlock_page(page);
2642 page_cache_release(page);
2643 /* Truncate allocated blocks */
2644 reiserfs_truncate_failed_write(inode);
2645 }
2646 return ret;
2647}
2648
2649int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
2650{
2651 struct inode *inode = page->mapping->host;
2652 int ret;
2653 int old_ref = 0;
2654 int depth;
2655
2656 depth = reiserfs_write_unlock_nested(inode->i_sb);
2657 reiserfs_wait_on_write_block(inode->i_sb);
2658 reiserfs_write_lock_nested(inode->i_sb, depth);
2659
2660 fix_tail_page_for_writing(page);
2661 if (reiserfs_transaction_running(inode->i_sb)) {
2662 struct reiserfs_transaction_handle *th;
2663 th = (struct reiserfs_transaction_handle *)
2664 current->journal_info;
2665 BUG_ON(!th->t_refcount);
2666 BUG_ON(!th->t_trans_id);
2667 old_ref = th->t_refcount;
2668 th->t_refcount++;
2669 }
2670
2671 ret = __block_write_begin(page, from, len, reiserfs_get_block);
2672 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2673 struct reiserfs_transaction_handle *th = current->journal_info;
2674 /* this gets a little ugly. If reiserfs_get_block returned an
2675 * error and left a transaction running, we've got to close it,
2676 * and we've got to free the handle if it was a persistent transaction.
2677 *
2678 * But, if we had nested into an existing transaction, we need
2679 * to just drop the ref count on the handle.
2680 *
2681 * If old_ref == 0, the transaction is from reiserfs_get_block,
2682 * and it was a persistent trans. Otherwise, it was nested above.
2683 */
2684 if (th->t_refcount > old_ref) {
2685 if (old_ref)
2686 th->t_refcount--;
2687 else {
2688 int err;
2689 reiserfs_write_lock(inode->i_sb);
2690 err = reiserfs_end_persistent_transaction(th);
2691 reiserfs_write_unlock(inode->i_sb);
2692 if (err)
2693 ret = err;
2694 }
2695 }
2696 }
2697 return ret;
2698
2699}
2700
2701static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2702{
2703 return generic_block_bmap(as, block, reiserfs_bmap);
2704}
2705
2706static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2707 loff_t pos, unsigned len, unsigned copied,
2708 struct page *page, void *fsdata)
2709{
2710 struct inode *inode = page->mapping->host;
2711 int ret = 0;
2712 int update_sd = 0;
2713 struct reiserfs_transaction_handle *th;
2714 unsigned start;
2715 bool locked = false;
2716
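/* undo the block-boundary bump applied in reiserfs_write_begin() */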
2717 if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2718 pos++;
2719
2720 reiserfs_wait_on_write_block(inode->i_sb);
2721 if (reiserfs_transaction_running(inode->i_sb))
2722 th = current->journal_info;
2723 else
2724 th = NULL;
2725
2726 start = pos & (PAGE_CACHE_SIZE - 1);
2727 if (unlikely(copied < len)) {
2728 if (!PageUptodate(page))
2729 copied = 0;
2730
2731 page_zero_new_buffers(page, start + copied, start + len);
2732 }
2733 flush_dcache_page(page);
2734
2735 reiserfs_commit_page(inode, page, start, start + copied);
2736
2737 /* generic_commit_write does this for us, but does not update the
2738 ** transaction tracking stuff when the size changes. So, we have
2739 ** to do the i_size updates here.
2740 */
2741 if (pos + copied > inode->i_size) {
2742 struct reiserfs_transaction_handle myth;
2743 reiserfs_write_lock(inode->i_sb);
2744 locked = true;
2745 /* If the file has grown beyond the boundary where it
2746 can have a tail, unmark it as needing tail
2747 packing */
2748 if ((have_large_tails(inode->i_sb)
2749 && inode->i_size > i_block_size(inode) * 4)
2750 || (have_small_tails(inode->i_sb)
2751 && inode->i_size > i_block_size(inode)))
2752 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2753
2754 ret = journal_begin(&myth, inode->i_sb, 1);
2755 if (ret)
2756 goto journal_error;
2757
2758 reiserfs_update_inode_transaction(inode);
2759 inode->i_size = pos + copied;
2760 /*
2761 * this will just nest into our transaction. It's important
2762 * to use mark_inode_dirty so the inode gets pushed around on the
2763 * dirty lists, and so that O_SYNC works as expected
2764 */
2765 mark_inode_dirty(inode);
2766 reiserfs_update_sd(&myth, inode);
2767 update_sd = 1;
2768 ret = journal_end(&myth);
2769 if (ret)
2770 goto journal_error;
2771 }
2772 if (th) {
2773 if (!locked) {
2774 reiserfs_write_lock(inode->i_sb);
2775 locked = true;
2776 }
2777 if (!update_sd)
2778 mark_inode_dirty(inode);
2779 ret = reiserfs_end_persistent_transaction(th);
2780 if (ret)
2781 goto out;
2782 }
2783
2784 out:
2785 if (locked)
2786 reiserfs_write_unlock(inode->i_sb);
2787 unlock_page(page);
2788 page_cache_release(page);
2789
2790 if (pos + len > inode->i_size)
2791 reiserfs_truncate_failed_write(inode);
2792
2793 return ret == 0 ? copied : ret;
2794
2795 journal_error:
2796 reiserfs_write_unlock(inode->i_sb);
2797 locked = false;
2798 if (th) {
2799 if (!update_sd)
2800 reiserfs_update_sd(th, inode);
2801 ret = reiserfs_end_persistent_transaction(th);
2802 }
2803 goto out;
2804}
2805
2806int reiserfs_commit_write(struct file *f, struct page *page,
2807 unsigned from, unsigned to)
2808{
2809 struct inode *inode = page->mapping->host;
2810 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
2811 int ret = 0;
2812 int update_sd = 0;
2813 struct reiserfs_transaction_handle *th = NULL;
2814 int depth;
2815
2816 depth = reiserfs_write_unlock_nested(inode->i_sb);
2817 reiserfs_wait_on_write_block(inode->i_sb);
2818 reiserfs_write_lock_nested(inode->i_sb, depth);
2819
2820 if (reiserfs_transaction_running(inode->i_sb)) {
2821 th = current->journal_info;
2822 }
2823 reiserfs_commit_page(inode, page, from, to);
2824
2825 /* generic_commit_write does this for us, but does not update the
2826 ** transaction tracking stuff when the size changes. So, we have
2827 ** to do the i_size updates here.
2828 */
2829 if (pos > inode->i_size) {
2830 struct reiserfs_transaction_handle myth;
2831 /* If the file has grown beyond the boundary where it
2832 can have a tail, unmark it as needing tail
2833 packing */
2834 if ((have_large_tails(inode->i_sb)
2835 && inode->i_size > i_block_size(inode) * 4)
2836 || (have_small_tails(inode->i_sb)
2837 && inode->i_size > i_block_size(inode)))
2838 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2839
2840 ret = journal_begin(&myth, inode->i_sb, 1);
2841 if (ret)
2842 goto journal_error;
2843
2844 reiserfs_update_inode_transaction(inode);
2845 inode->i_size = pos;
2846 /*
2847 * this will just nest into our transaction. It's important
2848 * to use mark_inode_dirty so the inode gets pushed around on the
2849 * dirty lists, and so that O_SYNC works as expected
2850 */
2851 mark_inode_dirty(inode);
2852 reiserfs_update_sd(&myth, inode);
2853 update_sd = 1;
2854 ret = journal_end(&myth);
2855 if (ret)
2856 goto journal_error;
2857 }
2858 if (th) {
2859 if (!update_sd)
2860 mark_inode_dirty(inode);
2861 ret = reiserfs_end_persistent_transaction(th);
2862 if (ret)
2863 goto out;
2864 }
2865
2866 out:
2867 return ret;
2868
2869 journal_error:
2870 if (th) {
2871 if (!update_sd)
2872 reiserfs_update_sd(th, inode);
2873 ret = reiserfs_end_persistent_transaction(th);
2874 }
2875
2876 return ret;
2877}
2878
2879void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
2880{
2881 if (reiserfs_attrs(inode->i_sb)) {
2882 if (sd_attrs & REISERFS_SYNC_FL)
2883 inode->i_flags |= S_SYNC;
2884 else
2885 inode->i_flags &= ~S_SYNC;
2886 if (sd_attrs & REISERFS_IMMUTABLE_FL)
2887 inode->i_flags |= S_IMMUTABLE;
2888 else
2889 inode->i_flags &= ~S_IMMUTABLE;
2890 if (sd_attrs & REISERFS_APPEND_FL)
2891 inode->i_flags |= S_APPEND;
2892 else
2893 inode->i_flags &= ~S_APPEND;
2894 if (sd_attrs & REISERFS_NOATIME_FL)
2895 inode->i_flags |= S_NOATIME;
2896 else
2897 inode->i_flags &= ~S_NOATIME;
2898 if (sd_attrs & REISERFS_NOTAIL_FL)
2899 REISERFS_I(inode)->i_flags |= i_nopack_mask;
2900 else
2901 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2902 }
2903}
2904
2905void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2906{
2907 if (reiserfs_attrs(inode->i_sb)) {
2908 if (inode->i_flags & S_IMMUTABLE)
2909 *sd_attrs |= REISERFS_IMMUTABLE_FL;
2910 else
2911 *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2912 if (inode->i_flags & S_SYNC)
2913 *sd_attrs |= REISERFS_SYNC_FL;
2914 else
2915 *sd_attrs &= ~REISERFS_SYNC_FL;
2916 if (inode->i_flags & S_NOATIME)
2917 *sd_attrs |= REISERFS_NOATIME_FL;
2918 else
2919 *sd_attrs &= ~REISERFS_NOATIME_FL;
2920 if (REISERFS_I(inode)->i_flags & i_nopack_mask)
2921 *sd_attrs |= REISERFS_NOTAIL_FL;
2922 else
2923 *sd_attrs &= ~REISERFS_NOTAIL_FL;
2924 }
2925}
2926
2927/* decide if this buffer needs to stay around for data logging or ordered
2928** write purposes
2929*/
2930static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2931{
2932 int ret = 1;
2933 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2934
2935 lock_buffer(bh);
2936 spin_lock(&j->j_dirty_buffers_lock);
2937 if (!buffer_mapped(bh)) {
2938 goto free_jh;
2939 }
2940 /* the page is locked, and the only places that log a data buffer
2941 * also lock the page.
2942 */
2943 if (reiserfs_file_data_log(inode)) {
2944 /*
2945 * very conservative, leave the buffer pinned if
2946 * anyone might need it.
2947 */
2948 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2949 ret = 0;
2950 }
2951 } else if (buffer_dirty(bh)) {
2952 struct reiserfs_journal_list *jl;
2953 struct reiserfs_jh *jh = bh->b_private;
2954
2955 /* why is this safe?
2956 * reiserfs_setattr updates i_size in the on disk
2957 * stat data before allowing vmtruncate to be called.
2958 *
2959 * If buffer was put onto the ordered list for this
2960 * transaction, we know for sure either this transaction
2961 * or an older one already has updated i_size on disk,
2962 * and this ordered data won't be referenced in the file
2963 * if we crash.
2964 *
2965 * if the buffer was put onto the ordered list for an older
2966 * transaction, we need to leave it around
2967 */
2968 if (jh && (jl = jh->jl)
2969 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2970 ret = 0;
2971 }
2972 free_jh:
2973 if (ret && bh->b_private) {
2974 reiserfs_free_jh(bh);
2975 }
2976 spin_unlock(&j->j_dirty_buffers_lock);
2977 unlock_buffer(bh);
2978 return ret;
2979}
2980
2981/* clm -- taken from fs/buffer.c:block_invalidate_page */
2982static void reiserfs_invalidatepage(struct page *page, unsigned int offset,
2983 unsigned int length)
2984{
2985 struct buffer_head *head, *bh, *next;
2986 struct inode *inode = page->mapping->host;
2987 unsigned int curr_off = 0;
2988 unsigned int stop = offset + length;
2989 int partial_page = (offset || length < PAGE_CACHE_SIZE);
2990 int ret = 1;
2991
2992 BUG_ON(!PageLocked(page));
2993
2994 if (!partial_page)
2995 ClearPageChecked(page);
2996
2997 if (!page_has_buffers(page))
2998 goto out;
2999
3000 head = page_buffers(page);
3001 bh = head;
3002 do {
3003 unsigned int next_off = curr_off + bh->b_size;
3004 next = bh->b_this_page;
3005
3006 if (next_off > stop)
3007 goto out;
3008
3009 /*
3010 * is this block fully invalidated?
3011 */
3012 if (offset <= curr_off) {
3013 if (invalidatepage_can_drop(inode, bh))
3014 reiserfs_unmap_buffer(bh);
3015 else
3016 ret = 0;
3017 }
3018 curr_off = next_off;
3019 bh = next;
3020 } while (bh != head);
3021
3022 /*
3023 * We release buffers only if the entire page is being invalidated.
3024 * The get_block cached value has been unconditionally invalidated,
3025 * so real IO is not possible anymore.
3026 */
3027 if (!partial_page && ret) {
3028 ret = try_to_release_page(page, 0);
3029 /* maybe should BUG_ON(!ret); - neilb */
3030 }
3031 out:
3032 return;
3033}
3034
3035static int reiserfs_set_page_dirty(struct page *page)
3036{
3037 struct inode *inode = page->mapping->host;
3038 if (reiserfs_file_data_log(inode)) {
3039 SetPageChecked(page);
3040 return __set_page_dirty_nobuffers(page);
3041 }
3042 return __set_page_dirty_buffers(page);
3043}
3044
3045/*
3046 * Returns 1 if the page's buffers were dropped. The page is locked.
3047 *
3048 * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
3049 * in the buffers at page_buffers(page).
3050 *
3051 * even in -o notail mode, we can't be sure an old mount without -o notail
3052 * didn't create files with tails.
3053 */
3054static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3055{
3056 struct inode *inode = page->mapping->host;
3057 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
3058 struct buffer_head *head;
3059 struct buffer_head *bh;
3060 int ret = 1;
3061
3062 WARN_ON(PageChecked(page));
3063 spin_lock(&j->j_dirty_buffers_lock);
3064 head = page_buffers(page);
3065 bh = head;
3066 do {
3067 if (bh->b_private) {
3068 if (!buffer_dirty(bh) && !buffer_locked(bh)) {
3069 reiserfs_free_jh(bh);
3070 } else {
3071 ret = 0;
3072 break;
3073 }
3074 }
3075 bh = bh->b_this_page;
3076 } while (bh != head);
3077 if (ret)
3078 ret = try_to_free_buffers(page);
3079 spin_unlock(&j->j_dirty_buffers_lock);
3080 return ret;
3081}
3082
3083/* We thank Mingming Cao for helping us understand in great detail what
3084 to do in this section of the code. */
3085static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3086 const struct iovec *iov, loff_t offset,
3087 unsigned long nr_segs)
3088{
3089 struct file *file = iocb->ki_filp;
3090 struct inode *inode = file->f_mapping->host;
3091 ssize_t ret;
3092
3093 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
3094 reiserfs_get_blocks_direct_io);
3095
3096 /*
3097 * In case of error extending write may have instantiated a few
3098 * blocks outside i_size. Trim these off again.
3099 */
3100 if (unlikely((rw & WRITE) && ret < 0)) {
3101 loff_t isize = i_size_read(inode);
3102 loff_t end = offset + iov_length(iov, nr_segs);
3103
3104 if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
3105 truncate_setsize(inode, isize);
3106 reiserfs_vfs_truncate_file(inode);
3107 }
3108 }
3109
3110 return ret;
3111}
3112
3113int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3114{
3115 struct inode *inode = dentry->d_inode;
3116 unsigned int ia_valid;
3117 int error;
3118
3119 error = inode_change_ok(inode, attr);
3120 if (error)
3121 return error;
3122
3123 /* must be turned off for recursive notify_change calls */
3124 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3125
3126 if (is_quota_modification(inode, attr))
3127 dquot_initialize(inode);
3128 reiserfs_write_lock(inode->i_sb);
3129 if (attr->ia_valid & ATTR_SIZE) {
3130 /* version 2 items will be caught by the s_maxbytes check
3131 ** done for us in vmtruncate
3132 */
3133 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
3134 attr->ia_size > MAX_NON_LFS) {
3135 reiserfs_write_unlock(inode->i_sb);
3136 error = -EFBIG;
3137 goto out;
3138 }
3139
3140 inode_dio_wait(inode);
3141
3142 /* fill in hole pointers in the expanding truncate case. */
3143 if (attr->ia_size > inode->i_size) {
3144 error = generic_cont_expand_simple(inode, attr->ia_size);
3145 if (REISERFS_I(inode)->i_prealloc_count > 0) {
3146 int err;
3147 struct reiserfs_transaction_handle th;
3148 /* we're changing at most 2 bitmaps, inode + super */
3149 err = journal_begin(&th, inode->i_sb, 4);
3150 if (!err) {
3151 reiserfs_discard_prealloc(&th, inode);
3152 err = journal_end(&th);
3153 }
3154 if (err)
3155 error = err;
3156 }
3157 if (error) {
3158 reiserfs_write_unlock(inode->i_sb);
3159 goto out;
3160 }
3161 /*
3162 * file size is changed, ctime and mtime are
3163 * to be updated
3164 */
3165 attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
3166 }
3167 }
3168 reiserfs_write_unlock(inode->i_sb);
3169
3170 if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) ||
3171 ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) &&
3172 (get_inode_sd_version(inode) == STAT_DATA_V1)) {
3173 /* stat data of format v3.5 has 16 bit uid and gid */
3174 error = -EINVAL;
3175 goto out;
3176 }
3177
3178 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
3179 (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
3180 struct reiserfs_transaction_handle th;
3181 int jbegin_count =
3182 2 *
3183 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
3184 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
3185 2;
3186
3187 error = reiserfs_chown_xattrs(inode, attr);
3188
3189 if (error)
3190 return error;
3191
3192 /* (user+group)*(old+new) structures - we count quota info and inode write (sb, inode) */
3193 reiserfs_write_lock(inode->i_sb);
3194 error = journal_begin(&th, inode->i_sb, jbegin_count);
3195 reiserfs_write_unlock(inode->i_sb);
3196 if (error)
3197 goto out;
3198 error = dquot_transfer(inode, attr);
3199 reiserfs_write_lock(inode->i_sb);
3200 if (error) {
3201 journal_end(&th);
3202 reiserfs_write_unlock(inode->i_sb);
3203 goto out;
3204 }
3205
3206 /* Update corresponding info in inode so that everything is in
3207 * one transaction */
3208 if (attr->ia_valid & ATTR_UID)
3209 inode->i_uid = attr->ia_uid;
3210 if (attr->ia_valid & ATTR_GID)
3211 inode->i_gid = attr->ia_gid;
3212 mark_inode_dirty(inode);
3213 error = journal_end(&th);
3214 reiserfs_write_unlock(inode->i_sb);
3215 if (error)
3216 goto out;
3217 }
3218
3219 if ((attr->ia_valid & ATTR_SIZE) &&
3220 attr->ia_size != i_size_read(inode)) {
3221 error = inode_newsize_ok(inode, attr->ia_size);
3222 if (!error) {
3223 truncate_setsize(inode, attr->ia_size);
3224 reiserfs_vfs_truncate_file(inode);
3225 }
3226 }
3227
3228 if (!error) {
3229 setattr_copy(inode, attr);
3230 mark_inode_dirty(inode);
3231 }
3232
3233 if (!error && reiserfs_posixacl(inode->i_sb)) {
3234 if (attr->ia_valid & ATTR_MODE)
3235 error = reiserfs_acl_chmod(inode);
3236 }
3237
3238out:
3239 return error;
3240}
3241
3242const struct address_space_operations reiserfs_address_space_operations = {
3243 .writepage = reiserfs_writepage,
3244 .readpage = reiserfs_readpage,
3245 .readpages = reiserfs_readpages,
3246 .releasepage = reiserfs_releasepage,
3247 .invalidatepage = reiserfs_invalidatepage,
3248 .write_begin = reiserfs_write_begin,
3249 .write_end = reiserfs_write_end,
3250 .bmap = reiserfs_aop_bmap,
3251 .direct_IO = reiserfs_direct_IO,
3252 .set_page_dirty = reiserfs_set_page_dirty,
3253};