// SPDX-License-Identifier: GPL-2.0
#ifndef NO_BCACHEFS_FS

#include "bcachefs.h"
#include "btree_iter.h"
#include "extents.h"
#include "fs-io.h"
#include "fs-io-pagecache.h"
#include "subvolume.h"

#include <linux/pagevec.h>
#include <linux/writeback.h>

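/*
 * Fill @fs with a contiguous run of folios covering [start, end) from the
 * pagecache.  FGP_CREAT is dropped once we are more than 1MB past @start, so
 * at most ~1MB of new folios are created per call.  Returns 0 if any folios
 * were obtained.
 *
 * Illustrative usage sketch (not an actual caller in this file):
 *
 *	folios fs;
 *	darray_init(&fs);
 *	int ret = bch2_filemap_get_contig_folios_d(mapping, pos, end,
 *				FGP_LOCK|FGP_CREAT, GFP_KERNEL, &fs);
 */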
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
				     loff_t start, u64 end,
				     fgf_t fgp_flags, gfp_t gfp,
				     folios *fs)
{
	struct folio *f;
	u64 pos = start;
	int ret = 0;

	while (pos < end) {
		if ((u64) pos >= (u64) start + (1ULL << 20))
			fgp_flags &= ~FGP_CREAT;

		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
		if (ret)
			break;

		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
		if (IS_ERR_OR_NULL(f))
			break;

		BUG_ON(fs->nr && folio_pos(f) != pos);

		pos = folio_end_pos(f);
		darray_push(fs, f);
	}

	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
		ret = -ENOMEM;

	return fs->nr ? 0 : ret;
}

/* pagecache_block must be held */
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
					    loff_t start, loff_t end)
{
	int ret;

	/*
	 * XXX: the way this is currently implemented, we can spin if a process
	 * is continually redirtying a specific page
	 */
	do {
		if (!mapping->nrpages)
			return 0;

		ret = filemap_write_and_wait_range(mapping, start, end);
		if (ret)
			break;

		if (!mapping->nrpages)
			return 0;

		ret = invalidate_inode_pages2_range(mapping,
					start >> PAGE_SHIFT,
					end >> PAGE_SHIFT);
	} while (ret == -EBUSY);

	return ret;
}

#if 0
/* Useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(n)	#n,
	BCH_FOLIO_SECTOR_STATE()
#undef x
	NULL
};
#endif

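/*
 * Helpers for the bch_folio_sector state machine: dirtying a sector moves
 * unallocated -> dirty and reserved -> dirty_reserved, undirtying reverses
 * that, and reserving moves unallocated -> reserved and dirty ->
 * dirty_reserved.  Other states are left unchanged.
 */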
static inline enum bch_folio_sector_state
folio_sector_dirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_dirty;
	case SECTOR_reserved:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_undirty(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_dirty:
		return SECTOR_unallocated;
	case SECTOR_dirty_reserved:
		return SECTOR_reserved;
	default:
		return state;
	}
}

static inline enum bch_folio_sector_state
folio_sector_reserve(enum bch_folio_sector_state state)
{
	switch (state) {
	case SECTOR_unallocated:
		return SECTOR_reserved;
	case SECTOR_dirty:
		return SECTOR_dirty_reserved;
	default:
		return state;
	}
}

/* for newly allocated folios: */
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	struct bch_folio *s;

	s = kzalloc(sizeof(*s) +
		    sizeof(struct bch_folio_sector) *
		    folio_sectors(folio), gfp);
	if (!s)
		return NULL;

	spin_lock_init(&s->lock);
	folio_attach_private(folio, s);
	return s;
}

struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
}

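/* Map an extent key to the pagecache sector state it implies: */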
static unsigned bkey_to_sector_state(struct bkey_s_c k)
{
	if (bkey_extent_is_reservation(k))
		return SECTOR_reserved;
	if (bkey_extent_is_allocation(k.k))
		return SECTOR_allocated;
	return SECTOR_unallocated;
}

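/*
 * Set the state and replica count for sectors [pg_offset, pg_offset + pg_len)
 * of a folio; if the range runs to the end of the folio, the bch_folio is
 * marked uptodate.
 */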
static void __bch2_folio_set(struct folio *folio,
			     unsigned pg_offset, unsigned pg_len,
			     unsigned nr_ptrs, unsigned state)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	BUG_ON(pg_offset >= sectors);
	BUG_ON(pg_offset + pg_len > sectors);

	spin_lock(&s->lock);

	for (i = pg_offset; i < pg_offset + pg_len; i++) {
		s->s[i].nr_replicas = nr_ptrs;
		bch2_folio_sector_set(folio, s, i, state);
	}

	if (i == sectors)
		s->uptodate = true;

	spin_unlock(&s->lock);
}

/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
		   struct folio **fs, unsigned nr_folios)
{
	struct btree_trans *trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_folio *s;
	u64 offset = folio_sector(fs[0]);
	unsigned folio_idx;
	u32 snapshot;
	bool need_set = false;
	int ret;

	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
		s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
		if (!s)
			return -ENOMEM;

		need_set |= !s->uptodate;
	}

	if (!need_set)
		return 0;

	folio_idx = 0;
	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
				     SPOS(inum.inum, offset, snapshot),
				     BTREE_ITER_SLOTS, k, ret) {
		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
		unsigned state = bkey_to_sector_state(k);

		while (folio_idx < nr_folios) {
			struct folio *folio = fs[folio_idx];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
				folio_start;
			unsigned folio_len = min(k.k->p.offset, folio_end) -
				folio_offset - folio_start;

			BUG_ON(k.k->p.offset < folio_start);
			BUG_ON(bkey_start_offset(k.k) > folio_end);

			if (!bch2_folio(folio)->uptodate)
				__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

			if (k.k->p.offset < folio_end)
				break;
			folio_idx++;
		}

		if (folio_idx == nr_folios)
			break;
	}

	offset = iter.pos.offset;
	bch2_trans_iter_exit(trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;
	bch2_trans_put(trans);

	return ret;
}

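/*
 * Initialize bch_folio sector state for every folio in @bio from the extent
 * key @k (reflink_v keys are counted as zero replicas here):
 */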
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
	struct bvec_iter iter;
	struct folio_vec fv;
	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
	unsigned state = bkey_to_sector_state(k);

	bio_for_each_folio(fv, bio, iter)
		__bch2_folio_set(fv.fv_folio,
				 fv.fv_offset >> 9,
				 fv.fv_len >> 9,
				 nr_ptrs, state);
}

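/*
 * Walk the pagecache over the sector range [start, end) and reset nr_replicas
 * to 0 for every cached sector:
 */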
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
				     u64 start, u64 end)
{
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	unsigned i, j;

	if (end <= start)
		return;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);
			s = bch2_folio(folio);

			if (s) {
				spin_lock(&s->lock);
				for (j = folio_offset; j < folio_offset + folio_len; j++)
					s->s[j].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}

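/*
 * Walk the pagecache over [*start, end) and move each cached sector to its
 * reserved state, adjusting i_sectors accounting for sectors that were
 * SECTOR_dirty.  *start is advanced as folios are processed, so a caller can
 * resume after -EAGAIN in nonblocking mode.
 */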
int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				 u64 *start, u64 end,
				 bool nonblocking)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	s64 i_sectors_delta = 0;
	int ret = 0;

	if (end <= *start)
		return 0;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblocking)
				folio_lock(folio);
			else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				ret = -EAGAIN;
				break;
			}

			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);

			BUG_ON(end <= folio_start);

			*start = min(end, folio_end);

			struct bch_folio *s = bch2_folio(folio);
			if (s) {
				unsigned folio_offset = max(*start, folio_start) - folio_start;
				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;

				spin_lock(&s->lock);
				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
					bch2_folio_sector_set(folio, s, j,
						folio_sector_reserve(s->s[j].state));
				}
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
	return ret;
}

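/*
 * How many additional replicas still need a disk reservation for this sector,
 * given the target number of replicas:
 */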
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
					  unsigned nr_replicas)
{
	return max(0, (int) nr_replicas -
		   s->nr_replicas -
		   s->replicas_reserved);
}

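/*
 * Take a disk reservation covering every sector of @folio that isn't already
 * allocated or reserved, and credit it to the sectors' replicas_reserved:
 */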
int bch2_get_folio_disk_reservation(struct bch_fs *c,
				    struct bch_inode_info *inode,
				    struct folio *folio, bool check_enospc)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned nr_replicas = inode_nr_replicas(c, inode);
	struct disk_reservation disk_res = { 0 };
	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	for (i = 0; i < sectors; i++)
		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);

	if (!disk_res_sectors)
		return 0;

	ret = bch2_disk_reservation_get(c, &disk_res,
					disk_res_sectors, 1,
					!check_enospc
					? BCH_DISK_RESERVATION_NOFAIL
					: 0);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < sectors; i++)
		s->s[i].replicas_reserved +=
			sectors_to_reserve(&s->s[i], nr_replicas);

	return 0;
}

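/* Release the disk and quota reservations held by @res: */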
void bch2_folio_reservation_put(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct bch2_folio_reservation *res)
{
	bch2_disk_reservation_put(c, &res->disk);
	bch2_quota_reservation_put(c, inode, &res->quota);
}

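/*
 * Reserve disk space and quota for a write to [offset, offset + len) within
 * @folio; only sectors that aren't already allocated or reserved are counted.
 * Requires the folio's bch_folio state to be uptodate.
 */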
int bch2_folio_reservation_get(struct bch_fs *c,
			       struct bch_inode_info *inode,
			       struct folio *folio,
			       struct bch2_folio_reservation *res,
			       unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned i, disk_sectors = 0, quota_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	BUG_ON(!s->uptodate);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		disk_sectors += sectors_to_reserve(&s->s[i],
						   res->disk.nr_replicas);
		quota_sectors += s->s[i].state == SECTOR_unallocated;
	}

	if (disk_sectors) {
		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
		if (unlikely(ret))
			return ret;
	}

	if (quota_sectors) {
		ret = bch2_quota_reservation_add(c, inode, &res->quota,
						 quota_sectors, true);
		if (unlikely(ret)) {
			struct disk_reservation tmp = {
				.sectors = disk_sectors
			};

			bch2_disk_reservation_put(c, &tmp);
			res->disk.sectors -= disk_sectors;
			return ret;
		}
	}

	return 0;
}

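/*
 * Called when a folio is dropped from the pagecache: return any disk
 * reservation held by its sectors, undo dirty sector accounting, and free the
 * attached bch_folio:
 */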
static void bch2_clear_folio_bits(struct folio *folio)
{
	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch_folio *s = bch2_folio(folio);
	struct disk_reservation disk_res = { 0 };
	int i, sectors = folio_sectors(folio), dirty_sectors = 0;

	if (!s)
		return;

	EBUG_ON(!folio_test_locked(folio));
	EBUG_ON(folio_test_writeback(folio));

	for (i = 0; i < sectors; i++) {
		disk_res.sectors += s->s[i].replicas_reserved;
		s->s[i].replicas_reserved = 0;

		dirty_sectors -= s->s[i].state == SECTOR_dirty;
		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
	}

	bch2_disk_reservation_put(c, &disk_res);

	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

	bch2_folio_release(folio);
}

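/*
 * Transfer reservation from @res to the folio's sectors and mark
 * [offset, offset + len) dirty, accounting newly dirtied unallocated sectors
 * in i_sectors/quota:
 */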
void bch2_set_folio_dirty(struct bch_fs *c,
			  struct bch_inode_info *inode,
			  struct folio *folio,
			  struct bch2_folio_reservation *res,
			  unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, dirty_sectors = 0;

	WARN_ON((u64) folio_pos(folio) + offset + len >
		round_up((u64) i_size_read(&inode->v), block_bytes(c)));

	BUG_ON(!s->uptodate);

	spin_lock(&s->lock);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		unsigned sectors = sectors_to_reserve(&s->s[i],
						      res->disk.nr_replicas);

		/*
		 * This can happen if we race with the error path in
		 * bch2_writepage_io_done():
		 */
		sectors = min_t(unsigned, sectors, res->disk.sectors);

		s->s[i].replicas_reserved += sectors;
		res->disk.sectors -= sectors;

		dirty_sectors += s->s[i].state == SECTOR_unallocated;

		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
	}

	spin_unlock(&s->lock);

	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

	if (!folio_test_dirty(folio))
		filemap_dirty_folio(inode->v.i_mapping, folio);
}

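/*
 * Handle page faults with respect to faults_disabled_mapping(), which the
 * direct IO path uses to avoid deadlocking when userspace passes a buffer
 * mmapped from the same file: faulting on that mapping returns SIGBUS, and
 * lock ordering may require dropping and retaking the other inode's pagecache
 * lock first (signalled via set_fdm_dropped_locks()).
 */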
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct address_space *fdm = faults_disabled_mapping();
	struct bch_inode_info *inode = file_bch_inode(file);
	vm_fault_t ret;

	if (fdm == mapping)
		return VM_FAULT_SIGBUS;

	/* Lock ordering: */
	if (fdm > mapping) {
		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

		if (bch2_pagecache_add_tryget(inode))
			goto got_lock;

		bch2_pagecache_block_put(fdm_host);

		bch2_pagecache_add_get(inode);
		bch2_pagecache_add_put(inode);

		bch2_pagecache_block_get(fdm_host);

		/* Signal that lock has been dropped: */
		set_fdm_dropped_locks();
		return VM_FAULT_SIGBUS;
	}

	bch2_pagecache_add_get(inode);
got_lock:
	ret = filemap_fault(vmf);
	bch2_pagecache_add_put(inode);

	return ret;
}

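/*
 * Make a mmapped folio writable: take a disk/quota reservation for it and
 * mark it dirty, or return SIGBUS if we can't:
 */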
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct bch_inode_info *inode = file_bch_inode(file);
	struct address_space *mapping = file->f_mapping;
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_folio_reservation res;
	unsigned len;
	loff_t isize;
	vm_fault_t ret;

	bch2_folio_reservation_init(c, inode, &res);

	sb_start_pagefault(inode->v.i_sb);
	file_update_time(file);

	/*
	 * Not strictly necessary, but helps avoid dio writes livelocking in
	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
	 * page lock before invalidating page
	 */
	bch2_pagecache_add_get(inode);

	folio_lock(folio);
	isize = i_size_read(&inode->v);

	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
		folio_unlock(folio);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
		folio_unlock(folio);
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
	bch2_folio_reservation_put(c, inode, &res);

	folio_wait_stable(folio);
	ret = VM_FAULT_LOCKED;
out:
	bch2_pagecache_add_put(inode);
	sb_end_pagefault(inode->v.i_sb);

	return ret;
}

void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	if (offset || length < folio_size(folio))
		return;

	bch2_clear_folio_bits(folio);
}

bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	if (folio_test_dirty(folio) || folio_test_writeback(folio))
		return false;

	bch2_clear_folio_bits(folio);
	return true;
}

/* fseek: */

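/*
 * Return the byte offset within @folio of the first sector at or after @pos
 * that has data (state >= SECTOR_dirty) with at least @min_replicas, or -1 if
 * there is none:
 */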
static int folio_data_offset(struct folio *folio, loff_t pos,
			     unsigned min_replicas)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	if (s)
		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
			if (s->s[i].state >= SECTOR_dirty &&
			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
				return i << SECTOR_SHIFT;

	return -1;
}

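/*
 * Scan the pagecache in [start_offset, end_offset) for the first offset
 * backed by data in a cached folio; returns end_offset if none is found, or
 * -EAGAIN in nonblocking mode if a folio lock couldn't be taken:
 */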
loff_t bch2_seek_pagecache_data(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct folio_batch fbatch;
	pgoff_t start_index = start_offset >> PAGE_SHIFT;
	pgoff_t end_index = end_offset >> PAGE_SHIFT;
	pgoff_t index = start_index;
	unsigned i;
	loff_t ret;
	int offset;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(vinode->i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblock) {
				folio_lock(folio);
			} else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				return -EAGAIN;
			}

			offset = folio_data_offset(folio,
					max(folio_pos(folio), start_offset),
					min_replicas);
			if (offset >= 0) {
				ret = clamp(folio_pos(folio) + offset,
					    start_offset, end_offset);
				folio_unlock(folio);
				folio_batch_release(&fbatch);
				return ret;
			}
			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	return end_offset;
}

/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
 * code to indicate a pagecache hole exists at the returned offset. Otherwise
 * return 0 if the folio is filled with data, or an error code. This function
 * can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
			     unsigned min_replicas, bool nonblock)
{
	struct folio *folio;
	struct bch_folio *s;
	unsigned i, sectors;
	int ret = -ENOENT;

	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	s = bch2_folio(folio);
	if (!s)
		goto unlock;

	sectors = folio_sectors(folio);
	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
		if (s->s[i].state < SECTOR_dirty ||
		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
			*offset = max(*offset,
				      folio_pos(folio) + (i << SECTOR_SHIFT));
			goto unlock;
		}

	*offset = folio_end_pos(folio);
	ret = 0;
unlock:
	folio_unlock(folio);
	folio_put(folio);
	return ret;
}

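/*
 * Walk forward from start_offset, skipping folios that are fully populated
 * with data, and return the first offset that is a pagecache hole; never
 * returns past end_offset:
 */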
loff_t bch2_seek_pagecache_hole(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct address_space *mapping = vinode->i_mapping;
	loff_t offset = start_offset;
	loff_t ret = 0;

	while (!ret && offset < end_offset)
		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);

	if (ret && ret != -ENOENT)
		return ret;
	return min(offset, end_offset);
}

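/*
 * Shrink a candidate hole (in sectors) so it doesn't overlap pagecache data:
 * advance *hole_start past cached data and pull *hole_end back to the start
 * of the next cached data:
 */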
int bch2_clamp_data_hole(struct inode *inode,
			 u64 *hole_start,
			 u64 *hole_end,
			 unsigned min_replicas,
			 bool nonblock)
{
	loff_t ret;

	ret = bch2_seek_pagecache_hole(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_start = ret;

	if (*hole_start == *hole_end)
		return 0;

	ret = bch2_seek_pagecache_data(inode,
		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
	if (ret < 0)
		return ret;

	*hole_end = ret;
	return 0;
}

#endif /* NO_BCACHEFS_FS */