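The listing below is fs/bcachefs/fs-io-pagecache.c, reproduced for two kernel releases (v6.8 first, then v6.13.7). It implements bcachefs's per-folio page cache bookkeeping: a struct bch_folio attached to each folio records, for every 512-byte sector, an allocation state and replica counts. That state is initialized from the extents btree (bch2_folio_set()), consumed by the disk/quota reservation and dirty-accounting helpers, used by the fault handlers bch2_page_fault() and bch2_page_mkwrite(), and queried by the fseek (SEEK_DATA/SEEK_HOLE) helpers at the end of the file.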
fs/bcachefs/fs-io-pagecache.c (Linux v6.8)
  1// SPDX-License-Identifier: GPL-2.0
  2#ifndef NO_BCACHEFS_FS
  3
  4#include "bcachefs.h"
  5#include "btree_iter.h"
  6#include "extents.h"
  7#include "fs-io.h"
  8#include "fs-io-pagecache.h"
  9#include "subvolume.h"
 10
 11#include <linux/pagevec.h>
 12#include <linux/writeback.h>
 13
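/*
 * Descriptive note on the function below (derived from its body): it grabs a
 * run of contiguous folios covering [start, end), stopping at the first gap
 * or allocation failure, and stops creating new folios more than 1MB past
 * @start (FGP_CREAT is cleared beyond that point). It returns 0 if at least
 * one folio was obtained, otherwise an error (-ENOMEM if FGP_CREAT was set
 * but nothing could be created).
 */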
 14int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
 15				     loff_t start, u64 end,
 16				     fgf_t fgp_flags, gfp_t gfp,
 17				     folios *fs)
 18{
 19	struct folio *f;
 20	u64 pos = start;
 21	int ret = 0;
 22
 23	while (pos < end) {
 24		if ((u64) pos >= (u64) start + (1ULL << 20))
 25			fgp_flags &= ~FGP_CREAT;
 26
 27		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
 28		if (ret)
 29			break;
 30
 31		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
 32		if (IS_ERR_OR_NULL(f))
 33			break;
 34
 35		BUG_ON(fs->nr && folio_pos(f) != pos);
 36
 37		pos = folio_end_pos(f);
 38		darray_push(fs, f);
 39	}
 40
 41	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
 42		ret = -ENOMEM;
 43
 44	return fs->nr ? 0 : ret;
 45}
 46
 47/* pagecache_block must be held */
 48int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
 49					    loff_t start, loff_t end)
 50{
 51	int ret;
 52
 53	/*
 54	 * XXX: the way this is currently implemented, we can spin if a process
 55	 * is continually redirtying a specific page
 56	 */
 57	do {
 58		if (!mapping->nrpages)
 59			return 0;
 60
 61		ret = filemap_write_and_wait_range(mapping, start, end);
 62		if (ret)
 63			break;
 64
 65		if (!mapping->nrpages)
 66			return 0;
 67
 68		ret = invalidate_inode_pages2_range(mapping,
 69				start >> PAGE_SHIFT,
 70				end >> PAGE_SHIFT);
 71	} while (ret == -EBUSY);
 72
 73	return ret;
 74}
 75
 76#if 0
 77/* Useful for debug tracing: */
 78static const char * const bch2_folio_sector_states[] = {
 79#define x(n)	#n,
 80	BCH_FOLIO_SECTOR_STATE()
 81#undef x
 82	NULL
 83};
 84#endif
 85
 86static inline enum bch_folio_sector_state
 87folio_sector_dirty(enum bch_folio_sector_state state)
 88{
 89	switch (state) {
 90	case SECTOR_unallocated:
 91		return SECTOR_dirty;
 92	case SECTOR_reserved:
 93		return SECTOR_dirty_reserved;
 94	default:
 95		return state;
 96	}
 97}
 98
 99static inline enum bch_folio_sector_state
100folio_sector_undirty(enum bch_folio_sector_state state)
101{
102	switch (state) {
103	case SECTOR_dirty:
104		return SECTOR_unallocated;
105	case SECTOR_dirty_reserved:
106		return SECTOR_reserved;
107	default:
108		return state;
109	}
110}
111
112static inline enum bch_folio_sector_state
113folio_sector_reserve(enum bch_folio_sector_state state)
114{
115	switch (state) {
116	case SECTOR_unallocated:
117		return SECTOR_reserved;
118	case SECTOR_dirty:
119		return SECTOR_dirty_reserved;
120	default:
121		return state;
122	}
123}
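/*
 * Taken together, the three helpers above define the per-sector state
 * machine over the BCH_FOLIO_SECTOR_STATE() states:
 *
 *   unallocated --dirty--> dirty            --undirty--> unallocated
 *   reserved    --dirty--> dirty_reserved   --undirty--> reserved
 *   unallocated --reserve--> reserved
 *   dirty       --reserve--> dirty_reserved
 *
 * SECTOR_allocated is set from the extents btree (see bkey_to_sector_state()
 * below) and is left unchanged by all three helpers. The fseek code later in
 * this file relies on the enum ordering: "state >= SECTOR_dirty" means the
 * sector has (or will have) data, "state < SECTOR_dirty" means hole.
 */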
124
125/* for newly allocated folios: */
126struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
127{
128	struct bch_folio *s;
129
130	s = kzalloc(sizeof(*s) +
131		    sizeof(struct bch_folio_sector) *
132		    folio_sectors(folio), gfp);
133	if (!s)
134		return NULL;
135
136	spin_lock_init(&s->lock);
137	folio_attach_private(folio, s);
138	return s;
139}
140
141struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
142{
143	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
144}
145
146static unsigned bkey_to_sector_state(struct bkey_s_c k)
147{
148	if (bkey_extent_is_reservation(k))
149		return SECTOR_reserved;
150	if (bkey_extent_is_allocation(k.k))
151		return SECTOR_allocated;
152	return SECTOR_unallocated;
153}
154
155static void __bch2_folio_set(struct folio *folio,
156			     unsigned pg_offset, unsigned pg_len,
157			     unsigned nr_ptrs, unsigned state)
158{
159	struct bch_folio *s = bch2_folio(folio);
160	unsigned i, sectors = folio_sectors(folio);
161
162	BUG_ON(pg_offset >= sectors);
163	BUG_ON(pg_offset + pg_len > sectors);
164
165	spin_lock(&s->lock);
166
167	for (i = pg_offset; i < pg_offset + pg_len; i++) {
168		s->s[i].nr_replicas	= nr_ptrs;
169		bch2_folio_sector_set(folio, s, i, state);
170	}
171
172	if (i == sectors)
173		s->uptodate = true;
174
175	spin_unlock(&s->lock);
176}
177
178/*
179 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
180 * extents btree:
181 */
182int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
183		   struct folio **fs, unsigned nr_folios)
184{
185	struct btree_trans *trans;
186	struct btree_iter iter;
187	struct bkey_s_c k;
188	struct bch_folio *s;
189	u64 offset = folio_sector(fs[0]);
190	unsigned folio_idx;
191	u32 snapshot;
192	bool need_set = false;
193	int ret;
194
195	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
196		s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
197		if (!s)
198			return -ENOMEM;
199
200		need_set |= !s->uptodate;
201	}
202
203	if (!need_set)
204		return 0;
205
206	folio_idx = 0;
207	trans = bch2_trans_get(c);
208retry:
209	bch2_trans_begin(trans);
210
211	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
212	if (ret)
213		goto err;
214
215	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
216			   SPOS(inum.inum, offset, snapshot),
217			   BTREE_ITER_SLOTS, k, ret) {
218		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
219		unsigned state = bkey_to_sector_state(k);
220
221		while (folio_idx < nr_folios) {
222			struct folio *folio = fs[folio_idx];
223			u64 folio_start	= folio_sector(folio);
224			u64 folio_end	= folio_end_sector(folio);
225			unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
226				folio_start;
227			unsigned folio_len = min(k.k->p.offset, folio_end) -
228				folio_offset - folio_start;
229
230			BUG_ON(k.k->p.offset < folio_start);
231			BUG_ON(bkey_start_offset(k.k) > folio_end);
232
233			if (!bch2_folio(folio)->uptodate)
234				__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);
235
236			if (k.k->p.offset < folio_end)
237				break;
238			folio_idx++;
239		}
240
241		if (folio_idx == nr_folios)
242			break;
243	}
244
245	offset = iter.pos.offset;
246	bch2_trans_iter_exit(trans, &iter);
247err:
248	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
249		goto retry;
250	bch2_trans_put(trans);
251
252	return ret;
253}
254
255void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
256{
257	struct bvec_iter iter;
258	struct folio_vec fv;
259	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
260		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
261	unsigned state = bkey_to_sector_state(k);
262
263	bio_for_each_folio(fv, bio, iter)
264		__bch2_folio_set(fv.fv_folio,
265				 fv.fv_offset >> 9,
266				 fv.fv_len >> 9,
267				 nr_ptrs, state);
268}
269
270void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
271				     u64 start, u64 end)
272{
273	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
274	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
275	struct folio_batch fbatch;
276	unsigned i, j;
277
278	if (end <= start)
279		return;
280
281	folio_batch_init(&fbatch);
282
283	while (filemap_get_folios(inode->v.i_mapping,
284				  &index, end_index, &fbatch)) {
285		for (i = 0; i < folio_batch_count(&fbatch); i++) {
286			struct folio *folio = fbatch.folios[i];
287			u64 folio_start = folio_sector(folio);
288			u64 folio_end = folio_end_sector(folio);
289			unsigned folio_offset = max(start, folio_start) - folio_start;
290			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
291			struct bch_folio *s;
292
293			BUG_ON(end <= folio_start);
294
295			folio_lock(folio);
296			s = bch2_folio(folio);
297
298			if (s) {
299				spin_lock(&s->lock);
300				for (j = folio_offset; j < folio_offset + folio_len; j++)
301					s->s[j].nr_replicas = 0;
302				spin_unlock(&s->lock);
303			}
304
305			folio_unlock(folio);
306		}
307		folio_batch_release(&fbatch);
308		cond_resched();
309	}
310}
311
312int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
313				 u64 *start, u64 end,
314				 bool nonblocking)
315{
316	struct bch_fs *c = inode->v.i_sb->s_fs_info;
317	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
318	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
319	struct folio_batch fbatch;
320	s64 i_sectors_delta = 0;
321	int ret = 0;
322
323	if (end <= *start)
324		return 0;
325
326	folio_batch_init(&fbatch);
327
328	while (filemap_get_folios(inode->v.i_mapping,
329				  &index, end_index, &fbatch)) {
330		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
331			struct folio *folio = fbatch.folios[i];
332
333			if (!nonblocking)
334				folio_lock(folio);
335			else if (!folio_trylock(folio)) {
336				folio_batch_release(&fbatch);
337				ret = -EAGAIN;
338				break;
339			}
340
341			u64 folio_start = folio_sector(folio);
342			u64 folio_end = folio_end_sector(folio);
343
344			BUG_ON(end <= folio_start);
345
346			*start = min(end, folio_end);
347
348			struct bch_folio *s = bch2_folio(folio);
349			if (s) {
350				unsigned folio_offset = max(*start, folio_start) - folio_start;
351				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
352
353				spin_lock(&s->lock);
354				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
355					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
356					bch2_folio_sector_set(folio, s, j,
357						folio_sector_reserve(s->s[j].state));
358				}
359				spin_unlock(&s->lock);
360			}
361
362			folio_unlock(folio);
363		}
364		folio_batch_release(&fbatch);
365		cond_resched();
366	}
367
368	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
369	return ret;
370}
371
372static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
373					  unsigned nr_replicas)
374{
375	return max(0, (int) nr_replicas -
376		   s->nr_replicas -
377		   s->replicas_reserved);
378}
379
380int bch2_get_folio_disk_reservation(struct bch_fs *c,
381				struct bch_inode_info *inode,
382				struct folio *folio, bool check_enospc)
383{
384	struct bch_folio *s = bch2_folio_create(folio, 0);
385	unsigned nr_replicas = inode_nr_replicas(c, inode);
386	struct disk_reservation disk_res = { 0 };
387	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
388	int ret;
389
390	if (!s)
391		return -ENOMEM;
392
393	for (i = 0; i < sectors; i++)
394		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
395
396	if (!disk_res_sectors)
397		return 0;
398
399	ret = bch2_disk_reservation_get(c, &disk_res,
400					disk_res_sectors, 1,
401					!check_enospc
402					? BCH_DISK_RESERVATION_NOFAIL
403					: 0);
404	if (unlikely(ret))
405		return ret;
406
407	for (i = 0; i < sectors; i++)
408		s->s[i].replicas_reserved +=
409			sectors_to_reserve(&s->s[i], nr_replicas);
410
411	return 0;
412}
413
414void bch2_folio_reservation_put(struct bch_fs *c,
415			struct bch_inode_info *inode,
416			struct bch2_folio_reservation *res)
417{
418	bch2_disk_reservation_put(c, &res->disk);
419	bch2_quota_reservation_put(c, inode, &res->quota);
420}
421
422int bch2_folio_reservation_get(struct bch_fs *c,
423			struct bch_inode_info *inode,
424			struct folio *folio,
425			struct bch2_folio_reservation *res,
426			unsigned offset, unsigned len)
427{
428	struct bch_folio *s = bch2_folio_create(folio, 0);
429	unsigned i, disk_sectors = 0, quota_sectors = 0;
430	int ret;
431
432	if (!s)
433		return -ENOMEM;
434
435	BUG_ON(!s->uptodate);
436
437	for (i = round_down(offset, block_bytes(c)) >> 9;
438	     i < round_up(offset + len, block_bytes(c)) >> 9;
439	     i++) {
440		disk_sectors += sectors_to_reserve(&s->s[i],
441						res->disk.nr_replicas);
442		quota_sectors += s->s[i].state == SECTOR_unallocated;
443	}
444
445	if (disk_sectors) {
446		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
447		if (unlikely(ret))
448			return ret;
449	}
450
451	if (quota_sectors) {
452		ret = bch2_quota_reservation_add(c, inode, &res->quota,
453						 quota_sectors, true);
454		if (unlikely(ret)) {
455			struct disk_reservation tmp = {
456				.sectors = disk_sectors
457			};
458
459			bch2_disk_reservation_put(c, &tmp);
460			res->disk.sectors -= disk_sectors;
461			return ret;
462		}
463	}
464
465	return 0;
466}
467
468static void bch2_clear_folio_bits(struct folio *folio)
469{
470	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
471	struct bch_fs *c = inode->v.i_sb->s_fs_info;
472	struct bch_folio *s = bch2_folio(folio);
473	struct disk_reservation disk_res = { 0 };
474	int i, sectors = folio_sectors(folio), dirty_sectors = 0;
475
476	if (!s)
477		return;
478
479	EBUG_ON(!folio_test_locked(folio));
480	EBUG_ON(folio_test_writeback(folio));
481
482	for (i = 0; i < sectors; i++) {
483		disk_res.sectors += s->s[i].replicas_reserved;
484		s->s[i].replicas_reserved = 0;
485
486		dirty_sectors -= s->s[i].state == SECTOR_dirty;
487		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
488	}
489
490	bch2_disk_reservation_put(c, &disk_res);
491
492	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);
493
494	bch2_folio_release(folio);
495}
496
497void bch2_set_folio_dirty(struct bch_fs *c,
498			  struct bch_inode_info *inode,
499			  struct folio *folio,
500			  struct bch2_folio_reservation *res,
501			  unsigned offset, unsigned len)
502{
503	struct bch_folio *s = bch2_folio(folio);
504	unsigned i, dirty_sectors = 0;
505
506	WARN_ON((u64) folio_pos(folio) + offset + len >
507		round_up((u64) i_size_read(&inode->v), block_bytes(c)));
508
509	BUG_ON(!s->uptodate);
510
511	spin_lock(&s->lock);
512
513	for (i = round_down(offset, block_bytes(c)) >> 9;
514	     i < round_up(offset + len, block_bytes(c)) >> 9;
515	     i++) {
516		unsigned sectors = sectors_to_reserve(&s->s[i],
517						res->disk.nr_replicas);
518
519		/*
520		 * This can happen if we race with the error path in
521		 * bch2_writepage_io_done():
522		 */
523		sectors = min_t(unsigned, sectors, res->disk.sectors);
524
525		s->s[i].replicas_reserved += sectors;
526		res->disk.sectors -= sectors;
527
528		dirty_sectors += s->s[i].state == SECTOR_unallocated;
529
530		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
531	}
532
533	spin_unlock(&s->lock);
534
535	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
536
537	if (!folio_test_dirty(folio))
538		filemap_dirty_folio(inode->v.i_mapping, folio);
539}
540
541vm_fault_t bch2_page_fault(struct vm_fault *vmf)
542{
543	struct file *file = vmf->vma->vm_file;
544	struct address_space *mapping = file->f_mapping;
545	struct address_space *fdm = faults_disabled_mapping();
546	struct bch_inode_info *inode = file_bch_inode(file);
547	vm_fault_t ret;
548
549	if (fdm == mapping)
550		return VM_FAULT_SIGBUS;
551
552	/* Lock ordering: */
553	if (fdm > mapping) {
554		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
555
556		if (bch2_pagecache_add_tryget(inode))
557			goto got_lock;
558
559		bch2_pagecache_block_put(fdm_host);
560
561		bch2_pagecache_add_get(inode);
562		bch2_pagecache_add_put(inode);
563
564		bch2_pagecache_block_get(fdm_host);
565
566		/* Signal that lock has been dropped: */
567		set_fdm_dropped_locks();
568		return VM_FAULT_SIGBUS;
569	}
570
571	bch2_pagecache_add_get(inode);
572got_lock:
573	ret = filemap_fault(vmf);
574	bch2_pagecache_add_put(inode);
575
576	return ret;
577}
578
579vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
580{
581	struct folio *folio = page_folio(vmf->page);
582	struct file *file = vmf->vma->vm_file;
583	struct bch_inode_info *inode = file_bch_inode(file);
584	struct address_space *mapping = file->f_mapping;
585	struct bch_fs *c = inode->v.i_sb->s_fs_info;
586	struct bch2_folio_reservation res;
587	unsigned len;
588	loff_t isize;
589	vm_fault_t ret;
590
591	bch2_folio_reservation_init(c, inode, &res);
592
593	sb_start_pagefault(inode->v.i_sb);
594	file_update_time(file);
595
596	/*
597	 * Not strictly necessary, but helps avoid dio writes livelocking in
598	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
599	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
600	 * page lock before invalidating page
601	 */
602	bch2_pagecache_add_get(inode);
603
604	folio_lock(folio);
605	isize = i_size_read(&inode->v);
606
607	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
608		folio_unlock(folio);
609		ret = VM_FAULT_NOPAGE;
610		goto out;
611	}
612
613	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
614
615	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
616	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
617		folio_unlock(folio);
618		ret = VM_FAULT_SIGBUS;
619		goto out;
620	}
621
622	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
623	bch2_folio_reservation_put(c, inode, &res);
624
625	folio_wait_stable(folio);
626	ret = VM_FAULT_LOCKED;
627out:
628	bch2_pagecache_add_put(inode);
629	sb_end_pagefault(inode->v.i_sb);
630
631	return ret;
632}
633
634void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
635{
636	if (offset || length < folio_size(folio))
637		return;
638
639	bch2_clear_folio_bits(folio);
640}
641
642bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
643{
644	if (folio_test_dirty(folio) || folio_test_writeback(folio))
645		return false;
646
647	bch2_clear_folio_bits(folio);
648	return true;
649}
650
651/* fseek: */
652
653static int folio_data_offset(struct folio *folio, loff_t pos,
654			     unsigned min_replicas)
655{
656	struct bch_folio *s = bch2_folio(folio);
657	unsigned i, sectors = folio_sectors(folio);
658
659	if (s)
660		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
661			if (s->s[i].state >= SECTOR_dirty &&
662			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
663				return i << SECTOR_SHIFT;
664
665	return -1;
666}
667
668loff_t bch2_seek_pagecache_data(struct inode *vinode,
669				loff_t start_offset,
670				loff_t end_offset,
671				unsigned min_replicas,
672				bool nonblock)
673{
674	struct folio_batch fbatch;
675	pgoff_t start_index	= start_offset >> PAGE_SHIFT;
676	pgoff_t end_index	= end_offset >> PAGE_SHIFT;
677	pgoff_t index		= start_index;
678	unsigned i;
679	loff_t ret;
680	int offset;
681
682	folio_batch_init(&fbatch);
683
684	while (filemap_get_folios(vinode->i_mapping,
685				  &index, end_index, &fbatch)) {
686		for (i = 0; i < folio_batch_count(&fbatch); i++) {
687			struct folio *folio = fbatch.folios[i];
688
689			if (!nonblock) {
690				folio_lock(folio);
691			} else if (!folio_trylock(folio)) {
692				folio_batch_release(&fbatch);
693				return -EAGAIN;
694			}
695
696			offset = folio_data_offset(folio,
697					max(folio_pos(folio), start_offset),
698					min_replicas);
699			if (offset >= 0) {
700				ret = clamp(folio_pos(folio) + offset,
701					    start_offset, end_offset);
702				folio_unlock(folio);
703				folio_batch_release(&fbatch);
704				return ret;
705			}
706			folio_unlock(folio);
707		}
708		folio_batch_release(&fbatch);
709		cond_resched();
710	}
711
712	return end_offset;
713}
714
715/*
716 * Search for a hole in a folio.
717 *
718 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
719 * code to indicate a pagecache hole exists at the returned offset. Otherwise
720 * return 0 if the folio is filled with data, or an error code. This function
721 * can return -EAGAIN if nonblock is specified.
722 */
723static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
724			      unsigned min_replicas, bool nonblock)
725{
726	struct folio *folio;
727	struct bch_folio *s;
728	unsigned i, sectors;
729	int ret = -ENOENT;
730
731	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
732				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
733	if (IS_ERR(folio))
734		return PTR_ERR(folio);
735
736	s = bch2_folio(folio);
737	if (!s)
738		goto unlock;
739
740	sectors = folio_sectors(folio);
741	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
742		if (s->s[i].state < SECTOR_dirty ||
743		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
744			*offset = max(*offset,
745				      folio_pos(folio) + (i << SECTOR_SHIFT));
746			goto unlock;
747		}
748
749	*offset = folio_end_pos(folio);
750	ret = 0;
751unlock:
752	folio_unlock(folio);
753	folio_put(folio);
754	return ret;
755}
756
757loff_t bch2_seek_pagecache_hole(struct inode *vinode,
758				loff_t start_offset,
759				loff_t end_offset,
760				unsigned min_replicas,
761				bool nonblock)
762{
763	struct address_space *mapping = vinode->i_mapping;
764	loff_t offset = start_offset;
765	loff_t ret = 0;
766
767	while (!ret && offset < end_offset)
768		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
769
770	if (ret && ret != -ENOENT)
771		return ret;
772	return min(offset, end_offset);
773}
774
775int bch2_clamp_data_hole(struct inode *inode,
776			 u64 *hole_start,
777			 u64 *hole_end,
778			 unsigned min_replicas,
779			 bool nonblock)
780{
781	loff_t ret;
782
783	ret = bch2_seek_pagecache_hole(inode,
784		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
785	if (ret < 0)
786		return ret;
787
788	*hole_start = ret;
789
790	if (*hole_start == *hole_end)
791		return 0;
792
793	ret = bch2_seek_pagecache_data(inode,
794		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
795	if (ret < 0)
796		return ret;
797
798	*hole_end = ret;
799	return 0;
800}
801
802#endif /* NO_BCACHEFS_FS */
fs/bcachefs/fs-io-pagecache.c (Linux v6.13.7)
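Relative to the v6.8 copy above, the changes visible in this version are:

- bch2_folio_set() no longer open-codes the transaction retry loop (bch2_trans_begin(), bch2_subvolume_get_snapshot(), the retry/err labels); it runs the walk through bch2_trans_run() with for_each_btree_key_in_subvolume_upto(), and the iterator flag is now spelled BTREE_ITER_slots instead of BTREE_ITER_SLOTS.
- The reservation path is reworked into __bch2_folio_reservation_get(), which takes size_t offset/len plus a partial flag and accumulates into a local struct disk_reservation. When only part of the request can be reserved, it trims the covered range down to a filesystem-block-aligned length and, in the partial case, returns the number of bytes reserved (or -BCH_ERR_ENOSPC_disk_reservation if that rounds down to zero). bch2_folio_reservation_get() and the new bch2_folio_reservation_get_partial() are thin wrappers passing partial = false and true respectively.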
  1// SPDX-License-Identifier: GPL-2.0
  2#ifndef NO_BCACHEFS_FS
  3
  4#include "bcachefs.h"
  5#include "btree_iter.h"
  6#include "extents.h"
  7#include "fs-io.h"
  8#include "fs-io-pagecache.h"
  9#include "subvolume.h"
 10
 11#include <linux/pagevec.h>
 12#include <linux/writeback.h>
 13
 14int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
 15				     loff_t start, u64 end,
 16				     fgf_t fgp_flags, gfp_t gfp,
 17				     folios *fs)
 18{
 19	struct folio *f;
 20	u64 pos = start;
 21	int ret = 0;
 22
 23	while (pos < end) {
 24		if ((u64) pos >= (u64) start + (1ULL << 20))
 25			fgp_flags &= ~FGP_CREAT;
 26
 27		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
 28		if (ret)
 29			break;
 30
 31		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
 32		if (IS_ERR_OR_NULL(f))
 33			break;
 34
 35		BUG_ON(fs->nr && folio_pos(f) != pos);
 36
 37		pos = folio_end_pos(f);
 38		darray_push(fs, f);
 39	}
 40
 41	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
 42		ret = -ENOMEM;
 43
 44	return fs->nr ? 0 : ret;
 45}
 46
 47/* pagecache_block must be held */
 48int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
 49					    loff_t start, loff_t end)
 50{
 51	int ret;
 52
 53	/*
 54	 * XXX: the way this is currently implemented, we can spin if a process
 55	 * is continually redirtying a specific page
 56	 */
 57	do {
 58		if (!mapping->nrpages)
 59			return 0;
 60
 61		ret = filemap_write_and_wait_range(mapping, start, end);
 62		if (ret)
 63			break;
 64
 65		if (!mapping->nrpages)
 66			return 0;
 67
 68		ret = invalidate_inode_pages2_range(mapping,
 69				start >> PAGE_SHIFT,
 70				end >> PAGE_SHIFT);
 71	} while (ret == -EBUSY);
 72
 73	return ret;
 74}
 75
 76#if 0
 77/* Useful for debug tracing: */
 78static const char * const bch2_folio_sector_states[] = {
 79#define x(n)	#n,
 80	BCH_FOLIO_SECTOR_STATE()
 81#undef x
 82	NULL
 83};
 84#endif
 85
 86static inline enum bch_folio_sector_state
 87folio_sector_dirty(enum bch_folio_sector_state state)
 88{
 89	switch (state) {
 90	case SECTOR_unallocated:
 91		return SECTOR_dirty;
 92	case SECTOR_reserved:
 93		return SECTOR_dirty_reserved;
 94	default:
 95		return state;
 96	}
 97}
 98
 99static inline enum bch_folio_sector_state
100folio_sector_undirty(enum bch_folio_sector_state state)
101{
102	switch (state) {
103	case SECTOR_dirty:
104		return SECTOR_unallocated;
105	case SECTOR_dirty_reserved:
106		return SECTOR_reserved;
107	default:
108		return state;
109	}
110}
111
112static inline enum bch_folio_sector_state
113folio_sector_reserve(enum bch_folio_sector_state state)
114{
115	switch (state) {
116	case SECTOR_unallocated:
117		return SECTOR_reserved;
118	case SECTOR_dirty:
119		return SECTOR_dirty_reserved;
120	default:
121		return state;
122	}
123}
124
125/* for newly allocated folios: */
126struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
127{
128	struct bch_folio *s;
129
130	s = kzalloc(sizeof(*s) +
131		    sizeof(struct bch_folio_sector) *
132		    folio_sectors(folio), gfp);
133	if (!s)
134		return NULL;
135
136	spin_lock_init(&s->lock);
137	folio_attach_private(folio, s);
138	return s;
139}
140
141struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
142{
143	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
144}
145
146static unsigned bkey_to_sector_state(struct bkey_s_c k)
147{
148	if (bkey_extent_is_reservation(k))
149		return SECTOR_reserved;
150	if (bkey_extent_is_allocation(k.k))
151		return SECTOR_allocated;
152	return SECTOR_unallocated;
153}
154
155static void __bch2_folio_set(struct folio *folio,
156			     unsigned pg_offset, unsigned pg_len,
157			     unsigned nr_ptrs, unsigned state)
158{
159	struct bch_folio *s = bch2_folio(folio);
160	unsigned i, sectors = folio_sectors(folio);
161
162	BUG_ON(pg_offset >= sectors);
163	BUG_ON(pg_offset + pg_len > sectors);
164
165	spin_lock(&s->lock);
166
167	for (i = pg_offset; i < pg_offset + pg_len; i++) {
168		s->s[i].nr_replicas	= nr_ptrs;
169		bch2_folio_sector_set(folio, s, i, state);
170	}
171
172	if (i == sectors)
173		s->uptodate = true;
174
175	spin_unlock(&s->lock);
176}
177
178/*
179 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
180 * extents btree:
181 */
182int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
183		   struct folio **fs, unsigned nr_folios)
184{
185	u64 offset = folio_sector(fs[0]);
186	bool need_set = false;
187
188	for (unsigned folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
189		struct bch_folio *s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
190		if (!s)
191			return -ENOMEM;
192
193		need_set |= !s->uptodate;
194	}
195
196	if (!need_set)
197		return 0;
198
199	unsigned folio_idx = 0;
200
201	return bch2_trans_run(c,
202		for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents,
203				   POS(inum.inum, offset),
204				   POS(inum.inum, U64_MAX),
205				   inum.subvol, BTREE_ITER_slots, k, ({
206			unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
207			unsigned state = bkey_to_sector_state(k);
208
209			while (folio_idx < nr_folios) {
210				struct folio *folio = fs[folio_idx];
211				u64 folio_start	= folio_sector(folio);
212				u64 folio_end	= folio_end_sector(folio);
213				unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
214					folio_start;
215				unsigned folio_len = min(k.k->p.offset, folio_end) -
216					folio_offset - folio_start;
217
218				BUG_ON(k.k->p.offset < folio_start);
219				BUG_ON(bkey_start_offset(k.k) > folio_end);
220
221				if (!bch2_folio(folio)->uptodate)
222					__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);
223
224				if (k.k->p.offset < folio_end)
225					break;
226				folio_idx++;
227			}
228
229			if (folio_idx == nr_folios)
230				break;
231			0;
232		})));
233}
234
235void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
236{
237	struct bvec_iter iter;
238	struct folio_vec fv;
239	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
240		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
241	unsigned state = bkey_to_sector_state(k);
242
243	bio_for_each_folio(fv, bio, iter)
244		__bch2_folio_set(fv.fv_folio,
245				 fv.fv_offset >> 9,
246				 fv.fv_len >> 9,
247				 nr_ptrs, state);
248}
249
250void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
251				     u64 start, u64 end)
252{
253	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
254	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
255	struct folio_batch fbatch;
256	unsigned i, j;
257
258	if (end <= start)
259		return;
260
261	folio_batch_init(&fbatch);
262
263	while (filemap_get_folios(inode->v.i_mapping,
264				  &index, end_index, &fbatch)) {
265		for (i = 0; i < folio_batch_count(&fbatch); i++) {
266			struct folio *folio = fbatch.folios[i];
267			u64 folio_start = folio_sector(folio);
268			u64 folio_end = folio_end_sector(folio);
269			unsigned folio_offset = max(start, folio_start) - folio_start;
270			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
271			struct bch_folio *s;
272
273			BUG_ON(end <= folio_start);
274
275			folio_lock(folio);
276			s = bch2_folio(folio);
277
278			if (s) {
279				spin_lock(&s->lock);
280				for (j = folio_offset; j < folio_offset + folio_len; j++)
281					s->s[j].nr_replicas = 0;
282				spin_unlock(&s->lock);
283			}
284
285			folio_unlock(folio);
286		}
287		folio_batch_release(&fbatch);
288		cond_resched();
289	}
290}
291
292int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
293				 u64 *start, u64 end,
294				 bool nonblocking)
295{
296	struct bch_fs *c = inode->v.i_sb->s_fs_info;
297	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
298	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
299	struct folio_batch fbatch;
300	s64 i_sectors_delta = 0;
301	int ret = 0;
302
303	if (end <= *start)
304		return 0;
305
306	folio_batch_init(&fbatch);
307
308	while (filemap_get_folios(inode->v.i_mapping,
309				  &index, end_index, &fbatch)) {
310		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
311			struct folio *folio = fbatch.folios[i];
312
313			if (!nonblocking)
314				folio_lock(folio);
315			else if (!folio_trylock(folio)) {
316				folio_batch_release(&fbatch);
317				ret = -EAGAIN;
318				break;
319			}
320
321			u64 folio_start = folio_sector(folio);
322			u64 folio_end = folio_end_sector(folio);
323
324			BUG_ON(end <= folio_start);
325
326			*start = min(end, folio_end);
327
328			struct bch_folio *s = bch2_folio(folio);
329			if (s) {
330				unsigned folio_offset = max(*start, folio_start) - folio_start;
331				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
332
333				spin_lock(&s->lock);
334				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
335					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
336					bch2_folio_sector_set(folio, s, j,
337						folio_sector_reserve(s->s[j].state));
338				}
339				spin_unlock(&s->lock);
340			}
341
342			folio_unlock(folio);
343		}
344		folio_batch_release(&fbatch);
345		cond_resched();
346	}
347
348	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
349	return ret;
350}
351
352static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
353					  unsigned nr_replicas)
354{
355	return max(0, (int) nr_replicas -
356		   s->nr_replicas -
357		   s->replicas_reserved);
358}
359
360int bch2_get_folio_disk_reservation(struct bch_fs *c,
361				struct bch_inode_info *inode,
362				struct folio *folio, bool check_enospc)
363{
364	struct bch_folio *s = bch2_folio_create(folio, 0);
365	unsigned nr_replicas = inode_nr_replicas(c, inode);
366	struct disk_reservation disk_res = { 0 };
367	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
368	int ret;
369
370	if (!s)
371		return -ENOMEM;
372
373	for (i = 0; i < sectors; i++)
374		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
375
376	if (!disk_res_sectors)
377		return 0;
378
379	ret = bch2_disk_reservation_get(c, &disk_res,
380					disk_res_sectors, 1,
381					!check_enospc
382					? BCH_DISK_RESERVATION_NOFAIL
383					: 0);
384	if (unlikely(ret))
385		return ret;
386
387	for (i = 0; i < sectors; i++)
388		s->s[i].replicas_reserved +=
389			sectors_to_reserve(&s->s[i], nr_replicas);
390
391	return 0;
392}
393
394void bch2_folio_reservation_put(struct bch_fs *c,
395			struct bch_inode_info *inode,
396			struct bch2_folio_reservation *res)
397{
398	bch2_disk_reservation_put(c, &res->disk);
399	bch2_quota_reservation_put(c, inode, &res->quota);
400}
401
402static int __bch2_folio_reservation_get(struct bch_fs *c,
403			struct bch_inode_info *inode,
404			struct folio *folio,
405			struct bch2_folio_reservation *res,
406			size_t offset, size_t len,
407			bool partial)
408{
409	struct bch_folio *s = bch2_folio_create(folio, 0);
410	unsigned i, disk_sectors = 0, quota_sectors = 0;
411	struct disk_reservation disk_res = {};
412	size_t reserved = len;
413	int ret;
414
415	if (!s)
416		return -ENOMEM;
417
418	BUG_ON(!s->uptodate);
419
420	for (i = round_down(offset, block_bytes(c)) >> 9;
421	     i < round_up(offset + len, block_bytes(c)) >> 9;
422	     i++) {
423		disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
424		quota_sectors += s->s[i].state == SECTOR_unallocated;
425	}
426
427	if (disk_sectors) {
428		ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors,
429				partial ? BCH_DISK_RESERVATION_PARTIAL : 0);
430		if (unlikely(ret))
431			return ret;
432
433		if (unlikely(disk_res.sectors != disk_sectors)) {
434			disk_sectors = quota_sectors = 0;
435
436			for (i = round_down(offset, block_bytes(c)) >> 9;
437			     i < round_up(offset + len, block_bytes(c)) >> 9;
438			     i++) {
439				disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
440				if (disk_sectors > disk_res.sectors) {
441					/*
442					 * Make sure to get a reservation that's
443					 * aligned to the filesystem blocksize:
444					 */
445					unsigned reserved_offset = round_down(i << 9, block_bytes(c));
446					reserved = clamp(reserved_offset, offset, offset + len) - offset;
447
448					if (!reserved) {
449						bch2_disk_reservation_put(c, &disk_res);
450						return -BCH_ERR_ENOSPC_disk_reservation;
451					}
452					break;
453				}
454				quota_sectors += s->s[i].state == SECTOR_unallocated;
455			}
456		}
457	}
458
459	if (quota_sectors) {
460		ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
461		if (unlikely(ret)) {
462			bch2_disk_reservation_put(c, &disk_res);
463			return ret;
464		}
465	}
466
467	res->disk.sectors += disk_res.sectors;
468	return partial ? reserved : 0;
469}
470
471int bch2_folio_reservation_get(struct bch_fs *c,
472			struct bch_inode_info *inode,
473			struct folio *folio,
474			struct bch2_folio_reservation *res,
475			size_t offset, size_t len)
476{
477	return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
478}
479
480ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
481			struct bch_inode_info *inode,
482			struct folio *folio,
483			struct bch2_folio_reservation *res,
484			size_t offset, size_t len)
485{
486	return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, true);
487}
488
489static void bch2_clear_folio_bits(struct folio *folio)
490{
491	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
492	struct bch_fs *c = inode->v.i_sb->s_fs_info;
493	struct bch_folio *s = bch2_folio(folio);
494	struct disk_reservation disk_res = { 0 };
495	int i, sectors = folio_sectors(folio), dirty_sectors = 0;
496
497	if (!s)
498		return;
499
500	EBUG_ON(!folio_test_locked(folio));
501	EBUG_ON(folio_test_writeback(folio));
502
503	for (i = 0; i < sectors; i++) {
504		disk_res.sectors += s->s[i].replicas_reserved;
505		s->s[i].replicas_reserved = 0;
506
507		dirty_sectors -= s->s[i].state == SECTOR_dirty;
508		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
509	}
510
511	bch2_disk_reservation_put(c, &disk_res);
512
513	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);
514
515	bch2_folio_release(folio);
516}
517
518void bch2_set_folio_dirty(struct bch_fs *c,
519			  struct bch_inode_info *inode,
520			  struct folio *folio,
521			  struct bch2_folio_reservation *res,
522			  unsigned offset, unsigned len)
523{
524	struct bch_folio *s = bch2_folio(folio);
525	unsigned i, dirty_sectors = 0;
526
527	WARN_ON((u64) folio_pos(folio) + offset + len >
528		round_up((u64) i_size_read(&inode->v), block_bytes(c)));
529
530	BUG_ON(!s->uptodate);
531
532	spin_lock(&s->lock);
533
534	for (i = round_down(offset, block_bytes(c)) >> 9;
535	     i < round_up(offset + len, block_bytes(c)) >> 9;
536	     i++) {
537		unsigned sectors = sectors_to_reserve(&s->s[i],
538						res->disk.nr_replicas);
539
540		/*
541		 * This can happen if we race with the error path in
542		 * bch2_writepage_io_done():
543		 */
544		sectors = min_t(unsigned, sectors, res->disk.sectors);
545
546		s->s[i].replicas_reserved += sectors;
547		res->disk.sectors -= sectors;
548
549		dirty_sectors += s->s[i].state == SECTOR_unallocated;
550
551		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
552	}
553
554	spin_unlock(&s->lock);
555
556	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
557
558	if (!folio_test_dirty(folio))
559		filemap_dirty_folio(inode->v.i_mapping, folio);
560}
561
562vm_fault_t bch2_page_fault(struct vm_fault *vmf)
563{
564	struct file *file = vmf->vma->vm_file;
565	struct address_space *mapping = file->f_mapping;
566	struct address_space *fdm = faults_disabled_mapping();
567	struct bch_inode_info *inode = file_bch_inode(file);
568	vm_fault_t ret;
569
570	if (fdm == mapping)
571		return VM_FAULT_SIGBUS;
572
573	/* Lock ordering: */
574	if (fdm > mapping) {
575		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
576
577		if (bch2_pagecache_add_tryget(inode))
578			goto got_lock;
579
580		bch2_pagecache_block_put(fdm_host);
581
582		bch2_pagecache_add_get(inode);
583		bch2_pagecache_add_put(inode);
584
585		bch2_pagecache_block_get(fdm_host);
586
587		/* Signal that lock has been dropped: */
588		set_fdm_dropped_locks();
589		return VM_FAULT_SIGBUS;
590	}
591
592	bch2_pagecache_add_get(inode);
593got_lock:
594	ret = filemap_fault(vmf);
595	bch2_pagecache_add_put(inode);
596
597	return ret;
598}
599
600vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
601{
602	struct folio *folio = page_folio(vmf->page);
603	struct file *file = vmf->vma->vm_file;
604	struct bch_inode_info *inode = file_bch_inode(file);
605	struct address_space *mapping = file->f_mapping;
606	struct bch_fs *c = inode->v.i_sb->s_fs_info;
607	struct bch2_folio_reservation res;
608	unsigned len;
609	loff_t isize;
610	vm_fault_t ret;
611
612	bch2_folio_reservation_init(c, inode, &res);
613
614	sb_start_pagefault(inode->v.i_sb);
615	file_update_time(file);
616
617	/*
618	 * Not strictly necessary, but helps avoid dio writes livelocking in
619	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
620	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
621	 * page lock before invalidating page
622	 */
623	bch2_pagecache_add_get(inode);
624
625	folio_lock(folio);
626	isize = i_size_read(&inode->v);
627
628	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
629		folio_unlock(folio);
630		ret = VM_FAULT_NOPAGE;
631		goto out;
632	}
633
634	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
635
636	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
637	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
638		folio_unlock(folio);
639		ret = VM_FAULT_SIGBUS;
640		goto out;
641	}
642
643	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
644	bch2_folio_reservation_put(c, inode, &res);
645
646	folio_wait_stable(folio);
647	ret = VM_FAULT_LOCKED;
648out:
649	bch2_pagecache_add_put(inode);
650	sb_end_pagefault(inode->v.i_sb);
651
652	return ret;
653}
654
655void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
656{
657	if (offset || length < folio_size(folio))
658		return;
659
660	bch2_clear_folio_bits(folio);
661}
662
663bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
664{
665	if (folio_test_dirty(folio) || folio_test_writeback(folio))
666		return false;
667
668	bch2_clear_folio_bits(folio);
669	return true;
670}
671
672/* fseek: */
673
674static int folio_data_offset(struct folio *folio, loff_t pos,
675			     unsigned min_replicas)
676{
677	struct bch_folio *s = bch2_folio(folio);
678	unsigned i, sectors = folio_sectors(folio);
679
680	if (s)
681		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
682			if (s->s[i].state >= SECTOR_dirty &&
683			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
684				return i << SECTOR_SHIFT;
685
686	return -1;
687}
688
689loff_t bch2_seek_pagecache_data(struct inode *vinode,
690				loff_t start_offset,
691				loff_t end_offset,
692				unsigned min_replicas,
693				bool nonblock)
694{
695	struct folio_batch fbatch;
696	pgoff_t start_index	= start_offset >> PAGE_SHIFT;
697	pgoff_t end_index	= end_offset >> PAGE_SHIFT;
698	pgoff_t index		= start_index;
699	unsigned i;
700	loff_t ret;
701	int offset;
702
703	folio_batch_init(&fbatch);
704
705	while (filemap_get_folios(vinode->i_mapping,
706				  &index, end_index, &fbatch)) {
707		for (i = 0; i < folio_batch_count(&fbatch); i++) {
708			struct folio *folio = fbatch.folios[i];
709
710			if (!nonblock) {
711				folio_lock(folio);
712			} else if (!folio_trylock(folio)) {
713				folio_batch_release(&fbatch);
714				return -EAGAIN;
715			}
716
717			offset = folio_data_offset(folio,
718					max(folio_pos(folio), start_offset),
719					min_replicas);
720			if (offset >= 0) {
721				ret = clamp(folio_pos(folio) + offset,
722					    start_offset, end_offset);
723				folio_unlock(folio);
724				folio_batch_release(&fbatch);
725				return ret;
726			}
727			folio_unlock(folio);
728		}
729		folio_batch_release(&fbatch);
730		cond_resched();
731	}
732
733	return end_offset;
734}
735
736/*
737 * Search for a hole in a folio.
738 *
739 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
740 * code to indicate a pagecache hole exists at the returned offset. Otherwise
741 * return 0 if the folio is filled with data, or an error code. This function
742 * can return -EAGAIN if nonblock is specified.
743 */
744static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
745			      unsigned min_replicas, bool nonblock)
746{
747	struct folio *folio;
748	struct bch_folio *s;
749	unsigned i, sectors;
750	int ret = -ENOENT;
751
752	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
753				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
754	if (IS_ERR(folio))
755		return PTR_ERR(folio);
756
757	s = bch2_folio(folio);
758	if (!s)
759		goto unlock;
760
761	sectors = folio_sectors(folio);
762	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
763		if (s->s[i].state < SECTOR_dirty ||
764		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
765			*offset = max(*offset,
766				      folio_pos(folio) + (i << SECTOR_SHIFT));
767			goto unlock;
768		}
769
770	*offset = folio_end_pos(folio);
771	ret = 0;
772unlock:
773	folio_unlock(folio);
774	folio_put(folio);
775	return ret;
776}
777
778loff_t bch2_seek_pagecache_hole(struct inode *vinode,
779				loff_t start_offset,
780				loff_t end_offset,
781				unsigned min_replicas,
782				bool nonblock)
783{
784	struct address_space *mapping = vinode->i_mapping;
785	loff_t offset = start_offset;
786	loff_t ret = 0;
787
788	while (!ret && offset < end_offset)
789		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
790
791	if (ret && ret != -ENOENT)
792		return ret;
793	return min(offset, end_offset);
794}
795
796int bch2_clamp_data_hole(struct inode *inode,
797			 u64 *hole_start,
798			 u64 *hole_end,
799			 unsigned min_replicas,
800			 bool nonblock)
801{
802	loff_t ret;
803
804	ret = bch2_seek_pagecache_hole(inode,
805		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
806	if (ret < 0)
807		return ret;
808
809	*hole_start = ret;
810
811	if (*hole_start == *hole_end)
812		return 0;
813
814	ret = bch2_seek_pagecache_data(inode,
815		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
816	if (ret < 0)
817		return ret;
818
819	*hole_end = ret;
820	return 0;
821}
822
823#endif /* NO_BCACHEFS_FS */
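For reference, the call sequence these helpers are designed around can be seen in bch2_page_mkwrite() in both versions above. The sketch below condenses it into a single hypothetical helper; the function name dirty_folio_example and its standalone form are illustrative only and not part of the file. It assumes the caller already holds the folio lock and the pagecache_add lock, and it omits the i_size checks the real fault handler performs. The calls and signatures used are the ones visible in the listings and apply to either version.

/* Illustrative only: condensed from bch2_page_mkwrite() above. */
static int dirty_folio_example(struct bch_fs *c, struct bch_inode_info *inode,
			       struct folio *folio, unsigned len)
{
	struct bch2_folio_reservation res;
	int ret;

	bch2_folio_reservation_init(c, inode, &res);

	/*
	 * Make sure per-sector state is initialized from the extents btree,
	 * then reserve disk space/quota for the sectors we're about to dirty:
	 */
	ret = bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
	      bch2_folio_reservation_get(c, inode, folio, &res, 0, len);
	if (ret)
		return ret;

	/* Transfer the reservation to the folio and mark it dirty: */
	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);

	/* Release whatever portion of the reservation wasn't consumed: */
	bch2_folio_reservation_put(c, inode, &res);
	return 0;
}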