read_collect.c - fs/netfs/read_collect.c - Linux source code v6.2

Note: File does not exist in v6.2.
  1// SPDX-License-Identifier: GPL-2.0-only
  2/* Network filesystem read subrequest result collection, assessment and
  3 * retrying.
  4 *
  5 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
  6 * Written by David Howells (dhowells@redhat.com)
  7 */
  8
  9#include <linux/export.h>
 10#include <linux/fs.h>
 11#include <linux/mm.h>
 12#include <linux/pagemap.h>
 13#include <linux/slab.h>
 14#include <linux/task_io_accounting_ops.h>
 15#include "internal.h"
 16
 17/*
 18 * Clear the unread part of an I/O request.
 19 */
 20static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
 21{
 22	netfs_reset_iter(subreq);
 23	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
 24	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
 25	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
 26		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
 27}
 28
 29/*
 30 * Flush, mark and unlock a folio that's now completely read.  If we want to
 31 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 32 * dirty and let writeback handle it.
 33 */
 34static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
 35				    struct netfs_io_request *rreq,
 36				    struct folio_queue *folioq,
 37				    int slot)
 38{
 39	struct netfs_folio *finfo;
 40	struct folio *folio = folioq_folio(folioq, slot);
 41
 42	flush_dcache_folio(folio);
 43	folio_mark_uptodate(folio);
 44
 45	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
 46		finfo = netfs_folio_info(folio);
 47		if (finfo) {
 48			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
 49			if (finfo->netfs_group)
 50				folio_change_private(folio, finfo->netfs_group);
 51			else
 52				folio_detach_private(folio);
 53			kfree(finfo);
 54		}
 55
 56		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
 57			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
 58				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
 59				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
 60				folio_mark_dirty(folio);
 61			}
 62		} else {
 63			trace_netfs_folio(folio, netfs_folio_trace_read_done);
 64		}
 65
 66		folioq_clear(folioq, slot);
 67	} else {
 68		// TODO: Use of PG_private_2 is deprecated.
 69		if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
 70			netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
 71		else
 72			folioq_clear(folioq, slot);
 73	}
 74
 75	if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
 76		if (folio->index == rreq->no_unlock_folio &&
 77		    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
 78			_debug("no unlock");
 79		} else {
 80			trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
 81			folio_unlock(folio);
 82		}
 83	}
 84}
 85
 86/*
 87 * Unlock any folios that are now completely read.  Returns true if the
 88 * subrequest is removed from the list.
 89 */
 90static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
 91{
 92	struct netfs_io_subrequest *prev, *next;
 93	struct netfs_io_request *rreq = subreq->rreq;
 94	struct folio_queue *folioq = subreq->curr_folioq;
 95	size_t avail, prev_donated, next_donated, fsize, part, excess;
 96	loff_t fpos, start;
 97	loff_t fend;
 98	int slot = subreq->curr_folioq_slot;
 99
100	if (WARN(subreq->transferred > subreq->len,
101		 "Subreq overread: R%x[%x] %zu > %zu",
102		 rreq->debug_id, subreq->debug_index,
103		 subreq->transferred, subreq->len))
104		subreq->transferred = subreq->len;
105
106next_folio:
107	fsize = PAGE_SIZE << subreq->curr_folio_order;
108	fpos = round_down(subreq->start + subreq->consumed, fsize);
109	fend = fpos + fsize;
110
111	if (WARN_ON_ONCE(!folioq) ||
112	    WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
113	    WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
114		pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
115		       rreq->debug_id, subreq->debug_index,
116		       subreq->start, subreq->start + subreq->transferred - 1,
117		       subreq->consumed, subreq->transferred, subreq->len,
118		       slot);
119		if (folioq) {
120			struct folio *folio = folioq_folio(folioq, slot);
121
122			pr_err("folioq: orders=%02x%02x%02x%02x\n",
123			       folioq->orders[0], folioq->orders[1],
124			       folioq->orders[2], folioq->orders[3]);
125			if (folio)
126				pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
127				       fpos, fend - 1, folio_pos(folio), folio_order(folio),
128				       folioq_folio_order(folioq, slot));
129		}
130	}
131
132donation_changed:
133	/* Try to consume the current folio if we've hit or passed the end of
134	 * it.  There's a possibility that this subreq doesn't start at the
135	 * beginning of the folio, in which case we need to donate to/from the
136	 * preceding subreq.
137	 *
138	 * We also need to include any potential donation back from the
139	 * following subreq.
140	 */
141	prev_donated = READ_ONCE(subreq->prev_donated);
142	next_donated =  READ_ONCE(subreq->next_donated);
143	if (prev_donated || next_donated) {
144		spin_lock_bh(&rreq->lock);
145		prev_donated = subreq->prev_donated;
146		next_donated =  subreq->next_donated;
147		subreq->start -= prev_donated;
148		subreq->len += prev_donated;
149		subreq->transferred += prev_donated;
150		prev_donated = subreq->prev_donated = 0;
151		if (subreq->transferred == subreq->len) {
152			subreq->len += next_donated;
153			subreq->transferred += next_donated;
154			next_donated = subreq->next_donated = 0;
155		}
156		trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
157		spin_unlock_bh(&rreq->lock);
158	}
159
160	avail = subreq->transferred;
161	if (avail == subreq->len)
162		avail += next_donated;
163	start = subreq->start;
164	if (subreq->consumed == 0) {
165		start -= prev_donated;
166		avail += prev_donated;
167	} else {
168		start += subreq->consumed;
169		avail -= subreq->consumed;
170	}
171	part = umin(avail, fsize);
172
173	trace_netfs_progress(subreq, start, avail, part);
174
175	if (start + avail >= fend) {
176		if (fpos == start) {
177			/* Flush, unlock and mark for caching any folio we've just read. */
178			subreq->consumed = fend - subreq->start;
179			netfs_unlock_read_folio(subreq, rreq, folioq, slot);
180			folioq_mark2(folioq, slot);
181			if (subreq->consumed >= subreq->len)
182				goto remove_subreq;
183		} else if (fpos < start) {
184			excess = fend - subreq->start;
185
186			spin_lock_bh(&rreq->lock);
187			/* If we complete first on a folio split with the
188			 * preceding subreq, donate to that subreq - otherwise
189			 * we get the responsibility.
190			 */
191			if (subreq->prev_donated != prev_donated) {
192				spin_unlock_bh(&rreq->lock);
193				goto donation_changed;
194			}
195
196			if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
197				spin_unlock_bh(&rreq->lock);
198				pr_err("Can't donate prior to front\n");
199				goto bad;
200			}
201
202			prev = list_prev_entry(subreq, rreq_link);
203			WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
204			subreq->start += excess;
205			subreq->len -= excess;
206			subreq->transferred -= excess;
207			trace_netfs_donate(rreq, subreq, prev, excess,
208					   netfs_trace_donate_tail_to_prev);
209			trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
210
211			if (subreq->consumed >= subreq->len)
212				goto remove_subreq_locked;
213			spin_unlock_bh(&rreq->lock);
214		} else {
215			pr_err("fpos > start\n");
216			goto bad;
217		}
218
219		/* Advance the rolling buffer to the next folio. */
220		slot++;
221		if (slot >= folioq_nr_slots(folioq)) {
222			slot = 0;
223			folioq = folioq->next;
224			subreq->curr_folioq = folioq;
225		}
226		subreq->curr_folioq_slot = slot;
227		if (folioq && folioq_folio(folioq, slot))
228			subreq->curr_folio_order = folioq->orders[slot];
229		if (!was_async)
230			cond_resched();
231		goto next_folio;
232	}
233
234	/* Deal with partial progress. */
235	if (subreq->transferred < subreq->len)
236		return false;
237
238	/* Donate the remaining downloaded data to one of the neighbouring
239	 * subrequests.  Note that we may race with them doing the same thing.
240	 */
241	spin_lock_bh(&rreq->lock);
242
243	if (subreq->prev_donated != prev_donated ||
244	    subreq->next_donated != next_donated) {
245		spin_unlock_bh(&rreq->lock);
246		cond_resched();
247		goto donation_changed;
248	}
249
250	/* Deal with the trickiest case: that this subreq is in the middle of a
251	 * folio, not touching either edge, but finishes first.  In such a
252	 * case, we donate to the previous subreq, if there is one and if it is
253	 * contiguous, so that the donation is only handled when that completes
254	 * - and remove this subreq from the list.
255	 *
256	 * If the previous subreq finished first, we will have acquired their
257	 * donation and should be able to unlock folios and/or donate nextwards.
258	 */
259	if (!subreq->consumed &&
260	    !prev_donated &&
261	    !list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
262		prev = list_prev_entry(subreq, rreq_link);
263		if (subreq->start == prev->start + prev->len) {
264			WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
265			subreq->start += subreq->len;
266			subreq->len = 0;
267			subreq->transferred = 0;
268			trace_netfs_donate(rreq, subreq, prev, subreq->len,
269					   netfs_trace_donate_to_prev);
270			trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
271			goto remove_subreq_locked;
272		}
273	}
274
275	/* If we can't donate down the chain, donate up the chain instead. */
276	excess = subreq->len - subreq->consumed + next_donated;
277
278	if (!subreq->consumed)
279		excess += prev_donated;
280
281	if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
282		rreq->prev_donated = excess;
283		trace_netfs_donate(rreq, subreq, NULL, excess,
284				   netfs_trace_donate_to_deferred_next);
285	} else {
286		next = list_next_entry(subreq, rreq_link);
287		WRITE_ONCE(next->prev_donated, excess);
288		trace_netfs_donate(rreq, subreq, next, excess,
289				   netfs_trace_donate_to_next);
290	}
291	trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
292	subreq->len = subreq->consumed;
293	subreq->transferred = subreq->consumed;
294	goto remove_subreq_locked;
295
296remove_subreq:
297	spin_lock_bh(&rreq->lock);
298remove_subreq_locked:
299	subreq->consumed = subreq->len;
300	list_del(&subreq->rreq_link);
301	spin_unlock_bh(&rreq->lock);
302	netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
303	return true;
304
305bad:
306	/* Errr... prev and next both donated to us, but insufficient to finish
307	 * the folio.
308	 */
309	printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
310	       rreq->debug_id, subreq->debug_index,
311	       subreq->start, subreq->start + subreq->transferred - 1,
312	       subreq->consumed, subreq->transferred, subreq->len);
313	printk("folio: %llx-%llx\n", fpos, fend - 1);
314	printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
315	printk("s=%llx av=%zx part=%zx\n", start, avail, part);
316	BUG();
317}
318
319/*
320 * Do page flushing and suchlike after DIO.
321 */
322static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
323{
324	struct netfs_io_subrequest *subreq;
325	unsigned int i;
326
327	/* Collect unbuffered reads and direct reads, adding up the transfer
328	 * sizes until we find the first short or failed subrequest.
329	 */
330	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
331		rreq->transferred += subreq->transferred;
332
333		if (subreq->transferred < subreq->len ||
334		    test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
335			rreq->error = subreq->error;
336			break;
337		}
338	}
339
340	if (rreq->origin == NETFS_DIO_READ) {
341		for (i = 0; i < rreq->direct_bv_count; i++) {
342			flush_dcache_page(rreq->direct_bv[i].bv_page);
343			// TODO: cifs marks pages in the destination buffer
344			// dirty under some circumstances after a read.  Do we
345			// need to do that too?
346			set_page_dirty(rreq->direct_bv[i].bv_page);
347		}
348	}
349
350	if (rreq->iocb) {
351		rreq->iocb->ki_pos += rreq->transferred;
352		if (rreq->iocb->ki_complete)
353			rreq->iocb->ki_complete(
354				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
355	}
356	if (rreq->netfs_ops->done)
357		rreq->netfs_ops->done(rreq);
358	if (rreq->origin == NETFS_DIO_READ)
359		inode_dio_end(rreq->inode);
360}
361
362/*
363 * Assess the state of a read request and decide what to do next.
364 *
365 * Note that we're in normal kernel thread context at this point, possibly
366 * running on a workqueue.
367 */
368static void netfs_rreq_assess(struct netfs_io_request *rreq)
369{
370	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);
371
372	//netfs_rreq_is_still_valid(rreq);
373
374	if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
375		netfs_retry_reads(rreq);
376		return;
377	}
378
379	if (rreq->origin == NETFS_DIO_READ ||
380	    rreq->origin == NETFS_READ_GAPS)
381		netfs_rreq_assess_dio(rreq);
382	task_io_account_read(rreq->transferred);
383
384	trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
385	clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
386
387	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
388	netfs_clear_subrequests(rreq, false);
389	netfs_unlock_abandoned_read_pages(rreq);
390	if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)))
391		netfs_pgpriv2_write_to_the_cache(rreq);
392}
393
394void netfs_read_termination_worker(struct work_struct *work)
395{
396	struct netfs_io_request *rreq =
397		container_of(work, struct netfs_io_request, work);
398	netfs_see_request(rreq, netfs_rreq_trace_see_work);
399	netfs_rreq_assess(rreq);
400	netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
401}
402
403/*
404 * Handle the completion of all outstanding I/O operations on a read request.
405 * We inherit a ref from the caller.
406 */
407void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
408{
409	if (!was_async)
410		return netfs_rreq_assess(rreq);
411	if (!work_pending(&rreq->work)) {
412		netfs_get_request(rreq, netfs_rreq_trace_get_work);
413		if (!queue_work(system_unbound_wq, &rreq->work))
414			netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
415	}
416}
417
418/**
419 * netfs_read_subreq_progress - Note progress of a read operation.
420 * @subreq: The read request that has terminated.
421 * @was_async: True if we're in an asynchronous context.
422 *
423 * This tells the read side of netfs lib that a contributory I/O operation has
424 * made some progress and that it may be possible to unlock some folios.
425 *
426 * Before calling, the filesystem should update subreq->transferred to track
427 * the amount of data copied into the output buffer.
428 *
429 * If @was_async is true, the caller might be running in softirq or interrupt
430 * context and we can't sleep.
431 */
432void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
433				bool was_async)
434{
435	struct netfs_io_request *rreq = subreq->rreq;
436
437	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);
438
439	if (subreq->transferred > subreq->consumed &&
440	    (rreq->origin == NETFS_READAHEAD ||
441	     rreq->origin == NETFS_READPAGE ||
442	     rreq->origin == NETFS_READ_FOR_WRITE)) {
443		netfs_consume_read_data(subreq, was_async);
444		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
445	}
446}
447EXPORT_SYMBOL(netfs_read_subreq_progress);
448
449/**
450 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
451 * @subreq: The I/O request that has terminated.
452 * @error: Error code indicating type of completion.
453 * @was_async: The termination was asynchronous
454 *
455 * This tells the read helper that a contributory I/O operation has terminated,
456 * one way or another, and that it should integrate the results.
457 *
458 * The caller indicates the outcome of the operation through @error, supplying
459 * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
460 * is set) or a negative error code.  The helper will look after reissuing I/O
461 * operations as appropriate and writing downloaded data to the cache.
462 *
463 * Before calling, the filesystem should update subreq->transferred to track
464 * the amount of data copied into the output buffer.
465 *
466 * If @was_async is true, the caller might be running in softirq or interrupt
467 * context and we can't sleep.
468 */
469void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
470				  int error, bool was_async)
471{
472	struct netfs_io_request *rreq = subreq->rreq;
473
474	switch (subreq->source) {
475	case NETFS_READ_FROM_CACHE:
476		netfs_stat(&netfs_n_rh_read_done);
477		break;
478	case NETFS_DOWNLOAD_FROM_SERVER:
479		netfs_stat(&netfs_n_rh_download_done);
480		break;
481	default:
482		break;
483	}
484
485	if (rreq->origin != NETFS_DIO_READ) {
486		/* Collect buffered reads.
487		 *
488		 * If the read completed validly short, then we can clear the
489		 * tail before going on to unlock the folios.
490		 */
491		if (error == 0 && subreq->transferred < subreq->len &&
492		    (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
493		     test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
494			netfs_clear_unread(subreq);
495			subreq->transferred = subreq->len;
496			trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
497		}
498		if (subreq->transferred > subreq->consumed &&
499		    (rreq->origin == NETFS_READAHEAD ||
500		     rreq->origin == NETFS_READPAGE ||
501		     rreq->origin == NETFS_READ_FOR_WRITE)) {
502			netfs_consume_read_data(subreq, was_async);
503			__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
504		}
505		rreq->transferred += subreq->transferred;
506	}
507
508	/* Deal with retry requests, short reads and errors.  If we retry
509	 * but don't make progress, we abandon the attempt.
510	 */
511	if (!error && subreq->transferred < subreq->len) {
512		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
513			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
514		} else {
515			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
516			if (subreq->transferred > subreq->consumed) {
517				/* If we didn't read new data, abandon retry. */
518				if (subreq->retry_count &&
519				    test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
520					__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
521					set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
522				}
523			} else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
524				__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
525				set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
526			} else {
527				__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
528				error = -ENODATA;
529			}
530		}
531	}
532
533	subreq->error = error;
534	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
535
536	if (unlikely(error < 0)) {
537		trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
538		if (subreq->source == NETFS_READ_FROM_CACHE) {
539			netfs_stat(&netfs_n_rh_read_failed);
540		} else {
541			netfs_stat(&netfs_n_rh_download_failed);
542			set_bit(NETFS_RREQ_FAILED, &rreq->flags);
543			rreq->error = subreq->error;
544		}
545	}
546
547	if (atomic_dec_and_test(&rreq->nr_outstanding))
548		netfs_rreq_terminated(rreq, was_async);
549
550	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
551}
552EXPORT_SYMBOL(netfs_read_subreq_terminated);