// SPDX-License-Identifier: GPL-2.0-or-later
/* kiocb-using read/write
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/sched/mm.h>
#include <trace/events/fscache.h>
#include "internal.h"

struct cachefiles_kiocb {
	struct kiocb		iocb;
	refcount_t		ki_refcnt;
	loff_t			start;
	union {
		size_t		skipped;
		size_t		len;
	};
	struct cachefiles_object *object;
	netfs_io_terminated_t	term_func;
	void			*term_func_priv;
	bool			was_async;
	unsigned int		inval_counter;	/* Copy of cookie->inval_counter */
	u64			b_writing;
};

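/*
 * Drop a ref on a kiocb wrapper, putting the backing file and cache object
 * and freeing the wrapper when the last ref is gone.
 */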
static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
{
	if (refcount_dec_and_test(&ki->ki_refcnt)) {
		cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
		fput(ki->iocb.ki_filp);
		kfree(ki);
	}
}

/*
 * Handle completion of a read from the cache.
 */
static void cachefiles_read_complete(struct kiocb *iocb, long ret)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	_enter("%ld", ret);

	if (ret < 0)
		trace_cachefiles_io_error(ki->object, inode, ret,
					  cachefiles_trace_read_error);

	if (ki->term_func) {
		if (ret >= 0) {
			if (ki->object->cookie->inval_counter == ki->inval_counter)
				ki->skipped += ret;
			else
				ret = -ESTALE;
		}

		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	}

	cachefiles_put_kiocb(ki);
}

/*
 * Initiate a read from the cache.
 */
static int cachefiles_read(struct netfs_cache_resources *cres,
			   loff_t start_pos,
			   struct iov_iter *iter,
			   enum netfs_read_from_hole read_hole,
			   netfs_io_terminated_t term_func,
			   void *term_func_priv)
{
	struct cachefiles_object *object;
	struct cachefiles_kiocb *ki;
	struct file *file;
	unsigned int old_nofs;
	ssize_t ret = -ENOBUFS;
	size_t len = iov_iter_count(iter), skipped = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		goto presubmission_error;

	fscache_count_read();
	object = cachefiles_cres_object(cres);
	file = cachefiles_cres_file(cres);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	/* If the caller asked us to seek for data before doing the read, then
	 * we should do that now.  If we find a gap, we fill it with zeros.
	 */
	if (read_hole != NETFS_READ_HOLE_IGNORE) {
		loff_t off = start_pos, off2;

		off2 = cachefiles_inject_read_error();
		if (off2 == 0)
			off2 = vfs_llseek(file, off, SEEK_DATA);
		if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
			skipped = 0;
			ret = off2;
			goto presubmission_error;
		}

		if (off2 == -ENXIO || off2 >= start_pos + len) {
			/* The region is beyond the EOF or there's no more data
			 * in the region, so clear the rest of the buffer and
			 * return success.
			 */
			ret = -ENODATA;
			if (read_hole == NETFS_READ_HOLE_FAIL)
				goto presubmission_error;

			iov_iter_zero(len, iter);
			skipped = len;
			ret = 0;
			goto presubmission_error;
		}

		skipped = off2 - off;
		iov_iter_zero(skipped, iter);
	}

	ret = -ENOMEM;
	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki)
		goto presubmission_error;

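	/* Two refs: one is dropped by the completion handler, the other by
	 * this function once submission is done.
	 */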
	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos + skipped;
	ki->iocb.ki_flags	= IOCB_DIRECT;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->skipped		= skipped;
	ki->object		= object;
	ki->inval_counter	= cres->inval_counter;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_read_complete;

	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIOs
		 * may already be running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_read_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;

presubmission_error:
	if (term_func)
		term_func(term_func_priv, ret < 0 ? ret : skipped, false);
	return ret;
}

/*
 * Query the occupancy of the cache in a region, returning where the next chunk
 * of data starts and how long it is.
 */
static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
				      loff_t start, size_t len, size_t granularity,
				      loff_t *_data_start, size_t *_data_len)
{
	struct cachefiles_object *object;
	struct file *file;
	loff_t off, off2;

	*_data_start = -1;
	*_data_len = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		return -ENOBUFS;

	object = cachefiles_cres_object(cres);
	file = cachefiles_cres_file(cres);
	granularity = max_t(size_t, object->volume->cache->bsize, granularity);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start, len,
	       i_size_read(file_inode(file)));

	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (off < 0 && off >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */
	if (round_up(off, granularity) >= start + len)
		return -ENODATA; /* No data in range */

	off2 = cachefiles_inject_read_error();
	if (off2 == 0)
		off2 = vfs_llseek(file, off, SEEK_HOLE);
	if (off2 == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */

	/* Round away partial blocks */
	off = round_up(off, granularity);
	off2 = round_down(off2, granularity);
	if (off2 <= off)
		return -ENODATA;

	*_data_start = off;
	if (off2 > start + len)
		*_data_len = len;
	else
		*_data_len = off2 - off;
	return 0;
}

/*
 * Handle completion of a write to the cache.
 */
static void cachefiles_write_complete(struct kiocb *iocb, long ret)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct cachefiles_object *object = ki->object;
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	_enter("%ld", ret);

	/* Tell lockdep we inherited freeze protection from submission thread */
	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
	__sb_end_write(inode->i_sb, SB_FREEZE_WRITE);

	if (ret < 0)
		trace_cachefiles_io_error(object, inode, ret,
					  cachefiles_trace_write_error);

	atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
	set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
	if (ki->term_func)
		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	cachefiles_put_kiocb(ki);
}

/*
 * Initiate a write to the cache.
 */
int __cachefiles_write(struct cachefiles_object *object,
		       struct file *file,
		       loff_t start_pos,
		       struct iov_iter *iter,
		       netfs_io_terminated_t term_func,
		       void *term_func_priv)
{
	struct cachefiles_cache *cache;
	struct cachefiles_kiocb *ki;
	struct inode *inode;
	unsigned int old_nofs;
	ssize_t ret;
	size_t len = iov_iter_count(iter);

	fscache_count_write();
	cache = object->volume->cache;

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki) {
		if (term_func)
			term_func(term_func_priv, -ENOMEM, false);
		return -ENOMEM;
	}

	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos;
	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->object		= object;
	ki->start		= start_pos;
	ki->len			= len;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;
	ki->b_writing		= (len + (1 << cache->bshift) - 1) >> cache->bshift;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_write_complete;
	atomic_long_add(ki->b_writing, &cache->b_writing);

	/* Open-code file_start_write here to grab freeze protection, which
	 * will be released by another thread in aio_complete_rw().  Fool
	 * lockdep by telling it the lock got released so that it doesn't
	 * complain about the held lock when we return to userspace.
	 */
	inode = file_inode(file);
	__sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);

	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_write_error();
	if (ret == 0)
		ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIOs
		 * may already be running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_write_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;
}

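/*
 * Initiate a write to the cache, first waiting for the cache object to become
 * available for writing.
 */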
static int cachefiles_write(struct netfs_cache_resources *cres,
			    loff_t start_pos,
			    struct iov_iter *iter,
			    netfs_io_terminated_t term_func,
			    void *term_func_priv)
{
	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
		if (term_func)
			term_func(term_func_priv, -ENOBUFS, false);
		return -ENOBUFS;
	}

	return __cachefiles_write(cachefiles_cres_object(cres),
				  cachefiles_cres_file(cres),
				  start_pos, iter,
				  term_func, term_func_priv);
}

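/*
 * Decide whether a read should be satisfied from the cache, downloaded from
 * the server or filled with zeroes, trimming the length to the next
 * cached/uncached boundary and flagging whether the data should then be
 * copied into the cache.
 */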
static inline enum netfs_io_source
cachefiles_do_prepare_read(struct netfs_cache_resources *cres,
			   loff_t start, size_t *_len, loff_t i_size,
			   unsigned long *_flags, ino_t netfs_ino)
{
	enum cachefiles_prepare_read_trace why;
	struct cachefiles_object *object = NULL;
	struct cachefiles_cache *cache;
	struct fscache_cookie *cookie = fscache_cres_cookie(cres);
	const struct cred *saved_cred;
	struct file *file = cachefiles_cres_file(cres);
	enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
	size_t len = *_len;
	loff_t off, to;
	ino_t ino = file ? file_inode(file)->i_ino : 0;
	int rc;

	_enter("%zx @%llx/%llx", len, start, i_size);

	if (start >= i_size) {
		ret = NETFS_FILL_WITH_ZEROES;
		why = cachefiles_trace_read_after_eof;
		goto out_no_object;
	}

	if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
		__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
		why = cachefiles_trace_read_no_data;
		if (!test_bit(NETFS_SREQ_ONDEMAND, _flags))
			goto out_no_object;
	}

	/* The object and the file may be being created in the background. */
	if (!file) {
		why = cachefiles_trace_read_no_file;
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
			goto out_no_object;
		file = cachefiles_cres_file(cres);
		if (!file)
			goto out_no_object;
		ino = file_inode(file)->i_ino;
	}

	object = cachefiles_cres_object(cres);
	cache = object->volume->cache;
	cachefiles_begin_secure(cache, &saved_cred);
retry:
	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
		if (off == (loff_t)-ENXIO) {
			why = cachefiles_trace_read_seek_nxio;
			goto download_and_store;
		}
		trace_cachefiles_io_error(object, file_inode(file), off,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	if (off >= start + len) {
		why = cachefiles_trace_read_found_hole;
		goto download_and_store;
	}

	if (off > start) {
		off = round_up(off, cache->bsize);
		len = off - start;
		*_len = len;
		why = cachefiles_trace_read_found_part;
		goto download_and_store;
	}

	to = cachefiles_inject_read_error();
	if (to == 0)
		to = vfs_llseek(file, start, SEEK_HOLE);
	if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), to,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	if (to < start + len) {
		if (start + len >= i_size)
			to = round_up(to, cache->bsize);
		else
			to = round_down(to, cache->bsize);
		len = to - start;
		*_len = len;
	}

	why = cachefiles_trace_read_have_data;
	ret = NETFS_READ_FROM_CACHE;
	goto out;

download_and_store:
	__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
	if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) {
		rc = cachefiles_ondemand_read(object, start, len);
		if (!rc) {
			__clear_bit(NETFS_SREQ_ONDEMAND, _flags);
			goto retry;
		}
		ret = NETFS_INVALID_READ;
	}
out:
	cachefiles_end_secure(cache, saved_cred);
out_no_object:
	trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino);
	return ret;
}

/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
						    loff_t i_size)
{
	return cachefiles_do_prepare_read(&subreq->rreq->cache_resources,
					  subreq->start, &subreq->len, i_size,
					  &subreq->flags, subreq->rreq->inode->i_ino);
}

/*
 * Prepare an on-demand read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source
cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
				 loff_t start, size_t *_len, loff_t i_size,
				 unsigned long *_flags, ino_t ino)
{
	return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino);
}

/*
 * Prepare for a write to occur.
 */
int __cachefiles_prepare_write(struct cachefiles_object *object,
			       struct file *file,
			       loff_t *_start, size_t *_len,
			       bool no_space_allocated_yet)
{
	struct cachefiles_cache *cache = object->volume->cache;
	loff_t start = *_start, pos;
	size_t len = *_len, down;
	int ret;

	/* Round to DIO size */
	down = start - round_down(start, PAGE_SIZE);
	*_start = start - down;
	*_len = round_up(down + len, PAGE_SIZE);

	/* We need to work out whether there's sufficient disk space to perform
	 * the write - but we can skip that check if we have space already
	 * allocated.
	 */
	if (no_space_allocated_yet)
		goto check_space;

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, *_start, SEEK_DATA);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		if (pos == -ENXIO)
			goto check_space; /* Unallocated tail */
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)*_start + *_len)
		goto check_space; /* Unallocated region */

	/* We have a block that's at least partially filled - if we're low on
	 * space, we need to see if it's fully allocated.  If it's not, we may
	 * want to cull it.
	 */
	if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				 cachefiles_has_space_check) == 0)
		return 0; /* Enough space to simply overwrite the whole block */

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, *_start, SEEK_HOLE);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)*_start + *_len)
		return 0; /* Fully allocated */

	/* Partially allocated, but insufficient space: cull. */
	fscache_count_no_write_space();
	ret = cachefiles_inject_remove_error();
	if (ret == 0)
		ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				    *_start, *_len);
	if (ret < 0) {
		trace_cachefiles_io_error(object, file_inode(file), ret,
					  cachefiles_trace_fallocate_error);
		cachefiles_io_error_obj(object,
					"CacheFiles: fallocate failed (%d)\n", ret);
		ret = -EIO;
	}

	return ret;

check_space:
	return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				    cachefiles_has_space_for_write);
}

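/*
 * Prepare a write, making sure the backing file is available and doing the
 * space check under the cache's security context.
 */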
static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
				    loff_t *_start, size_t *_len, loff_t i_size,
				    bool no_space_allocated_yet)
{
	struct cachefiles_object *object = cachefiles_cres_object(cres);
	struct cachefiles_cache *cache = object->volume->cache;
	const struct cred *saved_cred;
	int ret;

	if (!cachefiles_cres_file(cres)) {
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
			return -ENOBUFS;
		if (!cachefiles_cres_file(cres))
			return -ENOBUFS;
	}

	cachefiles_begin_secure(cache, &saved_cred);
	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
					 _start, _len,
					 no_space_allocated_yet);
	cachefiles_end_secure(cache, saved_cred);
	return ret;
}

/*
 * Clean up an operation.
 */
static void cachefiles_end_operation(struct netfs_cache_resources *cres)
{
	struct file *file = cachefiles_cres_file(cres);

	if (file)
		fput(file);
	fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
}

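/*
 * Table of cache I/O operations handed to netfslib via the cache resources in
 * cachefiles_begin_operation().
 */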
static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
	.end_operation		= cachefiles_end_operation,
	.read			= cachefiles_read,
	.write			= cachefiles_write,
	.prepare_read		= cachefiles_prepare_read,
	.prepare_write		= cachefiles_prepare_write,
	.prepare_ondemand_read	= cachefiles_prepare_ondemand_read,
	.query_occupancy	= cachefiles_query_occupancy,
};

/*
 * Open the cache file when beginning a cache operation.
 */
bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
				enum fscache_want_state want_state)
{
	struct cachefiles_object *object = cachefiles_cres_object(cres);

	if (!cachefiles_cres_file(cres)) {
		cres->ops = &cachefiles_netfs_cache_ops;
		if (object->file) {
			spin_lock(&object->lock);
			if (!cres->cache_priv2 && object->file)
				cres->cache_priv2 = get_file(object->file);
			spin_unlock(&object->lock);
		}
	}

	if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
		pr_err("failed to get cres->file\n");
		return false;
	}

	return true;
}