Loading...
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2022, Alibaba Cloud
4 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
5 */
6#include <linux/fscache.h>
7#include "internal.h"
8
/* Registered shared domains; walked and modified under erofs_domain_list_lock. */
static LIST_HEAD(erofs_domain_list);
static DEFINE_MUTEX(erofs_domain_list_lock);

/* Cookies registered in shared domains; guarded by erofs_domain_cookies_lock. */
static LIST_HEAD(erofs_domain_cookies_list);
static DEFINE_MUTEX(erofs_domain_cookies_lock);

/* Pseudo mount whose superblock hosts the anonymous inodes of shared blobs. */
static struct vfsmount *erofs_pseudo_mnt;
14
15struct erofs_fscache_request {
16 struct erofs_fscache_request *primary;
17 struct netfs_cache_resources cache_resources;
18 struct address_space *mapping; /* The mapping being accessed */
19 loff_t start; /* Start position */
20 size_t len; /* Length of the request */
21 size_t submitted; /* Length of submitted */
22 short error; /* 0 or error that occurred */
23 refcount_t ref;
24};
25
26static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
27 loff_t start, size_t len)
28{
29 struct erofs_fscache_request *req;
30
31 req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
32 if (!req)
33 return ERR_PTR(-ENOMEM);
34
35 req->mapping = mapping;
36 req->start = start;
37 req->len = len;
38 refcount_set(&req->ref, 1);
39
40 return req;
41}
42
43static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
44 size_t len)
45{
46 struct erofs_fscache_request *req;
47
48 /* use primary request for the first submission */
49 if (!primary->submitted) {
50 refcount_inc(&primary->ref);
51 return primary;
52 }
53
54 req = erofs_fscache_req_alloc(primary->mapping,
55 primary->start + primary->submitted, len);
56 if (!IS_ERR(req)) {
57 req->primary = primary;
58 refcount_inc(&primary->ref);
59 }
60 return req;
61}
62
63static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
64{
65 struct folio *folio;
66 bool failed = req->error;
67 pgoff_t start_page = req->start / PAGE_SIZE;
68 pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
69
70 XA_STATE(xas, &req->mapping->i_pages, start_page);
71
72 rcu_read_lock();
73 xas_for_each(&xas, folio, last_page) {
74 if (xas_retry(&xas, folio))
75 continue;
76 if (!failed)
77 folio_mark_uptodate(folio);
78 folio_unlock(folio);
79 }
80 rcu_read_unlock();
81}
82
83static void erofs_fscache_req_put(struct erofs_fscache_request *req)
84{
85 if (refcount_dec_and_test(&req->ref)) {
86 if (req->cache_resources.ops)
87 req->cache_resources.ops->end_operation(&req->cache_resources);
88 if (!req->primary)
89 erofs_fscache_req_complete(req);
90 else
91 erofs_fscache_req_put(req->primary);
92 kfree(req);
93 }
94}
95
96static void erofs_fscache_subreq_complete(void *priv,
97 ssize_t transferred_or_error, bool was_async)
98{
99 struct erofs_fscache_request *req = priv;
100
101 if (IS_ERR_VALUE(transferred_or_error)) {
102 if (req->primary)
103 req->primary->error = transferred_or_error;
104 else
105 req->error = transferred_or_error;
106 }
107 erofs_fscache_req_put(req);
108}
109
110/*
111 * Read data from fscache (cookie, pstart, len), and fill the read data into
112 * page cache described by (req->mapping, lstart, len). @pstart describeis the
113 * start physical address in the cache file.
114 */
115static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
116 struct erofs_fscache_request *req, loff_t pstart, size_t len)
117{
118 enum netfs_io_source source;
119 struct super_block *sb = req->mapping->host->i_sb;
120 struct netfs_cache_resources *cres = &req->cache_resources;
121 struct iov_iter iter;
122 loff_t lstart = req->start + req->submitted;
123 size_t done = 0;
124 int ret;
125
126 DBG_BUGON(len > req->len - req->submitted);
127
128 ret = fscache_begin_read_operation(cres, cookie);
129 if (ret)
130 return ret;
131
132 while (done < len) {
133 loff_t sstart = pstart + done;
134 size_t slen = len - done;
135 unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
136
137 source = cres->ops->prepare_ondemand_read(cres,
138 sstart, &slen, LLONG_MAX, &flags, 0);
139 if (WARN_ON(slen == 0))
140 source = NETFS_INVALID_READ;
141 if (source != NETFS_READ_FROM_CACHE) {
142 erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
143 return -EIO;
144 }
145
146 refcount_inc(&req->ref);
147 iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
148 lstart + done, slen);
149
150 ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
151 erofs_fscache_subreq_complete, req);
152 if (ret == -EIOCBQUEUED)
153 ret = 0;
154 if (ret) {
155 erofs_err(sb, "failed to fscache_read (ret %d)", ret);
156 return ret;
157 }
158
159 done += slen;
160 }
161 DBG_BUGON(done != len);
162 return 0;
163}
164
165static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
166{
167 int ret;
168 struct erofs_fscache *ctx = folio->mapping->host->i_private;
169 struct erofs_fscache_request *req;
170
171 req = erofs_fscache_req_alloc(folio->mapping,
172 folio_pos(folio), folio_size(folio));
173 if (IS_ERR(req)) {
174 folio_unlock(folio);
175 return PTR_ERR(req);
176 }
177
178 ret = erofs_fscache_read_folios_async(ctx->cookie, req,
179 folio_pos(folio), folio_size(folio));
180 if (ret)
181 req->error = ret;
182
183 erofs_fscache_req_put(req);
184 return ret;
185}
186
187static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
188{
189 struct address_space *mapping = primary->mapping;
190 struct inode *inode = mapping->host;
191 struct super_block *sb = inode->i_sb;
192 struct erofs_fscache_request *req;
193 struct erofs_map_blocks map;
194 struct erofs_map_dev mdev;
195 struct iov_iter iter;
196 loff_t pos = primary->start + primary->submitted;
197 size_t count;
198 int ret;
199
200 map.m_la = pos;
201 ret = erofs_map_blocks(inode, &map);
202 if (ret)
203 return ret;
204
205 if (map.m_flags & EROFS_MAP_META) {
206 struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
207 erofs_blk_t blknr;
208 size_t offset, size;
209 void *src;
210
211 /* For tail packing layout, the offset may be non-zero. */
212 offset = erofs_blkoff(sb, map.m_pa);
213 blknr = erofs_blknr(sb, map.m_pa);
214 size = map.m_llen;
215
216 src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
217 if (IS_ERR(src))
218 return PTR_ERR(src);
219
220 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
221 if (copy_to_iter(src + offset, size, &iter) != size) {
222 erofs_put_metabuf(&buf);
223 return -EFAULT;
224 }
225 iov_iter_zero(PAGE_SIZE - size, &iter);
226 erofs_put_metabuf(&buf);
227 primary->submitted += PAGE_SIZE;
228 return 0;
229 }
230
231 count = primary->len - primary->submitted;
232 if (!(map.m_flags & EROFS_MAP_MAPPED)) {
233 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
234 iov_iter_zero(count, &iter);
235 primary->submitted += count;
236 return 0;
237 }
238
239 count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
240 DBG_BUGON(!count || count % PAGE_SIZE);
241
242 mdev = (struct erofs_map_dev) {
243 .m_deviceid = map.m_deviceid,
244 .m_pa = map.m_pa,
245 };
246 ret = erofs_map_dev(sb, &mdev);
247 if (ret)
248 return ret;
249
250 req = erofs_fscache_req_chain(primary, count);
251 if (IS_ERR(req))
252 return PTR_ERR(req);
253
254 ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
255 req, mdev.m_pa + (pos - map.m_la), count);
256 erofs_fscache_req_put(req);
257 primary->submitted += count;
258 return ret;
259}
260
261static int erofs_fscache_data_read(struct erofs_fscache_request *req)
262{
263 int ret;
264
265 do {
266 ret = erofs_fscache_data_read_slice(req);
267 if (ret)
268 req->error = ret;
269 } while (!ret && req->submitted < req->len);
270
271 return ret;
272}
273
274static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
275{
276 struct erofs_fscache_request *req;
277 int ret;
278
279 req = erofs_fscache_req_alloc(folio->mapping,
280 folio_pos(folio), folio_size(folio));
281 if (IS_ERR(req)) {
282 folio_unlock(folio);
283 return PTR_ERR(req);
284 }
285
286 ret = erofs_fscache_data_read(req);
287 erofs_fscache_req_put(req);
288 return ret;
289}
290
291static void erofs_fscache_readahead(struct readahead_control *rac)
292{
293 struct erofs_fscache_request *req;
294
295 if (!readahead_count(rac))
296 return;
297
298 req = erofs_fscache_req_alloc(rac->mapping,
299 readahead_pos(rac), readahead_length(rac));
300 if (IS_ERR(req))
301 return;
302
303 /* The request completion will drop refs on the folios. */
304 while (readahead_folio(rac))
305 ;
306
307 erofs_fscache_data_read(req);
308 erofs_fscache_req_put(req);
309}
310
311static const struct address_space_operations erofs_fscache_meta_aops = {
312 .read_folio = erofs_fscache_meta_read_folio,
313};
314
315const struct address_space_operations erofs_fscache_access_aops = {
316 .read_folio = erofs_fscache_read_folio,
317 .readahead = erofs_fscache_readahead,
318};
319
320static void erofs_fscache_domain_put(struct erofs_domain *domain)
321{
322 mutex_lock(&erofs_domain_list_lock);
323 if (refcount_dec_and_test(&domain->ref)) {
324 list_del(&domain->list);
325 if (list_empty(&erofs_domain_list)) {
326 kern_unmount(erofs_pseudo_mnt);
327 erofs_pseudo_mnt = NULL;
328 }
329 fscache_relinquish_volume(domain->volume, NULL, false);
330 mutex_unlock(&erofs_domain_list_lock);
331 kfree(domain->domain_id);
332 kfree(domain);
333 return;
334 }
335 mutex_unlock(&erofs_domain_list_lock);
336}
337
338static int erofs_fscache_register_volume(struct super_block *sb)
339{
340 struct erofs_sb_info *sbi = EROFS_SB(sb);
341 char *domain_id = sbi->domain_id;
342 struct fscache_volume *volume;
343 char *name;
344 int ret = 0;
345
346 name = kasprintf(GFP_KERNEL, "erofs,%s",
347 domain_id ? domain_id : sbi->fsid);
348 if (!name)
349 return -ENOMEM;
350
351 volume = fscache_acquire_volume(name, NULL, NULL, 0);
352 if (IS_ERR_OR_NULL(volume)) {
353 erofs_err(sb, "failed to register volume for %s", name);
354 ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
355 volume = NULL;
356 }
357
358 sbi->volume = volume;
359 kfree(name);
360 return ret;
361}
362
363static int erofs_fscache_init_domain(struct super_block *sb)
364{
365 int err;
366 struct erofs_domain *domain;
367 struct erofs_sb_info *sbi = EROFS_SB(sb);
368
369 domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
370 if (!domain)
371 return -ENOMEM;
372
373 domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
374 if (!domain->domain_id) {
375 kfree(domain);
376 return -ENOMEM;
377 }
378
379 err = erofs_fscache_register_volume(sb);
380 if (err)
381 goto out;
382
383 if (!erofs_pseudo_mnt) {
384 struct vfsmount *mnt = kern_mount(&erofs_fs_type);
385 if (IS_ERR(mnt)) {
386 err = PTR_ERR(mnt);
387 goto out;
388 }
389 erofs_pseudo_mnt = mnt;
390 }
391
392 domain->volume = sbi->volume;
393 refcount_set(&domain->ref, 1);
394 list_add(&domain->list, &erofs_domain_list);
395 sbi->domain = domain;
396 return 0;
397out:
398 kfree(domain->domain_id);
399 kfree(domain);
400 return err;
401}
402
403static int erofs_fscache_register_domain(struct super_block *sb)
404{
405 int err;
406 struct erofs_domain *domain;
407 struct erofs_sb_info *sbi = EROFS_SB(sb);
408
409 mutex_lock(&erofs_domain_list_lock);
410 list_for_each_entry(domain, &erofs_domain_list, list) {
411 if (!strcmp(domain->domain_id, sbi->domain_id)) {
412 sbi->domain = domain;
413 sbi->volume = domain->volume;
414 refcount_inc(&domain->ref);
415 mutex_unlock(&erofs_domain_list_lock);
416 return 0;
417 }
418 }
419 err = erofs_fscache_init_domain(sb);
420 mutex_unlock(&erofs_domain_list_lock);
421 return err;
422}
423
424static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
425 char *name, unsigned int flags)
426{
427 struct fscache_volume *volume = EROFS_SB(sb)->volume;
428 struct erofs_fscache *ctx;
429 struct fscache_cookie *cookie;
430 struct super_block *isb;
431 struct inode *inode;
432 int ret;
433
434 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
435 if (!ctx)
436 return ERR_PTR(-ENOMEM);
437 INIT_LIST_HEAD(&ctx->node);
438 refcount_set(&ctx->ref, 1);
439
440 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
441 name, strlen(name), NULL, 0, 0);
442 if (!cookie) {
443 erofs_err(sb, "failed to get cookie for %s", name);
444 ret = -EINVAL;
445 goto err;
446 }
447 fscache_use_cookie(cookie, false);
448
449 /*
450 * Allocate anonymous inode in global pseudo mount for shareable blobs,
451 * so that they are accessible among erofs fs instances.
452 */
453 isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
454 inode = new_inode(isb);
455 if (!inode) {
456 erofs_err(sb, "failed to get anon inode for %s", name);
457 ret = -ENOMEM;
458 goto err_cookie;
459 }
460
461 inode->i_size = OFFSET_MAX;
462 inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
463 mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
464 inode->i_blkbits = EROFS_SB(sb)->blkszbits;
465 inode->i_private = ctx;
466
467 ctx->cookie = cookie;
468 ctx->inode = inode;
469 return ctx;
470
471err_cookie:
472 fscache_unuse_cookie(cookie, NULL, NULL);
473 fscache_relinquish_cookie(cookie, false);
474err:
475 kfree(ctx);
476 return ERR_PTR(ret);
477}
478
479static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
480{
481 fscache_unuse_cookie(ctx->cookie, NULL, NULL);
482 fscache_relinquish_cookie(ctx->cookie, false);
483 iput(ctx->inode);
484 kfree(ctx->name);
485 kfree(ctx);
486}
487
488static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
489 char *name, unsigned int flags)
490{
491 struct erofs_fscache *ctx;
492 struct erofs_domain *domain = EROFS_SB(sb)->domain;
493
494 ctx = erofs_fscache_acquire_cookie(sb, name, flags);
495 if (IS_ERR(ctx))
496 return ctx;
497
498 ctx->name = kstrdup(name, GFP_KERNEL);
499 if (!ctx->name) {
500 erofs_fscache_relinquish_cookie(ctx);
501 return ERR_PTR(-ENOMEM);
502 }
503
504 refcount_inc(&domain->ref);
505 ctx->domain = domain;
506 list_add(&ctx->node, &erofs_domain_cookies_list);
507 return ctx;
508}
509
510static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
511 char *name, unsigned int flags)
512{
513 struct erofs_fscache *ctx;
514 struct erofs_domain *domain = EROFS_SB(sb)->domain;
515
516 flags |= EROFS_REG_COOKIE_SHARE;
517 mutex_lock(&erofs_domain_cookies_lock);
518 list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
519 if (ctx->domain != domain || strcmp(ctx->name, name))
520 continue;
521 if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
522 refcount_inc(&ctx->ref);
523 } else {
524 erofs_err(sb, "%s already exists in domain %s", name,
525 domain->domain_id);
526 ctx = ERR_PTR(-EEXIST);
527 }
528 mutex_unlock(&erofs_domain_cookies_lock);
529 return ctx;
530 }
531 ctx = erofs_domain_init_cookie(sb, name, flags);
532 mutex_unlock(&erofs_domain_cookies_lock);
533 return ctx;
534}
535
536struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
537 char *name,
538 unsigned int flags)
539{
540 if (EROFS_SB(sb)->domain_id)
541 return erofs_domain_register_cookie(sb, name, flags);
542 return erofs_fscache_acquire_cookie(sb, name, flags);
543}
544
545void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
546{
547 struct erofs_domain *domain = NULL;
548
549 if (!ctx)
550 return;
551 if (!ctx->domain)
552 return erofs_fscache_relinquish_cookie(ctx);
553
554 mutex_lock(&erofs_domain_cookies_lock);
555 if (refcount_dec_and_test(&ctx->ref)) {
556 domain = ctx->domain;
557 list_del(&ctx->node);
558 erofs_fscache_relinquish_cookie(ctx);
559 }
560 mutex_unlock(&erofs_domain_cookies_lock);
561 if (domain)
562 erofs_fscache_domain_put(domain);
563}
564
565int erofs_fscache_register_fs(struct super_block *sb)
566{
567 int ret;
568 struct erofs_sb_info *sbi = EROFS_SB(sb);
569 struct erofs_fscache *fscache;
570 unsigned int flags = 0;
571
572 if (sbi->domain_id)
573 ret = erofs_fscache_register_domain(sb);
574 else
575 ret = erofs_fscache_register_volume(sb);
576 if (ret)
577 return ret;
578
579 /*
580 * When shared domain is enabled, using NEED_NOEXIST to guarantee
581 * the primary data blob (aka fsid) is unique in the shared domain.
582 *
583 * For non-shared-domain case, fscache_acquire_volume() invoked by
584 * erofs_fscache_register_volume() has already guaranteed
585 * the uniqueness of primary data blob.
586 *
587 * Acquired domain/volume will be relinquished in kill_sb() on error.
588 */
589 if (sbi->domain_id)
590 flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
591 fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
592 if (IS_ERR(fscache))
593 return PTR_ERR(fscache);
594
595 sbi->s_fscache = fscache;
596 return 0;
597}
598
599void erofs_fscache_unregister_fs(struct super_block *sb)
600{
601 struct erofs_sb_info *sbi = EROFS_SB(sb);
602
603 erofs_fscache_unregister_cookie(sbi->s_fscache);
604
605 if (sbi->domain)
606 erofs_fscache_domain_put(sbi->domain);
607 else
608 fscache_relinquish_volume(sbi->volume, NULL, false);
609
610 sbi->s_fscache = NULL;
611 sbi->volume = NULL;
612 sbi->domain = NULL;
613}
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2022, Alibaba Cloud
4 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
5 */
6#include <linux/pseudo_fs.h>
7#include <linux/fscache.h>
8#include "internal.h"
9
/* Registered shared domains; walked and modified under erofs_domain_list_lock. */
static LIST_HEAD(erofs_domain_list);
static DEFINE_MUTEX(erofs_domain_list_lock);

/* Cookies registered in shared domains; guarded by erofs_domain_cookies_lock. */
static LIST_HEAD(erofs_domain_cookies_list);
static DEFINE_MUTEX(erofs_domain_cookies_lock);

/* Pseudo mount whose superblock hosts the anonymous inodes of shared blobs. */
static struct vfsmount *erofs_pseudo_mnt;
15
16static int erofs_anon_init_fs_context(struct fs_context *fc)
17{
18 return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
19}
20
21static struct file_system_type erofs_anon_fs_type = {
22 .owner = THIS_MODULE,
23 .name = "pseudo_erofs",
24 .init_fs_context = erofs_anon_init_fs_context,
25 .kill_sb = kill_anon_super,
26};
27
28struct erofs_fscache_io {
29 struct netfs_cache_resources cres;
30 struct iov_iter iter;
31 netfs_io_terminated_t end_io;
32 void *private;
33 refcount_t ref;
34};
35
36struct erofs_fscache_rq {
37 struct address_space *mapping; /* The mapping being accessed */
38 loff_t start; /* Start position */
39 size_t len; /* Length of the request */
40 size_t submitted; /* Length of submitted */
41 short error; /* 0 or error that occurred */
42 refcount_t ref;
43};
44
45static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
46{
47 if (!refcount_dec_and_test(&io->ref))
48 return false;
49 if (io->cres.ops)
50 io->cres.ops->end_operation(&io->cres);
51 kfree(io);
52 return true;
53}
54
55static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
56{
57 struct folio *folio;
58 bool failed = req->error;
59 pgoff_t start_page = req->start / PAGE_SIZE;
60 pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
61
62 XA_STATE(xas, &req->mapping->i_pages, start_page);
63
64 rcu_read_lock();
65 xas_for_each(&xas, folio, last_page) {
66 if (xas_retry(&xas, folio))
67 continue;
68 if (!failed)
69 folio_mark_uptodate(folio);
70 folio_unlock(folio);
71 }
72 rcu_read_unlock();
73}
74
75static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
76{
77 if (!refcount_dec_and_test(&req->ref))
78 return;
79 erofs_fscache_req_complete(req);
80 kfree(req);
81}
82
83static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
84 loff_t start, size_t len)
85{
86 struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);
87
88 if (!req)
89 return NULL;
90 req->mapping = mapping;
91 req->start = start;
92 req->len = len;
93 refcount_set(&req->ref, 1);
94 return req;
95}
96
97static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
98{
99 struct erofs_fscache_rq *req = io->private;
100
101 if (erofs_fscache_io_put(io))
102 erofs_fscache_req_put(req);
103}
104
105static void erofs_fscache_req_end_io(void *priv,
106 ssize_t transferred_or_error, bool was_async)
107{
108 struct erofs_fscache_io *io = priv;
109 struct erofs_fscache_rq *req = io->private;
110
111 if (IS_ERR_VALUE(transferred_or_error))
112 req->error = transferred_or_error;
113 erofs_fscache_req_io_put(io);
114}
115
116static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
117{
118 struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);
119
120 if (!io)
121 return NULL;
122 io->end_io = erofs_fscache_req_end_io;
123 io->private = req;
124 refcount_inc(&req->ref);
125 refcount_set(&io->ref, 1);
126 return io;
127}
128
129/*
130 * Read data from fscache described by cookie at pstart physical address
131 * offset, and fill the read data into buffer described by io->iter.
132 */
133static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
134 loff_t pstart, struct erofs_fscache_io *io)
135{
136 enum netfs_io_source source;
137 struct netfs_cache_resources *cres = &io->cres;
138 struct iov_iter *iter = &io->iter;
139 int ret;
140
141 ret = fscache_begin_read_operation(cres, cookie);
142 if (ret)
143 return ret;
144
145 while (iov_iter_count(iter)) {
146 size_t orig_count = iov_iter_count(iter), len = orig_count;
147 unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
148
149 source = cres->ops->prepare_ondemand_read(cres,
150 pstart, &len, LLONG_MAX, &flags, 0);
151 if (WARN_ON(len == 0))
152 source = NETFS_INVALID_READ;
153 if (source != NETFS_READ_FROM_CACHE) {
154 erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
155 return -EIO;
156 }
157
158 iov_iter_truncate(iter, len);
159 refcount_inc(&io->ref);
160 ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
161 io->end_io, io);
162 if (ret == -EIOCBQUEUED)
163 ret = 0;
164 if (ret) {
165 erofs_err(NULL, "fscache_read failed (ret %d)", ret);
166 return ret;
167 }
168 if (WARN_ON(iov_iter_count(iter)))
169 return -EIO;
170
171 iov_iter_reexpand(iter, orig_count - len);
172 pstart += len;
173 }
174 return 0;
175}
176
177struct erofs_fscache_bio {
178 struct erofs_fscache_io io;
179 struct bio bio; /* w/o bdev to share bio_add_page/endio() */
180 struct bio_vec bvecs[BIO_MAX_VECS];
181};
182
183static void erofs_fscache_bio_endio(void *priv,
184 ssize_t transferred_or_error, bool was_async)
185{
186 struct erofs_fscache_bio *io = priv;
187
188 if (IS_ERR_VALUE(transferred_or_error))
189 io->bio.bi_status = errno_to_blk_status(transferred_or_error);
190 io->bio.bi_end_io(&io->bio);
191 BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
192 erofs_fscache_io_put(&io->io);
193}
194
195struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
196{
197 struct erofs_fscache_bio *io;
198
199 io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
200 bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
201 io->io.private = mdev->m_fscache->cookie;
202 io->io.end_io = erofs_fscache_bio_endio;
203 refcount_set(&io->io.ref, 1);
204 return &io->bio;
205}
206
207void erofs_fscache_submit_bio(struct bio *bio)
208{
209 struct erofs_fscache_bio *io = container_of(bio,
210 struct erofs_fscache_bio, bio);
211 int ret;
212
213 iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
214 bio->bi_iter.bi_size);
215 ret = erofs_fscache_read_io_async(io->io.private,
216 bio->bi_iter.bi_sector << 9, &io->io);
217 erofs_fscache_io_put(&io->io);
218 if (!ret)
219 return;
220 bio->bi_status = errno_to_blk_status(ret);
221 bio->bi_end_io(bio);
222}
223
224static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
225{
226 struct erofs_fscache *ctx = folio->mapping->host->i_private;
227 int ret = -ENOMEM;
228 struct erofs_fscache_rq *req;
229 struct erofs_fscache_io *io;
230
231 req = erofs_fscache_req_alloc(folio->mapping,
232 folio_pos(folio), folio_size(folio));
233 if (!req) {
234 folio_unlock(folio);
235 return ret;
236 }
237
238 io = erofs_fscache_req_io_alloc(req);
239 if (!io) {
240 req->error = ret;
241 goto out;
242 }
243 iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
244 folio_pos(folio), folio_size(folio));
245
246 ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
247 if (ret)
248 req->error = ret;
249
250 erofs_fscache_req_io_put(io);
251out:
252 erofs_fscache_req_put(req);
253 return ret;
254}
255
256static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
257{
258 struct address_space *mapping = req->mapping;
259 struct inode *inode = mapping->host;
260 struct super_block *sb = inode->i_sb;
261 struct erofs_fscache_io *io;
262 struct erofs_map_blocks map;
263 struct erofs_map_dev mdev;
264 loff_t pos = req->start + req->submitted;
265 size_t count;
266 int ret;
267
268 map.m_la = pos;
269 ret = erofs_map_blocks(inode, &map);
270 if (ret)
271 return ret;
272
273 if (map.m_flags & EROFS_MAP_META) {
274 struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
275 struct iov_iter iter;
276 erofs_blk_t blknr;
277 size_t offset, size;
278 void *src;
279
280 /* For tail packing layout, the offset may be non-zero. */
281 offset = erofs_blkoff(sb, map.m_pa);
282 blknr = erofs_blknr(sb, map.m_pa);
283 size = map.m_llen;
284
285 src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
286 if (IS_ERR(src))
287 return PTR_ERR(src);
288
289 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
290 if (copy_to_iter(src + offset, size, &iter) != size) {
291 erofs_put_metabuf(&buf);
292 return -EFAULT;
293 }
294 iov_iter_zero(PAGE_SIZE - size, &iter);
295 erofs_put_metabuf(&buf);
296 req->submitted += PAGE_SIZE;
297 return 0;
298 }
299
300 count = req->len - req->submitted;
301 if (!(map.m_flags & EROFS_MAP_MAPPED)) {
302 struct iov_iter iter;
303
304 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
305 iov_iter_zero(count, &iter);
306 req->submitted += count;
307 return 0;
308 }
309
310 count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
311 DBG_BUGON(!count || count % PAGE_SIZE);
312
313 mdev = (struct erofs_map_dev) {
314 .m_deviceid = map.m_deviceid,
315 .m_pa = map.m_pa,
316 };
317 ret = erofs_map_dev(sb, &mdev);
318 if (ret)
319 return ret;
320
321 io = erofs_fscache_req_io_alloc(req);
322 if (!io)
323 return -ENOMEM;
324 iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
325 ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
326 mdev.m_pa + (pos - map.m_la), io);
327 erofs_fscache_req_io_put(io);
328
329 req->submitted += count;
330 return ret;
331}
332
333static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
334{
335 int ret;
336
337 do {
338 ret = erofs_fscache_data_read_slice(req);
339 if (ret)
340 req->error = ret;
341 } while (!ret && req->submitted < req->len);
342 return ret;
343}
344
345static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
346{
347 struct erofs_fscache_rq *req;
348 int ret;
349
350 req = erofs_fscache_req_alloc(folio->mapping,
351 folio_pos(folio), folio_size(folio));
352 if (!req) {
353 folio_unlock(folio);
354 return -ENOMEM;
355 }
356
357 ret = erofs_fscache_data_read(req);
358 erofs_fscache_req_put(req);
359 return ret;
360}
361
362static void erofs_fscache_readahead(struct readahead_control *rac)
363{
364 struct erofs_fscache_rq *req;
365
366 if (!readahead_count(rac))
367 return;
368
369 req = erofs_fscache_req_alloc(rac->mapping,
370 readahead_pos(rac), readahead_length(rac));
371 if (!req)
372 return;
373
374 /* The request completion will drop refs on the folios. */
375 while (readahead_folio(rac))
376 ;
377
378 erofs_fscache_data_read(req);
379 erofs_fscache_req_put(req);
380}
381
382static const struct address_space_operations erofs_fscache_meta_aops = {
383 .read_folio = erofs_fscache_meta_read_folio,
384};
385
386const struct address_space_operations erofs_fscache_access_aops = {
387 .read_folio = erofs_fscache_read_folio,
388 .readahead = erofs_fscache_readahead,
389};
390
391static void erofs_fscache_domain_put(struct erofs_domain *domain)
392{
393 mutex_lock(&erofs_domain_list_lock);
394 if (refcount_dec_and_test(&domain->ref)) {
395 list_del(&domain->list);
396 if (list_empty(&erofs_domain_list)) {
397 kern_unmount(erofs_pseudo_mnt);
398 erofs_pseudo_mnt = NULL;
399 }
400 fscache_relinquish_volume(domain->volume, NULL, false);
401 mutex_unlock(&erofs_domain_list_lock);
402 kfree(domain->domain_id);
403 kfree(domain);
404 return;
405 }
406 mutex_unlock(&erofs_domain_list_lock);
407}
408
409static int erofs_fscache_register_volume(struct super_block *sb)
410{
411 struct erofs_sb_info *sbi = EROFS_SB(sb);
412 char *domain_id = sbi->domain_id;
413 struct fscache_volume *volume;
414 char *name;
415 int ret = 0;
416
417 name = kasprintf(GFP_KERNEL, "erofs,%s",
418 domain_id ? domain_id : sbi->fsid);
419 if (!name)
420 return -ENOMEM;
421
422 volume = fscache_acquire_volume(name, NULL, NULL, 0);
423 if (IS_ERR_OR_NULL(volume)) {
424 erofs_err(sb, "failed to register volume for %s", name);
425 ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
426 volume = NULL;
427 }
428
429 sbi->volume = volume;
430 kfree(name);
431 return ret;
432}
433
434static int erofs_fscache_init_domain(struct super_block *sb)
435{
436 int err;
437 struct erofs_domain *domain;
438 struct erofs_sb_info *sbi = EROFS_SB(sb);
439
440 domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
441 if (!domain)
442 return -ENOMEM;
443
444 domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
445 if (!domain->domain_id) {
446 kfree(domain);
447 return -ENOMEM;
448 }
449
450 err = erofs_fscache_register_volume(sb);
451 if (err)
452 goto out;
453
454 if (!erofs_pseudo_mnt) {
455 struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
456 if (IS_ERR(mnt)) {
457 err = PTR_ERR(mnt);
458 goto out;
459 }
460 erofs_pseudo_mnt = mnt;
461 }
462
463 domain->volume = sbi->volume;
464 refcount_set(&domain->ref, 1);
465 list_add(&domain->list, &erofs_domain_list);
466 sbi->domain = domain;
467 return 0;
468out:
469 kfree(domain->domain_id);
470 kfree(domain);
471 return err;
472}
473
474static int erofs_fscache_register_domain(struct super_block *sb)
475{
476 int err;
477 struct erofs_domain *domain;
478 struct erofs_sb_info *sbi = EROFS_SB(sb);
479
480 mutex_lock(&erofs_domain_list_lock);
481 list_for_each_entry(domain, &erofs_domain_list, list) {
482 if (!strcmp(domain->domain_id, sbi->domain_id)) {
483 sbi->domain = domain;
484 sbi->volume = domain->volume;
485 refcount_inc(&domain->ref);
486 mutex_unlock(&erofs_domain_list_lock);
487 return 0;
488 }
489 }
490 err = erofs_fscache_init_domain(sb);
491 mutex_unlock(&erofs_domain_list_lock);
492 return err;
493}
494
495static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
496 char *name, unsigned int flags)
497{
498 struct fscache_volume *volume = EROFS_SB(sb)->volume;
499 struct erofs_fscache *ctx;
500 struct fscache_cookie *cookie;
501 struct super_block *isb;
502 struct inode *inode;
503 int ret;
504
505 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
506 if (!ctx)
507 return ERR_PTR(-ENOMEM);
508 INIT_LIST_HEAD(&ctx->node);
509 refcount_set(&ctx->ref, 1);
510
511 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
512 name, strlen(name), NULL, 0, 0);
513 if (!cookie) {
514 erofs_err(sb, "failed to get cookie for %s", name);
515 ret = -EINVAL;
516 goto err;
517 }
518 fscache_use_cookie(cookie, false);
519
520 /*
521 * Allocate anonymous inode in global pseudo mount for shareable blobs,
522 * so that they are accessible among erofs fs instances.
523 */
524 isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
525 inode = new_inode(isb);
526 if (!inode) {
527 erofs_err(sb, "failed to get anon inode for %s", name);
528 ret = -ENOMEM;
529 goto err_cookie;
530 }
531
532 inode->i_size = OFFSET_MAX;
533 inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
534 mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
535 inode->i_blkbits = EROFS_SB(sb)->blkszbits;
536 inode->i_private = ctx;
537
538 ctx->cookie = cookie;
539 ctx->inode = inode;
540 return ctx;
541
542err_cookie:
543 fscache_unuse_cookie(cookie, NULL, NULL);
544 fscache_relinquish_cookie(cookie, false);
545err:
546 kfree(ctx);
547 return ERR_PTR(ret);
548}
549
550static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
551{
552 fscache_unuse_cookie(ctx->cookie, NULL, NULL);
553 fscache_relinquish_cookie(ctx->cookie, false);
554 iput(ctx->inode);
555 kfree(ctx->name);
556 kfree(ctx);
557}
558
559static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
560 char *name, unsigned int flags)
561{
562 struct erofs_fscache *ctx;
563 struct erofs_domain *domain = EROFS_SB(sb)->domain;
564
565 ctx = erofs_fscache_acquire_cookie(sb, name, flags);
566 if (IS_ERR(ctx))
567 return ctx;
568
569 ctx->name = kstrdup(name, GFP_KERNEL);
570 if (!ctx->name) {
571 erofs_fscache_relinquish_cookie(ctx);
572 return ERR_PTR(-ENOMEM);
573 }
574
575 refcount_inc(&domain->ref);
576 ctx->domain = domain;
577 list_add(&ctx->node, &erofs_domain_cookies_list);
578 return ctx;
579}
580
581static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
582 char *name, unsigned int flags)
583{
584 struct erofs_fscache *ctx;
585 struct erofs_domain *domain = EROFS_SB(sb)->domain;
586
587 flags |= EROFS_REG_COOKIE_SHARE;
588 mutex_lock(&erofs_domain_cookies_lock);
589 list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
590 if (ctx->domain != domain || strcmp(ctx->name, name))
591 continue;
592 if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
593 refcount_inc(&ctx->ref);
594 } else {
595 erofs_err(sb, "%s already exists in domain %s", name,
596 domain->domain_id);
597 ctx = ERR_PTR(-EEXIST);
598 }
599 mutex_unlock(&erofs_domain_cookies_lock);
600 return ctx;
601 }
602 ctx = erofs_domain_init_cookie(sb, name, flags);
603 mutex_unlock(&erofs_domain_cookies_lock);
604 return ctx;
605}
606
607struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
608 char *name,
609 unsigned int flags)
610{
611 if (EROFS_SB(sb)->domain_id)
612 return erofs_domain_register_cookie(sb, name, flags);
613 return erofs_fscache_acquire_cookie(sb, name, flags);
614}
615
616void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
617{
618 struct erofs_domain *domain = NULL;
619
620 if (!ctx)
621 return;
622 if (!ctx->domain)
623 return erofs_fscache_relinquish_cookie(ctx);
624
625 mutex_lock(&erofs_domain_cookies_lock);
626 if (refcount_dec_and_test(&ctx->ref)) {
627 domain = ctx->domain;
628 list_del(&ctx->node);
629 erofs_fscache_relinquish_cookie(ctx);
630 }
631 mutex_unlock(&erofs_domain_cookies_lock);
632 if (domain)
633 erofs_fscache_domain_put(domain);
634}
635
636int erofs_fscache_register_fs(struct super_block *sb)
637{
638 int ret;
639 struct erofs_sb_info *sbi = EROFS_SB(sb);
640 struct erofs_fscache *fscache;
641 unsigned int flags = 0;
642
643 if (sbi->domain_id)
644 ret = erofs_fscache_register_domain(sb);
645 else
646 ret = erofs_fscache_register_volume(sb);
647 if (ret)
648 return ret;
649
650 /*
651 * When shared domain is enabled, using NEED_NOEXIST to guarantee
652 * the primary data blob (aka fsid) is unique in the shared domain.
653 *
654 * For non-shared-domain case, fscache_acquire_volume() invoked by
655 * erofs_fscache_register_volume() has already guaranteed
656 * the uniqueness of primary data blob.
657 *
658 * Acquired domain/volume will be relinquished in kill_sb() on error.
659 */
660 if (sbi->domain_id)
661 flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
662 fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
663 if (IS_ERR(fscache))
664 return PTR_ERR(fscache);
665
666 sbi->s_fscache = fscache;
667 return 0;
668}
669
670void erofs_fscache_unregister_fs(struct super_block *sb)
671{
672 struct erofs_sb_info *sbi = EROFS_SB(sb);
673
674 erofs_fscache_unregister_cookie(sbi->s_fscache);
675
676 if (sbi->domain)
677 erofs_fscache_domain_put(sbi->domain);
678 else
679 fscache_relinquish_volume(sbi->volume, NULL, false);
680
681 sbi->s_fscache = NULL;
682 sbi->volume = NULL;
683 sbi->domain = NULL;
684}