super.c - fs/ceph/super.c - Linux diff v3.1 - Bootlin Elixir Cross Referencer

  1
  2#include <linux/ceph/ceph_debug.h>
  3
  4#include <linux/backing-dev.h>
  5#include <linux/ctype.h>
  6#include <linux/fs.h>
  7#include <linux/inet.h>
  8#include <linux/in6.h>
  9#include <linux/module.h>
 10#include <linux/mount.h>
 11#include <linux/parser.h>
 12#include <linux/sched.h>
 13#include <linux/seq_file.h>
 14#include <linux/slab.h>
 15#include <linux/statfs.h>
 16#include <linux/string.h>
 17
 18#include "super.h"
 19#include "mds_client.h"
 20
 21#include <linux/ceph/decode.h>
 22#include <linux/ceph/mon_client.h>
 23#include <linux/ceph/auth.h>
 24#include <linux/ceph/debugfs.h>
 25
 26/*
 27 * Ceph superblock operations
 28 *
 29 * Handle the basics of mounting, unmounting.
 30 */
 31
 32/*
 33 * super ops
 34 */
 35static void ceph_put_super(struct super_block *s)
 36{
 37	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
 38
 39	dout("put_super\n");
 40	ceph_mdsc_close_sessions(fsc->mdsc);
 41
 42	/*
 43	 * ensure we release the bdi before put_anon_super releases
 44	 * the device name.
 45	 */
 46	if (s->s_bdi == &fsc->backing_dev_info) {
 47		bdi_unregister(&fsc->backing_dev_info);
 48		s->s_bdi = NULL;
 49	}
 50
 51	return;
 52}
 53
 54static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 55{
 56	struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
 57	struct ceph_monmap *monmap = fsc->client->monc.monmap;
 58	struct ceph_statfs st;
 59	u64 fsid;
 60	int err;
 61
 62	dout("statfs\n");
 63	err = ceph_monc_do_statfs(&fsc->client->monc, &st);
 64	if (err < 0)
 65		return err;
 66
 67	/* fill in kstatfs */
 68	buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
 69
 70	/*
 71	 * express utilization in terms of large blocks to avoid
 72	 * overflow on 32-bit machines.
 73	 */
 74	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
 75	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
 76	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
 77	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
 78
 79	buf->f_files = le64_to_cpu(st.num_objects);
 80	buf->f_ffree = -1;
 81	buf->f_namelen = NAME_MAX;
 82	buf->f_frsize = PAGE_CACHE_SIZE;
 83
 84	/* leave fsid little-endian, regardless of host endianness */
 85	fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
 86	buf->f_fsid.val[0] = fsid & 0xffffffff;
 87	buf->f_fsid.val[1] = fsid >> 32;
 88
 89	return 0;
 90}
 91
 92
 93static int ceph_sync_fs(struct super_block *sb, int wait)
 94{
 95	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 96
 97	if (!wait) {
 98		dout("sync_fs (non-blocking)\n");
 99		ceph_flush_dirty_caps(fsc->mdsc);
100		dout("sync_fs (non-blocking) done\n");
101		return 0;
102	}
103
104	dout("sync_fs (blocking)\n");
105	ceph_osdc_sync(&fsc->client->osdc);
106	ceph_mdsc_sync(fsc->mdsc);
107	dout("sync_fs (blocking) done\n");
108	return 0;
109}
110
111/*
112 * mount options
113 */
/*
 * Mount option tokens.  The order is significant: parse_fsopt_token()
 * treats every token below Opt_last_int as carrying an integer
 * argument, every token between Opt_last_int and Opt_last_string as
 * carrying a string argument, and the remainder as bare flags.
 */
enum {
	/* integer arguments */
	Opt_wsize,
	Opt_rsize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_cap_release_safety,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,
	Opt_last_int,

	/* string arguments */
	Opt_snapdirname,
	Opt_last_string,

	/* flags, no argument */
	Opt_dirstat,
	Opt_nodirstat,
	Opt_rbytes,
	Opt_norbytes,
	Opt_noasyncreaddir,
	Opt_ino32,
};
135
136static match_table_t fsopt_tokens = {
137	{Opt_wsize, "wsize=%d"},
138	{Opt_rsize, "rsize=%d"},
 
139	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
140	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
141	{Opt_cap_release_safety, "cap_release_safety=%d"},
142	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
143	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
144	{Opt_congestion_kb, "write_congestion_kb=%d"},
145	/* int args above */
146	{Opt_snapdirname, "snapdirname=%s"},
147	/* string args above */
148	{Opt_dirstat, "dirstat"},
149	{Opt_nodirstat, "nodirstat"},
150	{Opt_rbytes, "rbytes"},
151	{Opt_norbytes, "norbytes"},
 
152	{Opt_noasyncreaddir, "noasyncreaddir"},
 
 
153	{Opt_ino32, "ino32"},
 
154	{-1, NULL}
155};
156
157static int parse_fsopt_token(char *c, void *private)
158{
159	struct ceph_mount_options *fsopt = private;
160	substring_t argstr[MAX_OPT_ARGS];
161	int token, intval, ret;
162
163	token = match_token((char *)c, fsopt_tokens, argstr);
164	if (token < 0)
165		return -EINVAL;
166
167	if (token < Opt_last_int) {
168		ret = match_int(&argstr[0], &intval);
169		if (ret < 0) {
170			pr_err("bad mount option arg (not int) "
171			       "at '%s'\n", c);
172			return ret;
173		}
174		dout("got int token %d val %d\n", token, intval);
175	} else if (token > Opt_last_int && token < Opt_last_string) {
176		dout("got string token %d val %s\n", token,
177		     argstr[0].from);
178	} else {
179		dout("got token %d\n", token);
180	}
181
182	switch (token) {
183	case Opt_snapdirname:
184		kfree(fsopt->snapdir_name);
185		fsopt->snapdir_name = kstrndup(argstr[0].from,
186					       argstr[0].to-argstr[0].from,
187					       GFP_KERNEL);
188		if (!fsopt->snapdir_name)
189			return -ENOMEM;
190		break;
191
192		/* misc */
193	case Opt_wsize:
194		fsopt->wsize = intval;
195		break;
196	case Opt_rsize:
197		fsopt->rsize = intval;
198		break;
 
 
 
199	case Opt_caps_wanted_delay_min:
200		fsopt->caps_wanted_delay_min = intval;
201		break;
202	case Opt_caps_wanted_delay_max:
203		fsopt->caps_wanted_delay_max = intval;
204		break;
205	case Opt_readdir_max_entries:
206		fsopt->max_readdir = intval;
207		break;
208	case Opt_readdir_max_bytes:
209		fsopt->max_readdir_bytes = intval;
210		break;
211	case Opt_congestion_kb:
212		fsopt->congestion_kb = intval;
213		break;
214	case Opt_dirstat:
215		fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
216		break;
217	case Opt_nodirstat:
218		fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
219		break;
220	case Opt_rbytes:
221		fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
222		break;
223	case Opt_norbytes:
224		fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
225		break;
 
 
 
226	case Opt_noasyncreaddir:
227		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
228		break;
 
 
 
 
 
 
229	case Opt_ino32:
230		fsopt->flags |= CEPH_MOUNT_OPT_INO32;
231		break;
 
 
 
232	default:
233		BUG_ON(token);
234	}
235	return 0;
236}
237
238static void destroy_mount_options(struct ceph_mount_options *args)
239{
240	dout("destroy_mount_options %p\n", args);
241	kfree(args->snapdir_name);
242	kfree(args);
243}
244
/*
 * strcmp() that tolerates NULL on either side.
 *
 * Two NULLs compare equal (0).  A non-NULL @s1 against a NULL @s2
 * yields -1 and the reverse yields 1.  Otherwise the strcmp() result
 * is returned.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;	/* only s2 is NULL */
	if (s2)
		return 1;	/* only s1 is NULL */
	return 0;		/* both NULL */
}
255
256static int compare_mount_options(struct ceph_mount_options *new_fsopt,
257				 struct ceph_options *new_opt,
258				 struct ceph_fs_client *fsc)
259{
260	struct ceph_mount_options *fsopt1 = new_fsopt;
261	struct ceph_mount_options *fsopt2 = fsc->mount_options;
262	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
263	int ret;
264
265	ret = memcmp(fsopt1, fsopt2, ofs);
266	if (ret)
267		return ret;
268
269	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
270	if (ret)
271		return ret;
272
273	return ceph_compare_options(new_opt, fsc->client);
274}
275
276static int parse_mount_options(struct ceph_mount_options **pfsopt,
277			       struct ceph_options **popt,
278			       int flags, char *options,
279			       const char *dev_name,
280			       const char **path)
281{
282	struct ceph_mount_options *fsopt;
283	const char *dev_name_end;
284	int err = -ENOMEM;
285
286	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
287	if (!fsopt)
288		return -ENOMEM;
289
290	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
291
292        fsopt->sb_flags = flags;
293        fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
294
295        fsopt->rsize = CEPH_RSIZE_DEFAULT;
296        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
 
297	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
298	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
299        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
300        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
301        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
302        fsopt->congestion_kb = default_congestion_kb();
303	
304        /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
305        err = -EINVAL;
306        if (!dev_name)
307                goto out;
308        *path = strstr(dev_name, ":/");
309        if (*path == NULL) {
310                pr_err("device name is missing path (no :/ in %s)\n",
311                       dev_name);
312                goto out;
313        }
314	dev_name_end = *path;
315	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
316
317	/* path on server */
318	*path += 2;
319	dout("server path '%s'\n", *path);
320
321	err = ceph_parse_options(popt, options, dev_name, dev_name_end,
322				 parse_fsopt_token, (void *)fsopt);
323	if (err)
 
324		goto out;
 
325
326	/* success */
327	*pfsopt = fsopt;
328	return 0;
329
330out:
331	destroy_mount_options(fsopt);
332	return err;
333}
334
335/**
336 * ceph_show_options - Show mount options in /proc/mounts
337 * @m: seq_file to write to
338 * @mnt: mount descriptor
339 */
340static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
341{
342	struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
343	struct ceph_mount_options *fsopt = fsc->mount_options;
344	struct ceph_options *opt = fsc->client->options;
345
346	if (opt->flags & CEPH_OPT_FSID)
347		seq_printf(m, ",fsid=%pU", &opt->fsid);
348	if (opt->flags & CEPH_OPT_NOSHARE)
349		seq_puts(m, ",noshare");
350	if (opt->flags & CEPH_OPT_NOCRC)
351		seq_puts(m, ",nocrc");
352
353	if (opt->name)
354		seq_printf(m, ",name=%s", opt->name);
355	if (opt->key)
356		seq_puts(m, ",secret=<hidden>");
357
358	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
359		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
360	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
361		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
362	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
363		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
364	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
365		seq_printf(m, ",osdkeepalivetimeout=%d",
366			   opt->osd_keepalive_timeout);
367
368	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
369		seq_puts(m, ",dirstat");
370	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
371		seq_puts(m, ",norbytes");
372	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
373		seq_puts(m, ",noasyncreaddir");
 
 
 
 
374
375	if (fsopt->wsize)
376		seq_printf(m, ",wsize=%d", fsopt->wsize);
377	if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
378		seq_printf(m, ",rsize=%d", fsopt->rsize);
 
 
379	if (fsopt->congestion_kb != default_congestion_kb())
380		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
381	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
382		seq_printf(m, ",caps_wanted_delay_min=%d",
383			 fsopt->caps_wanted_delay_min);
384	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
385		seq_printf(m, ",caps_wanted_delay_max=%d",
386			   fsopt->caps_wanted_delay_max);
387	if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
388		seq_printf(m, ",cap_release_safety=%d",
389			   fsopt->cap_release_safety);
390	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
391		seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
392	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
393		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
394	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
395		seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
396	return 0;
397}
398
399/*
400 * handle any mon messages the standard library doesn't understand.
401 * return error if we don't either.
402 */
403static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
404{
405	struct ceph_fs_client *fsc = client->private;
406	int type = le16_to_cpu(msg->hdr.type);
407
408	switch (type) {
409	case CEPH_MSG_MDS_MAP:
410		ceph_mdsc_handle_map(fsc->mdsc, msg);
411		return 0;
412
413	default:
414		return -1;
415	}
416}
417
418/*
419 * create a new fs client
420 */
421struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
422					struct ceph_options *opt)
423{
424	struct ceph_fs_client *fsc;
 
 
 
 
425	int err = -ENOMEM;
426
427	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
428	if (!fsc)
429		return ERR_PTR(-ENOMEM);
430
431	fsc->client = ceph_create_client(opt, fsc);
 
432	if (IS_ERR(fsc->client)) {
433		err = PTR_ERR(fsc->client);
434		goto fail;
435	}
436	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
437	fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
438		CEPH_FEATURE_DIRLAYOUTHASH;
439	fsc->client->monc.want_mdsmap = 1;
440
441	fsc->mount_options = fsopt;
442
443	fsc->sb = NULL;
444	fsc->mount_state = CEPH_MOUNT_MOUNTING;
445
446	atomic_long_set(&fsc->writeback_count, 0);
447
448	err = bdi_init(&fsc->backing_dev_info);
449	if (err < 0)
450		goto fail_client;
451
452	err = -ENOMEM;
453	/*
454	 * The number of concurrent works can be high but they don't need
455	 * to be processed in parallel, limit concurrency.
456	 */
457	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
458	if (fsc->wb_wq == NULL)
459		goto fail_bdi;
460	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
461	if (fsc->pg_inv_wq == NULL)
462		goto fail_wb_wq;
463	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
464	if (fsc->trunc_wq == NULL)
465		goto fail_pg_inv_wq;
466
467	/* set up mempools */
468	err = -ENOMEM;
469	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
470			      fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
471	if (!fsc->wb_pagevec_pool)
472		goto fail_trunc_wq;
473
474	/* caps */
475	fsc->min_caps = fsopt->max_readdir;
476
477	return fsc;
478
479fail_trunc_wq:
480	destroy_workqueue(fsc->trunc_wq);
481fail_pg_inv_wq:
482	destroy_workqueue(fsc->pg_inv_wq);
483fail_wb_wq:
484	destroy_workqueue(fsc->wb_wq);
485fail_bdi:
486	bdi_destroy(&fsc->backing_dev_info);
487fail_client:
488	ceph_destroy_client(fsc->client);
489fail:
490	kfree(fsc);
491	return ERR_PTR(err);
492}
493
494void destroy_fs_client(struct ceph_fs_client *fsc)
495{
496	dout("destroy_fs_client %p\n", fsc);
497
498	destroy_workqueue(fsc->wb_wq);
499	destroy_workqueue(fsc->pg_inv_wq);
500	destroy_workqueue(fsc->trunc_wq);
501
502	bdi_destroy(&fsc->backing_dev_info);
503
504	mempool_destroy(fsc->wb_pagevec_pool);
505
506	destroy_mount_options(fsc->mount_options);
507
508	ceph_fs_debugfs_cleanup(fsc);
509
510	ceph_destroy_client(fsc->client);
511
512	kfree(fsc);
513	dout("destroy_fs_client %p done\n", fsc);
514}
515
/*
 * Slab caches, created by init_caches() and released by
 * destroy_caches().
 */
struct kmem_cache *ceph_inode_cachep;	/* struct ceph_inode_info */
struct kmem_cache *ceph_cap_cachep;	/* struct ceph_cap */
struct kmem_cache *ceph_dentry_cachep;	/* struct ceph_dentry_info */
struct kmem_cache *ceph_file_cachep;	/* struct ceph_file_info */
523
524static void ceph_inode_init_once(void *foo)
525{
526	struct ceph_inode_info *ci = foo;
527	inode_init_once(&ci->vfs_inode);
528}
529
530static int __init init_caches(void)
531{
532	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
533				      sizeof(struct ceph_inode_info),
534				      __alignof__(struct ceph_inode_info),
535				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
536				      ceph_inode_init_once);
537	if (ceph_inode_cachep == NULL)
538		return -ENOMEM;
539
540	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
541				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
542	if (ceph_cap_cachep == NULL)
543		goto bad_cap;
544
545	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
546					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
547	if (ceph_dentry_cachep == NULL)
548		goto bad_dentry;
549
550	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
551				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
552	if (ceph_file_cachep == NULL)
553		goto bad_file;
554
555	return 0;
556
557bad_file:
558	kmem_cache_destroy(ceph_dentry_cachep);
559bad_dentry:
560	kmem_cache_destroy(ceph_cap_cachep);
561bad_cap:
562	kmem_cache_destroy(ceph_inode_cachep);
563	return -ENOMEM;
564}
565
566static void destroy_caches(void)
567{
568	kmem_cache_destroy(ceph_inode_cachep);
569	kmem_cache_destroy(ceph_cap_cachep);
570	kmem_cache_destroy(ceph_dentry_cachep);
571	kmem_cache_destroy(ceph_file_cachep);
572}
573
574
575/*
576 * ceph_umount_begin - initiate forced umount.  Tear down down the
577 * mount, skipping steps that may hang while waiting for server(s).
578 */
579static void ceph_umount_begin(struct super_block *sb)
580{
581	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
582
583	dout("ceph_umount_begin - starting forced umount\n");
584	if (!fsc)
585		return;
586	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
587	return;
588}
589
590static const struct super_operations ceph_super_ops = {
591	.alloc_inode	= ceph_alloc_inode,
592	.destroy_inode	= ceph_destroy_inode,
593	.write_inode    = ceph_write_inode,
594	.sync_fs        = ceph_sync_fs,
595	.put_super	= ceph_put_super,
596	.show_options   = ceph_show_options,
597	.statfs		= ceph_statfs,
598	.umount_begin   = ceph_umount_begin,
599};
600
601/*
602 * Bootstrap mount by opening the root directory.  Note the mount
603 * @started time from caller, and time out if this takes too long.
604 */
605static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
606				       const char *path,
607				       unsigned long started)
608{
609	struct ceph_mds_client *mdsc = fsc->mdsc;
610	struct ceph_mds_request *req = NULL;
611	int err;
612	struct dentry *root;
613
614	/* open dir */
615	dout("open_root_inode opening '%s'\n", path);
616	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
617	if (IS_ERR(req))
618		return ERR_CAST(req);
619	req->r_path1 = kstrdup(path, GFP_NOFS);
620	req->r_ino1.ino = CEPH_INO_ROOT;
621	req->r_ino1.snap = CEPH_NOSNAP;
622	req->r_started = started;
623	req->r_timeout = fsc->client->options->mount_timeout * HZ;
624	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
625	req->r_num_caps = 2;
626	err = ceph_mdsc_do_request(mdsc, NULL, req);
627	if (err == 0) {
628		dout("open_root_inode success\n");
629		if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
630		    fsc->sb->s_root == NULL)
631			root = d_alloc_root(req->r_target_inode);
632		else
633			root = d_obtain_alias(req->r_target_inode);
634		req->r_target_inode = NULL;
 
 
 
 
 
 
 
 
 
 
 
 
635		dout("open_root_inode success, root dentry is %p\n", root);
636	} else {
637		root = ERR_PTR(err);
638	}
 
639	ceph_mdsc_put_request(req);
640	return root;
641}
642
643
644
645
646/*
647 * mount: join the ceph cluster, and open root directory.
648 */
649static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
650		      const char *path)
651{
652	int err;
653	unsigned long started = jiffies;  /* note the start time */
654	struct dentry *root;
655	int first = 0;   /* first vfsmount for this super_block */
656
657	dout("mount start\n");
658	mutex_lock(&fsc->client->mount_mutex);
659
660	err = __ceph_open_session(fsc->client, started);
661	if (err < 0)
662		goto out;
663
664	dout("mount opening root\n");
665	root = open_root_dentry(fsc, "", started);
666	if (IS_ERR(root)) {
667		err = PTR_ERR(root);
668		goto out;
669	}
670	if (fsc->sb->s_root) {
671		dput(root);
672	} else {
673		fsc->sb->s_root = root;
674		first = 1;
675
676		err = ceph_fs_debugfs_init(fsc);
677		if (err < 0)
678			goto fail;
679	}
680
681	if (path[0] == 0) {
682		dget(root);
683	} else {
684		dout("mount opening base mountpoint\n");
685		root = open_root_dentry(fsc, path, started);
686		if (IS_ERR(root)) {
687			err = PTR_ERR(root);
688			goto fail;
689		}
690	}
691
692	fsc->mount_state = CEPH_MOUNT_MOUNTED;
693	dout("mount success\n");
694	mutex_unlock(&fsc->client->mount_mutex);
695	return root;
696
697out:
698	mutex_unlock(&fsc->client->mount_mutex);
699	return ERR_PTR(err);
700
701fail:
702	if (first) {
703		dput(fsc->sb->s_root);
704		fsc->sb->s_root = NULL;
705	}
706	goto out;
707}
708
709static int ceph_set_super(struct super_block *s, void *data)
710{
711	struct ceph_fs_client *fsc = data;
712	int ret;
713
714	dout("set_super %p data %p\n", s, data);
715
716	s->s_flags = fsc->mount_options->sb_flags;
717	s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
718
719	s->s_fs_info = fsc;
720	fsc->sb = s;
721
722	s->s_op = &ceph_super_ops;
723	s->s_export_op = &ceph_export_ops;
724
725	s->s_time_gran = 1000;  /* 1000 ns == 1 us */
726
727	ret = set_anon_super(s, NULL);  /* what is that second arg for? */
728	if (ret != 0)
729		goto fail;
730
731	return ret;
732
733fail:
734	s->s_fs_info = NULL;
735	fsc->sb = NULL;
736	return ret;
737}
738
739/*
740 * share superblock if same fs AND options
741 */
742static int ceph_compare_super(struct super_block *sb, void *data)
743{
744	struct ceph_fs_client *new = data;
745	struct ceph_mount_options *fsopt = new->mount_options;
746	struct ceph_options *opt = new->client->options;
747	struct ceph_fs_client *other = ceph_sb_to_client(sb);
748
749	dout("ceph_compare_super %p\n", sb);
750
751	if (compare_mount_options(fsopt, opt, other)) {
752		dout("monitor(s)/mount options don't match\n");
753		return 0;
754	}
755	if ((opt->flags & CEPH_OPT_FSID) &&
756	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
757		dout("fsid doesn't match\n");
758		return 0;
759	}
760	if (fsopt->sb_flags != other->mount_options->sb_flags) {
761		dout("flags differ\n");
762		return 0;
763	}
764	return 1;
765}
766
767/*
768 * construct our own bdi so we can control readahead, etc.
769 */
770static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
771
772static int ceph_register_bdi(struct super_block *sb,
773			     struct ceph_fs_client *fsc)
774{
775	int err;
776
777	/* set ra_pages based on rsize mount option? */
778	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
779		fsc->backing_dev_info.ra_pages =
780			(fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
781			>> PAGE_SHIFT;
782	else
783		fsc->backing_dev_info.ra_pages =
784			default_backing_dev_info.ra_pages;
785
786	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
787			   atomic_long_inc_return(&bdi_seq));
788	if (!err)
789		sb->s_bdi = &fsc->backing_dev_info;
790	return err;
791}
792
793static struct dentry *ceph_mount(struct file_system_type *fs_type,
794		       int flags, const char *dev_name, void *data)
795{
796	struct super_block *sb;
797	struct ceph_fs_client *fsc;
798	struct dentry *res;
799	int err;
800	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
801	const char *path = NULL;
802	struct ceph_mount_options *fsopt = NULL;
803	struct ceph_options *opt = NULL;
804
805	dout("ceph_mount\n");
806	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
807	if (err < 0) {
808		res = ERR_PTR(err);
809		goto out_final;
810	}
811
812	/* create client (which we may/may not use) */
813	fsc = create_fs_client(fsopt, opt);
814	if (IS_ERR(fsc)) {
815		res = ERR_CAST(fsc);
816		destroy_mount_options(fsopt);
817		ceph_destroy_options(opt);
818		goto out_final;
819	}
820
821	err = ceph_mdsc_init(fsc);
822	if (err < 0) {
823		res = ERR_PTR(err);
824		goto out;
825	}
826
827	if (ceph_test_opt(fsc->client, NOSHARE))
828		compare_super = NULL;
829	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
830	if (IS_ERR(sb)) {
831		res = ERR_CAST(sb);
832		goto out;
833	}
834
835	if (ceph_sb_to_client(sb) != fsc) {
836		ceph_mdsc_destroy(fsc);
837		destroy_fs_client(fsc);
838		fsc = ceph_sb_to_client(sb);
839		dout("get_sb got existing client %p\n", fsc);
840	} else {
841		dout("get_sb using new client %p\n", fsc);
842		err = ceph_register_bdi(sb, fsc);
843		if (err < 0) {
844			res = ERR_PTR(err);
845			goto out_splat;
846		}
847	}
848
849	res = ceph_real_mount(fsc, path);
850	if (IS_ERR(res))
851		goto out_splat;
852	dout("root %p inode %p ino %llx.%llx\n", res,
853	     res->d_inode, ceph_vinop(res->d_inode));
854	return res;
855
856out_splat:
857	ceph_mdsc_close_sessions(fsc->mdsc);
858	deactivate_locked_super(sb);
859	goto out_final;
860
861out:
862	ceph_mdsc_destroy(fsc);
863	destroy_fs_client(fsc);
864out_final:
865	dout("ceph_mount fail %ld\n", PTR_ERR(res));
866	return res;
867}
868
869static void ceph_kill_sb(struct super_block *s)
870{
871	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
872	dout("kill_sb %p\n", s);
873	ceph_mdsc_pre_umount(fsc->mdsc);
874	kill_anon_super(s);    /* will call put_super after sb is r/o */
875	ceph_mdsc_destroy(fsc);
876	destroy_fs_client(fsc);
877}
878
879static struct file_system_type ceph_fs_type = {
880	.owner		= THIS_MODULE,
881	.name		= "ceph",
882	.mount		= ceph_mount,
883	.kill_sb	= ceph_kill_sb,
884	.fs_flags	= FS_RENAME_DOES_D_MOVE,
885};
886
887#define _STRINGIFY(x) #x
888#define STRINGIFY(x) _STRINGIFY(x)
889
890static int __init init_ceph(void)
891{
892	int ret = init_caches();
893	if (ret)
894		goto out;
895
 
896	ret = register_filesystem(&ceph_fs_type);
897	if (ret)
898		goto out_icache;
899
900	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
901
902	return 0;
903
904out_icache:
 
905	destroy_caches();
906out:
907	return ret;
908}
909
910static void __exit exit_ceph(void)
911{
912	dout("exit_ceph\n");
913	unregister_filesystem(&ceph_fs_type);
 
914	destroy_caches();
915}
916
917module_init(init_ceph);
918module_exit(exit_ceph);
919
920MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
921MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
922MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
923MODULE_DESCRIPTION("Ceph filesystem for Linux");
924MODULE_LICENSE("GPL");

  1
  2#include <linux/ceph/ceph_debug.h>
  3
  4#include <linux/backing-dev.h>
  5#include <linux/ctype.h>
  6#include <linux/fs.h>
  7#include <linux/inet.h>
  8#include <linux/in6.h>
  9#include <linux/module.h>
 10#include <linux/mount.h>
 11#include <linux/parser.h>
 12#include <linux/sched.h>
 13#include <linux/seq_file.h>
 14#include <linux/slab.h>
 15#include <linux/statfs.h>
 16#include <linux/string.h>
 17
 18#include "super.h"
 19#include "mds_client.h"
 20
 21#include <linux/ceph/decode.h>
 22#include <linux/ceph/mon_client.h>
 23#include <linux/ceph/auth.h>
 24#include <linux/ceph/debugfs.h>
 25
 26/*
 27 * Ceph superblock operations
 28 *
 29 * Handle the basics of mounting, unmounting.
 30 */
 31
 32/*
 33 * super ops
 34 */
 35static void ceph_put_super(struct super_block *s)
 36{
 37	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
 38
 39	dout("put_super\n");
 40	ceph_mdsc_close_sessions(fsc->mdsc);
 41
 42	/*
 43	 * ensure we release the bdi before put_anon_super releases
 44	 * the device name.
 45	 */
 46	if (s->s_bdi == &fsc->backing_dev_info) {
 47		bdi_unregister(&fsc->backing_dev_info);
 48		s->s_bdi = NULL;
 49	}
 50
 51	return;
 52}
 53
 54static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 55{
 56	struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
 57	struct ceph_monmap *monmap = fsc->client->monc.monmap;
 58	struct ceph_statfs st;
 59	u64 fsid;
 60	int err;
 61
 62	dout("statfs\n");
 63	err = ceph_monc_do_statfs(&fsc->client->monc, &st);
 64	if (err < 0)
 65		return err;
 66
 67	/* fill in kstatfs */
 68	buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
 69
 70	/*
 71	 * express utilization in terms of large blocks to avoid
 72	 * overflow on 32-bit machines.
 73	 */
 74	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
 75	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
 76	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
 77	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
 78
 79	buf->f_files = le64_to_cpu(st.num_objects);
 80	buf->f_ffree = -1;
 81	buf->f_namelen = NAME_MAX;
 82	buf->f_frsize = PAGE_CACHE_SIZE;
 83
 84	/* leave fsid little-endian, regardless of host endianness */
 85	fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
 86	buf->f_fsid.val[0] = fsid & 0xffffffff;
 87	buf->f_fsid.val[1] = fsid >> 32;
 88
 89	return 0;
 90}
 91
 92
 93static int ceph_sync_fs(struct super_block *sb, int wait)
 94{
 95	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 96
 97	if (!wait) {
 98		dout("sync_fs (non-blocking)\n");
 99		ceph_flush_dirty_caps(fsc->mdsc);
100		dout("sync_fs (non-blocking) done\n");
101		return 0;
102	}
103
104	dout("sync_fs (blocking)\n");
105	ceph_osdc_sync(&fsc->client->osdc);
106	ceph_mdsc_sync(fsc->mdsc);
107	dout("sync_fs (blocking) done\n");
108	return 0;
109}
110
111/*
112 * mount options
113 */
/*
 * Mount option tokens.  The order is significant: parse_fsopt_token()
 * treats every token below Opt_last_int as carrying an integer
 * argument, every token between Opt_last_int and Opt_last_string as
 * carrying a string argument, and the remainder as bare flags.
 */
enum {
	/* integer arguments */
	Opt_wsize,
	Opt_rsize,
	Opt_rasize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_cap_release_safety,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,
	Opt_last_int,

	/* string arguments */
	Opt_snapdirname,
	Opt_last_string,

	/* flags, no argument */
	Opt_dirstat,
	Opt_nodirstat,
	Opt_rbytes,
	Opt_norbytes,
	Opt_asyncreaddir,
	Opt_noasyncreaddir,
	Opt_dcache,
	Opt_nodcache,
	Opt_ino32,
	Opt_noino32,
};
140
141static match_table_t fsopt_tokens = {
142	{Opt_wsize, "wsize=%d"},
143	{Opt_rsize, "rsize=%d"},
144	{Opt_rasize, "rasize=%d"},
145	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
146	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
147	{Opt_cap_release_safety, "cap_release_safety=%d"},
148	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
149	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
150	{Opt_congestion_kb, "write_congestion_kb=%d"},
151	/* int args above */
152	{Opt_snapdirname, "snapdirname=%s"},
153	/* string args above */
154	{Opt_dirstat, "dirstat"},
155	{Opt_nodirstat, "nodirstat"},
156	{Opt_rbytes, "rbytes"},
157	{Opt_norbytes, "norbytes"},
158	{Opt_asyncreaddir, "asyncreaddir"},
159	{Opt_noasyncreaddir, "noasyncreaddir"},
160	{Opt_dcache, "dcache"},
161	{Opt_nodcache, "nodcache"},
162	{Opt_ino32, "ino32"},
163	{Opt_noino32, "noino32"},
164	{-1, NULL}
165};
166
167static int parse_fsopt_token(char *c, void *private)
168{
169	struct ceph_mount_options *fsopt = private;
170	substring_t argstr[MAX_OPT_ARGS];
171	int token, intval, ret;
172
173	token = match_token((char *)c, fsopt_tokens, argstr);
174	if (token < 0)
175		return -EINVAL;
176
177	if (token < Opt_last_int) {
178		ret = match_int(&argstr[0], &intval);
179		if (ret < 0) {
180			pr_err("bad mount option arg (not int) "
181			       "at '%s'\n", c);
182			return ret;
183		}
184		dout("got int token %d val %d\n", token, intval);
185	} else if (token > Opt_last_int && token < Opt_last_string) {
186		dout("got string token %d val %s\n", token,
187		     argstr[0].from);
188	} else {
189		dout("got token %d\n", token);
190	}
191
192	switch (token) {
193	case Opt_snapdirname:
194		kfree(fsopt->snapdir_name);
195		fsopt->snapdir_name = kstrndup(argstr[0].from,
196					       argstr[0].to-argstr[0].from,
197					       GFP_KERNEL);
198		if (!fsopt->snapdir_name)
199			return -ENOMEM;
200		break;
201
202		/* misc */
203	case Opt_wsize:
204		fsopt->wsize = intval;
205		break;
206	case Opt_rsize:
207		fsopt->rsize = intval;
208		break;
209	case Opt_rasize:
210		fsopt->rasize = intval;
211		break;
212	case Opt_caps_wanted_delay_min:
213		fsopt->caps_wanted_delay_min = intval;
214		break;
215	case Opt_caps_wanted_delay_max:
216		fsopt->caps_wanted_delay_max = intval;
217		break;
218	case Opt_readdir_max_entries:
219		fsopt->max_readdir = intval;
220		break;
221	case Opt_readdir_max_bytes:
222		fsopt->max_readdir_bytes = intval;
223		break;
224	case Opt_congestion_kb:
225		fsopt->congestion_kb = intval;
226		break;
227	case Opt_dirstat:
228		fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
229		break;
230	case Opt_nodirstat:
231		fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
232		break;
233	case Opt_rbytes:
234		fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
235		break;
236	case Opt_norbytes:
237		fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
238		break;
239	case Opt_asyncreaddir:
240		fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
241		break;
242	case Opt_noasyncreaddir:
243		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
244		break;
245	case Opt_dcache:
246		fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
247		break;
248	case Opt_nodcache:
249		fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
250		break;
251	case Opt_ino32:
252		fsopt->flags |= CEPH_MOUNT_OPT_INO32;
253		break;
254	case Opt_noino32:
255		fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
256		break;
257	default:
258		BUG_ON(token);
259	}
260	return 0;
261}
262
263static void destroy_mount_options(struct ceph_mount_options *args)
264{
265	dout("destroy_mount_options %p\n", args);
266	kfree(args->snapdir_name);
267	kfree(args);
268}
269
/*
 * strcmp() that tolerates NULL arguments: two NULLs compare equal,
 * a lone NULL @s2 yields -1, and a lone NULL @s1 yields 1.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;	/* only s2 is NULL */
	return s2 ? 1 : 0;	/* only s1 is NULL, or both are */
}
280
281static int compare_mount_options(struct ceph_mount_options *new_fsopt,
282				 struct ceph_options *new_opt,
283				 struct ceph_fs_client *fsc)
284{
285	struct ceph_mount_options *fsopt1 = new_fsopt;
286	struct ceph_mount_options *fsopt2 = fsc->mount_options;
287	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
288	int ret;
289
290	ret = memcmp(fsopt1, fsopt2, ofs);
291	if (ret)
292		return ret;
293
294	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
295	if (ret)
296		return ret;
297
298	return ceph_compare_options(new_opt, fsc->client);
299}
300
301static int parse_mount_options(struct ceph_mount_options **pfsopt,
302			       struct ceph_options **popt,
303			       int flags, char *options,
304			       const char *dev_name,
305			       const char **path)
306{
307	struct ceph_mount_options *fsopt;
308	const char *dev_name_end;
309	int err = -ENOMEM;
310
311	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
312	if (!fsopt)
313		return -ENOMEM;
314
315	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
316
317	fsopt->sb_flags = flags;
318	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
319
320	fsopt->rsize = CEPH_RSIZE_DEFAULT;
321	fsopt->rasize = CEPH_RASIZE_DEFAULT;
322	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
323	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
324	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
325	fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
326	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
327	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
328	fsopt->congestion_kb = default_congestion_kb();
329
330	/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
331	err = -EINVAL;
332	if (!dev_name)
333		goto out;
334	*path = strstr(dev_name, ":/");
335	if (*path == NULL) {
336		pr_err("device name is missing path (no :/ in %s)\n",
337				dev_name);
338		goto out;
339	}
340	dev_name_end = *path;
341	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
342
343	/* path on server */
344	*path += 2;
345	dout("server path '%s'\n", *path);
346
347	*popt = ceph_parse_options(options, dev_name, dev_name_end,
348				 parse_fsopt_token, (void *)fsopt);
349	if (IS_ERR(*popt)) {
350		err = PTR_ERR(*popt);
351		goto out;
352	}
353
354	/* success */
355	*pfsopt = fsopt;
356	return 0;
357
358out:
359	destroy_mount_options(fsopt);
360	return err;
361}
362
363/**
364 * ceph_show_options - Show mount options in /proc/mounts
365 * @m: seq_file to write to
366 * @root: root of that (sub)tree
367 */
368static int ceph_show_options(struct seq_file *m, struct dentry *root)
369{
370	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
371	struct ceph_mount_options *fsopt = fsc->mount_options;
372	struct ceph_options *opt = fsc->client->options;
373
374	if (opt->flags & CEPH_OPT_FSID)
375		seq_printf(m, ",fsid=%pU", &opt->fsid);
376	if (opt->flags & CEPH_OPT_NOSHARE)
377		seq_puts(m, ",noshare");
378	if (opt->flags & CEPH_OPT_NOCRC)
379		seq_puts(m, ",nocrc");
380
381	if (opt->name)
382		seq_printf(m, ",name=%s", opt->name);
383	if (opt->key)
384		seq_puts(m, ",secret=<hidden>");
385
386	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
387		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
388	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
389		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
390	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
391		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
392	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
393		seq_printf(m, ",osdkeepalivetimeout=%d",
394			   opt->osd_keepalive_timeout);
395
396	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
397		seq_puts(m, ",dirstat");
398	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
399		seq_puts(m, ",norbytes");
400	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
401		seq_puts(m, ",noasyncreaddir");
402	if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
403		seq_puts(m, ",dcache");
404	else
405		seq_puts(m, ",nodcache");
406
407	if (fsopt->wsize)
408		seq_printf(m, ",wsize=%d", fsopt->wsize);
409	if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
410		seq_printf(m, ",rsize=%d", fsopt->rsize);
411	if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
412		seq_printf(m, ",rasize=%d", fsopt->rasize);
413	if (fsopt->congestion_kb != default_congestion_kb())
414		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
415	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
416		seq_printf(m, ",caps_wanted_delay_min=%d",
417			 fsopt->caps_wanted_delay_min);
418	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
419		seq_printf(m, ",caps_wanted_delay_max=%d",
420			   fsopt->caps_wanted_delay_max);
421	if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
422		seq_printf(m, ",cap_release_safety=%d",
423			   fsopt->cap_release_safety);
424	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
425		seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
426	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
427		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
428	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
429		seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
430	return 0;
431}
432
433/*
434 * handle any mon messages the standard library doesn't understand.
435 * return error if we don't either.
436 */
437static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
438{
439	struct ceph_fs_client *fsc = client->private;
440	int type = le16_to_cpu(msg->hdr.type);
441
442	switch (type) {
443	case CEPH_MSG_MDS_MAP:
444		ceph_mdsc_handle_map(fsc->mdsc, msg);
445		return 0;
446
447	default:
448		return -1;
449	}
450}
451
452/*
453 * create a new fs client
454 */
455static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
456					struct ceph_options *opt)
457{
458	struct ceph_fs_client *fsc;
459	const unsigned supported_features =
460		CEPH_FEATURE_FLOCK |
461		CEPH_FEATURE_DIRLAYOUTHASH;
462	const unsigned required_features = 0;
463	int err = -ENOMEM;
464
465	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
466	if (!fsc)
467		return ERR_PTR(-ENOMEM);
468
469	fsc->client = ceph_create_client(opt, fsc, supported_features,
470					 required_features);
471	if (IS_ERR(fsc->client)) {
472		err = PTR_ERR(fsc->client);
473		goto fail;
474	}
475	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
 
 
476	fsc->client->monc.want_mdsmap = 1;
477
478	fsc->mount_options = fsopt;
479
480	fsc->sb = NULL;
481	fsc->mount_state = CEPH_MOUNT_MOUNTING;
482
483	atomic_long_set(&fsc->writeback_count, 0);
484
485	err = bdi_init(&fsc->backing_dev_info);
486	if (err < 0)
487		goto fail_client;
488
489	err = -ENOMEM;
490	/*
491	 * The number of concurrent works can be high but they don't need
492	 * to be processed in parallel, limit concurrency.
493	 */
494	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
495	if (fsc->wb_wq == NULL)
496		goto fail_bdi;
497	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
498	if (fsc->pg_inv_wq == NULL)
499		goto fail_wb_wq;
500	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
501	if (fsc->trunc_wq == NULL)
502		goto fail_pg_inv_wq;
503
504	/* set up mempools */
505	err = -ENOMEM;
506	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
507			      fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
508	if (!fsc->wb_pagevec_pool)
509		goto fail_trunc_wq;
510
511	/* caps */
512	fsc->min_caps = fsopt->max_readdir;
513
514	return fsc;
515
516fail_trunc_wq:
517	destroy_workqueue(fsc->trunc_wq);
518fail_pg_inv_wq:
519	destroy_workqueue(fsc->pg_inv_wq);
520fail_wb_wq:
521	destroy_workqueue(fsc->wb_wq);
522fail_bdi:
523	bdi_destroy(&fsc->backing_dev_info);
524fail_client:
525	ceph_destroy_client(fsc->client);
526fail:
527	kfree(fsc);
528	return ERR_PTR(err);
529}
530
531static void destroy_fs_client(struct ceph_fs_client *fsc)
532{
533	dout("destroy_fs_client %p\n", fsc);
534
535	destroy_workqueue(fsc->wb_wq);
536	destroy_workqueue(fsc->pg_inv_wq);
537	destroy_workqueue(fsc->trunc_wq);
538
539	bdi_destroy(&fsc->backing_dev_info);
540
541	mempool_destroy(fsc->wb_pagevec_pool);
542
543	destroy_mount_options(fsc->mount_options);
544
545	ceph_fs_debugfs_cleanup(fsc);
546
547	ceph_destroy_client(fsc->client);
548
549	kfree(fsc);
550	dout("destroy_fs_client %p done\n", fsc);
551}
552
/*
 * slab caches, created in init_caches() and released in destroy_caches()
 */
struct kmem_cache *ceph_inode_cachep;	/* struct ceph_inode_info */
struct kmem_cache *ceph_cap_cachep;	/* struct ceph_cap */
struct kmem_cache *ceph_dentry_cachep;	/* struct ceph_dentry_info */
struct kmem_cache *ceph_file_cachep;	/* struct ceph_file_info */
560
561static void ceph_inode_init_once(void *foo)
562{
563	struct ceph_inode_info *ci = foo;
564	inode_init_once(&ci->vfs_inode);
565}
566
567static int __init init_caches(void)
568{
569	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
570				      sizeof(struct ceph_inode_info),
571				      __alignof__(struct ceph_inode_info),
572				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
573				      ceph_inode_init_once);
574	if (ceph_inode_cachep == NULL)
575		return -ENOMEM;
576
577	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
578				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
579	if (ceph_cap_cachep == NULL)
580		goto bad_cap;
581
582	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
583					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
584	if (ceph_dentry_cachep == NULL)
585		goto bad_dentry;
586
587	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
588				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
589	if (ceph_file_cachep == NULL)
590		goto bad_file;
591
592	return 0;
593
594bad_file:
595	kmem_cache_destroy(ceph_dentry_cachep);
596bad_dentry:
597	kmem_cache_destroy(ceph_cap_cachep);
598bad_cap:
599	kmem_cache_destroy(ceph_inode_cachep);
600	return -ENOMEM;
601}
602
603static void destroy_caches(void)
604{
605	kmem_cache_destroy(ceph_inode_cachep);
606	kmem_cache_destroy(ceph_cap_cachep);
607	kmem_cache_destroy(ceph_dentry_cachep);
608	kmem_cache_destroy(ceph_file_cachep);
609}
610
611
612/*
613 * ceph_umount_begin - initiate forced umount.  Tear down down the
614 * mount, skipping steps that may hang while waiting for server(s).
615 */
616static void ceph_umount_begin(struct super_block *sb)
617{
618	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
619
620	dout("ceph_umount_begin - starting forced umount\n");
621	if (!fsc)
622		return;
623	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
624	return;
625}
626
627static const struct super_operations ceph_super_ops = {
628	.alloc_inode	= ceph_alloc_inode,
629	.destroy_inode	= ceph_destroy_inode,
630	.write_inode    = ceph_write_inode,
631	.sync_fs        = ceph_sync_fs,
632	.put_super	= ceph_put_super,
633	.show_options   = ceph_show_options,
634	.statfs		= ceph_statfs,
635	.umount_begin   = ceph_umount_begin,
636};
637
638/*
639 * Bootstrap mount by opening the root directory.  Note the mount
640 * @started time from caller, and time out if this takes too long.
641 */
642static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
643				       const char *path,
644				       unsigned long started)
645{
646	struct ceph_mds_client *mdsc = fsc->mdsc;
647	struct ceph_mds_request *req = NULL;
648	int err;
649	struct dentry *root;
650
651	/* open dir */
652	dout("open_root_inode opening '%s'\n", path);
653	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
654	if (IS_ERR(req))
655		return ERR_CAST(req);
656	req->r_path1 = kstrdup(path, GFP_NOFS);
657	req->r_ino1.ino = CEPH_INO_ROOT;
658	req->r_ino1.snap = CEPH_NOSNAP;
659	req->r_started = started;
660	req->r_timeout = fsc->client->options->mount_timeout * HZ;
661	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
662	req->r_num_caps = 2;
663	err = ceph_mdsc_do_request(mdsc, NULL, req);
664	if (err == 0) {
665		struct inode *inode = req->r_target_inode;
 
 
 
 
 
666		req->r_target_inode = NULL;
667		dout("open_root_inode success\n");
668		if (ceph_ino(inode) == CEPH_INO_ROOT &&
669		    fsc->sb->s_root == NULL) {
670			root = d_make_root(inode);
671			if (!root) {
672				root = ERR_PTR(-ENOMEM);
673				goto out;
674			}
675		} else {
676			root = d_obtain_alias(inode);
677		}
678		ceph_init_dentry(root);
679		dout("open_root_inode success, root dentry is %p\n", root);
680	} else {
681		root = ERR_PTR(err);
682	}
683out:
684	ceph_mdsc_put_request(req);
685	return root;
686}
687
688
689
690
691/*
692 * mount: join the ceph cluster, and open root directory.
693 */
694static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
695		      const char *path)
696{
697	int err;
698	unsigned long started = jiffies;  /* note the start time */
699	struct dentry *root;
700	int first = 0;   /* first vfsmount for this super_block */
701
702	dout("mount start\n");
703	mutex_lock(&fsc->client->mount_mutex);
704
705	err = __ceph_open_session(fsc->client, started);
706	if (err < 0)
707		goto out;
708
709	dout("mount opening root\n");
710	root = open_root_dentry(fsc, "", started);
711	if (IS_ERR(root)) {
712		err = PTR_ERR(root);
713		goto out;
714	}
715	if (fsc->sb->s_root) {
716		dput(root);
717	} else {
718		fsc->sb->s_root = root;
719		first = 1;
720
721		err = ceph_fs_debugfs_init(fsc);
722		if (err < 0)
723			goto fail;
724	}
725
726	if (path[0] == 0) {
727		dget(root);
728	} else {
729		dout("mount opening base mountpoint\n");
730		root = open_root_dentry(fsc, path, started);
731		if (IS_ERR(root)) {
732			err = PTR_ERR(root);
733			goto fail;
734		}
735	}
736
737	fsc->mount_state = CEPH_MOUNT_MOUNTED;
738	dout("mount success\n");
739	mutex_unlock(&fsc->client->mount_mutex);
740	return root;
741
742out:
743	mutex_unlock(&fsc->client->mount_mutex);
744	return ERR_PTR(err);
745
746fail:
747	if (first) {
748		dput(fsc->sb->s_root);
749		fsc->sb->s_root = NULL;
750	}
751	goto out;
752}
753
754static int ceph_set_super(struct super_block *s, void *data)
755{
756	struct ceph_fs_client *fsc = data;
757	int ret;
758
759	dout("set_super %p data %p\n", s, data);
760
761	s->s_flags = fsc->mount_options->sb_flags;
762	s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
763
764	s->s_fs_info = fsc;
765	fsc->sb = s;
766
767	s->s_op = &ceph_super_ops;
768	s->s_export_op = &ceph_export_ops;
769
770	s->s_time_gran = 1000;  /* 1000 ns == 1 us */
771
772	ret = set_anon_super(s, NULL);  /* what is that second arg for? */
773	if (ret != 0)
774		goto fail;
775
776	return ret;
777
778fail:
779	s->s_fs_info = NULL;
780	fsc->sb = NULL;
781	return ret;
782}
783
784/*
785 * share superblock if same fs AND options
786 */
787static int ceph_compare_super(struct super_block *sb, void *data)
788{
789	struct ceph_fs_client *new = data;
790	struct ceph_mount_options *fsopt = new->mount_options;
791	struct ceph_options *opt = new->client->options;
792	struct ceph_fs_client *other = ceph_sb_to_client(sb);
793
794	dout("ceph_compare_super %p\n", sb);
795
796	if (compare_mount_options(fsopt, opt, other)) {
797		dout("monitor(s)/mount options don't match\n");
798		return 0;
799	}
800	if ((opt->flags & CEPH_OPT_FSID) &&
801	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
802		dout("fsid doesn't match\n");
803		return 0;
804	}
805	if (fsopt->sb_flags != other->mount_options->sb_flags) {
806		dout("flags differ\n");
807		return 0;
808	}
809	return 1;
810}
811
812/*
813 * construct our own bdi so we can control readahead, etc.
814 */
815static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
816
817static int ceph_register_bdi(struct super_block *sb,
818			     struct ceph_fs_client *fsc)
819{
820	int err;
821
822	/* set ra_pages based on rasize mount option? */
823	if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
824		fsc->backing_dev_info.ra_pages =
825			(fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
826			>> PAGE_SHIFT;
827	else
828		fsc->backing_dev_info.ra_pages =
829			default_backing_dev_info.ra_pages;
830
831	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
832			   atomic_long_inc_return(&bdi_seq));
833	if (!err)
834		sb->s_bdi = &fsc->backing_dev_info;
835	return err;
836}
837
838static struct dentry *ceph_mount(struct file_system_type *fs_type,
839		       int flags, const char *dev_name, void *data)
840{
841	struct super_block *sb;
842	struct ceph_fs_client *fsc;
843	struct dentry *res;
844	int err;
845	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
846	const char *path = NULL;
847	struct ceph_mount_options *fsopt = NULL;
848	struct ceph_options *opt = NULL;
849
850	dout("ceph_mount\n");
851	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
852	if (err < 0) {
853		res = ERR_PTR(err);
854		goto out_final;
855	}
856
857	/* create client (which we may/may not use) */
858	fsc = create_fs_client(fsopt, opt);
859	if (IS_ERR(fsc)) {
860		res = ERR_CAST(fsc);
861		destroy_mount_options(fsopt);
862		ceph_destroy_options(opt);
863		goto out_final;
864	}
865
866	err = ceph_mdsc_init(fsc);
867	if (err < 0) {
868		res = ERR_PTR(err);
869		goto out;
870	}
871
872	if (ceph_test_opt(fsc->client, NOSHARE))
873		compare_super = NULL;
874	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
875	if (IS_ERR(sb)) {
876		res = ERR_CAST(sb);
877		goto out;
878	}
879
880	if (ceph_sb_to_client(sb) != fsc) {
881		ceph_mdsc_destroy(fsc);
882		destroy_fs_client(fsc);
883		fsc = ceph_sb_to_client(sb);
884		dout("get_sb got existing client %p\n", fsc);
885	} else {
886		dout("get_sb using new client %p\n", fsc);
887		err = ceph_register_bdi(sb, fsc);
888		if (err < 0) {
889			res = ERR_PTR(err);
890			goto out_splat;
891		}
892	}
893
894	res = ceph_real_mount(fsc, path);
895	if (IS_ERR(res))
896		goto out_splat;
897	dout("root %p inode %p ino %llx.%llx\n", res,
898	     res->d_inode, ceph_vinop(res->d_inode));
899	return res;
900
901out_splat:
902	ceph_mdsc_close_sessions(fsc->mdsc);
903	deactivate_locked_super(sb);
904	goto out_final;
905
906out:
907	ceph_mdsc_destroy(fsc);
908	destroy_fs_client(fsc);
909out_final:
910	dout("ceph_mount fail %ld\n", PTR_ERR(res));
911	return res;
912}
913
914static void ceph_kill_sb(struct super_block *s)
915{
916	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
917	dout("kill_sb %p\n", s);
918	ceph_mdsc_pre_umount(fsc->mdsc);
919	kill_anon_super(s);    /* will call put_super after sb is r/o */
920	ceph_mdsc_destroy(fsc);
921	destroy_fs_client(fsc);
922}
923
924static struct file_system_type ceph_fs_type = {
925	.owner		= THIS_MODULE,
926	.name		= "ceph",
927	.mount		= ceph_mount,
928	.kill_sb	= ceph_kill_sb,
929	.fs_flags	= FS_RENAME_DOES_D_MOVE,
930};
931
932#define _STRINGIFY(x) #x
933#define STRINGIFY(x) _STRINGIFY(x)
934
935static int __init init_ceph(void)
936{
937	int ret = init_caches();
938	if (ret)
939		goto out;
940
941	ceph_xattr_init();
942	ret = register_filesystem(&ceph_fs_type);
943	if (ret)
944		goto out_icache;
945
946	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
947
948	return 0;
949
950out_icache:
951	ceph_xattr_exit();
952	destroy_caches();
953out:
954	return ret;
955}
956
957static void __exit exit_ceph(void)
958{
959	dout("exit_ceph\n");
960	unregister_filesystem(&ceph_fs_type);
961	ceph_xattr_exit();
962	destroy_caches();
963}
964
965module_init(init_ceph);
966module_exit(exit_ceph);
967
968MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
969MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
970MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
971MODULE_DESCRIPTION("Ceph filesystem for Linux");
972MODULE_LICENSE("GPL");