Linux Audio

Check our new training course

Loading...
v4.10.11
 
  1/*
  2 * binfmt_misc.c
  3 *
  4 * Copyright (C) 1997 Richard Günther
  5 *
  6 * binfmt_misc detects binaries via a magic or filename extension and invokes
  7 * a specified wrapper. See Documentation/binfmt_misc.txt for more details.
  8 */
  9
 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 11
 12#include <linux/kernel.h>
 13#include <linux/module.h>
 14#include <linux/init.h>
 15#include <linux/sched.h>
 16#include <linux/magic.h>
 17#include <linux/binfmts.h>
 18#include <linux/slab.h>
 19#include <linux/ctype.h>
 20#include <linux/string_helpers.h>
 21#include <linux/file.h>
 22#include <linux/pagemap.h>
 23#include <linux/namei.h>
 24#include <linux/mount.h>
 
 25#include <linux/syscalls.h>
 26#include <linux/fs.h>
 27#include <linux/uaccess.h>
 28
 29#include "internal.h"
 30
 31#ifdef DEBUG
 32# define USE_DEBUG 1
 33#else
 34# define USE_DEBUG 0
 35#endif
 36
 37enum {
 38	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
 39};
 40
 41static LIST_HEAD(entries);
 42static int enabled = 1;
 43
 44enum {Enabled, Magic};
 45#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
 46#define MISC_FMT_OPEN_BINARY (1 << 30)
 47#define MISC_FMT_CREDENTIALS (1 << 29)
 48#define MISC_FMT_OPEN_FILE (1 << 28)
 49
 50typedef struct {
 51	struct list_head list;
 52	unsigned long flags;		/* type, status, etc. */
 53	int offset;			/* offset of magic */
 54	int size;			/* size of magic/mask */
 55	char *magic;			/* magic or filename extension */
 56	char *mask;			/* mask, NULL for exact match */
 57	char *interpreter;		/* filename of interpreter */
 58	char *name;
 59	struct dentry *dentry;
 60	struct file *interp_file;
 
 61} Node;
 62
 63static DEFINE_RWLOCK(entries_lock);
 64static struct file_system_type bm_fs_type;
 65static struct vfsmount *bm_mnt;
 66static int entry_count;
 67
 68/*
 69 * Max length of the register string.  Determined by:
 70 *  - 7 delimiters
 71 *  - name:   ~50 bytes
 72 *  - type:   1 byte
 73 *  - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
 74 *  - magic:  128 bytes (512 in escaped form)
 75 *  - mask:   128 bytes (512 in escaped form)
 76 *  - interp: ~50 bytes
 77 *  - flags:  5 bytes
 78 * Round that up a bit, and then back off to hold the internal data
 79 * (like struct Node).
 80 */
 81#define MAX_REGISTER_LENGTH 1920
 82
 83/*
 84 * Check if we support the binfmt
 85 * if we do, return the node, else NULL
 86 * locking is done in load_misc_binary
 
 
 
 
 
 87 */
 88static Node *check_file(struct linux_binprm *bprm)
 
 89{
 90	char *p = strrchr(bprm->interp, '.');
 91	struct list_head *l;
 92
 93	/* Walk all the registered handlers. */
 94	list_for_each(l, &entries) {
 95		Node *e = list_entry(l, Node, list);
 96		char *s;
 97		int j;
 98
 99		/* Make sure this one is currently enabled. */
100		if (!test_bit(Enabled, &e->flags))
101			continue;
102
103		/* Do matching based on extension if applicable. */
104		if (!test_bit(Magic, &e->flags)) {
105			if (p && !strcmp(e->magic, p + 1))
106				return e;
107			continue;
108		}
109
110		/* Do matching based on magic & mask. */
111		s = bprm->buf + e->offset;
112		if (e->mask) {
113			for (j = 0; j < e->size; j++)
114				if ((*s++ ^ e->magic[j]) & e->mask[j])
115					break;
116		} else {
117			for (j = 0; j < e->size; j++)
118				if ((*s++ ^ e->magic[j]))
119					break;
120		}
121		if (j == e->size)
122			return e;
123	}
 
124	return NULL;
125}
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127/*
128 * the loader itself
129 */
130static int load_misc_binary(struct linux_binprm *bprm)
131{
132	Node *fmt;
133	struct file *interp_file = NULL;
134	char iname[BINPRM_BUF_SIZE];
135	const char *iname_addr = iname;
136	int retval;
137	int fd_binary = -1;
138
139	retval = -ENOEXEC;
140	if (!enabled)
141		goto ret;
142
143	/* to keep locking time low, we copy the interpreter string */
144	read_lock(&entries_lock);
145	fmt = check_file(bprm);
146	if (fmt)
147		strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
148	read_unlock(&entries_lock);
149	if (!fmt)
150		goto ret;
151
152	/* Need to be able to load the file after exec */
 
153	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
154		return -ENOENT;
155
156	if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
 
 
157		retval = remove_arg_zero(bprm);
158		if (retval)
159			goto ret;
160	}
161
162	if (fmt->flags & MISC_FMT_OPEN_BINARY) {
163
164		/* if the binary should be opened on behalf of the
165		 * interpreter than keep it open and assign descriptor
166		 * to it
167		 */
168		fd_binary = get_unused_fd_flags(0);
169		if (fd_binary < 0) {
170			retval = fd_binary;
171			goto ret;
172		}
173		fd_install(fd_binary, bprm->file);
174
175		/* if the binary is not readable than enforce mm->dumpable=0
176		   regardless of the interpreter's permissions */
177		would_dump(bprm, bprm->file);
178
179		allow_write_access(bprm->file);
180		bprm->file = NULL;
181
182		/* mark the bprm that fd should be passed to interp */
183		bprm->interp_flags |= BINPRM_FLAGS_EXECFD;
184		bprm->interp_data = fd_binary;
185
186	} else {
187		allow_write_access(bprm->file);
188		fput(bprm->file);
189		bprm->file = NULL;
190	}
191	/* make argv[1] be the path to the binary */
192	retval = copy_strings_kernel(1, &bprm->interp, bprm);
193	if (retval < 0)
194		goto error;
195	bprm->argc++;
196
197	/* add the interp as argv[0] */
198	retval = copy_strings_kernel(1, &iname_addr, bprm);
199	if (retval < 0)
200		goto error;
201	bprm->argc++;
202
203	/* Update interp in case binfmt_script needs it. */
204	retval = bprm_change_interp(iname, bprm);
205	if (retval < 0)
206		goto error;
207
208	if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) {
209		interp_file = filp_clone_open(fmt->interp_file);
210		if (!IS_ERR(interp_file))
211			deny_write_access(interp_file);
212	} else {
213		interp_file = open_exec(iname);
214	}
215	retval = PTR_ERR(interp_file);
216	if (IS_ERR(interp_file))
217		goto error;
218
219	bprm->file = interp_file;
220	if (fmt->flags & MISC_FMT_CREDENTIALS) {
221		/*
222		 * No need to call prepare_binprm(), it's already been
223		 * done.  bprm->buf is stale, update from interp_file.
224		 */
225		memset(bprm->buf, 0, BINPRM_BUF_SIZE);
226		retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
227	} else
228		retval = prepare_binprm(bprm);
229
230	if (retval < 0)
231		goto error;
232
233	retval = search_binary_handler(bprm);
234	if (retval < 0)
235		goto error;
 
 
 
 
 
236
237ret:
238	return retval;
239error:
240	if (fd_binary > 0)
241		sys_close(fd_binary);
242	bprm->interp_flags = 0;
243	bprm->interp_data = 0;
244	goto ret;
245}
246
247/* Command parsers */
248
249/*
250 * parses and copies one argument enclosed in del from *sp to *dp,
251 * recognising the \x special.
252 * returns pointer to the copied argument or NULL in case of an
253 * error (and sets err) or null argument length.
254 */
255static char *scanarg(char *s, char del)
256{
257	char c;
258
259	while ((c = *s++) != del) {
260		if (c == '\\' && *s == 'x') {
261			s++;
262			if (!isxdigit(*s++))
263				return NULL;
264			if (!isxdigit(*s++))
265				return NULL;
266		}
267	}
268	s[-1] ='\0';
269	return s;
270}
271
272static char *check_special_flags(char *sfs, Node *e)
273{
274	char *p = sfs;
275	int cont = 1;
276
277	/* special flags */
278	while (cont) {
279		switch (*p) {
280		case 'P':
281			pr_debug("register: flag: P (preserve argv0)\n");
282			p++;
283			e->flags |= MISC_FMT_PRESERVE_ARGV0;
284			break;
285		case 'O':
286			pr_debug("register: flag: O (open binary)\n");
287			p++;
288			e->flags |= MISC_FMT_OPEN_BINARY;
289			break;
290		case 'C':
291			pr_debug("register: flag: C (preserve creds)\n");
292			p++;
293			/* this flags also implies the
294			   open-binary flag */
295			e->flags |= (MISC_FMT_CREDENTIALS |
296					MISC_FMT_OPEN_BINARY);
297			break;
298		case 'F':
299			pr_debug("register: flag: F: open interpreter file now\n");
300			p++;
301			e->flags |= MISC_FMT_OPEN_FILE;
302			break;
303		default:
304			cont = 0;
305		}
306	}
307
308	return p;
309}
310
311/*
312 * This registers a new binary format, it recognises the syntax
313 * ':name:type:offset:magic:mask:interpreter:flags'
314 * where the ':' is the IFS, that can be chosen with the first char
315 */
316static Node *create_entry(const char __user *buffer, size_t count)
317{
318	Node *e;
319	int memsize, err;
320	char *buf, *p;
321	char del;
322
323	pr_debug("register: received %zu bytes\n", count);
324
325	/* some sanity checks */
326	err = -EINVAL;
327	if ((count < 11) || (count > MAX_REGISTER_LENGTH))
328		goto out;
329
330	err = -ENOMEM;
331	memsize = sizeof(Node) + count + 8;
332	e = kmalloc(memsize, GFP_KERNEL);
333	if (!e)
334		goto out;
335
336	p = buf = (char *)e + sizeof(Node);
337
338	memset(e, 0, sizeof(Node));
339	if (copy_from_user(buf, buffer, count))
340		goto efault;
341
342	del = *p++;	/* delimeter */
343
344	pr_debug("register: delim: %#x {%c}\n", del, del);
345
346	/* Pad the buffer with the delim to simplify parsing below. */
347	memset(buf + count, del, 8);
348
349	/* Parse the 'name' field. */
350	e->name = p;
351	p = strchr(p, del);
352	if (!p)
353		goto einval;
354	*p++ = '\0';
355	if (!e->name[0] ||
356	    !strcmp(e->name, ".") ||
357	    !strcmp(e->name, "..") ||
358	    strchr(e->name, '/'))
359		goto einval;
360
361	pr_debug("register: name: {%s}\n", e->name);
362
363	/* Parse the 'type' field. */
364	switch (*p++) {
365	case 'E':
366		pr_debug("register: type: E (extension)\n");
367		e->flags = 1 << Enabled;
368		break;
369	case 'M':
370		pr_debug("register: type: M (magic)\n");
371		e->flags = (1 << Enabled) | (1 << Magic);
372		break;
373	default:
374		goto einval;
375	}
376	if (*p++ != del)
377		goto einval;
378
379	if (test_bit(Magic, &e->flags)) {
380		/* Handle the 'M' (magic) format. */
381		char *s;
382
383		/* Parse the 'offset' field. */
384		s = strchr(p, del);
385		if (!s)
386			goto einval;
387		*s++ = '\0';
388		e->offset = simple_strtoul(p, &p, 10);
 
 
 
 
 
389		if (*p++)
390			goto einval;
391		pr_debug("register: offset: %#x\n", e->offset);
392
393		/* Parse the 'magic' field. */
394		e->magic = p;
395		p = scanarg(p, del);
396		if (!p)
397			goto einval;
398		if (!e->magic[0])
399			goto einval;
400		if (USE_DEBUG)
401			print_hex_dump_bytes(
402				KBUILD_MODNAME ": register: magic[raw]: ",
403				DUMP_PREFIX_NONE, e->magic, p - e->magic);
404
405		/* Parse the 'mask' field. */
406		e->mask = p;
407		p = scanarg(p, del);
408		if (!p)
409			goto einval;
410		if (!e->mask[0]) {
411			e->mask = NULL;
412			pr_debug("register:  mask[raw]: none\n");
413		} else if (USE_DEBUG)
414			print_hex_dump_bytes(
415				KBUILD_MODNAME ": register:  mask[raw]: ",
416				DUMP_PREFIX_NONE, e->mask, p - e->mask);
417
418		/*
419		 * Decode the magic & mask fields.
420		 * Note: while we might have accepted embedded NUL bytes from
421		 * above, the unescape helpers here will stop at the first one
422		 * it encounters.
423		 */
424		e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
425		if (e->mask &&
426		    string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
427			goto einval;
428		if (e->size + e->offset > BINPRM_BUF_SIZE)
 
429			goto einval;
430		pr_debug("register: magic/mask length: %i\n", e->size);
431		if (USE_DEBUG) {
432			print_hex_dump_bytes(
433				KBUILD_MODNAME ": register: magic[decoded]: ",
434				DUMP_PREFIX_NONE, e->magic, e->size);
435
436			if (e->mask) {
437				int i;
438				char *masked = kmalloc(e->size, GFP_KERNEL);
439
440				print_hex_dump_bytes(
441					KBUILD_MODNAME ": register:  mask[decoded]: ",
442					DUMP_PREFIX_NONE, e->mask, e->size);
443
444				if (masked) {
445					for (i = 0; i < e->size; ++i)
446						masked[i] = e->magic[i] & e->mask[i];
447					print_hex_dump_bytes(
448						KBUILD_MODNAME ": register:  magic[masked]: ",
449						DUMP_PREFIX_NONE, masked, e->size);
450
451					kfree(masked);
452				}
453			}
454		}
455	} else {
456		/* Handle the 'E' (extension) format. */
457
458		/* Skip the 'offset' field. */
459		p = strchr(p, del);
460		if (!p)
461			goto einval;
462		*p++ = '\0';
463
464		/* Parse the 'magic' field. */
465		e->magic = p;
466		p = strchr(p, del);
467		if (!p)
468			goto einval;
469		*p++ = '\0';
470		if (!e->magic[0] || strchr(e->magic, '/'))
471			goto einval;
472		pr_debug("register: extension: {%s}\n", e->magic);
473
474		/* Skip the 'mask' field. */
475		p = strchr(p, del);
476		if (!p)
477			goto einval;
478		*p++ = '\0';
479	}
480
481	/* Parse the 'interpreter' field. */
482	e->interpreter = p;
483	p = strchr(p, del);
484	if (!p)
485		goto einval;
486	*p++ = '\0';
487	if (!e->interpreter[0])
488		goto einval;
489	pr_debug("register: interpreter: {%s}\n", e->interpreter);
490
491	/* Parse the 'flags' field. */
492	p = check_special_flags(p, e);
493	if (*p == '\n')
494		p++;
495	if (p != buf + count)
496		goto einval;
497
498	return e;
499
500out:
501	return ERR_PTR(err);
502
503efault:
504	kfree(e);
505	return ERR_PTR(-EFAULT);
506einval:
507	kfree(e);
508	return ERR_PTR(-EINVAL);
509}
510
511/*
512 * Set status of entry/binfmt_misc:
513 * '1' enables, '0' disables and '-1' clears entry/binfmt_misc
514 */
515static int parse_command(const char __user *buffer, size_t count)
516{
517	char s[4];
518
519	if (count > 3)
520		return -EINVAL;
521	if (copy_from_user(s, buffer, count))
522		return -EFAULT;
523	if (!count)
524		return 0;
525	if (s[count - 1] == '\n')
526		count--;
527	if (count == 1 && s[0] == '0')
528		return 1;
529	if (count == 1 && s[0] == '1')
530		return 2;
531	if (count == 2 && s[0] == '-' && s[1] == '1')
532		return 3;
533	return -EINVAL;
534}
535
536/* generic stuff */
537
538static void entry_status(Node *e, char *page)
539{
540	char *dp = page;
541	const char *status = "disabled";
542
543	if (test_bit(Enabled, &e->flags))
544		status = "enabled";
545
546	if (!VERBOSE_STATUS) {
547		sprintf(page, "%s\n", status);
548		return;
549	}
550
551	dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
552
553	/* print the special flags */
554	dp += sprintf(dp, "flags: ");
555	if (e->flags & MISC_FMT_PRESERVE_ARGV0)
556		*dp++ = 'P';
557	if (e->flags & MISC_FMT_OPEN_BINARY)
558		*dp++ = 'O';
559	if (e->flags & MISC_FMT_CREDENTIALS)
560		*dp++ = 'C';
561	if (e->flags & MISC_FMT_OPEN_FILE)
562		*dp++ = 'F';
563	*dp++ = '\n';
564
565	if (!test_bit(Magic, &e->flags)) {
566		sprintf(dp, "extension .%s\n", e->magic);
567	} else {
568		dp += sprintf(dp, "offset %i\nmagic ", e->offset);
569		dp = bin2hex(dp, e->magic, e->size);
570		if (e->mask) {
571			dp += sprintf(dp, "\nmask ");
572			dp = bin2hex(dp, e->mask, e->size);
573		}
574		*dp++ = '\n';
575		*dp = '\0';
576	}
577}
578
579static struct inode *bm_get_inode(struct super_block *sb, int mode)
580{
581	struct inode *inode = new_inode(sb);
582
583	if (inode) {
584		inode->i_ino = get_next_ino();
585		inode->i_mode = mode;
586		inode->i_atime = inode->i_mtime = inode->i_ctime =
587			current_time(inode);
588	}
589	return inode;
590}
591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592static void bm_evict_inode(struct inode *inode)
593{
 
 
594	clear_inode(inode);
595	kfree(inode->i_private);
 
 
 
 
 
 
 
 
 
 
596}
597
598static void kill_node(Node *e)
 
 
 
 
 
 
 
 
 
 
 
599{
600	struct dentry *dentry;
 
 
 
 
 
 
 
 
 
 
601
602	write_lock(&entries_lock);
603	dentry = e->dentry;
604	if (dentry) {
605		list_del_init(&e->list);
606		e->dentry = NULL;
607	}
608	write_unlock(&entries_lock);
609
610	if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) {
611		filp_close(e->interp_file, NULL);
612		e->interp_file = NULL;
613	}
614
615	if (dentry) {
616		drop_nlink(d_inode(dentry));
617		d_drop(dentry);
618		dput(dentry);
619		simple_release_fs(&bm_mnt, &entry_count);
620	}
621}
622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623/* /<entry> */
624
625static ssize_t
626bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
627{
628	Node *e = file_inode(file)->i_private;
629	ssize_t res;
630	char *page;
631
632	page = (char *) __get_free_page(GFP_KERNEL);
633	if (!page)
634		return -ENOMEM;
635
636	entry_status(e, page);
637
638	res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
639
640	free_page((unsigned long) page);
641	return res;
642}
643
644static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
645				size_t count, loff_t *ppos)
646{
647	struct dentry *root;
648	Node *e = file_inode(file)->i_private;
649	int res = parse_command(buffer, count);
650
651	switch (res) {
652	case 1:
653		/* Disable this handler. */
654		clear_bit(Enabled, &e->flags);
655		break;
656	case 2:
657		/* Enable this handler. */
658		set_bit(Enabled, &e->flags);
659		break;
660	case 3:
661		/* Delete this handler. */
662		root = file_inode(file)->i_sb->s_root;
663		inode_lock(d_inode(root));
664
665		kill_node(e);
 
 
 
 
 
 
 
 
 
 
666
667		inode_unlock(d_inode(root));
668		break;
669	default:
670		return res;
671	}
672
673	return count;
674}
675
676static const struct file_operations bm_entry_operations = {
677	.read		= bm_entry_read,
678	.write		= bm_entry_write,
679	.llseek		= default_llseek,
680};
681
682/* /register */
683
684static ssize_t bm_register_write(struct file *file, const char __user *buffer,
685			       size_t count, loff_t *ppos)
686{
687	Node *e;
688	struct inode *inode;
689	struct super_block *sb = file_inode(file)->i_sb;
690	struct dentry *root = sb->s_root, *dentry;
 
691	int err = 0;
 
692
693	e = create_entry(buffer, count);
694
695	if (IS_ERR(e))
696		return PTR_ERR(e);
697
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698	inode_lock(d_inode(root));
699	dentry = lookup_one_len(e->name, root, strlen(e->name));
700	err = PTR_ERR(dentry);
701	if (IS_ERR(dentry))
702		goto out;
703
704	err = -EEXIST;
705	if (d_really_is_positive(dentry))
706		goto out2;
707
708	inode = bm_get_inode(sb, S_IFREG | 0644);
709
710	err = -ENOMEM;
711	if (!inode)
712		goto out2;
713
714	err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count);
715	if (err) {
716		iput(inode);
717		inode = NULL;
718		goto out2;
719	}
720
721	if (e->flags & MISC_FMT_OPEN_FILE) {
722		struct file *f;
723
724		f = open_exec(e->interpreter);
725		if (IS_ERR(f)) {
726			err = PTR_ERR(f);
727			pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
728			simple_release_fs(&bm_mnt, &entry_count);
729			iput(inode);
730			inode = NULL;
731			goto out2;
732		}
733		e->interp_file = f;
734	}
735
736	e->dentry = dget(dentry);
737	inode->i_private = e;
738	inode->i_fop = &bm_entry_operations;
739
740	d_instantiate(dentry, inode);
741	write_lock(&entries_lock);
742	list_add(&e->list, &entries);
743	write_unlock(&entries_lock);
 
744
745	err = 0;
746out2:
747	dput(dentry);
748out:
749	inode_unlock(d_inode(root));
750
751	if (err) {
 
 
752		kfree(e);
753		return err;
754	}
755	return count;
756}
757
758static const struct file_operations bm_register_operations = {
759	.write		= bm_register_write,
760	.llseek		= noop_llseek,
761};
762
763/* /status */
764
765static ssize_t
766bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
767{
768	char *s = enabled ? "enabled\n" : "disabled\n";
 
769
 
 
770	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
771}
772
773static ssize_t bm_status_write(struct file *file, const char __user *buffer,
774		size_t count, loff_t *ppos)
775{
 
776	int res = parse_command(buffer, count);
777	struct dentry *root;
 
778
 
779	switch (res) {
780	case 1:
781		/* Disable all handlers. */
782		enabled = 0;
783		break;
784	case 2:
785		/* Enable all handlers. */
786		enabled = 1;
787		break;
788	case 3:
789		/* Delete all handlers. */
790		root = file_inode(file)->i_sb->s_root;
791		inode_lock(d_inode(root));
792
793		while (!list_empty(&entries))
794			kill_node(list_entry(entries.next, Node, list));
 
 
 
 
 
 
 
 
 
795
796		inode_unlock(d_inode(root));
797		break;
798	default:
799		return res;
800	}
801
802	return count;
803}
804
805static const struct file_operations bm_status_operations = {
806	.read		= bm_status_read,
807	.write		= bm_status_write,
808	.llseek		= default_llseek,
809};
810
811/* Superblock handling */
812
 
 
 
 
 
 
 
 
813static const struct super_operations s_ops = {
814	.statfs		= simple_statfs,
815	.evict_inode	= bm_evict_inode,
 
816};
817
818static int bm_fill_super(struct super_block *sb, void *data, int silent)
819{
820	int err;
821	static struct tree_descr bm_files[] = {
 
 
822		[2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
823		[3] = {"register", &bm_register_operations, S_IWUSR},
824		/* last one */ {""}
825	};
826
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
827	err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
828	if (!err)
829		sb->s_op = &s_ops;
830	return err;
831}
832
833static struct dentry *bm_mount(struct file_system_type *fs_type,
834	int flags, const char *dev_name, void *data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
835{
836	return mount_single(fs_type, flags, data, bm_fill_super);
 
837}
838
839static struct linux_binfmt misc_format = {
840	.module = THIS_MODULE,
841	.load_binary = load_misc_binary,
842};
843
844static struct file_system_type bm_fs_type = {
845	.owner		= THIS_MODULE,
846	.name		= "binfmt_misc",
847	.mount		= bm_mount,
 
848	.kill_sb	= kill_litter_super,
849};
850MODULE_ALIAS_FS("binfmt_misc");
851
852static int __init init_misc_binfmt(void)
853{
854	int err = register_filesystem(&bm_fs_type);
855	if (!err)
856		insert_binfmt(&misc_format);
857	return err;
858}
859
860static void __exit exit_misc_binfmt(void)
861{
862	unregister_binfmt(&misc_format);
863	unregister_filesystem(&bm_fs_type);
864}
865
866core_initcall(init_misc_binfmt);
867module_exit(exit_misc_binfmt);
 
868MODULE_LICENSE("GPL");
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * binfmt_misc.c
   4 *
   5 * Copyright (C) 1997 Richard Günther
   6 *
   7 * binfmt_misc detects binaries via a magic or filename extension and invokes
   8 * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details.
   9 */
  10
  11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  12
  13#include <linux/kernel.h>
  14#include <linux/module.h>
  15#include <linux/init.h>
  16#include <linux/sched/mm.h>
  17#include <linux/magic.h>
  18#include <linux/binfmts.h>
  19#include <linux/slab.h>
  20#include <linux/ctype.h>
  21#include <linux/string_helpers.h>
  22#include <linux/file.h>
  23#include <linux/pagemap.h>
  24#include <linux/namei.h>
  25#include <linux/mount.h>
  26#include <linux/fs_context.h>
  27#include <linux/syscalls.h>
  28#include <linux/fs.h>
  29#include <linux/uaccess.h>
  30
  31#include "internal.h"
  32
  33#ifdef DEBUG
  34# define USE_DEBUG 1
  35#else
  36# define USE_DEBUG 0
  37#endif
  38
  39enum {
  40	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
  41};
  42
 
 
 
  43enum {Enabled, Magic};
  44#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
  45#define MISC_FMT_OPEN_BINARY (1UL << 30)
  46#define MISC_FMT_CREDENTIALS (1UL << 29)
  47#define MISC_FMT_OPEN_FILE (1UL << 28)
  48
  49typedef struct {
  50	struct list_head list;
  51	unsigned long flags;		/* type, status, etc. */
  52	int offset;			/* offset of magic */
  53	int size;			/* size of magic/mask */
  54	char *magic;			/* magic or filename extension */
  55	char *mask;			/* mask, NULL for exact match */
  56	const char *interpreter;	/* filename of interpreter */
  57	char *name;
  58	struct dentry *dentry;
  59	struct file *interp_file;
  60	refcount_t users;		/* sync removal with load_misc_binary() */
  61} Node;
  62
 
  63static struct file_system_type bm_fs_type;
 
 
  64
  65/*
  66 * Max length of the register string.  Determined by:
  67 *  - 7 delimiters
  68 *  - name:   ~50 bytes
  69 *  - type:   1 byte
  70 *  - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
  71 *  - magic:  128 bytes (512 in escaped form)
  72 *  - mask:   128 bytes (512 in escaped form)
  73 *  - interp: ~50 bytes
  74 *  - flags:  5 bytes
  75 * Round that up a bit, and then back off to hold the internal data
  76 * (like struct Node).
  77 */
  78#define MAX_REGISTER_LENGTH 1920
  79
  80/**
  81 * search_binfmt_handler - search for a binary handler for @bprm
  82 * @misc: handle to binfmt_misc instance
  83 * @bprm: binary for which we are looking for a handler
  84 *
  85 * Search for a binary type handler for @bprm in the list of registered binary
  86 * type handlers.
  87 *
  88 * Return: binary type list entry on success, NULL on failure
  89 */
  90static Node *search_binfmt_handler(struct binfmt_misc *misc,
  91				   struct linux_binprm *bprm)
  92{
  93	char *p = strrchr(bprm->interp, '.');
  94	Node *e;
  95
  96	/* Walk all the registered handlers. */
  97	list_for_each_entry(e, &misc->entries, list) {
 
  98		char *s;
  99		int j;
 100
 101		/* Make sure this one is currently enabled. */
 102		if (!test_bit(Enabled, &e->flags))
 103			continue;
 104
 105		/* Do matching based on extension if applicable. */
 106		if (!test_bit(Magic, &e->flags)) {
 107			if (p && !strcmp(e->magic, p + 1))
 108				return e;
 109			continue;
 110		}
 111
 112		/* Do matching based on magic & mask. */
 113		s = bprm->buf + e->offset;
 114		if (e->mask) {
 115			for (j = 0; j < e->size; j++)
 116				if ((*s++ ^ e->magic[j]) & e->mask[j])
 117					break;
 118		} else {
 119			for (j = 0; j < e->size; j++)
 120				if ((*s++ ^ e->magic[j]))
 121					break;
 122		}
 123		if (j == e->size)
 124			return e;
 125	}
 126
 127	return NULL;
 128}
 129
 130/**
 131 * get_binfmt_handler - try to find a binary type handler
 132 * @misc: handle to binfmt_misc instance
 133 * @bprm: binary for which we are looking for a handler
 134 *
 135 * Try to find a binfmt handler for the binary type. If one is found take a
 136 * reference to protect against removal via bm_{entry,status}_write().
 137 *
 138 * Return: binary type list entry on success, NULL on failure
 139 */
 140static Node *get_binfmt_handler(struct binfmt_misc *misc,
 141				struct linux_binprm *bprm)
 142{
 143	Node *e;
 144
 145	read_lock(&misc->entries_lock);
 146	e = search_binfmt_handler(misc, bprm);
 147	if (e)
 148		refcount_inc(&e->users);
 149	read_unlock(&misc->entries_lock);
 150	return e;
 151}
 152
 153/**
 154 * put_binfmt_handler - put binary handler node
 155 * @e: node to put
 156 *
 157 * Free node syncing with load_misc_binary() and defer final free to
 158 * load_misc_binary() in case it is using the binary type handler we were
 159 * requested to remove.
 160 */
 161static void put_binfmt_handler(Node *e)
 162{
 163	if (refcount_dec_and_test(&e->users)) {
 164		if (e->flags & MISC_FMT_OPEN_FILE)
 165			filp_close(e->interp_file, NULL);
 166		kfree(e);
 167	}
 168}
 169
 170/**
 171 * load_binfmt_misc - load the binfmt_misc of the caller's user namespace
 172 *
 173 * To be called in load_misc_binary() to load the relevant struct binfmt_misc.
 174 * If a user namespace doesn't have its own binfmt_misc mount it can make use
 175 * of its ancestor's binfmt_misc handlers. This mimicks the behavior of
 176 * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where
 177 * available to all user and user namespaces on the system.
 178 *
 179 * Return: the binfmt_misc instance of the caller's user namespace
 180 */
 181static struct binfmt_misc *load_binfmt_misc(void)
 182{
 183	const struct user_namespace *user_ns;
 184	struct binfmt_misc *misc;
 185
 186	user_ns = current_user_ns();
 187	while (user_ns) {
 188		/* Pairs with smp_store_release() in bm_fill_super(). */
 189		misc = smp_load_acquire(&user_ns->binfmt_misc);
 190		if (misc)
 191			return misc;
 192
 193		user_ns = user_ns->parent;
 194	}
 195
 196	return &init_binfmt_misc;
 197}
 198
 199/*
 200 * the loader itself
 201 */
 202static int load_misc_binary(struct linux_binprm *bprm)
 203{
 204	Node *fmt;
 205	struct file *interp_file = NULL;
 206	int retval = -ENOEXEC;
 207	struct binfmt_misc *misc;
 
 
 208
 209	misc = load_binfmt_misc();
 210	if (!misc->enabled)
 211		return retval;
 212
 213	fmt = get_binfmt_handler(misc, bprm);
 
 
 
 
 
 214	if (!fmt)
 215		return retval;
 216
 217	/* Need to be able to load the file after exec */
 218	retval = -ENOENT;
 219	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
 220		goto ret;
 221
 222	if (fmt->flags & MISC_FMT_PRESERVE_ARGV0) {
 223		bprm->interp_flags |= BINPRM_FLAGS_PRESERVE_ARGV0;
 224	} else {
 225		retval = remove_arg_zero(bprm);
 226		if (retval)
 227			goto ret;
 228	}
 229
 230	if (fmt->flags & MISC_FMT_OPEN_BINARY)
 231		bprm->have_execfd = 1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 232
 
 
 
 
 
 233	/* make argv[1] be the path to the binary */
 234	retval = copy_string_kernel(bprm->interp, bprm);
 235	if (retval < 0)
 236		goto ret;
 237	bprm->argc++;
 238
 239	/* add the interp as argv[0] */
 240	retval = copy_string_kernel(fmt->interpreter, bprm);
 241	if (retval < 0)
 242		goto ret;
 243	bprm->argc++;
 244
 245	/* Update interp in case binfmt_script needs it. */
 246	retval = bprm_change_interp(fmt->interpreter, bprm);
 247	if (retval < 0)
 248		goto ret;
 249
 250	if (fmt->flags & MISC_FMT_OPEN_FILE) {
 251		interp_file = file_clone_open(fmt->interp_file);
 252		if (!IS_ERR(interp_file))
 253			deny_write_access(interp_file);
 254	} else {
 255		interp_file = open_exec(fmt->interpreter);
 256	}
 257	retval = PTR_ERR(interp_file);
 258	if (IS_ERR(interp_file))
 259		goto ret;
 260
 261	bprm->interpreter = interp_file;
 262	if (fmt->flags & MISC_FMT_CREDENTIALS)
 263		bprm->execfd_creds = 1;
 
 
 
 
 
 
 
 264
 265	retval = 0;
 266ret:
 267
 268	/*
 269	 * If we actually put the node here all concurrent calls to
 270	 * load_misc_binary() will have finished. We also know
 271	 * that for the refcount to be zero someone must have concurently
 272	 * removed the binary type handler from the list and it's our job to
 273	 * free it.
 274	 */
 275	put_binfmt_handler(fmt);
 276
 
 277	return retval;
 
 
 
 
 
 
 278}
 279
 280/* Command parsers */
 281
 282/*
 283 * parses and copies one argument enclosed in del from *sp to *dp,
 284 * recognising the \x special.
 285 * returns pointer to the copied argument or NULL in case of an
 286 * error (and sets err) or null argument length.
 287 */
 288static char *scanarg(char *s, char del)
 289{
 290	char c;
 291
 292	while ((c = *s++) != del) {
 293		if (c == '\\' && *s == 'x') {
 294			s++;
 295			if (!isxdigit(*s++))
 296				return NULL;
 297			if (!isxdigit(*s++))
 298				return NULL;
 299		}
 300	}
 301	s[-1] ='\0';
 302	return s;
 303}
 304
 305static char *check_special_flags(char *sfs, Node *e)
 306{
 307	char *p = sfs;
 308	int cont = 1;
 309
 310	/* special flags */
 311	while (cont) {
 312		switch (*p) {
 313		case 'P':
 314			pr_debug("register: flag: P (preserve argv0)\n");
 315			p++;
 316			e->flags |= MISC_FMT_PRESERVE_ARGV0;
 317			break;
 318		case 'O':
 319			pr_debug("register: flag: O (open binary)\n");
 320			p++;
 321			e->flags |= MISC_FMT_OPEN_BINARY;
 322			break;
 323		case 'C':
 324			pr_debug("register: flag: C (preserve creds)\n");
 325			p++;
 326			/* this flags also implies the
 327			   open-binary flag */
 328			e->flags |= (MISC_FMT_CREDENTIALS |
 329					MISC_FMT_OPEN_BINARY);
 330			break;
 331		case 'F':
 332			pr_debug("register: flag: F: open interpreter file now\n");
 333			p++;
 334			e->flags |= MISC_FMT_OPEN_FILE;
 335			break;
 336		default:
 337			cont = 0;
 338		}
 339	}
 340
 341	return p;
 342}
 343
 344/*
 345 * This registers a new binary format, it recognises the syntax
 346 * ':name:type:offset:magic:mask:interpreter:flags'
 347 * where the ':' is the IFS, that can be chosen with the first char
 348 */
 349static Node *create_entry(const char __user *buffer, size_t count)
 350{
 351	Node *e;
 352	int memsize, err;
 353	char *buf, *p;
 354	char del;
 355
 356	pr_debug("register: received %zu bytes\n", count);
 357
 358	/* some sanity checks */
 359	err = -EINVAL;
 360	if ((count < 11) || (count > MAX_REGISTER_LENGTH))
 361		goto out;
 362
 363	err = -ENOMEM;
 364	memsize = sizeof(Node) + count + 8;
 365	e = kmalloc(memsize, GFP_KERNEL_ACCOUNT);
 366	if (!e)
 367		goto out;
 368
 369	p = buf = (char *)e + sizeof(Node);
 370
 371	memset(e, 0, sizeof(Node));
 372	if (copy_from_user(buf, buffer, count))
 373		goto efault;
 374
 375	del = *p++;	/* delimeter */
 376
 377	pr_debug("register: delim: %#x {%c}\n", del, del);
 378
 379	/* Pad the buffer with the delim to simplify parsing below. */
 380	memset(buf + count, del, 8);
 381
 382	/* Parse the 'name' field. */
 383	e->name = p;
 384	p = strchr(p, del);
 385	if (!p)
 386		goto einval;
 387	*p++ = '\0';
 388	if (!e->name[0] ||
 389	    !strcmp(e->name, ".") ||
 390	    !strcmp(e->name, "..") ||
 391	    strchr(e->name, '/'))
 392		goto einval;
 393
 394	pr_debug("register: name: {%s}\n", e->name);
 395
 396	/* Parse the 'type' field. */
 397	switch (*p++) {
 398	case 'E':
 399		pr_debug("register: type: E (extension)\n");
 400		e->flags = 1 << Enabled;
 401		break;
 402	case 'M':
 403		pr_debug("register: type: M (magic)\n");
 404		e->flags = (1 << Enabled) | (1 << Magic);
 405		break;
 406	default:
 407		goto einval;
 408	}
 409	if (*p++ != del)
 410		goto einval;
 411
 412	if (test_bit(Magic, &e->flags)) {
 413		/* Handle the 'M' (magic) format. */
 414		char *s;
 415
 416		/* Parse the 'offset' field. */
 417		s = strchr(p, del);
 418		if (!s)
 419			goto einval;
 420		*s = '\0';
 421		if (p != s) {
 422			int r = kstrtoint(p, 10, &e->offset);
 423			if (r != 0 || e->offset < 0)
 424				goto einval;
 425		}
 426		p = s;
 427		if (*p++)
 428			goto einval;
 429		pr_debug("register: offset: %#x\n", e->offset);
 430
 431		/* Parse the 'magic' field. */
 432		e->magic = p;
 433		p = scanarg(p, del);
 434		if (!p)
 435			goto einval;
 436		if (!e->magic[0])
 437			goto einval;
 438		if (USE_DEBUG)
 439			print_hex_dump_bytes(
 440				KBUILD_MODNAME ": register: magic[raw]: ",
 441				DUMP_PREFIX_NONE, e->magic, p - e->magic);
 442
 443		/* Parse the 'mask' field. */
 444		e->mask = p;
 445		p = scanarg(p, del);
 446		if (!p)
 447			goto einval;
 448		if (!e->mask[0]) {
 449			e->mask = NULL;
 450			pr_debug("register:  mask[raw]: none\n");
 451		} else if (USE_DEBUG)
 452			print_hex_dump_bytes(
 453				KBUILD_MODNAME ": register:  mask[raw]: ",
 454				DUMP_PREFIX_NONE, e->mask, p - e->mask);
 455
 456		/*
 457		 * Decode the magic & mask fields.
 458		 * Note: while we might have accepted embedded NUL bytes from
 459		 * above, the unescape helpers here will stop at the first one
 460		 * it encounters.
 461		 */
 462		e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
 463		if (e->mask &&
 464		    string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
 465			goto einval;
 466		if (e->size > BINPRM_BUF_SIZE ||
 467		    BINPRM_BUF_SIZE - e->size < e->offset)
 468			goto einval;
 469		pr_debug("register: magic/mask length: %i\n", e->size);
 470		if (USE_DEBUG) {
 471			print_hex_dump_bytes(
 472				KBUILD_MODNAME ": register: magic[decoded]: ",
 473				DUMP_PREFIX_NONE, e->magic, e->size);
 474
 475			if (e->mask) {
 476				int i;
 477				char *masked = kmalloc(e->size, GFP_KERNEL_ACCOUNT);
 478
 479				print_hex_dump_bytes(
 480					KBUILD_MODNAME ": register:  mask[decoded]: ",
 481					DUMP_PREFIX_NONE, e->mask, e->size);
 482
 483				if (masked) {
 484					for (i = 0; i < e->size; ++i)
 485						masked[i] = e->magic[i] & e->mask[i];
 486					print_hex_dump_bytes(
 487						KBUILD_MODNAME ": register:  magic[masked]: ",
 488						DUMP_PREFIX_NONE, masked, e->size);
 489
 490					kfree(masked);
 491				}
 492			}
 493		}
 494	} else {
 495		/* Handle the 'E' (extension) format. */
 496
 497		/* Skip the 'offset' field. */
 498		p = strchr(p, del);
 499		if (!p)
 500			goto einval;
 501		*p++ = '\0';
 502
 503		/* Parse the 'magic' field. */
 504		e->magic = p;
 505		p = strchr(p, del);
 506		if (!p)
 507			goto einval;
 508		*p++ = '\0';
 509		if (!e->magic[0] || strchr(e->magic, '/'))
 510			goto einval;
 511		pr_debug("register: extension: {%s}\n", e->magic);
 512
 513		/* Skip the 'mask' field. */
 514		p = strchr(p, del);
 515		if (!p)
 516			goto einval;
 517		*p++ = '\0';
 518	}
 519
 520	/* Parse the 'interpreter' field. */
 521	e->interpreter = p;
 522	p = strchr(p, del);
 523	if (!p)
 524		goto einval;
 525	*p++ = '\0';
 526	if (!e->interpreter[0])
 527		goto einval;
 528	pr_debug("register: interpreter: {%s}\n", e->interpreter);
 529
 530	/* Parse the 'flags' field. */
 531	p = check_special_flags(p, e);
 532	if (*p == '\n')
 533		p++;
 534	if (p != buf + count)
 535		goto einval;
 536
 537	return e;
 538
 539out:
 540	return ERR_PTR(err);
 541
 542efault:
 543	kfree(e);
 544	return ERR_PTR(-EFAULT);
 545einval:
 546	kfree(e);
 547	return ERR_PTR(-EINVAL);
 548}
 549
 550/*
 551 * Set status of entry/binfmt_misc:
 552 * '1' enables, '0' disables and '-1' clears entry/binfmt_misc
 553 */
 554static int parse_command(const char __user *buffer, size_t count)
 555{
 556	char s[4];
 557
 558	if (count > 3)
 559		return -EINVAL;
 560	if (copy_from_user(s, buffer, count))
 561		return -EFAULT;
 562	if (!count)
 563		return 0;
 564	if (s[count - 1] == '\n')
 565		count--;
 566	if (count == 1 && s[0] == '0')
 567		return 1;
 568	if (count == 1 && s[0] == '1')
 569		return 2;
 570	if (count == 2 && s[0] == '-' && s[1] == '1')
 571		return 3;
 572	return -EINVAL;
 573}
 574
 575/* generic stuff */
 576
 577static void entry_status(Node *e, char *page)
 578{
 579	char *dp = page;
 580	const char *status = "disabled";
 581
 582	if (test_bit(Enabled, &e->flags))
 583		status = "enabled";
 584
 585	if (!VERBOSE_STATUS) {
 586		sprintf(page, "%s\n", status);
 587		return;
 588	}
 589
 590	dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
 591
 592	/* print the special flags */
 593	dp += sprintf(dp, "flags: ");
 594	if (e->flags & MISC_FMT_PRESERVE_ARGV0)
 595		*dp++ = 'P';
 596	if (e->flags & MISC_FMT_OPEN_BINARY)
 597		*dp++ = 'O';
 598	if (e->flags & MISC_FMT_CREDENTIALS)
 599		*dp++ = 'C';
 600	if (e->flags & MISC_FMT_OPEN_FILE)
 601		*dp++ = 'F';
 602	*dp++ = '\n';
 603
 604	if (!test_bit(Magic, &e->flags)) {
 605		sprintf(dp, "extension .%s\n", e->magic);
 606	} else {
 607		dp += sprintf(dp, "offset %i\nmagic ", e->offset);
 608		dp = bin2hex(dp, e->magic, e->size);
 609		if (e->mask) {
 610			dp += sprintf(dp, "\nmask ");
 611			dp = bin2hex(dp, e->mask, e->size);
 612		}
 613		*dp++ = '\n';
 614		*dp = '\0';
 615	}
 616}
 617
 618static struct inode *bm_get_inode(struct super_block *sb, int mode)
 619{
 620	struct inode *inode = new_inode(sb);
 621
 622	if (inode) {
 623		inode->i_ino = get_next_ino();
 624		inode->i_mode = mode;
 625		simple_inode_init_ts(inode);
 
 626	}
 627	return inode;
 628}
 629
 630/**
 631 * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode
 632 * @inode: inode of the relevant binfmt_misc instance
 633 *
 634 * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can
 635 * be done without any memory barriers because we are guaranteed that
 636 * user_ns->binfmt_misc is fully initialized. It was fully initialized when the
 637 * binfmt_misc mount was first created.
 638 *
 639 * Return: struct binfmt_misc of the relevant binfmt_misc instance
 640 */
 641static struct binfmt_misc *i_binfmt_misc(struct inode *inode)
 642{
 643	return inode->i_sb->s_user_ns->binfmt_misc;
 644}
 645
 646/**
 647 * bm_evict_inode - cleanup data associated with @inode
 648 * @inode: inode to which the data is attached
 649 *
 650 * Cleanup the binary type handler data associated with @inode if a binary type
 651 * entry is removed or the filesystem is unmounted and the super block is
 652 * shutdown.
 653 *
 654 * If the ->evict call was not caused by a super block shutdown but by a write
 655 * to remove the entry or all entries via bm_{entry,status}_write() the entry
 656 * will have already been removed from the list. We keep the list_empty() check
 657 * to make that explicit.
 658*/
 659static void bm_evict_inode(struct inode *inode)
 660{
 661	Node *e = inode->i_private;
 662
 663	clear_inode(inode);
 664
 665	if (e) {
 666		struct binfmt_misc *misc;
 667
 668		misc = i_binfmt_misc(inode);
 669		write_lock(&misc->entries_lock);
 670		if (!list_empty(&e->list))
 671			list_del_init(&e->list);
 672		write_unlock(&misc->entries_lock);
 673		put_binfmt_handler(e);
 674	}
 675}
 676
 677/**
 678 * unlink_binfmt_dentry - remove the dentry for the binary type handler
 679 * @dentry: dentry associated with the binary type handler
 680 *
 681 * Do the actual filesystem work to remove a dentry for a registered binary
 682 * type handler. Since binfmt_misc only allows simple files to be created
 683 * directly under the root dentry of the filesystem we ensure that we are
 684 * indeed passed a dentry directly beneath the root dentry, that the inode
 685 * associated with the root dentry is locked, and that it is a regular file we
 686 * are asked to remove.
 687 */
 688static void unlink_binfmt_dentry(struct dentry *dentry)
 689{
 690	struct dentry *parent = dentry->d_parent;
 691	struct inode *inode, *parent_inode;
 692
 693	/* All entries are immediate descendants of the root dentry. */
 694	if (WARN_ON_ONCE(dentry->d_sb->s_root != parent))
 695		return;
 696
 697	/* We only expect to be called on regular files. */
 698	inode = d_inode(dentry);
 699	if (WARN_ON_ONCE(!S_ISREG(inode->i_mode)))
 700		return;
 701
 702	/* The parent inode must be locked. */
 703	parent_inode = d_inode(parent);
 704	if (WARN_ON_ONCE(!inode_is_locked(parent_inode)))
 705		return;
 706
 707	if (simple_positive(dentry)) {
 708		dget(dentry);
 709		simple_unlink(parent_inode, dentry);
 710		d_delete(dentry);
 
 
 
 
 
 
 
 711		dput(dentry);
 
 712	}
 713}
 714
 715/**
 716 * remove_binfmt_handler - remove a binary type handler
 717 * @misc: handle to binfmt_misc instance
 718 * @e: binary type handler to remove
 719 *
 720 * Remove a binary type handler from the list of binary type handlers and
 721 * remove its associated dentry. This is called from
 722 * binfmt_{entry,status}_write(). In the future, we might want to think about
 723 * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's
 724 * to use writes to files in order to delete binary type handlers. But it has
 725 * worked for so long that it's not a pressing issue.
 726 */
 727static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e)
 728{
 729	write_lock(&misc->entries_lock);
 730	list_del_init(&e->list);
 731	write_unlock(&misc->entries_lock);
 732	unlink_binfmt_dentry(e->dentry);
 733}
 734
 735/* /<entry> */
 736
 737static ssize_t
 738bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 739{
 740	Node *e = file_inode(file)->i_private;
 741	ssize_t res;
 742	char *page;
 743
 744	page = (char *) __get_free_page(GFP_KERNEL);
 745	if (!page)
 746		return -ENOMEM;
 747
 748	entry_status(e, page);
 749
 750	res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
 751
 752	free_page((unsigned long) page);
 753	return res;
 754}
 755
 756static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 757				size_t count, loff_t *ppos)
 758{
 759	struct inode *inode = file_inode(file);
 760	Node *e = inode->i_private;
 761	int res = parse_command(buffer, count);
 762
 763	switch (res) {
 764	case 1:
 765		/* Disable this handler. */
 766		clear_bit(Enabled, &e->flags);
 767		break;
 768	case 2:
 769		/* Enable this handler. */
 770		set_bit(Enabled, &e->flags);
 771		break;
 772	case 3:
 773		/* Delete this handler. */
 774		inode = d_inode(inode->i_sb->s_root);
 775		inode_lock(inode);
 776
 777		/*
 778		 * In order to add new element or remove elements from the list
 779		 * via bm_{entry,register,status}_write() inode_lock() on the
 780		 * root inode must be held.
 781		 * The lock is exclusive ensuring that the list can't be
 782		 * modified. Only load_misc_binary() can access but does so
 783		 * read-only. So we only need to take the write lock when we
 784		 * actually remove the entry from the list.
 785		 */
 786		if (!list_empty(&e->list))
 787			remove_binfmt_handler(i_binfmt_misc(inode), e);
 788
 789		inode_unlock(inode);
 790		break;
 791	default:
 792		return res;
 793	}
 794
 795	return count;
 796}
 797
 798static const struct file_operations bm_entry_operations = {
 799	.read		= bm_entry_read,
 800	.write		= bm_entry_write,
 801	.llseek		= default_llseek,
 802};
 803
 804/* /register */
 805
 806static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 807			       size_t count, loff_t *ppos)
 808{
 809	Node *e;
 810	struct inode *inode;
 811	struct super_block *sb = file_inode(file)->i_sb;
 812	struct dentry *root = sb->s_root, *dentry;
 813	struct binfmt_misc *misc;
 814	int err = 0;
 815	struct file *f = NULL;
 816
 817	e = create_entry(buffer, count);
 818
 819	if (IS_ERR(e))
 820		return PTR_ERR(e);
 821
 822	if (e->flags & MISC_FMT_OPEN_FILE) {
 823		const struct cred *old_cred;
 824
 825		/*
 826		 * Now that we support unprivileged binfmt_misc mounts make
 827		 * sure we use the credentials that the register @file was
 828		 * opened with to also open the interpreter. Before that this
 829		 * didn't matter much as only a privileged process could open
 830		 * the register file.
 831		 */
 832		old_cred = override_creds(file->f_cred);
 833		f = open_exec(e->interpreter);
 834		revert_creds(old_cred);
 835		if (IS_ERR(f)) {
 836			pr_notice("register: failed to install interpreter file %s\n",
 837				 e->interpreter);
 838			kfree(e);
 839			return PTR_ERR(f);
 840		}
 841		e->interp_file = f;
 842	}
 843
 844	inode_lock(d_inode(root));
 845	dentry = lookup_one_len(e->name, root, strlen(e->name));
 846	err = PTR_ERR(dentry);
 847	if (IS_ERR(dentry))
 848		goto out;
 849
 850	err = -EEXIST;
 851	if (d_really_is_positive(dentry))
 852		goto out2;
 853
 854	inode = bm_get_inode(sb, S_IFREG | 0644);
 855
 856	err = -ENOMEM;
 857	if (!inode)
 858		goto out2;
 859
 860	refcount_set(&e->users, 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 861	e->dentry = dget(dentry);
 862	inode->i_private = e;
 863	inode->i_fop = &bm_entry_operations;
 864
 865	d_instantiate(dentry, inode);
 866	misc = i_binfmt_misc(inode);
 867	write_lock(&misc->entries_lock);
 868	list_add(&e->list, &misc->entries);
 869	write_unlock(&misc->entries_lock);
 870
 871	err = 0;
 872out2:
 873	dput(dentry);
 874out:
 875	inode_unlock(d_inode(root));
 876
 877	if (err) {
 878		if (f)
 879			filp_close(f, NULL);
 880		kfree(e);
 881		return err;
 882	}
 883	return count;
 884}
 885
 886static const struct file_operations bm_register_operations = {
 887	.write		= bm_register_write,
 888	.llseek		= noop_llseek,
 889};
 890
 891/* /status */
 892
 893static ssize_t
 894bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 895{
 896	struct binfmt_misc *misc;
 897	char *s;
 898
 899	misc = i_binfmt_misc(file_inode(file));
 900	s = misc->enabled ? "enabled\n" : "disabled\n";
 901	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
 902}
 903
 904static ssize_t bm_status_write(struct file *file, const char __user *buffer,
 905		size_t count, loff_t *ppos)
 906{
 907	struct binfmt_misc *misc;
 908	int res = parse_command(buffer, count);
 909	Node *e, *next;
 910	struct inode *inode;
 911
 912	misc = i_binfmt_misc(file_inode(file));
 913	switch (res) {
 914	case 1:
 915		/* Disable all handlers. */
 916		misc->enabled = false;
 917		break;
 918	case 2:
 919		/* Enable all handlers. */
 920		misc->enabled = true;
 921		break;
 922	case 3:
 923		/* Delete all handlers. */
 924		inode = d_inode(file_inode(file)->i_sb->s_root);
 925		inode_lock(inode);
 926
 927		/*
 928		 * In order to add new element or remove elements from the list
 929		 * via bm_{entry,register,status}_write() inode_lock() on the
 930		 * root inode must be held.
 931		 * The lock is exclusive ensuring that the list can't be
 932		 * modified. Only load_misc_binary() can access but does so
 933		 * read-only. So we only need to take the write lock when we
 934		 * actually remove the entry from the list.
 935		 */
 936		list_for_each_entry_safe(e, next, &misc->entries, list)
 937			remove_binfmt_handler(misc, e);
 938
 939		inode_unlock(inode);
 940		break;
 941	default:
 942		return res;
 943	}
 944
 945	return count;
 946}
 947
 948static const struct file_operations bm_status_operations = {
 949	.read		= bm_status_read,
 950	.write		= bm_status_write,
 951	.llseek		= default_llseek,
 952};
 953
 954/* Superblock handling */
 955
 956static void bm_put_super(struct super_block *sb)
 957{
 958	struct user_namespace *user_ns = sb->s_fs_info;
 959
 960	sb->s_fs_info = NULL;
 961	put_user_ns(user_ns);
 962}
 963
 964static const struct super_operations s_ops = {
 965	.statfs		= simple_statfs,
 966	.evict_inode	= bm_evict_inode,
 967	.put_super	= bm_put_super,
 968};
 969
 970static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
 971{
 972	int err;
 973	struct user_namespace *user_ns = sb->s_user_ns;
 974	struct binfmt_misc *misc;
 975	static const struct tree_descr bm_files[] = {
 976		[2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
 977		[3] = {"register", &bm_register_operations, S_IWUSR},
 978		/* last one */ {""}
 979	};
 980
 981	if (WARN_ON(user_ns != current_user_ns()))
 982		return -EINVAL;
 983
 984	/*
 985	 * Lazily allocate a new binfmt_misc instance for this namespace, i.e.
 986	 * do it here during the first mount of binfmt_misc. We don't need to
 987	 * waste memory for every user namespace allocation. It's likely much
 988	 * more common to not mount a separate binfmt_misc instance than it is
 989	 * to mount one.
 990	 *
 991	 * While multiple superblocks can exist they are keyed by userns in
 992	 * s_fs_info for binfmt_misc. Hence, the vfs guarantees that
 993	 * bm_fill_super() is called exactly once whenever a binfmt_misc
 994	 * superblock for a userns is created. This in turn lets us conclude
 995	 * that when a binfmt_misc superblock is created for the first time for
 996	 * a userns there's no one racing us. Therefore we don't need any
 997	 * barriers when we dereference binfmt_misc.
 998	 */
 999	misc = user_ns->binfmt_misc;
1000	if (!misc) {
1001		/*
1002		 * If it turns out that most user namespaces actually want to
1003		 * register their own binary type handler and therefore all
1004		 * create their own separate binfm_misc mounts we should
1005		 * consider turning this into a kmem cache.
1006		 */
1007		misc = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL);
1008		if (!misc)
1009			return -ENOMEM;
1010
1011		INIT_LIST_HEAD(&misc->entries);
1012		rwlock_init(&misc->entries_lock);
1013
1014		/* Pairs with smp_load_acquire() in load_binfmt_misc(). */
1015		smp_store_release(&user_ns->binfmt_misc, misc);
1016	}
1017
1018	/*
1019	 * When the binfmt_misc superblock for this userns is shutdown
1020	 * ->enabled might have been set to false and we don't reinitialize
1021	 * ->enabled again in put_super() as someone might already be mounting
1022	 * binfmt_misc again. It also would be pointless since by the time
1023	 * ->put_super() is called we know that the binary type list for this
1024	 * bintfmt_misc mount is empty making load_misc_binary() return
1025	 * -ENOEXEC independent of whether ->enabled is true. Instead, if
1026	 * someone mounts binfmt_misc for the first time or again we simply
1027	 * reset ->enabled to true.
1028	 */
1029	misc->enabled = true;
1030
1031	err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
1032	if (!err)
1033		sb->s_op = &s_ops;
1034	return err;
1035}
1036
1037static void bm_free(struct fs_context *fc)
1038{
1039	if (fc->s_fs_info)
1040		put_user_ns(fc->s_fs_info);
1041}
1042
1043static int bm_get_tree(struct fs_context *fc)
1044{
1045	return get_tree_keyed(fc, bm_fill_super, get_user_ns(fc->user_ns));
1046}
1047
1048static const struct fs_context_operations bm_context_ops = {
1049	.free		= bm_free,
1050	.get_tree	= bm_get_tree,
1051};
1052
1053static int bm_init_fs_context(struct fs_context *fc)
1054{
1055	fc->ops = &bm_context_ops;
1056	return 0;
1057}
1058
1059static struct linux_binfmt misc_format = {
1060	.module = THIS_MODULE,
1061	.load_binary = load_misc_binary,
1062};
1063
1064static struct file_system_type bm_fs_type = {
1065	.owner		= THIS_MODULE,
1066	.name		= "binfmt_misc",
1067	.init_fs_context = bm_init_fs_context,
1068	.fs_flags	= FS_USERNS_MOUNT,
1069	.kill_sb	= kill_litter_super,
1070};
1071MODULE_ALIAS_FS("binfmt_misc");
1072
1073static int __init init_misc_binfmt(void)
1074{
1075	int err = register_filesystem(&bm_fs_type);
1076	if (!err)
1077		insert_binfmt(&misc_format);
1078	return err;
1079}
1080
1081static void __exit exit_misc_binfmt(void)
1082{
1083	unregister_binfmt(&misc_format);
1084	unregister_filesystem(&bm_fs_type);
1085}
1086
1087core_initcall(init_misc_binfmt);
1088module_exit(exit_misc_binfmt);
1089MODULE_DESCRIPTION("Kernel support for miscellaneous binaries");
1090MODULE_LICENSE("GPL");