Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.6.
  1#include <signal.h>
  2#include <stdio.h>
  3#include <stdlib.h>
  4#include <unistd.h>
  5#include <errno.h>
  6#include <fcntl.h>
  7#include <string.h>
  8#include <stddef.h>
  9#include <sys/sysmacros.h>
 10#include <sys/types.h>
 11#include <sys/wait.h>
 12#include <sys/socket.h>
 13#include <sys/stat.h>
 14#include <sys/mman.h>
 15#include <sys/syscall.h>
 16#include <sys/user.h>
 17#include <sys/ioctl.h>
 18#include <sys/ptrace.h>
 19#include <sys/mount.h>
 20#include <linux/limits.h>
 21#include <linux/filter.h>
 22#include <linux/seccomp.h>
 23
 24#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
 25
 26static int seccomp(unsigned int op, unsigned int flags, void *args)
 27{
 28	errno = 0;
 29	return syscall(__NR_seccomp, op, flags, args);
 30}
 31
 32static int send_fd(int sock, int fd)
 33{
 34	struct msghdr msg = {};
 35	struct cmsghdr *cmsg;
 36	char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
 37	struct iovec io = {
 38		.iov_base = &c,
 39		.iov_len = 1,
 40	};
 41
 42	msg.msg_iov = &io;
 43	msg.msg_iovlen = 1;
 44	msg.msg_control = buf;
 45	msg.msg_controllen = sizeof(buf);
 46	cmsg = CMSG_FIRSTHDR(&msg);
 47	cmsg->cmsg_level = SOL_SOCKET;
 48	cmsg->cmsg_type = SCM_RIGHTS;
 49	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 50	*((int *)CMSG_DATA(cmsg)) = fd;
 51	msg.msg_controllen = cmsg->cmsg_len;
 52
 53	if (sendmsg(sock, &msg, 0) < 0) {
 54		perror("sendmsg");
 55		return -1;
 56	}
 57
 58	return 0;
 59}
 60
 61static int recv_fd(int sock)
 62{
 63	struct msghdr msg = {};
 64	struct cmsghdr *cmsg;
 65	char buf[CMSG_SPACE(sizeof(int))] = {0}, c = 'c';
 66	struct iovec io = {
 67		.iov_base = &c,
 68		.iov_len = 1,
 69	};
 70
 71	msg.msg_iov = &io;
 72	msg.msg_iovlen = 1;
 73	msg.msg_control = buf;
 74	msg.msg_controllen = sizeof(buf);
 75
 76	if (recvmsg(sock, &msg, 0) < 0) {
 77		perror("recvmsg");
 78		return -1;
 79	}
 80
 81	cmsg = CMSG_FIRSTHDR(&msg);
 82
 83	return *((int *)CMSG_DATA(cmsg));
 84}
 85
 86static int user_trap_syscall(int nr, unsigned int flags)
 87{
 88	struct sock_filter filter[] = {
 89		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
 90			offsetof(struct seccomp_data, nr)),
 91		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
 92		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
 93		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
 94	};
 95
 96	struct sock_fprog prog = {
 97		.len = (unsigned short)ARRAY_SIZE(filter),
 98		.filter = filter,
 99	};
100
101	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
102}
103
/*
 * Read a path argument out of a task's memory.
 *
 * @mem:  open descriptor for the task's /proc/<pid>/mem
 * @addr: address of the string in the task's address space
 * @buf:  destination buffer
 * @size: size of @buf in bytes; must be at least 1
 *
 * At most @size - 1 bytes are read and the result is always NUL-terminated,
 * so @buf can safely be handed to the string functions and to mount().
 *
 * Returns 0 on success, -1 (after perror()) if the seek or the read fails.
 */
static int read_task_path(int mem, unsigned long long addr, char *buf,
			  size_t size)
{
	ssize_t n;

	if (lseek(mem, (off_t)addr, SEEK_SET) < 0) {
		perror("seek");
		return -1;
	}

	n = read(mem, buf, size - 1);
	if (n < 0) {
		perror("read");
		return -1;
	}

	/* read() may return short and never terminates the data itself. */
	buf[n] = '\0';
	return 0;
}

/*
 * Handle one seccomp notification for a trapped mount() call.
 *
 * @req:      notification received from the listener
 * @resp:     response to fill in; always initialised to "-EPERM, val 0"
 *            with the id of @req before anything else happens
 * @listener: seccomp listener descriptor, used to revalidate @req->id
 *
 * Policy: only bind mounts whose source and target both start with "/tmp/"
 * are performed (by this process, on behalf of the task); resp->error is
 * set to 0 in that case. Everything else is left as -EPERM.
 *
 * Returns 0 when @resp is ready to be sent (whether the mount was allowed
 * or denied), -1 on any failure while handling the request.
 */
static int handle_req(struct seccomp_notif *req,
		      struct seccomp_notif_resp *resp, int listener)
{
	char path[PATH_MAX], source[PATH_MAX], target[PATH_MAX];
	int ret = -1, mem;

	resp->id = req->id;
	resp->error = -EPERM;
	resp->val = 0;

	if (req->data.nr != __NR_mount) {
		fprintf(stderr, "huh? trapped something besides mount? %d\n", req->data.nr);
		return -1;
	}

	/* Only allow bind mounts. */
	if (!(req->data.args[3] & MS_BIND))
		return 0;

	/*
	 * Ok, let's read the task's memory to see where they wanted their
	 * mount to go.
	 */
	snprintf(path, sizeof(path), "/proc/%u/mem", req->pid);
	mem = open(path, O_RDONLY);
	if (mem < 0) {
		perror("open mem");
		return -1;
	}

	/*
	 * Now we avoid a TOCTOU: we referred to a pid by its pid, but since
	 * the pid that made the syscall may have died, we need to confirm that
	 * the pid is still valid after we open its /proc/pid/mem file. We can
	 * ask the listener fd this as follows.
	 *
	 * Note that this check should occur *after* any task-specific
	 * resources are opened, to make sure that the task has not died and
	 * we're not wrongly reading someone else's state in order to make
	 * decisions.
	 */
	if (ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req->id) < 0) {
		fprintf(stderr, "task died before we could map its memory\n");
		goto out;
	}

	/*
	 * Phew, we've got the right /proc/pid/mem. Now we can read it. Note
	 * that to avoid another TOCTOU, we should read all of the pointer args
	 * before we decide to allow the syscall.
	 *
	 * ret stays -1 throughout, so a failure on either argument is
	 * reported to the caller as an error.
	 */
	if (read_task_path(mem, req->data.args[0], source, sizeof(source)) < 0)
		goto out;

	if (read_task_path(mem, req->data.args[1], target, sizeof(target)) < 0)
		goto out;

	/*
	 * Our policy is to only allow bind mounts inside /tmp. This isn't very
	 * interesting, because we could do unprivileged bind mounts with user
	 * namespaces already, but you get the idea.
	 */
	if (!strncmp(source, "/tmp/", 5) && !strncmp(target, "/tmp/", 5)) {
		if (mount(source, target, NULL, req->data.args[3], NULL) < 0) {
			perror("actual mount");
			goto out;
		}
		resp->error = 0;
	}

	/*
	 * Even if we didn't allow it because of policy, generating the
	 * response was a success, because we want to tell the worker EPERM.
	 */
	ret = 0;

out:
	close(mem);
	return ret;
}
200
/*
 * Demonstrates seccomp user notification.
 *
 * Three processes take part:
 *   - the worker installs a filter trapping mount(), drops to uid 1000,
 *     sends the listener fd to the parent and then attempts two mounts;
 *   - the tracer receives notifications on the listener and answers them
 *     through handle_req();
 *   - the parent waits for the worker, cleans up /tmp/foo and kills
 *     whatever is still running.
 *
 * Returns 0 if the worker exited with status 0 and cleanup succeeded,
 * 1 otherwise.
 *
 * NOTE(review): the parent keeps sk_pair[1] open while it waits in
 * recv_fd(), so if the worker exits before sending the listener the
 * receive presumably never sees end-of-file and blocks - confirm and
 * consider closing the unused socket ends after fork().
 */
int main(void)
{
	int sk_pair[2], ret = 1, status, listener;
	pid_t worker = 0 , tracer = 0;

	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair) < 0) {
		perror("socketpair");
		return 1;
	}

	worker = fork();
	if (worker < 0) {
		perror("fork");
		goto close_pair;
	}

	if (worker == 0) {
		listener = user_trap_syscall(__NR_mount,
					     SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0) {
			perror("seccomp");
			exit(1);
		}

		/*
		 * Drop privileges. We definitely can't mount as uid 1000.
		 */
		if (setuid(1000) < 0) {
			perror("setuid");
			exit(1);
		}

		/*
		 * Send the listener to the parent; also serves as
		 * synchronization.
		 */
		if (send_fd(sk_pair[1], listener) < 0)
			exit(1);
		close(listener);

		if (mkdir("/tmp/foo", 0755) < 0) {
			perror("mkdir");
			exit(1);
		}

		/*
		 * Try a bad mount just for grins.
		 */
		if (mount("/dev/sda", "/tmp/foo", NULL, 0, NULL) != -1) {
			fprintf(stderr, "huh? mounted /dev/sda?\n");
			exit(1);
		}

		if (errno != EPERM) {
			perror("bad error from mount");
			exit(1);
		}

		/*
		 * Ok, we expect this one to succeed.
		 */
		if (mount("/tmp/foo", "/tmp/foo", NULL, MS_BIND, NULL) < 0) {
			perror("mount");
			exit(1);
		}

		exit(0);
	}

	/*
	 * Get the listener from the child.
	 */
	listener = recv_fd(sk_pair[0]);
	if (listener < 0)
		goto out_kill;

	/*
	 * Fork a task to handle the requests. This isn't strictly necessary,
	 * but it makes the particular writing of this sample easier, since we
	 * can just wait for the tracee to exit and kill the tracer.
	 */
	tracer = fork();
	if (tracer < 0) {
		perror("fork");
		goto out_kill;
	}

	if (tracer == 0) {
		struct seccomp_notif *req;
		struct seccomp_notif_resp *resp;
		struct seccomp_notif_sizes sizes;

		/* The kernel reports how large the notification structs are. */
		if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) < 0) {
			perror("seccomp(GET_NOTIF_SIZES)");
			goto out_close;
		}

		req = malloc(sizes.seccomp_notif);
		if (!req)
			goto out_close;

		resp = malloc(sizes.seccomp_notif_resp);
		if (!resp)
			goto out_req;
		memset(resp, 0, sizes.seccomp_notif_resp);

		while (1) {
			memset(req, 0, sizes.seccomp_notif);
			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
				perror("ioctl recv");
				goto out_resp;
			}

			if (handle_req(req, resp, listener) < 0)
				goto out_resp;

			/*
			 * ENOENT here means that the task may have gotten a
			 * signal and restarted the syscall. It's up to the
			 * handler to decide what to do in this case, but for
			 * the sample code, we just ignore it. Probably
			 * something better should happen, like undoing the
			 * mount, or keeping track of the args to make sure we
			 * don't do it again.
			 */
			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, resp) < 0 &&
			    errno != ENOENT) {
				perror("ioctl send");
				goto out_resp;
			}
		}
		/* The loop only leaves through an error, hence exit(1). */
out_resp:
		free(resp);
out_req:
		free(req);
out_close:
		close(listener);
		exit(1);
	}

	close(listener);

	if (waitpid(worker, &status, 0) != worker) {
		perror("waitpid");
		goto out_kill;
	}

	/*
	 * The worker has been reaped: forget its pid so that the cleanup
	 * below cannot signal an unrelated process that reused the number.
	 */
	worker = 0;

	if (umount2("/tmp/foo", MNT_DETACH) < 0 && errno != EINVAL) {
		perror("umount2");
		goto out_kill;
	}

	/* Take the common cleanup path so the tracer is not left running. */
	if (remove("/tmp/foo") < 0 && errno != ENOENT) {
		perror("remove");
		goto out_kill;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status)) {
		fprintf(stderr, "worker exited nonzero\n");
		goto out_kill;
	}

	ret = 0;

out_kill:
	if (tracer > 0)
		kill(tracer, SIGKILL);
	if (worker > 0)
		kill(worker, SIGKILL);

close_pair:
	close(sk_pair[0]);
	close(sk_pair[1]);
	return ret;
}