Linux Audio

Check our new training course

Loading...
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/kernel.h>
   3#include <linux/errno.h>
   4#include <linux/file.h>
   5#include <linux/slab.h>
   6#include <linux/net.h>
   7#include <linux/compat.h>
   8#include <net/compat.h>
   9#include <linux/io_uring.h>
  10
  11#include <uapi/linux/io_uring.h>
  12
  13#include "io_uring.h"
  14#include "kbuf.h"
  15#include "alloc_cache.h"
  16#include "net.h"
  17#include "notif.h"
  18#include "rsrc.h"
  19
  20#if defined(CONFIG_NET)
  21struct io_shutdown {
  22	struct file			*file;
  23	int				how;
  24};
  25
  26struct io_accept {
  27	struct file			*file;
  28	struct sockaddr __user		*addr;
  29	int __user			*addr_len;
  30	int				flags;
  31	int				iou_flags;
  32	u32				file_slot;
  33	unsigned long			nofile;
  34};
  35
  36struct io_socket {
  37	struct file			*file;
  38	int				domain;
  39	int				type;
  40	int				protocol;
  41	int				flags;
  42	u32				file_slot;
  43	unsigned long			nofile;
  44};
  45
  46struct io_connect {
  47	struct file			*file;
  48	struct sockaddr __user		*addr;
  49	int				addr_len;
  50	bool				in_progress;
  51	bool				seen_econnaborted;
  52};
  53
  54struct io_bind {
  55	struct file			*file;
  56	int				addr_len;
  57};
  58
  59struct io_listen {
  60	struct file			*file;
  61	int				backlog;
  62};
  63
  64struct io_sr_msg {
  65	struct file			*file;
  66	union {
  67		struct compat_msghdr __user	*umsg_compat;
  68		struct user_msghdr __user	*umsg;
  69		void __user			*buf;
  70	};
  71	int				len;
  72	unsigned			done_io;
  73	unsigned			msg_flags;
  74	unsigned			nr_multishot_loops;
  75	u16				flags;
  76	/* initialised and used only by !msg send variants */
 
  77	u16				buf_group;
  78	u16				buf_index;
  79	void __user			*msg_control;
  80	/* used only for send zerocopy */
  81	struct io_kiocb 		*notif;
  82};
  83
  84/*
  85 * Number of times we'll try and do receives if there's more data. If we
  86 * exceed this limit, then add us to the back of the queue and retry from
  87 * there. This helps fairness between flooding clients.
  88 */
  89#define MULTISHOT_MAX_RETRY	32
  90
  91int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  92{
  93	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
  94
  95	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
  96		     sqe->buf_index || sqe->splice_fd_in))
  97		return -EINVAL;
  98
  99	shutdown->how = READ_ONCE(sqe->len);
 100	req->flags |= REQ_F_FORCE_ASYNC;
 101	return 0;
 102}
 103
 104int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
 105{
 106	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
 107	struct socket *sock;
 108	int ret;
 109
 110	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
 111
 112	sock = sock_from_file(req->file);
 113	if (unlikely(!sock))
 114		return -ENOTSOCK;
 115
 116	ret = __sys_shutdown_sock(sock, shutdown->how);
 117	io_req_set_res(req, ret, 0);
 118	return IOU_OK;
 119}
 120
 121static bool io_net_retry(struct socket *sock, int flags)
 122{
 123	if (!(flags & MSG_WAITALL))
 124		return false;
 125	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
 126}
 127
 128static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
 129{
 130	if (kmsg->free_iov) {
 131		kfree(kmsg->free_iov);
 132		kmsg->free_iov_nr = 0;
 133		kmsg->free_iov = NULL;
 134	}
 135}
 136
 137static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
 138{
 139	struct io_async_msghdr *hdr = req->async_data;
 140	struct iovec *iov;
 141
 142	/* can't recycle, ensure we free the iovec if we have one */
 143	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
 144		io_netmsg_iovec_free(hdr);
 145		return;
 146	}
 147
 148	/* Let normal cleanup path reap it if we fail adding to the cache */
 149	iov = hdr->free_iov;
 150	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
 151		if (iov)
 152			kasan_mempool_poison_object(iov);
 153		req->async_data = NULL;
 154		req->flags &= ~REQ_F_ASYNC_DATA;
 155	}
 156}
 157
 158static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
 
 159{
 160	struct io_ring_ctx *ctx = req->ctx;
 
 161	struct io_async_msghdr *hdr;
 162
 163	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
 164	if (hdr) {
 165		if (hdr->free_iov) {
 166			kasan_mempool_unpoison_object(hdr->free_iov,
 167				hdr->free_iov_nr * sizeof(struct iovec));
 168			req->flags |= REQ_F_NEED_CLEANUP;
 
 
 169		}
 170		req->flags |= REQ_F_ASYNC_DATA;
 171		req->async_data = hdr;
 172		return hdr;
 173	}
 174
 175	if (!io_alloc_async_data(req)) {
 176		hdr = req->async_data;
 177		hdr->free_iov_nr = 0;
 178		hdr->free_iov = NULL;
 179		return hdr;
 180	}
 181	return NULL;
 182}
 183
 184/* assign new iovec to kmsg, if we need to */
 185static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
 186			     struct iovec *iov)
 187{
 188	if (iov) {
 189		req->flags |= REQ_F_NEED_CLEANUP;
 190		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
 191		if (kmsg->free_iov)
 192			kfree(kmsg->free_iov);
 193		kmsg->free_iov = iov;
 194	}
 195	return 0;
 196}
 197
 198static inline void io_mshot_prep_retry(struct io_kiocb *req,
 199				       struct io_async_msghdr *kmsg)
 
 200{
 201	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 202
 203	req->flags &= ~REQ_F_BL_EMPTY;
 204	sr->done_io = 0;
 205	sr->len = 0; /* get from the provided buffer */
 206	req->buf_index = sr->buf_group;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 207}
 208
 209#ifdef CONFIG_COMPAT
 210static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 211				  struct io_async_msghdr *iomsg,
 212				  struct compat_msghdr *msg, int ddir)
 213{
 214	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 215	struct compat_iovec __user *uiov;
 216	struct iovec *iov;
 217	int ret, nr_segs;
 218
 219	if (iomsg->free_iov) {
 220		nr_segs = iomsg->free_iov_nr;
 221		iov = iomsg->free_iov;
 222	} else {
 223		iov = &iomsg->fast_iov;
 224		nr_segs = 1;
 225	}
 226
 227	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
 228		return -EFAULT;
 229
 230	uiov = compat_ptr(msg->msg_iov);
 231	if (req->flags & REQ_F_BUFFER_SELECT) {
 232		compat_ssize_t clen;
 233
 
 234		if (msg->msg_iovlen == 0) {
 235			sr->len = iov->iov_len = 0;
 236			iov->iov_base = NULL;
 237		} else if (msg->msg_iovlen > 1) {
 238			return -EINVAL;
 239		} else {
 240			if (!access_ok(uiov, sizeof(*uiov)))
 241				return -EFAULT;
 242			if (__get_user(clen, &uiov->iov_len))
 243				return -EFAULT;
 244			if (clen < 0)
 245				return -EINVAL;
 246			sr->len = clen;
 247		}
 248
 249		return 0;
 250	}
 251
 
 252	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
 253				nr_segs, &iov, &iomsg->msg.msg_iter, true);
 
 254	if (unlikely(ret < 0))
 255		return ret;
 256
 257	return io_net_vec_assign(req, iomsg, iov);
 258}
 259#endif
 260
 261static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 262			   struct user_msghdr *msg, int ddir)
 263{
 264	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 265	struct user_msghdr __user *umsg = sr->umsg;
 266	struct iovec *iov;
 267	int ret, nr_segs;
 268
 269	if (iomsg->free_iov) {
 270		nr_segs = iomsg->free_iov_nr;
 271		iov = iomsg->free_iov;
 272	} else {
 273		iov = &iomsg->fast_iov;
 274		nr_segs = 1;
 275	}
 276
 277	if (!user_access_begin(umsg, sizeof(*umsg)))
 278		return -EFAULT;
 279
 280	ret = -EFAULT;
 281	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
 282	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
 283	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
 284	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
 285	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
 286	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
 287	msg->msg_flags = 0;
 288
 289	if (req->flags & REQ_F_BUFFER_SELECT) {
 290		if (msg->msg_iovlen == 0) {
 291			sr->len = iov->iov_len = 0;
 292			iov->iov_base = NULL;
 
 293		} else if (msg->msg_iovlen > 1) {
 294			ret = -EINVAL;
 295			goto ua_end;
 296		} else {
 297			/* we only need the length for provided buffers */
 298			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
 299				goto ua_end;
 300			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
 301					ua_end);
 302			sr->len = iov->iov_len;
 
 303		}
 304		ret = 0;
 305ua_end:
 306		user_access_end();
 307		return ret;
 308	}
 309
 310	user_access_end();
 311	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
 312				&iov, &iomsg->msg.msg_iter, false);
 
 313	if (unlikely(ret < 0))
 314		return ret;
 315
 316	return io_net_vec_assign(req, iomsg, iov);
 317}
 318
 319static int io_sendmsg_copy_hdr(struct io_kiocb *req,
 320			       struct io_async_msghdr *iomsg)
 321{
 322	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 323	struct user_msghdr msg;
 324	int ret;
 325
 326	iomsg->msg.msg_name = &iomsg->addr;
 327	iomsg->msg.msg_iter.nr_segs = 0;
 328
 329#ifdef CONFIG_COMPAT
 330	if (unlikely(req->ctx->compat)) {
 331		struct compat_msghdr cmsg;
 332
 333		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
 334		if (unlikely(ret))
 335			return ret;
 336
 337		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
 338		sr->msg_control = iomsg->msg.msg_control_user;
 339		return ret;
 340	}
 341#endif
 342
 343	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
 344	if (unlikely(ret))
 345		return ret;
 346
 347	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
 348
 349	/* save msg_control as sys_sendmsg() overwrites it */
 350	sr->msg_control = iomsg->msg.msg_control_user;
 351	return ret;
 352}
 353
 354void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
 355{
 356	struct io_async_msghdr *io = req->async_data;
 
 
 357
 358	io_netmsg_iovec_free(io);
 
 
 
 
 
 
 
 
 
 359}
 360
 361static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 
 362{
 363	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 364	struct io_async_msghdr *kmsg = req->async_data;
 365	void __user *addr;
 366	u16 addr_len;
 367	int ret;
 368
 369	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
 370
 371	if (READ_ONCE(sqe->__pad3[0]))
 372		return -EINVAL;
 373
 374	kmsg->msg.msg_name = NULL;
 375	kmsg->msg.msg_namelen = 0;
 376	kmsg->msg.msg_control = NULL;
 377	kmsg->msg.msg_controllen = 0;
 378	kmsg->msg.msg_ubuf = NULL;
 379
 380	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 381	addr_len = READ_ONCE(sqe->addr_len);
 382	if (addr) {
 383		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
 384		if (unlikely(ret < 0))
 385			return ret;
 386		kmsg->msg.msg_name = &kmsg->addr;
 387		kmsg->msg.msg_namelen = addr_len;
 388	}
 389	if (!io_do_buffer_select(req)) {
 390		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
 391				  &kmsg->msg.msg_iter);
 392		if (unlikely(ret < 0))
 393			return ret;
 394	}
 395	return 0;
 396}
 397
 398static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 399{
 400	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 401	struct io_async_msghdr *kmsg = req->async_data;
 402	int ret;
 403
 404	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 405
 406	ret = io_sendmsg_copy_hdr(req, kmsg);
 
 407	if (!ret)
 408		req->flags |= REQ_F_NEED_CLEANUP;
 409	return ret;
 410}
 411
 412#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
 
 
 
 
 
 413
 414int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 415{
 416	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 417
 418	sr->done_io = 0;
 419
 420	if (req->opcode != IORING_OP_SEND) {
 421		if (sqe->addr2 || sqe->file_index)
 422			return -EINVAL;
 
 
 
 
 423	}
 424
 
 425	sr->len = READ_ONCE(sqe->len);
 426	sr->flags = READ_ONCE(sqe->ioprio);
 427	if (sr->flags & ~SENDMSG_FLAGS)
 428		return -EINVAL;
 429	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 430	if (sr->msg_flags & MSG_DONTWAIT)
 431		req->flags |= REQ_F_NOWAIT;
 432	if (sr->flags & IORING_RECVSEND_BUNDLE) {
 433		if (req->opcode == IORING_OP_SENDMSG)
 434			return -EINVAL;
 435		if (!(req->flags & REQ_F_BUFFER_SELECT))
 436			return -EINVAL;
 437		sr->msg_flags |= MSG_WAITALL;
 438		sr->buf_group = req->buf_index;
 439		req->buf_list = NULL;
 440	}
 441
 442#ifdef CONFIG_COMPAT
 443	if (req->ctx->compat)
 444		sr->msg_flags |= MSG_CMSG_COMPAT;
 445#endif
 446	if (unlikely(!io_msg_alloc_async(req)))
 447		return -ENOMEM;
 448	if (req->opcode != IORING_OP_SENDMSG)
 449		return io_send_setup(req, sqe);
 450	return io_sendmsg_setup(req, sqe);
 451}
 452
 453static void io_req_msg_cleanup(struct io_kiocb *req,
 
 454			       unsigned int issue_flags)
 455{
 456	req->flags &= ~REQ_F_NEED_CLEANUP;
 
 
 
 457	io_netmsg_recycle(req, issue_flags);
 458}
 459
 460/*
 461 * For bundle completions, we need to figure out how many segments we consumed.
 462 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 463 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 464 * the segments, then it's a trivial questiont o answer. If we have residual
 465 * data in the iter, then loop the segments to figure out how much we
 466 * transferred.
 467 */
 468static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
 469{
 470	struct iovec *iov;
 471	int nbufs;
 472
 473	/* no data is always zero segments, and a ubuf is always 1 segment */
 474	if (ret <= 0)
 475		return 0;
 476	if (iter_is_ubuf(&kmsg->msg.msg_iter))
 477		return 1;
 478
 479	iov = kmsg->free_iov;
 480	if (!iov)
 481		iov = &kmsg->fast_iov;
 482
 483	/* if all data was transferred, it's basic pointer math */
 484	if (!iov_iter_count(&kmsg->msg.msg_iter))
 485		return iter_iov(&kmsg->msg.msg_iter) - iov;
 486
 487	/* short transfer, count segments */
 488	nbufs = 0;
 489	do {
 490		int this_len = min_t(int, iov[nbufs].iov_len, ret);
 491
 492		nbufs++;
 493		ret -= this_len;
 494	} while (ret);
 495
 496	return nbufs;
 497}
 498
 499static inline bool io_send_finish(struct io_kiocb *req, int *ret,
 500				  struct io_async_msghdr *kmsg,
 501				  unsigned issue_flags)
 502{
 503	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 504	bool bundle_finished = *ret <= 0;
 505	unsigned int cflags;
 506
 507	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
 508		cflags = io_put_kbuf(req, *ret, issue_flags);
 509		goto finish;
 510	}
 511
 512	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);
 513
 514	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
 515		goto finish;
 516
 517	/*
 518	 * Fill CQE for this receive and see if we should keep trying to
 519	 * receive from this socket.
 520	 */
 521	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
 522		io_mshot_prep_retry(req, kmsg);
 523		return false;
 524	}
 525
 526	/* Otherwise stop bundle and use the current result. */
 527finish:
 528	io_req_set_res(req, *ret, cflags);
 529	*ret = IOU_OK;
 530	return true;
 531}
 532
 533int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 534{
 535	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 536	struct io_async_msghdr *kmsg = req->async_data;
 537	struct socket *sock;
 538	unsigned flags;
 539	int min_ret = 0;
 540	int ret;
 541
 542	sock = sock_from_file(req->file);
 543	if (unlikely(!sock))
 544		return -ENOTSOCK;
 545
 
 
 
 
 
 
 
 
 
 
 546	if (!(req->flags & REQ_F_POLLED) &&
 547	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 548		return -EAGAIN;
 549
 550	flags = sr->msg_flags;
 551	if (issue_flags & IO_URING_F_NONBLOCK)
 552		flags |= MSG_DONTWAIT;
 553	if (flags & MSG_WAITALL)
 554		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 555
 556	kmsg->msg.msg_control_user = sr->msg_control;
 557
 558	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
 559
 560	if (ret < min_ret) {
 561		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 562			return -EAGAIN;
 563		if (ret > 0 && io_net_retry(sock, flags)) {
 564			kmsg->msg.msg_controllen = 0;
 565			kmsg->msg.msg_control = NULL;
 566			sr->done_io += ret;
 567			req->flags |= REQ_F_BL_NO_RECYCLE;
 568			return -EAGAIN;
 569		}
 570		if (ret == -ERESTARTSYS)
 571			ret = -EINTR;
 572		req_set_fail(req);
 573	}
 574	io_req_msg_cleanup(req, issue_flags);
 575	if (ret >= 0)
 576		ret += sr->done_io;
 577	else if (sr->done_io)
 578		ret = sr->done_io;
 579	io_req_set_res(req, ret, 0);
 580	return IOU_OK;
 581}
 582
 583int io_send(struct io_kiocb *req, unsigned int issue_flags)
 584{
 
 585	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 586	struct io_async_msghdr *kmsg = req->async_data;
 587	struct socket *sock;
 588	unsigned flags;
 589	int min_ret = 0;
 590	int ret;
 591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 592	sock = sock_from_file(req->file);
 593	if (unlikely(!sock))
 594		return -ENOTSOCK;
 595
 596	if (!(req->flags & REQ_F_POLLED) &&
 597	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 598		return -EAGAIN;
 599
 600	flags = sr->msg_flags;
 601	if (issue_flags & IO_URING_F_NONBLOCK)
 602		flags |= MSG_DONTWAIT;
 603
 604retry_bundle:
 605	if (io_do_buffer_select(req)) {
 606		struct buf_sel_arg arg = {
 607			.iovs = &kmsg->fast_iov,
 608			.max_len = min_not_zero(sr->len, INT_MAX),
 609			.nr_iovs = 1,
 610		};
 611
 612		if (kmsg->free_iov) {
 613			arg.nr_iovs = kmsg->free_iov_nr;
 614			arg.iovs = kmsg->free_iov;
 615			arg.mode = KBUF_MODE_FREE;
 616		}
 617
 618		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
 619			arg.nr_iovs = 1;
 620		else
 621			arg.mode |= KBUF_MODE_EXPAND;
 622
 623		ret = io_buffers_select(req, &arg, issue_flags);
 624		if (unlikely(ret < 0))
 625			return ret;
 626
 627		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
 628			kmsg->free_iov_nr = ret;
 629			kmsg->free_iov = arg.iovs;
 630			req->flags |= REQ_F_NEED_CLEANUP;
 631		}
 632		sr->len = arg.out_len;
 633
 634		if (ret == 1) {
 635			sr->buf = arg.iovs[0].iov_base;
 636			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
 637						&kmsg->msg.msg_iter);
 638			if (unlikely(ret))
 639				return ret;
 640		} else {
 641			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
 642					arg.iovs, ret, arg.out_len);
 643		}
 644	}
 645
 646	/*
 647	 * If MSG_WAITALL is set, or this is a bundle send, then we need
 648	 * the full amount. If just bundle is set, if we do a short send
 649	 * then we complete the bundle sequence rather than continue on.
 650	 */
 651	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
 652		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 653
 654	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
 655	kmsg->msg.msg_flags = flags;
 656	ret = sock_sendmsg(sock, &kmsg->msg);
 657	if (ret < min_ret) {
 658		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 659			return -EAGAIN;
 660
 661		if (ret > 0 && io_net_retry(sock, flags)) {
 662			sr->len -= ret;
 663			sr->buf += ret;
 664			sr->done_io += ret;
 665			req->flags |= REQ_F_BL_NO_RECYCLE;
 666			return -EAGAIN;
 667		}
 668		if (ret == -ERESTARTSYS)
 669			ret = -EINTR;
 670		req_set_fail(req);
 671	}
 672	if (ret >= 0)
 673		ret += sr->done_io;
 674	else if (sr->done_io)
 675		ret = sr->done_io;
 676
 677	if (!io_send_finish(req, &ret, kmsg, issue_flags))
 678		goto retry_bundle;
 679
 680	io_req_msg_cleanup(req, issue_flags);
 681	return ret;
 682}
 683
 684static int io_recvmsg_mshot_prep(struct io_kiocb *req,
 685				 struct io_async_msghdr *iomsg,
 686				 int namelen, size_t controllen)
 687{
 688	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
 689			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
 690		int hdr;
 691
 692		if (unlikely(namelen < 0))
 693			return -EOVERFLOW;
 694		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
 695					namelen, &hdr))
 696			return -EOVERFLOW;
 697		if (check_add_overflow(hdr, controllen, &hdr))
 698			return -EOVERFLOW;
 699
 700		iomsg->namelen = namelen;
 701		iomsg->controllen = controllen;
 702		return 0;
 703	}
 704
 705	return 0;
 706}
 707
 708static int io_recvmsg_copy_hdr(struct io_kiocb *req,
 709			       struct io_async_msghdr *iomsg)
 710{
 711	struct user_msghdr msg;
 712	int ret;
 713
 714	iomsg->msg.msg_name = &iomsg->addr;
 715	iomsg->msg.msg_iter.nr_segs = 0;
 716
 717#ifdef CONFIG_COMPAT
 718	if (unlikely(req->ctx->compat)) {
 719		struct compat_msghdr cmsg;
 720
 721		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
 722		if (unlikely(ret))
 723			return ret;
 724
 725		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
 726		if (unlikely(ret))
 727			return ret;
 728
 729		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
 730						cmsg.msg_controllen);
 731	}
 732#endif
 733
 734	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
 735	if (unlikely(ret))
 736		return ret;
 737
 738	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
 739	if (unlikely(ret))
 740		return ret;
 741
 742	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
 743					msg.msg_controllen);
 744}
 745
 746static int io_recvmsg_prep_setup(struct io_kiocb *req)
 747{
 748	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 749	struct io_async_msghdr *kmsg;
 750	int ret;
 751
 752	kmsg = io_msg_alloc_async(req);
 753	if (unlikely(!kmsg))
 754		return -ENOMEM;
 755
 756	if (req->opcode == IORING_OP_RECV) {
 757		kmsg->msg.msg_name = NULL;
 758		kmsg->msg.msg_namelen = 0;
 759		kmsg->msg.msg_inq = 0;
 760		kmsg->msg.msg_control = NULL;
 761		kmsg->msg.msg_get_inq = 1;
 762		kmsg->msg.msg_controllen = 0;
 763		kmsg->msg.msg_iocb = NULL;
 764		kmsg->msg.msg_ubuf = NULL;
 765
 766		if (!io_do_buffer_select(req)) {
 767			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
 768					  &kmsg->msg.msg_iter);
 769			if (unlikely(ret))
 770				return ret;
 771		}
 772		return 0;
 773	}
 774
 775	ret = io_recvmsg_copy_hdr(req, kmsg);
 776	if (!ret)
 777		req->flags |= REQ_F_NEED_CLEANUP;
 778	return ret;
 779}
 780
 781#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
 782			IORING_RECVSEND_BUNDLE)
 783
 784int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 785{
 786	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 787
 788	sr->done_io = 0;
 789
 790	if (unlikely(sqe->file_index || sqe->addr2))
 791		return -EINVAL;
 792
 793	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 794	sr->len = READ_ONCE(sqe->len);
 795	sr->flags = READ_ONCE(sqe->ioprio);
 796	if (sr->flags & ~RECVMSG_FLAGS)
 797		return -EINVAL;
 798	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 799	if (sr->msg_flags & MSG_DONTWAIT)
 800		req->flags |= REQ_F_NOWAIT;
 801	if (sr->msg_flags & MSG_ERRQUEUE)
 802		req->flags |= REQ_F_CLEAR_POLLIN;
 803	if (req->flags & REQ_F_BUFFER_SELECT) {
 
 
 
 
 
 
 
 804		/*
 805		 * Store the buffer group for this multishot receive separately,
 806		 * as if we end up doing an io-wq based issue that selects a
 807		 * buffer, it has to be committed immediately and that will
 808		 * clear ->buf_list. This means we lose the link to the buffer
 809		 * list, and the eventual buffer put on completion then cannot
 810		 * restore it.
 811		 */
 812		sr->buf_group = req->buf_index;
 813		req->buf_list = NULL;
 814	}
 815	if (sr->flags & IORING_RECV_MULTISHOT) {
 816		if (!(req->flags & REQ_F_BUFFER_SELECT))
 817			return -EINVAL;
 818		if (sr->msg_flags & MSG_WAITALL)
 819			return -EINVAL;
 820		if (req->opcode == IORING_OP_RECV && sr->len)
 821			return -EINVAL;
 822		req->flags |= REQ_F_APOLL_MULTISHOT;
 823	}
 824	if (sr->flags & IORING_RECVSEND_BUNDLE) {
 825		if (req->opcode == IORING_OP_RECVMSG)
 826			return -EINVAL;
 827	}
 828
 829#ifdef CONFIG_COMPAT
 830	if (req->ctx->compat)
 831		sr->msg_flags |= MSG_CMSG_COMPAT;
 832#endif
 833	sr->nr_multishot_loops = 0;
 834	return io_recvmsg_prep_setup(req);
 
 
 
 
 
 
 
 
 
 
 835}
 836
 837/*
 838 * Finishes io_recv and io_recvmsg.
 839 *
 840 * Returns true if it is actually finished, or false if it should run
 841 * again (for multishot).
 842 */
 843static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 844				  struct io_async_msghdr *kmsg,
 845				  bool mshot_finished, unsigned issue_flags)
 846{
 847	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 848	unsigned int cflags = 0;
 849
 850	if (kmsg->msg.msg_inq > 0)
 
 851		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 852
 853	if (sr->flags & IORING_RECVSEND_BUNDLE) {
 854		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
 855				      issue_flags);
 856		/* bundle with no more immediate buffers, we're done */
 857		if (req->flags & REQ_F_BL_EMPTY)
 858			goto finish;
 859	} else {
 860		cflags |= io_put_kbuf(req, *ret, issue_flags);
 861	}
 862
 863	/*
 864	 * Fill CQE for this receive and see if we should keep trying to
 865	 * receive from this socket.
 866	 */
 867	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
 868	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
 
 
 869		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
 870
 871		io_mshot_prep_retry(req, kmsg);
 872		/* Known not-empty or unknown state, retry */
 873		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
 874			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
 875				return false;
 876			/* mshot retries exceeded, force a requeue */
 877			sr->nr_multishot_loops = 0;
 878			mshot_retry_ret = IOU_REQUEUE;
 879		}
 880		if (issue_flags & IO_URING_F_MULTISHOT)
 881			*ret = mshot_retry_ret;
 882		else
 883			*ret = -EAGAIN;
 884		return true;
 885	}
 886
 887	/* Finish the request / stop multishot. */
 888finish:
 889	io_req_set_res(req, *ret, cflags);
 890
 891	if (issue_flags & IO_URING_F_MULTISHOT)
 892		*ret = IOU_STOP_MULTISHOT;
 893	else
 894		*ret = IOU_OK;
 895	io_req_msg_cleanup(req, issue_flags);
 896	return true;
 897}
 898
 899static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
 900				     struct io_sr_msg *sr, void __user **buf,
 901				     size_t *len)
 902{
 903	unsigned long ubuf = (unsigned long) *buf;
 904	unsigned long hdr;
 905
 906	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
 907		kmsg->controllen;
 908	if (*len < hdr)
 909		return -EFAULT;
 910
 911	if (kmsg->controllen) {
 912		unsigned long control = ubuf + hdr - kmsg->controllen;
 913
 914		kmsg->msg.msg_control_user = (void __user *) control;
 915		kmsg->msg.msg_controllen = kmsg->controllen;
 916	}
 917
 918	sr->buf = *buf; /* stash for later copy */
 919	*buf = (void __user *) (ubuf + hdr);
 920	kmsg->payloadlen = *len = *len - hdr;
 921	return 0;
 922}
 923
 924struct io_recvmsg_multishot_hdr {
 925	struct io_uring_recvmsg_out msg;
 926	struct sockaddr_storage addr;
 927};
 928
 929static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
 930				struct io_async_msghdr *kmsg,
 931				unsigned int flags, bool *finished)
 932{
 933	int err;
 934	int copy_len;
 935	struct io_recvmsg_multishot_hdr hdr;
 936
 937	if (kmsg->namelen)
 938		kmsg->msg.msg_name = &hdr.addr;
 939	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
 940	kmsg->msg.msg_namelen = 0;
 941
 942	if (sock->file->f_flags & O_NONBLOCK)
 943		flags |= MSG_DONTWAIT;
 944
 945	err = sock_recvmsg(sock, &kmsg->msg, flags);
 946	*finished = err <= 0;
 947	if (err < 0)
 948		return err;
 949
 950	hdr.msg = (struct io_uring_recvmsg_out) {
 951		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
 952		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
 953	};
 954
 955	hdr.msg.payloadlen = err;
 956	if (err > kmsg->payloadlen)
 957		err = kmsg->payloadlen;
 958
 959	copy_len = sizeof(struct io_uring_recvmsg_out);
 960	if (kmsg->msg.msg_namelen > kmsg->namelen)
 961		copy_len += kmsg->namelen;
 962	else
 963		copy_len += kmsg->msg.msg_namelen;
 964
 965	/*
 966	 *      "fromlen shall refer to the value before truncation.."
 967	 *                      1003.1g
 968	 */
 969	hdr.msg.namelen = kmsg->msg.msg_namelen;
 970
 971	/* ensure that there is no gap between hdr and sockaddr_storage */
 972	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
 973		     sizeof(struct io_uring_recvmsg_out));
 974	if (copy_to_user(io->buf, &hdr, copy_len)) {
 975		*finished = true;
 976		return -EFAULT;
 977	}
 978
 979	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
 980			kmsg->controllen + err;
 981}
 982
 983int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 984{
 985	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 986	struct io_async_msghdr *kmsg = req->async_data;
 987	struct socket *sock;
 988	unsigned flags;
 989	int ret, min_ret = 0;
 990	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 991	bool mshot_finished = true;
 992
 993	sock = sock_from_file(req->file);
 994	if (unlikely(!sock))
 995		return -ENOTSOCK;
 996
 
 
 
 
 
 
 
 
 
 997	if (!(req->flags & REQ_F_POLLED) &&
 998	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 999		return -EAGAIN;
1000
1001	flags = sr->msg_flags;
1002	if (force_nonblock)
1003		flags |= MSG_DONTWAIT;
1004
1005retry_multishot:
1006	if (io_do_buffer_select(req)) {
1007		void __user *buf;
1008		size_t len = sr->len;
1009
1010		buf = io_buffer_select(req, &len, issue_flags);
1011		if (!buf)
1012			return -ENOBUFS;
1013
1014		if (req->flags & REQ_F_APOLL_MULTISHOT) {
1015			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
1016			if (ret) {
1017				io_kbuf_recycle(req, issue_flags);
1018				return ret;
1019			}
1020		}
1021
1022		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
1023	}
1024
1025	kmsg->msg.msg_get_inq = 1;
1026	kmsg->msg.msg_inq = -1;
1027	if (req->flags & REQ_F_APOLL_MULTISHOT) {
1028		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
1029					   &mshot_finished);
1030	} else {
1031		/* disable partial retry for recvmsg with cmsg attached */
1032		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
1033			min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1034
1035		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
1036					 kmsg->uaddr, flags);
1037	}
1038
1039	if (ret < min_ret) {
1040		if (ret == -EAGAIN && force_nonblock) {
1041			if (issue_flags & IO_URING_F_MULTISHOT) {
 
1042				io_kbuf_recycle(req, issue_flags);
1043				return IOU_ISSUE_SKIP_COMPLETE;
1044			}
1045			return -EAGAIN;
1046		}
1047		if (ret > 0 && io_net_retry(sock, flags)) {
1048			sr->done_io += ret;
1049			req->flags |= REQ_F_BL_NO_RECYCLE;
1050			return -EAGAIN;
1051		}
1052		if (ret == -ERESTARTSYS)
1053			ret = -EINTR;
1054		req_set_fail(req);
1055	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1056		req_set_fail(req);
1057	}
1058
1059	if (ret > 0)
1060		ret += sr->done_io;
1061	else if (sr->done_io)
1062		ret = sr->done_io;
1063	else
1064		io_kbuf_recycle(req, issue_flags);
1065
1066	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
1067		goto retry_multishot;
1068
1069	return ret;
1070}
1071
1072static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
1073			      size_t *len, unsigned int issue_flags)
1074{
1075	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1076	int ret;
1077
1078	/*
1079	 * If the ring isn't locked, then don't use the peek interface
1080	 * to grab multiple buffers as we will lock/unlock between
1081	 * this selection and posting the buffers.
1082	 */
1083	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
1084	    sr->flags & IORING_RECVSEND_BUNDLE) {
1085		struct buf_sel_arg arg = {
1086			.iovs = &kmsg->fast_iov,
1087			.nr_iovs = 1,
1088			.mode = KBUF_MODE_EXPAND,
1089		};
1090
1091		if (kmsg->free_iov) {
1092			arg.nr_iovs = kmsg->free_iov_nr;
1093			arg.iovs = kmsg->free_iov;
1094			arg.mode |= KBUF_MODE_FREE;
1095		}
1096
1097		if (kmsg->msg.msg_inq > 0)
1098			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
1099
1100		ret = io_buffers_peek(req, &arg);
1101		if (unlikely(ret < 0))
1102			return ret;
1103
1104		/* special case 1 vec, can be a fast path */
1105		if (ret == 1) {
1106			sr->buf = arg.iovs[0].iov_base;
1107			sr->len = arg.iovs[0].iov_len;
1108			goto map_ubuf;
1109		}
1110		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
1111				arg.out_len);
1112		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
1113			kmsg->free_iov_nr = ret;
1114			kmsg->free_iov = arg.iovs;
1115			req->flags |= REQ_F_NEED_CLEANUP;
1116		}
1117	} else {
1118		void __user *buf;
1119
1120		*len = sr->len;
1121		buf = io_buffer_select(req, len, issue_flags);
1122		if (!buf)
1123			return -ENOBUFS;
1124		sr->buf = buf;
1125		sr->len = *len;
1126map_ubuf:
1127		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
1128				  &kmsg->msg.msg_iter);
1129		if (unlikely(ret))
1130			return ret;
1131	}
1132
1133	return 0;
1134}
1135
1136int io_recv(struct io_kiocb *req, unsigned int issue_flags)
1137{
1138	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1139	struct io_async_msghdr *kmsg = req->async_data;
1140	struct socket *sock;
1141	unsigned flags;
1142	int ret, min_ret = 0;
1143	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1144	size_t len = sr->len;
1145	bool mshot_finished;
1146
1147	if (!(req->flags & REQ_F_POLLED) &&
1148	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
1149		return -EAGAIN;
1150
1151	sock = sock_from_file(req->file);
1152	if (unlikely(!sock))
1153		return -ENOTSOCK;
1154
 
 
 
 
 
 
 
 
1155	flags = sr->msg_flags;
1156	if (force_nonblock)
1157		flags |= MSG_DONTWAIT;
1158
1159retry_multishot:
1160	if (io_do_buffer_select(req)) {
1161		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
1162		if (unlikely(ret)) {
1163			kmsg->msg.msg_inq = -1;
1164			goto out_free;
1165		}
1166		sr->buf = NULL;
 
1167	}
1168
1169	kmsg->msg.msg_flags = 0;
1170	kmsg->msg.msg_inq = -1;
 
 
 
 
1171
1172	if (flags & MSG_WAITALL)
1173		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1174
1175	ret = sock_recvmsg(sock, &kmsg->msg, flags);
1176	if (ret < min_ret) {
1177		if (ret == -EAGAIN && force_nonblock) {
1178			if (issue_flags & IO_URING_F_MULTISHOT) {
1179				io_kbuf_recycle(req, issue_flags);
1180				return IOU_ISSUE_SKIP_COMPLETE;
1181			}
1182
1183			return -EAGAIN;
1184		}
1185		if (ret > 0 && io_net_retry(sock, flags)) {
1186			sr->len -= ret;
1187			sr->buf += ret;
1188			sr->done_io += ret;
1189			req->flags |= REQ_F_BL_NO_RECYCLE;
1190			return -EAGAIN;
1191		}
1192		if (ret == -ERESTARTSYS)
1193			ret = -EINTR;
1194		req_set_fail(req);
1195	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1196out_free:
1197		req_set_fail(req);
1198	}
1199
1200	mshot_finished = ret <= 0;
1201	if (ret > 0)
1202		ret += sr->done_io;
1203	else if (sr->done_io)
1204		ret = sr->done_io;
1205	else
1206		io_kbuf_recycle(req, issue_flags);
1207
1208	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
1209		goto retry_multishot;
1210
1211	return ret;
1212}
1213
1214void io_send_zc_cleanup(struct io_kiocb *req)
1215{
1216	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1217	struct io_async_msghdr *io = req->async_data;
1218
1219	if (req_has_async_data(req))
1220		io_netmsg_iovec_free(io);
 
 
 
 
1221	if (zc->notif) {
1222		io_notif_flush(zc->notif);
1223		zc->notif = NULL;
1224	}
1225}
1226
/* sqe->ioprio flags that io_send_zc_prep() accepts without extra handling */
#define IO_ZC_FLAGS_COMMON \
	(IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
/* every flag io_send_zc_prep() accepts; anything else is -EINVAL */
#define IO_ZC_FLAGS_VALID \
	(IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
1229
1230int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1231{
1232	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1233	struct io_ring_ctx *ctx = req->ctx;
1234	struct io_kiocb *notif;
1235
1236	zc->done_io = 0;
1237	req->flags |= REQ_F_POLL_NO_LAZY;
1238
1239	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1240		return -EINVAL;
1241	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1242	if (req->flags & REQ_F_CQE_SKIP)
1243		return -EINVAL;
1244
1245	notif = zc->notif = io_alloc_notif(ctx);
1246	if (!notif)
1247		return -ENOMEM;
1248	notif->cqe.user_data = req->cqe.user_data;
1249	notif->cqe.res = 0;
1250	notif->cqe.flags = IORING_CQE_F_NOTIF;
1251	req->flags |= REQ_F_NEED_CLEANUP;
1252
1253	zc->flags = READ_ONCE(sqe->ioprio);
1254	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1255		if (zc->flags & ~IO_ZC_FLAGS_VALID)
1256			return -EINVAL;
1257		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1258			struct io_notif_data *nd = io_notif_to_data(notif);
1259
1260			nd->zc_report = true;
1261			nd->zc_used = false;
1262			nd->zc_copied = false;
1263		}
1264	}
1265
1266	if (req->opcode != IORING_OP_SEND_ZC) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1267		if (unlikely(sqe->addr2 || sqe->file_index))
1268			return -EINVAL;
1269		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
1270			return -EINVAL;
1271	}
1272
 
1273	zc->len = READ_ONCE(sqe->len);
1274	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
1275	zc->buf_index = READ_ONCE(sqe->buf_index);
1276	if (zc->msg_flags & MSG_DONTWAIT)
1277		req->flags |= REQ_F_NOWAIT;
1278
1279#ifdef CONFIG_COMPAT
1280	if (req->ctx->compat)
1281		zc->msg_flags |= MSG_CMSG_COMPAT;
1282#endif
1283	if (unlikely(!io_msg_alloc_async(req)))
1284		return -ENOMEM;
1285	if (req->opcode != IORING_OP_SENDMSG_ZC)
1286		return io_send_setup(req, sqe);
1287	return io_sendmsg_setup(req, sqe);
1288}
1289
1290static int io_sg_from_iter_iovec(struct sk_buff *skb,
1291				 struct iov_iter *from, size_t length)
1292{
1293	skb_zcopy_downgrade_managed(skb);
1294	return zerocopy_fill_skb_from_iter(skb, from, length);
1295}
1296
1297static int io_sg_from_iter(struct sk_buff *skb,
1298			   struct iov_iter *from, size_t length)
1299{
1300	struct skb_shared_info *shinfo = skb_shinfo(skb);
1301	int frag = shinfo->nr_frags;
1302	int ret = 0;
1303	struct bvec_iter bi;
1304	ssize_t copied = 0;
1305	unsigned long truesize = 0;
1306
1307	if (!frag)
1308		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
1309	else if (unlikely(!skb_zcopy_managed(skb)))
1310		return zerocopy_fill_skb_from_iter(skb, from, length);
1311
1312	bi.bi_size = min(from->count, length);
1313	bi.bi_bvec_done = from->iov_offset;
1314	bi.bi_idx = 0;
1315
1316	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
1317		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
1318
1319		copied += v.bv_len;
1320		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
1321		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
1322					   v.bv_offset, v.bv_len);
1323		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
1324	}
1325	if (bi.bi_size)
1326		ret = -EMSGSIZE;
1327
1328	shinfo->nr_frags = frag;
1329	from->bvec += bi.bi_idx;
1330	from->nr_segs -= bi.bi_idx;
1331	from->count -= copied;
1332	from->iov_offset = bi.bi_bvec_done;
1333
1334	skb->data_len += copied;
1335	skb->len += copied;
1336	skb->truesize += truesize;
1337	return ret;
1338}
1339
1340static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
1341{
1342	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1343	struct io_async_msghdr *kmsg = req->async_data;
1344	int ret;
1345
1346	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
1347		struct io_ring_ctx *ctx = req->ctx;
1348		struct io_rsrc_node *node;
1349
1350		ret = -EFAULT;
1351		io_ring_submit_lock(ctx, issue_flags);
1352		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
1353		if (node) {
1354			io_req_assign_buf_node(sr->notif, node);
1355			ret = 0;
1356		}
1357		io_ring_submit_unlock(ctx, issue_flags);
1358
1359		if (unlikely(ret))
1360			return ret;
1361
1362		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
1363					node->buf, (u64)(uintptr_t)sr->buf,
1364					sr->len);
1365		if (unlikely(ret))
1366			return ret;
1367		kmsg->msg.sg_from_iter = io_sg_from_iter;
1368	} else {
1369		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
1370		if (unlikely(ret))
1371			return ret;
1372		ret = io_notif_account_mem(sr->notif, sr->len);
1373		if (unlikely(ret))
1374			return ret;
1375		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1376	}
1377
1378	return ret;
1379}
1380
1381int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1382{
 
1383	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1384	struct io_async_msghdr *kmsg = req->async_data;
1385	struct socket *sock;
1386	unsigned msg_flags;
1387	int ret, min_ret = 0;
1388
1389	sock = sock_from_file(req->file);
1390	if (unlikely(!sock))
1391		return -ENOTSOCK;
1392	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1393		return -EOPNOTSUPP;
1394
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1395	if (!(req->flags & REQ_F_POLLED) &&
1396	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
1397		return -EAGAIN;
1398
1399	if (!zc->done_io) {
1400		ret = io_send_zc_import(req, issue_flags);
 
1401		if (unlikely(ret))
1402			return ret;
 
 
 
 
 
 
 
 
 
 
1403	}
1404
1405	msg_flags = zc->msg_flags;
1406	if (issue_flags & IO_URING_F_NONBLOCK)
1407		msg_flags |= MSG_DONTWAIT;
1408	if (msg_flags & MSG_WAITALL)
1409		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1410	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1411
1412	kmsg->msg.msg_flags = msg_flags;
1413	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1414	ret = sock_sendmsg(sock, &kmsg->msg);
1415
1416	if (unlikely(ret < min_ret)) {
1417		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1418			return -EAGAIN;
1419
1420		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
1421			zc->len -= ret;
1422			zc->buf += ret;
1423			zc->done_io += ret;
1424			req->flags |= REQ_F_BL_NO_RECYCLE;
1425			return -EAGAIN;
1426		}
1427		if (ret == -ERESTARTSYS)
1428			ret = -EINTR;
1429		req_set_fail(req);
1430	}
1431
1432	if (ret >= 0)
1433		ret += zc->done_io;
1434	else if (zc->done_io)
1435		ret = zc->done_io;
1436
1437	/*
1438	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1439	 * flushing notif to io_send_zc_cleanup()
1440	 */
1441	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1442		io_notif_flush(zc->notif);
1443		io_req_msg_cleanup(req, 0);
1444	}
1445	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1446	return IOU_OK;
1447}
1448
1449int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1450{
1451	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1452	struct io_async_msghdr *kmsg = req->async_data;
1453	struct socket *sock;
1454	unsigned flags;
1455	int ret, min_ret = 0;
1456
 
 
1457	sock = sock_from_file(req->file);
1458	if (unlikely(!sock))
1459		return -ENOTSOCK;
1460	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1461		return -EOPNOTSUPP;
1462
 
 
 
 
 
 
 
 
 
 
1463	if (!(req->flags & REQ_F_POLLED) &&
1464	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
1465		return -EAGAIN;
1466
1467	flags = sr->msg_flags;
1468	if (issue_flags & IO_URING_F_NONBLOCK)
1469		flags |= MSG_DONTWAIT;
1470	if (flags & MSG_WAITALL)
1471		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1472
1473	kmsg->msg.msg_control_user = sr->msg_control;
1474	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1475	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1476	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1477
1478	if (unlikely(ret < min_ret)) {
1479		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1480			return -EAGAIN;
1481
1482		if (ret > 0 && io_net_retry(sock, flags)) {
1483			sr->done_io += ret;
1484			req->flags |= REQ_F_BL_NO_RECYCLE;
1485			return -EAGAIN;
1486		}
1487		if (ret == -ERESTARTSYS)
1488			ret = -EINTR;
1489		req_set_fail(req);
1490	}
 
 
 
 
 
1491
 
1492	if (ret >= 0)
1493		ret += sr->done_io;
1494	else if (sr->done_io)
1495		ret = sr->done_io;
1496
1497	/*
1498	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1499	 * flushing notif to io_send_zc_cleanup()
1500	 */
1501	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1502		io_notif_flush(sr->notif);
1503		io_req_msg_cleanup(req, 0);
1504	}
1505	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1506	return IOU_OK;
1507}
1508
1509void io_sendrecv_fail(struct io_kiocb *req)
1510{
1511	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1512
1513	if (sr->done_io)
1514		req->cqe.res = sr->done_io;
1515
1516	if ((req->flags & REQ_F_NEED_CLEANUP) &&
1517	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1518		req->cqe.flags |= IORING_CQE_F_MORE;
1519}
1520
/* sqe->ioprio flags accepted by io_accept_prep(); anything else is -EINVAL */
#define ACCEPT_FLAGS \
	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
	 IORING_ACCEPT_POLL_FIRST)
1523
1524int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1525{
1526	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
 
1527
1528	if (sqe->len || sqe->buf_index)
1529		return -EINVAL;
1530
1531	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1532	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1533	accept->flags = READ_ONCE(sqe->accept_flags);
1534	accept->nofile = rlimit(RLIMIT_NOFILE);
1535	accept->iou_flags = READ_ONCE(sqe->ioprio);
1536	if (accept->iou_flags & ~ACCEPT_FLAGS)
1537		return -EINVAL;
1538
1539	accept->file_slot = READ_ONCE(sqe->file_index);
1540	if (accept->file_slot) {
1541		if (accept->flags & SOCK_CLOEXEC)
1542			return -EINVAL;
1543		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
1544		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
1545			return -EINVAL;
1546	}
1547	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1548		return -EINVAL;
1549	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1550		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1551	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
1552		req->flags |= REQ_F_APOLL_MULTISHOT;
1553	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
1554		req->flags |= REQ_F_NOWAIT;
1555	return 0;
1556}
1557
1558int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1559{
1560	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1561	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
1562	bool fixed = !!accept->file_slot;
1563	struct proto_accept_arg arg = {
1564		.flags = force_nonblock ? O_NONBLOCK : 0,
1565	};
1566	struct file *file;
1567	unsigned cflags;
1568	int ret, fd;
1569
1570	if (!(req->flags & REQ_F_POLLED) &&
1571	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
1572		return -EAGAIN;
1573
1574retry:
1575	if (!fixed) {
1576		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1577		if (unlikely(fd < 0))
1578			return fd;
1579	}
1580	arg.err = 0;
1581	arg.is_empty = -1;
1582	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
1583			 accept->flags);
1584	if (IS_ERR(file)) {
1585		if (!fixed)
1586			put_unused_fd(fd);
1587		ret = PTR_ERR(file);
1588		if (ret == -EAGAIN && force_nonblock &&
1589		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
1590			/*
1591			 * if it's multishot and polled, we don't need to
1592			 * return EAGAIN to arm the poll infra since it
1593			 * has already been done
1594			 */
1595			if (issue_flags & IO_URING_F_MULTISHOT)
1596				return IOU_ISSUE_SKIP_COMPLETE;
1597			return ret;
1598		}
1599		if (ret == -ERESTARTSYS)
1600			ret = -EINTR;
1601		req_set_fail(req);
1602	} else if (!fixed) {
1603		fd_install(fd, file);
1604		ret = fd;
1605	} else {
1606		ret = io_fixed_fd_install(req, issue_flags, file,
1607						accept->file_slot);
1608	}
1609
1610	cflags = 0;
1611	if (!arg.is_empty)
1612		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
1613
1614	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
1615		io_req_set_res(req, ret, cflags);
1616		return IOU_OK;
1617	}
1618
1619	if (ret < 0)
1620		return ret;
1621	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
1622		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
1623			goto retry;
1624		if (issue_flags & IO_URING_F_MULTISHOT)
1625			return IOU_ISSUE_SKIP_COMPLETE;
1626		return -EAGAIN;
1627	}
1628
1629	io_req_set_res(req, ret, cflags);
1630	return IOU_STOP_MULTISHOT;
1631}
1632
1633int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1634{
1635	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1636
1637	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1638		return -EINVAL;
1639
1640	sock->domain = READ_ONCE(sqe->fd);
1641	sock->type = READ_ONCE(sqe->off);
1642	sock->protocol = READ_ONCE(sqe->len);
1643	sock->file_slot = READ_ONCE(sqe->file_index);
1644	sock->nofile = rlimit(RLIMIT_NOFILE);
1645
1646	sock->flags = sock->type & ~SOCK_TYPE_MASK;
1647	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1648		return -EINVAL;
1649	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1650		return -EINVAL;
1651	return 0;
1652}
1653
1654int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1655{
1656	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1657	bool fixed = !!sock->file_slot;
1658	struct file *file;
1659	int ret, fd;
1660
1661	if (!fixed) {
1662		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1663		if (unlikely(fd < 0))
1664			return fd;
1665	}
1666	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1667	if (IS_ERR(file)) {
1668		if (!fixed)
1669			put_unused_fd(fd);
1670		ret = PTR_ERR(file);
1671		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1672			return -EAGAIN;
1673		if (ret == -ERESTARTSYS)
1674			ret = -EINTR;
1675		req_set_fail(req);
1676	} else if (!fixed) {
1677		fd_install(fd, file);
1678		ret = fd;
1679	} else {
1680		ret = io_fixed_fd_install(req, issue_flags, file,
1681					    sock->file_slot);
1682	}
1683	io_req_set_res(req, ret, 0);
1684	return IOU_OK;
1685}
1686
 
 
 
 
 
 
 
 
1687int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1688{
1689	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1690	struct io_async_msghdr *io;
1691
1692	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1693		return -EINVAL;
1694
1695	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1696	conn->addr_len =  READ_ONCE(sqe->addr2);
1697	conn->in_progress = conn->seen_econnaborted = false;
1698
1699	io = io_msg_alloc_async(req);
1700	if (unlikely(!io))
1701		return -ENOMEM;
1702
1703	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
1704}
1705
1706int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1707{
1708	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1709	struct io_async_msghdr *io = req->async_data;
1710	unsigned file_flags;
1711	int ret;
1712	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1713
1714	if (unlikely(req->flags & REQ_F_FAIL)) {
1715		ret = -ECONNRESET;
1716		goto out;
 
 
 
 
 
 
1717	}
1718
1719	file_flags = force_nonblock ? O_NONBLOCK : 0;
1720
1721	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
1722				 file_flags);
1723	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1724	    && force_nonblock) {
1725		if (ret == -EINPROGRESS) {
1726			connect->in_progress = true;
1727		} else if (ret == -ECONNABORTED) {
1728			if (connect->seen_econnaborted)
1729				goto out;
1730			connect->seen_econnaborted = true;
1731		}
 
 
 
 
 
 
 
1732		return -EAGAIN;
1733	}
1734	if (connect->in_progress) {
1735		/*
1736		 * At least bluetooth will return -EBADFD on a re-connect
1737		 * attempt, and it's (supposedly) also valid to get -EISCONN
1738		 * which means the previous result is good. For both of these,
1739		 * grab the sock_error() and use that for the completion.
1740		 */
1741		if (ret == -EBADFD || ret == -EISCONN)
1742			ret = sock_error(sock_from_file(req->file)->sk);
1743	}
1744	if (ret == -ERESTARTSYS)
1745		ret = -EINTR;
1746out:
1747	if (ret < 0)
1748		req_set_fail(req);
1749	io_req_msg_cleanup(req, issue_flags);
1750	io_req_set_res(req, ret, 0);
1751	return IOU_OK;
1752}
1753
1754int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1755{
1756	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1757	struct sockaddr __user *uaddr;
1758	struct io_async_msghdr *io;
1759
1760	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1761		return -EINVAL;
1762
1763	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1764	bind->addr_len =  READ_ONCE(sqe->addr2);
1765
1766	io = io_msg_alloc_async(req);
1767	if (unlikely(!io))
1768		return -ENOMEM;
1769	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
1770}
1771
1772int io_bind(struct io_kiocb *req, unsigned int issue_flags)
1773{
1774	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1775	struct io_async_msghdr *io = req->async_data;
1776	struct socket *sock;
1777	int ret;
1778
1779	sock = sock_from_file(req->file);
1780	if (unlikely(!sock))
1781		return -ENOTSOCK;
1782
1783	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
1784	if (ret < 0)
1785		req_set_fail(req);
1786	io_req_set_res(req, ret, 0);
1787	return 0;
1788}
1789
1790int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1791{
1792	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1793
1794	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
1795		return -EINVAL;
1796
1797	listen->backlog = READ_ONCE(sqe->len);
1798	return 0;
1799}
1800
1801int io_listen(struct io_kiocb *req, unsigned int issue_flags)
1802{
1803	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1804	struct socket *sock;
1805	int ret;
1806
1807	sock = sock_from_file(req->file);
1808	if (unlikely(!sock))
1809		return -ENOTSOCK;
1810
1811	ret = __sys_listen_socket(sock, listen->backlog);
1812	if (ret < 0)
1813		req_set_fail(req);
1814	io_req_set_res(req, ret, 0);
1815	return 0;
1816}
1817
1818void io_netmsg_cache_free(const void *entry)
1819{
1820	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
1821
1822	if (kmsg->free_iov) {
1823		kasan_mempool_unpoison_object(kmsg->free_iov,
1824				kmsg->free_iov_nr * sizeof(struct iovec));
1825		io_netmsg_iovec_free(kmsg);
1826	}
1827	kfree(kmsg);
1828}
1829#endif
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/kernel.h>
   3#include <linux/errno.h>
   4#include <linux/file.h>
   5#include <linux/slab.h>
   6#include <linux/net.h>
   7#include <linux/compat.h>
   8#include <net/compat.h>
   9#include <linux/io_uring.h>
  10
  11#include <uapi/linux/io_uring.h>
  12
  13#include "io_uring.h"
  14#include "kbuf.h"
  15#include "alloc_cache.h"
  16#include "net.h"
  17#include "notif.h"
  18#include "rsrc.h"
  19
  20#if defined(CONFIG_NET)
  /* Per-request state for the shutdown opcode. */
  struct io_shutdown {
  	struct file *file;
  	/* taken from sqe->len and passed to __sys_shutdown_sock() */
  	int how;
  };
  25
  26struct io_accept {
  27	struct file			*file;
  28	struct sockaddr __user		*addr;
  29	int __user			*addr_len;
  30	int				flags;
 
  31	u32				file_slot;
  32	unsigned long			nofile;
  33};
  34
  35struct io_socket {
  36	struct file			*file;
  37	int				domain;
  38	int				type;
  39	int				protocol;
  40	int				flags;
  41	u32				file_slot;
  42	unsigned long			nofile;
  43};
  44
  45struct io_connect {
  46	struct file			*file;
  47	struct sockaddr __user		*addr;
  48	int				addr_len;
  49	bool				in_progress;
  50	bool				seen_econnaborted;
  51};
  52
 
 
 
 
 
 
 
 
 
 
  53struct io_sr_msg {
  54	struct file			*file;
  55	union {
  56		struct compat_msghdr __user	*umsg_compat;
  57		struct user_msghdr __user	*umsg;
  58		void __user			*buf;
  59	};
  60	unsigned			len;
  61	unsigned			done_io;
  62	unsigned			msg_flags;
  63	unsigned			nr_multishot_loops;
  64	u16				flags;
  65	/* initialised and used only by !msg send variants */
  66	u16				addr_len;
  67	u16				buf_group;
  68	void __user			*addr;
  69	void __user			*msg_control;
  70	/* used only for send zerocopy */
  71	struct io_kiocb 		*notif;
  72};
  73
  /*
   * Upper bound on back-to-back receive attempts while more data is
   * available. Once the limit is exceeded the request is added to the back
   * of the queue and retried from there, which helps fairness between
   * flooding clients.
   */
  #define MULTISHOT_MAX_RETRY 32
  80
  81int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  82{
  83	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
  84
  85	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
  86		     sqe->buf_index || sqe->splice_fd_in))
  87		return -EINVAL;
  88
  89	shutdown->how = READ_ONCE(sqe->len);
  90	req->flags |= REQ_F_FORCE_ASYNC;
  91	return 0;
  92}
  93
  94int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
  95{
  96	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
  97	struct socket *sock;
  98	int ret;
  99
 100	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
 101
 102	sock = sock_from_file(req->file);
 103	if (unlikely(!sock))
 104		return -ENOTSOCK;
 105
 106	ret = __sys_shutdown_sock(sock, shutdown->how);
 107	io_req_set_res(req, ret, 0);
 108	return IOU_OK;
 109}
 110
 111static bool io_net_retry(struct socket *sock, int flags)
 112{
 113	if (!(flags & MSG_WAITALL))
 114		return false;
 115	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
 116}
 117
 
 
 
 
 
 
 
 
 
 118static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
 119{
 120	struct io_async_msghdr *hdr = req->async_data;
 
 121
 122	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
 
 
 123		return;
 
 124
 125	/* Let normal cleanup path reap it if we fail adding to the cache */
 126	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
 
 
 
 127		req->async_data = NULL;
 128		req->flags &= ~REQ_F_ASYNC_DATA;
 129	}
 130}
 131
 132static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
 133						  unsigned int issue_flags)
 134{
 135	struct io_ring_ctx *ctx = req->ctx;
 136	struct io_cache_entry *entry;
 137	struct io_async_msghdr *hdr;
 138
 139	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
 140		entry = io_alloc_cache_get(&ctx->netmsg_cache);
 141		if (entry) {
 142			hdr = container_of(entry, struct io_async_msghdr, cache);
 143			hdr->free_iov = NULL;
 144			req->flags |= REQ_F_ASYNC_DATA;
 145			req->async_data = hdr;
 146			return hdr;
 147		}
 
 
 
 148	}
 149
 150	if (!io_alloc_async_data(req)) {
 151		hdr = req->async_data;
 
 152		hdr->free_iov = NULL;
 153		return hdr;
 154	}
 155	return NULL;
 156}
 157
 /*
  * Allocate the async message header from a ->prep_async handler; passes
  * issue_flags of 0 to io_msg_alloc_async().
  */
 static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
 {
 	/* ->prep_async is always called from the submission context */
 	const unsigned int prep_issue_flags = 0;

 	return io_msg_alloc_async(req, prep_issue_flags);
 }
 163
 164static int io_setup_async_msg(struct io_kiocb *req,
 165			      struct io_async_msghdr *kmsg,
 166			      unsigned int issue_flags)
 167{
 168	struct io_async_msghdr *async_msg;
 169
 170	if (req_has_async_data(req))
 171		return -EAGAIN;
 172	async_msg = io_msg_alloc_async(req, issue_flags);
 173	if (!async_msg) {
 174		kfree(kmsg->free_iov);
 175		return -ENOMEM;
 176	}
 177	req->flags |= REQ_F_NEED_CLEANUP;
 178	memcpy(async_msg, kmsg, sizeof(*kmsg));
 179	if (async_msg->msg.msg_name)
 180		async_msg->msg.msg_name = &async_msg->addr;
 181
 182	if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs)
 183		return -EAGAIN;
 184
 185	/* if were using fast_iov, set it to the new one */
 186	if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
 187		size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
 188		async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
 189	}
 190
 191	return -EAGAIN;
 192}
 193
 194#ifdef CONFIG_COMPAT
 195static int io_compat_msg_copy_hdr(struct io_kiocb *req,
 196				  struct io_async_msghdr *iomsg,
 197				  struct compat_msghdr *msg, int ddir)
 198{
 199	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 200	struct compat_iovec __user *uiov;
 201	int ret;
 
 
 
 
 
 
 
 
 
 202
 203	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
 204		return -EFAULT;
 205
 206	uiov = compat_ptr(msg->msg_iov);
 207	if (req->flags & REQ_F_BUFFER_SELECT) {
 208		compat_ssize_t clen;
 209
 210		iomsg->free_iov = NULL;
 211		if (msg->msg_iovlen == 0) {
 212			sr->len = 0;
 
 213		} else if (msg->msg_iovlen > 1) {
 214			return -EINVAL;
 215		} else {
 216			if (!access_ok(uiov, sizeof(*uiov)))
 217				return -EFAULT;
 218			if (__get_user(clen, &uiov->iov_len))
 219				return -EFAULT;
 220			if (clen < 0)
 221				return -EINVAL;
 222			sr->len = clen;
 223		}
 224
 225		return 0;
 226	}
 227
 228	iomsg->free_iov = iomsg->fast_iov;
 229	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
 230				UIO_FASTIOV, &iomsg->free_iov,
 231				&iomsg->msg.msg_iter, true);
 232	if (unlikely(ret < 0))
 233		return ret;
 234
 235	return 0;
 236}
 237#endif
 238
 239static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 240			   struct user_msghdr *msg, int ddir)
 241{
 242	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 243	int ret;
 
 
 
 
 
 
 
 
 
 
 244
 245	if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
 246		return -EFAULT;
 247
 248	ret = -EFAULT;
 249	unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end);
 250	unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end);
 251	unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end);
 252	unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end);
 253	unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end);
 254	unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end);
 255	msg->msg_flags = 0;
 256
 257	if (req->flags & REQ_F_BUFFER_SELECT) {
 258		if (msg->msg_iovlen == 0) {
 259			sr->len = iomsg->fast_iov[0].iov_len = 0;
 260			iomsg->fast_iov[0].iov_base = NULL;
 261			iomsg->free_iov = NULL;
 262		} else if (msg->msg_iovlen > 1) {
 263			ret = -EINVAL;
 264			goto ua_end;
 265		} else {
 266			/* we only need the length for provided buffers */
 267			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
 268				goto ua_end;
 269			unsafe_get_user(iomsg->fast_iov[0].iov_len,
 270					&msg->msg_iov[0].iov_len, ua_end);
 271			sr->len = iomsg->fast_iov[0].iov_len;
 272			iomsg->free_iov = NULL;
 273		}
 274		ret = 0;
 275ua_end:
 276		user_access_end();
 277		return ret;
 278	}
 279
 280	user_access_end();
 281	iomsg->free_iov = iomsg->fast_iov;
 282	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV,
 283				&iomsg->free_iov, &iomsg->msg.msg_iter, false);
 284	if (unlikely(ret < 0))
 285		return ret;
 286
 287	return 0;
 288}
 289
 290static int io_sendmsg_copy_hdr(struct io_kiocb *req,
 291			       struct io_async_msghdr *iomsg)
 292{
 293	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 294	struct user_msghdr msg;
 295	int ret;
 296
 297	iomsg->msg.msg_name = &iomsg->addr;
 298	iomsg->msg.msg_iter.nr_segs = 0;
 299
 300#ifdef CONFIG_COMPAT
 301	if (unlikely(req->ctx->compat)) {
 302		struct compat_msghdr cmsg;
 303
 304		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
 305		if (unlikely(ret))
 306			return ret;
 307
 308		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
 
 
 309	}
 310#endif
 311
 312	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
 313	if (unlikely(ret))
 314		return ret;
 315
 316	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
 317
 318	/* save msg_control as sys_sendmsg() overwrites it */
 319	sr->msg_control = iomsg->msg.msg_control_user;
 320	return ret;
 321}
 322
 323int io_send_prep_async(struct io_kiocb *req)
 324{
 325	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 326	struct io_async_msghdr *io;
 327	int ret;
 328
 329	if (req_has_async_data(req))
 330		return 0;
 331	zc->done_io = 0;
 332	if (!zc->addr)
 333		return 0;
 334	io = io_msg_alloc_async_prep(req);
 335	if (!io)
 336		return -ENOMEM;
 337	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
 338	return ret;
 339}
 340
 341static int io_setup_async_addr(struct io_kiocb *req,
 342			      struct sockaddr_storage *addr_storage,
 343			      unsigned int issue_flags)
 344{
 345	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 346	struct io_async_msghdr *io;
 
 
 
 
 
 
 
 
 347
 348	if (!sr->addr || req_has_async_data(req))
 349		return -EAGAIN;
 350	io = io_msg_alloc_async(req, issue_flags);
 351	if (!io)
 352		return -ENOMEM;
 353	memcpy(&io->addr, addr_storage, sizeof(io->addr));
 354	return -EAGAIN;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 355}
 356
 357int io_sendmsg_prep_async(struct io_kiocb *req)
 358{
 359	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 360	int ret;
 361
 362	sr->done_io = 0;
 363	if (!io_msg_alloc_async_prep(req))
 364		return -ENOMEM;
 365	ret = io_sendmsg_copy_hdr(req, req->async_data);
 366	if (!ret)
 367		req->flags |= REQ_F_NEED_CLEANUP;
 368	return ret;
 369}
 370
 371void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
 372{
 373	struct io_async_msghdr *io = req->async_data;
 374
 375	kfree(io->free_iov);
 376}
 377
 378int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 379{
 380	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 381
 382	sr->done_io = 0;
 383
 384	if (req->opcode == IORING_OP_SEND) {
 385		if (READ_ONCE(sqe->__pad3[0]))
 386			return -EINVAL;
 387		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 388		sr->addr_len = READ_ONCE(sqe->addr_len);
 389	} else if (sqe->addr2 || sqe->file_index) {
 390		return -EINVAL;
 391	}
 392
 393	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 394	sr->len = READ_ONCE(sqe->len);
 395	sr->flags = READ_ONCE(sqe->ioprio);
 396	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
 397		return -EINVAL;
 398	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 399	if (sr->msg_flags & MSG_DONTWAIT)
 400		req->flags |= REQ_F_NOWAIT;
 
 
 
 
 
 
 
 
 
 401
 402#ifdef CONFIG_COMPAT
 403	if (req->ctx->compat)
 404		sr->msg_flags |= MSG_CMSG_COMPAT;
 405#endif
 406	return 0;
 
 
 
 
 407}
 408
 409static void io_req_msg_cleanup(struct io_kiocb *req,
 410			       struct io_async_msghdr *kmsg,
 411			       unsigned int issue_flags)
 412{
 413	req->flags &= ~REQ_F_NEED_CLEANUP;
 414	/* fast path, check for non-NULL to avoid function call */
 415	if (kmsg->free_iov)
 416		kfree(kmsg->free_iov);
 417	io_netmsg_recycle(req, issue_flags);
 418}
 419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 420int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 421{
 422	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 423	struct io_async_msghdr iomsg, *kmsg;
 424	struct socket *sock;
 425	unsigned flags;
 426	int min_ret = 0;
 427	int ret;
 428
 429	sock = sock_from_file(req->file);
 430	if (unlikely(!sock))
 431		return -ENOTSOCK;
 432
 433	if (req_has_async_data(req)) {
 434		kmsg = req->async_data;
 435		kmsg->msg.msg_control_user = sr->msg_control;
 436	} else {
 437		ret = io_sendmsg_copy_hdr(req, &iomsg);
 438		if (ret)
 439			return ret;
 440		kmsg = &iomsg;
 441	}
 442
 443	if (!(req->flags & REQ_F_POLLED) &&
 444	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 445		return io_setup_async_msg(req, kmsg, issue_flags);
 446
 447	flags = sr->msg_flags;
 448	if (issue_flags & IO_URING_F_NONBLOCK)
 449		flags |= MSG_DONTWAIT;
 450	if (flags & MSG_WAITALL)
 451		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 452
 
 
 453	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
 454
 455	if (ret < min_ret) {
 456		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 457			return io_setup_async_msg(req, kmsg, issue_flags);
 458		if (ret > 0 && io_net_retry(sock, flags)) {
 459			kmsg->msg.msg_controllen = 0;
 460			kmsg->msg.msg_control = NULL;
 461			sr->done_io += ret;
 462			req->flags |= REQ_F_BL_NO_RECYCLE;
 463			return io_setup_async_msg(req, kmsg, issue_flags);
 464		}
 465		if (ret == -ERESTARTSYS)
 466			ret = -EINTR;
 467		req_set_fail(req);
 468	}
 469	io_req_msg_cleanup(req, kmsg, issue_flags);
 470	if (ret >= 0)
 471		ret += sr->done_io;
 472	else if (sr->done_io)
 473		ret = sr->done_io;
 474	io_req_set_res(req, ret, 0);
 475	return IOU_OK;
 476}
 477
 478int io_send(struct io_kiocb *req, unsigned int issue_flags)
 479{
 480	struct sockaddr_storage __address;
 481	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 482	struct msghdr msg;
 483	struct socket *sock;
 484	unsigned flags;
 485	int min_ret = 0;
 486	int ret;
 487
 488	msg.msg_name = NULL;
 489	msg.msg_control = NULL;
 490	msg.msg_controllen = 0;
 491	msg.msg_namelen = 0;
 492	msg.msg_ubuf = NULL;
 493
 494	if (sr->addr) {
 495		if (req_has_async_data(req)) {
 496			struct io_async_msghdr *io = req->async_data;
 497
 498			msg.msg_name = &io->addr;
 499		} else {
 500			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
 501			if (unlikely(ret < 0))
 502				return ret;
 503			msg.msg_name = (struct sockaddr *)&__address;
 504		}
 505		msg.msg_namelen = sr->addr_len;
 506	}
 507
 508	if (!(req->flags & REQ_F_POLLED) &&
 509	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 510		return io_setup_async_addr(req, &__address, issue_flags);
 511
 512	sock = sock_from_file(req->file);
 513	if (unlikely(!sock))
 514		return -ENOTSOCK;
 515
 516	ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter);
 517	if (unlikely(ret))
 518		return ret;
 519
 520	flags = sr->msg_flags;
 521	if (issue_flags & IO_URING_F_NONBLOCK)
 522		flags |= MSG_DONTWAIT;
 523	if (flags & MSG_WAITALL)
 524		min_ret = iov_iter_count(&msg.msg_iter);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 525
 526	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
 527	msg.msg_flags = flags;
 528	ret = sock_sendmsg(sock, &msg);
 529	if (ret < min_ret) {
 530		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 531			return io_setup_async_addr(req, &__address, issue_flags);
 532
 533		if (ret > 0 && io_net_retry(sock, flags)) {
 534			sr->len -= ret;
 535			sr->buf += ret;
 536			sr->done_io += ret;
 537			req->flags |= REQ_F_BL_NO_RECYCLE;
 538			return io_setup_async_addr(req, &__address, issue_flags);
 539		}
 540		if (ret == -ERESTARTSYS)
 541			ret = -EINTR;
 542		req_set_fail(req);
 543	}
 544	if (ret >= 0)
 545		ret += sr->done_io;
 546	else if (sr->done_io)
 547		ret = sr->done_io;
 548	io_req_set_res(req, ret, 0);
 549	return IOU_OK;
 
 
 
 
 550}
 551
 552static int io_recvmsg_mshot_prep(struct io_kiocb *req,
 553				 struct io_async_msghdr *iomsg,
 554				 int namelen, size_t controllen)
 555{
 556	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
 557			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
 558		int hdr;
 559
 560		if (unlikely(namelen < 0))
 561			return -EOVERFLOW;
 562		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
 563					namelen, &hdr))
 564			return -EOVERFLOW;
 565		if (check_add_overflow(hdr, controllen, &hdr))
 566			return -EOVERFLOW;
 567
 568		iomsg->namelen = namelen;
 569		iomsg->controllen = controllen;
 570		return 0;
 571	}
 572
 573	return 0;
 574}
 575
 576static int io_recvmsg_copy_hdr(struct io_kiocb *req,
 577			       struct io_async_msghdr *iomsg)
 578{
 579	struct user_msghdr msg;
 580	int ret;
 581
 582	iomsg->msg.msg_name = &iomsg->addr;
 583	iomsg->msg.msg_iter.nr_segs = 0;
 584
 585#ifdef CONFIG_COMPAT
 586	if (unlikely(req->ctx->compat)) {
 587		struct compat_msghdr cmsg;
 588
 589		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
 590		if (unlikely(ret))
 591			return ret;
 592
 593		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
 594		if (unlikely(ret))
 595			return ret;
 596
 597		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
 598						cmsg.msg_controllen);
 599	}
 600#endif
 601
 602	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
 603	if (unlikely(ret))
 604		return ret;
 605
 606	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
 607	if (unlikely(ret))
 608		return ret;
 609
 610	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
 611					msg.msg_controllen);
 612}
 613
 614int io_recvmsg_prep_async(struct io_kiocb *req)
 615{
 616	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 617	struct io_async_msghdr *iomsg;
 618	int ret;
 619
 620	sr->done_io = 0;
 621	if (!io_msg_alloc_async_prep(req))
 622		return -ENOMEM;
 623	iomsg = req->async_data;
 624	ret = io_recvmsg_copy_hdr(req, iomsg);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 625	if (!ret)
 626		req->flags |= REQ_F_NEED_CLEANUP;
 627	return ret;
 628}
 629
 630#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)
 
 631
 632int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 633{
 634	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 635
 636	sr->done_io = 0;
 637
 638	if (unlikely(sqe->file_index || sqe->addr2))
 639		return -EINVAL;
 640
 641	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 642	sr->len = READ_ONCE(sqe->len);
 643	sr->flags = READ_ONCE(sqe->ioprio);
 644	if (sr->flags & ~(RECVMSG_FLAGS))
 645		return -EINVAL;
 646	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 647	if (sr->msg_flags & MSG_DONTWAIT)
 648		req->flags |= REQ_F_NOWAIT;
 649	if (sr->msg_flags & MSG_ERRQUEUE)
 650		req->flags |= REQ_F_CLEAR_POLLIN;
 651	if (sr->flags & IORING_RECV_MULTISHOT) {
 652		if (!(req->flags & REQ_F_BUFFER_SELECT))
 653			return -EINVAL;
 654		if (sr->msg_flags & MSG_WAITALL)
 655			return -EINVAL;
 656		if (req->opcode == IORING_OP_RECV && sr->len)
 657			return -EINVAL;
 658		req->flags |= REQ_F_APOLL_MULTISHOT;
 659		/*
 660		 * Store the buffer group for this multishot receive separately,
 661		 * as if we end up doing an io-wq based issue that selects a
 662		 * buffer, it has to be committed immediately and that will
 663		 * clear ->buf_list. This means we lose the link to the buffer
 664		 * list, and the eventual buffer put on completion then cannot
 665		 * restore it.
 666		 */
 667		sr->buf_group = req->buf_index;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 668	}
 669
 670#ifdef CONFIG_COMPAT
 671	if (req->ctx->compat)
 672		sr->msg_flags |= MSG_CMSG_COMPAT;
 673#endif
 674	sr->nr_multishot_loops = 0;
 675	return 0;
 676}
 677
 678static inline void io_recv_prep_retry(struct io_kiocb *req)
 679{
 680	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 681
 682	req->flags &= ~REQ_F_BL_EMPTY;
 683	sr->done_io = 0;
 684	sr->len = 0; /* get from the provided buffer */
 685	req->buf_index = sr->buf_group;
 686}
 687
 688/*
 689 * Finishes io_recv and io_recvmsg.
 690 *
 691 * Returns true if it is actually finished, or false if it should run
 692 * again (for multishot).
 693 */
 694static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 695				  struct msghdr *msg, bool mshot_finished,
 696				  unsigned issue_flags)
 697{
 698	unsigned int cflags;
 
 699
 700	cflags = io_put_kbuf(req, issue_flags);
 701	if (msg->msg_inq > 0)
 702		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 703
 
 
 
 
 
 
 
 
 
 
 704	/*
 705	 * Fill CQE for this receive and see if we should keep trying to
 706	 * receive from this socket.
 707	 */
 708	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
 709	    io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
 710				*ret, cflags | IORING_CQE_F_MORE)) {
 711		struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 712		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
 713
 714		io_recv_prep_retry(req);
 715		/* Known not-empty or unknown state, retry */
 716		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq < 0) {
 717			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
 718				return false;
 719			/* mshot retries exceeded, force a requeue */
 720			sr->nr_multishot_loops = 0;
 721			mshot_retry_ret = IOU_REQUEUE;
 722		}
 723		if (issue_flags & IO_URING_F_MULTISHOT)
 724			*ret = mshot_retry_ret;
 725		else
 726			*ret = -EAGAIN;
 727		return true;
 728	}
 729
 730	/* Finish the request / stop multishot. */
 
 731	io_req_set_res(req, *ret, cflags);
 732
 733	if (issue_flags & IO_URING_F_MULTISHOT)
 734		*ret = IOU_STOP_MULTISHOT;
 735	else
 736		*ret = IOU_OK;
 
 737	return true;
 738}
 739
 740static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
 741				     struct io_sr_msg *sr, void __user **buf,
 742				     size_t *len)
 743{
 744	unsigned long ubuf = (unsigned long) *buf;
 745	unsigned long hdr;
 746
 747	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
 748		kmsg->controllen;
 749	if (*len < hdr)
 750		return -EFAULT;
 751
 752	if (kmsg->controllen) {
 753		unsigned long control = ubuf + hdr - kmsg->controllen;
 754
 755		kmsg->msg.msg_control_user = (void __user *) control;
 756		kmsg->msg.msg_controllen = kmsg->controllen;
 757	}
 758
 759	sr->buf = *buf; /* stash for later copy */
 760	*buf = (void __user *) (ubuf + hdr);
 761	kmsg->payloadlen = *len = *len - hdr;
 762	return 0;
 763}
 764
 765struct io_recvmsg_multishot_hdr {
 766	struct io_uring_recvmsg_out msg;
 767	struct sockaddr_storage addr;
 768};
 769
 770static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
 771				struct io_async_msghdr *kmsg,
 772				unsigned int flags, bool *finished)
 773{
 774	int err;
 775	int copy_len;
 776	struct io_recvmsg_multishot_hdr hdr;
 777
 778	if (kmsg->namelen)
 779		kmsg->msg.msg_name = &hdr.addr;
 780	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
 781	kmsg->msg.msg_namelen = 0;
 782
 783	if (sock->file->f_flags & O_NONBLOCK)
 784		flags |= MSG_DONTWAIT;
 785
 786	err = sock_recvmsg(sock, &kmsg->msg, flags);
 787	*finished = err <= 0;
 788	if (err < 0)
 789		return err;
 790
 791	hdr.msg = (struct io_uring_recvmsg_out) {
 792		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
 793		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
 794	};
 795
 796	hdr.msg.payloadlen = err;
 797	if (err > kmsg->payloadlen)
 798		err = kmsg->payloadlen;
 799
 800	copy_len = sizeof(struct io_uring_recvmsg_out);
 801	if (kmsg->msg.msg_namelen > kmsg->namelen)
 802		copy_len += kmsg->namelen;
 803	else
 804		copy_len += kmsg->msg.msg_namelen;
 805
 806	/*
 807	 *      "fromlen shall refer to the value before truncation.."
 808	 *                      1003.1g
 809	 */
 810	hdr.msg.namelen = kmsg->msg.msg_namelen;
 811
 812	/* ensure that there is no gap between hdr and sockaddr_storage */
 813	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
 814		     sizeof(struct io_uring_recvmsg_out));
 815	if (copy_to_user(io->buf, &hdr, copy_len)) {
 816		*finished = true;
 817		return -EFAULT;
 818	}
 819
 820	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
 821			kmsg->controllen + err;
 822}
 823
 824int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 825{
 826	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 827	struct io_async_msghdr iomsg, *kmsg;
 828	struct socket *sock;
 829	unsigned flags;
 830	int ret, min_ret = 0;
 831	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 832	bool mshot_finished = true;
 833
 834	sock = sock_from_file(req->file);
 835	if (unlikely(!sock))
 836		return -ENOTSOCK;
 837
 838	if (req_has_async_data(req)) {
 839		kmsg = req->async_data;
 840	} else {
 841		ret = io_recvmsg_copy_hdr(req, &iomsg);
 842		if (ret)
 843			return ret;
 844		kmsg = &iomsg;
 845	}
 846
 847	if (!(req->flags & REQ_F_POLLED) &&
 848	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 849		return io_setup_async_msg(req, kmsg, issue_flags);
 850
 851	flags = sr->msg_flags;
 852	if (force_nonblock)
 853		flags |= MSG_DONTWAIT;
 854
 855retry_multishot:
 856	if (io_do_buffer_select(req)) {
 857		void __user *buf;
 858		size_t len = sr->len;
 859
 860		buf = io_buffer_select(req, &len, issue_flags);
 861		if (!buf)
 862			return -ENOBUFS;
 863
 864		if (req->flags & REQ_F_APOLL_MULTISHOT) {
 865			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
 866			if (ret) {
 867				io_kbuf_recycle(req, issue_flags);
 868				return ret;
 869			}
 870		}
 871
 872		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
 873	}
 874
 875	kmsg->msg.msg_get_inq = 1;
 876	kmsg->msg.msg_inq = -1;
 877	if (req->flags & REQ_F_APOLL_MULTISHOT) {
 878		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
 879					   &mshot_finished);
 880	} else {
 881		/* disable partial retry for recvmsg with cmsg attached */
 882		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
 883			min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 884
 885		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
 886					 kmsg->uaddr, flags);
 887	}
 888
 889	if (ret < min_ret) {
 890		if (ret == -EAGAIN && force_nonblock) {
 891			ret = io_setup_async_msg(req, kmsg, issue_flags);
 892			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
 893				io_kbuf_recycle(req, issue_flags);
 894				return IOU_ISSUE_SKIP_COMPLETE;
 895			}
 896			return ret;
 897		}
 898		if (ret > 0 && io_net_retry(sock, flags)) {
 899			sr->done_io += ret;
 900			req->flags |= REQ_F_BL_NO_RECYCLE;
 901			return io_setup_async_msg(req, kmsg, issue_flags);
 902		}
 903		if (ret == -ERESTARTSYS)
 904			ret = -EINTR;
 905		req_set_fail(req);
 906	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 907		req_set_fail(req);
 908	}
 909
 910	if (ret > 0)
 911		ret += sr->done_io;
 912	else if (sr->done_io)
 913		ret = sr->done_io;
 914	else
 915		io_kbuf_recycle(req, issue_flags);
 916
 917	if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
 918		goto retry_multishot;
 919
 920	if (mshot_finished)
 921		io_req_msg_cleanup(req, kmsg, issue_flags);
 922	else if (ret == -EAGAIN)
 923		return io_setup_async_msg(req, kmsg, issue_flags);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 924
 925	return ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 926}
 927
 928int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 929{
 930	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 931	struct msghdr msg;
 932	struct socket *sock;
 933	unsigned flags;
 934	int ret, min_ret = 0;
 935	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 936	size_t len = sr->len;
 
 937
 938	if (!(req->flags & REQ_F_POLLED) &&
 939	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 940		return -EAGAIN;
 941
 942	sock = sock_from_file(req->file);
 943	if (unlikely(!sock))
 944		return -ENOTSOCK;
 945
 946	msg.msg_name = NULL;
 947	msg.msg_namelen = 0;
 948	msg.msg_control = NULL;
 949	msg.msg_get_inq = 1;
 950	msg.msg_controllen = 0;
 951	msg.msg_iocb = NULL;
 952	msg.msg_ubuf = NULL;
 953
 954	flags = sr->msg_flags;
 955	if (force_nonblock)
 956		flags |= MSG_DONTWAIT;
 957
 958retry_multishot:
 959	if (io_do_buffer_select(req)) {
 960		void __user *buf;
 961
 962		buf = io_buffer_select(req, &len, issue_flags);
 963		if (!buf)
 964			return -ENOBUFS;
 965		sr->buf = buf;
 966		sr->len = len;
 967	}
 968
 969	ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
 970	if (unlikely(ret))
 971		goto out_free;
 972
 973	msg.msg_inq = -1;
 974	msg.msg_flags = 0;
 975
 976	if (flags & MSG_WAITALL)
 977		min_ret = iov_iter_count(&msg.msg_iter);
 978
 979	ret = sock_recvmsg(sock, &msg, flags);
 980	if (ret < min_ret) {
 981		if (ret == -EAGAIN && force_nonblock) {
 982			if (issue_flags & IO_URING_F_MULTISHOT) {
 983				io_kbuf_recycle(req, issue_flags);
 984				return IOU_ISSUE_SKIP_COMPLETE;
 985			}
 986
 987			return -EAGAIN;
 988		}
 989		if (ret > 0 && io_net_retry(sock, flags)) {
 990			sr->len -= ret;
 991			sr->buf += ret;
 992			sr->done_io += ret;
 993			req->flags |= REQ_F_BL_NO_RECYCLE;
 994			return -EAGAIN;
 995		}
 996		if (ret == -ERESTARTSYS)
 997			ret = -EINTR;
 998		req_set_fail(req);
 999	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1000out_free:
1001		req_set_fail(req);
1002	}
1003
 
1004	if (ret > 0)
1005		ret += sr->done_io;
1006	else if (sr->done_io)
1007		ret = sr->done_io;
1008	else
1009		io_kbuf_recycle(req, issue_flags);
1010
1011	if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
1012		goto retry_multishot;
1013
1014	return ret;
1015}
1016
1017void io_send_zc_cleanup(struct io_kiocb *req)
1018{
1019	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1020	struct io_async_msghdr *io;
1021
1022	if (req_has_async_data(req)) {
1023		io = req->async_data;
1024		/* might be ->fast_iov if *msg_copy_hdr failed */
1025		if (io->free_iov != io->fast_iov)
1026			kfree(io->free_iov);
1027	}
1028	if (zc->notif) {
1029		io_notif_flush(zc->notif);
1030		zc->notif = NULL;
1031	}
1032}
1033
1034#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
1035#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
1036
1037int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1038{
1039	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1040	struct io_ring_ctx *ctx = req->ctx;
1041	struct io_kiocb *notif;
1042
1043	zc->done_io = 0;
1044	req->flags |= REQ_F_POLL_NO_LAZY;
1045
1046	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1047		return -EINVAL;
1048	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1049	if (req->flags & REQ_F_CQE_SKIP)
1050		return -EINVAL;
1051
1052	notif = zc->notif = io_alloc_notif(ctx);
1053	if (!notif)
1054		return -ENOMEM;
1055	notif->cqe.user_data = req->cqe.user_data;
1056	notif->cqe.res = 0;
1057	notif->cqe.flags = IORING_CQE_F_NOTIF;
1058	req->flags |= REQ_F_NEED_CLEANUP;
1059
1060	zc->flags = READ_ONCE(sqe->ioprio);
1061	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1062		if (zc->flags & ~IO_ZC_FLAGS_VALID)
1063			return -EINVAL;
1064		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1065			io_notif_set_extended(notif);
1066			io_notif_to_data(notif)->zc_report = true;
 
 
 
1067		}
1068	}
1069
1070	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1071		unsigned idx = READ_ONCE(sqe->buf_index);
1072
1073		if (unlikely(idx >= ctx->nr_user_bufs))
1074			return -EFAULT;
1075		idx = array_index_nospec(idx, ctx->nr_user_bufs);
1076		req->imu = READ_ONCE(ctx->user_bufs[idx]);
1077		io_req_set_rsrc_node(notif, ctx, 0);
1078	}
1079
1080	if (req->opcode == IORING_OP_SEND_ZC) {
1081		if (READ_ONCE(sqe->__pad3[0]))
1082			return -EINVAL;
1083		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1084		zc->addr_len = READ_ONCE(sqe->addr_len);
1085	} else {
1086		if (unlikely(sqe->addr2 || sqe->file_index))
1087			return -EINVAL;
1088		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
1089			return -EINVAL;
1090	}
1091
1092	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
1093	zc->len = READ_ONCE(sqe->len);
1094	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 
1095	if (zc->msg_flags & MSG_DONTWAIT)
1096		req->flags |= REQ_F_NOWAIT;
1097
1098#ifdef CONFIG_COMPAT
1099	if (req->ctx->compat)
1100		zc->msg_flags |= MSG_CMSG_COMPAT;
1101#endif
1102	return 0;
 
 
 
 
1103}
1104
1105static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
1106				 struct iov_iter *from, size_t length)
1107{
1108	skb_zcopy_downgrade_managed(skb);
1109	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
1110}
1111
1112static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
1113			   struct iov_iter *from, size_t length)
1114{
1115	struct skb_shared_info *shinfo = skb_shinfo(skb);
1116	int frag = shinfo->nr_frags;
1117	int ret = 0;
1118	struct bvec_iter bi;
1119	ssize_t copied = 0;
1120	unsigned long truesize = 0;
1121
1122	if (!frag)
1123		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
1124	else if (unlikely(!skb_zcopy_managed(skb)))
1125		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
1126
1127	bi.bi_size = min(from->count, length);
1128	bi.bi_bvec_done = from->iov_offset;
1129	bi.bi_idx = 0;
1130
1131	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
1132		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
1133
1134		copied += v.bv_len;
1135		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
1136		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
1137					   v.bv_offset, v.bv_len);
1138		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
1139	}
1140	if (bi.bi_size)
1141		ret = -EMSGSIZE;
1142
1143	shinfo->nr_frags = frag;
1144	from->bvec += bi.bi_idx;
1145	from->nr_segs -= bi.bi_idx;
1146	from->count -= copied;
1147	from->iov_offset = bi.bi_bvec_done;
1148
1149	skb->data_len += copied;
1150	skb->len += copied;
1151	skb->truesize += truesize;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152
1153	if (sk && sk->sk_type == SOCK_STREAM) {
1154		sk_wmem_queued_add(sk, truesize);
1155		if (!skb_zcopy_pure(skb))
1156			sk_mem_charge(sk, truesize);
 
 
1157	} else {
1158		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 
 
 
 
 
 
1159	}
 
1160	return ret;
1161}
1162
1163int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1164{
1165	struct sockaddr_storage __address;
1166	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1167	struct msghdr msg;
1168	struct socket *sock;
1169	unsigned msg_flags;
1170	int ret, min_ret = 0;
1171
1172	sock = sock_from_file(req->file);
1173	if (unlikely(!sock))
1174		return -ENOTSOCK;
1175	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1176		return -EOPNOTSUPP;
1177
1178	msg.msg_name = NULL;
1179	msg.msg_control = NULL;
1180	msg.msg_controllen = 0;
1181	msg.msg_namelen = 0;
1182
1183	if (zc->addr) {
1184		if (req_has_async_data(req)) {
1185			struct io_async_msghdr *io = req->async_data;
1186
1187			msg.msg_name = &io->addr;
1188		} else {
1189			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
1190			if (unlikely(ret < 0))
1191				return ret;
1192			msg.msg_name = (struct sockaddr *)&__address;
1193		}
1194		msg.msg_namelen = zc->addr_len;
1195	}
1196
1197	if (!(req->flags & REQ_F_POLLED) &&
1198	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
1199		return io_setup_async_addr(req, &__address, issue_flags);
1200
1201	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1202		ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,
1203					(u64)(uintptr_t)zc->buf, zc->len);
1204		if (unlikely(ret))
1205			return ret;
1206		msg.sg_from_iter = io_sg_from_iter;
1207	} else {
1208		io_notif_set_extended(zc->notif);
1209		ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter);
1210		if (unlikely(ret))
1211			return ret;
1212		ret = io_notif_account_mem(zc->notif, zc->len);
1213		if (unlikely(ret))
1214			return ret;
1215		msg.sg_from_iter = io_sg_from_iter_iovec;
1216	}
1217
1218	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
1219	if (issue_flags & IO_URING_F_NONBLOCK)
1220		msg_flags |= MSG_DONTWAIT;
1221	if (msg_flags & MSG_WAITALL)
1222		min_ret = iov_iter_count(&msg.msg_iter);
1223	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1224
1225	msg.msg_flags = msg_flags;
1226	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1227	ret = sock_sendmsg(sock, &msg);
1228
1229	if (unlikely(ret < min_ret)) {
1230		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1231			return io_setup_async_addr(req, &__address, issue_flags);
1232
1233		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
1234			zc->len -= ret;
1235			zc->buf += ret;
1236			zc->done_io += ret;
1237			req->flags |= REQ_F_BL_NO_RECYCLE;
1238			return io_setup_async_addr(req, &__address, issue_flags);
1239		}
1240		if (ret == -ERESTARTSYS)
1241			ret = -EINTR;
1242		req_set_fail(req);
1243	}
1244
1245	if (ret >= 0)
1246		ret += zc->done_io;
1247	else if (zc->done_io)
1248		ret = zc->done_io;
1249
1250	/*
1251	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1252	 * flushing notif to io_send_zc_cleanup()
1253	 */
1254	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1255		io_notif_flush(zc->notif);
1256		req->flags &= ~REQ_F_NEED_CLEANUP;
1257	}
1258	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1259	return IOU_OK;
1260}
1261
1262int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1263{
1264	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1265	struct io_async_msghdr iomsg, *kmsg;
1266	struct socket *sock;
1267	unsigned flags;
1268	int ret, min_ret = 0;
1269
1270	io_notif_set_extended(sr->notif);
1271
1272	sock = sock_from_file(req->file);
1273	if (unlikely(!sock))
1274		return -ENOTSOCK;
1275	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1276		return -EOPNOTSUPP;
1277
1278	if (req_has_async_data(req)) {
1279		kmsg = req->async_data;
1280		kmsg->msg.msg_control_user = sr->msg_control;
1281	} else {
1282		ret = io_sendmsg_copy_hdr(req, &iomsg);
1283		if (ret)
1284			return ret;
1285		kmsg = &iomsg;
1286	}
1287
1288	if (!(req->flags & REQ_F_POLLED) &&
1289	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
1290		return io_setup_async_msg(req, kmsg, issue_flags);
1291
1292	flags = sr->msg_flags | MSG_ZEROCOPY;
1293	if (issue_flags & IO_URING_F_NONBLOCK)
1294		flags |= MSG_DONTWAIT;
1295	if (flags & MSG_WAITALL)
1296		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1297
 
1298	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1299	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1300	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1301
1302	if (unlikely(ret < min_ret)) {
1303		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1304			return io_setup_async_msg(req, kmsg, issue_flags);
1305
1306		if (ret > 0 && io_net_retry(sock, flags)) {
1307			sr->done_io += ret;
1308			req->flags |= REQ_F_BL_NO_RECYCLE;
1309			return io_setup_async_msg(req, kmsg, issue_flags);
1310		}
1311		if (ret == -ERESTARTSYS)
1312			ret = -EINTR;
1313		req_set_fail(req);
1314	}
1315	/* fast path, check for non-NULL to avoid function call */
1316	if (kmsg->free_iov) {
1317		kfree(kmsg->free_iov);
1318		kmsg->free_iov = NULL;
1319	}
1320
1321	io_netmsg_recycle(req, issue_flags);
1322	if (ret >= 0)
1323		ret += sr->done_io;
1324	else if (sr->done_io)
1325		ret = sr->done_io;
1326
1327	/*
1328	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1329	 * flushing notif to io_send_zc_cleanup()
1330	 */
1331	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1332		io_notif_flush(sr->notif);
1333		req->flags &= ~REQ_F_NEED_CLEANUP;
1334	}
1335	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1336	return IOU_OK;
1337}
1338
1339void io_sendrecv_fail(struct io_kiocb *req)
1340{
1341	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1342
1343	if (sr->done_io)
1344		req->cqe.res = sr->done_io;
1345
1346	if ((req->flags & REQ_F_NEED_CLEANUP) &&
1347	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1348		req->cqe.flags |= IORING_CQE_F_MORE;
1349}
1350
 
 
 
1351int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1352{
1353	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1354	unsigned flags;
1355
1356	if (sqe->len || sqe->buf_index)
1357		return -EINVAL;
1358
1359	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1360	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1361	accept->flags = READ_ONCE(sqe->accept_flags);
1362	accept->nofile = rlimit(RLIMIT_NOFILE);
1363	flags = READ_ONCE(sqe->ioprio);
1364	if (flags & ~IORING_ACCEPT_MULTISHOT)
1365		return -EINVAL;
1366
1367	accept->file_slot = READ_ONCE(sqe->file_index);
1368	if (accept->file_slot) {
1369		if (accept->flags & SOCK_CLOEXEC)
1370			return -EINVAL;
1371		if (flags & IORING_ACCEPT_MULTISHOT &&
1372		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
1373			return -EINVAL;
1374	}
1375	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1376		return -EINVAL;
1377	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1378		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1379	if (flags & IORING_ACCEPT_MULTISHOT)
1380		req->flags |= REQ_F_APOLL_MULTISHOT;
 
 
1381	return 0;
1382}
1383
1384int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1385{
1386	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1387	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1388	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
1389	bool fixed = !!accept->file_slot;
 
 
 
1390	struct file *file;
 
1391	int ret, fd;
1392
 
 
 
 
1393retry:
1394	if (!fixed) {
1395		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1396		if (unlikely(fd < 0))
1397			return fd;
1398	}
1399	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
 
 
1400			 accept->flags);
1401	if (IS_ERR(file)) {
1402		if (!fixed)
1403			put_unused_fd(fd);
1404		ret = PTR_ERR(file);
1405		if (ret == -EAGAIN && force_nonblock) {
 
1406			/*
1407			 * if it's multishot and polled, we don't need to
1408			 * return EAGAIN to arm the poll infra since it
1409			 * has already been done
1410			 */
1411			if (issue_flags & IO_URING_F_MULTISHOT)
1412				return IOU_ISSUE_SKIP_COMPLETE;
1413			return ret;
1414		}
1415		if (ret == -ERESTARTSYS)
1416			ret = -EINTR;
1417		req_set_fail(req);
1418	} else if (!fixed) {
1419		fd_install(fd, file);
1420		ret = fd;
1421	} else {
1422		ret = io_fixed_fd_install(req, issue_flags, file,
1423						accept->file_slot);
1424	}
1425
 
 
 
 
1426	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
1427		io_req_set_res(req, ret, 0);
1428		return IOU_OK;
1429	}
1430
1431	if (ret < 0)
1432		return ret;
1433	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
1434				ret, IORING_CQE_F_MORE))
1435		goto retry;
 
 
 
 
1436
1437	io_req_set_res(req, ret, 0);
1438	return IOU_STOP_MULTISHOT;
1439}
1440
1441int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1442{
1443	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1444
1445	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1446		return -EINVAL;
1447
1448	sock->domain = READ_ONCE(sqe->fd);
1449	sock->type = READ_ONCE(sqe->off);
1450	sock->protocol = READ_ONCE(sqe->len);
1451	sock->file_slot = READ_ONCE(sqe->file_index);
1452	sock->nofile = rlimit(RLIMIT_NOFILE);
1453
1454	sock->flags = sock->type & ~SOCK_TYPE_MASK;
1455	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1456		return -EINVAL;
1457	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1458		return -EINVAL;
1459	return 0;
1460}
1461
1462int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1463{
1464	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1465	bool fixed = !!sock->file_slot;
1466	struct file *file;
1467	int ret, fd;
1468
1469	if (!fixed) {
1470		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1471		if (unlikely(fd < 0))
1472			return fd;
1473	}
1474	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1475	if (IS_ERR(file)) {
1476		if (!fixed)
1477			put_unused_fd(fd);
1478		ret = PTR_ERR(file);
1479		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1480			return -EAGAIN;
1481		if (ret == -ERESTARTSYS)
1482			ret = -EINTR;
1483		req_set_fail(req);
1484	} else if (!fixed) {
1485		fd_install(fd, file);
1486		ret = fd;
1487	} else {
1488		ret = io_fixed_fd_install(req, issue_flags, file,
1489					    sock->file_slot);
1490	}
1491	io_req_set_res(req, ret, 0);
1492	return IOU_OK;
1493}
1494
1495int io_connect_prep_async(struct io_kiocb *req)
1496{
1497	struct io_async_connect *io = req->async_data;
1498	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1499
1500	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
1501}
1502
1503int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1504{
1505	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
 
1506
1507	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1508		return -EINVAL;
1509
1510	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1511	conn->addr_len =  READ_ONCE(sqe->addr2);
1512	conn->in_progress = conn->seen_econnaborted = false;
1513	return 0;
 
 
 
 
 
1514}
1515
1516int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1517{
1518	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1519	struct io_async_connect __io, *io;
1520	unsigned file_flags;
1521	int ret;
1522	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1523
1524	if (req_has_async_data(req)) {
1525		io = req->async_data;
1526	} else {
1527		ret = move_addr_to_kernel(connect->addr,
1528						connect->addr_len,
1529						&__io.address);
1530		if (ret)
1531			goto out;
1532		io = &__io;
1533	}
1534
1535	file_flags = force_nonblock ? O_NONBLOCK : 0;
1536
1537	ret = __sys_connect_file(req->file, &io->address,
1538					connect->addr_len, file_flags);
1539	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1540	    && force_nonblock) {
1541		if (ret == -EINPROGRESS) {
1542			connect->in_progress = true;
1543		} else if (ret == -ECONNABORTED) {
1544			if (connect->seen_econnaborted)
1545				goto out;
1546			connect->seen_econnaborted = true;
1547		}
1548		if (req_has_async_data(req))
1549			return -EAGAIN;
1550		if (io_alloc_async_data(req)) {
1551			ret = -ENOMEM;
1552			goto out;
1553		}
1554		memcpy(req->async_data, &__io, sizeof(__io));
1555		return -EAGAIN;
1556	}
1557	if (connect->in_progress) {
1558		/*
1559		 * At least bluetooth will return -EBADFD on a re-connect
1560		 * attempt, and it's (supposedly) also valid to get -EISCONN
1561		 * which means the previous result is good. For both of these,
1562		 * grab the sock_error() and use that for the completion.
1563		 */
1564		if (ret == -EBADFD || ret == -EISCONN)
1565			ret = sock_error(sock_from_file(req->file)->sk);
1566	}
1567	if (ret == -ERESTARTSYS)
1568		ret = -EINTR;
1569out:
1570	if (ret < 0)
1571		req_set_fail(req);
 
1572	io_req_set_res(req, ret, 0);
1573	return IOU_OK;
1574}
1575
1576void io_netmsg_cache_free(struct io_cache_entry *entry)
1577{
1578	kfree(container_of(entry, struct io_async_msghdr, cache));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1579}
1580#endif