Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.14.15.
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/kernel.h>
   3#include <linux/errno.h>
   4#include <linux/file.h>
   5#include <linux/slab.h>
   6#include <linux/net.h>
   7#include <linux/compat.h>
   8#include <net/compat.h>
   9#include <linux/io_uring.h>
  10
  11#include <uapi/linux/io_uring.h>
  12
  13#include "io_uring.h"
  14#include "kbuf.h"
  15#include "alloc_cache.h"
  16#include "net.h"
  17#include "notif.h"
  18#include "rsrc.h"
  19
  20#if defined(CONFIG_NET)
  21struct io_shutdown {
  22	struct file			*file;
  23	int				how;
  24};
  25
  26struct io_accept {
  27	struct file			*file;
  28	struct sockaddr __user		*addr;
  29	int __user			*addr_len;
  30	int				flags;
  31	u32				file_slot;
  32	unsigned long			nofile;
  33};
  34
  35struct io_socket {
  36	struct file			*file;
  37	int				domain;
  38	int				type;
  39	int				protocol;
  40	int				flags;
  41	u32				file_slot;
  42	unsigned long			nofile;
  43};
  44
  45struct io_connect {
  46	struct file			*file;
  47	struct sockaddr __user		*addr;
  48	int				addr_len;
  49	bool				in_progress;
  50	bool				seen_econnaborted;
  51};
  52
  53struct io_sr_msg {
  54	struct file			*file;
  55	union {
  56		struct compat_msghdr __user	*umsg_compat;
  57		struct user_msghdr __user	*umsg;
  58		void __user			*buf;
  59	};
  60	unsigned			len;
  61	unsigned			done_io;
  62	unsigned			msg_flags;
  63	unsigned			nr_multishot_loops;
  64	u16				flags;
  65	/* initialised and used only by !msg send variants */
  66	u16				addr_len;
  67	u16				buf_group;
  68	void __user			*addr;
  69	void __user			*msg_control;
  70	/* used only for send zerocopy */
  71	struct io_kiocb 		*notif;
  72};
  73
  74/*
  75 * Number of times we'll try and do receives if there's more data. If we
  76 * exceed this limit, then add us to the back of the queue and retry from
  77 * there. This helps fairness between flooding clients.
  78 */
  79#define MULTISHOT_MAX_RETRY	32
  80
  81static inline bool io_check_multishot(struct io_kiocb *req,
  82				      unsigned int issue_flags)
  83{
  84	/*
  85	 * When ->locked_cq is set we only allow to post CQEs from the original
  86	 * task context. Usual request completions will be handled in other
  87	 * generic paths but multipoll may decide to post extra cqes.
  88	 */
  89	return !(issue_flags & IO_URING_F_IOWQ) ||
  90		!(issue_flags & IO_URING_F_MULTISHOT) ||
  91		!req->ctx->task_complete;
  92}
  93
  94int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  95{
  96	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
  97
  98	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
  99		     sqe->buf_index || sqe->splice_fd_in))
 100		return -EINVAL;
 101
 102	shutdown->how = READ_ONCE(sqe->len);
 103	req->flags |= REQ_F_FORCE_ASYNC;
 104	return 0;
 105}
 106
 107int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
 108{
 109	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
 110	struct socket *sock;
 111	int ret;
 112
 113	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
 114
 115	sock = sock_from_file(req->file);
 116	if (unlikely(!sock))
 117		return -ENOTSOCK;
 118
 119	ret = __sys_shutdown_sock(sock, shutdown->how);
 120	io_req_set_res(req, ret, 0);
 121	return IOU_OK;
 122}
 123
 124static bool io_net_retry(struct socket *sock, int flags)
 125{
 126	if (!(flags & MSG_WAITALL))
 127		return false;
 128	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
 129}
 130
 131static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
 132{
 133	struct io_async_msghdr *hdr = req->async_data;
 134
 135	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
 136		return;
 137
 138	/* Let normal cleanup path reap it if we fail adding to the cache */
 139	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
 140		req->async_data = NULL;
 141		req->flags &= ~REQ_F_ASYNC_DATA;
 142	}
 143}
 144
 145static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
 146						  unsigned int issue_flags)
 147{
 148	struct io_ring_ctx *ctx = req->ctx;
 149	struct io_cache_entry *entry;
 150	struct io_async_msghdr *hdr;
 151
 152	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
 153		entry = io_alloc_cache_get(&ctx->netmsg_cache);
 154		if (entry) {
 155			hdr = container_of(entry, struct io_async_msghdr, cache);
 156			hdr->free_iov = NULL;
 157			req->flags |= REQ_F_ASYNC_DATA;
 158			req->async_data = hdr;
 159			return hdr;
 160		}
 161	}
 162
 163	if (!io_alloc_async_data(req)) {
 164		hdr = req->async_data;
 165		hdr->free_iov = NULL;
 166		return hdr;
 167	}
 168	return NULL;
 169}
 170
 171static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
 172{
 173	/* ->prep_async is always called from the submission context */
 174	return io_msg_alloc_async(req, 0);
 175}
 176
 177static int io_setup_async_msg(struct io_kiocb *req,
 178			      struct io_async_msghdr *kmsg,
 179			      unsigned int issue_flags)
 180{
 181	struct io_async_msghdr *async_msg;
 182
 183	if (req_has_async_data(req))
 184		return -EAGAIN;
 185	async_msg = io_msg_alloc_async(req, issue_flags);
 186	if (!async_msg) {
 187		kfree(kmsg->free_iov);
 188		return -ENOMEM;
 189	}
 190	req->flags |= REQ_F_NEED_CLEANUP;
 191	memcpy(async_msg, kmsg, sizeof(*kmsg));
 192	if (async_msg->msg.msg_name)
 193		async_msg->msg.msg_name = &async_msg->addr;
 194
 195	if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs)
 196		return -EAGAIN;
 197
 198	/* if were using fast_iov, set it to the new one */
 199	if (iter_is_iovec(&kmsg->msg.msg_iter) && !kmsg->free_iov) {
 200		size_t fast_idx = iter_iov(&kmsg->msg.msg_iter) - kmsg->fast_iov;
 201		async_msg->msg.msg_iter.__iov = &async_msg->fast_iov[fast_idx];
 202	}
 203
 204	return -EAGAIN;
 205}
 206
 207static int io_sendmsg_copy_hdr(struct io_kiocb *req,
 208			       struct io_async_msghdr *iomsg)
 209{
 210	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 211	int ret;
 212
 213	iomsg->msg.msg_name = &iomsg->addr;
 214	iomsg->free_iov = iomsg->fast_iov;
 215	ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
 216					&iomsg->free_iov);
 217	/* save msg_control as sys_sendmsg() overwrites it */
 218	sr->msg_control = iomsg->msg.msg_control_user;
 219	return ret;
 220}
 221
 222int io_send_prep_async(struct io_kiocb *req)
 223{
 224	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 225	struct io_async_msghdr *io;
 226	int ret;
 227
 228	if (!zc->addr || req_has_async_data(req))
 229		return 0;
 230	io = io_msg_alloc_async_prep(req);
 231	if (!io)
 232		return -ENOMEM;
 233	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
 234	return ret;
 235}
 236
 237static int io_setup_async_addr(struct io_kiocb *req,
 238			      struct sockaddr_storage *addr_storage,
 239			      unsigned int issue_flags)
 240{
 241	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 242	struct io_async_msghdr *io;
 243
 244	if (!sr->addr || req_has_async_data(req))
 245		return -EAGAIN;
 246	io = io_msg_alloc_async(req, issue_flags);
 247	if (!io)
 248		return -ENOMEM;
 249	memcpy(&io->addr, addr_storage, sizeof(io->addr));
 250	return -EAGAIN;
 251}
 252
 253int io_sendmsg_prep_async(struct io_kiocb *req)
 254{
 255	int ret;
 256
 257	if (!io_msg_alloc_async_prep(req))
 258		return -ENOMEM;
 259	ret = io_sendmsg_copy_hdr(req, req->async_data);
 260	if (!ret)
 261		req->flags |= REQ_F_NEED_CLEANUP;
 262	return ret;
 263}
 264
 265void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
 266{
 267	struct io_async_msghdr *io = req->async_data;
 268
 269	kfree(io->free_iov);
 270}
 271
 272int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 273{
 274	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 275
 276	if (req->opcode == IORING_OP_SEND) {
 277		if (READ_ONCE(sqe->__pad3[0]))
 278			return -EINVAL;
 279		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 280		sr->addr_len = READ_ONCE(sqe->addr_len);
 281	} else if (sqe->addr2 || sqe->file_index) {
 282		return -EINVAL;
 283	}
 284
 285	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 286	sr->len = READ_ONCE(sqe->len);
 287	sr->flags = READ_ONCE(sqe->ioprio);
 288	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
 289		return -EINVAL;
 290	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 291	if (sr->msg_flags & MSG_DONTWAIT)
 292		req->flags |= REQ_F_NOWAIT;
 293
 294#ifdef CONFIG_COMPAT
 295	if (req->ctx->compat)
 296		sr->msg_flags |= MSG_CMSG_COMPAT;
 297#endif
 298	sr->done_io = 0;
 299	return 0;
 300}
 301
 302int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 303{
 304	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 305	struct io_async_msghdr iomsg, *kmsg;
 306	struct socket *sock;
 307	unsigned flags;
 308	int min_ret = 0;
 309	int ret;
 310
 311	sock = sock_from_file(req->file);
 312	if (unlikely(!sock))
 313		return -ENOTSOCK;
 314
 315	if (req_has_async_data(req)) {
 316		kmsg = req->async_data;
 317		kmsg->msg.msg_control_user = sr->msg_control;
 318	} else {
 319		ret = io_sendmsg_copy_hdr(req, &iomsg);
 320		if (ret)
 321			return ret;
 322		kmsg = &iomsg;
 323	}
 324
 325	if (!(req->flags & REQ_F_POLLED) &&
 326	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 327		return io_setup_async_msg(req, kmsg, issue_flags);
 328
 329	flags = sr->msg_flags;
 330	if (issue_flags & IO_URING_F_NONBLOCK)
 331		flags |= MSG_DONTWAIT;
 332	if (flags & MSG_WAITALL)
 333		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 334
 335	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
 336
 337	if (ret < min_ret) {
 338		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 339			return io_setup_async_msg(req, kmsg, issue_flags);
 340		if (ret > 0 && io_net_retry(sock, flags)) {
 341			kmsg->msg.msg_controllen = 0;
 342			kmsg->msg.msg_control = NULL;
 343			sr->done_io += ret;
 344			req->flags |= REQ_F_PARTIAL_IO;
 345			return io_setup_async_msg(req, kmsg, issue_flags);
 346		}
 347		if (ret == -ERESTARTSYS)
 348			ret = -EINTR;
 349		req_set_fail(req);
 350	}
 351	/* fast path, check for non-NULL to avoid function call */
 352	if (kmsg->free_iov)
 353		kfree(kmsg->free_iov);
 354	req->flags &= ~REQ_F_NEED_CLEANUP;
 355	io_netmsg_recycle(req, issue_flags);
 356	if (ret >= 0)
 357		ret += sr->done_io;
 358	else if (sr->done_io)
 359		ret = sr->done_io;
 360	io_req_set_res(req, ret, 0);
 361	return IOU_OK;
 362}
 363
 364int io_send(struct io_kiocb *req, unsigned int issue_flags)
 365{
 366	struct sockaddr_storage __address;
 367	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 368	struct msghdr msg;
 369	struct socket *sock;
 370	unsigned flags;
 371	int min_ret = 0;
 372	int ret;
 373
 374	msg.msg_name = NULL;
 375	msg.msg_control = NULL;
 376	msg.msg_controllen = 0;
 377	msg.msg_namelen = 0;
 378	msg.msg_ubuf = NULL;
 379
 380	if (sr->addr) {
 381		if (req_has_async_data(req)) {
 382			struct io_async_msghdr *io = req->async_data;
 383
 384			msg.msg_name = &io->addr;
 385		} else {
 386			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
 387			if (unlikely(ret < 0))
 388				return ret;
 389			msg.msg_name = (struct sockaddr *)&__address;
 390		}
 391		msg.msg_namelen = sr->addr_len;
 392	}
 393
 394	if (!(req->flags & REQ_F_POLLED) &&
 395	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 396		return io_setup_async_addr(req, &__address, issue_flags);
 397
 398	sock = sock_from_file(req->file);
 399	if (unlikely(!sock))
 400		return -ENOTSOCK;
 401
 402	ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &msg.msg_iter);
 403	if (unlikely(ret))
 404		return ret;
 405
 406	flags = sr->msg_flags;
 407	if (issue_flags & IO_URING_F_NONBLOCK)
 408		flags |= MSG_DONTWAIT;
 409	if (flags & MSG_WAITALL)
 410		min_ret = iov_iter_count(&msg.msg_iter);
 411
 412	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
 413	msg.msg_flags = flags;
 414	ret = sock_sendmsg(sock, &msg);
 415	if (ret < min_ret) {
 416		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 417			return io_setup_async_addr(req, &__address, issue_flags);
 418
 419		if (ret > 0 && io_net_retry(sock, flags)) {
 420			sr->len -= ret;
 421			sr->buf += ret;
 422			sr->done_io += ret;
 423			req->flags |= REQ_F_PARTIAL_IO;
 424			return io_setup_async_addr(req, &__address, issue_flags);
 425		}
 426		if (ret == -ERESTARTSYS)
 427			ret = -EINTR;
 428		req_set_fail(req);
 429	}
 430	if (ret >= 0)
 431		ret += sr->done_io;
 432	else if (sr->done_io)
 433		ret = sr->done_io;
 434	io_req_set_res(req, ret, 0);
 435	return IOU_OK;
 436}
 437
 438static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
 439{
 440	int hdr;
 441
 442	if (iomsg->namelen < 0)
 443		return true;
 444	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
 445			       iomsg->namelen, &hdr))
 446		return true;
 447	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
 448		return true;
 449
 450	return false;
 451}
 452
 453static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
 454				 struct io_async_msghdr *iomsg)
 455{
 456	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 457	struct user_msghdr msg;
 458	int ret;
 459
 460	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
 461		return -EFAULT;
 462
 463	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
 464	if (ret)
 465		return ret;
 466
 467	if (req->flags & REQ_F_BUFFER_SELECT) {
 468		if (msg.msg_iovlen == 0) {
 469			sr->len = iomsg->fast_iov[0].iov_len = 0;
 470			iomsg->fast_iov[0].iov_base = NULL;
 471			iomsg->free_iov = NULL;
 472		} else if (msg.msg_iovlen > 1) {
 473			return -EINVAL;
 474		} else {
 475			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
 476				return -EFAULT;
 477			sr->len = iomsg->fast_iov[0].iov_len;
 478			iomsg->free_iov = NULL;
 479		}
 480
 481		if (req->flags & REQ_F_APOLL_MULTISHOT) {
 482			iomsg->namelen = msg.msg_namelen;
 483			iomsg->controllen = msg.msg_controllen;
 484			if (io_recvmsg_multishot_overflow(iomsg))
 485				return -EOVERFLOW;
 486		}
 487	} else {
 488		iomsg->free_iov = iomsg->fast_iov;
 489		ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
 490				     &iomsg->free_iov, &iomsg->msg.msg_iter,
 491				     false);
 492		if (ret > 0)
 493			ret = 0;
 494	}
 495
 496	return ret;
 497}
 498
 499#ifdef CONFIG_COMPAT
 500static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 501					struct io_async_msghdr *iomsg)
 502{
 503	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 504	struct compat_msghdr msg;
 505	struct compat_iovec __user *uiov;
 506	int ret;
 507
 508	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
 509		return -EFAULT;
 510
 511	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
 512	if (ret)
 513		return ret;
 514
 515	uiov = compat_ptr(msg.msg_iov);
 516	if (req->flags & REQ_F_BUFFER_SELECT) {
 517		compat_ssize_t clen;
 518
 519		iomsg->free_iov = NULL;
 520		if (msg.msg_iovlen == 0) {
 521			sr->len = 0;
 522		} else if (msg.msg_iovlen > 1) {
 523			return -EINVAL;
 524		} else {
 525			if (!access_ok(uiov, sizeof(*uiov)))
 526				return -EFAULT;
 527			if (__get_user(clen, &uiov->iov_len))
 528				return -EFAULT;
 529			if (clen < 0)
 530				return -EINVAL;
 531			sr->len = clen;
 532		}
 533
 534		if (req->flags & REQ_F_APOLL_MULTISHOT) {
 535			iomsg->namelen = msg.msg_namelen;
 536			iomsg->controllen = msg.msg_controllen;
 537			if (io_recvmsg_multishot_overflow(iomsg))
 538				return -EOVERFLOW;
 539		}
 540	} else {
 541		iomsg->free_iov = iomsg->fast_iov;
 542		ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen,
 543				   UIO_FASTIOV, &iomsg->free_iov,
 544				   &iomsg->msg.msg_iter, true);
 545		if (ret < 0)
 546			return ret;
 547	}
 548
 549	return 0;
 550}
 551#endif
 552
 553static int io_recvmsg_copy_hdr(struct io_kiocb *req,
 554			       struct io_async_msghdr *iomsg)
 555{
 556	iomsg->msg.msg_name = &iomsg->addr;
 557	iomsg->msg.msg_iter.nr_segs = 0;
 558
 559#ifdef CONFIG_COMPAT
 560	if (req->ctx->compat)
 561		return __io_compat_recvmsg_copy_hdr(req, iomsg);
 562#endif
 563
 564	return __io_recvmsg_copy_hdr(req, iomsg);
 565}
 566
 567int io_recvmsg_prep_async(struct io_kiocb *req)
 568{
 569	int ret;
 570
 571	if (!io_msg_alloc_async_prep(req))
 572		return -ENOMEM;
 573	ret = io_recvmsg_copy_hdr(req, req->async_data);
 574	if (!ret)
 575		req->flags |= REQ_F_NEED_CLEANUP;
 576	return ret;
 577}
 578
 579#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)
 580
 581int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 582{
 583	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 584
 585	if (unlikely(sqe->file_index || sqe->addr2))
 586		return -EINVAL;
 587
 588	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 589	sr->len = READ_ONCE(sqe->len);
 590	sr->flags = READ_ONCE(sqe->ioprio);
 591	if (sr->flags & ~(RECVMSG_FLAGS))
 592		return -EINVAL;
 593	sr->msg_flags = READ_ONCE(sqe->msg_flags);
 594	if (sr->msg_flags & MSG_DONTWAIT)
 595		req->flags |= REQ_F_NOWAIT;
 596	if (sr->msg_flags & MSG_ERRQUEUE)
 597		req->flags |= REQ_F_CLEAR_POLLIN;
 598	if (sr->flags & IORING_RECV_MULTISHOT) {
 599		if (!(req->flags & REQ_F_BUFFER_SELECT))
 600			return -EINVAL;
 601		if (sr->msg_flags & MSG_WAITALL)
 602			return -EINVAL;
 603		if (req->opcode == IORING_OP_RECV && sr->len)
 604			return -EINVAL;
 605		req->flags |= REQ_F_APOLL_MULTISHOT;
 606		/*
 607		 * Store the buffer group for this multishot receive separately,
 608		 * as if we end up doing an io-wq based issue that selects a
 609		 * buffer, it has to be committed immediately and that will
 610		 * clear ->buf_list. This means we lose the link to the buffer
 611		 * list, and the eventual buffer put on completion then cannot
 612		 * restore it.
 613		 */
 614		sr->buf_group = req->buf_index;
 615	}
 616
 617#ifdef CONFIG_COMPAT
 618	if (req->ctx->compat)
 619		sr->msg_flags |= MSG_CMSG_COMPAT;
 620#endif
 621	sr->done_io = 0;
 622	sr->nr_multishot_loops = 0;
 623	return 0;
 624}
 625
 626static inline void io_recv_prep_retry(struct io_kiocb *req)
 627{
 628	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 629
 630	sr->done_io = 0;
 631	sr->len = 0; /* get from the provided buffer */
 632	req->buf_index = sr->buf_group;
 633}
 634
 635/*
 636 * Finishes io_recv and io_recvmsg.
 637 *
 638 * Returns true if it is actually finished, or false if it should run
 639 * again (for multishot).
 640 */
 641static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 642				  struct msghdr *msg, bool mshot_finished,
 643				  unsigned issue_flags)
 644{
 645	unsigned int cflags;
 646
 647	cflags = io_put_kbuf(req, issue_flags);
 648	if (msg->msg_inq && msg->msg_inq != -1)
 649		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 650
 651	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
 652		io_req_set_res(req, *ret, cflags);
 653		*ret = IOU_OK;
 654		return true;
 655	}
 656
 657	if (mshot_finished)
 658		goto finish;
 659
 660	/*
 661	 * Fill CQE for this receive and see if we should keep trying to
 662	 * receive from this socket.
 663	 */
 664	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
 665				*ret, cflags | IORING_CQE_F_MORE)) {
 666		struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 667		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
 668
 669		io_recv_prep_retry(req);
 670		/* Known not-empty or unknown state, retry */
 671		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
 672			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
 673				return false;
 674			/* mshot retries exceeded, force a requeue */
 675			sr->nr_multishot_loops = 0;
 676			mshot_retry_ret = IOU_REQUEUE;
 677		}
 678		if (issue_flags & IO_URING_F_MULTISHOT)
 679			*ret = mshot_retry_ret;
 680		else
 681			*ret = -EAGAIN;
 682		return true;
 683	}
 684	/* Otherwise stop multishot but use the current result. */
 685finish:
 686	io_req_set_res(req, *ret, cflags);
 687
 688	if (issue_flags & IO_URING_F_MULTISHOT)
 689		*ret = IOU_STOP_MULTISHOT;
 690	else
 691		*ret = IOU_OK;
 692	return true;
 693}
 694
 695static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
 696				     struct io_sr_msg *sr, void __user **buf,
 697				     size_t *len)
 698{
 699	unsigned long ubuf = (unsigned long) *buf;
 700	unsigned long hdr;
 701
 702	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
 703		kmsg->controllen;
 704	if (*len < hdr)
 705		return -EFAULT;
 706
 707	if (kmsg->controllen) {
 708		unsigned long control = ubuf + hdr - kmsg->controllen;
 709
 710		kmsg->msg.msg_control_user = (void __user *) control;
 711		kmsg->msg.msg_controllen = kmsg->controllen;
 712	}
 713
 714	sr->buf = *buf; /* stash for later copy */
 715	*buf = (void __user *) (ubuf + hdr);
 716	kmsg->payloadlen = *len = *len - hdr;
 717	return 0;
 718}
 719
 720struct io_recvmsg_multishot_hdr {
 721	struct io_uring_recvmsg_out msg;
 722	struct sockaddr_storage addr;
 723};
 724
 725static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
 726				struct io_async_msghdr *kmsg,
 727				unsigned int flags, bool *finished)
 728{
 729	int err;
 730	int copy_len;
 731	struct io_recvmsg_multishot_hdr hdr;
 732
 733	if (kmsg->namelen)
 734		kmsg->msg.msg_name = &hdr.addr;
 735	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
 736	kmsg->msg.msg_namelen = 0;
 737
 738	if (sock->file->f_flags & O_NONBLOCK)
 739		flags |= MSG_DONTWAIT;
 740
 741	err = sock_recvmsg(sock, &kmsg->msg, flags);
 742	*finished = err <= 0;
 743	if (err < 0)
 744		return err;
 745
 746	hdr.msg = (struct io_uring_recvmsg_out) {
 747		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
 748		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
 749	};
 750
 751	hdr.msg.payloadlen = err;
 752	if (err > kmsg->payloadlen)
 753		err = kmsg->payloadlen;
 754
 755	copy_len = sizeof(struct io_uring_recvmsg_out);
 756	if (kmsg->msg.msg_namelen > kmsg->namelen)
 757		copy_len += kmsg->namelen;
 758	else
 759		copy_len += kmsg->msg.msg_namelen;
 760
 761	/*
 762	 *      "fromlen shall refer to the value before truncation.."
 763	 *                      1003.1g
 764	 */
 765	hdr.msg.namelen = kmsg->msg.msg_namelen;
 766
 767	/* ensure that there is no gap between hdr and sockaddr_storage */
 768	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
 769		     sizeof(struct io_uring_recvmsg_out));
 770	if (copy_to_user(io->buf, &hdr, copy_len)) {
 771		*finished = true;
 772		return -EFAULT;
 773	}
 774
 775	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
 776			kmsg->controllen + err;
 777}
 778
 779int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 780{
 781	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 782	struct io_async_msghdr iomsg, *kmsg;
 783	struct socket *sock;
 784	unsigned flags;
 785	int ret, min_ret = 0;
 786	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 787	bool mshot_finished = true;
 788
 789	sock = sock_from_file(req->file);
 790	if (unlikely(!sock))
 791		return -ENOTSOCK;
 792
 793	if (req_has_async_data(req)) {
 794		kmsg = req->async_data;
 795	} else {
 796		ret = io_recvmsg_copy_hdr(req, &iomsg);
 797		if (ret)
 798			return ret;
 799		kmsg = &iomsg;
 800	}
 801
 802	if (!(req->flags & REQ_F_POLLED) &&
 803	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 804		return io_setup_async_msg(req, kmsg, issue_flags);
 805
 806	if (!io_check_multishot(req, issue_flags))
 807		return io_setup_async_msg(req, kmsg, issue_flags);
 808
 809retry_multishot:
 810	if (io_do_buffer_select(req)) {
 811		void __user *buf;
 812		size_t len = sr->len;
 813
 814		buf = io_buffer_select(req, &len, issue_flags);
 815		if (!buf)
 816			return -ENOBUFS;
 817
 818		if (req->flags & REQ_F_APOLL_MULTISHOT) {
 819			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
 820			if (ret) {
 821				io_kbuf_recycle(req, issue_flags);
 822				return ret;
 823			}
 824		}
 825
 826		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
 827	}
 828
 829	flags = sr->msg_flags;
 830	if (force_nonblock)
 831		flags |= MSG_DONTWAIT;
 832
 833	kmsg->msg.msg_get_inq = 1;
 834	kmsg->msg.msg_inq = -1;
 835	if (req->flags & REQ_F_APOLL_MULTISHOT) {
 836		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
 837					   &mshot_finished);
 838	} else {
 839		/* disable partial retry for recvmsg with cmsg attached */
 840		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
 841			min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 842
 843		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
 844					 kmsg->uaddr, flags);
 845	}
 846
 847	if (ret < min_ret) {
 848		if (ret == -EAGAIN && force_nonblock) {
 849			ret = io_setup_async_msg(req, kmsg, issue_flags);
 850			if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) {
 851				io_kbuf_recycle(req, issue_flags);
 852				return IOU_ISSUE_SKIP_COMPLETE;
 853			}
 854			return ret;
 855		}
 856		if (ret > 0 && io_net_retry(sock, flags)) {
 857			sr->done_io += ret;
 858			req->flags |= REQ_F_PARTIAL_IO;
 859			return io_setup_async_msg(req, kmsg, issue_flags);
 860		}
 861		if (ret == -ERESTARTSYS)
 862			ret = -EINTR;
 863		req_set_fail(req);
 864	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 865		req_set_fail(req);
 866	}
 867
 868	if (ret > 0)
 869		ret += sr->done_io;
 870	else if (sr->done_io)
 871		ret = sr->done_io;
 872	else
 873		io_kbuf_recycle(req, issue_flags);
 874
 875	if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
 876		goto retry_multishot;
 877
 878	if (mshot_finished) {
 879		/* fast path, check for non-NULL to avoid function call */
 880		if (kmsg->free_iov)
 881			kfree(kmsg->free_iov);
 882		io_netmsg_recycle(req, issue_flags);
 883		req->flags &= ~REQ_F_NEED_CLEANUP;
 884	}
 885
 886	return ret;
 887}
 888
 889int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 890{
 891	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 892	struct msghdr msg;
 893	struct socket *sock;
 894	unsigned flags;
 895	int ret, min_ret = 0;
 896	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 897	size_t len = sr->len;
 898
 899	if (!(req->flags & REQ_F_POLLED) &&
 900	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 901		return -EAGAIN;
 902
 903	if (!io_check_multishot(req, issue_flags))
 904		return -EAGAIN;
 905
 906	sock = sock_from_file(req->file);
 907	if (unlikely(!sock))
 908		return -ENOTSOCK;
 909
 910	msg.msg_name = NULL;
 911	msg.msg_namelen = 0;
 912	msg.msg_control = NULL;
 913	msg.msg_get_inq = 1;
 914	msg.msg_controllen = 0;
 915	msg.msg_iocb = NULL;
 916	msg.msg_ubuf = NULL;
 917
 918retry_multishot:
 919	if (io_do_buffer_select(req)) {
 920		void __user *buf;
 921
 922		buf = io_buffer_select(req, &len, issue_flags);
 923		if (!buf)
 924			return -ENOBUFS;
 925		sr->buf = buf;
 926		sr->len = len;
 927	}
 928
 929	ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
 930	if (unlikely(ret))
 931		goto out_free;
 932
 933	msg.msg_inq = -1;
 934	msg.msg_flags = 0;
 935
 936	flags = sr->msg_flags;
 937	if (force_nonblock)
 938		flags |= MSG_DONTWAIT;
 939	if (flags & MSG_WAITALL)
 940		min_ret = iov_iter_count(&msg.msg_iter);
 941
 942	ret = sock_recvmsg(sock, &msg, flags);
 943	if (ret < min_ret) {
 944		if (ret == -EAGAIN && force_nonblock) {
 945			if (issue_flags & IO_URING_F_MULTISHOT) {
 946				io_kbuf_recycle(req, issue_flags);
 947				return IOU_ISSUE_SKIP_COMPLETE;
 948			}
 949
 950			return -EAGAIN;
 951		}
 952		if (ret > 0 && io_net_retry(sock, flags)) {
 953			sr->len -= ret;
 954			sr->buf += ret;
 955			sr->done_io += ret;
 956			req->flags |= REQ_F_PARTIAL_IO;
 957			return -EAGAIN;
 958		}
 959		if (ret == -ERESTARTSYS)
 960			ret = -EINTR;
 961		req_set_fail(req);
 962	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 963out_free:
 964		req_set_fail(req);
 965	}
 966
 967	if (ret > 0)
 968		ret += sr->done_io;
 969	else if (sr->done_io)
 970		ret = sr->done_io;
 971	else
 972		io_kbuf_recycle(req, issue_flags);
 973
 974	if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
 975		goto retry_multishot;
 976
 977	return ret;
 978}
 979
 980void io_send_zc_cleanup(struct io_kiocb *req)
 981{
 982	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 983	struct io_async_msghdr *io;
 984
 985	if (req_has_async_data(req)) {
 986		io = req->async_data;
 987		/* might be ->fast_iov if *msg_copy_hdr failed */
 988		if (io->free_iov != io->fast_iov)
 989			kfree(io->free_iov);
 990	}
 991	if (zc->notif) {
 992		io_notif_flush(zc->notif);
 993		zc->notif = NULL;
 994	}
 995}
 996
 997#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
 998#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)
 999
1000int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1001{
1002	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1003	struct io_ring_ctx *ctx = req->ctx;
1004	struct io_kiocb *notif;
1005
1006	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1007		return -EINVAL;
1008	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1009	if (req->flags & REQ_F_CQE_SKIP)
1010		return -EINVAL;
1011
1012	notif = zc->notif = io_alloc_notif(ctx);
1013	if (!notif)
1014		return -ENOMEM;
1015	notif->cqe.user_data = req->cqe.user_data;
1016	notif->cqe.res = 0;
1017	notif->cqe.flags = IORING_CQE_F_NOTIF;
1018	req->flags |= REQ_F_NEED_CLEANUP;
1019
1020	zc->flags = READ_ONCE(sqe->ioprio);
1021	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1022		if (zc->flags & ~IO_ZC_FLAGS_VALID)
1023			return -EINVAL;
1024		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1025			io_notif_set_extended(notif);
1026			io_notif_to_data(notif)->zc_report = true;
1027		}
1028	}
1029
1030	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1031		unsigned idx = READ_ONCE(sqe->buf_index);
1032
1033		if (unlikely(idx >= ctx->nr_user_bufs))
1034			return -EFAULT;
1035		idx = array_index_nospec(idx, ctx->nr_user_bufs);
1036		req->imu = READ_ONCE(ctx->user_bufs[idx]);
1037		io_req_set_rsrc_node(notif, ctx, 0);
1038	}
1039
1040	if (req->opcode == IORING_OP_SEND_ZC) {
1041		if (READ_ONCE(sqe->__pad3[0]))
1042			return -EINVAL;
1043		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1044		zc->addr_len = READ_ONCE(sqe->addr_len);
1045	} else {
1046		if (unlikely(sqe->addr2 || sqe->file_index))
1047			return -EINVAL;
1048		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
1049			return -EINVAL;
1050	}
1051
1052	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
1053	zc->len = READ_ONCE(sqe->len);
1054	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
1055	if (zc->msg_flags & MSG_DONTWAIT)
1056		req->flags |= REQ_F_NOWAIT;
1057
1058	zc->done_io = 0;
1059
1060#ifdef CONFIG_COMPAT
1061	if (req->ctx->compat)
1062		zc->msg_flags |= MSG_CMSG_COMPAT;
1063#endif
1064	return 0;
1065}
1066
1067static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
1068				 struct iov_iter *from, size_t length)
1069{
1070	skb_zcopy_downgrade_managed(skb);
1071	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
1072}
1073
1074static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
1075			   struct iov_iter *from, size_t length)
1076{
1077	struct skb_shared_info *shinfo = skb_shinfo(skb);
1078	int frag = shinfo->nr_frags;
1079	int ret = 0;
1080	struct bvec_iter bi;
1081	ssize_t copied = 0;
1082	unsigned long truesize = 0;
1083
1084	if (!frag)
1085		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
1086	else if (unlikely(!skb_zcopy_managed(skb)))
1087		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
1088
1089	bi.bi_size = min(from->count, length);
1090	bi.bi_bvec_done = from->iov_offset;
1091	bi.bi_idx = 0;
1092
1093	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
1094		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
1095
1096		copied += v.bv_len;
1097		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
1098		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
1099					   v.bv_offset, v.bv_len);
1100		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
1101	}
1102	if (bi.bi_size)
1103		ret = -EMSGSIZE;
1104
1105	shinfo->nr_frags = frag;
1106	from->bvec += bi.bi_idx;
1107	from->nr_segs -= bi.bi_idx;
1108	from->count -= copied;
1109	from->iov_offset = bi.bi_bvec_done;
1110
1111	skb->data_len += copied;
1112	skb->len += copied;
1113	skb->truesize += truesize;
1114
1115	if (sk && sk->sk_type == SOCK_STREAM) {
1116		sk_wmem_queued_add(sk, truesize);
1117		if (!skb_zcopy_pure(skb))
1118			sk_mem_charge(sk, truesize);
1119	} else {
1120		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
1121	}
1122	return ret;
1123}
1124
1125int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1126{
1127	struct sockaddr_storage __address;
1128	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1129	struct msghdr msg;
1130	struct socket *sock;
1131	unsigned msg_flags;
1132	int ret, min_ret = 0;
1133
1134	sock = sock_from_file(req->file);
1135	if (unlikely(!sock))
1136		return -ENOTSOCK;
1137	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1138		return -EOPNOTSUPP;
1139
1140	msg.msg_name = NULL;
1141	msg.msg_control = NULL;
1142	msg.msg_controllen = 0;
1143	msg.msg_namelen = 0;
1144
1145	if (zc->addr) {
1146		if (req_has_async_data(req)) {
1147			struct io_async_msghdr *io = req->async_data;
1148
1149			msg.msg_name = &io->addr;
1150		} else {
1151			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
1152			if (unlikely(ret < 0))
1153				return ret;
1154			msg.msg_name = (struct sockaddr *)&__address;
1155		}
1156		msg.msg_namelen = zc->addr_len;
1157	}
1158
1159	if (!(req->flags & REQ_F_POLLED) &&
1160	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
1161		return io_setup_async_addr(req, &__address, issue_flags);
1162
1163	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
1164		ret = io_import_fixed(ITER_SOURCE, &msg.msg_iter, req->imu,
1165					(u64)(uintptr_t)zc->buf, zc->len);
1166		if (unlikely(ret))
1167			return ret;
1168		msg.sg_from_iter = io_sg_from_iter;
1169	} else {
1170		io_notif_set_extended(zc->notif);
1171		ret = import_ubuf(ITER_SOURCE, zc->buf, zc->len, &msg.msg_iter);
1172		if (unlikely(ret))
1173			return ret;
1174		ret = io_notif_account_mem(zc->notif, zc->len);
1175		if (unlikely(ret))
1176			return ret;
1177		msg.sg_from_iter = io_sg_from_iter_iovec;
1178	}
1179
1180	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
1181	if (issue_flags & IO_URING_F_NONBLOCK)
1182		msg_flags |= MSG_DONTWAIT;
1183	if (msg_flags & MSG_WAITALL)
1184		min_ret = iov_iter_count(&msg.msg_iter);
1185	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1186
1187	msg.msg_flags = msg_flags;
1188	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1189	ret = sock_sendmsg(sock, &msg);
1190
1191	if (unlikely(ret < min_ret)) {
1192		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1193			return io_setup_async_addr(req, &__address, issue_flags);
1194
1195		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
1196			zc->len -= ret;
1197			zc->buf += ret;
1198			zc->done_io += ret;
1199			req->flags |= REQ_F_PARTIAL_IO;
1200			return io_setup_async_addr(req, &__address, issue_flags);
1201		}
1202		if (ret == -ERESTARTSYS)
1203			ret = -EINTR;
1204		req_set_fail(req);
1205	}
1206
1207	if (ret >= 0)
1208		ret += zc->done_io;
1209	else if (zc->done_io)
1210		ret = zc->done_io;
1211
1212	/*
1213	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1214	 * flushing notif to io_send_zc_cleanup()
1215	 */
1216	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1217		io_notif_flush(zc->notif);
1218		req->flags &= ~REQ_F_NEED_CLEANUP;
1219	}
1220	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1221	return IOU_OK;
1222}
1223
1224int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1225{
1226	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1227	struct io_async_msghdr iomsg, *kmsg;
1228	struct socket *sock;
1229	unsigned flags;
1230	int ret, min_ret = 0;
1231
1232	io_notif_set_extended(sr->notif);
1233
1234	sock = sock_from_file(req->file);
1235	if (unlikely(!sock))
1236		return -ENOTSOCK;
1237	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1238		return -EOPNOTSUPP;
1239
1240	if (req_has_async_data(req)) {
1241		kmsg = req->async_data;
1242	} else {
1243		ret = io_sendmsg_copy_hdr(req, &iomsg);
1244		if (ret)
1245			return ret;
1246		kmsg = &iomsg;
1247	}
1248
1249	if (!(req->flags & REQ_F_POLLED) &&
1250	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
1251		return io_setup_async_msg(req, kmsg, issue_flags);
1252
1253	flags = sr->msg_flags | MSG_ZEROCOPY;
1254	if (issue_flags & IO_URING_F_NONBLOCK)
1255		flags |= MSG_DONTWAIT;
1256	if (flags & MSG_WAITALL)
1257		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1258
1259	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1260	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1261	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1262
1263	if (unlikely(ret < min_ret)) {
1264		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1265			return io_setup_async_msg(req, kmsg, issue_flags);
1266
1267		if (ret > 0 && io_net_retry(sock, flags)) {
1268			sr->done_io += ret;
1269			req->flags |= REQ_F_PARTIAL_IO;
1270			return io_setup_async_msg(req, kmsg, issue_flags);
1271		}
1272		if (ret == -ERESTARTSYS)
1273			ret = -EINTR;
1274		req_set_fail(req);
1275	}
1276	/* fast path, check for non-NULL to avoid function call */
1277	if (kmsg->free_iov) {
1278		kfree(kmsg->free_iov);
1279		kmsg->free_iov = NULL;
1280	}
1281
1282	io_netmsg_recycle(req, issue_flags);
1283	if (ret >= 0)
1284		ret += sr->done_io;
1285	else if (sr->done_io)
1286		ret = sr->done_io;
1287
1288	/*
1289	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
1290	 * flushing notif to io_send_zc_cleanup()
1291	 */
1292	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1293		io_notif_flush(sr->notif);
1294		req->flags &= ~REQ_F_NEED_CLEANUP;
1295	}
1296	io_req_set_res(req, ret, IORING_CQE_F_MORE);
1297	return IOU_OK;
1298}
1299
1300void io_sendrecv_fail(struct io_kiocb *req)
1301{
1302	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1303
1304	if (req->flags & REQ_F_PARTIAL_IO)
1305		req->cqe.res = sr->done_io;
1306
1307	if ((req->flags & REQ_F_NEED_CLEANUP) &&
1308	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1309		req->cqe.flags |= IORING_CQE_F_MORE;
1310}
1311
1312int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1313{
1314	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1315	unsigned flags;
1316
1317	if (sqe->len || sqe->buf_index)
1318		return -EINVAL;
1319
1320	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1321	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1322	accept->flags = READ_ONCE(sqe->accept_flags);
1323	accept->nofile = rlimit(RLIMIT_NOFILE);
1324	flags = READ_ONCE(sqe->ioprio);
1325	if (flags & ~IORING_ACCEPT_MULTISHOT)
1326		return -EINVAL;
1327
1328	accept->file_slot = READ_ONCE(sqe->file_index);
1329	if (accept->file_slot) {
1330		if (accept->flags & SOCK_CLOEXEC)
1331			return -EINVAL;
1332		if (flags & IORING_ACCEPT_MULTISHOT &&
1333		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
1334			return -EINVAL;
1335	}
1336	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1337		return -EINVAL;
1338	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1339		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1340	if (flags & IORING_ACCEPT_MULTISHOT)
1341		req->flags |= REQ_F_APOLL_MULTISHOT;
1342	return 0;
1343}
1344
1345int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1346{
1347	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1348	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1349	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
1350	bool fixed = !!accept->file_slot;
1351	struct file *file;
1352	int ret, fd;
1353
1354	if (!io_check_multishot(req, issue_flags))
1355		return -EAGAIN;
1356retry:
1357	if (!fixed) {
1358		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1359		if (unlikely(fd < 0))
1360			return fd;
1361	}
1362	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
1363			 accept->flags);
1364	if (IS_ERR(file)) {
1365		if (!fixed)
1366			put_unused_fd(fd);
1367		ret = PTR_ERR(file);
1368		if (ret == -EAGAIN && force_nonblock) {
1369			/*
1370			 * if it's multishot and polled, we don't need to
1371			 * return EAGAIN to arm the poll infra since it
1372			 * has already been done
1373			 */
1374			if (issue_flags & IO_URING_F_MULTISHOT)
1375				return IOU_ISSUE_SKIP_COMPLETE;
1376			return ret;
1377		}
1378		if (ret == -ERESTARTSYS)
1379			ret = -EINTR;
1380		req_set_fail(req);
1381	} else if (!fixed) {
1382		fd_install(fd, file);
1383		ret = fd;
1384	} else {
1385		ret = io_fixed_fd_install(req, issue_flags, file,
1386						accept->file_slot);
1387	}
1388
1389	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
1390		io_req_set_res(req, ret, 0);
1391		return IOU_OK;
1392	}
1393
1394	if (ret < 0)
1395		return ret;
1396	if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
1397				ret, IORING_CQE_F_MORE))
1398		goto retry;
1399
1400	io_req_set_res(req, ret, 0);
1401	return IOU_STOP_MULTISHOT;
1402}
1403
1404int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1405{
1406	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1407
1408	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1409		return -EINVAL;
1410
1411	sock->domain = READ_ONCE(sqe->fd);
1412	sock->type = READ_ONCE(sqe->off);
1413	sock->protocol = READ_ONCE(sqe->len);
1414	sock->file_slot = READ_ONCE(sqe->file_index);
1415	sock->nofile = rlimit(RLIMIT_NOFILE);
1416
1417	sock->flags = sock->type & ~SOCK_TYPE_MASK;
1418	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1419		return -EINVAL;
1420	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1421		return -EINVAL;
1422	return 0;
1423}
1424
1425int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1426{
1427	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1428	bool fixed = !!sock->file_slot;
1429	struct file *file;
1430	int ret, fd;
1431
1432	if (!fixed) {
1433		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1434		if (unlikely(fd < 0))
1435			return fd;
1436	}
1437	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1438	if (IS_ERR(file)) {
1439		if (!fixed)
1440			put_unused_fd(fd);
1441		ret = PTR_ERR(file);
1442		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1443			return -EAGAIN;
1444		if (ret == -ERESTARTSYS)
1445			ret = -EINTR;
1446		req_set_fail(req);
1447	} else if (!fixed) {
1448		fd_install(fd, file);
1449		ret = fd;
1450	} else {
1451		ret = io_fixed_fd_install(req, issue_flags, file,
1452					    sock->file_slot);
1453	}
1454	io_req_set_res(req, ret, 0);
1455	return IOU_OK;
1456}
1457
1458int io_connect_prep_async(struct io_kiocb *req)
1459{
1460	struct io_async_connect *io = req->async_data;
1461	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1462
1463	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
1464}
1465
1466int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1467{
1468	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1469
1470	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1471		return -EINVAL;
1472
1473	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1474	conn->addr_len =  READ_ONCE(sqe->addr2);
1475	conn->in_progress = conn->seen_econnaborted = false;
1476	return 0;
1477}
1478
1479int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1480{
1481	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1482	struct io_async_connect __io, *io;
1483	unsigned file_flags;
1484	int ret;
1485	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1486
1487	if (req_has_async_data(req)) {
1488		io = req->async_data;
1489	} else {
1490		ret = move_addr_to_kernel(connect->addr,
1491						connect->addr_len,
1492						&__io.address);
1493		if (ret)
1494			goto out;
1495		io = &__io;
1496	}
1497
1498	file_flags = force_nonblock ? O_NONBLOCK : 0;
1499
1500	ret = __sys_connect_file(req->file, &io->address,
1501					connect->addr_len, file_flags);
1502	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1503	    && force_nonblock) {
1504		if (ret == -EINPROGRESS) {
1505			connect->in_progress = true;
1506		} else if (ret == -ECONNABORTED) {
1507			if (connect->seen_econnaborted)
1508				goto out;
1509			connect->seen_econnaborted = true;
1510		}
1511		if (req_has_async_data(req))
1512			return -EAGAIN;
1513		if (io_alloc_async_data(req)) {
1514			ret = -ENOMEM;
1515			goto out;
1516		}
1517		memcpy(req->async_data, &__io, sizeof(__io));
1518		return -EAGAIN;
1519	}
1520	if (connect->in_progress) {
1521		/*
1522		 * At least bluetooth will return -EBADFD on a re-connect
1523		 * attempt, and it's (supposedly) also valid to get -EISCONN
1524		 * which means the previous result is good. For both of these,
1525		 * grab the sock_error() and use that for the completion.
1526		 */
1527		if (ret == -EBADFD || ret == -EISCONN)
1528			ret = sock_error(sock_from_file(req->file)->sk);
1529	}
1530	if (ret == -ERESTARTSYS)
1531		ret = -EINTR;
1532out:
1533	if (ret < 0)
1534		req_set_fail(req);
1535	io_req_set_res(req, ret, 0);
1536	return IOU_OK;
1537}
1538
1539void io_netmsg_cache_free(struct io_cache_entry *entry)
1540{
1541	kfree(container_of(entry, struct io_async_msghdr, cache));
1542}
1543#endif