GitHub Repository: torvalds/linux
Path: blob/master/io_uring/net.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"
#include "zcrx.h"

struct io_shutdown {
23
struct file *file;
24
int how;
25
};
26
27
struct io_accept {
28
struct file *file;
29
struct sockaddr __user *addr;
30
int __user *addr_len;
31
int flags;
32
int iou_flags;
33
u32 file_slot;
34
unsigned long nofile;
35
};
36
37
struct io_socket {
38
struct file *file;
39
int domain;
40
int type;
41
int protocol;
42
int flags;
43
u32 file_slot;
44
unsigned long nofile;
45
};
46
47
struct io_connect {
48
struct file *file;
49
struct sockaddr __user *addr;
50
int addr_len;
51
bool in_progress;
52
bool seen_econnaborted;
53
};
54
55
struct io_bind {
56
struct file *file;
57
int addr_len;
58
};
59
60
struct io_listen {
61
struct file *file;
62
int backlog;
63
};
64
65
struct io_sr_msg {
66
struct file *file;
67
union {
68
struct compat_msghdr __user *umsg_compat;
69
struct user_msghdr __user *umsg;
70
void __user *buf;
71
};
72
int len;
73
unsigned done_io;
74
unsigned msg_flags;
75
unsigned nr_multishot_loops;
76
u16 flags;
77
/* initialised and used only by !msg send variants */
78
u16 buf_group;
79
/* per-invocation mshot limit */
80
unsigned mshot_len;
81
/* overall mshot byte limit */
82
unsigned mshot_total_len;
83
void __user *msg_control;
84
/* used only for send zerocopy */
85
struct io_kiocb *notif;
86
};
87
88
/*
89
* The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
90
* anyway. Use the upper 8 bits for internal uses.
91
*/
92
enum sr_retry_flags {
93
IORING_RECV_RETRY = (1U << 15),
94
IORING_RECV_PARTIAL_MAP = (1U << 14),
95
IORING_RECV_MSHOT_CAP = (1U << 13),
96
IORING_RECV_MSHOT_LIM = (1U << 12),
97
IORING_RECV_MSHOT_DONE = (1U << 11),
98
99
IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
100
IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
101
IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
102
};
103
104
/*
105
* Number of times we'll try and do receives if there's more data. If we
106
* exceed this limit, then add us to the back of the queue and retry from
107
* there. This helps fairness between flooding clients.
108
*/
109
#define MULTISHOT_MAX_RETRY 32
110
111
struct io_recvzc {
112
struct file *file;
113
unsigned msg_flags;
114
u16 flags;
115
u32 len;
116
struct io_zcrx_ifq *ifq;
117
};
118
119
static int io_sg_from_iter_iovec(struct sk_buff *skb,
120
struct iov_iter *from, size_t length);
121
static int io_sg_from_iter(struct sk_buff *skb,
122
struct iov_iter *from, size_t length);
123
124
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
125
{
126
struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
127
128
if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
129
sqe->buf_index || sqe->splice_fd_in))
130
return -EINVAL;
131
132
shutdown->how = READ_ONCE(sqe->len);
133
req->flags |= REQ_F_FORCE_ASYNC;
134
return 0;
135
}
136
137
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
138
{
139
struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
140
struct socket *sock;
141
int ret;
142
143
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
144
145
sock = sock_from_file(req->file);
146
if (unlikely(!sock))
147
return -ENOTSOCK;
148
149
ret = __sys_shutdown_sock(sock, shutdown->how);
150
io_req_set_res(req, ret, 0);
151
return IOU_COMPLETE;
152
}
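
/*
 * Illustrative userspace sketch (not part of this file): queueing an
 * IORING_OP_SHUTDOWN request with liburing. "ring" and "sockfd" are
 * assumptions of the example, not names used by this code.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_shutdown(sqe, sockfd, SHUT_WR);
 *	io_uring_submit(&ring);
 *	// CQE res mirrors shutdown(2): 0 on success, -errno on failure
 */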
153
154
static bool io_net_retry(struct socket *sock, int flags)
155
{
156
if (!(flags & MSG_WAITALL))
157
return false;
158
return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
159
}
160
161
static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
162
{
163
if (kmsg->vec.iovec)
164
io_vec_free(&kmsg->vec);
165
}
166
167
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
168
{
169
struct io_async_msghdr *hdr = req->async_data;
170
171
/* can't recycle, ensure we free the iovec if we have one */
172
if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
173
io_netmsg_iovec_free(hdr);
174
return;
175
}
176
177
/* Let normal cleanup path reap it if we fail adding to the cache */
178
io_alloc_cache_vec_kasan(&hdr->vec);
179
if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
180
io_vec_free(&hdr->vec);
181
182
if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr))
183
io_req_async_data_clear(req, REQ_F_NEED_CLEANUP);
184
}
185
186
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
187
{
188
struct io_ring_ctx *ctx = req->ctx;
189
struct io_async_msghdr *hdr;
190
191
hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
192
if (!hdr)
193
return NULL;
194
195
/* If the async data was cached, we might have an iov cached inside. */
196
if (hdr->vec.iovec)
197
req->flags |= REQ_F_NEED_CLEANUP;
198
return hdr;
199
}
200
201
static inline void io_mshot_prep_retry(struct io_kiocb *req,
202
struct io_async_msghdr *kmsg)
203
{
204
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
205
206
req->flags &= ~REQ_F_BL_EMPTY;
207
sr->done_io = 0;
208
sr->flags &= ~IORING_RECV_RETRY_CLEAR;
209
sr->len = sr->mshot_len;
210
}
211
212
static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
213
const struct iovec __user *uiov, unsigned uvec_seg,
214
int ddir)
215
{
216
struct iovec *iov;
217
int ret, nr_segs;
218
219
if (iomsg->vec.iovec) {
220
nr_segs = iomsg->vec.nr;
221
iov = iomsg->vec.iovec;
222
} else {
223
nr_segs = 1;
224
iov = &iomsg->fast_iov;
225
}
226
227
ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
228
&iomsg->msg.msg_iter, io_is_compat(req->ctx));
229
if (unlikely(ret < 0))
230
return ret;
231
232
if (iov) {
233
req->flags |= REQ_F_NEED_CLEANUP;
234
io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
235
}
236
return 0;
237
}
238
239
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
240
struct io_async_msghdr *iomsg,
241
struct compat_msghdr *msg, int ddir,
242
struct sockaddr __user **save_addr)
243
{
244
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
245
struct compat_iovec __user *uiov;
246
int ret;
247
248
if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
249
return -EFAULT;
250
251
ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
252
if (ret)
253
return ret;
254
255
uiov = compat_ptr(msg->msg_iov);
256
if (req->flags & REQ_F_BUFFER_SELECT) {
257
if (msg->msg_iovlen == 0) {
258
sr->len = 0;
259
} else if (msg->msg_iovlen > 1) {
260
return -EINVAL;
261
} else {
262
struct compat_iovec tmp_iov;
263
264
if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
265
return -EFAULT;
266
sr->len = tmp_iov.iov_len;
267
}
268
}
269
return 0;
270
}
271
272
static int io_copy_msghdr_from_user(struct user_msghdr *msg,
273
struct user_msghdr __user *umsg)
274
{
275
if (!user_access_begin(umsg, sizeof(*umsg)))
276
return -EFAULT;
277
unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
278
unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
279
unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
280
unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
281
unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
282
unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
283
user_access_end();
284
return 0;
285
ua_end:
286
user_access_end();
287
return -EFAULT;
288
}
289
290
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
291
struct user_msghdr *msg, int ddir,
292
struct sockaddr __user **save_addr)
293
{
294
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
295
struct user_msghdr __user *umsg = sr->umsg;
296
int ret;
297
298
iomsg->msg.msg_name = &iomsg->addr;
299
iomsg->msg.msg_iter.nr_segs = 0;
300
301
if (io_is_compat(req->ctx)) {
302
struct compat_msghdr cmsg;
303
304
ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
305
if (ret)
306
return ret;
307
308
memset(msg, 0, sizeof(*msg));
309
msg->msg_namelen = cmsg.msg_namelen;
310
msg->msg_controllen = cmsg.msg_controllen;
311
msg->msg_iov = compat_ptr(cmsg.msg_iov);
312
msg->msg_iovlen = cmsg.msg_iovlen;
313
return 0;
314
}
315
316
ret = io_copy_msghdr_from_user(msg, umsg);
317
if (unlikely(ret))
318
return ret;
319
320
msg->msg_flags = 0;
321
322
ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
323
if (ret)
324
return ret;
325
326
if (req->flags & REQ_F_BUFFER_SELECT) {
327
if (msg->msg_iovlen == 0) {
328
sr->len = 0;
329
} else if (msg->msg_iovlen > 1) {
330
return -EINVAL;
331
} else {
332
struct iovec __user *uiov = msg->msg_iov;
333
struct iovec tmp_iov;
334
335
if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
336
return -EFAULT;
337
sr->len = tmp_iov.iov_len;
338
}
339
}
340
return 0;
341
}
342
343
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
344
{
345
struct io_async_msghdr *io = req->async_data;
346
347
io_netmsg_iovec_free(io);
348
}
349
350
static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
351
{
352
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
353
struct io_async_msghdr *kmsg = req->async_data;
354
void __user *addr;
355
u16 addr_len;
356
int ret;
357
358
sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
359
360
if (READ_ONCE(sqe->__pad3[0]))
361
return -EINVAL;
362
363
kmsg->msg.msg_name = NULL;
364
kmsg->msg.msg_namelen = 0;
365
kmsg->msg.msg_control = NULL;
366
kmsg->msg.msg_controllen = 0;
367
kmsg->msg.msg_ubuf = NULL;
368
369
addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
370
addr_len = READ_ONCE(sqe->addr_len);
371
if (addr) {
372
ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
373
if (unlikely(ret < 0))
374
return ret;
375
kmsg->msg.msg_name = &kmsg->addr;
376
kmsg->msg.msg_namelen = addr_len;
377
}
378
if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
379
req->flags |= REQ_F_IMPORT_BUFFER;
380
return 0;
381
}
382
if (req->flags & REQ_F_BUFFER_SELECT)
383
return 0;
384
385
if (sr->flags & IORING_SEND_VECTORIZED)
386
return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
387
388
return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
389
}
390
391
static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
392
{
393
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
394
struct io_async_msghdr *kmsg = req->async_data;
395
struct user_msghdr msg;
396
int ret;
397
398
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
399
ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
400
if (unlikely(ret))
401
return ret;
402
/* save msg_control as sys_sendmsg() overwrites it */
403
sr->msg_control = kmsg->msg.msg_control_user;
404
405
if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
406
kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
407
return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
408
msg.msg_iovlen);
409
}
410
if (req->flags & REQ_F_BUFFER_SELECT)
411
return 0;
412
return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
413
}
414
415
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED)
416
417
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
418
{
419
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
420
421
sr->done_io = 0;
422
sr->len = READ_ONCE(sqe->len);
423
sr->flags = READ_ONCE(sqe->ioprio);
424
if (sr->flags & ~SENDMSG_FLAGS)
425
return -EINVAL;
426
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
427
if (sr->msg_flags & MSG_DONTWAIT)
428
req->flags |= REQ_F_NOWAIT;
429
if (req->flags & REQ_F_BUFFER_SELECT)
430
sr->buf_group = req->buf_index;
431
if (sr->flags & IORING_RECVSEND_BUNDLE) {
432
if (req->opcode == IORING_OP_SENDMSG)
433
return -EINVAL;
434
sr->msg_flags |= MSG_WAITALL;
435
req->flags |= REQ_F_MULTISHOT;
436
}
437
438
if (io_is_compat(req->ctx))
439
sr->msg_flags |= MSG_CMSG_COMPAT;
440
441
if (unlikely(!io_msg_alloc_async(req)))
442
return -ENOMEM;
443
if (req->opcode != IORING_OP_SENDMSG)
444
return io_send_setup(req, sqe);
445
if (unlikely(sqe->addr2 || sqe->file_index))
446
return -EINVAL;
447
return io_sendmsg_setup(req, sqe);
448
}
449
450
static void io_req_msg_cleanup(struct io_kiocb *req,
451
unsigned int issue_flags)
452
{
453
io_netmsg_recycle(req, issue_flags);
454
}
455
456
/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
465
{
466
struct iovec *iov;
467
int nbufs;
468
469
/* no data is always zero segments, and a ubuf is always 1 segment */
470
if (ret <= 0)
471
return 0;
472
if (iter_is_ubuf(&kmsg->msg.msg_iter))
473
return 1;
474
475
iov = kmsg->vec.iovec;
476
if (!iov)
477
iov = &kmsg->fast_iov;
478
479
/* if all data was transferred, it's basic pointer math */
480
if (!iov_iter_count(&kmsg->msg.msg_iter))
481
return iter_iov(&kmsg->msg.msg_iter) - iov;
482
483
/* short transfer, count segments */
484
nbufs = 0;
485
do {
486
int this_len = min_t(int, iov[nbufs].iov_len, ret);
487
488
nbufs++;
489
ret -= this_len;
490
} while (ret);
491
492
return nbufs;
493
}
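
/*
 * Worked example of the short-transfer accounting above (illustrative only):
 * with three mapped segments of 100, 200 and 300 bytes and a short transfer
 * of ret = 250, the loop consumes 100 bytes from segment 0 and the remaining
 * 150 from segment 1, so io_bundle_nbufs() reports 2 consumed buffers even
 * though segment 1 was only partially filled.
 */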
494
495
static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl,
496
struct io_async_msghdr *kmsg, int len)
497
{
498
req->flags |= REQ_F_BL_NO_RECYCLE;
499
if (req->flags & REQ_F_BUFFERS_COMMIT)
500
io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
501
return IOU_RETRY;
502
}
503
504
static inline bool io_send_finish(struct io_kiocb *req,
505
struct io_async_msghdr *kmsg,
506
struct io_br_sel *sel)
507
{
508
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
509
bool bundle_finished = sel->val <= 0;
510
unsigned int cflags;
511
512
if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
513
cflags = io_put_kbuf(req, sel->val, sel->buf_list);
514
goto finish;
515
}
516
517
cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));
518
519
if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
520
goto finish;
521
522
/*
 * Fill CQE for this send and see if we should keep trying to
 * send on this socket.
 */
if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
527
io_mshot_prep_retry(req, kmsg);
528
return false;
529
}
530
531
/* Otherwise stop bundle and use the current result. */
532
finish:
533
io_req_set_res(req, sel->val, cflags);
534
sel->val = IOU_COMPLETE;
535
return true;
536
}
537
538
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
539
{
540
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
541
struct io_async_msghdr *kmsg = req->async_data;
542
struct socket *sock;
543
unsigned flags;
544
int min_ret = 0;
545
int ret;
546
547
sock = sock_from_file(req->file);
548
if (unlikely(!sock))
549
return -ENOTSOCK;
550
551
if (!(req->flags & REQ_F_POLLED) &&
552
(sr->flags & IORING_RECVSEND_POLL_FIRST))
553
return -EAGAIN;
554
555
flags = sr->msg_flags;
556
if (issue_flags & IO_URING_F_NONBLOCK)
557
flags |= MSG_DONTWAIT;
558
if (flags & MSG_WAITALL)
559
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
560
561
kmsg->msg.msg_control_user = sr->msg_control;
562
563
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
564
565
if (ret < min_ret) {
566
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
567
return -EAGAIN;
568
if (ret > 0 && io_net_retry(sock, flags)) {
569
kmsg->msg.msg_controllen = 0;
570
kmsg->msg.msg_control = NULL;
571
sr->done_io += ret;
572
return -EAGAIN;
573
}
574
if (ret == -ERESTARTSYS)
575
ret = -EINTR;
576
req_set_fail(req);
577
}
578
io_req_msg_cleanup(req, issue_flags);
579
if (ret >= 0)
580
ret += sr->done_io;
581
else if (sr->done_io)
582
ret = sr->done_io;
583
io_req_set_res(req, ret, 0);
584
return IOU_COMPLETE;
585
}
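
/*
 * Illustrative userspace sketch (not part of this file): a plain
 * IORING_OP_SENDMSG submission with liburing. "ring", "sockfd", "iov" and
 * "mhdr" are assumptions of the example.
 *
 *	struct msghdr mhdr = { .msg_iov = &iov, .msg_iovlen = 1 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_sendmsg(sqe, sockfd, &mhdr, 0);
 *	io_uring_submit(&ring);
 *	// CQE res is the byte count or -errno, as with sendmsg(2);
 *	// MSG_NOSIGNAL is implied by io_sendmsg_prep() above.
 */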
586
587
static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
588
struct io_br_sel *sel, struct io_async_msghdr *kmsg)
589
{
590
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
591
struct buf_sel_arg arg = {
592
.iovs = &kmsg->fast_iov,
593
.max_len = min_not_zero(sr->len, INT_MAX),
594
.nr_iovs = 1,
595
.buf_group = sr->buf_group,
596
};
597
int ret;
598
599
if (kmsg->vec.iovec) {
600
arg.nr_iovs = kmsg->vec.nr;
601
arg.iovs = kmsg->vec.iovec;
602
arg.mode = KBUF_MODE_FREE;
603
}
604
605
if (!(sr->flags & IORING_RECVSEND_BUNDLE))
606
arg.nr_iovs = 1;
607
else
608
arg.mode |= KBUF_MODE_EXPAND;
609
610
ret = io_buffers_select(req, &arg, sel, issue_flags);
611
if (unlikely(ret < 0))
612
return ret;
613
614
if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
615
kmsg->vec.nr = ret;
616
kmsg->vec.iovec = arg.iovs;
617
req->flags |= REQ_F_NEED_CLEANUP;
618
}
619
sr->len = arg.out_len;
620
621
if (ret == 1) {
622
sr->buf = arg.iovs[0].iov_base;
623
ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
624
&kmsg->msg.msg_iter);
625
if (unlikely(ret))
626
return ret;
627
} else {
628
iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
629
arg.iovs, ret, arg.out_len);
630
}
631
632
return 0;
633
}
634
635
int io_send(struct io_kiocb *req, unsigned int issue_flags)
636
{
637
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
638
struct io_async_msghdr *kmsg = req->async_data;
639
struct io_br_sel sel = { };
640
struct socket *sock;
641
unsigned flags;
642
int min_ret = 0;
643
int ret;
644
645
sock = sock_from_file(req->file);
646
if (unlikely(!sock))
647
return -ENOTSOCK;
648
649
if (!(req->flags & REQ_F_POLLED) &&
650
(sr->flags & IORING_RECVSEND_POLL_FIRST))
651
return -EAGAIN;
652
653
flags = sr->msg_flags;
654
if (issue_flags & IO_URING_F_NONBLOCK)
655
flags |= MSG_DONTWAIT;
656
657
retry_bundle:
658
sel.buf_list = NULL;
659
if (io_do_buffer_select(req)) {
660
ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
661
if (ret)
662
return ret;
663
}
664
665
/*
666
* If MSG_WAITALL is set, or this is a bundle send, then we need
667
* the full amount. If just bundle is set, if we do a short send
668
* then we complete the bundle sequence rather than continue on.
669
*/
670
if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
671
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
672
673
flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
674
kmsg->msg.msg_flags = flags;
675
ret = sock_sendmsg(sock, &kmsg->msg);
676
if (ret < min_ret) {
677
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
678
return -EAGAIN;
679
680
if (ret > 0 && io_net_retry(sock, flags)) {
681
sr->len -= ret;
682
sr->buf += ret;
683
sr->done_io += ret;
684
return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
685
}
686
if (ret == -ERESTARTSYS)
687
ret = -EINTR;
688
req_set_fail(req);
689
}
690
if (ret >= 0)
691
ret += sr->done_io;
692
else if (sr->done_io)
693
ret = sr->done_io;
694
695
sel.val = ret;
696
if (!io_send_finish(req, kmsg, &sel))
697
goto retry_bundle;
698
699
io_req_msg_cleanup(req, issue_flags);
700
return sel.val;
701
}
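
/*
 * Illustrative userspace sketch (not part of this file): a bundle send,
 * i.e. IORING_OP_SEND with IORING_RECVSEND_BUNDLE picking several provided
 * buffers from one buffer group in a single operation. It assumes a buffer
 * ring was registered and populated for group "bgid" beforehand; newer
 * liburing versions also ship a dedicated helper for this. Example names
 * are assumptions.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_send(sqe, sockfd, NULL, 0, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = bgid;
 *	sqe->ioprio |= IORING_RECVSEND_BUNDLE;
 *	io_uring_submit(&ring);
 *	// CQE res reports bytes sent across the selected buffers; a CQE
 *	// flagged IORING_CQE_F_MORE means the bundle is still in progress.
 */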
702
703
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
704
struct io_async_msghdr *iomsg,
705
int namelen, size_t controllen)
706
{
707
if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
708
(REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
709
int hdr;
710
711
if (unlikely(namelen < 0))
712
return -EOVERFLOW;
713
if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
714
namelen, &hdr))
715
return -EOVERFLOW;
716
if (check_add_overflow(hdr, controllen, &hdr))
717
return -EOVERFLOW;
718
719
iomsg->namelen = namelen;
720
iomsg->controllen = controllen;
721
return 0;
722
}
723
724
return 0;
725
}
726
727
static int io_recvmsg_copy_hdr(struct io_kiocb *req,
728
struct io_async_msghdr *iomsg)
729
{
730
struct user_msghdr msg;
731
int ret;
732
733
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
734
if (unlikely(ret))
735
return ret;
736
737
if (!(req->flags & REQ_F_BUFFER_SELECT)) {
738
ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
739
ITER_DEST);
740
if (unlikely(ret))
741
return ret;
742
}
743
return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
744
msg.msg_controllen);
745
}
746
747
static int io_recvmsg_prep_setup(struct io_kiocb *req)
748
{
749
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
750
struct io_async_msghdr *kmsg;
751
752
kmsg = io_msg_alloc_async(req);
753
if (unlikely(!kmsg))
754
return -ENOMEM;
755
756
if (req->opcode == IORING_OP_RECV) {
757
kmsg->msg.msg_name = NULL;
758
kmsg->msg.msg_namelen = 0;
759
kmsg->msg.msg_inq = 0;
760
kmsg->msg.msg_control = NULL;
761
kmsg->msg.msg_get_inq = 1;
762
kmsg->msg.msg_controllen = 0;
763
kmsg->msg.msg_iocb = NULL;
764
kmsg->msg.msg_ubuf = NULL;
765
766
if (req->flags & REQ_F_BUFFER_SELECT)
767
return 0;
768
return import_ubuf(ITER_DEST, sr->buf, sr->len,
769
&kmsg->msg.msg_iter);
770
}
771
772
return io_recvmsg_copy_hdr(req, kmsg);
773
}
774
775
#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
776
IORING_RECVSEND_BUNDLE)
777
778
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
779
{
780
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
781
782
sr->done_io = 0;
783
784
if (unlikely(sqe->addr2))
785
return -EINVAL;
786
787
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
788
sr->len = READ_ONCE(sqe->len);
789
sr->flags = READ_ONCE(sqe->ioprio);
790
if (sr->flags & ~RECVMSG_FLAGS)
791
return -EINVAL;
792
sr->msg_flags = READ_ONCE(sqe->msg_flags);
793
if (sr->msg_flags & MSG_DONTWAIT)
794
req->flags |= REQ_F_NOWAIT;
795
if (sr->msg_flags & MSG_ERRQUEUE)
796
req->flags |= REQ_F_CLEAR_POLLIN;
797
if (req->flags & REQ_F_BUFFER_SELECT)
798
sr->buf_group = req->buf_index;
799
sr->mshot_total_len = sr->mshot_len = 0;
800
if (sr->flags & IORING_RECV_MULTISHOT) {
801
if (!(req->flags & REQ_F_BUFFER_SELECT))
802
return -EINVAL;
803
if (sr->msg_flags & MSG_WAITALL)
804
return -EINVAL;
805
if (req->opcode == IORING_OP_RECV) {
806
sr->mshot_len = sr->len;
807
sr->mshot_total_len = READ_ONCE(sqe->optlen);
808
if (sr->mshot_total_len)
809
sr->flags |= IORING_RECV_MSHOT_LIM;
810
} else if (sqe->optlen) {
811
return -EINVAL;
812
}
813
req->flags |= REQ_F_APOLL_MULTISHOT;
814
} else if (sqe->optlen) {
815
return -EINVAL;
816
}
817
818
if (sr->flags & IORING_RECVSEND_BUNDLE) {
819
if (req->opcode == IORING_OP_RECVMSG)
820
return -EINVAL;
821
}
822
823
if (io_is_compat(req->ctx))
824
sr->msg_flags |= MSG_CMSG_COMPAT;
825
826
sr->nr_multishot_loops = 0;
827
return io_recvmsg_prep_setup(req);
828
}
829
830
/* bits to clear in old and inherit in new cflags on bundle retry */
831
#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
832
833
/*
834
* Finishes io_recv and io_recvmsg.
835
*
836
* Returns true if it is actually finished, or false if it should run
837
* again (for multishot).
838
*/
839
static inline bool io_recv_finish(struct io_kiocb *req,
840
struct io_async_msghdr *kmsg,
841
struct io_br_sel *sel, bool mshot_finished,
842
unsigned issue_flags)
843
{
844
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
845
unsigned int cflags = 0;
846
847
if (kmsg->msg.msg_inq > 0)
848
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
849
850
if (sel->val > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
851
/*
 * If mshot_total_len hits zero, the limit has been reached. Mark
 * mshot as finished, and flag MSHOT_DONE as well to prevent
 * a potential bundle from being retried.
 */
sr->mshot_total_len -= min_t(int, sel->val, sr->mshot_total_len);
857
if (!sr->mshot_total_len) {
858
sr->flags |= IORING_RECV_MSHOT_DONE;
859
mshot_finished = true;
860
}
861
}
862
863
if (sr->flags & IORING_RECVSEND_BUNDLE) {
864
size_t this_ret = sel->val - sr->done_io;
865
866
cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
867
if (sr->flags & IORING_RECV_RETRY)
868
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
869
if (sr->mshot_len && sel->val >= sr->mshot_len)
870
sr->flags |= IORING_RECV_MSHOT_CAP;
871
/* bundle with no more immediate buffers, we're done */
872
if (req->flags & REQ_F_BL_EMPTY)
873
goto finish;
874
/*
875
* If more is available AND it was a full transfer, retry and
876
* append to this one
877
*/
878
if (!(sr->flags & IORING_RECV_NO_RETRY) &&
879
kmsg->msg.msg_inq > 1 && this_ret > 0 &&
880
!iov_iter_count(&kmsg->msg.msg_iter)) {
881
req->cqe.flags = cflags & ~CQE_F_MASK;
882
sr->len = kmsg->msg.msg_inq;
883
sr->done_io += this_ret;
884
sr->flags |= IORING_RECV_RETRY;
885
return false;
886
}
887
} else {
888
cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
889
}
890
891
/*
892
* Fill CQE for this receive and see if we should keep trying to
893
* receive from this socket.
894
*/
895
if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
896
io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
897
sel->val = IOU_RETRY;
898
io_mshot_prep_retry(req, kmsg);
899
/* Known not-empty or unknown state, retry */
900
if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
901
if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
902
!(sr->flags & IORING_RECV_MSHOT_CAP)) {
903
return false;
904
}
905
/* mshot retries exceeded, force a requeue */
906
sr->nr_multishot_loops = 0;
907
sr->flags &= ~IORING_RECV_MSHOT_CAP;
908
if (issue_flags & IO_URING_F_MULTISHOT)
909
sel->val = IOU_REQUEUE;
910
}
911
return true;
912
}
913
914
/* Finish the request / stop multishot. */
915
finish:
916
io_req_set_res(req, sel->val, cflags);
917
sel->val = IOU_COMPLETE;
918
io_req_msg_cleanup(req, issue_flags);
919
return true;
920
}
921
922
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
923
struct io_sr_msg *sr, void __user **buf,
924
size_t *len)
925
{
926
unsigned long ubuf = (unsigned long) *buf;
927
unsigned long hdr;
928
929
hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
930
kmsg->controllen;
931
if (*len < hdr)
932
return -EFAULT;
933
934
if (kmsg->controllen) {
935
unsigned long control = ubuf + hdr - kmsg->controllen;
936
937
kmsg->msg.msg_control_user = (void __user *) control;
938
kmsg->msg.msg_controllen = kmsg->controllen;
939
}
940
941
sr->buf = *buf; /* stash for later copy */
942
*buf = (void __user *) (ubuf + hdr);
943
kmsg->payloadlen = *len = *len - hdr;
944
return 0;
945
}
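
/*
 * Layout of a multishot recvmsg provided buffer, as set up above (namelen
 * and controllen come from the original user msghdr):
 *
 *	+-------------------------------+  offset 0
 *	| struct io_uring_recvmsg_out   |
 *	+-------------------------------+  sizeof(io_uring_recvmsg_out)
 *	| source address (namelen)      |
 *	+-------------------------------+  ... + namelen
 *	| control data (controllen)     |
 *	+-------------------------------+  hdr
 *	| payload (rest of the buffer)  |
 *	+-------------------------------+
 */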
946
947
struct io_recvmsg_multishot_hdr {
948
struct io_uring_recvmsg_out msg;
949
struct sockaddr_storage addr;
950
};
951
952
static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
953
struct io_async_msghdr *kmsg,
954
unsigned int flags, bool *finished)
955
{
956
int err;
957
int copy_len;
958
struct io_recvmsg_multishot_hdr hdr;
959
960
if (kmsg->namelen)
961
kmsg->msg.msg_name = &hdr.addr;
962
kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
963
kmsg->msg.msg_namelen = 0;
964
965
if (sock->file->f_flags & O_NONBLOCK)
966
flags |= MSG_DONTWAIT;
967
968
err = sock_recvmsg(sock, &kmsg->msg, flags);
969
*finished = err <= 0;
970
if (err < 0)
971
return err;
972
973
hdr.msg = (struct io_uring_recvmsg_out) {
974
.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
975
.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
976
};
977
978
hdr.msg.payloadlen = err;
979
if (err > kmsg->payloadlen)
980
err = kmsg->payloadlen;
981
982
copy_len = sizeof(struct io_uring_recvmsg_out);
983
if (kmsg->msg.msg_namelen > kmsg->namelen)
984
copy_len += kmsg->namelen;
985
else
986
copy_len += kmsg->msg.msg_namelen;
987
988
/*
989
* "fromlen shall refer to the value before truncation.."
990
* 1003.1g
991
*/
992
hdr.msg.namelen = kmsg->msg.msg_namelen;
993
994
/* ensure that there is no gap between hdr and sockaddr_storage */
995
BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
996
sizeof(struct io_uring_recvmsg_out));
997
if (copy_to_user(io->buf, &hdr, copy_len)) {
998
*finished = true;
999
return -EFAULT;
1000
}
1001
1002
return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
1003
kmsg->controllen + err;
1004
}
1005
1006
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
1007
{
1008
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1009
struct io_async_msghdr *kmsg = req->async_data;
1010
struct io_br_sel sel = { };
1011
struct socket *sock;
1012
unsigned flags;
1013
int ret, min_ret = 0;
1014
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1015
bool mshot_finished = true;
1016
1017
sock = sock_from_file(req->file);
1018
if (unlikely(!sock))
1019
return -ENOTSOCK;
1020
1021
if (!(req->flags & REQ_F_POLLED) &&
1022
(sr->flags & IORING_RECVSEND_POLL_FIRST))
1023
return -EAGAIN;
1024
1025
flags = sr->msg_flags;
1026
if (force_nonblock)
1027
flags |= MSG_DONTWAIT;
1028
1029
retry_multishot:
1030
sel.buf_list = NULL;
1031
if (io_do_buffer_select(req)) {
1032
size_t len = sr->len;
1033
1034
sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
1035
if (!sel.addr)
1036
return -ENOBUFS;
1037
1038
if (req->flags & REQ_F_APOLL_MULTISHOT) {
1039
ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
1040
if (ret) {
1041
io_kbuf_recycle(req, sel.buf_list, issue_flags);
1042
return ret;
1043
}
1044
}
1045
1046
iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
1047
}
1048
1049
kmsg->msg.msg_get_inq = 1;
1050
kmsg->msg.msg_inq = -1;
1051
if (req->flags & REQ_F_APOLL_MULTISHOT) {
1052
ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
1053
&mshot_finished);
1054
} else {
1055
/* disable partial retry for recvmsg with cmsg attached */
1056
if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
1057
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1058
1059
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
1060
kmsg->uaddr, flags);
1061
}
1062
1063
if (ret < min_ret) {
1064
if (ret == -EAGAIN && force_nonblock) {
1065
io_kbuf_recycle(req, sel.buf_list, issue_flags);
1066
return IOU_RETRY;
1067
}
1068
if (ret > 0 && io_net_retry(sock, flags)) {
1069
sr->done_io += ret;
1070
return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
1071
}
1072
if (ret == -ERESTARTSYS)
1073
ret = -EINTR;
1074
req_set_fail(req);
1075
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1076
req_set_fail(req);
1077
}
1078
1079
if (ret > 0)
1080
ret += sr->done_io;
1081
else if (sr->done_io)
1082
ret = sr->done_io;
1083
else
1084
io_kbuf_recycle(req, sel.buf_list, issue_flags);
1085
1086
sel.val = ret;
1087
if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
1088
goto retry_multishot;
1089
1090
return sel.val;
1091
}
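
/*
 * Illustrative userspace sketch (not part of this file): multishot recvmsg
 * with provided buffers. Each completion places a struct io_uring_recvmsg_out
 * header at the start of the selected buffer (see io_recvmsg_multishot()
 * above); liburing's io_uring_recvmsg_* accessors parse it. Assumes a buffer
 * ring registered for group "bgid"; example names are assumptions.
 *
 *	struct msghdr mhdr = { .msg_namelen = sizeof(struct sockaddr_storage) };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_recvmsg_multishot(sqe, sockfd, &mhdr, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = bgid;
 *	io_uring_submit(&ring);
 *
 *	// on completion, with "buf" pointing at the selected buffer:
 *	struct io_uring_recvmsg_out *out =
 *		io_uring_recvmsg_validate(buf, cqe->res, &mhdr);
 *	void *payload = io_uring_recvmsg_payload(out, &mhdr);
 */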
1092
1093
static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
1094
struct io_br_sel *sel, unsigned int issue_flags)
1095
{
1096
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1097
int ret;
1098
1099
/*
1100
* If the ring isn't locked, then don't use the peek interface
1101
* to grab multiple buffers as we will lock/unlock between
1102
* this selection and posting the buffers.
1103
*/
1104
if (!(issue_flags & IO_URING_F_UNLOCKED) &&
1105
sr->flags & IORING_RECVSEND_BUNDLE) {
1106
struct buf_sel_arg arg = {
1107
.iovs = &kmsg->fast_iov,
1108
.nr_iovs = 1,
1109
.mode = KBUF_MODE_EXPAND,
1110
.buf_group = sr->buf_group,
1111
};
1112
1113
if (kmsg->vec.iovec) {
1114
arg.nr_iovs = kmsg->vec.nr;
1115
arg.iovs = kmsg->vec.iovec;
1116
arg.mode |= KBUF_MODE_FREE;
1117
}
1118
1119
if (sel->val)
1120
arg.max_len = sel->val;
1121
else if (kmsg->msg.msg_inq > 1)
1122
arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);
1123
1124
/* if mshot limited, ensure we don't go over */
1125
if (sr->flags & IORING_RECV_MSHOT_LIM)
1126
arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
1127
ret = io_buffers_peek(req, &arg, sel);
1128
if (unlikely(ret < 0))
1129
return ret;
1130
1131
if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
1132
kmsg->vec.nr = ret;
1133
kmsg->vec.iovec = arg.iovs;
1134
req->flags |= REQ_F_NEED_CLEANUP;
1135
}
1136
if (arg.partial_map)
1137
sr->flags |= IORING_RECV_PARTIAL_MAP;
1138
1139
/* special case 1 vec, can be a fast path */
1140
if (ret == 1) {
1141
sr->buf = arg.iovs[0].iov_base;
1142
sr->len = arg.iovs[0].iov_len;
1143
goto map_ubuf;
1144
}
1145
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
1146
arg.out_len);
1147
} else {
1148
size_t len = sel->val;
1149
1150
*sel = io_buffer_select(req, &len, sr->buf_group, issue_flags);
1151
if (!sel->addr)
1152
return -ENOBUFS;
1153
sr->buf = sel->addr;
1154
sr->len = len;
1155
map_ubuf:
1156
ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
1157
&kmsg->msg.msg_iter);
1158
if (unlikely(ret))
1159
return ret;
1160
}
1161
1162
return 0;
1163
}
1164
1165
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
1166
{
1167
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1168
struct io_async_msghdr *kmsg = req->async_data;
1169
struct io_br_sel sel;
1170
struct socket *sock;
1171
unsigned flags;
1172
int ret, min_ret = 0;
1173
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1174
bool mshot_finished;
1175
1176
if (!(req->flags & REQ_F_POLLED) &&
1177
(sr->flags & IORING_RECVSEND_POLL_FIRST))
1178
return -EAGAIN;
1179
1180
sock = sock_from_file(req->file);
1181
if (unlikely(!sock))
1182
return -ENOTSOCK;
1183
1184
flags = sr->msg_flags;
1185
if (force_nonblock)
1186
flags |= MSG_DONTWAIT;
1187
1188
retry_multishot:
1189
sel.buf_list = NULL;
1190
if (io_do_buffer_select(req)) {
1191
sel.val = sr->len;
1192
ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
1193
if (unlikely(ret < 0)) {
1194
kmsg->msg.msg_inq = -1;
1195
goto out_free;
1196
}
1197
sr->buf = NULL;
1198
}
1199
1200
kmsg->msg.msg_flags = 0;
1201
kmsg->msg.msg_inq = -1;
1202
1203
if (flags & MSG_WAITALL)
1204
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1205
1206
ret = sock_recvmsg(sock, &kmsg->msg, flags);
1207
if (ret < min_ret) {
1208
if (ret == -EAGAIN && force_nonblock) {
1209
io_kbuf_recycle(req, sel.buf_list, issue_flags);
1210
return IOU_RETRY;
1211
}
1212
if (ret > 0 && io_net_retry(sock, flags)) {
1213
sr->len -= ret;
1214
sr->buf += ret;
1215
sr->done_io += ret;
1216
return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
1217
}
1218
if (ret == -ERESTARTSYS)
1219
ret = -EINTR;
1220
req_set_fail(req);
1221
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
1222
out_free:
1223
req_set_fail(req);
1224
}
1225
1226
mshot_finished = ret <= 0;
1227
if (ret > 0)
1228
ret += sr->done_io;
1229
else if (sr->done_io)
1230
ret = sr->done_io;
1231
else
1232
io_kbuf_recycle(req, sel.buf_list, issue_flags);
1233
1234
sel.val = ret;
1235
if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
1236
goto retry_multishot;
1237
1238
return sel.val;
1239
}
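
/*
 * Illustrative userspace sketch (not part of this file): multishot receive
 * fed from a registered buffer ring. "ring", "sockfd", "bgid" and NR_BUFS
 * are example names. Each arriving chunk posts a CQE with IORING_CQE_F_MORE
 * and IORING_CQE_F_BUFFER set; the buffer id is cqe->flags shifted down by
 * IORING_CQE_BUFFER_SHIFT.
 *
 *	int err;
 *	struct io_uring_buf_ring *br =
 *		io_uring_setup_buf_ring(&ring, NR_BUFS, bgid, 0, &err);
 *	// ... io_uring_buf_ring_add() each buffer, then io_uring_buf_ring_advance()
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = bgid;
 *	io_uring_submit(&ring);
 */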
1240
1241
int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1242
{
1243
struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1244
unsigned ifq_idx;
1245
1246
if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
1247
return -EINVAL;
1248
1249
ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
1250
zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
1251
if (!zc->ifq)
1252
return -EINVAL;
1253
1254
zc->len = READ_ONCE(sqe->len);
1255
zc->flags = READ_ONCE(sqe->ioprio);
1256
zc->msg_flags = READ_ONCE(sqe->msg_flags);
1257
if (zc->msg_flags)
1258
return -EINVAL;
1259
if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
1260
return -EINVAL;
1261
/* multishot required */
1262
if (!(zc->flags & IORING_RECV_MULTISHOT))
1263
return -EINVAL;
1264
/* All data completions are posted as aux CQEs. */
1265
req->flags |= REQ_F_APOLL_MULTISHOT;
1266
1267
return 0;
1268
}
1269
1270
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
1271
{
1272
struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
1273
struct socket *sock;
1274
unsigned int len;
1275
int ret;
1276
1277
if (!(req->flags & REQ_F_POLLED) &&
1278
(zc->flags & IORING_RECVSEND_POLL_FIRST))
1279
return -EAGAIN;
1280
1281
sock = sock_from_file(req->file);
1282
if (unlikely(!sock))
1283
return -ENOTSOCK;
1284
1285
len = zc->len;
1286
ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
1287
issue_flags, &zc->len);
1288
if (len && zc->len == 0) {
1289
io_req_set_res(req, 0, 0);
1290
1291
return IOU_COMPLETE;
1292
}
1293
if (unlikely(ret <= 0) && ret != -EAGAIN) {
1294
if (ret == -ERESTARTSYS)
1295
ret = -EINTR;
1296
if (ret == IOU_REQUEUE)
1297
return IOU_REQUEUE;
1298
1299
req_set_fail(req);
1300
io_req_set_res(req, ret, 0);
1301
return IOU_COMPLETE;
1302
}
1303
return IOU_RETRY;
1304
}
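
/*
 * Note on the zero-copy receive path above: no payload is copied into a user
 * buffer here. io_zcrx_recv() hands data from the registered zcrx interface
 * queue straight to userspace, and each chunk is reported as an aux CQE (the
 * prep handler forces REQ_F_APOLL_MULTISHOT for exactly this reason). A
 * final completion without IORING_CQE_F_MORE is posted once the byte limit
 * is consumed or the request fails.
 */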
1305
1306
void io_send_zc_cleanup(struct io_kiocb *req)
1307
{
1308
struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1309
struct io_async_msghdr *io = req->async_data;
1310
1311
if (req_has_async_data(req))
1312
io_netmsg_iovec_free(io);
1313
if (zc->notif) {
1314
io_notif_flush(zc->notif);
1315
zc->notif = NULL;
1316
}
1317
}
1318
1319
#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
1320
#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \
1321
IORING_SEND_VECTORIZED)
1322
1323
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1324
{
1325
struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1326
struct io_ring_ctx *ctx = req->ctx;
1327
struct io_async_msghdr *iomsg;
1328
struct io_kiocb *notif;
1329
int ret;
1330
1331
zc->done_io = 0;
1332
1333
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
1334
return -EINVAL;
1335
/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
1336
if (req->flags & REQ_F_CQE_SKIP)
1337
return -EINVAL;
1338
1339
notif = zc->notif = io_alloc_notif(ctx);
1340
if (!notif)
1341
return -ENOMEM;
1342
notif->cqe.user_data = req->cqe.user_data;
1343
notif->cqe.res = 0;
1344
notif->cqe.flags = IORING_CQE_F_NOTIF;
1345
req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;
1346
1347
zc->flags = READ_ONCE(sqe->ioprio);
1348
if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
1349
if (zc->flags & ~IO_ZC_FLAGS_VALID)
1350
return -EINVAL;
1351
if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
1352
struct io_notif_data *nd = io_notif_to_data(notif);
1353
1354
nd->zc_report = true;
1355
nd->zc_used = false;
1356
nd->zc_copied = false;
1357
}
1358
}
1359
1360
zc->len = READ_ONCE(sqe->len);
1361
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
1362
req->buf_index = READ_ONCE(sqe->buf_index);
1363
if (zc->msg_flags & MSG_DONTWAIT)
1364
req->flags |= REQ_F_NOWAIT;
1365
1366
if (io_is_compat(req->ctx))
1367
zc->msg_flags |= MSG_CMSG_COMPAT;
1368
1369
iomsg = io_msg_alloc_async(req);
1370
if (unlikely(!iomsg))
1371
return -ENOMEM;
1372
1373
if (req->opcode == IORING_OP_SEND_ZC) {
1374
ret = io_send_setup(req, sqe);
1375
} else {
1376
if (unlikely(sqe->addr2 || sqe->file_index))
1377
return -EINVAL;
1378
ret = io_sendmsg_setup(req, sqe);
1379
}
1380
if (unlikely(ret))
1381
return ret;
1382
1383
if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
1384
iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
1385
return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
1386
}
1387
iomsg->msg.sg_from_iter = io_sg_from_iter;
1388
return 0;
1389
}
1390
1391
static int io_sg_from_iter_iovec(struct sk_buff *skb,
1392
struct iov_iter *from, size_t length)
1393
{
1394
skb_zcopy_downgrade_managed(skb);
1395
return zerocopy_fill_skb_from_iter(skb, from, length);
1396
}
1397
1398
static int io_sg_from_iter(struct sk_buff *skb,
1399
struct iov_iter *from, size_t length)
1400
{
1401
struct skb_shared_info *shinfo = skb_shinfo(skb);
1402
int frag = shinfo->nr_frags;
1403
int ret = 0;
1404
struct bvec_iter bi;
1405
ssize_t copied = 0;
1406
unsigned long truesize = 0;
1407
1408
if (!frag)
1409
shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
1410
else if (unlikely(!skb_zcopy_managed(skb)))
1411
return zerocopy_fill_skb_from_iter(skb, from, length);
1412
1413
bi.bi_size = min(from->count, length);
1414
bi.bi_bvec_done = from->iov_offset;
1415
bi.bi_idx = 0;
1416
1417
while (bi.bi_size && frag < MAX_SKB_FRAGS) {
1418
struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
1419
1420
copied += v.bv_len;
1421
truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
1422
__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
1423
v.bv_offset, v.bv_len);
1424
bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
1425
}
1426
if (bi.bi_size)
1427
ret = -EMSGSIZE;
1428
1429
shinfo->nr_frags = frag;
1430
from->bvec += bi.bi_idx;
1431
from->nr_segs -= bi.bi_idx;
1432
from->count -= copied;
1433
from->iov_offset = bi.bi_bvec_done;
1434
1435
skb->data_len += copied;
1436
skb->len += copied;
1437
skb->truesize += truesize;
1438
return ret;
1439
}
1440
1441
static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
1442
{
1443
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1444
struct io_async_msghdr *kmsg = req->async_data;
1445
1446
WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));
1447
1448
sr->notif->buf_index = req->buf_index;
1449
return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
1450
(u64)(uintptr_t)sr->buf, sr->len,
1451
ITER_SOURCE, issue_flags);
1452
}
1453
1454
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
1455
{
1456
struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
1457
struct io_async_msghdr *kmsg = req->async_data;
1458
struct socket *sock;
1459
unsigned msg_flags;
1460
int ret, min_ret = 0;
1461
1462
sock = sock_from_file(req->file);
1463
if (unlikely(!sock))
1464
return -ENOTSOCK;
1465
if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1466
return -EOPNOTSUPP;
1467
1468
if (!(req->flags & REQ_F_POLLED) &&
1469
(zc->flags & IORING_RECVSEND_POLL_FIRST))
1470
return -EAGAIN;
1471
1472
if (req->flags & REQ_F_IMPORT_BUFFER) {
1473
req->flags &= ~REQ_F_IMPORT_BUFFER;
1474
ret = io_send_zc_import(req, issue_flags);
1475
if (unlikely(ret))
1476
return ret;
1477
}
1478
1479
msg_flags = zc->msg_flags;
1480
if (issue_flags & IO_URING_F_NONBLOCK)
1481
msg_flags |= MSG_DONTWAIT;
1482
if (msg_flags & MSG_WAITALL)
1483
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1484
msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1485
1486
kmsg->msg.msg_flags = msg_flags;
1487
kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
1488
ret = sock_sendmsg(sock, &kmsg->msg);
1489
1490
if (unlikely(ret < min_ret)) {
1491
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1492
return -EAGAIN;
1493
1494
if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
1495
zc->len -= ret;
1496
zc->buf += ret;
1497
zc->done_io += ret;
1498
return -EAGAIN;
1499
}
1500
if (ret == -ERESTARTSYS)
1501
ret = -EINTR;
1502
req_set_fail(req);
1503
}
1504
1505
if (ret >= 0)
1506
ret += zc->done_io;
1507
else if (zc->done_io)
1508
ret = zc->done_io;
1509
1510
/*
1511
* If we're in io-wq we can't rely on tw ordering guarantees, defer
1512
* flushing notif to io_send_zc_cleanup()
1513
*/
1514
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1515
io_notif_flush(zc->notif);
1516
zc->notif = NULL;
1517
io_req_msg_cleanup(req, 0);
1518
}
1519
io_req_set_res(req, ret, IORING_CQE_F_MORE);
1520
return IOU_COMPLETE;
1521
}
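
/*
 * Illustrative userspace sketch (not part of this file): a zero-copy send.
 * Because io_send_zc() completes with IORING_CQE_F_MORE set, userspace must
 * keep the buffer stable until the separate notification CQE
 * (IORING_CQE_F_NOTIF) arrives. Example names are assumptions.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
 *	io_uring_submit(&ring);
 *
 *	// first CQE: res = bytes sent (or -errno), flags has IORING_CQE_F_MORE
 *	// second CQE: flags has IORING_CQE_F_NOTIF, buf may now be reused
 */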
1522
1523
int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
1524
{
1525
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1526
struct io_async_msghdr *kmsg = req->async_data;
1527
struct socket *sock;
1528
unsigned flags;
1529
int ret, min_ret = 0;
1530
1531
if (req->flags & REQ_F_IMPORT_BUFFER) {
1532
unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
1533
int ret;
1534
1535
ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
1536
&kmsg->vec, uvec_segs, issue_flags);
1537
if (unlikely(ret))
1538
return ret;
1539
req->flags &= ~REQ_F_IMPORT_BUFFER;
1540
}
1541
1542
sock = sock_from_file(req->file);
1543
if (unlikely(!sock))
1544
return -ENOTSOCK;
1545
if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
1546
return -EOPNOTSUPP;
1547
1548
if (!(req->flags & REQ_F_POLLED) &&
1549
(sr->flags & IORING_RECVSEND_POLL_FIRST))
1550
return -EAGAIN;
1551
1552
flags = sr->msg_flags;
1553
if (issue_flags & IO_URING_F_NONBLOCK)
1554
flags |= MSG_DONTWAIT;
1555
if (flags & MSG_WAITALL)
1556
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
1557
1558
kmsg->msg.msg_control_user = sr->msg_control;
1559
kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
1560
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
1561
1562
if (unlikely(ret < min_ret)) {
1563
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1564
return -EAGAIN;
1565
1566
if (ret > 0 && io_net_retry(sock, flags)) {
1567
sr->done_io += ret;
1568
return -EAGAIN;
1569
}
1570
if (ret == -ERESTARTSYS)
1571
ret = -EINTR;
1572
req_set_fail(req);
1573
}
1574
1575
if (ret >= 0)
1576
ret += sr->done_io;
1577
else if (sr->done_io)
1578
ret = sr->done_io;
1579
1580
/*
1581
* If we're in io-wq we can't rely on tw ordering guarantees, defer
1582
* flushing notif to io_send_zc_cleanup()
1583
*/
1584
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
1585
io_notif_flush(sr->notif);
1586
sr->notif = NULL;
1587
io_req_msg_cleanup(req, 0);
1588
}
1589
io_req_set_res(req, ret, IORING_CQE_F_MORE);
1590
return IOU_COMPLETE;
1591
}
1592
1593
void io_sendrecv_fail(struct io_kiocb *req)
1594
{
1595
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
1596
1597
if (sr->done_io)
1598
req->cqe.res = sr->done_io;
1599
1600
if ((req->flags & REQ_F_NEED_CLEANUP) &&
1601
(req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
1602
req->cqe.flags |= IORING_CQE_F_MORE;
1603
}
1604
1605
#define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
1606
IORING_ACCEPT_POLL_FIRST)
1607
1608
int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1609
{
1610
struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1611
1612
if (sqe->len || sqe->buf_index)
1613
return -EINVAL;
1614
1615
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1616
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
1617
accept->flags = READ_ONCE(sqe->accept_flags);
1618
accept->nofile = rlimit(RLIMIT_NOFILE);
1619
accept->iou_flags = READ_ONCE(sqe->ioprio);
1620
if (accept->iou_flags & ~ACCEPT_FLAGS)
1621
return -EINVAL;
1622
1623
accept->file_slot = READ_ONCE(sqe->file_index);
1624
if (accept->file_slot) {
1625
if (accept->flags & SOCK_CLOEXEC)
1626
return -EINVAL;
1627
if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
1628
accept->file_slot != IORING_FILE_INDEX_ALLOC)
1629
return -EINVAL;
1630
}
1631
if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1632
return -EINVAL;
1633
if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
1634
accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1635
if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
1636
req->flags |= REQ_F_APOLL_MULTISHOT;
1637
if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
1638
req->flags |= REQ_F_NOWAIT;
1639
return 0;
1640
}
1641
1642
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
1643
{
1644
struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
1645
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1646
bool fixed = !!accept->file_slot;
1647
struct proto_accept_arg arg = {
1648
.flags = force_nonblock ? O_NONBLOCK : 0,
1649
};
1650
struct file *file;
1651
unsigned cflags;
1652
int ret, fd;
1653
1654
if (!(req->flags & REQ_F_POLLED) &&
1655
accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
1656
return -EAGAIN;
1657
1658
retry:
1659
if (!fixed) {
1660
fd = __get_unused_fd_flags(accept->flags, accept->nofile);
1661
if (unlikely(fd < 0))
1662
return fd;
1663
}
1664
arg.err = 0;
1665
arg.is_empty = -1;
1666
file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
1667
accept->flags);
1668
if (IS_ERR(file)) {
1669
if (!fixed)
1670
put_unused_fd(fd);
1671
ret = PTR_ERR(file);
1672
if (ret == -EAGAIN && force_nonblock &&
1673
!(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
1674
return IOU_RETRY;
1675
1676
if (ret == -ERESTARTSYS)
1677
ret = -EINTR;
1678
} else if (!fixed) {
1679
fd_install(fd, file);
1680
ret = fd;
1681
} else {
1682
ret = io_fixed_fd_install(req, issue_flags, file,
1683
accept->file_slot);
1684
}
1685
1686
cflags = 0;
1687
if (!arg.is_empty)
1688
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
1689
1690
if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
1691
io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
1692
if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
1693
goto retry;
1694
return IOU_RETRY;
1695
}
1696
1697
io_req_set_res(req, ret, cflags);
1698
if (ret < 0)
1699
req_set_fail(req);
1700
return IOU_COMPLETE;
1701
}
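
/*
 * Illustrative userspace sketch (not part of this file): multishot accept.
 * One SQE keeps producing a CQE per accepted connection until it errors or
 * is cancelled; IORING_CQE_F_MORE tells userspace the request is still
 * armed. Example names are assumptions.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_multishot_accept(sqe, listenfd, NULL, NULL, 0);
 *	io_uring_submit(&ring);
 *
 *	// each CQE: res = new fd (or -errno); resubmit if F_MORE is missing
 */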
1702
1703
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1704
{
1705
struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1706
1707
if (sqe->addr || sqe->rw_flags || sqe->buf_index)
1708
return -EINVAL;
1709
1710
sock->domain = READ_ONCE(sqe->fd);
1711
sock->type = READ_ONCE(sqe->off);
1712
sock->protocol = READ_ONCE(sqe->len);
1713
sock->file_slot = READ_ONCE(sqe->file_index);
1714
sock->nofile = rlimit(RLIMIT_NOFILE);
1715
1716
sock->flags = sock->type & ~SOCK_TYPE_MASK;
1717
if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
1718
return -EINVAL;
1719
if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1720
return -EINVAL;
1721
return 0;
1722
}
1723
1724
int io_socket(struct io_kiocb *req, unsigned int issue_flags)
1725
{
1726
struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
1727
bool fixed = !!sock->file_slot;
1728
struct file *file;
1729
int ret, fd;
1730
1731
if (!fixed) {
1732
fd = __get_unused_fd_flags(sock->flags, sock->nofile);
1733
if (unlikely(fd < 0))
1734
return fd;
1735
}
1736
file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
1737
if (IS_ERR(file)) {
1738
if (!fixed)
1739
put_unused_fd(fd);
1740
ret = PTR_ERR(file);
1741
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
1742
return -EAGAIN;
1743
if (ret == -ERESTARTSYS)
1744
ret = -EINTR;
1745
req_set_fail(req);
1746
} else if (!fixed) {
1747
fd_install(fd, file);
1748
ret = fd;
1749
} else {
1750
ret = io_fixed_fd_install(req, issue_flags, file,
1751
sock->file_slot);
1752
}
1753
io_req_set_res(req, ret, 0);
1754
return IOU_COMPLETE;
1755
}
1756
1757
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1758
{
1759
struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
1760
struct io_async_msghdr *io;
1761
1762
if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1763
return -EINVAL;
1764
1765
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1766
conn->addr_len = READ_ONCE(sqe->addr2);
1767
conn->in_progress = conn->seen_econnaborted = false;
1768
1769
io = io_msg_alloc_async(req);
1770
if (unlikely(!io))
1771
return -ENOMEM;
1772
1773
return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
1774
}
1775
1776
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
1777
{
1778
struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
1779
struct io_async_msghdr *io = req->async_data;
1780
unsigned file_flags;
1781
int ret;
1782
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
1783
1784
if (connect->in_progress) {
1785
struct poll_table_struct pt = { ._key = EPOLLERR };
1786
1787
if (vfs_poll(req->file, &pt) & EPOLLERR)
1788
goto get_sock_err;
1789
}
1790
1791
file_flags = force_nonblock ? O_NONBLOCK : 0;
1792
1793
ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
1794
file_flags);
1795
if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
1796
&& force_nonblock) {
1797
if (ret == -EINPROGRESS) {
1798
connect->in_progress = true;
1799
} else if (ret == -ECONNABORTED) {
1800
if (connect->seen_econnaborted)
1801
goto out;
1802
connect->seen_econnaborted = true;
1803
}
1804
return -EAGAIN;
1805
}
1806
if (connect->in_progress) {
1807
/*
1808
* At least bluetooth will return -EBADFD on a re-connect
1809
* attempt, and it's (supposedly) also valid to get -EISCONN
1810
* which means the previous result is good. For both of these,
1811
* grab the sock_error() and use that for the completion.
1812
*/
1813
if (ret == -EBADFD || ret == -EISCONN) {
1814
get_sock_err:
1815
ret = sock_error(sock_from_file(req->file)->sk);
1816
}
1817
}
1818
if (ret == -ERESTARTSYS)
1819
ret = -EINTR;
1820
out:
1821
if (ret < 0)
1822
req_set_fail(req);
1823
io_req_msg_cleanup(req, issue_flags);
1824
io_req_set_res(req, ret, 0);
1825
return IOU_COMPLETE;
1826
}
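
/*
 * Illustrative userspace sketch (not part of this file): an async connect.
 * The handler above keeps the request queued across -EINPROGRESS, so
 * userspace simply waits for the final CQE. Example names are assumptions,
 * with the destination address filled in by the caller.
 *
 *	struct sockaddr_in dst = { .sin_family = AF_INET, .sin_port = htons(80) };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_connect(sqe, sockfd, (struct sockaddr *)&dst, sizeof(dst));
 *	io_uring_submit(&ring);
 *	// CQE res: 0 on success, -errno on failure, as with connect(2)
 */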
1827
1828
int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1829
{
1830
struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1831
struct sockaddr __user *uaddr;
1832
struct io_async_msghdr *io;
1833
1834
if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
1835
return -EINVAL;
1836
1837
uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
1838
bind->addr_len = READ_ONCE(sqe->addr2);
1839
1840
io = io_msg_alloc_async(req);
1841
if (unlikely(!io))
1842
return -ENOMEM;
1843
return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
1844
}
1845
1846
int io_bind(struct io_kiocb *req, unsigned int issue_flags)
1847
{
1848
struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
1849
struct io_async_msghdr *io = req->async_data;
1850
struct socket *sock;
1851
int ret;
1852
1853
sock = sock_from_file(req->file);
1854
if (unlikely(!sock))
1855
return -ENOTSOCK;
1856
1857
ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
1858
if (ret < 0)
1859
req_set_fail(req);
1860
io_req_set_res(req, ret, 0);
1861
return 0;
1862
}
1863
1864
int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1865
{
1866
struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1867
1868
if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
1869
return -EINVAL;
1870
1871
listen->backlog = READ_ONCE(sqe->len);
1872
return 0;
1873
}
1874
1875
int io_listen(struct io_kiocb *req, unsigned int issue_flags)
1876
{
1877
struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
1878
struct socket *sock;
1879
int ret;
1880
1881
sock = sock_from_file(req->file);
1882
if (unlikely(!sock))
1883
return -ENOTSOCK;
1884
1885
ret = __sys_listen_socket(sock, listen->backlog);
1886
if (ret < 0)
1887
req_set_fail(req);
1888
io_req_set_res(req, ret, 0);
1889
return 0;
1890
}
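
/*
 * Illustrative userspace sketch (not part of this file): binding and
 * listening through io_uring rather than bind(2)/listen(2). The
 * io_uring_prep_bind()/io_uring_prep_listen() helpers need a recent
 * liburing; example names are assumptions.
 *
 *	struct sockaddr_in addr = {
 *		.sin_family = AF_INET,
 *		.sin_port = htons(8080),
 *		.sin_addr.s_addr = htonl(INADDR_ANY),
 *	};
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_bind(sqe, sockfd, (struct sockaddr *)&addr, sizeof(addr));
 *	sqe->flags |= IOSQE_IO_LINK;
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_listen(sqe, sockfd, SOMAXCONN);
 *	io_uring_submit(&ring);
 *	// both CQEs: 0 on success, -errno on failure
 */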
1891
1892
void io_netmsg_cache_free(const void *entry)
1893
{
1894
struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
1895
1896
io_vec_free(&kmsg->vec);
1897
kfree(kmsg);
1898
}
1899
1900