GitHub Repository: torvalds/linux
Path: blob/master/io_uring/poll.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "alloc_cache.h"
#include "refs.h"
#include "napi.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

struct io_poll_update {
	struct file *file;
	u64 old_user_data;
	u64 new_user_data;
	__poll_t events;
	bool update_events;
	bool update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};

#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_RETRY_FLAG	BIT(30)
#define IO_POLL_REF_MASK	GENMASK(29, 0)

/*
 * We usually have 1-2 refs taken, 128 is more than enough and we want to
 * maximise the margin between this amount and the moment when it overflows.
 */
#define IO_POLL_REF_BIAS	128

#define IO_WQE_F_DOUBLE		1
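
/*
 * ->poll_refs layout: bits 0-29 (IO_POLL_REF_MASK) hold the reference
 * count used for ownership, bit 30 is the retry flag and bit 31 the
 * cancellation flag. IO_WQE_F_DOUBLE is stashed in the lowest bit of
 * wait_queue_entry->private, which otherwise holds the io_kiocb pointer,
 * to tell the second ("double") poll entry apart from the first; see
 * wqe_to_req() and wqe_is_double() below.
 */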

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key);

static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}

static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}

static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
{
	int v;

	/*
	 * poll_refs are already elevated and we don't have much hope for
	 * grabbing the ownership. Instead of incrementing, set a retry flag
	 * to notify the loop that there might have been some change.
	 */
	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, the request is
 * free. We can bump it and acquire ownership. Modifying a request while not
 * owning it is disallowed; that prevents races when enqueueing task_work and
 * between arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}
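
/*
 * The "double" poll entry is the second waitqueue entry, allocated in
 * __io_queue_proc() when the polled file uses more than one waitqueue
 * (e.g. separate read and write queues). Pure poll keeps it in
 * ->async_data, async armed poll keeps it in ->apoll->double_poll.
 */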
static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}

static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
}

static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}

static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}
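
/*
 * IOU_POLL_REISSUE tells io_poll_task_func() to resubmit the request,
 * IOU_POLL_REQUEUE to re-queue the poll task_work. The remaining values
 * are documented above io_poll_check_events().
 */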
enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
	IOU_POLL_REISSUE = 3,
	IOU_POLL_REQUEUE = 4,
};

static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	unsigned flags = 0;

	io_req_set_res(req, mask, 0);
	req->io_task_work.func = io_poll_task_func;

	trace_io_uring_task_add(req, mask);

	if (!(req->flags & REQ_F_POLL_NO_LAZY))
		flags = IOU_F_TWQ_LAZY_WAKE;
	__io_req_task_work_add(req, flags);
}

static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action
 * is required, which means either a spurious wakeup or a multishot CQE
 * that has already been served. IOU_POLL_DONE when it's done with the
 * request, then the mask is stored in req->cqe.res.
 * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove the multishot poll and
 * that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw)
{
	int v;

	if (unlikely(io_should_terminate_tw(req->ctx)))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		if (unlikely(v != 1)) {
			/* tw should be the owner and so have some refs */
			if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
				return IOU_POLL_NO_ACTION;
			if (v & IO_POLL_CANCEL_FLAG)
				return -ECANCELED;
			/*
			 * cqe.res contains only the events from the first
			 * wakeup; any others would be lost. Redo vfs_poll()
			 * to get the up-to-date state.
			 */
			if ((v & IO_POLL_REF_MASK) != 1)
				req->cqe.res = 0;

			if (v & IO_POLL_RETRY_FLAG) {
				req->cqe.res = 0;
				/*
				 * We won't find new events that came in between
				 * vfs_poll and the ref put unless we clear the
				 * flag in advance.
				 */
				atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
				v &= ~IO_POLL_RETRY_FLAG;
			}
		}

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
			/*
			 * We got woken with a mask, but someone else got to
			 * it first. The above vfs_poll() doesn't add us back
			 * to the waitqueue, so if we get nothing back, we
			 * should be safe and attempt a reissue.
			 */
			if (unlikely(!req->cqe.res)) {
				/* Multishot armed need not reissue */
				if (!(req->apoll_events & EPOLLONESHOT))
					continue;
				return IOU_POLL_REISSUE;
			}
		}
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_req_post_cqe(req, mask, IORING_CQE_F_MORE)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			int ret = io_poll_issue(req, tw);

			if (ret == IOU_COMPLETE)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			else if (ret == IOU_REQUEUE)
				return IOU_POLL_REQUEUE;
			if (ret != IOU_RETRY && ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
		v &= IO_POLL_REF_MASK;
	} while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK);

	io_napi_add(req);
	return IOU_POLL_NO_ACTION;
}

void io_poll_task_func(struct io_kiocb *req, io_tw_token_t tw)
{
	int ret;

	ret = io_poll_check_events(req, tw);
	if (ret == IOU_POLL_NO_ACTION) {
		return;
	} else if (ret == IOU_POLL_REQUEUE) {
		__io_poll_execute(req, 0);
		return;
	}
	io_poll_remove_entries(req);
	/* task_work always has ->uring_lock held */
	hash_del(&req->hash_node);

	if (req->opcode == IORING_OP_POLL_ADD) {
		if (ret == IOU_POLL_DONE) {
			struct io_poll *poll;

			poll = io_kiocb_to_cmd(req, struct io_poll);
			req->cqe.res = mangle_poll(req->cqe.res & poll->events);
		} else if (ret == IOU_POLL_REISSUE) {
			io_req_task_submit(req, tw);
			return;
		} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
			req->cqe.res = ret;
			req_set_fail(req);
		}

		io_req_set_res(req, req->cqe.res, 0);
		io_req_task_complete(req, tw);
	} else {
		io_tw_lock(req->ctx, tw);

		if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
			io_req_task_complete(req, tw);
		else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
			io_req_task_submit(req, tw);
		else
			io_req_defer_failed(req, ret);
	}
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}
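
/*
 * Event bits that are stripped from poll->events when io_poll_wake()
 * checks whether a wakeup mask is of interest to the request.
 */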
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already
	 * holds ownership over it, we have to tear down the request as
	 * best we can. That means immediately removing the request from
	 * its waitqueue and preventing all further accesses to the
	 * waitqueue via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it, check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/*
		 * If we trigger a multishot poll off our own wakeup path,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
			poll->events |= EPOLLONESHOT;

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}

/* fails only when polling is already being completed by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming, the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}

static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Set up a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE) {
		add_wait_queue_exclusive(head, &poll->wait);
	} else {
		add_wait_queue(head, &poll->wait);
	}
}

static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}

static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}

static void io_poll_add_hash(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;

	io_ring_submit_lock(ctx, issue_flags);
	io_poll_req_insert(req);
	io_ring_submit_unlock(ctx, issue_flags);
}

/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	INIT_HLIST_NODE(&req->hash_node);
	io_init_poll_iocb(poll, mask);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/*
	 * Exclusive waits may only wake a limited number of entries
	 * rather than all of them, this may interfere with lazy
	 * wake if someone does wait(events > 1). Ensure we don't do
	 * lazy wake for those, as we need to process each one as they
	 * come in.
	 */
	if (poll->events & EPOLLEXCLUSIVE)
		req->flags |= REQ_F_POLL_NO_LAZY;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_add_hash(req, issue_flags);
			return 0;
		}
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	io_poll_add_hash(req, issue_flags);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}
	io_napi_add(req);

	if (ipt->owning) {
		/*
		 * Try to release ownership. If we see a change of state, e.g.
		 * poll was woken up, queue up a tw, it'll deal with it.
		 */
		if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
			__io_poll_execute(req, 0);
	}
	return 0;
}

static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

/*
 * We can't reliably detect loops where a repeated poll trigger is followed by
 * the issue failing again. But rather than fail these immediately, allow a
 * certain amount of retries before we give up. Given that this condition
 * should _rarely_ trigger even once, we should be fine with a larger value.
 */
#define APOLL_MAX_RETRY	128

static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else {
		if (!(issue_flags & IO_URING_F_UNLOCKED))
			apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC);
		else
			apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (!apoll)
			return NULL;
		apoll->poll.retries = APOLL_MAX_RETRY;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	if (unlikely(!--apoll->poll.retries))
		return NULL;
	return apoll;
}

int io_arm_apoll(struct io_kiocb *req, unsigned issue_flags, __poll_t mask)
{
	struct async_poll *apoll;
	struct io_poll_table ipt;
	int ret;

	mask |= EPOLLET;
	if (!io_file_can_poll(req))
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}

int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_issue_def *def = &io_issue_defs[req->opcode];
	__poll_t mask = POLLPRI | POLLERR;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!io_file_can_poll(req))
		return IO_APOLL_ABORTED;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	return io_arm_apoll(req, issue_flags, mask);
}

/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			       bool cancel_all)
{
	unsigned nr_buckets = 1U << ctx->cancel_table.hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	lockdep_assert_held(&ctx->uring_lock);

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];

		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tctx, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
	}
	return found;
}

static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, ctx->cancel_table.hash_bits);
	struct io_hash_bucket *hb = &ctx->cancel_table.hbs[index];

	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (io_cancel_match_sequence(req, cd->seq))
				continue;
		}
		return req;
	}
	return NULL;
}

static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd)
{
	unsigned nr_buckets = 1U << ctx->cancel_table.hash_bits;
	struct io_kiocb *req;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];

		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (io_cancel_req_match(req, cd))
				return req;
		}
	}
	return NULL;
}
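
/*
 * Disarming requires taking ownership of the request first (see
 * io_poll_get_ownership()); -EALREADY means somebody else, e.g. a wakeup
 * or a cancellation, already owns it.
 */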
static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}

static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
{
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
			 IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd);
	else
		req = io_poll_find(ctx, false, cd);

	if (req) {
		io_poll_cancel_req(req);
		return 0;
	}
	return -ENOENT;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}
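
/*
 * Translate the poll mask from sqe->poll32_events into kernel event bits.
 * IORING_POLL_ADD_MULTI and IORING_POLL_ADD_LEVEL select multishot and
 * level-triggered behaviour by leaving EPOLLONESHOT and EPOLLET unset,
 * respectively.
 */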
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}

int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_COMPLETE;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}

int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cancel_data cd = { .ctx = ctx, .data = poll_update->old_user_data, };
	struct io_kiocb *preq;
	int ret2, ret = 0;

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd);
	ret2 = io_poll_disarm(preq);
	if (ret2) {
		ret = ret2;
		goto out;
	}
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only mask the event bits, keep the behaviour flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	preq->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(preq);
out:
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}