GitHub repository: torvalds/linux
Path: net/mptcp/pm_kernel.c
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2025, Matthieu Baerts.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <net/netns/generic.h>

#include "protocol.h"
#include "mib.h"
#include "mptcp_pm_gen.h"

static int pm_nl_pernet_id;

struct pm_nl_pernet {
	/* protects pernet updates */
	spinlock_t		lock;
	struct list_head	endp_list;
	u8			endpoints;
	u8			endp_signal_max;
	u8			endp_subflow_max;
	u8			endp_laminar_max;
	u8			limit_add_addr_accepted;
	u8			limit_extra_subflows;
	u8			next_id;
	DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
};

#define MPTCP_PM_ADDR_MAX	8

static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
{
	return net_generic(net, pm_nl_pernet_id);
}

static struct pm_nl_pernet *
pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
{
	return pm_nl_get_pernet(sock_net((struct sock *)msk));
}

static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
	return pm_nl_get_pernet(genl_info_net(info));
}

u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk)
{
	const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->endp_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_signal_max);

u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->endp_subflow_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_subflow_max);

u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->endp_laminar_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max);

u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->limit_add_addr_accepted);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_add_addr_accepted);

u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	return READ_ONCE(pernet->limit_extra_subflows);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_extra_subflows);
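
/*
 * Illustrative sketch (not part of the original file): the accessors above
 * are lockless readers. Writers update these pernet fields under
 * pernet->lock and publish them with WRITE_ONCE(), so a hypothetical
 * consumer only needs a racy-but-benign snapshot, e.g.:
 *
 *	static bool hypothetical_can_announce(const struct mptcp_sock *msk)
 *	{
 *		return msk->pm.add_addr_signaled <
 *		       mptcp_pm_get_endp_signal_max(msk);
 *	}
 *
 * This mirrors the check done under the msk PM lock in
 * mptcp_pm_create_subflow_or_signal_addr() below.
 */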

static bool lookup_subflow_by_daddr(const struct list_head *list,
				    const struct mptcp_addr_info *daddr)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_addr_info cur;

	list_for_each_entry(subflow, list, node) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		if (!((1 << inet_sk_state_load(ssk)) &
		      (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV)))
			continue;

		mptcp_remote_address((struct sock_common *)ssk, &cur);
		if (mptcp_addresses_equal(&cur, daddr, daddr->port))
			return true;
	}

	return false;
}
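
/*
 * Note on the state test above: TCPF_ESTABLISHED == 1 << TCP_ESTABLISHED
 * (and so on), so "(1 << state) & mask" matches a TCP state against a set
 * of states in a single AND. An equivalent, more verbose form of the same
 * filter would be:
 *
 *	int state = inet_sk_state_load(ssk);
 *
 *	if (state != TCP_ESTABLISHED && state != TCP_SYN_SENT &&
 *	    state != TCP_SYN_RECV)
 *		continue;
 */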

static bool
select_local_address(const struct pm_nl_pernet *pernet,
		     const struct mptcp_sock *msk,
		     struct mptcp_pm_local *new_local)
{
	struct mptcp_pm_addr_entry *entry;
	bool found = false;

	msk_owned_by_me(msk);

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
			continue;

		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		new_local->addr = entry->addr;
		new_local->flags = entry->flags;
		new_local->ifindex = entry->ifindex;
		found = true;
		break;
	}
	rcu_read_unlock();

	return found;
}

static bool
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
		      struct mptcp_pm_local *new_local)
{
	struct mptcp_pm_addr_entry *entry;
	bool found = false;

	rcu_read_lock();
	/* do not keep any additional per socket state, just signal
	 * the address list in order.
	 * Note: removal from the local address list during the msk life-cycle
	 * can lead to additional addresses not being announced.
	 */
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
			continue;

		new_local->addr = entry->addr;
		new_local->flags = entry->flags;
		new_local->ifindex = entry->ifindex;
		found = true;
		break;
	}
	rcu_read_unlock();

	return found;
}

static unsigned int
fill_remote_addr(struct mptcp_sock *msk, struct mptcp_addr_info *local,
		 struct mptcp_addr_info *addrs)
{
	bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
	struct mptcp_addr_info remote = { 0 };
	struct sock *sk = (struct sock *)msk;

	if (deny_id0)
		return 0;

	mptcp_remote_address((struct sock_common *)sk, &remote);

	if (!mptcp_pm_addr_families_match(sk, local, &remote))
		return 0;

	msk->pm.extra_subflows++;
	*addrs = remote;

	return 1;
}

static unsigned int
fill_remote_addresses_fullmesh(struct mptcp_sock *msk,
			       struct mptcp_addr_info *local,
			       struct mptcp_addr_info *addrs)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
	DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	struct sock *sk = (struct sock *)msk, *ssk;
	struct mptcp_subflow_context *subflow;
	int i = 0;

	/* Forbid creation of new subflows matching existing ones, possibly
	 * already created by incoming ADD_ADDR
	 */
	bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	mptcp_for_each_subflow(msk, subflow)
		if (READ_ONCE(subflow->local_id) == local->id)
			__set_bit(subflow->remote_id, unavail_id);

	mptcp_for_each_subflow(msk, subflow) {
		ssk = mptcp_subflow_tcp_sock(subflow);
		mptcp_remote_address((struct sock_common *)ssk, &addrs[i]);
		addrs[i].id = READ_ONCE(subflow->remote_id);
		if (deny_id0 && !addrs[i].id)
			continue;

		if (test_bit(addrs[i].id, unavail_id))
			continue;

		if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
			continue;

		/* forbid creating multiple address towards this id */
		__set_bit(addrs[i].id, unavail_id);
		msk->pm.extra_subflows++;
		i++;

		if (msk->pm.extra_subflows >= limit_extra_subflows)
			break;
	}

	return i;
}

/* Fill all the remote addresses into the array addrs[],
 * and return the array size.
 */
static unsigned int
fill_remote_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *local,
			  bool fullmesh, struct mptcp_addr_info *addrs)
{
	/* Non-fullmesh: fill in the single entry corresponding to the primary
	 * MPC subflow remote address, and return 1, corresponding to 1 entry.
	 */
	if (!fullmesh)
		return fill_remote_addr(msk, local, addrs);

	/* Fullmesh endpoint: fill all possible remote addresses */
	return fill_remote_addresses_fullmesh(msk, local, addrs);
}

static struct mptcp_pm_addr_entry *
__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry_rcu(entry, &pernet->endp_list, list,
				lockdep_is_held(&pernet->lock)) {
		if (entry->addr.id == id)
			return entry;
	}
	return NULL;
}

static struct mptcp_pm_addr_entry *
__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
{
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry_rcu(entry, &pernet->endp_list, list,
				lockdep_is_held(&pernet->lock)) {
		if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port))
			return entry;
	}
	return NULL;
}

static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk,
				  const struct mptcp_addr_info *addr)
{
	return msk->mpc_endpoint_id == addr->id ? 0 : addr->id;
}
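
/*
 * Worked example (illustrative, not part of the original file): if the
 * endpoint that carried the initial MPC subflow has id 3, then
 * msk->mpc_endpoint_id == 3 and mptcp_endp_get_local_id() maps 3 -> 0,
 * since the address of the initial subflow is always ID 0 on the wire;
 * any other endpoint id is returned unchanged.
 */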

/* Set mpc_endpoint_id, and send MP_PRIO for ID0 if needed */
static void mptcp_mpc_endpoint_setup(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_addr_info mpc_addr;
	struct pm_nl_pernet *pernet;
	bool backup = false;

	/* do lazy endpoint usage accounting for the MPC subflows */
	if (likely(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED)) ||
	    !msk->first)
		return;

	subflow = mptcp_subflow_ctx(msk->first);
	pernet = pm_nl_get_pernet_from_msk(msk);

	mptcp_local_address((struct sock_common *)msk->first, &mpc_addr);
	rcu_read_lock();
	entry = __lookup_addr(pernet, &mpc_addr);
	if (entry) {
		__clear_bit(entry->addr.id, msk->pm.id_avail_bitmap);
		msk->mpc_endpoint_id = entry->addr.id;
		backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	}
	rcu_read_unlock();

	/* Send MP_PRIO */
	if (backup)
		mptcp_pm_send_ack(msk, subflow, true, backup);

	msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
}

static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk);
	u8 endp_signal_max = mptcp_pm_get_endp_signal_max(msk);
	struct sock *sk = (struct sock *)msk;
	bool signal_and_subflow = false;
	struct mptcp_pm_local local;

	mptcp_mpc_endpoint_setup(msk);

	pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
		 msk->pm.local_addr_used, endp_subflow_max,
		 msk->pm.add_addr_signaled, endp_signal_max,
		 msk->pm.extra_subflows, limit_extra_subflows);

	/* check first for announce */
	if (msk->pm.add_addr_signaled < endp_signal_max) {
		/* due to racing events on both ends we can reach here while
		 * previous add address is still running: if we invoke now
		 * mptcp_pm_announce_addr(), that will fail and the
		 * corresponding id will be marked as used.
		 * Instead let the PM machinery reschedule us when the
		 * current address announce will be completed.
		 */
		if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
			return;

		if (!select_signal_address(pernet, msk, &local))
			goto subflow;

		/* If the alloc fails, we are on memory pressure, not worth
		 * continuing, and trying to create subflows.
		 */
		if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
			return;

		__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
		msk->pm.add_addr_signaled++;

		/* Special case for ID0: set the correct ID */
		if (local.addr.id == msk->mpc_endpoint_id)
			local.addr.id = 0;

		mptcp_pm_announce_addr(msk, &local.addr, false);
		mptcp_pm_addr_send_ack(msk);

		if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
			signal_and_subflow = true;
	}

subflow:
	/* check if should create a new subflow */
	while (msk->pm.local_addr_used < endp_subflow_max &&
	       msk->pm.extra_subflows < limit_extra_subflows) {
		struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
		bool fullmesh;
		int i, nr;

		if (signal_and_subflow)
			signal_and_subflow = false;
		else if (!select_local_address(pernet, msk, &local))
			break;

		fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH);

		__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);

		/* Special case for ID0: set the correct ID */
		if (local.addr.id == msk->mpc_endpoint_id)
			local.addr.id = 0;
		else /* local_addr_used is not decr for ID 0 */
			msk->pm.local_addr_used++;

		nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs);
		if (nr == 0)
			continue;

		spin_unlock_bh(&msk->pm.lock);
		for (i = 0; i < nr; i++)
			__mptcp_subflow_connect(sk, &local, &addrs[i]);
		spin_lock_bh(&msk->pm.lock);
	}
	mptcp_pm_nl_check_work_pending(msk);
}

static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
{
	mptcp_pm_create_subflow_or_signal_addr(msk);
}

static unsigned int
fill_local_addresses_vec_fullmesh(struct mptcp_sock *msk,
				  struct mptcp_addr_info *remote,
				  struct mptcp_pm_local *locals,
				  bool c_flag_case)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_pm_local *local;
	int i = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		bool is_id0;

		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
			continue;

		if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
			continue;

		local = &locals[i];
		local->addr = entry->addr;
		local->flags = entry->flags;
		local->ifindex = entry->ifindex;

		is_id0 = local->addr.id == msk->mpc_endpoint_id;

		if (c_flag_case &&
		    (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) {
			__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);

			if (!is_id0)
				msk->pm.local_addr_used++;
		}

		/* Special case for ID0: set the correct ID */
		if (is_id0)
			local->addr.id = 0;

		msk->pm.extra_subflows++;
		i++;

		if (msk->pm.extra_subflows >= limit_extra_subflows)
			break;
	}
	rcu_read_unlock();

	return i;
}

static unsigned int
fill_local_laminar_endp(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
			struct mptcp_pm_local *locals)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_pm_local *local;
	int found = 0;

	/* Forbid creation of new subflows matching existing ones, possibly
	 * already created by 'subflow' endpoints
	 */
	bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		if ((1 << inet_sk_state_load(ssk)) &
		    (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING |
		     TCPF_CLOSE))
			continue;

		__set_bit(subflow_get_local_id(subflow), unavail_id);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->endp_list, list) {
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR))
			continue;

		if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
			continue;

		if (test_bit(mptcp_endp_get_local_id(msk, &entry->addr),
			     unavail_id))
			continue;

		local = &locals[0];
		local->addr = entry->addr;
		local->flags = entry->flags;
		local->ifindex = entry->ifindex;

		if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
			__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);

			if (local->addr.id != msk->mpc_endpoint_id)
				msk->pm.local_addr_used++;
		}

		msk->pm.extra_subflows++;
		found = 1;
		break;
	}
	rcu_read_unlock();

	return found;
}

static unsigned int
fill_local_addresses_vec_c_flag(struct mptcp_sock *msk,
				struct mptcp_addr_info *remote,
				struct mptcp_pm_local *locals)
{
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk);
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_local *local;
	int i = 0;

	while (msk->pm.local_addr_used < endp_subflow_max) {
		local = &locals[i];

		if (!select_local_address(pernet, msk, local))
			break;

		__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);

		if (!mptcp_pm_addr_families_match(sk, &local->addr, remote))
			continue;

		if (local->addr.id == msk->mpc_endpoint_id)
			continue;

		msk->pm.local_addr_used++;
		msk->pm.extra_subflows++;
		i++;

		if (msk->pm.extra_subflows >= limit_extra_subflows)
			break;
	}

	return i;
}

static unsigned int
fill_local_address_any(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
		       struct mptcp_pm_local *local)
{
	struct sock *sk = (struct sock *)msk;

	memset(local, 0, sizeof(*local));
	local->addr.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		remote->family == AF_INET6 &&
		ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
		remote->family;

	if (!mptcp_pm_addr_families_match(sk, &local->addr, remote))
		return 0;

	msk->pm.extra_subflows++;

	return 1;
}

/* Fill all the local addresses into the array addrs[],
 * and return the array size.
 */
static unsigned int
fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
			 struct mptcp_pm_local *locals)
{
	bool c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk);
	int i;

	/* If there is at least one MPTCP endpoint with a fullmesh flag */
	i = fill_local_addresses_vec_fullmesh(msk, remote, locals, c_flag_case);
	if (i)
		return i;

	/* If there is at least one MPTCP endpoint with a laminar flag */
	if (mptcp_pm_get_endp_laminar_max(msk))
		return fill_local_laminar_endp(msk, remote, locals);

	/* Special case: peer sets the C flag, accept one ADD_ADDR if default
	 * limits are used -- accepting no ADD_ADDR -- and use subflow endpoints
	 */
	if (c_flag_case)
		return fill_local_addresses_vec_c_flag(msk, remote, locals);

	/* No special case: fill in the single 'IPADDRANY' local address */
	return fill_local_address_any(msk, remote, &locals[0]);
}

static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
	u8 limit_add_addr_accepted = mptcp_pm_get_limit_add_addr_accepted(msk);
	u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk);
	struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX];
	struct sock *sk = (struct sock *)msk;
	struct mptcp_addr_info remote;
	bool sf_created = false;
	int i, nr;

	pr_debug("accepted %d:%d remote family %d\n",
		 msk->pm.add_addr_accepted, limit_add_addr_accepted,
		 msk->pm.remote.family);

	remote = msk->pm.remote;
	mptcp_pm_announce_addr(msk, &remote, true);
	mptcp_pm_addr_send_ack(msk);
	mptcp_mpc_endpoint_setup(msk);

	if (lookup_subflow_by_daddr(&msk->conn_list, &remote))
		return;

	/* pick id 0 port, if none is provided the remote address */
	if (!remote.port)
		remote.port = sk->sk_dport;

	/* connect to the specified remote address, using whatever
	 * local address the routing configuration will pick.
	 */
	nr = fill_local_addresses_vec(msk, &remote, locals);
	if (nr == 0)
		return;

	spin_unlock_bh(&msk->pm.lock);
	for (i = 0; i < nr; i++)
		if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0)
			sf_created = true;
	spin_lock_bh(&msk->pm.lock);

	if (sf_created) {
		/* add_addr_accepted is not decr for ID 0 */
		if (remote.id)
			msk->pm.add_addr_accepted++;
		if (msk->pm.add_addr_accepted >= limit_add_addr_accepted ||
		    msk->pm.extra_subflows >= limit_extra_subflows)
			WRITE_ONCE(msk->pm.accept_addr, false);
	}
}

void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id)
{
	if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
		u8 limit_add_addr_accepted =
			mptcp_pm_get_limit_add_addr_accepted(msk);

		/* Note: if the subflow has been closed before, this
		 * add_addr_accepted counter will not be decremented.
		 */
		if (--msk->pm.add_addr_accepted < limit_add_addr_accepted)
			WRITE_ONCE(msk->pm.accept_addr, true);
	}
}

static bool address_use_port(struct mptcp_pm_addr_entry *entry)
{
	return (entry->flags &
		(MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) ==
	       MPTCP_PM_ADDR_FLAG_SIGNAL;
}

/* caller must ensure the RCU grace period is already elapsed */
static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
{
	if (entry->lsk)
		sock_release(entry->lsk);
	kfree(entry);
}

static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
					     struct mptcp_pm_addr_entry *entry,
					     bool needs_id, bool replace)
{
	struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
	int ret = -EINVAL;
	u8 addr_max;

	spin_lock_bh(&pernet->lock);
	/* to keep the code simple, don't do IDR-like allocation for address ID,
	 * just bail when we exceed limits
	 */
	if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
		pernet->next_id = 1;
	if (pernet->endpoints >= MPTCP_PM_ADDR_MAX) {
		ret = -ERANGE;
		goto out;
	}
	if (test_bit(entry->addr.id, pernet->id_bitmap)) {
		ret = -EBUSY;
		goto out;
	}

	/* do not insert duplicate address, differentiate on port only
	 * singled addresses
	 */
	if (!address_use_port(entry))
		entry->addr.port = 0;
	list_for_each_entry(cur, &pernet->endp_list, list) {
		if (mptcp_addresses_equal(&cur->addr, &entry->addr,
					  cur->addr.port || entry->addr.port)) {
			/* allow replacing the existing endpoint only if such
			 * endpoint is an implicit one and the user-space
			 * did not provide an endpoint id
			 */
			if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) {
				ret = -EEXIST;
				goto out;
			}
			if (entry->addr.id)
				goto out;

			/* allow callers that only need to look up the local
			 * addr's id to skip replacement. This allows them to
			 * avoid calling synchronize_rcu in the packet recv
			 * path.
			 */
			if (!replace) {
				kfree(entry);
				ret = cur->addr.id;
				goto out;
			}

			pernet->endpoints--;
			entry->addr.id = cur->addr.id;
			list_del_rcu(&cur->list);
			del_entry = cur;
			break;
		}
	}

	if (!entry->addr.id && needs_id) {
find_next:
		entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
						    MPTCP_PM_MAX_ADDR_ID + 1,
						    pernet->next_id);
		if (!entry->addr.id && pernet->next_id != 1) {
			pernet->next_id = 1;
			goto find_next;
		}
	}

	if (!entry->addr.id && needs_id)
		goto out;

	__set_bit(entry->addr.id, pernet->id_bitmap);
	if (entry->addr.id > pernet->next_id)
		pernet->next_id = entry->addr.id;

	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
		addr_max = pernet->endp_signal_max;
		WRITE_ONCE(pernet->endp_signal_max, addr_max + 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
		addr_max = pernet->endp_subflow_max;
		WRITE_ONCE(pernet->endp_subflow_max, addr_max + 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
		addr_max = pernet->endp_laminar_max;
		WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1);
	}

	pernet->endpoints++;
	if (!entry->addr.port)
		list_add_tail_rcu(&entry->list, &pernet->endp_list);
	else
		list_add_rcu(&entry->list, &pernet->endp_list);
	ret = entry->addr.id;

out:
	spin_unlock_bh(&pernet->lock);

	/* just replaced an existing entry, free it */
	if (del_entry) {
		synchronize_rcu();
		__mptcp_pm_release_addr_entry(del_entry);
	}
	return ret;
}
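
/*
 * Illustrative sketch of the ID allocation above (not part of the original
 * file): next_id is only a search hint, and entry->addr.id is a u8, so a
 * failed search (find_next_zero_bit() returning MPTCP_PM_MAX_ADDR_ID + 1,
 * i.e. 256) truncates to 0 and triggers at most one retry from the start:
 *
 *	id = find_next_zero_bit(bitmap, 256, next_id);	// 256 if none free
 *	if (!(u8)id && next_id != 1)
 *		id = find_next_zero_bit(bitmap, 256, 1);	// wrap around once
 *
 * ID 0 is never handed out here: it is reserved for the address of the
 * initial subflow.
 */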

static struct lock_class_key mptcp_slock_keys[2];
static struct lock_class_key mptcp_keys[2];

static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
					    struct mptcp_pm_addr_entry *entry)
{
	bool is_ipv6 = sk->sk_family == AF_INET6;
	int addrlen = sizeof(struct sockaddr_in);
	struct sockaddr_storage addr;
	struct sock *newsk, *ssk;
	int backlog = 1024;
	int err;

	err = sock_create_kern(sock_net(sk), entry->addr.family,
			       SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk);
	if (err)
		return err;

	newsk = entry->lsk->sk;
	if (!newsk)
		return -EINVAL;

	/* The subflow socket lock is acquired in a nested to the msk one
	 * in several places, even by the TCP stack, and this msk is a kernel
	 * socket: lockdep complains. Instead of propagating the _nested
	 * modifiers in several places, re-init the lock class for the msk
	 * socket to an mptcp specific one.
	 */
	sock_lock_init_class_and_name(newsk,
				      is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
				      &mptcp_slock_keys[is_ipv6],
				      is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
				      &mptcp_keys[is_ipv6]);

	lock_sock(newsk);
	ssk = __mptcp_nmpc_sk(mptcp_sk(newsk));
	release_sock(newsk);
	if (IS_ERR(ssk))
		return PTR_ERR(ssk);

	mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (entry->addr.family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	if (ssk->sk_family == AF_INET)
		err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (ssk->sk_family == AF_INET6)
		err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
#endif
	if (err)
		return err;

	/* We don't use mptcp_set_state() here because it needs to be called
	 * under the msk socket lock. For the moment, that will not bring
	 * anything more than only calling inet_sk_state_store(), because the
	 * old status is known (TCP_CLOSE).
	 */
	inet_sk_state_store(newsk, TCP_LISTEN);
	lock_sock(ssk);
	WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true);
	err = __inet_listen_sk(ssk, backlog);
	if (!err)
		mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
	release_sock(ssk);
	return err;
}

int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk,
			     struct mptcp_pm_addr_entry *skc)
{
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	int ret;

	pernet = pm_nl_get_pernet_from_msk(msk);

	rcu_read_lock();
	entry = __lookup_addr(pernet, &skc->addr);
	ret = entry ? entry->addr.id : -1;
	rcu_read_unlock();
	if (ret >= 0)
		return ret;

	/* address not found, add to local list */
	entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC);
	if (!entry)
		return -ENOMEM;

	entry->addr.port = 0;
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false);
	if (ret < 0)
		kfree(entry);

	return ret;
}

bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
	struct mptcp_pm_addr_entry *entry;
	bool backup;

	rcu_read_lock();
	entry = __lookup_addr(pernet, skc);
	backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	rcu_read_unlock();

	return backup;
}

static int mptcp_nl_add_subflow_or_signal_addr(struct net *net,
					       struct mptcp_addr_info *addr)
{
	struct mptcp_sock *msk;
	long s_slot = 0, s_num = 0;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		struct mptcp_addr_info mpc_addr;

		if (!READ_ONCE(msk->fully_established) ||
		    mptcp_pm_is_userspace(msk))
			goto next;

		/* if the endp linked to the init sf is re-added with a != ID */
		mptcp_local_address((struct sock_common *)msk, &mpc_addr);

		lock_sock(sk);
		spin_lock_bh(&msk->pm.lock);
		if (mptcp_addresses_equal(addr, &mpc_addr, addr->port))
			msk->mpc_endpoint_id = addr->id;
		mptcp_pm_create_subflow_or_signal_addr(msk);
		spin_unlock_bh(&msk->pm.lock);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
				      struct genl_info *info)
{
	struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];

	if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
					 mptcp_pm_address_nl_policy, info->extack) &&
	    tb[MPTCP_PM_ADDR_ATTR_ID])
		return true;
	return false;
}

/* Add an MPTCP endpoint */
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	struct nlattr *attr;
	int ret;

	if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR))
		return -EINVAL;

	attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
	ret = mptcp_pm_parse_entry(attr, info, true, &addr);
	if (ret < 0)
		return ret;

	if (addr.addr.port && !address_use_port(&addr)) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "flags must have signal and not subflow when using port");
		return -EINVAL;
	}

	if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
	    addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "flags mustn't have both signal and fullmesh");
		return -EINVAL;
	}

	if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr,
				    "can't create IMPLICIT endpoint");
		return -EINVAL;
	}

	entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT);
	if (!entry) {
		GENL_SET_ERR_MSG(info, "can't allocate addr");
		return -ENOMEM;
	}

	if (entry->addr.port) {
		ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
		if (ret) {
			GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret);
			goto out_free;
		}
	}
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
						!mptcp_pm_has_addr_attr_id(attr, info),
						true);
	if (ret < 0) {
		GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
		goto out_free;
	}

	mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr);
	return 0;

out_free:
	__mptcp_pm_release_addr_entry(entry);
	return ret;
}
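
/*
 * For reference (illustrative, assuming the iproute2 front-end): the
 * handler above backs "ip mptcp endpoint add", e.g.:
 *
 *	ip mptcp endpoint add 10.0.2.2 dev eth1 subflow
 *	ip mptcp endpoint add 192.168.1.2 port 10100 signal
 *
 * The second form carries a port, so it must be signal-only, and it makes
 * mptcp_pm_nl_create_listen_socket() create the listening socket backing
 * the announced address and port.
 */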

static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
				      const struct mptcp_addr_info *addr,
				      bool force)
{
	struct mptcp_rm_list list = { .nr = 0 };
	bool ret;

	list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr);

	ret = mptcp_remove_anno_list_by_saddr(msk, addr);
	if (ret || force) {
		spin_lock_bh(&msk->pm.lock);
		if (ret) {
			__set_bit(addr->id, msk->pm.id_avail_bitmap);
			msk->pm.add_addr_signaled--;
		}
		mptcp_pm_remove_addr(msk, &list);
		spin_unlock_bh(&msk->pm.lock);
	}
	return ret;
}

static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id)
{
	/* If it was marked as used, and not ID 0, decrement local_addr_used */
	if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) &&
	    id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0))
		msk->pm.local_addr_used--;
}

static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
						   const struct mptcp_pm_addr_entry *entry)
{
	const struct mptcp_addr_info *addr = &entry->addr;
	struct mptcp_rm_list list = { .nr = 1 };
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	pr_debug("remove_id=%d\n", addr->id);

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		bool remove_subflow;

		if (mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
		remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr);
		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
					  !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));

		list.ids[0] = mptcp_endp_get_local_id(msk, addr);
		if (remove_subflow) {
			spin_lock_bh(&msk->pm.lock);
			mptcp_pm_rm_subflow(msk, &list);
			spin_unlock_bh(&msk->pm.lock);
		}

		if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
			spin_lock_bh(&msk->pm.lock);
			__mark_subflow_endp_available(msk, list.ids[0]);
			spin_unlock_bh(&msk->pm.lock);
		}

		if (msk->mpc_endpoint_id == entry->addr.id)
			msk->mpc_endpoint_id = 0;
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

static int mptcp_nl_remove_id_zero_address(struct net *net,
					   struct mptcp_addr_info *addr)
{
	struct mptcp_rm_list list = { .nr = 0 };
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	list.ids[list.nr++] = 0;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;
		struct mptcp_addr_info msk_local;

		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		mptcp_local_address((struct sock_common *)msk, &msk_local);
		if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
			goto next;

		lock_sock(sk);
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_remove_addr(msk, &list);
		mptcp_pm_rm_subflow(msk, &list);
		__mark_subflow_endp_available(msk, 0);
		spin_unlock_bh(&msk->pm.lock);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}

	return 0;
}

/* Remove an MPTCP endpoint */
int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry addr, *entry;
	struct nlattr *attr;
	u8 addr_max;
	int ret;

	if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR))
		return -EINVAL;

	attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
	ret = mptcp_pm_parse_entry(attr, info, false, &addr);
	if (ret < 0)
		return ret;

	/* the zero id address is special: the first address used by the msk
	 * always gets such an id, so different subflows can have different zero
	 * id addresses. Additionally zero id is not accounted for in id_bitmap.
	 * Let's use an 'mptcp_rm_list' instead of the common remove code.
	 */
	if (addr.addr.id == 0)
		return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);

	spin_lock_bh(&pernet->lock);
	entry = __lookup_addr_by_id(pernet, addr.addr.id);
	if (!entry) {
		NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found");
		spin_unlock_bh(&pernet->lock);
		return -EINVAL;
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
		addr_max = pernet->endp_signal_max;
		WRITE_ONCE(pernet->endp_signal_max, addr_max - 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
		addr_max = pernet->endp_subflow_max;
		WRITE_ONCE(pernet->endp_subflow_max, addr_max - 1);
	}
	if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
		addr_max = pernet->endp_laminar_max;
		WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1);
	}

	pernet->endpoints--;
	list_del_rcu(&entry->list);
	__clear_bit(entry->addr.id, pernet->id_bitmap);
	spin_unlock_bh(&pernet->lock);

	mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
	synchronize_rcu();
	__mptcp_pm_release_addr_entry(entry);

	return ret;
}

static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
					      struct list_head *rm_list)
{
	struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
	struct mptcp_pm_addr_entry *entry;

	list_for_each_entry(entry, rm_list, list) {
		if (slist.nr < MPTCP_RM_IDS_MAX &&
		    mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
			slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);

		if (alist.nr < MPTCP_RM_IDS_MAX &&
		    mptcp_remove_anno_list_by_saddr(msk, &entry->addr))
			alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);
	}

	spin_lock_bh(&msk->pm.lock);
	if (alist.nr) {
		msk->pm.add_addr_signaled -= alist.nr;
		mptcp_pm_remove_addr(msk, &alist);
	}
	if (slist.nr)
		mptcp_pm_rm_subflow(msk, &slist);
	/* Reset counters: maybe some subflows have been removed before */
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	msk->pm.local_addr_used = 0;
	spin_unlock_bh(&msk->pm.lock);
}

static void mptcp_nl_flush_addrs_list(struct net *net,
				      struct list_head *rm_list)
{
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	if (list_empty(rm_list))
		return;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (!mptcp_pm_is_userspace(msk)) {
			lock_sock(sk);
			mptcp_pm_flush_addrs_and_subflows(msk, rm_list);
			release_sock(sk);
		}

		sock_put(sk);
		cond_resched();
	}
}

/* caller must ensure the RCU grace period is already elapsed */
static void __flush_addrs(struct list_head *list)
{
	while (!list_empty(list)) {
		struct mptcp_pm_addr_entry *cur;

		cur = list_entry(list->next,
				 struct mptcp_pm_addr_entry, list);
		list_del_rcu(&cur->list);
		__mptcp_pm_release_addr_entry(cur);
	}
}

static void __reset_counters(struct pm_nl_pernet *pernet)
{
	WRITE_ONCE(pernet->endp_signal_max, 0);
	WRITE_ONCE(pernet->endp_subflow_max, 0);
	WRITE_ONCE(pernet->endp_laminar_max, 0);
	pernet->endpoints = 0;
}

int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	LIST_HEAD(free_list);

	spin_lock_bh(&pernet->lock);
	list_splice_init(&pernet->endp_list, &free_list);
	__reset_counters(pernet);
	pernet->next_id = 1;
	bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	spin_unlock_bh(&pernet->lock);
	mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list);
	synchronize_rcu();
	__flush_addrs(&free_list);
	return 0;
}

int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
			 struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry *entry;
	int ret = -EINVAL;

	rcu_read_lock();
	entry = __lookup_addr_by_id(pernet, id);
	if (entry) {
		*addr = *entry;
		ret = 0;
	}
	rcu_read_unlock();

	return ret;
}

int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
			  struct netlink_callback *cb)
{
	struct net *net = sock_net(msg->sk);
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	int id = cb->args[0];
	int i;

	pernet = pm_nl_get_pernet(net);

	rcu_read_lock();
	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
		if (test_bit(i, pernet->id_bitmap)) {
			entry = __lookup_addr_by_id(pernet, i);
			if (!entry)
				break;

			if (entry->addr.id <= id)
				continue;

			if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0)
				break;

			id = entry->addr.id;
		}
	}
	rcu_read_unlock();

	cb->args[0] = id;
	return msg->len;
}

static int parse_limit(struct genl_info *info, int id, unsigned int *limit)
{
	struct nlattr *attr = info->attrs[id];

	if (!attr)
		return 0;

	*limit = nla_get_u32(attr);
	if (*limit > MPTCP_PM_ADDR_MAX) {
		NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr,
					"limit greater than maximum (%u)",
					MPTCP_PM_ADDR_MAX);
		return -EINVAL;
	}
	return 0;
}

int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	unsigned int rcv_addrs, subflows;
	int ret;

	spin_lock_bh(&pernet->lock);
	rcv_addrs = pernet->limit_add_addr_accepted;
	ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs);
	if (ret)
		goto unlock;

	subflows = pernet->limit_extra_subflows;
	ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows);
	if (ret)
		goto unlock;

	WRITE_ONCE(pernet->limit_add_addr_accepted, rcv_addrs);
	WRITE_ONCE(pernet->limit_extra_subflows, subflows);

unlock:
	spin_unlock_bh(&pernet->lock);
	return ret;
}
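
/*
 * For reference (illustrative, assuming the iproute2 front-end): the
 * handler above backs "ip mptcp limits set", e.g.:
 *
 *	ip mptcp limits set add_addr_accepted 4 subflows 2
 *
 * Values above MPTCP_PM_ADDR_MAX (8) are rejected by parse_limit(); the
 * matching GET handler follows below.
 */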

int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct sk_buff *msg;
	void *reply;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0,
				  MPTCP_PM_CMD_GET_LIMITS);
	if (!reply)
		goto fail;

	if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS,
			READ_ONCE(pernet->limit_add_addr_accepted)))
		goto fail;

	if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS,
			READ_ONCE(pernet->limit_extra_subflows)))
		goto fail;

	genlmsg_end(msg, reply);
	return genlmsg_reply(msg, info);

fail:
	GENL_SET_ERR_MSG(info, "not enough space in Netlink message");
	nlmsg_free(msg);
	return -EMSGSIZE;
}

static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
				 struct mptcp_addr_info *addr)
{
	struct mptcp_rm_list list = { .nr = 0 };

	list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr);

	spin_lock_bh(&msk->pm.lock);
	mptcp_pm_rm_subflow(msk, &list);
	__mark_subflow_endp_available(msk, list.ids[0]);
	mptcp_pm_create_subflow_or_signal_addr(msk);
	spin_unlock_bh(&msk->pm.lock);
}

static void mptcp_pm_nl_set_flags_all(struct net *net,
				      struct mptcp_pm_addr_entry *local,
				      u8 changed)
{
	u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW);
	u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	long s_slot = 0, s_num = 0;
	struct mptcp_sock *msk;

	if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow)
		return;

	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
		struct sock *sk = (struct sock *)msk;

		if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
			goto next;

		lock_sock(sk);
		if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
			mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup);
		/* Subflows will only be recreated if the SUBFLOW flag is set */
		if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH))
			mptcp_pm_nl_fullmesh(msk, &local->addr);
		release_sock(sk);

next:
		sock_put(sk);
		cond_resched();
	}
}

int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
			  struct genl_info *info)
{
	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
	u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
			   MPTCP_PM_ADDR_FLAG_FULLMESH;
	struct net *net = genl_info_net(info);
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	u8 lookup_by_id = 0;

	pernet = pm_nl_get_pernet(net);

	if (local->addr.family == AF_UNSPEC) {
		lookup_by_id = 1;
		if (!local->addr.id) {
			NL_SET_ERR_MSG_ATTR(info->extack, attr,
					    "missing address ID");
			return -EOPNOTSUPP;
		}
	}

	spin_lock_bh(&pernet->lock);
	entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) :
			       __lookup_addr(pernet, &local->addr);
	if (!entry) {
		spin_unlock_bh(&pernet->lock);
		NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found");
		return -EINVAL;
	}
	if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
	    (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
			     MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
		spin_unlock_bh(&pernet->lock);
		NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags");
		return -EINVAL;
	}

	changed = (local->flags ^ entry->flags) & mask;
	entry->flags = (entry->flags & ~mask) | (local->flags & mask);
	*local = *entry;
	spin_unlock_bh(&pernet->lock);

	mptcp_pm_nl_set_flags_all(net, local, changed);
	return 0;
}

bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);

	if (msk->pm.extra_subflows == mptcp_pm_get_limit_extra_subflows(msk) ||
	    (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
			       MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) {
		WRITE_ONCE(msk->pm.work_pending, false);
		return false;
	}
	return true;
}

/* Called under PM lock */
void __mptcp_pm_kernel_worker(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
		mptcp_pm_nl_add_addr_received(msk);
	}
	if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
		mptcp_pm_nl_fully_established(msk);
	}
	if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
		pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
		mptcp_pm_nl_subflow_established(msk);
	}
}

static int __net_init pm_nl_init_net(struct net *net)
{
	struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);

	INIT_LIST_HEAD_RCU(&pernet->endp_list);

	/* Cit. 2 subflows ought to be enough for anybody. */
	pernet->limit_extra_subflows = 2;
	pernet->next_id = 1;
	spin_lock_init(&pernet->lock);

	/* No need to initialize other pernet fields, the struct is zeroed at
	 * allocation time.
	 */

	return 0;
}

static void __net_exit pm_nl_exit_net(struct list_head *net_list)
{
	struct net *net;

	list_for_each_entry(net, net_list, exit_list) {
		struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);

		/* net is removed from namespace list, can't race with
		 * other modifiers, also netns core already waited for a
		 * RCU grace period.
		 */
		__flush_addrs(&pernet->endp_list);
	}
}

static struct pernet_operations mptcp_pm_pernet_ops = {
	.init = pm_nl_init_net,
	.exit_batch = pm_nl_exit_net,
	.id = &pm_nl_pernet_id,
	.size = sizeof(struct pm_nl_pernet),
};

struct mptcp_pm_ops mptcp_pm_kernel = {
	.name		= "kernel",
	.owner		= THIS_MODULE,
};

void __init mptcp_pm_kernel_register(void)
{
	if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0)
		panic("Failed to register MPTCP PM pernet subsystem.\n");

	mptcp_pm_register(&mptcp_pm_kernel);
}