GitHub Repository: torvalds/linux
Path: blob/master/net/ipv6/ip6_output.c
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* IPv6 output functions
4
* Linux INET6 implementation
5
*
6
* Authors:
7
* Pedro Roque <[email protected]>
8
*
9
* Based on linux/net/ipv4/ip_output.c
10
*
11
* Changes:
12
* A.N.Kuznetsov : arithmetic in fragmentation.
13
* extension headers are implemented.
14
* route changes now work.
15
* ip6_forward does not confuse sniffers.
16
* etc.
17
*
18
* H. von Brand : Added missing #include <linux/string.h>
19
* Imran Patel : frag id should be in NBO
20
* Kazunori MIYAZAWA @USAGI
21
* : add ip6_append_data and related functions
22
* for datagram xmit
23
*/
24
25
#include <linux/errno.h>
26
#include <linux/kernel.h>
27
#include <linux/string.h>
28
#include <linux/socket.h>
29
#include <linux/net.h>
30
#include <linux/netdevice.h>
31
#include <linux/if_arp.h>
32
#include <linux/in6.h>
33
#include <linux/tcp.h>
34
#include <linux/route.h>
35
#include <linux/module.h>
36
#include <linux/slab.h>
37
38
#include <linux/bpf-cgroup.h>
39
#include <linux/netfilter.h>
40
#include <linux/netfilter_ipv6.h>
41
42
#include <net/sock.h>
43
#include <net/snmp.h>
44
45
#include <net/gso.h>
46
#include <net/ipv6.h>
47
#include <net/ndisc.h>
48
#include <net/protocol.h>
49
#include <net/ip6_route.h>
50
#include <net/addrconf.h>
51
#include <net/rawv6.h>
52
#include <net/icmp.h>
53
#include <net/xfrm.h>
54
#include <net/checksum.h>
55
#include <linux/mroute6.h>
56
#include <net/l3mdev.h>
57
#include <net/lwtunnel.h>
58
#include <net/ip_tunnels.h>
59
60
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
61
{
62
struct dst_entry *dst = skb_dst(skb);
63
struct net_device *dev = dst_dev_rcu(dst);
64
struct inet6_dev *idev = ip6_dst_idev(dst);
65
unsigned int hh_len = LL_RESERVED_SPACE(dev);
66
const struct in6_addr *daddr, *nexthop;
67
struct ipv6hdr *hdr;
68
struct neighbour *neigh;
69
int ret;
70
71
/* Be paranoid, rather than too clever. */
72
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
73
/* idev stays alive because we hold rcu_read_lock(). */
74
skb = skb_expand_head(skb, hh_len);
75
if (!skb) {
76
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
77
return -ENOMEM;
78
}
79
}
80
81
hdr = ipv6_hdr(skb);
82
daddr = &hdr->daddr;
83
if (ipv6_addr_is_multicast(daddr)) {
84
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
85
((mroute6_is_socket(net, skb) &&
86
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
87
ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
88
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
89
90
/* Do not check for IFF_ALLMULTI; multicast routing
91
is not supported in any case.
92
*/
93
if (newskb)
94
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
95
net, sk, newskb, NULL, newskb->dev,
96
dev_loopback_xmit);
97
98
if (hdr->hop_limit == 0) {
99
IP6_INC_STATS(net, idev,
100
IPSTATS_MIB_OUTDISCARDS);
101
kfree_skb(skb);
102
return 0;
103
}
104
}
105
106
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
107
if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
108
!(dev->flags & IFF_LOOPBACK)) {
109
kfree_skb(skb);
110
return 0;
111
}
112
}
113
114
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
115
int res = lwtunnel_xmit(skb);
116
117
if (res != LWTUNNEL_XMIT_CONTINUE)
118
return res;
119
}
120
121
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
122
123
nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
124
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
125
126
if (IS_ERR_OR_NULL(neigh)) {
127
if (unlikely(!neigh))
128
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
129
if (IS_ERR(neigh)) {
130
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
131
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
132
return -EINVAL;
133
}
134
}
135
sock_confirm_neigh(skb, neigh);
136
ret = neigh_output(neigh, skb, false);
137
return ret;
138
}
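/*
 * Note: ip6_finish_output2() is the last IPv6-level step before the device
 * layer. Under RCU it resolves the next hop to a neighbour entry
 * (__ipv6_neigh_lookup_noref(), creating one with __neigh_create() if
 * needed) and hands the skb to neigh_output(), which transmits it either
 * via the cached hardware header or through the neighbour state machine.
 */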
139
140
static int
141
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
142
struct sk_buff *skb, unsigned int mtu)
143
{
144
struct sk_buff *segs, *nskb;
145
netdev_features_t features;
146
int ret = 0;
147
148
/* Please see corresponding comment in ip_finish_output_gso
149
* describing the cases where GSO segment length exceeds the
150
* egress MTU.
151
*/
152
features = netif_skb_features(skb);
153
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
154
if (IS_ERR_OR_NULL(segs)) {
155
kfree_skb(skb);
156
return -ENOMEM;
157
}
158
159
consume_skb(skb);
160
161
skb_list_walk_safe(segs, segs, nskb) {
162
int err;
163
164
skb_mark_not_on_list(segs);
165
/* Last GSO segment can be smaller than gso_size (and MTU).
166
* Adding a fragment header would produce an "atomic fragment",
167
* which is considered harmful (RFC-8021). Avoid that.
168
*/
169
err = segs->len > mtu ?
170
ip6_fragment(net, sk, segs, ip6_finish_output2) :
171
ip6_finish_output2(net, sk, segs);
172
if (err && ret == 0)
173
ret = err;
174
}
175
176
return ret;
177
}
178
179
static int ip6_finish_output_gso(struct net *net, struct sock *sk,
180
struct sk_buff *skb, unsigned int mtu)
181
{
182
if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
183
!skb_gso_validate_network_len(skb, mtu))
184
return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
185
186
return ip6_finish_output2(net, sk, skb);
187
}
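/*
 * Note: a GSO skb is passed through untouched when it is a BIG TCP "fake
 * jumbogram" (IP6SKB_FAKEJUMBO) or when every resulting segment fits the
 * egress MTU; otherwise it is software-segmented above and any segment
 * still larger than the MTU is fragmented on the slow path.
 */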
188
189
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
190
{
191
unsigned int mtu;
192
193
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
194
/* Policy lookup after SNAT yielded a new policy */
195
if (skb_dst(skb)->xfrm) {
196
IP6CB(skb)->flags |= IP6SKB_REROUTED;
197
return dst_output(net, sk, skb);
198
}
199
#endif
200
201
mtu = ip6_skb_dst_mtu(skb);
202
if (skb_is_gso(skb))
203
return ip6_finish_output_gso(net, sk, skb, mtu);
204
205
if (skb->len > mtu ||
206
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
207
return ip6_fragment(net, sk, skb, ip6_finish_output2);
208
209
return ip6_finish_output2(net, sk, skb);
210
}
211
212
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
213
{
214
int ret;
215
216
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
217
switch (ret) {
218
case NET_XMIT_SUCCESS:
219
case NET_XMIT_CN:
220
return __ip6_finish_output(net, sk, skb) ? : ret;
221
default:
222
kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
223
return ret;
224
}
225
}
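/*
 * Note: the "?:" above returns the error from __ip6_finish_output() when it
 * is non-zero and otherwise propagates the BPF verdict (NET_XMIT_SUCCESS or
 * NET_XMIT_CN). Any other verdict from the cgroup egress program drops the
 * packet with SKB_DROP_REASON_BPF_CGROUP_EGRESS.
 */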
226
227
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
228
{
229
struct dst_entry *dst = skb_dst(skb);
230
struct net_device *dev, *indev = skb->dev;
231
struct inet6_dev *idev;
232
int ret;
233
234
skb->protocol = htons(ETH_P_IPV6);
235
rcu_read_lock();
236
dev = dst_dev_rcu(dst);
237
idev = ip6_dst_idev(dst);
238
skb->dev = dev;
239
240
if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
241
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
242
rcu_read_unlock();
243
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
244
return 0;
245
}
246
247
ret = NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
248
net, sk, skb, indev, dev,
249
ip6_finish_output,
250
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
251
rcu_read_unlock();
252
return ret;
253
}
254
EXPORT_SYMBOL(ip6_output);
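/*
 * Call-chain sketch (approximate): locally generated packets typically
 * reach this file as
 *
 *	ip6_local_out() -> dst_output() -> ip6_output()
 *	  -> NF_INET_POST_ROUTING -> ip6_finish_output()
 *	  -> __ip6_finish_output() (GSO / fragmentation decision)
 *	  -> ip6_finish_output2() -> neigh_output() -> device queue
 *
 * Forwarded packets enter at ip6_forward() and join the same tail of the
 * chain through dst_output() in ip6_forward_finish().
 */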
255
256
bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
257
{
258
if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
259
return ip6_default_np_autolabel(net);
260
return inet6_test_bit(AUTOFLOWLABEL, sk);
261
}
262
263
/*
264
* xmit an sk_buff (used by TCP and SCTP)
265
* Note : socket lock is not held for SYNACK packets, but might be modified
266
* by calls to skb_set_owner_w() and ipv6_local_error(),
267
* which are using proper atomic operations or spinlocks.
268
*/
269
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
270
__u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
271
{
272
const struct ipv6_pinfo *np = inet6_sk(sk);
273
struct in6_addr *first_hop = &fl6->daddr;
274
struct dst_entry *dst = skb_dst(skb);
275
struct inet6_dev *idev = ip6_dst_idev(dst);
276
struct hop_jumbo_hdr *hop_jumbo;
277
int hoplen = sizeof(*hop_jumbo);
278
struct net *net = sock_net(sk);
279
unsigned int head_room;
280
struct net_device *dev;
281
struct ipv6hdr *hdr;
282
u8 proto = fl6->flowi6_proto;
283
int seg_len = skb->len;
284
int ret, hlimit = -1;
285
u32 mtu;
286
287
rcu_read_lock();
288
289
dev = dst_dev_rcu(dst);
290
head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
291
if (opt)
292
head_room += opt->opt_nflen + opt->opt_flen;
293
294
if (unlikely(head_room > skb_headroom(skb))) {
295
/* idev stays alive while we hold rcu_read_lock(). */
296
skb = skb_expand_head(skb, head_room);
297
if (!skb) {
298
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
299
ret = -ENOBUFS;
300
goto unlock;
301
}
302
}
303
304
if (opt) {
305
seg_len += opt->opt_nflen + opt->opt_flen;
306
307
if (opt->opt_flen)
308
ipv6_push_frag_opts(skb, opt, &proto);
309
310
if (opt->opt_nflen)
311
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
312
&fl6->saddr);
313
}
314
315
if (unlikely(seg_len > IPV6_MAXPLEN)) {
316
hop_jumbo = skb_push(skb, hoplen);
317
318
hop_jumbo->nexthdr = proto;
319
hop_jumbo->hdrlen = 0;
320
hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
321
hop_jumbo->tlv_len = 4;
322
hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);
323
324
proto = IPPROTO_HOPOPTS;
325
seg_len = 0;
326
IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
327
}
328
329
skb_push(skb, sizeof(struct ipv6hdr));
330
skb_reset_network_header(skb);
331
hdr = ipv6_hdr(skb);
332
333
/*
334
* Fill in the IPv6 header
335
*/
336
if (np)
337
hlimit = READ_ONCE(np->hop_limit);
338
if (hlimit < 0)
339
hlimit = ip6_dst_hoplimit(dst);
340
341
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
342
ip6_autoflowlabel(net, sk), fl6));
343
344
hdr->payload_len = htons(seg_len);
345
hdr->nexthdr = proto;
346
hdr->hop_limit = hlimit;
347
348
hdr->saddr = fl6->saddr;
349
hdr->daddr = *first_hop;
350
351
skb->protocol = htons(ETH_P_IPV6);
352
skb->priority = priority;
353
skb->mark = mark;
354
355
mtu = dst_mtu(dst);
356
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
357
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
358
359
/* if egress device is enslaved to an L3 master device pass the
360
* skb to its handler for processing
361
*/
362
skb = l3mdev_ip6_out((struct sock *)sk, skb);
363
if (unlikely(!skb)) {
364
ret = 0;
365
goto unlock;
366
}
367
368
/* hooks should never assume socket lock is held.
369
* we promote our socket to non const
370
*/
371
ret = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
372
net, (struct sock *)sk, skb, NULL, dev,
373
dst_output);
374
goto unlock;
375
}
376
377
ret = -EMSGSIZE;
378
skb->dev = dev;
379
/* ipv6_local_error() does not require socket lock,
380
* we promote our socket to non const
381
*/
382
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
383
384
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
385
kfree_skb(skb);
386
unlock:
387
rcu_read_unlock();
388
return ret;
389
}
390
EXPORT_SYMBOL(ip6_xmit);
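/*
 * Usage sketch (illustrative, approximate - see the TCP/SCTP callers for
 * the exact invocations): a stream protocol hands ip6_xmit() an skb that
 * already carries the transport header and has a dst attached, e.g.
 *
 *	skb_dst_set_noref(skb, dst);
 *	err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, tclass, priority);
 *
 * ip6_xmit() then prepends any extension headers and the IPv6 header,
 * inserts a Hop-by-Hop jumbo option if the payload exceeds IPV6_MAXPLEN,
 * and sends the packet through NF_INET_LOCAL_OUT / dst_output().
 */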
391
392
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
393
{
394
struct ip6_ra_chain *ra;
395
struct sock *last = NULL;
396
397
read_lock(&ip6_ra_lock);
398
for (ra = ip6_ra_chain; ra; ra = ra->next) {
399
struct sock *sk = ra->sk;
400
if (sk && ra->sel == sel &&
401
(!sk->sk_bound_dev_if ||
402
sk->sk_bound_dev_if == skb->dev->ifindex)) {
403
404
if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
405
!net_eq(sock_net(sk), dev_net(skb->dev))) {
406
continue;
407
}
408
if (last) {
409
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
410
if (skb2)
411
rawv6_rcv(last, skb2);
412
}
413
last = sk;
414
}
415
}
416
417
if (last) {
418
rawv6_rcv(last, skb);
419
read_unlock(&ip6_ra_lock);
420
return 1;
421
}
422
read_unlock(&ip6_ra_lock);
423
return 0;
424
}
425
426
static int ip6_forward_proxy_check(struct sk_buff *skb)
427
{
428
struct ipv6hdr *hdr = ipv6_hdr(skb);
429
u8 nexthdr = hdr->nexthdr;
430
__be16 frag_off;
431
int offset;
432
433
if (ipv6_ext_hdr(nexthdr)) {
434
offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
435
if (offset < 0)
436
return 0;
437
} else
438
offset = sizeof(struct ipv6hdr);
439
440
if (nexthdr == IPPROTO_ICMPV6) {
441
struct icmp6hdr *icmp6;
442
443
if (!pskb_may_pull(skb, (skb_network_header(skb) +
444
offset + 1 - skb->data)))
445
return 0;
446
447
icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
448
449
switch (icmp6->icmp6_type) {
450
case NDISC_ROUTER_SOLICITATION:
451
case NDISC_ROUTER_ADVERTISEMENT:
452
case NDISC_NEIGHBOUR_SOLICITATION:
453
case NDISC_NEIGHBOUR_ADVERTISEMENT:
454
case NDISC_REDIRECT:
455
/* For a reaction involving a unicast neighbor discovery
456
* message destined to the proxied address, pass it to
457
* the input function.
458
*/
459
return 1;
460
default:
461
break;
462
}
463
}
464
465
/*
466
* The proxying router can't forward traffic sent to a link-local
467
* address, so signal the sender and discard the packet. This
468
* behavior is clarified by the MIPv6 specification.
469
*/
470
if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
471
dst_link_failure(skb);
472
return -1;
473
}
474
475
return 0;
476
}
477
478
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
479
struct sk_buff *skb)
480
{
481
#ifdef CONFIG_NET_SWITCHDEV
482
if (skb->offload_l3_fwd_mark) {
483
consume_skb(skb);
484
return 0;
485
}
486
#endif
487
488
skb_clear_tstamp(skb);
489
return dst_output(net, sk, skb);
490
}
491
492
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
493
{
494
if (skb->len <= mtu)
495
return false;
496
497
/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
498
if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
499
return true;
500
501
if (skb->ignore_df)
502
return false;
503
504
if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
505
return false;
506
507
return true;
508
}
509
510
int ip6_forward(struct sk_buff *skb)
511
{
512
struct dst_entry *dst = skb_dst(skb);
513
struct ipv6hdr *hdr = ipv6_hdr(skb);
514
struct inet6_skb_parm *opt = IP6CB(skb);
515
struct net *net = dev_net(dst_dev(dst));
516
struct net_device *dev;
517
struct inet6_dev *idev;
518
SKB_DR(reason);
519
u32 mtu;
520
521
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
522
if (!READ_ONCE(net->ipv6.devconf_all->forwarding) &&
523
(!idev || !READ_ONCE(idev->cnf.force_forwarding)))
524
goto error;
525
526
if (skb->pkt_type != PACKET_HOST)
527
goto drop;
528
529
if (unlikely(skb->sk))
530
goto drop;
531
532
if (skb_warn_if_lro(skb))
533
goto drop;
534
535
if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
536
(!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
537
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
538
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
539
goto drop;
540
}
541
542
skb_forward_csum(skb);
543
544
/*
545
* We DO NOT do any processing on
546
* RA packets, pushing them to user level AS IS
547
* without any WARRANTY that the application will be able
548
* to interpret them. The reason is that we
549
* cannot do anything clever here.
550
*
551
* We are not an end node, so if the packet contains
552
* AH/ESP, we cannot do anything.
553
* Defragmentation would also be a mistake; RA packets
554
* cannot be fragmented, because there is no guarantee
555
* that different fragments will go along one path. --ANK
556
*/
557
if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
558
if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
559
return 0;
560
}
561
562
/*
563
* check and decrement ttl
564
*/
565
if (hdr->hop_limit <= 1) {
566
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
567
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
568
569
kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
570
return -ETIMEDOUT;
571
}
572
573
/* XXX: idev->cnf.proxy_ndp? */
574
if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
575
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) {
576
int proxied = ip6_forward_proxy_check(skb);
577
if (proxied > 0) {
578
/* It's tempting to decrease the hop limit
579
* here by 1, as we do at the end of the
580
* function too.
581
*
582
* But that would be incorrect, as proxying is
583
* not forwarding. The ip6_input function
584
* will handle this packet locally, and it
585
* depends on the hop limit being unchanged.
586
*
587
* One example is the NDP hop limit, that
588
* always has to stay 255, but other would be
589
* similar checks around RA packets, where the
590
* user can even change the desired limit.
591
*/
592
return ip6_input(skb);
593
} else if (proxied < 0) {
594
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
595
goto drop;
596
}
597
}
598
599
if (!xfrm6_route_forward(skb)) {
600
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
601
SKB_DR_SET(reason, XFRM_POLICY);
602
goto drop;
603
}
604
dst = skb_dst(skb);
605
dev = dst_dev(dst);
606
/* IPv6 specs say nothing about it, but it is clear that we cannot
607
send redirects to source routed frames.
608
We don't send redirects to frames decapsulated from IPsec.
609
*/
610
if (IP6CB(skb)->iif == dev->ifindex &&
611
opt->srcrt == 0 && !skb_sec_path(skb)) {
612
struct in6_addr *target = NULL;
613
struct inet_peer *peer;
614
struct rt6_info *rt;
615
616
/*
617
* incoming and outgoing devices are the same
618
* send a redirect.
619
*/
620
621
rt = dst_rt6_info(dst);
622
if (rt->rt6i_flags & RTF_GATEWAY)
623
target = &rt->rt6i_gateway;
624
else
625
target = &hdr->daddr;
626
627
rcu_read_lock();
628
peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
629
630
/* Limit redirects both by destination (here)
631
and by source (inside ndisc_send_redirect)
632
*/
633
if (inet_peer_xrlim_allow(peer, 1*HZ))
634
ndisc_send_redirect(skb, target);
635
rcu_read_unlock();
636
} else {
637
int addrtype = ipv6_addr_type(&hdr->saddr);
638
639
/* This check is security critical. */
640
if (addrtype == IPV6_ADDR_ANY ||
641
addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
642
goto error;
643
if (addrtype & IPV6_ADDR_LINKLOCAL) {
644
icmpv6_send(skb, ICMPV6_DEST_UNREACH,
645
ICMPV6_NOT_NEIGHBOUR, 0);
646
goto error;
647
}
648
}
649
650
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
651
652
mtu = ip6_dst_mtu_maybe_forward(dst, true);
653
if (mtu < IPV6_MIN_MTU)
654
mtu = IPV6_MIN_MTU;
655
656
if (ip6_pkt_too_big(skb, mtu)) {
657
/* Again, force OUTPUT device used as source address */
658
skb->dev = dev;
659
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
660
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
661
__IP6_INC_STATS(net, ip6_dst_idev(dst),
662
IPSTATS_MIB_FRAGFAILS);
663
kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
664
return -EMSGSIZE;
665
}
666
667
if (skb_cow(skb, dev->hard_header_len)) {
668
__IP6_INC_STATS(net, ip6_dst_idev(dst),
669
IPSTATS_MIB_OUTDISCARDS);
670
goto drop;
671
}
672
673
hdr = ipv6_hdr(skb);
674
675
/* Mangling hops number delayed to point after skb COW */
676
677
hdr->hop_limit--;
678
679
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
680
net, NULL, skb, skb->dev, dev,
681
ip6_forward_finish);
682
683
error:
684
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
685
SKB_DR_SET(reason, IP_INADDRERRORS);
686
drop:
687
kfree_skb_reason(skb, reason);
688
return -EINVAL;
689
}
690
691
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
692
{
693
to->pkt_type = from->pkt_type;
694
to->priority = from->priority;
695
to->protocol = from->protocol;
696
skb_dst_drop(to);
697
skb_dst_set(to, dst_clone(skb_dst(from)));
698
to->dev = from->dev;
699
to->mark = from->mark;
700
701
skb_copy_hash(to, from);
702
703
#ifdef CONFIG_NET_SCHED
704
to->tc_index = from->tc_index;
705
#endif
706
nf_copy(to, from);
707
skb_ext_copy(to, from);
708
skb_copy_secmark(to, from);
709
}
710
711
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
712
u8 nexthdr, __be32 frag_id,
713
struct ip6_fraglist_iter *iter)
714
{
715
unsigned int first_len;
716
struct frag_hdr *fh;
717
718
/* BUILD HEADER */
719
*prevhdr = NEXTHDR_FRAGMENT;
720
iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
721
if (!iter->tmp_hdr)
722
return -ENOMEM;
723
724
iter->frag = skb_shinfo(skb)->frag_list;
725
skb_frag_list_init(skb);
726
727
iter->offset = 0;
728
iter->hlen = hlen;
729
iter->frag_id = frag_id;
730
iter->nexthdr = nexthdr;
731
732
__skb_pull(skb, hlen);
733
fh = __skb_push(skb, sizeof(struct frag_hdr));
734
__skb_push(skb, hlen);
735
skb_reset_network_header(skb);
736
memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
737
738
fh->nexthdr = nexthdr;
739
fh->reserved = 0;
740
fh->frag_off = htons(IP6_MF);
741
fh->identification = frag_id;
742
743
first_len = skb_pagelen(skb);
744
skb->data_len = first_len - skb_headlen(skb);
745
skb->len = first_len;
746
ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
747
748
return 0;
749
}
750
EXPORT_SYMBOL(ip6_fraglist_init);
751
752
void ip6_fraglist_prepare(struct sk_buff *skb,
753
struct ip6_fraglist_iter *iter)
754
{
755
struct sk_buff *frag = iter->frag;
756
unsigned int hlen = iter->hlen;
757
struct frag_hdr *fh;
758
759
frag->ip_summed = CHECKSUM_NONE;
760
skb_reset_transport_header(frag);
761
fh = __skb_push(frag, sizeof(struct frag_hdr));
762
__skb_push(frag, hlen);
763
skb_reset_network_header(frag);
764
memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
765
iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
766
fh->nexthdr = iter->nexthdr;
767
fh->reserved = 0;
768
fh->frag_off = htons(iter->offset);
769
if (frag->next)
770
fh->frag_off |= htons(IP6_MF);
771
fh->identification = iter->frag_id;
772
ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
773
ip6_copy_metadata(frag, skb);
774
}
775
EXPORT_SYMBOL(ip6_fraglist_prepare);
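/*
 * Usage sketch (illustrative): the fast fragmentation path reuses an
 * existing frag_list instead of copying data, in the pattern used by
 * ip6_fragment() below:
 *
 *	err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter);
 *	for (;;) {
 *		if (iter.frag)
 *			ip6_fraglist_prepare(skb, &iter);
 *		err = output(net, sk, skb);
 *		if (err || !iter.frag)
 *			break;
 *		skb = ip6_fraglist_next(&iter);
 *	}
 *	kfree(iter.tmp_hdr);
 */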
776
777
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
778
unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
779
u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
780
{
781
state->prevhdr = prevhdr;
782
state->nexthdr = nexthdr;
783
state->frag_id = frag_id;
784
785
state->hlen = hlen;
786
state->mtu = mtu;
787
788
state->left = skb->len - hlen; /* Space per frame */
789
state->ptr = hlen; /* Where to start from */
790
791
state->hroom = hdr_room;
792
state->troom = needed_tailroom;
793
794
state->offset = 0;
795
}
796
EXPORT_SYMBOL(ip6_frag_init);
797
798
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
799
{
800
u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
801
struct sk_buff *frag;
802
struct frag_hdr *fh;
803
unsigned int len;
804
805
len = state->left;
806
/* IF: it doesn't fit, use 'mtu' - the data space left */
807
if (len > state->mtu)
808
len = state->mtu;
809
/* IF: we are not sending up to and including the packet end
810
then align the next start on an eight byte boundary */
811
if (len < state->left)
812
len &= ~7;
813
814
/* Allocate buffer */
815
frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
816
state->hroom + state->troom, GFP_ATOMIC);
817
if (!frag)
818
return ERR_PTR(-ENOMEM);
819
820
/*
821
* Set up data on packet
822
*/
823
824
ip6_copy_metadata(frag, skb);
825
skb_reserve(frag, state->hroom);
826
skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
827
skb_reset_network_header(frag);
828
fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
829
frag->transport_header = (frag->network_header + state->hlen +
830
sizeof(struct frag_hdr));
831
832
/*
833
* Charge the memory for the fragment to any owner
834
* it might possess
835
*/
836
if (skb->sk)
837
skb_set_owner_w(frag, skb->sk);
838
839
/*
840
* Copy the packet header into the new buffer.
841
*/
842
skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
843
844
fragnexthdr_offset = skb_network_header(frag);
845
fragnexthdr_offset += prevhdr - skb_network_header(skb);
846
*fragnexthdr_offset = NEXTHDR_FRAGMENT;
847
848
/*
849
* Build fragment header.
850
*/
851
fh->nexthdr = state->nexthdr;
852
fh->reserved = 0;
853
fh->identification = state->frag_id;
854
855
/*
856
* Copy a block of the IP datagram.
857
*/
858
BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
859
len));
860
state->left -= len;
861
862
fh->frag_off = htons(state->offset);
863
if (state->left > 0)
864
fh->frag_off |= htons(IP6_MF);
865
ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
866
867
state->ptr += len;
868
state->offset += len;
869
870
return frag;
871
}
872
EXPORT_SYMBOL(ip6_frag_next);
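/*
 * Usage sketch (illustrative): the slow fragmentation path allocates a
 * fresh skb per fragment and copies data into it, as in ip6_fragment()
 * below:
 *
 *	ip6_frag_init(skb, hlen, mtu, tailroom, hroom, prevhdr, nexthdr,
 *		      frag_id, &state);
 *	while (state.left > 0) {
 *		frag = ip6_frag_next(skb, &state);
 *		if (IS_ERR(frag))
 *			break;
 *		err = output(net, sk, frag);
 *	}
 *	consume_skb(skb);
 */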
873
874
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
875
int (*output)(struct net *, struct sock *, struct sk_buff *))
876
{
877
struct sk_buff *frag;
878
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
879
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
880
inet6_sk(skb->sk) : NULL;
881
u8 tstamp_type = skb->tstamp_type;
882
struct ip6_frag_state state;
883
unsigned int mtu, hlen, nexthdr_offset;
884
ktime_t tstamp = skb->tstamp;
885
int hroom, err = 0;
886
__be32 frag_id;
887
u8 *prevhdr, nexthdr = 0;
888
889
err = ip6_find_1stfragopt(skb, &prevhdr);
890
if (err < 0)
891
goto fail;
892
hlen = err;
893
nexthdr = *prevhdr;
894
nexthdr_offset = prevhdr - skb_network_header(skb);
895
896
mtu = ip6_skb_dst_mtu(skb);
897
898
/* We must not fragment if the socket is set to force MTU discovery
899
* or if the skb is not generated by a local socket.
900
*/
901
if (unlikely(!skb->ignore_df && skb->len > mtu))
902
goto fail_toobig;
903
904
if (IP6CB(skb)->frag_max_size) {
905
if (IP6CB(skb)->frag_max_size > mtu)
906
goto fail_toobig;
907
908
/* don't send fragments larger than what we received */
909
mtu = IP6CB(skb)->frag_max_size;
910
if (mtu < IPV6_MIN_MTU)
911
mtu = IPV6_MIN_MTU;
912
}
913
914
if (np) {
915
u32 frag_size = READ_ONCE(np->frag_size);
916
917
if (frag_size && frag_size < mtu)
918
mtu = frag_size;
919
}
920
if (mtu < hlen + sizeof(struct frag_hdr) + 8)
921
goto fail_toobig;
922
mtu -= hlen + sizeof(struct frag_hdr);
923
924
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
925
&ipv6_hdr(skb)->saddr);
926
927
if (skb->ip_summed == CHECKSUM_PARTIAL &&
928
(err = skb_checksum_help(skb)))
929
goto fail;
930
931
prevhdr = skb_network_header(skb) + nexthdr_offset;
932
hroom = LL_RESERVED_SPACE(rt->dst.dev);
933
if (skb_has_frag_list(skb)) {
934
unsigned int first_len = skb_pagelen(skb);
935
struct ip6_fraglist_iter iter;
936
struct sk_buff *frag2;
937
938
if (first_len - hlen > mtu ||
939
((first_len - hlen) & 7) ||
940
skb_cloned(skb) ||
941
skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
942
goto slow_path;
943
944
skb_walk_frags(skb, frag) {
945
/* Correct geometry. */
946
if (frag->len > mtu ||
947
((frag->len & 7) && frag->next) ||
948
skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
949
goto slow_path_clean;
950
951
/* Partially cloned skb? */
952
if (skb_shared(frag))
953
goto slow_path_clean;
954
955
BUG_ON(frag->sk);
956
if (skb->sk) {
957
frag->sk = skb->sk;
958
frag->destructor = sock_wfree;
959
}
960
skb->truesize -= frag->truesize;
961
}
962
963
err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
964
&iter);
965
if (err < 0)
966
goto fail;
967
968
/* We prevent @rt from being freed. */
969
rcu_read_lock();
970
971
for (;;) {
972
/* Prepare header of the next frame,
973
* before previous one went down. */
974
if (iter.frag)
975
ip6_fraglist_prepare(skb, &iter);
976
977
skb_set_delivery_time(skb, tstamp, tstamp_type);
978
err = output(net, sk, skb);
979
if (!err)
980
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
981
IPSTATS_MIB_FRAGCREATES);
982
983
if (err || !iter.frag)
984
break;
985
986
skb = ip6_fraglist_next(&iter);
987
}
988
989
kfree(iter.tmp_hdr);
990
991
if (err == 0) {
992
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
993
IPSTATS_MIB_FRAGOKS);
994
rcu_read_unlock();
995
return 0;
996
}
997
998
kfree_skb_list(iter.frag);
999
1000
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
1001
IPSTATS_MIB_FRAGFAILS);
1002
rcu_read_unlock();
1003
return err;
1004
1005
slow_path_clean:
1006
skb_walk_frags(skb, frag2) {
1007
if (frag2 == frag)
1008
break;
1009
frag2->sk = NULL;
1010
frag2->destructor = NULL;
1011
skb->truesize += frag2->truesize;
1012
}
1013
}
1014
1015
slow_path:
1016
/*
1017
* Fragment the datagram.
1018
*/
1019
1020
ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
1021
LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
1022
&state);
1023
1024
/*
1025
* Keep copying data until we run out.
1026
*/
1027
1028
while (state.left > 0) {
1029
frag = ip6_frag_next(skb, &state);
1030
if (IS_ERR(frag)) {
1031
err = PTR_ERR(frag);
1032
goto fail;
1033
}
1034
1035
/*
1036
* Put this fragment into the sending queue.
1037
*/
1038
skb_set_delivery_time(frag, tstamp, tstamp_type);
1039
err = output(net, sk, frag);
1040
if (err)
1041
goto fail;
1042
1043
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1044
IPSTATS_MIB_FRAGCREATES);
1045
}
1046
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1047
IPSTATS_MIB_FRAGOKS);
1048
consume_skb(skb);
1049
return err;
1050
1051
fail_toobig:
1052
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1053
err = -EMSGSIZE;
1054
1055
fail:
1056
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1057
IPSTATS_MIB_FRAGFAILS);
1058
kfree_skb(skb);
1059
return err;
1060
}
1061
1062
static inline int ip6_rt_check(const struct rt6key *rt_key,
1063
const struct in6_addr *fl_addr,
1064
const struct in6_addr *addr_cache)
1065
{
1066
return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1067
(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1068
}
1069
1070
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1071
struct dst_entry *dst,
1072
const struct flowi6 *fl6)
1073
{
1074
struct ipv6_pinfo *np = inet6_sk(sk);
1075
struct rt6_info *rt;
1076
1077
if (!dst)
1078
goto out;
1079
1080
if (dst->ops->family != AF_INET6) {
1081
dst_release(dst);
1082
return NULL;
1083
}
1084
1085
rt = dst_rt6_info(dst);
1086
/* Yes, checking route validity in the non-connected
1087
* case is not very simple. Take into account,
1088
* that we do not support routing by source, TOS,
1089
* and MSG_DONTROUTE --ANK (980726)
1090
*
1091
* 1. ip6_rt_check(): If route was host route,
1092
* check that cached destination is current.
1093
* If it is network route, we still may
1094
* check its validity using saved pointer
1095
* to the last used address: daddr_cache.
1096
* We do not want to save whole address now,
1097
* (because main consumer of this service
1098
* is TCP, which does not have this problem),
1099
* so that the last trick works only on connected
1100
* sockets.
1101
* 2. oif also should be the same.
1102
*/
1103
if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr,
1104
np->daddr_cache ? &sk->sk_v6_daddr : NULL) ||
1105
#ifdef CONFIG_IPV6_SUBTREES
1106
ip6_rt_check(&rt->rt6i_src, &fl6->saddr,
1107
np->saddr_cache ? &np->saddr : NULL) ||
1108
#endif
1109
(fl6->flowi6_oif && fl6->flowi6_oif != dst_dev(dst)->ifindex)) {
1110
dst_release(dst);
1111
dst = NULL;
1112
}
1113
1114
out:
1115
return dst;
1116
}
1117
1118
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1119
struct dst_entry **dst, struct flowi6 *fl6)
1120
{
1121
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1122
struct neighbour *n;
1123
struct rt6_info *rt;
1124
#endif
1125
int err;
1126
int flags = 0;
1127
1128
/* The correct way to handle this would be to do
1129
* ip6_route_get_saddr, and then ip6_route_output; however,
1130
* the route-specific preferred source forces the
1131
* ip6_route_output call _before_ ip6_route_get_saddr.
1132
*
1133
* In source specific routing (no src=any default route),
1134
* ip6_route_output will fail given src=any saddr, though, so
1135
* that's why we try it again later.
1136
*/
1137
if (ipv6_addr_any(&fl6->saddr)) {
1138
struct fib6_info *from;
1139
struct rt6_info *rt;
1140
1141
*dst = ip6_route_output(net, sk, fl6);
1142
rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
1143
1144
rcu_read_lock();
1145
from = rt ? rcu_dereference(rt->from) : NULL;
1146
err = ip6_route_get_saddr(net, from, &fl6->daddr,
1147
sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
1148
fl6->flowi6_l3mdev,
1149
&fl6->saddr);
1150
rcu_read_unlock();
1151
1152
if (err)
1153
goto out_err_release;
1154
1155
/* If we had an erroneous initial result, pretend it
1156
* never existed and let the SA-enabled version take
1157
* over.
1158
*/
1159
if ((*dst)->error) {
1160
dst_release(*dst);
1161
*dst = NULL;
1162
}
1163
1164
if (fl6->flowi6_oif)
1165
flags |= RT6_LOOKUP_F_IFACE;
1166
}
1167
1168
if (!*dst)
1169
*dst = ip6_route_output_flags(net, sk, fl6, flags);
1170
1171
err = (*dst)->error;
1172
if (err)
1173
goto out_err_release;
1174
1175
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1176
/*
1177
* Here if the dst entry we've looked up
1178
* has a neighbour entry that is in the INCOMPLETE
1179
* state and the src address from the flow is
1180
* marked as OPTIMISTIC, we release the found
1181
* dst entry and replace it instead with the
1182
* dst entry of the nexthop router
1183
*/
1184
rt = dst_rt6_info(*dst);
1185
rcu_read_lock();
1186
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1187
rt6_nexthop(rt, &fl6->daddr));
1188
err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
1189
rcu_read_unlock();
1190
1191
if (err) {
1192
struct inet6_ifaddr *ifp;
1193
struct flowi6 fl_gw6;
1194
int redirect;
1195
1196
ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1197
(*dst)->dev, 1);
1198
1199
redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1200
if (ifp)
1201
in6_ifa_put(ifp);
1202
1203
if (redirect) {
1204
/*
1205
* We need to get the dst entry for the
1206
* default router instead
1207
*/
1208
dst_release(*dst);
1209
memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1210
memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1211
*dst = ip6_route_output(net, sk, &fl_gw6);
1212
err = (*dst)->error;
1213
if (err)
1214
goto out_err_release;
1215
}
1216
}
1217
#endif
1218
if (ipv6_addr_v4mapped(&fl6->saddr) &&
1219
!(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1220
err = -EAFNOSUPPORT;
1221
goto out_err_release;
1222
}
1223
1224
return 0;
1225
1226
out_err_release:
1227
dst_release(*dst);
1228
*dst = NULL;
1229
1230
if (err == -ENETUNREACH)
1231
IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1232
return err;
1233
}
1234
1235
/**
1236
* ip6_dst_lookup - perform route lookup on flow
1237
* @net: Network namespace to perform lookup in
1238
* @sk: socket which provides route info
1239
* @dst: pointer to dst_entry * for result
1240
* @fl6: flow to lookup
1241
*
1242
* This function performs a route lookup on the given flow.
1243
*
1244
* It returns zero on success, or a standard errno code on error.
1245
*/
1246
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1247
struct flowi6 *fl6)
1248
{
1249
*dst = NULL;
1250
return ip6_dst_lookup_tail(net, sk, dst, fl6);
1251
}
1252
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1253
1254
/**
1255
* ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1256
* @net: Network namespace to perform lookup in
1257
* @sk: socket which provides route info
1258
* @fl6: flow to lookup
1259
* @final_dst: final destination address for ipsec lookup
1260
*
1261
* This function performs a route lookup on the given flow.
1262
*
1263
* It returns a valid dst pointer on success, or a pointer encoded
1264
* error code.
1265
*/
1266
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1267
const struct in6_addr *final_dst)
1268
{
1269
struct dst_entry *dst = NULL;
1270
int err;
1271
1272
err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1273
if (err)
1274
return ERR_PTR(err);
1275
if (final_dst)
1276
fl6->daddr = *final_dst;
1277
1278
return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1279
}
1280
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
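/*
 * Usage sketch (illustrative): a typical caller fills in the flow and
 * checks the returned pointer with IS_ERR(), e.g.
 *
 *	fl6.flowi6_proto = IPPROTO_UDP;
 *	fl6.daddr = *daddr;
 *	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *
 * The result has already been passed through xfrm_lookup_route(), so any
 * IPsec transformation is reflected in the returned dst.
 */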
1281
1282
/**
1283
* ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1284
* @sk: socket which provides the dst cache and route info
1285
* @fl6: flow to lookup
1286
* @final_dst: final destination address for ipsec lookup
1287
* @connected: whether @sk is connected or not
1288
*
1289
* This function performs a route lookup on the given flow with the
1290
* possibility of using the cached route in the socket if it is valid.
1291
* It will take the socket dst lock when operating on the dst cache.
1292
* As a result, this function can only be used in process context.
1293
*
1294
* In addition, for a connected socket, cache the dst in the socket
1295
* if the current cache is not valid.
1296
*
1297
* It returns a valid dst pointer on success, or a pointer encoded
1298
* error code.
1299
*/
1300
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1301
const struct in6_addr *final_dst,
1302
bool connected)
1303
{
1304
struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1305
1306
dst = ip6_sk_dst_check(sk, dst, fl6);
1307
if (dst)
1308
return dst;
1309
1310
dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1311
if (connected && !IS_ERR(dst))
1312
ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1313
1314
return dst;
1315
}
1316
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1317
1318
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1319
gfp_t gfp)
1320
{
1321
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1322
}
1323
1324
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1325
gfp_t gfp)
1326
{
1327
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1328
}
1329
1330
static void ip6_append_data_mtu(unsigned int *mtu,
1331
int *maxfraglen,
1332
unsigned int fragheaderlen,
1333
struct sk_buff *skb,
1334
struct rt6_info *rt,
1335
unsigned int orig_mtu)
1336
{
1337
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1338
if (!skb) {
1339
/* first fragment, reserve header_len */
1340
*mtu = orig_mtu - rt->dst.header_len;
1341
1342
} else {
1343
/*
1344
* this fragment is not first, the headers
1345
* space is regarded as data space.
1346
*/
1347
*mtu = orig_mtu;
1348
}
1349
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
1350
+ fragheaderlen - sizeof(struct frag_hdr);
1351
}
1352
}
1353
1354
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1355
struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1356
struct rt6_info *rt)
1357
{
1358
struct ipv6_pinfo *np = inet6_sk(sk);
1359
unsigned int mtu, frag_size;
1360
struct ipv6_txoptions *nopt, *opt = ipc6->opt;
1361
1362
/* callers pass dst together with a reference, set it first so
1363
* ip6_cork_release() can put it down even in case of an error.
1364
*/
1365
cork->base.dst = &rt->dst;
1366
1367
/*
1368
* setup for corking
1369
*/
1370
if (opt) {
1371
if (WARN_ON(v6_cork->opt))
1372
return -EINVAL;
1373
1374
nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1375
if (unlikely(!nopt))
1376
return -ENOBUFS;
1377
1378
nopt->tot_len = sizeof(*opt);
1379
nopt->opt_flen = opt->opt_flen;
1380
nopt->opt_nflen = opt->opt_nflen;
1381
1382
nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
1383
if (opt->dst0opt && !nopt->dst0opt)
1384
return -ENOBUFS;
1385
1386
nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
1387
if (opt->dst1opt && !nopt->dst1opt)
1388
return -ENOBUFS;
1389
1390
nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
1391
if (opt->hopopt && !nopt->hopopt)
1392
return -ENOBUFS;
1393
1394
nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
1395
if (opt->srcrt && !nopt->srcrt)
1396
return -ENOBUFS;
1397
1398
/* need source address above --miyazawa */
1399
}
1400
v6_cork->hop_limit = ipc6->hlimit;
1401
v6_cork->tclass = ipc6->tclass;
1402
v6_cork->dontfrag = ipc6->dontfrag;
1403
if (rt->dst.flags & DST_XFRM_TUNNEL)
1404
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
1405
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1406
else
1407
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
1408
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1409
1410
frag_size = READ_ONCE(np->frag_size);
1411
if (frag_size && frag_size < mtu)
1412
mtu = frag_size;
1413
1414
cork->base.fragsize = mtu;
1415
cork->base.gso_size = ipc6->gso_size;
1416
cork->base.tx_flags = 0;
1417
cork->base.mark = ipc6->sockc.mark;
1418
cork->base.priority = ipc6->sockc.priority;
1419
sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
1420
if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
1421
cork->base.flags |= IPCORK_TS_OPT_ID;
1422
cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
1423
}
1424
cork->base.length = 0;
1425
cork->base.transmit_time = ipc6->sockc.transmit_time;
1426
1427
return 0;
1428
}
1429
1430
static int __ip6_append_data(struct sock *sk,
1431
struct sk_buff_head *queue,
1432
struct inet_cork_full *cork_full,
1433
struct inet6_cork *v6_cork,
1434
struct page_frag *pfrag,
1435
int getfrag(void *from, char *to, int offset,
1436
int len, int odd, struct sk_buff *skb),
1437
void *from, size_t length, int transhdrlen,
1438
unsigned int flags)
1439
{
1440
struct sk_buff *skb, *skb_prev = NULL;
1441
struct inet_cork *cork = &cork_full->base;
1442
struct flowi6 *fl6 = &cork_full->fl.u.ip6;
1443
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1444
struct ubuf_info *uarg = NULL;
1445
int exthdrlen = 0;
1446
int dst_exthdrlen = 0;
1447
int hh_len;
1448
int copy;
1449
int err;
1450
int offset = 0;
1451
bool zc = false;
1452
u32 tskey = 0;
1453
struct rt6_info *rt = dst_rt6_info(cork->dst);
1454
bool paged, hold_tskey = false, extra_uref = false;
1455
struct ipv6_txoptions *opt = v6_cork->opt;
1456
int csummode = CHECKSUM_NONE;
1457
unsigned int maxnonfragsize, headersize;
1458
unsigned int wmem_alloc_delta = 0;
1459
1460
skb = skb_peek_tail(queue);
1461
if (!skb) {
1462
exthdrlen = opt ? opt->opt_flen : 0;
1463
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1464
}
1465
1466
paged = !!cork->gso_size;
1467
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1468
orig_mtu = mtu;
1469
1470
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1471
1472
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1473
(opt ? opt->opt_nflen : 0);
1474
1475
headersize = sizeof(struct ipv6hdr) +
1476
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
1477
rt->rt6i_nfheader_len;
1478
1479
if (mtu <= fragheaderlen ||
1480
((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1481
goto emsgsize;
1482
1483
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1484
sizeof(struct frag_hdr);
1485
1486
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1487
* the first fragment
1488
*/
1489
if (headersize + transhdrlen > mtu)
1490
goto emsgsize;
1491
1492
if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
1493
(sk->sk_protocol == IPPROTO_UDP ||
1494
sk->sk_protocol == IPPROTO_ICMPV6 ||
1495
sk->sk_protocol == IPPROTO_RAW)) {
1496
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1497
sizeof(struct ipv6hdr));
1498
goto emsgsize;
1499
}
1500
1501
if (ip6_sk_ignore_df(sk))
1502
maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1503
else
1504
maxnonfragsize = mtu;
1505
1506
if (cork->length + length > maxnonfragsize - headersize) {
1507
emsgsize:
1508
pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1509
ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1510
return -EMSGSIZE;
1511
}
1512
1513
/* CHECKSUM_PARTIAL only with no extension headers and when
1514
* we are not going to fragment
1515
*/
1516
if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1517
headersize == sizeof(struct ipv6hdr) &&
1518
length <= mtu - headersize &&
1519
(!(flags & MSG_MORE) || cork->gso_size) &&
1520
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1521
csummode = CHECKSUM_PARTIAL;
1522
1523
if ((flags & MSG_ZEROCOPY) && length) {
1524
struct msghdr *msg = from;
1525
1526
if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
1527
if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
1528
return -EINVAL;
1529
1530
/* Leave uarg NULL if can't zerocopy, callers should
1531
* be able to handle it.
1532
*/
1533
if ((rt->dst.dev->features & NETIF_F_SG) &&
1534
csummode == CHECKSUM_PARTIAL) {
1535
paged = true;
1536
zc = true;
1537
uarg = msg->msg_ubuf;
1538
}
1539
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
1540
uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
1541
false);
1542
if (!uarg)
1543
return -ENOBUFS;
1544
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
1545
if (rt->dst.dev->features & NETIF_F_SG &&
1546
csummode == CHECKSUM_PARTIAL) {
1547
paged = true;
1548
zc = true;
1549
} else {
1550
uarg_to_msgzc(uarg)->zerocopy = 0;
1551
skb_zcopy_set(skb, uarg, &extra_uref);
1552
}
1553
}
1554
} else if ((flags & MSG_SPLICE_PAGES) && length) {
1555
if (inet_test_bit(HDRINCL, sk))
1556
return -EPERM;
1557
if (rt->dst.dev->features & NETIF_F_SG &&
1558
getfrag == ip_generic_getfrag)
1559
/* We need an empty buffer to attach stuff to */
1560
paged = true;
1561
else
1562
flags &= ~MSG_SPLICE_PAGES;
1563
}
1564
1565
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
1566
READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
1567
if (cork->flags & IPCORK_TS_OPT_ID) {
1568
tskey = cork->ts_opt_id;
1569
} else {
1570
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
1571
hold_tskey = true;
1572
}
1573
}
1574
1575
/*
1576
* Let's try using as much space as possible.
1577
* Use MTU if total length of the message fits into the MTU.
1578
* Otherwise, we need to reserve fragment header and
1579
* fragment alignment (= 8-15 octets, in total).
1580
*
1581
* Note that we may need to "move" the data from the tail
1582
* of the buffer to the new fragment when we split
1583
* the message.
1584
*
1585
* FIXME: It may be fragmented into multiple chunks
1586
* at once if non-fragmentable extension headers
1587
* are too large.
1588
* --yoshfuji
1589
*/
1590
1591
cork->length += length;
1592
if (!skb)
1593
goto alloc_new_skb;
1594
1595
while (length > 0) {
1596
/* Check if the remaining data fits into current packet. */
1597
copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
1598
if (copy < length)
1599
copy = maxfraglen - skb->len;
1600
1601
if (copy <= 0) {
1602
char *data;
1603
unsigned int datalen;
1604
unsigned int fraglen;
1605
unsigned int fraggap;
1606
unsigned int alloclen, alloc_extra;
1607
unsigned int pagedlen;
1608
alloc_new_skb:
1609
/* There's no room in the current skb */
1610
if (skb)
1611
fraggap = skb->len - maxfraglen;
1612
else
1613
fraggap = 0;
1614
/* update mtu and maxfraglen if necessary */
1615
if (!skb || !skb_prev)
1616
ip6_append_data_mtu(&mtu, &maxfraglen,
1617
fragheaderlen, skb, rt,
1618
orig_mtu);
1619
1620
skb_prev = skb;
1621
1622
/*
1623
* If remaining data exceeds the mtu,
1624
* we know we need more fragment(s).
1625
*/
1626
datalen = length + fraggap;
1627
1628
if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
1629
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1630
fraglen = datalen + fragheaderlen;
1631
pagedlen = 0;
1632
1633
alloc_extra = hh_len;
1634
alloc_extra += dst_exthdrlen;
1635
alloc_extra += rt->dst.trailer_len;
1636
1637
/* We just reserve space for fragment header.
1638
* Note: this may be overallocation if the message
1639
* (without MSG_MORE) fits into the MTU.
1640
*/
1641
alloc_extra += sizeof(struct frag_hdr);
1642
1643
if ((flags & MSG_MORE) &&
1644
!(rt->dst.dev->features&NETIF_F_SG))
1645
alloclen = mtu;
1646
else if (!paged &&
1647
(fraglen + alloc_extra < SKB_MAX_ALLOC ||
1648
!(rt->dst.dev->features & NETIF_F_SG)))
1649
alloclen = fraglen;
1650
else {
1651
alloclen = fragheaderlen + transhdrlen;
1652
pagedlen = datalen - transhdrlen;
1653
}
1654
alloclen += alloc_extra;
1655
1656
if (datalen != length + fraggap) {
1657
/*
1658
* this is not the last fragment, the trailer
1659
* space is regarded as data space.
1660
*/
1661
datalen += rt->dst.trailer_len;
1662
}
1663
1664
fraglen = datalen + fragheaderlen;
1665
1666
copy = datalen - transhdrlen - fraggap - pagedlen;
1667
/* [!] NOTE: copy may be negative if pagedlen>0
1668
* because then the equation may reduce to -fraggap.
1669
*/
1670
if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
1671
err = -EINVAL;
1672
goto error;
1673
}
1674
if (transhdrlen) {
1675
skb = sock_alloc_send_skb(sk, alloclen,
1676
(flags & MSG_DONTWAIT), &err);
1677
} else {
1678
skb = NULL;
1679
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1680
2 * sk->sk_sndbuf)
1681
skb = alloc_skb(alloclen,
1682
sk->sk_allocation);
1683
if (unlikely(!skb))
1684
err = -ENOBUFS;
1685
}
1686
if (!skb)
1687
goto error;
1688
/*
1689
* Fill in the control structures
1690
*/
1691
skb->protocol = htons(ETH_P_IPV6);
1692
skb->ip_summed = csummode;
1693
skb->csum = 0;
1694
/* reserve for fragmentation and ipsec header */
1695
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1696
dst_exthdrlen);
1697
1698
/*
1699
* Find where to start putting bytes
1700
*/
1701
data = skb_put(skb, fraglen - pagedlen);
1702
skb_set_network_header(skb, exthdrlen);
1703
data += fragheaderlen;
1704
skb->transport_header = (skb->network_header +
1705
fragheaderlen);
1706
if (fraggap) {
1707
skb->csum = skb_copy_and_csum_bits(
1708
skb_prev, maxfraglen,
1709
data + transhdrlen, fraggap);
1710
skb_prev->csum = csum_sub(skb_prev->csum,
1711
skb->csum);
1712
data += fraggap;
1713
pskb_trim_unique(skb_prev, maxfraglen);
1714
}
1715
if (copy > 0 &&
1716
INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
1717
from, data + transhdrlen, offset,
1718
copy, fraggap, skb) < 0) {
1719
err = -EFAULT;
1720
kfree_skb(skb);
1721
goto error;
1722
} else if (flags & MSG_SPLICE_PAGES) {
1723
copy = 0;
1724
}
1725
1726
offset += copy;
1727
length -= copy + transhdrlen;
1728
transhdrlen = 0;
1729
exthdrlen = 0;
1730
dst_exthdrlen = 0;
1731
1732
/* Only the initial fragment is time stamped */
1733
skb_shinfo(skb)->tx_flags = cork->tx_flags;
1734
cork->tx_flags = 0;
1735
skb_shinfo(skb)->tskey = tskey;
1736
tskey = 0;
1737
skb_zcopy_set(skb, uarg, &extra_uref);
1738
1739
if ((flags & MSG_CONFIRM) && !skb_prev)
1740
skb_set_dst_pending_confirm(skb, 1);
1741
1742
/*
1743
* Put the packet on the pending queue
1744
*/
1745
if (!skb->destructor) {
1746
skb->destructor = sock_wfree;
1747
skb->sk = sk;
1748
wmem_alloc_delta += skb->truesize;
1749
}
1750
__skb_queue_tail(queue, skb);
1751
continue;
1752
}
1753
1754
if (copy > length)
1755
copy = length;
1756
1757
if (!(rt->dst.dev->features&NETIF_F_SG) &&
1758
skb_tailroom(skb) >= copy) {
1759
unsigned int off;
1760
1761
off = skb->len;
1762
if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
1763
from, skb_put(skb, copy),
1764
offset, copy, off, skb) < 0) {
1765
__skb_trim(skb, off);
1766
err = -EFAULT;
1767
goto error;
1768
}
1769
} else if (flags & MSG_SPLICE_PAGES) {
1770
struct msghdr *msg = from;
1771
1772
err = -EIO;
1773
if (WARN_ON_ONCE(copy > msg->msg_iter.count))
1774
goto error;
1775
1776
err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
1777
if (err < 0)
1778
goto error;
1779
copy = err;
1780
wmem_alloc_delta += copy;
1781
} else if (!zc) {
1782
int i = skb_shinfo(skb)->nr_frags;
1783
1784
err = -ENOMEM;
1785
if (!sk_page_frag_refill(sk, pfrag))
1786
goto error;
1787
1788
skb_zcopy_downgrade_managed(skb);
1789
if (!skb_can_coalesce(skb, i, pfrag->page,
1790
pfrag->offset)) {
1791
err = -EMSGSIZE;
1792
if (i == MAX_SKB_FRAGS)
1793
goto error;
1794
1795
__skb_fill_page_desc(skb, i, pfrag->page,
1796
pfrag->offset, 0);
1797
skb_shinfo(skb)->nr_frags = ++i;
1798
get_page(pfrag->page);
1799
}
1800
copy = min_t(int, copy, pfrag->size - pfrag->offset);
1801
if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
1802
from,
1803
page_address(pfrag->page) + pfrag->offset,
1804
offset, copy, skb->len, skb) < 0)
1805
goto error_efault;
1806
1807
pfrag->offset += copy;
1808
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1809
skb->len += copy;
1810
skb->data_len += copy;
1811
skb->truesize += copy;
1812
wmem_alloc_delta += copy;
1813
} else {
1814
err = skb_zerocopy_iter_dgram(skb, from, copy);
1815
if (err < 0)
1816
goto error;
1817
}
1818
offset += copy;
1819
length -= copy;
1820
}
1821
1822
if (wmem_alloc_delta)
1823
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1824
return 0;
1825
1826
error_efault:
1827
err = -EFAULT;
1828
error:
1829
net_zcopy_put_abort(uarg, extra_uref);
1830
cork->length -= length;
1831
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1832
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1833
if (hold_tskey)
1834
atomic_dec(&sk->sk_tskey);
1835
return err;
1836
}
1837
1838
int ip6_append_data(struct sock *sk,
1839
int getfrag(void *from, char *to, int offset, int len,
1840
int odd, struct sk_buff *skb),
1841
void *from, size_t length, int transhdrlen,
1842
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1843
struct rt6_info *rt, unsigned int flags)
1844
{
1845
struct inet_sock *inet = inet_sk(sk);
1846
struct ipv6_pinfo *np = inet6_sk(sk);
1847
int exthdrlen;
1848
int err;
1849
1850
if (flags&MSG_PROBE)
1851
return 0;
1852
if (skb_queue_empty(&sk->sk_write_queue)) {
1853
/*
1854
* setup for corking
1855
*/
1856
dst_hold(&rt->dst);
1857
err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1858
ipc6, rt);
1859
if (err)
1860
return err;
1861
1862
inet->cork.fl.u.ip6 = *fl6;
1863
exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1864
length += exthdrlen;
1865
transhdrlen += exthdrlen;
1866
} else {
1867
transhdrlen = 0;
1868
}
1869
1870
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
1871
&np->cork, sk_page_frag(sk), getfrag,
1872
from, length, transhdrlen, flags);
1873
}
1874
EXPORT_SYMBOL_GPL(ip6_append_data);
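/*
 * Usage sketch (illustrative, approximate - exact error handling differs
 * per protocol): datagram senders cork data onto the socket write queue
 * and then flush it, roughly:
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, transhdrlen,
 *			      &ipc6, &fl6, rt, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 */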
1875
1876
static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
1877
{
1878
struct dst_entry *dst = cork->base.dst;
1879
1880
cork->base.dst = NULL;
1881
skb_dst_set(skb, dst);
1882
}
1883
1884
static void ip6_cork_release(struct inet_cork_full *cork,
1885
struct inet6_cork *v6_cork)
1886
{
1887
if (v6_cork->opt) {
1888
struct ipv6_txoptions *opt = v6_cork->opt;
1889
1890
kfree(opt->dst0opt);
1891
kfree(opt->dst1opt);
1892
kfree(opt->hopopt);
1893
kfree(opt->srcrt);
1894
kfree(opt);
1895
v6_cork->opt = NULL;
1896
}
1897
1898
if (cork->base.dst) {
1899
dst_release(cork->base.dst);
1900
cork->base.dst = NULL;
1901
}
1902
}
1903
1904
struct sk_buff *__ip6_make_skb(struct sock *sk,
1905
struct sk_buff_head *queue,
1906
struct inet_cork_full *cork,
1907
struct inet6_cork *v6_cork)
1908
{
1909
struct sk_buff *skb, *tmp_skb;
1910
struct sk_buff **tail_skb;
1911
struct in6_addr *final_dst;
1912
struct net *net = sock_net(sk);
1913
struct ipv6hdr *hdr;
1914
struct ipv6_txoptions *opt = v6_cork->opt;
1915
struct rt6_info *rt = dst_rt6_info(cork->base.dst);
1916
struct flowi6 *fl6 = &cork->fl.u.ip6;
1917
unsigned char proto = fl6->flowi6_proto;
1918
1919
skb = __skb_dequeue(queue);
1920
if (!skb)
1921
goto out;
1922
tail_skb = &(skb_shinfo(skb)->frag_list);
1923
1924
/* move skb->data to ip header from ext header */
1925
if (skb->data < skb_network_header(skb))
1926
__skb_pull(skb, skb_network_offset(skb));
1927
while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1928
__skb_pull(tmp_skb, skb_network_header_len(skb));
1929
*tail_skb = tmp_skb;
1930
tail_skb = &(tmp_skb->next);
1931
skb->len += tmp_skb->len;
1932
skb->data_len += tmp_skb->len;
1933
skb->truesize += tmp_skb->truesize;
1934
tmp_skb->destructor = NULL;
1935
tmp_skb->sk = NULL;
1936
}
1937
1938
/* Allow local fragmentation. */
1939
skb->ignore_df = ip6_sk_ignore_df(sk);
1940
__skb_pull(skb, skb_network_header_len(skb));
1941
1942
final_dst = &fl6->daddr;
1943
if (opt && opt->opt_flen)
1944
ipv6_push_frag_opts(skb, opt, &proto);
1945
if (opt && opt->opt_nflen)
1946
ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1947
1948
skb_push(skb, sizeof(struct ipv6hdr));
1949
skb_reset_network_header(skb);
1950
hdr = ipv6_hdr(skb);
1951
1952
ip6_flow_hdr(hdr, v6_cork->tclass,
1953
ip6_make_flowlabel(net, skb, fl6->flowlabel,
1954
ip6_autoflowlabel(net, sk), fl6));
1955
hdr->hop_limit = v6_cork->hop_limit;
1956
hdr->nexthdr = proto;
1957
hdr->saddr = fl6->saddr;
1958
hdr->daddr = *final_dst;
1959
1960
skb->priority = cork->base.priority;
1961
skb->mark = cork->base.mark;
1962
if (sk_is_tcp(sk))
1963
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
1964
else
1965
skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
1966
1967
ip6_cork_steal_dst(skb, cork);
1968
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1969
if (proto == IPPROTO_ICMPV6) {
1970
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1971
u8 icmp6_type;
1972
1973
if (sk->sk_socket->type == SOCK_RAW &&
1974
!(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
1975
icmp6_type = fl6->fl6_icmp_type;
1976
else
1977
icmp6_type = icmp6_hdr(skb)->icmp6_type;
1978
ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1979
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1980
}
1981
1982
ip6_cork_release(cork, v6_cork);
1983
out:
1984
return skb;
1985
}
1986
1987
int ip6_send_skb(struct sk_buff *skb)
1988
{
1989
struct net *net = sock_net(skb->sk);
1990
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
1991
int err;
1992
1993
rcu_read_lock();
1994
err = ip6_local_out(net, skb->sk, skb);
1995
if (err) {
1996
if (err > 0)
1997
err = net_xmit_errno(err);
1998
if (err)
1999
IP6_INC_STATS(net, rt->rt6i_idev,
2000
IPSTATS_MIB_OUTDISCARDS);
2001
}
2002
2003
rcu_read_unlock();
2004
return err;
2005
}
2006
2007
int ip6_push_pending_frames(struct sock *sk)
2008
{
2009
struct sk_buff *skb;
2010
2011
skb = ip6_finish_skb(sk);
2012
if (!skb)
2013
return 0;
2014
2015
return ip6_send_skb(skb);
2016
}
2017
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
2018
2019
static void __ip6_flush_pending_frames(struct sock *sk,
2020
struct sk_buff_head *queue,
2021
struct inet_cork_full *cork,
2022
struct inet6_cork *v6_cork)
2023
{
2024
struct sk_buff *skb;
2025
2026
while ((skb = __skb_dequeue_tail(queue)) != NULL) {
2027
if (skb_dst(skb))
2028
IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
2029
IPSTATS_MIB_OUTDISCARDS);
2030
kfree_skb(skb);
2031
}
2032
2033
ip6_cork_release(cork, v6_cork);
2034
}
2035
2036
void ip6_flush_pending_frames(struct sock *sk)
2037
{
2038
__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
2039
&inet_sk(sk)->cork, &inet6_sk(sk)->cork);
2040
}
2041
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
2042
2043
struct sk_buff *ip6_make_skb(struct sock *sk,
2044
int getfrag(void *from, char *to, int offset,
2045
int len, int odd, struct sk_buff *skb),
2046
void *from, size_t length, int transhdrlen,
2047
struct ipcm6_cookie *ipc6, struct rt6_info *rt,
2048
unsigned int flags, struct inet_cork_full *cork)
2049
{
2050
struct inet6_cork v6_cork;
2051
struct sk_buff_head queue;
2052
int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
2053
int err;
2054
2055
if (flags & MSG_PROBE) {
2056
dst_release(&rt->dst);
2057
return NULL;
2058
}
2059
2060
__skb_queue_head_init(&queue);
2061
2062
cork->base.flags = 0;
2063
cork->base.addr = 0;
2064
cork->base.opt = NULL;
2065
v6_cork.opt = NULL;
2066
err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
2067
if (err) {
2068
ip6_cork_release(cork, &v6_cork);
2069
return ERR_PTR(err);
2070
}
2071
2072
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
2073
&current->task_frag, getfrag, from,
2074
length + exthdrlen, transhdrlen + exthdrlen,
2075
flags);
2076
if (err) {
2077
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
2078
return ERR_PTR(err);
2079
}
2080
2081
return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2082
}
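/*
 * Note: ip6_make_skb() is the uncorked, single-shot variant of the
 * ip6_append_data()/ip6_push_pending_frames() pair: it sets up a private
 * cork and queue, appends the whole datagram in one call and returns the
 * assembled skb (NULL for MSG_PROBE, ERR_PTR on failure), leaving
 * transmission to the caller.
 */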
2083
2084