GitHub Repository: torvalds/linux
Path: blob/master/net/ipv6/icmp.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Internet Control Message Protocol (ICMPv6)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<[email protected]>
 *
 *	Based on net/ipv4/icmp.c
 *
 *	RFC 1885
 */

/*
 *	Changes:
 *
 *	Andi Kleen		:	exception handling
 *	Andi Kleen		:	add rate limits; never reply to an ICMP error;
 *					add more length checks and other fixes.
 *	yoshfuji		:	ensure a parameter problem is sent for
 *					fragments.
 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
 *	Randy Dunlap and
 *	YOSHIFUJI Hideaki @USAGI:	per-interface statistics support
 *	Kazunori MIYAZAWA @USAGI:	change output process to use ip6_append_data
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/netfilter.h>
#include <linux/slab.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/icmpv6.h>

#include <net/ip.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
#include <net/seg6.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
#include <net/l3mdev.h>

#include <linux/uaccess.h>

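/*
 * One kernel raw socket per CPU, used as the source socket for the
 * ICMPv6 errors and echo replies generated in this file; see
 * icmpv6_xmit_lock() and icmpv6_init() below.
 */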
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);

static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0,
				sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}

static int icmpv6_rcv(struct sk_buff *skb);

static const struct inet6_protocol icmpv6_protocol = {
	.handler	= icmpv6_rcv,
	.err_handler	= icmpv6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/* Called with BH disabled */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (e.g. a SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	sock_net_set(sk, net);
	return sk;
}

static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}

/*
 *	Figure out whether we may reply to this packet with an ICMP error.
 *
 *	We do not reply if:
 *	- it was an ICMP error message;
 *	- it is truncated, so we cannot rule out that it is an ICMPv6
 *	  error (i.e. it ends in the middle of some extension header).
 *
 *	--ANK (980726)
 */

static bool is_ineligible(const struct sk_buff *skb)
{
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
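	/* A negative return means the extension header chain could not be
	 * walked, so we cannot prove the inner protocol is ICMPv6; treat
	 * the packet as eligible for an error reply.
	 */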
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr + offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}

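/*
 * Returning true here means the message type is NOT subject to rate
 * limiting: either the type is out of range for the bitmap, or its bit
 * is clear in the icmpv6_ratemask sysctl.
 */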
static bool icmpv6_mask_allow(struct net *net, int type)
{
	if (type > ICMPV6_MSG_MAX)
		return true;

	/* Limit if icmp type is set in ratemask. */
	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
		return true;

	return false;
}

static bool icmpv6_global_allow(struct net *net, int type,
				bool *apply_ratelimit)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow(net)) {
		*apply_ratelimit = true;
		return true;
	}
	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}

/*
 * Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct dst_entry *dst;
	bool res = false;

	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	rcu_read_lock();
	dev = dst_dev_rcu(dst);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dev && (dev->flags & IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
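		/* e.g. a /64 route shifts by (128 - 64) >> 5 = 2, quartering
		 * the per-destination timeout, while a /127 route shifts by
		 * zero and leaves it unchanged.
		 */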

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
		res = inet_peer_xrlim_allow(peer, tmo);
	}
	rcu_read_unlock();
	if (!res)
		__ICMP6_INC_STATS(net, NULL, ICMP6_MIB_RATELIMITHOST);
	else
		icmp_global_consume(net);
	dst_release(dst);
	return res;
}

static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
				  struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	dst = ip6_route_output(net, sk, fl6);
	if (!dst->error) {
		struct rt6_info *rt = dst_rt6_info(dst);
		struct in6_addr prefsrc;

		rt6_get_prefsrc(rt, &prefsrc);
		res = !ipv6_addr_any(&prefsrc);
	}
	dst_release(dst);
	return res;
}

/*
 *	An inline helper for the "simple" if statement below:
 *	checks whether a parameter problem report is caused by an
 *	unrecognized IPv6 option whose Option Type has its two
 *	highest-order bits set to 10.
 */

static bool opt_unrec(struct sk_buff *skb, __u32 offset)
{
	u8 _optval, *op;

	offset += skb_network_offset(skb);
	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
	if (!op)
		return true;
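	/* Option types 0x80-0xBF carry binary 10 in the top two bits, so
	 * e.g. (0x80 & 0xC0) == 0x80 matches, while a Router Alert option
	 * (type 0x05) does not.
	 */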
	return (*op & 0xC0) == 0x80;
}

void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;
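
	/* Fold the data checksums accumulated by icmpv6_getfrag() into the
	 * ICMPv6 header, then finish with the IPv6 pseudo-header via
	 * csum_ipv6_magic(): a single skb is summed in place, a fragment
	 * queue is combined with csum_add() first.
	 */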
	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}

struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
	uint8_t		type;
};

static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
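	/* For error messages, tie the new skb to the conntrack entry of
	 * the packet that triggered it, so netfilter (e.g. NAT) can
	 * translate the embedded payload consistently.
	 */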
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6_MIP6)
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			swap(iph->saddr, hao->addr);
		}
	}
}
#else
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif

static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send an ICMP error if the destination is a known
	 * anycast address, unless we are told to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

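	/* The forward lookup was vetoed by XFRM policy (-EPERM).  Decode
	 * the flow of the offending packet in reverse and retry the lookup
	 * with XFRM_LOOKUP_ICMP, which lets the ICMP error be matched
	 * against the inner flow's states.
	 */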
	err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	if (dst)
		return dst;
	return ERR_PTR(err);
}

static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* For local traffic to a local address, skb->dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. The same is needed for replies to a
	 * link-local address on a device enslaved to an L3 master device.
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
		 * and ip6_null_entry could be set to skb if no route is found.
		 */
		if (rt6 && rt6->rt6i_idev)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}

static int icmp6_iif(const struct sk_buff *skb)
{
	return icmp6_dev(skb)->ifindex;
}

/*
 *	Send an ICMP message in response to a packet in error
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;

	rcu_read_lock();

	net = dev_net_rcu(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules,
	 *	i.e. RFC 1885 2.4(e).
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			goto out;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send an error if the source does not uniquely
	 *	identify a single node (RFC 2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/*
	 *	Never answer an ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* Select a more meaningful saddr from the input interface. */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
		goto out_unlock;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	ipcm6_init_sk(&ipc6, sk);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out_unlock;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
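	/* IPV6_MIN_MTU is 1280 bytes, so at most 1280 - 40 - 8 = 1232 bytes
	 * of the offending packet are quoted, keeping the whole error
	 * inside the minimum IPv6 MTU (RFC 4443 2.4(c)).
	 */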
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, dst_rt6_info(dst),
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}

out_dst_release:
	dst_release(dst);
out_unlock:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);

/* Slightly more convenient version of icmp6_send with drop reasons.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}

/* Generate an ICMPv6 error with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available.
 * @nhs is the size of the tunnel header(s):
 * either an IPv4 header for SIT encap,
 * or an IPv4 header + GRE header for GRE encap.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
			NULL, 0, skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
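		/* e.g. data_len == 128: the original-datagram length is
		 * 128 / 8 = 16 words, stored in the top byte of info.
		 */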
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);

static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

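	/* RFC 8335 PROBE (extended echo): the request is validated and the
	 * reply payload is built by icmp_build_probe(), shared with IPv4.
	 */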
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}

enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net_rcu(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Check the header, including 8 bytes of the inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	 * Without this we will not be able, e.g., to do source-routed
	 * PMTU discovery.
	 * The corresponding argument (opt) to notifiers is already added.
	 * --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}

/*
 *	Handle icmp messages
 */

static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net_rcu(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
	case ICMPV6_EXT_ECHO_REPLY:
		ping_rcv(skb);
		return 0;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if the packet contains an rthdr, we cannot
		 * update the standard destination cache. It seems only an
		 * "advanced" destination cache would solve this problem.
		 * --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted, assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}

void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr, int oif)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto = IPPROTO_ICMPV6;
	fl6->fl6_icmp_type = type;
	fl6->fl6_icmp_code = 0;
	fl6->flowi6_oif = oif;
	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
}

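/*
 * Create one control socket per possible CPU.  These are in-kernel
 * sockets (inet_ctl_sock_create()), never visible to userspace;
 * icmpv6_xmit_lock() above picks the current CPU's socket for output.
 */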
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}

void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}

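/*
 * Indexed by the ICMPv6 Destination Unreachable code (0 NOROUTE ...
 * 6 REJECT_ROUTE); consumed by icmpv6_err_convert() below to map a
 * received error onto an errno and a fatal/transient verdict.
 */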
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err = ENETUNREACH,
		.fatal = 0,
	},
	{	/* ADM_PROHIBITED */
		.err = EACCES,
		.fatal = 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err = EHOSTUNREACH,
		.fatal = 0,
	},
	{	/* ADDR_UNREACH */
		.err = EHOSTUNREACH,
		.fatal = 0,
	},
	{	/* PORT_UNREACH */
		.err = ECONNREFUSED,
		.fatal = 1,
	},
	{	/* POLICY_FAIL */
		.err = EACCES,
		.fatal = 1,
	},
	{	/* REJECT_ROUTE */
		.err = EACCES,
		.fatal = 1,
	},
};

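/*
 * Used by transport protocols' ICMPv6 error handlers (e.g. TCPv6) to
 * map a received (type, code) onto an errno; the return value says
 * whether the error is fatal for the connection.
 */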
int icmpv6_err_convert(u8 type, u8 code, int *err)
{
	int fatal = 0;

	*err = EPROTO;

	switch (type) {
	case ICMPV6_DEST_UNREACH:
		fatal = 1;
		if (code < ARRAY_SIZE(tab_unreach)) {
			*err = tab_unreach[code].err;
			fatal = tab_unreach[code].fatal;
		}
		break;

	case ICMPV6_PKT_TOOBIG:
		*err = EMSGSIZE;
		break;

	case ICMPV6_PARAMPROB:
		*err = EPROTO;
		fatal = 1;
		break;

	case ICMPV6_TIME_EXCEED:
		*err = EHOSTUNREACH;
		break;
	}

	return fatal;
}
EXPORT_SYMBOL(icmpv6_err_convert);

#ifdef CONFIG_SYSCTL
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};
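
/*
 * The template above points at init_net; each namespace gets its own
 * kmemdup()'d copy with the .data pointers rewired to the per-netns
 * sysctl fields, so the indices below must match the template order.
 */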
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
	}
	return table;
}

size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
#endif