1 /*
2 * Copyright (c) 2004-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 */
61
62 #include <kern/debug.h>
63 #include <netinet/in_arp.h>
64 #include <sys/types.h>
65 #include <sys/param.h>
66 #include <sys/kernel_types.h>
67 #include <sys/syslog.h>
68 #include <sys/systm.h>
69 #include <sys/time.h>
70 #include <sys/kernel.h>
71 #include <sys/mbuf.h>
72 #include <sys/sysctl.h>
73 #include <sys/mcache.h>
74 #include <sys/protosw.h>
75 #include <string.h>
76 #include <net/if_arp.h>
77 #include <net/if_dl.h>
78 #include <net/dlil.h>
79 #include <net/if_types.h>
80 #include <net/if_llreach.h>
81 #include <net/route.h>
82 #include <net/nwk_wq.h>
83
84 #include <netinet/if_ether.h>
85 #include <netinet/in_var.h>
86 #include <netinet/ip.h>
87 #include <netinet/ip6.h>
88 #include <kern/zalloc.h>
89
90 #include <kern/thread.h>
91 #include <kern/sched_prim.h>
92
/*
 * Read-only byte pointer into the sdl_data area of a sockaddr_dl,
 * offset by sdl_nlen bytes.
 */
#define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen))

/* NOTE(review): no user of MAX_HW_LEN is visible in this part of the file */
static const size_t MAX_HW_LEN = 10;
96
97 /*
98 * Synchronization notes:
99 *
 * The global list of ARP entries is stored in llinfo_arp; an entry
101 * gets inserted into the list when the route is created and gets
102 * removed from the list when it is deleted; this is done as part
103 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in arp_rtrequest().
104 *
105 * Because rnh_lock and rt_lock for the entry are held during those
106 * operations, the same locks (and thus lock ordering) must be used
107 * elsewhere to access the relevant data structure fields:
108 *
109 * la_le.{le_next,le_prev}, la_rt
110 *
111 * - Routing lock (rnh_lock)
112 *
113 * la_holdq, la_asked, la_llreach, la_lastused, la_flags
114 *
115 * - Routing entry lock (rt_lock)
116 *
117 * Due to the dependency on rt_lock, llinfo_arp has the same lifetime
118 * as the route entry itself. When a route is deleted (RTM_DELETE),
119 * it is simply removed from the global list but the memory is not
120 * freed until the route itself is freed.
121 */
122 struct llinfo_arp {
123 /*
124 * The following are protected by rnh_lock
125 */
126 LIST_ENTRY(llinfo_arp) la_le;
127 struct rtentry *la_rt;
128 /*
129 * The following are protected by rt_lock
130 */
131 class_queue_t la_holdq; /* packets awaiting resolution */
132 struct if_llreach *la_llreach; /* link-layer reachability record */
133 u_int64_t la_lastused; /* last used timestamp */
134 u_int32_t la_asked; /* # of requests sent */
135 u_int32_t la_maxtries; /* retry limit */
136 u_int64_t la_probeexp; /* probe deadline timestamp */
137 u_int32_t la_prbreq_cnt; /* probe request count */
138 u_int32_t la_flags;
139 #define LLINFO_RTRFAIL_EVTSENT 0x1 /* sent an ARP event */
140 #define LLINFO_PROBING 0x2 /* waiting for an ARP reply */
141 };
142
143 static LIST_HEAD(, llinfo_arp) llinfo_arp;
144
145 static thread_call_t arp_timeout_tcall;
146 static int arp_timeout_run; /* arp_timeout is scheduled to run */
147 static void arp_timeout(thread_call_param_t arg0, thread_call_param_t arg1);
148 static void arp_sched_timeout(struct timeval *);
149
150 static thread_call_t arp_probe_tcall;
151 static int arp_probe_run; /* arp_probe is scheduled to run */
152 static void arp_probe(thread_call_param_t arg0, thread_call_param_t arg1);
153 static void arp_sched_probe(struct timeval *);
154
155 static void arptfree(struct llinfo_arp *, void *);
156 static errno_t arp_lookup_route(const struct in_addr *, int,
157 int, route_t *, unsigned int);
158 static int arp_getstat SYSCTL_HANDLER_ARGS;
159
160 static struct llinfo_arp *arp_llinfo_alloc(zalloc_flags_t);
161 static void arp_llinfo_free(void *);
162 static uint32_t arp_llinfo_flushq(struct llinfo_arp *);
163 static void arp_llinfo_purge(struct rtentry *);
164 static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
165 static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
166 static void arp_llinfo_refresh(struct rtentry *);
167
168 static __inline void arp_llreach_use(struct llinfo_arp *);
169 static __inline int arp_llreach_reachable(struct llinfo_arp *);
170 static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *,
171 unsigned int, boolean_t, uint32_t *);
172
173 extern int tvtohz(struct timeval *);
174
175 static int arpinit_done;
176
177 SYSCTL_DECL(_net_link_ether);
178 SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "");
179
180 static int arpt_prune = (5 * 60 * 1); /* walk list every 5 minutes */
181 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl,
182 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, "");
183
184 #define ARP_PROBE_TIME 7 /* seconds */
185 static u_int32_t arpt_probe = ARP_PROBE_TIME;
186 SYSCTL_UINT(_net_link_ether_inet, OID_AUTO, probe_intvl,
187 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_probe, 0, "");
188
189 static int arpt_keep = (20 * 60); /* once resolved, good for 20 more minutes */
190 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age,
191 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, "");
192
193 static int arpt_down = 20; /* once declared down, don't send for 20 sec */
194 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time,
195 CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, "");
196
197 static int arp_llreach_base = 120; /* seconds */
198 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base,
199 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, 0,
200 "default ARP link-layer reachability max lifetime (in seconds)");
201
202 #define ARP_UNICAST_LIMIT 3 /* # of probes until ARP refresh broadcast */
203 static u_int32_t arp_unicast_lim = ARP_UNICAST_LIMIT;
204 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_unicast_lim,
205 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_unicast_lim, ARP_UNICAST_LIMIT,
206 "number of unicast ARP refresh probes before using broadcast");
207
208 static u_int32_t arp_maxtries = 5;
209 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries,
210 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxtries, 0, "");
211
212 static u_int32_t arp_maxhold = 16;
213 SYSCTL_UINT(_net_link_ether_inet, OID_AUTO, maxhold,
214 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxhold, 0, "");
215
216 static int useloopback = 1; /* use loopback interface for local traffic */
217 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback,
218 CTLFLAG_RW | CTLFLAG_LOCKED, &useloopback, 0, "");
219
220 static int arp_proxyall = 0;
221 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall,
222 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_proxyall, 0, "");
223
224 static int arp_sendllconflict = 0;
225 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict,
226 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_sendllconflict, 0, "");
227
228 static int log_arp_warnings = 0; /* Thread safe: no accumulated state */
229 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings,
230 CTLFLAG_RW | CTLFLAG_LOCKED,
231 &log_arp_warnings, 0,
232 "log arp warning messages");
233
234 static int keep_announcements = 1; /* Thread safe: no aging of state */
235 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, keep_announcements,
236 CTLFLAG_RW | CTLFLAG_LOCKED,
237 &keep_announcements, 0,
238 "keep arp announcements");
239
240 static int send_conflicting_probes = 1; /* Thread safe: no accumulated state */
241 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes,
242 CTLFLAG_RW | CTLFLAG_LOCKED,
243 &send_conflicting_probes, 0,
244 "send conflicting link-local arp probes");
245
246 static int arp_verbose;
247 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, verbose,
248 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_verbose, 0, "");
249
250 static uint32_t arp_maxhold_total = 1024; /* max total packets in the holdq */
251 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxhold_total,
252 CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxhold_total, 0, "");
253
254
255 /*
256 * Generally protected by rnh_lock; use atomic operations on fields
257 * that are also modified outside of that lock (if needed).
258 */
259 struct arpstat arpstat __attribute__((aligned(sizeof(uint64_t))));
260 SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, stats,
261 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
262 0, 0, arp_getstat, "S,arpstat",
263 "ARP statistics (struct arpstat, net/if_arp.h)");
264
265 static KALLOC_TYPE_DEFINE(llinfo_arp_zone, struct llinfo_arp, NET_KT_DEFAULT);
266
267 void
arp_init(void)268 arp_init(void)
269 {
270 VERIFY(!arpinit_done);
271
272 LIST_INIT(&llinfo_arp);
273
274 arpinit_done = 1;
275 }
276
277 static struct llinfo_arp *
arp_llinfo_alloc(zalloc_flags_t how)278 arp_llinfo_alloc(zalloc_flags_t how)
279 {
280 struct llinfo_arp *la = zalloc_flags(llinfo_arp_zone, how | Z_ZERO);
281
282 if (la) {
283 /*
284 * The type of queue (Q_DROPHEAD) here is just a hint;
285 * the actual logic that works on this queue performs
286 * a head drop, details in arp_llinfo_addq().
287 */
288 _qinit(&la->la_holdq, Q_DROPHEAD, (arp_maxhold == 0) ?
289 (uint32_t)-1 : arp_maxhold, QP_MBUF);
290 }
291 return la;
292 }
293
294 static void
arp_llinfo_free(void * arg)295 arp_llinfo_free(void *arg)
296 {
297 struct llinfo_arp *la = arg;
298
299 if (la->la_le.le_next != NULL || la->la_le.le_prev != NULL) {
300 panic("%s: trying to free %p when it is in use", __func__, la);
301 /* NOTREACHED */
302 }
303
304 /* Free any held packets */
305 (void) arp_llinfo_flushq(la);
306
307 /* Purge any link-layer info caching */
308 VERIFY(la->la_rt->rt_llinfo == la);
309 if (la->la_rt->rt_llinfo_purge != NULL) {
310 la->la_rt->rt_llinfo_purge(la->la_rt);
311 }
312
313 zfree(llinfo_arp_zone, la);
314 }
315
316 static bool
arp_llinfo_addq(struct llinfo_arp * la,struct mbuf * m)317 arp_llinfo_addq(struct llinfo_arp *la, struct mbuf *m)
318 {
319 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
320
321 if (arpstat.held >= arp_maxhold_total) {
322 if (arp_verbose) {
323 log(LOG_DEBUG,
324 "%s: dropping packet due to maxhold_total\n",
325 __func__);
326 }
327 os_atomic_inc(&arpstat.dropped, relaxed);
328 return false;
329 }
330
331 if (qlen(&la->la_holdq) >= qlimit(&la->la_holdq)) {
332 struct mbuf *_m;
333 /* prune less than CTL, else take what's at the head */
334 _getq_scidx_lt(&la->la_holdq, &pkt, SCIDX_CTL);
335 _m = pkt.cp_mbuf;
336 if (_m == NULL) {
337 _getq(&la->la_holdq, &pkt);
338 _m = pkt.cp_mbuf;
339 }
340 VERIFY(_m != NULL);
341 if (arp_verbose) {
342 log(LOG_DEBUG, "%s: dropping packet (scidx %u)\n",
343 __func__, MBUF_SCIDX(mbuf_get_service_class(_m)));
344 }
345 m_freem(_m);
346 os_atomic_inc(&arpstat.dropped, relaxed);
347 os_atomic_dec(&arpstat.held, relaxed);
348 }
349 CLASSQ_PKT_INIT_MBUF(&pkt, m);
350 _addq(&la->la_holdq, &pkt);
351 os_atomic_inc(&arpstat.held, relaxed);
352 if (arp_verbose) {
353 log(LOG_DEBUG, "%s: enqueued packet (scidx %u), qlen now %u\n",
354 __func__, MBUF_SCIDX(mbuf_get_service_class(m)),
355 qlen(&la->la_holdq));
356 }
357
358 return true;
359 }
360
361 static uint32_t
arp_llinfo_flushq(struct llinfo_arp * la)362 arp_llinfo_flushq(struct llinfo_arp *la)
363 {
364 uint32_t held = qlen(&la->la_holdq);
365
366 if (held != 0) {
367 os_atomic_add(&arpstat.purged, held, relaxed);
368 os_atomic_add(&arpstat.held, -held, relaxed);
369 _flushq(&la->la_holdq);
370 }
371 la->la_prbreq_cnt = 0;
372 VERIFY(qempty(&la->la_holdq));
373 return held;
374 }
375
376 static void
arp_llinfo_purge(struct rtentry * rt)377 arp_llinfo_purge(struct rtentry *rt)
378 {
379 struct llinfo_arp *la = rt->rt_llinfo;
380
381 RT_LOCK_ASSERT_HELD(rt);
382 VERIFY(rt->rt_llinfo_purge == arp_llinfo_purge && la != NULL);
383
384 if (la->la_llreach != NULL) {
385 RT_CONVERT_LOCK(rt);
386 ifnet_llreach_free(la->la_llreach);
387 la->la_llreach = NULL;
388 }
389 la->la_lastused = 0;
390 }
391
392 static void
arp_llinfo_get_ri(struct rtentry * rt,struct rt_reach_info * ri)393 arp_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
394 {
395 struct llinfo_arp *la = rt->rt_llinfo;
396 struct if_llreach *lr = la->la_llreach;
397
398 if (lr == NULL) {
399 bzero(ri, sizeof(*ri));
400 ri->ri_rssi = IFNET_RSSI_UNKNOWN;
401 ri->ri_lqm = IFNET_LQM_THRESH_OFF;
402 ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
403 } else {
404 IFLR_LOCK(lr);
405 /* Export to rt_reach_info structure */
406 ifnet_lr2ri(lr, ri);
407 /* Export ARP send expiration (calendar) time */
408 ri->ri_snd_expire =
409 ifnet_llreach_up2calexp(lr, la->la_lastused);
410 IFLR_UNLOCK(lr);
411 }
412 }
413
414 static void
arp_llinfo_get_iflri(struct rtentry * rt,struct ifnet_llreach_info * iflri)415 arp_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
416 {
417 struct llinfo_arp *la = rt->rt_llinfo;
418 struct if_llreach *lr = la->la_llreach;
419
420 if (lr == NULL) {
421 bzero(iflri, sizeof(*iflri));
422 iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
423 iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
424 iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
425 } else {
426 IFLR_LOCK(lr);
427 /* Export to ifnet_llreach_info structure */
428 ifnet_lr2iflri(lr, iflri);
429 /* Export ARP send expiration (uptime) time */
430 iflri->iflri_snd_expire =
431 ifnet_llreach_up2upexp(lr, la->la_lastused);
432 IFLR_UNLOCK(lr);
433 }
434 }
435
436 static void
arp_llinfo_refresh(struct rtentry * rt)437 arp_llinfo_refresh(struct rtentry *rt)
438 {
439 uint64_t timenow = net_uptime();
440 /*
441 * If route entry is permanent or if expiry is less
442 * than timenow and extra time taken for unicast probe
443 * we can't expedite the refresh
444 */
445 if ((rt->rt_expire == 0) ||
446 (rt->rt_flags & RTF_STATIC) ||
447 !(rt->rt_flags & RTF_LLINFO)) {
448 return;
449 }
450
451 if (rt->rt_expire > timenow) {
452 rt->rt_expire = timenow;
453 }
454 return;
455 }
456
457 void
arp_llreach_set_reachable(struct ifnet * ifp,void * addr,unsigned int alen)458 arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
459 {
460 /* Nothing more to do if it's disabled */
461 if (arp_llreach_base == 0) {
462 return;
463 }
464
465 ifnet_llreach_set_reachable(ifp, ETHERTYPE_IP, addr, alen);
466 }
467
468 static __inline void
arp_llreach_use(struct llinfo_arp * la)469 arp_llreach_use(struct llinfo_arp *la)
470 {
471 if (la->la_llreach != NULL) {
472 la->la_lastused = net_uptime();
473 }
474 }
475
476 static __inline int
arp_llreach_reachable(struct llinfo_arp * la)477 arp_llreach_reachable(struct llinfo_arp *la)
478 {
479 struct if_llreach *lr;
480 const char *why = NULL;
481
482 /* Nothing more to do if it's disabled; pretend it's reachable */
483 if (arp_llreach_base == 0) {
484 return 1;
485 }
486
487 if ((lr = la->la_llreach) == NULL) {
488 /*
489 * Link-layer reachability record isn't present for this
490 * ARP entry; pretend it's reachable and use it as is.
491 */
492 return 1;
493 } else if (ifnet_llreach_reachable(lr)) {
494 /*
495 * Record is present, it's not shared with other ARP
496 * entries and a packet has recently been received
497 * from the remote host; consider it reachable.
498 */
499 if (lr->lr_reqcnt == 1) {
500 return 1;
501 }
502
503 /* Prime it up, if this is the first time */
504 if (la->la_lastused == 0) {
505 VERIFY(la->la_llreach != NULL);
506 arp_llreach_use(la);
507 }
508
509 /*
510 * Record is present and shared with one or more ARP
511 * entries, and a packet has recently been received
512 * from the remote host. Since it's shared by more
513 * than one IP addresses, we can't rely on the link-
514 * layer reachability alone; consider it reachable if
515 * this ARP entry has been used "recently."
516 */
517 if (ifnet_llreach_reachable_delta(lr, la->la_lastused)) {
518 return 1;
519 }
520
521 why = "has alias(es) and hasn't been used in a while";
522 } else {
523 why = "haven't heard from it in a while";
524 }
525
526 if (arp_verbose > 1) {
527 char tmp[MAX_IPv4_STR_LEN];
528 u_int64_t now = net_uptime();
529
530 log(LOG_DEBUG, "%s: ARP probe(s) needed for %s; "
531 "%s [lastused %lld, lastrcvd %lld] secs ago\n",
532 if_name(lr->lr_ifp), inet_ntop(AF_INET,
533 &SIN(rt_key(la->la_rt))->sin_addr, tmp, sizeof(tmp)), why,
534 (la->la_lastused ? (int64_t)(now - la->la_lastused) : -1),
535 (lr->lr_lastrcvd ? (int64_t)(now - lr->lr_lastrcvd) : -1));
536 }
537 return 0;
538 }
539
540 /*
541 * Obtain a link-layer source cache entry for the sender.
542 *
543 * NOTE: This is currently only for ARP/Ethernet.
544 */
545 static void
arp_llreach_alloc(struct rtentry * rt,struct ifnet * ifp,void * addr,unsigned int alen,boolean_t solicited,uint32_t * p_rt_event_code)546 arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr,
547 unsigned int alen, boolean_t solicited, uint32_t *p_rt_event_code)
548 {
549 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
550 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
551
552 if (arp_llreach_base != 0 && rt->rt_expire != 0 &&
553 !(rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
554 ifp->if_addrlen == IF_LLREACH_MAXLEN && /* Ethernet */
555 alen == ifp->if_addrlen) {
556 struct llinfo_arp *la = rt->rt_llinfo;
557 struct if_llreach *lr;
558 const char *why = NULL, *type = "";
559
560 /* Become a regular mutex, just in case */
561 RT_CONVERT_LOCK(rt);
562
563 if ((lr = la->la_llreach) != NULL) {
564 type = (solicited ? "ARP reply" : "ARP announcement");
565 /*
566 * If target has changed, create a new record;
567 * otherwise keep existing record.
568 */
569 IFLR_LOCK(lr);
570 if (bcmp(addr, lr->lr_key.addr, alen) != 0) {
571 IFLR_UNLOCK(lr);
572 /* Purge any link-layer info caching */
573 VERIFY(rt->rt_llinfo_purge != NULL);
574 rt->rt_llinfo_purge(rt);
575 lr = NULL;
576 why = " for different target HW address; "
577 "using new llreach record";
578 *p_rt_event_code = ROUTE_LLENTRY_CHANGED;
579 } else {
580 /*
581 * If we were doing unicast probing, we need to
582 * deliver an event for neighbor cache resolution
583 */
584 if (lr->lr_probes != 0) {
585 *p_rt_event_code = ROUTE_LLENTRY_RESOLVED;
586 }
587
588 lr->lr_probes = 0; /* reset probe count */
589 IFLR_UNLOCK(lr);
590 if (solicited) {
591 why = " for same target HW address; "
592 "keeping existing llreach record";
593 }
594 }
595 }
596
597 if (lr == NULL) {
598 lr = la->la_llreach = ifnet_llreach_alloc(ifp,
599 ETHERTYPE_IP, addr, alen, arp_llreach_base);
600 if (lr != NULL) {
601 lr->lr_probes = 0; /* reset probe count */
602 if (why == NULL) {
603 why = "creating new llreach record";
604 }
605 }
606 *p_rt_event_code = ROUTE_LLENTRY_RESOLVED;
607 }
608
609 if (arp_verbose > 1 && lr != NULL && why != NULL) {
610 char tmp[MAX_IPv4_STR_LEN];
611
612 log(LOG_DEBUG, "%s: %s%s for %s\n", if_name(ifp),
613 type, why, inet_ntop(AF_INET,
614 &SIN(rt_key(rt))->sin_addr, tmp, sizeof(tmp)));
615 }
616 }
617 }
618
/*
 * Argument block handed to arptfree() while walking the ARP list; the
 * two booleans select the mode of the walk and the counters accumulate
 * what the walk encountered.
 */
struct arptf_arg {
	boolean_t draining;     /* set by in_arpdrain(): ignore expiration time */
	boolean_t probing;      /* set by arp_probe(): handle probe expiration */
	uint32_t killed;        /* entries deleted from the routing table */
	uint32_t aging;         /* entries that have not expired yet */
	uint32_t sticky;        /* permanent (rt_expire == 0) or RTF_STATIC entries */
	uint32_t found;         /* entries visited */
	uint32_t qlen;          /* held packets plus pending probe requests */
	uint32_t qsize;         /* sum of qsize() over the hold queues */
};
629
630 /*
631 * Free an arp entry.
632 */
633 static void
arptfree(struct llinfo_arp * la,void * arg)634 arptfree(struct llinfo_arp *la, void *arg)
635 {
636 struct arptf_arg *ap = arg;
637 struct rtentry *rt = la->la_rt;
638 uint64_t timenow;
639
640 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
641
642 /* rnh_lock acquired by caller protects rt from going away */
643 RT_LOCK(rt);
644
645 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
646 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
647
648 ap->found++;
649 timenow = net_uptime();
650
651 /* If we're probing, flush out held packets upon probe expiration */
652 if (ap->probing && (la->la_flags & LLINFO_PROBING) &&
653 la->la_probeexp <= timenow) {
654 struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
655 if (sdl != NULL) {
656 sdl->sdl_alen = 0;
657 }
658 (void) arp_llinfo_flushq(la);
659 /*
660 * Enqueue work item to invoke callback for this route entry
661 */
662 route_event_enqueue_nwk_wq_entry(rt, NULL,
663 ROUTE_LLENTRY_UNREACH, NULL, TRUE);
664 }
665
666 /*
667 * The following is mostly being used to arm the timer
668 * again and for logging.
669 * qlen is used to re-arm the timer. Therefore, pure probe
670 * requests can be considered as 0 length packets
671 * contributing only to length but not to the size.
672 */
673 ap->qlen += qlen(&la->la_holdq);
674 ap->qlen += la->la_prbreq_cnt;
675 ap->qsize += qsize(&la->la_holdq);
676
677 if (rt->rt_expire == 0 || (rt->rt_flags & RTF_STATIC)) {
678 ap->sticky++;
679 /* ARP entry is permanent? */
680 if (rt->rt_expire == 0) {
681 RT_UNLOCK(rt);
682 return;
683 }
684 }
685
686 /* ARP entry hasn't expired and we're not draining? */
687 if (!ap->draining && rt->rt_expire > timenow) {
688 RT_UNLOCK(rt);
689 ap->aging++;
690 return;
691 }
692
693 if (rt->rt_refcnt > 0) {
694 /*
695 * ARP entry has expired, with outstanding refcnt.
696 * If we're not draining, force ARP query to be
697 * generated next time this entry is used.
698 */
699 if (!ap->draining && !ap->probing) {
700 struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
701 if (sdl != NULL) {
702 sdl->sdl_alen = 0;
703 }
704 la->la_asked = 0;
705 rt->rt_flags &= ~RTF_REJECT;
706 }
707 RT_UNLOCK(rt);
708 } else if (!(rt->rt_flags & RTF_STATIC) && !ap->probing) {
709 /*
710 * ARP entry has no outstanding refcnt, and we're either
711 * draining or it has expired; delete it from the routing
712 * table. Safe to drop rt_lock and use rt_key, since holding
713 * rnh_lock here prevents another thread from calling
714 * rt_setgate() on this route.
715 */
716 RT_UNLOCK(rt);
717 rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
718 rt_mask(rt), 0, NULL);
719 arpstat.timeouts++;
720 ap->killed++;
721 } else {
722 /* ARP entry is static; let it linger */
723 RT_UNLOCK(rt);
724 }
725 }
726
727 void
in_arpdrain(void * arg)728 in_arpdrain(void *arg)
729 {
730 #pragma unused(arg)
731 struct llinfo_arp *la, *ola;
732 struct arptf_arg farg;
733
734 if (arp_verbose) {
735 log(LOG_DEBUG, "%s: draining ARP entries\n", __func__);
736 }
737
738 lck_mtx_lock(rnh_lock);
739 la = llinfo_arp.lh_first;
740 bzero(&farg, sizeof(farg));
741 farg.draining = TRUE;
742 while ((ola = la) != NULL) {
743 la = la->la_le.le_next;
744 arptfree(ola, &farg);
745 }
746 if (arp_verbose) {
747 log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
748 "%u pkts held (%u bytes)\n", __func__, farg.found,
749 farg.aging, farg.sticky, farg.killed, farg.qlen,
750 farg.qsize);
751 }
752 lck_mtx_unlock(rnh_lock);
753 }
754
755 /*
756 * Timeout routine. Age arp_tab entries periodically.
757 */
758 static void
arp_timeout(thread_call_param_t arg0,thread_call_param_t arg1)759 arp_timeout(thread_call_param_t arg0, thread_call_param_t arg1)
760 {
761 #pragma unused(arg0, arg1)
762 struct llinfo_arp *la, *ola;
763 struct timeval atv;
764 struct arptf_arg farg;
765
766 lck_mtx_lock(rnh_lock);
767 la = llinfo_arp.lh_first;
768 bzero(&farg, sizeof(farg));
769 while ((ola = la) != NULL) {
770 la = la->la_le.le_next;
771 arptfree(ola, &farg);
772 }
773 if (arp_verbose) {
774 log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
775 "%u pkts held (%u bytes)\n", __func__, farg.found,
776 farg.aging, farg.sticky, farg.killed, farg.qlen,
777 farg.qsize);
778 }
779 atv.tv_usec = 0;
780 atv.tv_sec = MAX(arpt_prune, 5);
781 /* re-arm the timer if there's work to do */
782 arp_timeout_run = 0;
783 if (farg.aging > 0) {
784 arp_sched_timeout(&atv);
785 } else if (arp_verbose) {
786 log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
787 }
788 lck_mtx_unlock(rnh_lock);
789 }
790
791 static void
arp_sched_timeout(struct timeval * atv)792 arp_sched_timeout(struct timeval *atv)
793 {
794 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
795
796 if (!arp_timeout_run) {
797 struct timeval tv;
798 uint64_t deadline = 0;
799
800 if (arp_timeout_tcall == NULL) {
801 arp_timeout_tcall =
802 thread_call_allocate(arp_timeout, NULL);
803 VERIFY(arp_timeout_tcall != NULL);
804 }
805
806 if (atv == NULL) {
807 tv.tv_usec = 0;
808 tv.tv_sec = MAX(arpt_prune / 5, 1);
809 atv = &tv;
810 }
811 if (arp_verbose) {
812 log(LOG_DEBUG, "%s: timer scheduled in "
813 "T+%llus.%lluu\n", __func__,
814 (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
815 }
816 arp_timeout_run = 1;
817
818 clock_deadline_for_periodic_event(atv->tv_sec * NSEC_PER_SEC,
819 mach_absolute_time(), &deadline);
820 (void) thread_call_enter_delayed(arp_timeout_tcall, deadline);
821 }
822 }
823
824 /*
825 * Probe routine.
826 */
827 static void
arp_probe(thread_call_param_t arg0,thread_call_param_t arg1)828 arp_probe(thread_call_param_t arg0, thread_call_param_t arg1)
829 {
830 #pragma unused(arg0, arg1)
831 struct llinfo_arp *la, *ola;
832 struct timeval atv;
833 struct arptf_arg farg;
834
835 lck_mtx_lock(rnh_lock);
836 la = llinfo_arp.lh_first;
837 bzero(&farg, sizeof(farg));
838 farg.probing = TRUE;
839 while ((ola = la) != NULL) {
840 la = la->la_le.le_next;
841 arptfree(ola, &farg);
842 }
843 if (arp_verbose) {
844 log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u; "
845 "%u pkts held (%u bytes)\n", __func__, farg.found,
846 farg.aging, farg.sticky, farg.killed, farg.qlen,
847 farg.qsize);
848 }
849 atv.tv_usec = 0;
850 atv.tv_sec = MAX(arpt_probe, ARP_PROBE_TIME);
851 /* re-arm the probe if there's work to do */
852 arp_probe_run = 0;
853 if (farg.qlen > 0) {
854 arp_sched_probe(&atv);
855 } else if (arp_verbose) {
856 log(LOG_DEBUG, "%s: not rescheduling probe\n", __func__);
857 }
858 lck_mtx_unlock(rnh_lock);
859 }
860
861 static void
arp_sched_probe(struct timeval * atv)862 arp_sched_probe(struct timeval *atv)
863 {
864 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
865
866 if (!arp_probe_run) {
867 struct timeval tv;
868 uint64_t deadline = 0;
869
870 if (arp_probe_tcall == NULL) {
871 arp_probe_tcall =
872 thread_call_allocate(arp_probe, NULL);
873 VERIFY(arp_probe_tcall != NULL);
874 }
875
876 if (atv == NULL) {
877 tv.tv_usec = 0;
878 tv.tv_sec = MAX(arpt_probe, ARP_PROBE_TIME);
879 atv = &tv;
880 }
881 if (arp_verbose) {
882 log(LOG_DEBUG, "%s: probe scheduled in "
883 "T+%llus.%lluu\n", __func__,
884 (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
885 }
886 arp_probe_run = 1;
887
888 clock_deadline_for_periodic_event(atv->tv_sec * NSEC_PER_SEC,
889 mach_absolute_time(), &deadline);
890 (void) thread_call_enter_delayed(arp_probe_tcall, deadline);
891 }
892 }
893
894 /*
895 * ifa_rtrequest() callback
896 */
897 static void
arp_rtrequest(int req,struct rtentry * rt,struct sockaddr * sa)898 arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
899 {
900 #pragma unused(sa)
901 struct sockaddr *gate = rt->rt_gateway;
902 struct llinfo_arp *la = rt->rt_llinfo;
903 static struct sockaddr_dl null_sdl =
904 { .sdl_len = sizeof(null_sdl), .sdl_family = AF_LINK };
905 uint64_t timenow;
906 char buf[MAX_IPv4_STR_LEN];
907
908 VERIFY(arpinit_done);
909 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
910 RT_LOCK_ASSERT_HELD(rt);
911
912 if (rt->rt_flags & RTF_GATEWAY) {
913 return;
914 }
915
916 timenow = net_uptime();
917 switch (req) {
918 case RTM_ADD:
919 /*
920 * XXX: If this is a manually added route to interface
921 * such as older version of routed or gated might provide,
922 * restore cloning bit.
923 */
924 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL &&
925 SIN(rt_mask(rt))->sin_addr.s_addr != INADDR_BROADCAST) {
926 rt->rt_flags |= RTF_CLONING;
927 }
928
929 if (rt->rt_flags & RTF_CLONING) {
930 /*
931 * Case 1: This route should come from a route to iface.
932 */
933 if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
934 gate = rt->rt_gateway;
935 SDL(gate)->sdl_type = rt->rt_ifp->if_type;
936 SDL(gate)->sdl_index = rt->rt_ifp->if_index;
937 /*
938 * In case we're called before 1.0 sec.
939 * has elapsed.
940 */
941 rt_setexpire(rt, MAX(timenow, 1));
942 }
943 break;
944 }
945 /* Announce a new entry if requested. */
946 if (rt->rt_flags & RTF_ANNOUNCE) {
947 if (la != NULL) {
948 arp_llreach_use(la); /* Mark use timestamp */
949 }
950 if ((rt->rt_ifp->if_flags & IFF_NOARP) == 0) {
951 RT_UNLOCK(rt);
952 dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
953 SDL(gate), rt_key(rt), NULL, rt_key(rt), 0);
954 RT_LOCK(rt);
955 arpstat.txannounces++;
956 }
957 }
958 OS_FALLTHROUGH;
959 case RTM_RESOLVE:
960 if (gate->sa_family != AF_LINK ||
961 gate->sa_len < sizeof(null_sdl)) {
962 arpstat.invalidreqs++;
963 log(LOG_ERR, "%s: route to %s has bad gateway address "
964 "(sa_family %u sa_len %u) on %s\n",
965 __func__, inet_ntop(AF_INET,
966 &SIN(rt_key(rt))->sin_addr.s_addr, buf,
967 sizeof(buf)), gate->sa_family, gate->sa_len,
968 if_name(rt->rt_ifp));
969 break;
970 }
971 SDL(gate)->sdl_type = rt->rt_ifp->if_type;
972 SDL(gate)->sdl_index = rt->rt_ifp->if_index;
973
974 if (la != NULL) {
975 break; /* This happens on a route change */
976 }
977 /*
978 * Case 2: This route may come from cloning, or a manual route
979 * add with a LL address.
980 */
981 rt->rt_llinfo = la = arp_llinfo_alloc(Z_WAITOK);
982
983 rt->rt_llinfo_get_ri = arp_llinfo_get_ri;
984 rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri;
985 rt->rt_llinfo_purge = arp_llinfo_purge;
986 rt->rt_llinfo_free = arp_llinfo_free;
987 rt->rt_llinfo_refresh = arp_llinfo_refresh;
988 rt->rt_flags |= RTF_LLINFO;
989 la->la_rt = rt;
990 LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
991 arpstat.inuse++;
992
993 /* We have at least one entry; arm the timer if not already */
994 arp_sched_timeout(NULL);
995
996 /*
997 * This keeps the multicast addresses from showing up
998 * in `arp -a' listings as unresolved. It's not actually
999 * functional. Then the same for broadcast. For IPv4
1000 * link-local address, keep the entry around even after
1001 * it has expired.
1002 */
1003 if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
1004 RT_UNLOCK(rt);
1005 dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
1006 sizeof(struct sockaddr_dl));
1007 RT_LOCK(rt);
1008 rt_setexpire(rt, 0);
1009 } else if (in_broadcast(SIN(rt_key(rt))->sin_addr,
1010 rt->rt_ifp)) {
1011 struct sockaddr_dl *gate_ll = SDL(gate);
1012 size_t broadcast_len;
1013 int ret = ifnet_llbroadcast_copy_bytes(rt->rt_ifp,
1014 LLADDR(gate_ll), sizeof(gate_ll->sdl_data),
1015 &broadcast_len);
1016 if (ret == 0 && broadcast_len <= UINT8_MAX) {
1017 gate_ll->sdl_alen = (u_char)broadcast_len;
1018 gate_ll->sdl_family = AF_LINK;
1019 gate_ll->sdl_len = sizeof(struct sockaddr_dl);
1020 }
1021 /* In case we're called before 1.0 sec. has elapsed */
1022 rt_setexpire(rt, MAX(timenow, 1));
1023 } else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))->
1024 sin_addr.s_addr))) {
1025 rt->rt_flags |= RTF_STATIC;
1026 }
1027
1028 /* Set default maximum number of retries */
1029 la->la_maxtries = arp_maxtries;
1030
1031 /* Become a regular mutex, just in case */
1032 RT_CONVERT_LOCK(rt);
1033 IFA_LOCK_SPIN(rt->rt_ifa);
1034 if (SIN(rt_key(rt))->sin_addr.s_addr ==
1035 (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
1036 IFA_UNLOCK(rt->rt_ifa);
1037 /*
1038 * This test used to be
1039 * if (loif.if_flags & IFF_UP)
1040 * It allowed local traffic to be forced through the
1041 * hardware by configuring the loopback down. However,
1042 * it causes problems during network configuration
1043 * for boards that can't receive packets they send.
1044 * It is now necessary to clear "useloopback" and
1045 * remove the route to force traffic out to the
1046 * hardware.
1047 */
1048 rt_setexpire(rt, 0);
1049 ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)),
1050 SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
1051 if (useloopback) {
1052 if (rt->rt_ifp != lo_ifp) {
1053 /*
1054 * Purge any link-layer info caching.
1055 */
1056 if (rt->rt_llinfo_purge != NULL) {
1057 rt->rt_llinfo_purge(rt);
1058 }
1059
1060 /*
1061 * Adjust route ref count for the
1062 * interfaces.
1063 */
1064 if (rt->rt_if_ref_fn != NULL) {
1065 rt->rt_if_ref_fn(lo_ifp, 1);
1066 rt->rt_if_ref_fn(rt->rt_ifp, -1);
1067 }
1068 }
1069 rt->rt_ifp = lo_ifp;
1070 /*
1071 * If rmx_mtu is not locked, update it
1072 * to the MTU used by the new interface.
1073 */
1074 if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
1075 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
1076 }
1077 }
1078 } else {
1079 IFA_UNLOCK(rt->rt_ifa);
1080 }
1081 break;
1082
1083 case RTM_DELETE:
1084 if (la == NULL) {
1085 break;
1086 }
1087 /*
1088 * Unchain it but defer the actual freeing until the route
1089 * itself is to be freed. rt->rt_llinfo still points to
1090 * llinfo_arp, and likewise, la->la_rt still points to this
1091 * route entry, except that RTF_LLINFO is now cleared.
1092 */
1093 LIST_REMOVE(la, la_le);
1094 la->la_le.le_next = NULL;
1095 la->la_le.le_prev = NULL;
1096 arpstat.inuse--;
1097
1098 /*
1099 * Purge any link-layer info caching.
1100 */
1101 if (rt->rt_llinfo_purge != NULL) {
1102 rt->rt_llinfo_purge(rt);
1103 }
1104
1105 rt->rt_flags &= ~RTF_LLINFO;
1106 (void) arp_llinfo_flushq(la);
1107 }
1108 }
1109
1110 /*
1111 * convert hardware address to hex string for logging errors.
1112 */
1113 static const char *
sdl_addr_to_hex(const struct sockaddr_dl * sdl,char * orig_buf,int buflen)1114 sdl_addr_to_hex(const struct sockaddr_dl *sdl, char *orig_buf, int buflen)
1115 {
1116 char *buf = orig_buf;
1117 int i;
1118 const u_char *lladdr = (u_char *)(size_t)sdl->sdl_data;
1119 int maxbytes = buflen / 3;
1120
1121 if (maxbytes > sdl->sdl_alen) {
1122 maxbytes = sdl->sdl_alen;
1123 }
1124 *buf = '\0';
1125 for (i = 0; i < maxbytes; i++) {
1126 snprintf(buf, 3, "%02x", lladdr[i]);
1127 buf += 2;
1128 *buf = (i == maxbytes - 1) ? '\0' : ':';
1129 buf++;
1130 }
1131 return orig_buf;
1132 }
1133
1134 /*
1135 * arp_lookup_route will lookup the route for a given address.
1136 *
1137 * The address must be for a host on a local network on this interface.
1138 * If the returned route is non-NULL, the route is locked and the caller
1139 * is responsible for unlocking it and releasing its reference.
1140 */
1141 static errno_t
arp_lookup_route(const struct in_addr * addr,int create,int proxy,route_t * route,unsigned int ifscope)1142 arp_lookup_route(const struct in_addr *addr, int create, int proxy,
1143 route_t *route, unsigned int ifscope)
1144 {
1145 struct sockaddr_inarp sin =
1146 { sizeof(sin), AF_INET, 0, { 0 }, { 0 }, 0, 0 };
1147 const char *why = NULL;
1148 errno_t error = 0;
1149 route_t rt;
1150
1151 *route = NULL;
1152
1153 sin.sin_addr.s_addr = addr->s_addr;
1154 sin.sin_other = proxy ? SIN_PROXY : 0;
1155
1156 /*
1157 * If the destination is a link-local address, don't
1158 * constrain the lookup (don't scope it).
1159 */
1160 if (IN_LINKLOCAL(ntohl(addr->s_addr))) {
1161 ifscope = IFSCOPE_NONE;
1162 }
1163
1164 rt = rtalloc1_scoped((struct sockaddr *)&sin, create, 0, ifscope);
1165 if (rt == NULL) {
1166 return ENETUNREACH;
1167 }
1168
1169 RT_LOCK(rt);
1170
1171 if (rt->rt_flags & RTF_GATEWAY) {
1172 why = "host is not on local network";
1173 error = ENETUNREACH;
1174 } else if (!(rt->rt_flags & RTF_LLINFO)) {
1175 why = "could not allocate llinfo";
1176 error = ENOMEM;
1177 } else if (rt->rt_gateway->sa_family != AF_LINK) {
1178 why = "gateway route is not ours";
1179 error = EPROTONOSUPPORT;
1180 }
1181
1182 if (error != 0) {
1183 if (create && (arp_verbose || log_arp_warnings)) {
1184 char tmp[MAX_IPv4_STR_LEN];
1185 log(LOG_DEBUG, "%s: link#%d %s failed: %s\n",
1186 __func__, ifscope, inet_ntop(AF_INET, addr, tmp,
1187 sizeof(tmp)), why);
1188 }
1189
1190 /*
1191 * If there are no references to this route, and it is
1192 * a cloned route, and not static, and ARP had created
1193 * the route, then purge it from the routing table as
1194 * it is probably bogus.
1195 */
1196 if (rt->rt_refcnt == 1 &&
1197 (rt->rt_flags & (RTF_WASCLONED | RTF_STATIC)) ==
1198 RTF_WASCLONED) {
1199 /*
1200 * Prevent another thread from modiying rt_key,
1201 * rt_gateway via rt_setgate() after rt_lock is
1202 * dropped by marking the route as defunct.
1203 */
1204 rt->rt_flags |= RTF_CONDEMNED;
1205 RT_UNLOCK(rt);
1206 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
1207 rt_mask(rt), rt->rt_flags, NULL);
1208 rtfree(rt);
1209 } else {
1210 RT_REMREF_LOCKED(rt);
1211 RT_UNLOCK(rt);
1212 }
1213 return error;
1214 }
1215
1216 /*
1217 * Caller releases reference and does RT_UNLOCK(rt).
1218 */
1219 *route = rt;
1220 return 0;
1221 }
1222
1223 boolean_t
arp_is_entry_probing(route_t p_route)1224 arp_is_entry_probing(route_t p_route)
1225 {
1226 struct llinfo_arp *llinfo = p_route->rt_llinfo;
1227
1228 if (llinfo != NULL &&
1229 llinfo->la_llreach != NULL &&
1230 llinfo->la_llreach->lr_probes != 0) {
1231 return TRUE;
1232 }
1233
1234 return FALSE;
1235 }
1236
1237 __attribute__((noinline))
1238 static void
post_kev_in_arpfailure(struct ifnet * ifp)1239 post_kev_in_arpfailure(struct ifnet *ifp)
1240 {
1241 struct kev_msg ev_msg = {};
1242 struct kev_in_arpfailure in_arpfailure = {};
1243
1244 in_arpfailure.link_data.if_family = ifp->if_family;
1245 in_arpfailure.link_data.if_unit = ifp->if_unit;
1246 strlcpy(in_arpfailure.link_data.if_name, ifp->if_name, IFNAMSIZ);
1247 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1248 ev_msg.kev_class = KEV_NETWORK_CLASS;
1249 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
1250 ev_msg.event_code = KEV_INET_ARPRTRFAILURE;
1251 ev_msg.dv[0].data_ptr = &in_arpfailure;
1252 ev_msg.dv[0].data_length = sizeof(struct kev_in_arpfailure);
1253 dlil_post_complete_msg(NULL, &ev_msg);
1254 }
1255
1256 __attribute__((noinline))
1257 static void
arp_send_probe_notification(route_t route)1258 arp_send_probe_notification(route_t route)
1259 {
1260 route_event_enqueue_nwk_wq_entry(route, NULL,
1261 ROUTE_LLENTRY_PROBED, NULL, TRUE);
1262
1263 if (route->rt_flags & RTF_ROUTER) {
1264 struct radix_node_head *rnh = NULL;
1265 struct route_event rt_ev;
1266 route_event_init(&rt_ev, route, NULL, ROUTE_LLENTRY_PROBED);
1267 /*
1268 * We already have a reference on rt. The function
1269 * frees it before returning.
1270 */
1271 RT_UNLOCK(route);
1272 lck_mtx_lock(rnh_lock);
1273 rnh = rt_tables[AF_INET];
1274
1275 if (rnh != NULL) {
1276 (void) rnh->rnh_walktree(rnh,
1277 route_event_walktree, (void *)&rt_ev);
1278 }
1279 lck_mtx_unlock(rnh_lock);
1280 RT_LOCK(route);
1281 }
1282 }
1283
/*
 * This is the ARP pre-output routine; care must be taken to ensure that
 * the "hint" route never gets freed via rtfree(), since the caller may
 * have stored it inside a struct route with a reference held for that
 * placeholder.
 *
 * Resolves the link-layer address to use for an IPv4 destination on an
 * interface, sending ARP requests as needed.
 *
 *	ifp		outgoing interface; must be IFF_UP and IFF_RUNNING
 *	net_dest	IPv4 destination (sin_family must be AF_INET)
 *	ll_dest		out: buffer that receives the link-layer address
 *	ll_dest_len	size of the ll_dest buffer in bytes
 *	hint		optional route supplied by the caller; it is passed
 *			through route_to_gwroute() to obtain the route to use
 *	packet		optional packet awaiting resolution; may be NULL
 *
 * Return values:
 *	0		an existing, usable ARP entry was copied to ll_dest
 *	EJUSTRETURN	no usable entry yet; the packet (if any) has been
 *			queued on the entry's la_holdq or freed here
 *	EINVAL		ifp or net_dest is NULL
 *	EAFNOSUPPORT	net_dest is not AF_INET
 *	ENETDOWN	ifp is not both IFF_UP and IFF_RUNNING
 *	ENOTSUP		ifp has IFF_NOARP set
 *	EHOSTUNREACH	no route / link-layer info could be obtained, or the
 *			entry exhausted la_maxtries requests and was marked
 *			RTF_REJECT (the packet is left for the caller to free)
 *	other		error from route_to_gwroute() or arp_lookup_route();
 *			for broadcast and multicast destinations, whatever
 *			ifnet_llbroadcast_copy_bytes() or dlil_resolve_multi()
 *			returned
 */
errno_t
arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
    struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
    mbuf_t packet)
{
	route_t route = NULL;   /* output route */
	errno_t result = 0;
	struct sockaddr_dl *gateway;
	struct llinfo_arp *llinfo = NULL;
	boolean_t usable, probing = FALSE;
	uint64_t timenow;
	struct if_llreach *lr;
	struct ifaddr *rt_ifa;
	struct sockaddr *sa;
	uint32_t rtflags;
	struct sockaddr_dl sdl = {};
	boolean_t send_probe_notif = FALSE;
	boolean_t enqueued = FALSE;

	/* Argument and interface state checks; nothing is held yet */
	if (ifp == NULL || net_dest == NULL) {
		return EINVAL;
	}

	if (net_dest->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}

	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING)) {
		return ENETDOWN;
	}

	/*
	 * If we were given a route, verify the route and grab the gateway
	 */
	if (hint != NULL) {
		/*
		 * Callee holds a reference on the route and returns
		 * with the route entry locked, upon success.
		 */
		result = route_to_gwroute((const struct sockaddr *)
		    net_dest, hint, &route);
		if (result != 0) {
			return result;
		}
		if (route != NULL) {
			RT_LOCK_ASSERT_HELD(route);
		}
	}

	/*
	 * Broadcast: either the packet is flagged M_BCAST or the address
	 * is a broadcast address on ifp.  Hand back the interface's
	 * link-layer broadcast address; no ARP entry is consulted.
	 */
	if ((packet != NULL && (packet->m_flags & M_BCAST)) ||
	    in_broadcast(net_dest->sin_addr, ifp)) {
		size_t broadcast_len;
		bzero(ll_dest, ll_dest_len);
		result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest),
		    ll_dest_len - offsetof(struct sockaddr_dl, sdl_data),
		    &broadcast_len);
		/* sdl_alen is a u_char, hence the UINT8_MAX bound */
		if (result == 0 && broadcast_len <= UINT8_MAX) {
			ll_dest->sdl_alen = (u_char)broadcast_len;
			ll_dest->sdl_family = AF_LINK;
			ll_dest->sdl_len = sizeof(struct sockaddr_dl);
		}
		goto release;
	}
	/*
	 * Multicast: let the interface map the address.  The route lock
	 * (if a route is held) is dropped across dlil_resolve_multi() and
	 * re-taken so that the release path finds it locked.
	 */
	if ((packet != NULL && (packet->m_flags & M_MCAST)) ||
	    ((ifp->if_flags & IFF_MULTICAST) &&
	    IN_MULTICAST(ntohl(net_dest->sin_addr.s_addr)))) {
		if (route != NULL) {
			RT_UNLOCK(route);
		}
		result = dlil_resolve_multi(ifp,
		    (const struct sockaddr *)net_dest,
		    (struct sockaddr *)ll_dest, ll_dest_len);
		if (route != NULL) {
			RT_LOCK(route);
		}
		goto release;
	}

	/*
	 * If we didn't find a route, or the route doesn't have
	 * link layer information, trigger the creation of the
	 * route and link layer information.
	 */
	if (route == NULL || route->rt_llinfo == NULL) {
		/* Clean up now while we can */
		if (route != NULL) {
			/*
			 * The hint must not go through rtfree() (see the
			 * function header), so only its reference count
			 * is dropped.
			 */
			if (route == hint) {
				RT_REMREF_LOCKED(route);
				RT_UNLOCK(route);
			} else {
				RT_UNLOCK(route);
				rtfree(route);
			}
		}
		/*
		 * Callee holds a reference on the route and returns
		 * with the route entry locked, upon success.
		 */
		result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route,
		    ifp->if_index);
		if (result == 0) {
			RT_LOCK_ASSERT_HELD(route);
		}
	}

	/* Bail out unless we now hold a route that has ARP link-layer info */
	if (result || route == NULL || (llinfo = route->rt_llinfo) == NULL) {
		/* In case result is 0 but no route, return an error */
		if (result == 0) {
			result = EHOSTUNREACH;
		}

		if (route != NULL && route->rt_llinfo == NULL) {
			char tmp[MAX_IPv4_STR_LEN];
			log(LOG_ERR, "%s: can't allocate llinfo for %s\n",
			    __func__, inet_ntop(AF_INET, &net_dest->sin_addr,
			    tmp, sizeof(tmp)));
		}
		goto release;
	}

	/* Interfaces flagged IFF_NOARP are not resolved through ARP */
	if ((ifp->if_flags & IFF_NOARP) != 0) {
		result = ENOTSUP;
		goto release;
	}

	/*
	 * Now that we have the right route, is it filled in?
	 */
	gateway = SDL(route->rt_gateway);
	timenow = net_uptime();
	/* rt_expire and rmx_expire must be both zero or both non-zero */
	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);

	/*
	 * Usable means: not expired (rt_expire of 0 never expires) and
	 * the gateway is an AF_LINK address with a non-empty hardware
	 * address.
	 */
	usable = ((route->rt_expire == 0 || route->rt_expire > timenow) &&
	    gateway != NULL && gateway->sdl_family == AF_LINK &&
	    gateway->sdl_alen != 0);

	if (usable) {
		boolean_t unreachable = !arp_llreach_reachable(llinfo);

		/* Entry is usable, so fill in info for caller */
		bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len));
		result = 0;
		arp_llreach_use(llinfo);        /* Mark use timestamp */

		/* Without a reachability record there is nothing to probe */
		lr = llinfo->la_llreach;
		if (lr == NULL) {
			goto release;
		}
		rt_ifa = route->rt_ifa;

		/* Become a regular mutex, just in case */
		RT_CONVERT_LOCK(route);
		IFLR_LOCK_SPIN(lr);

		/*
		 * Send a unicast probe if the peer looks unreachable or a
		 * probe cycle is already in progress, as long as fewer
		 * than arp_unicast_lim probes have been sent.
		 */
		if ((unreachable || (llinfo->la_flags & LLINFO_PROBING)) &&
		    lr->lr_probes < arp_unicast_lim) {
			/*
			 * Thus mark the entry with la_probeexp deadline to
			 * trigger the probe timer to be scheduled (if not
			 * already).  This gets cleared the moment we get
			 * an ARP reply.
			 */
			probing = TRUE;
			if (lr->lr_probes == 0) {
				llinfo->la_probeexp = (timenow + arpt_probe);
				llinfo->la_flags |= LLINFO_PROBING;
				/*
				 * Provide notification that ARP unicast
				 * probing has started.
				 * We only do it for the first unicast probe
				 * attempt.
				 */
				send_probe_notif = TRUE;
			}

			/*
			 * Start the unicast probe and anticipate a reply;
			 * afterwards, return existing entry to caller and
			 * let it be used anyway.  If peer is non-existent
			 * we'll broadcast ARP next time around.
			 */
			lr->lr_probes++;
			/* Destination hardware address comes from lr_key */
			bzero(&sdl, sizeof(sdl));
			sdl.sdl_alen = ifp->if_addrlen;
			bcopy(&lr->lr_key.addr, LLADDR(&sdl),
			    ifp->if_addrlen);
			IFLR_UNLOCK(lr);
			/*
			 * Take a reference on the route's ifa so its address
			 * can be used after the route lock is dropped.
			 */
			IFA_LOCK_SPIN(rt_ifa);
			IFA_ADDREF_LOCKED(rt_ifa);
			sa = rt_ifa->ifa_addr;
			IFA_UNLOCK(rt_ifa);
			/* Snapshot the flags; rt_lock is dropped for the send */
			rtflags = route->rt_flags;
			RT_UNLOCK(route);
			dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa,
			    (const struct sockaddr_dl *)&sdl,
			    (const struct sockaddr *)net_dest, rtflags);
			IFA_REMREF(rt_ifa);
			RT_LOCK(route);
			/* result is still 0: the caller uses the existing entry */
			goto release;
		} else {
			IFLR_UNLOCK(lr);
			if (!unreachable &&
			    !(llinfo->la_flags & LLINFO_PROBING)) {
				/*
				 * Normal case where peer is still reachable,
				 * we're not probing and if_addrlen is anything
				 * but IF_LLREACH_MAXLEN.
				 */
				goto release;
			}
			/*
			 * Otherwise fall out of the usable block into the
			 * ARP request path below.
			 */
		}
	}

	/*
	 * Route wasn't complete/valid; we need to send out ARP request.
	 * If we've exceeded the limit of la_holdq, drop from the head
	 * of queue and add this packet to the tail.  If we end up with
	 * RTF_REJECT below, we'll dequeue this from tail and have the
	 * caller free the packet instead.  It's safe to do that since
	 * we still hold the route's rt_lock.
	 */
	if (packet != NULL) {
		enqueued = arp_llinfo_addq(llinfo, packet);
	} else {
		/* No packet to hold; count the request instead */
		llinfo->la_prbreq_cnt++;
	}
	/*
	 * Regardless of permanent vs. expirable entry, we need to
	 * avoid having packets sit in la_holdq forever; thus mark the
	 * entry with la_probeexp deadline to trigger the probe timer
	 * to be scheduled (if not already).  This gets cleared the
	 * moment we get an ARP reply.
	 */
	probing = TRUE;
	/* Set the deadline only when this is the first pending item */
	if ((qlen(&llinfo->la_holdq) + llinfo->la_prbreq_cnt) == 1) {
		llinfo->la_probeexp = (timenow + arpt_probe);
		llinfo->la_flags |= LLINFO_PROBING;
	}

	/* Entries with rt_expire == 0 skip the request logic below */
	if (route->rt_expire) {
		route->rt_flags &= ~RTF_REJECT;
		/*
		 * Skip sending when a request was already issued and
		 * rt_expire still equals timenow.
		 * NOTE(review): presumably this limits requests to one
		 * per net_uptime() tick -- confirm.
		 */
		if (llinfo->la_asked == 0 || route->rt_expire != timenow) {
			rt_setexpire(route, timenow);
			if (llinfo->la_asked++ < llinfo->la_maxtries) {
				boolean_t sendkev = FALSE;

				rt_ifa = route->rt_ifa;
				lr = llinfo->la_llreach;
				/* Become a regular mutex, just in case */
				RT_CONVERT_LOCK(route);
				/* Update probe count, if applicable */
				if (lr != NULL) {
					IFLR_LOCK_SPIN(lr);
					lr->lr_probes++;
					IFLR_UNLOCK(lr);
				}
				/*
				 * A router entry that has already been asked
				 * at least once before also gets an ARP
				 * failure kernel event posted below.
				 */
				if (ifp->if_addrlen == IF_LLREACH_MAXLEN &&
				    route->rt_flags & RTF_ROUTER &&
				    llinfo->la_asked > 1) {
					sendkev = TRUE;
					llinfo->la_flags |= LLINFO_RTRFAIL_EVTSENT;
				}
				/* Reference the ifa so sa is usable unlocked */
				IFA_LOCK_SPIN(rt_ifa);
				IFA_ADDREF_LOCKED(rt_ifa);
				sa = rt_ifa->ifa_addr;
				IFA_UNLOCK(rt_ifa);
				arp_llreach_use(llinfo); /* Mark use tstamp */
				rtflags = route->rt_flags;
				RT_UNLOCK(route);
				/* No target hardware address is supplied here */
				dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa,
				    NULL, (const struct sockaddr *)net_dest,
				    rtflags);
				IFA_REMREF(rt_ifa);
				if (sendkev) {
					post_kev_in_arpfailure(ifp);
				}
				RT_LOCK(route);
				goto release_just_return;
			} else {
				/*
				 * la_maxtries requests went unanswered: mark
				 * the route RTF_REJECT, extend its expiry by
				 * arpt_down, and restart the ask counter.
				 */
				route->rt_flags |= RTF_REJECT;
				rt_setexpire(route,
				    route->rt_expire + arpt_down);
				llinfo->la_asked = 0;
				/*
				 * Remove the packet that was just added above;
				 * don't free it since we're not returning
				 * EJUSTRETURN.  The caller will handle the
				 * freeing.  Since we haven't dropped rt_lock
				 * from the time of _addq() above, this packet
				 * must be at the tail.
				 */
				if (packet != NULL && enqueued) {
					classq_pkt_t pkt =
					    CLASSQ_PKT_INITIALIZER(pkt);

					_getq_tail(&llinfo->la_holdq, &pkt);
					os_atomic_dec(&arpstat.held, relaxed);
					VERIFY(pkt.cp_mbuf == packet);
				}
				result = EHOSTUNREACH;
				/*
				 * Enqueue work item to invoke callback for this route entry
				 */
				route_event_enqueue_nwk_wq_entry(route, NULL,
				    ROUTE_LLENTRY_UNREACH, NULL, TRUE);
				goto release;
			}
		}
	}


release_just_return:
	/* The packet is now held inside la_holdq or dropped */
	result = EJUSTRETURN;
	/* A packet that could not be queued is freed here */
	if (packet != NULL && !enqueued) {
		m_freem(packet);
		packet = NULL;
	}

release:
	/* Common exit: account for drops, then release the route */
	if (result == EHOSTUNREACH) {
		os_atomic_inc(&arpstat.dropped, relaxed);
	}

	if (route != NULL) {
		/* Sent while the route is still locked and referenced */
		if (send_probe_notif) {
			arp_send_probe_notification(route);
		}

		/* As above, the hint is never released through rtfree() */
		if (route == hint) {
			RT_REMREF_LOCKED(route);
			RT_UNLOCK(route);
		} else {
			RT_UNLOCK(route);
			rtfree(route);
		}
	}
	if (probing) {
		/* Do this after we drop rt_lock to preserve ordering */
		lck_mtx_lock(rnh_lock);
		arp_sched_probe(NULL);
		lck_mtx_unlock(rnh_lock);
	}
	return result;
}
1636
1637 errno_t
arp_ip_handle_input(ifnet_t ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr_in * sender_ip,const struct sockaddr_in * target_ip)1638 arp_ip_handle_input(ifnet_t ifp, u_short arpop,
1639 const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip,
1640 const struct sockaddr_in *target_ip)
1641 {
1642 char ipv4str[MAX_IPv4_STR_LEN];
1643 struct sockaddr_dl proxied = {};
1644 struct sockaddr_dl *gateway, *target_hw = NULL;
1645 struct ifaddr *ifa;
1646 struct in_ifaddr *ia;
1647 struct in_ifaddr *best_ia = NULL;
1648 struct sockaddr_in best_ia_sin;
1649 route_t route = NULL;
1650 char buf[3 * MAX_HW_LEN]; /* enough for MAX_HW_LEN byte hw address */
1651 struct llinfo_arp *llinfo;
1652 errno_t error;
1653 int created_announcement = 0;
1654 int bridged = 0, is_bridge = 0;
1655 uint32_t rt_evcode = 0;
1656
1657 /*
1658 * Here and other places within this routine where we don't hold
1659 * rnh_lock, trade accuracy for speed for the common scenarios
1660 * and avoid the use of atomic updates.
1661 */
1662 arpstat.received++;
1663
1664 /* Do not respond to requests for 0.0.0.0 */
1665 if (target_ip->sin_addr.s_addr == INADDR_ANY && arpop == ARPOP_REQUEST) {
1666 goto done;
1667 }
1668
1669 if (ifp->if_bridge) {
1670 bridged = 1;
1671 }
1672 if (ifp->if_type == IFT_BRIDGE) {
1673 is_bridge = 1;
1674 }
1675
1676 if (arpop == ARPOP_REPLY) {
1677 arpstat.rxreplies++;
1678 }
1679
1680 /*
1681 * Determine if this ARP is for us
1682 */
1683 lck_rw_lock_shared(&in_ifaddr_rwlock);
1684 TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
1685 IFA_LOCK_SPIN(&ia->ia_ifa);
1686 if (ia->ia_ifp == ifp &&
1687 ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
1688 best_ia = ia;
1689 best_ia_sin = best_ia->ia_addr;
1690 IFA_ADDREF_LOCKED(&ia->ia_ifa);
1691 IFA_UNLOCK(&ia->ia_ifa);
1692 lck_rw_done(&in_ifaddr_rwlock);
1693 goto match;
1694 }
1695 IFA_UNLOCK(&ia->ia_ifa);
1696 }
1697
1698 TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
1699 IFA_LOCK_SPIN(&ia->ia_ifa);
1700 if (ia->ia_ifp == ifp &&
1701 ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1702 best_ia = ia;
1703 best_ia_sin = best_ia->ia_addr;
1704 IFA_ADDREF_LOCKED(&ia->ia_ifa);
1705 IFA_UNLOCK(&ia->ia_ifa);
1706 lck_rw_done(&in_ifaddr_rwlock);
1707 goto match;
1708 }
1709 IFA_UNLOCK(&ia->ia_ifa);
1710 }
1711
1712 #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \
1713 (ia->ia_ifp->if_bridge == ifp->if_softc && \
1714 bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) == 0 && \
1715 addr == ia->ia_addr.sin_addr.s_addr)
1716 /*
1717 * Check the case when bridge shares its MAC address with
1718 * some of its children, so packets are claimed by bridge
1719 * itself (bridge_input() does it first), but they are really
1720 * meant to be destined to the bridge member.
1721 */
1722 if (is_bridge) {
1723 TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr),
1724 ia_hash) {
1725 IFA_LOCK_SPIN(&ia->ia_ifa);
1726 if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr,
1727 ifp, ia)) {
1728 ifp = ia->ia_ifp;
1729 best_ia = ia;
1730 best_ia_sin = best_ia->ia_addr;
1731 IFA_ADDREF_LOCKED(&ia->ia_ifa);
1732 IFA_UNLOCK(&ia->ia_ifa);
1733 lck_rw_done(&in_ifaddr_rwlock);
1734 goto match;
1735 }
1736 IFA_UNLOCK(&ia->ia_ifa);
1737 }
1738 }
1739 #undef BDG_MEMBER_MATCHES_ARP
1740 lck_rw_done(&in_ifaddr_rwlock);
1741
1742 /*
1743 * No match, use the first inet address on the receive interface
1744 * as a dummy address for the rest of the function; we may be
1745 * proxying for another address.
1746 */
1747 ifnet_lock_shared(ifp);
1748 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1749 IFA_LOCK_SPIN(ifa);
1750 if (ifa->ifa_addr->sa_family != AF_INET) {
1751 IFA_UNLOCK(ifa);
1752 continue;
1753 }
1754 best_ia = (struct in_ifaddr *)ifa;
1755 best_ia_sin = best_ia->ia_addr;
1756 IFA_ADDREF_LOCKED(ifa);
1757 IFA_UNLOCK(ifa);
1758 ifnet_lock_done(ifp);
1759 goto match;
1760 }
1761 ifnet_lock_done(ifp);
1762
1763 /*
1764 * If we're not a bridge member, or if we are but there's no
1765 * IPv4 address to use for the interface, drop the packet.
1766 */
1767 if (!bridged || best_ia == NULL) {
1768 goto done;
1769 }
1770
1771 match:
1772 /* If the packet is from this interface, ignore the packet */
1773 if (bcmp(CONST_LLADDR(sender_hw), IF_LLADDR(ifp),
1774 sender_hw->sdl_alen) == 0) {
1775 goto done;
1776 }
1777
1778 /* Check for a conflict */
1779 if (!bridged &&
1780 sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) {
1781 struct kev_msg ev_msg;
1782 struct kev_in_collision *in_collision;
1783 u_char storage[sizeof(struct kev_in_collision) + MAX_HW_LEN];
1784
1785 bzero(&ev_msg, sizeof(struct kev_msg));
1786 bzero(storage, (sizeof(struct kev_in_collision) + MAX_HW_LEN));
1787 in_collision = (struct kev_in_collision *)(void *)storage;
1788 log(LOG_ERR, "%s duplicate IP address %s sent from "
1789 "address %s\n", if_name(ifp),
1790 inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
1791 sizeof(ipv4str)), sdl_addr_to_hex(sender_hw, buf,
1792 (int)sizeof(buf)));
1793
1794 /* Send a kernel event so anyone can learn of the conflict */
1795 in_collision->link_data.if_family = ifp->if_family;
1796 in_collision->link_data.if_unit = ifp->if_unit;
1797 strlcpy(&in_collision->link_data.if_name[0],
1798 ifp->if_name, IFNAMSIZ);
1799 in_collision->ia_ipaddr = sender_ip->sin_addr;
1800 in_collision->hw_len = (sender_hw->sdl_alen < MAX_HW_LEN) ?
1801 sender_hw->sdl_alen : MAX_HW_LEN;
1802 bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr,
1803 in_collision->hw_len);
1804 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1805 ev_msg.kev_class = KEV_NETWORK_CLASS;
1806 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
1807 ev_msg.event_code = KEV_INET_ARPCOLLISION;
1808 ev_msg.dv[0].data_ptr = in_collision;
1809 ev_msg.dv[0].data_length =
1810 sizeof(struct kev_in_collision) + in_collision->hw_len;
1811 ev_msg.dv[1].data_length = 0;
1812 dlil_post_complete_msg(NULL, &ev_msg);
1813 os_atomic_inc(&arpstat.dupips, relaxed);
1814 goto respond;
1815 }
1816
1817 /*
1818 * Look up the routing entry. If it doesn't exist and we are the
1819 * target, and the sender isn't 0.0.0.0, go ahead and create one.
1820 * Callee holds a reference on the route and returns with the route
1821 * entry locked, upon success.
1822 */
1823 error = arp_lookup_route(&sender_ip->sin_addr,
1824 (target_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr &&
1825 sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index);
1826
1827 if (error == 0) {
1828 RT_LOCK_ASSERT_HELD(route);
1829 }
1830
1831 if (error || route == NULL || route->rt_gateway == NULL) {
1832 if (arpop != ARPOP_REQUEST) {
1833 goto respond;
1834 }
1835
1836 if (arp_sendllconflict && send_conflicting_probes != 0 &&
1837 (ifp->if_eflags & IFEF_ARPLL) &&
1838 IN_LINKLOCAL(ntohl(target_ip->sin_addr.s_addr)) &&
1839 sender_ip->sin_addr.s_addr == INADDR_ANY) {
1840 /*
1841 * Verify this ARP probe doesn't conflict with
1842 * an IPv4LL we know of on another interface.
1843 */
1844 if (route != NULL) {
1845 RT_REMREF_LOCKED(route);
1846 RT_UNLOCK(route);
1847 route = NULL;
1848 }
1849 /*
1850 * Callee holds a reference on the route and returns
1851 * with the route entry locked, upon success.
1852 */
1853 error = arp_lookup_route(&target_ip->sin_addr, 0, 0,
1854 &route, ifp->if_index);
1855
1856 if (error != 0 || route == NULL ||
1857 route->rt_gateway == NULL) {
1858 goto respond;
1859 }
1860
1861 RT_LOCK_ASSERT_HELD(route);
1862
1863 gateway = SDL(route->rt_gateway);
1864 if (route->rt_ifp != ifp && gateway->sdl_alen != 0 &&
1865 (gateway->sdl_alen != sender_hw->sdl_alen ||
1866 bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw),
1867 gateway->sdl_alen) != 0)) {
1868 /*
1869 * A node is probing for an IPv4LL we know
1870 * exists on a different interface. We respond
1871 * with a conflicting probe to force the new
1872 * device to pick a different IPv4LL address.
1873 */
1874 if (arp_verbose || log_arp_warnings) {
1875 log(LOG_INFO, "arp: %s on %s sent "
1876 "probe for %s, already on %s\n",
1877 sdl_addr_to_hex(sender_hw, buf,
1878 (int)sizeof(buf)), if_name(ifp),
1879 inet_ntop(AF_INET,
1880 &target_ip->sin_addr, ipv4str,
1881 sizeof(ipv4str)),
1882 if_name(route->rt_ifp));
1883 log(LOG_INFO, "arp: sending "
1884 "conflicting probe to %s on %s\n",
1885 sdl_addr_to_hex(sender_hw, buf,
1886 (int)sizeof(buf)), if_name(ifp));
1887 }
1888 /* Mark use timestamp */
1889 if (route->rt_llinfo != NULL) {
1890 arp_llreach_use(route->rt_llinfo);
1891 }
1892 /* We're done with the route */
1893 RT_REMREF_LOCKED(route);
1894 RT_UNLOCK(route);
1895 route = NULL;
1896 /*
1897 * Send a conservative unicast "ARP probe".
1898 * This should force the other device to pick
1899 * a new number. This will not force the
1900 * device to pick a new number if the device
1901 * has already assigned that number. This will
1902 * not imply to the device that we own that
1903 * address. The link address is always
1904 * present; it's never freed.
1905 */
1906 ifnet_lock_shared(ifp);
1907 ifa = ifp->if_lladdr;
1908 IFA_ADDREF(ifa);
1909 ifnet_lock_done(ifp);
1910 dlil_send_arp_internal(ifp, ARPOP_REQUEST,
1911 SDL(ifa->ifa_addr),
1912 (const struct sockaddr *)sender_ip,
1913 sender_hw,
1914 (const struct sockaddr *)target_ip);
1915 IFA_REMREF(ifa);
1916 ifa = NULL;
1917 os_atomic_inc(&arpstat.txconflicts, relaxed);
1918 }
1919 goto respond;
1920 } else if (keep_announcements != 0 &&
1921 target_ip->sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
1922 /*
1923 * Don't create entry if link-local address and
1924 * link-local is disabled
1925 */
1926 if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
1927 (ifp->if_eflags & IFEF_ARPLL)) {
1928 if (route != NULL) {
1929 RT_REMREF_LOCKED(route);
1930 RT_UNLOCK(route);
1931 route = NULL;
1932 }
1933 /*
1934 * Callee holds a reference on the route and
1935 * returns with the route entry locked, upon
1936 * success.
1937 */
1938 error = arp_lookup_route(&sender_ip->sin_addr,
1939 1, 0, &route, ifp->if_index);
1940
1941 if (error == 0) {
1942 RT_LOCK_ASSERT_HELD(route);
1943 }
1944
1945 if (error == 0 && route != NULL &&
1946 route->rt_gateway != NULL) {
1947 created_announcement = 1;
1948 }
1949 }
1950 if (created_announcement == 0) {
1951 goto respond;
1952 }
1953 } else {
1954 goto respond;
1955 }
1956 }
1957
1958 RT_LOCK_ASSERT_HELD(route);
1959 VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
1960 VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
1961
1962 gateway = SDL(route->rt_gateway);
1963 if (!bridged && route->rt_ifp != ifp) {
1964 if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) ||
1965 !(ifp->if_eflags & IFEF_ARPLL)) {
1966 if (arp_verbose || log_arp_warnings) {
1967 log(LOG_ERR, "arp: %s is on %s but got "
1968 "reply from %s on %s\n",
1969 inet_ntop(AF_INET, &sender_ip->sin_addr,
1970 ipv4str, sizeof(ipv4str)),
1971 if_name(route->rt_ifp),
1972 sdl_addr_to_hex(sender_hw, buf,
1973 (int)sizeof(buf)), if_name(ifp));
1974 }
1975 goto respond;
1976 } else {
1977 /* Don't change a permanent address */
1978 if (route->rt_expire == 0) {
1979 goto respond;
1980 }
1981
1982 /*
1983 * We're about to check and/or change the route's ifp
1984 * and ifa, so do the lock dance: drop rt_lock, hold
1985 * rnh_lock and re-hold rt_lock to avoid violating the
1986 * lock ordering. We have an extra reference on the
1987 * route, so it won't go away while we do this.
1988 */
1989 RT_UNLOCK(route);
1990 lck_mtx_lock(rnh_lock);
1991 RT_LOCK(route);
1992 /*
1993 * Don't change the cloned route away from the
1994 * parent's interface if the address did resolve
1995 * or if the route is defunct. rt_ifp on both
1996 * the parent and the clone can now be freely
1997 * accessed now that we have acquired rnh_lock.
1998 */
1999 gateway = SDL(route->rt_gateway);
2000 if ((gateway->sdl_alen != 0 &&
2001 route->rt_parent != NULL &&
2002 route->rt_parent->rt_ifp == route->rt_ifp) ||
2003 (route->rt_flags & RTF_CONDEMNED)) {
2004 RT_REMREF_LOCKED(route);
2005 RT_UNLOCK(route);
2006 route = NULL;
2007 lck_mtx_unlock(rnh_lock);
2008 goto respond;
2009 }
2010 if (route->rt_ifp != ifp) {
2011 /*
2012 * Purge any link-layer info caching.
2013 */
2014 if (route->rt_llinfo_purge != NULL) {
2015 route->rt_llinfo_purge(route);
2016 }
2017
2018 /* Adjust route ref count for the interfaces */
2019 if (route->rt_if_ref_fn != NULL) {
2020 route->rt_if_ref_fn(ifp, 1);
2021 route->rt_if_ref_fn(route->rt_ifp, -1);
2022 }
2023 }
2024 /* Change the interface when the existing route is on */
2025 route->rt_ifp = ifp;
2026 /*
2027 * If rmx_mtu is not locked, update it
2028 * to the MTU used by the new interface.
2029 */
2030 if (!(route->rt_rmx.rmx_locks & RTV_MTU)) {
2031 route->rt_rmx.rmx_mtu = route->rt_ifp->if_mtu;
2032 if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
2033 route->rt_rmx.rmx_mtu = IN6_LINKMTU(route->rt_ifp);
2034 /* Further adjust the size for CLAT46 expansion */
2035 route->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
2036 }
2037 }
2038
2039 rtsetifa(route, &best_ia->ia_ifa);
2040 gateway->sdl_index = ifp->if_index;
2041 RT_UNLOCK(route);
2042 lck_mtx_unlock(rnh_lock);
2043 RT_LOCK(route);
2044 /* Don't bother if the route is down */
2045 if (!(route->rt_flags & RTF_UP)) {
2046 goto respond;
2047 }
2048 /* Refresh gateway pointer */
2049 gateway = SDL(route->rt_gateway);
2050 }
2051 RT_LOCK_ASSERT_HELD(route);
2052 }
2053
2054 if (gateway->sdl_alen != 0 && bcmp(LLADDR(gateway),
2055 CONST_LLADDR(sender_hw), gateway->sdl_alen) != 0) {
2056 if (route->rt_expire != 0 &&
2057 (arp_verbose || log_arp_warnings)) {
2058 char buf2[3 * MAX_HW_LEN];
2059 log(LOG_INFO, "arp: %s moved from %s to %s on %s\n",
2060 inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
2061 sizeof(ipv4str)),
2062 sdl_addr_to_hex(gateway, buf, (int)sizeof(buf)),
2063 sdl_addr_to_hex(sender_hw, buf2, (int)sizeof(buf2)),
2064 if_name(ifp));
2065 } else if (route->rt_expire == 0) {
2066 if (arp_verbose || log_arp_warnings) {
2067 log(LOG_ERR, "arp: %s attempts to modify "
2068 "permanent entry for %s on %s\n",
2069 sdl_addr_to_hex(sender_hw, buf,
2070 (int)sizeof(buf)),
2071 inet_ntop(AF_INET, &sender_ip->sin_addr,
2072 ipv4str, sizeof(ipv4str)),
2073 if_name(ifp));
2074 }
2075 goto respond;
2076 }
2077 }
2078
2079 /* Copy the sender hardware address in to the route's gateway address */
2080 gateway->sdl_alen = sender_hw->sdl_alen;
2081 bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen);
2082
2083 /* Update the expire time for the route and clear the reject flag */
2084 if (route->rt_expire != 0) {
2085 rt_setexpire(route, net_uptime() + arpt_keep);
2086 }
2087 route->rt_flags &= ~RTF_REJECT;
2088
2089 /* cache the gateway (sender HW) address */
2090 arp_llreach_alloc(route, ifp, LLADDR(gateway), gateway->sdl_alen,
2091 (arpop == ARPOP_REPLY), &rt_evcode);
2092
2093 llinfo = route->rt_llinfo;
2094 /* send a notification that the route is back up */
2095 if (ifp->if_addrlen == IF_LLREACH_MAXLEN &&
2096 route->rt_flags & RTF_ROUTER &&
2097 llinfo->la_flags & LLINFO_RTRFAIL_EVTSENT) {
2098 struct kev_msg ev_msg;
2099 struct kev_in_arpalive in_arpalive;
2100
2101 llinfo->la_flags &= ~LLINFO_RTRFAIL_EVTSENT;
2102 RT_UNLOCK(route);
2103 bzero(&ev_msg, sizeof(ev_msg));
2104 bzero(&in_arpalive, sizeof(in_arpalive));
2105 in_arpalive.link_data.if_family = ifp->if_family;
2106 in_arpalive.link_data.if_unit = ifp->if_unit;
2107 strlcpy(in_arpalive.link_data.if_name, ifp->if_name, IFNAMSIZ);
2108 ev_msg.vendor_code = KEV_VENDOR_APPLE;
2109 ev_msg.kev_class = KEV_NETWORK_CLASS;
2110 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
2111 ev_msg.event_code = KEV_INET_ARPRTRALIVE;
2112 ev_msg.dv[0].data_ptr = &in_arpalive;
2113 ev_msg.dv[0].data_length = sizeof(struct kev_in_arpalive);
2114 dlil_post_complete_msg(NULL, &ev_msg);
2115 RT_LOCK(route);
2116 }
2117 /* Update the llinfo, send out all queued packets at once */
2118 llinfo->la_asked = 0;
2119 llinfo->la_flags &= ~LLINFO_PROBING;
2120 llinfo->la_prbreq_cnt = 0;
2121
2122 if (rt_evcode) {
2123 /*
2124 * Enqueue work item to invoke callback for this route entry
2125 */
2126 route_event_enqueue_nwk_wq_entry(route, NULL, rt_evcode, NULL, TRUE);
2127
2128 if (route->rt_flags & RTF_ROUTER) {
2129 struct radix_node_head *rnh = NULL;
2130 struct route_event rt_ev;
2131 route_event_init(&rt_ev, route, NULL, rt_evcode);
2132 /*
2133 * We already have a reference on rt. The function
2134 * frees it before returning.
2135 */
2136 RT_UNLOCK(route);
2137 lck_mtx_lock(rnh_lock);
2138 rnh = rt_tables[AF_INET];
2139
2140 if (rnh != NULL) {
2141 (void) rnh->rnh_walktree(rnh, route_event_walktree,
2142 (void *)&rt_ev);
2143 }
2144 lck_mtx_unlock(rnh_lock);
2145 RT_LOCK(route);
2146 }
2147 }
2148
2149 if (!qempty(&llinfo->la_holdq)) {
2150 uint32_t held;
2151 struct mbuf *m0;
2152 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
2153
2154 _getq_all(&llinfo->la_holdq, &pkt, NULL, &held, NULL);
2155 m0 = pkt.cp_mbuf;
2156 if (arp_verbose) {
2157 log(LOG_DEBUG, "%s: sending %u held packets\n",
2158 __func__, held);
2159 }
2160 os_atomic_add(&arpstat.held, -held, relaxed);
2161 VERIFY(qempty(&llinfo->la_holdq));
2162 RT_UNLOCK(route);
2163 dlil_output(ifp, PF_INET, m0, (caddr_t)route,
2164 rt_key(route), 0, NULL);
2165 RT_REMREF(route);
2166 route = NULL;
2167 }
2168
2169 respond:
2170 if (route != NULL) {
2171 /* Mark use timestamp if we're going to send a reply */
2172 if (arpop == ARPOP_REQUEST && route->rt_llinfo != NULL) {
2173 arp_llreach_use(route->rt_llinfo);
2174 }
2175 RT_REMREF_LOCKED(route);
2176 RT_UNLOCK(route);
2177 route = NULL;
2178 }
2179
2180 if (arpop != ARPOP_REQUEST) {
2181 goto done;
2182 }
2183
2184 /* See comments at the beginning of this routine */
2185 arpstat.rxrequests++;
2186
2187 /* If we are not the target, check if we should proxy */
2188 if (target_ip->sin_addr.s_addr != best_ia_sin.sin_addr.s_addr) {
2189 /*
2190 * Find a proxy route; callee holds a reference on the
2191 * route and returns with the route entry locked, upon
2192 * success.
2193 */
2194 error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY,
2195 &route, ifp->if_index);
2196
2197 if (error == 0) {
2198 RT_LOCK_ASSERT_HELD(route);
2199 /*
2200 * Return proxied ARP replies only on the interface
2201 * or bridge cluster where this network resides.
2202 * Otherwise we may conflict with the host we are
2203 * proxying for.
2204 */
2205 if (route->rt_ifp != ifp &&
2206 (route->rt_ifp->if_bridge != ifp->if_bridge ||
2207 ifp->if_bridge == NULL)) {
2208 RT_REMREF_LOCKED(route);
2209 RT_UNLOCK(route);
2210 goto done;
2211 }
2212 proxied = *SDL(route->rt_gateway);
2213 target_hw = &proxied;
2214 } else {
2215 /*
2216 * We don't have a route entry indicating we should
2217 * use proxy. If we aren't supposed to proxy all,
2218 * we are done.
2219 */
2220 if (!arp_proxyall) {
2221 goto done;
2222 }
2223
2224 /*
2225 * See if we have a route to the target ip before
2226 * we proxy it.
2227 */
2228 route = rtalloc1_scoped((struct sockaddr *)
2229 (size_t)target_ip, 0, 0, ifp->if_index);
2230 if (!route) {
2231 goto done;
2232 }
2233
2234 /*
2235 * Don't proxy for hosts already on the same interface.
2236 */
2237 RT_LOCK(route);
2238 if (route->rt_ifp == ifp) {
2239 RT_UNLOCK(route);
2240 rtfree(route);
2241 goto done;
2242 }
2243 }
2244 /* Mark use timestamp */
2245 if (route->rt_llinfo != NULL) {
2246 arp_llreach_use(route->rt_llinfo);
2247 }
2248 RT_REMREF_LOCKED(route);
2249 RT_UNLOCK(route);
2250 }
2251
2252 dlil_send_arp(ifp, ARPOP_REPLY,
2253 target_hw, (const struct sockaddr *)target_ip,
2254 sender_hw, (const struct sockaddr *)sender_ip, 0);
2255
2256 done:
2257 if (best_ia != NULL) {
2258 IFA_REMREF(&best_ia->ia_ifa);
2259 }
2260 return 0;
2261 }
2262
2263 void
arp_ifinit(struct ifnet * ifp,struct ifaddr * ifa)2264 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
2265 {
2266 struct sockaddr *sa;
2267
2268 IFA_LOCK(ifa);
2269 ifa->ifa_rtrequest = arp_rtrequest;
2270 ifa->ifa_flags |= RTF_CLONING;
2271 sa = ifa->ifa_addr;
2272 IFA_UNLOCK(ifa);
2273 if ((ifp->if_flags & IFF_NOARP) == 0) {
2274 dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa, 0);
2275 }
2276 }
2277
2278 static int
2279 arp_getstat SYSCTL_HANDLER_ARGS
2280 {
2281 #pragma unused(oidp, arg1, arg2)
2282 if (req->oldptr == USER_ADDR_NULL) {
2283 req->oldlen = (size_t)sizeof(struct arpstat);
2284 }
2285
2286 return SYSCTL_OUT(req, &arpstat, MIN(sizeof(arpstat), req->oldlen));
2287 }
2288