1 /*
2 * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "net/if_var.h"
30 #include <net/dlil_var_private.h>
31
32
/* Default lock attributes shared by the DLIL locks declared below. */
LCK_ATTR_DECLARE(dlil_lck_attributes, 0, 0);

/* Lock groups: tag related locks for lockstat/debugging purposes. */
LCK_GRP_DECLARE(dlil_lock_group, "DLIL internal locks");
LCK_GRP_DECLARE(ifnet_lock_group, "ifnet locks");
LCK_GRP_DECLARE(ifnet_head_lock_group, "ifnet head lock");
LCK_GRP_DECLARE(ifnet_snd_lock_group, "ifnet snd locks");
LCK_GRP_DECLARE(ifnet_rcv_lock_group, "ifnet rcv locks");

LCK_ATTR_DECLARE(ifnet_lock_attr, 0, 0);
/* RW lock protecting the global interface list head. */
LCK_RW_DECLARE_ATTR(ifnet_head_lock, &ifnet_head_lock_group,
    &dlil_lck_attributes);
/* Mutex behind dlil_if_lock()/dlil_if_unlock(). */
LCK_MTX_DECLARE_ATTR(dlil_ifnet_lock, &dlil_lock_group,
    &dlil_lck_attributes);


/* Guards dlil_pending_thread_cnt, which is also its wakeup channel. */
LCK_MTX_DECLARE_ATTR(dlil_thread_sync_lock, &dlil_lock_group,
    &dlil_lck_attributes);

/* Count of DLIL threads still pending startup (see incr/decr routines). */
uint32_t dlil_pending_thread_cnt = 0;
52
53
/*
 * Forward declarations.
 */
/* Installed as ifa_rtrequest on the link-level ifaddr (see dlil_alloc_lladdr). */
__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
/* Purges routes for an interface/protocol pair (used by if_proto_free). */
__private_extern__ void if_rtproto_del(struct ifnet *ifp, int protocol);
59
60 /*
61 * Utility routines
62 */
63 kern_return_t
dlil_affinity_set(struct thread * tp,u_int32_t tag)64 dlil_affinity_set(struct thread *tp, u_int32_t tag)
65 {
66 thread_affinity_policy_data_t policy;
67
68 bzero(&policy, sizeof(policy));
69 policy.affinity_tag = tag;
70 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
71 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
72 }
73
74 void
dlil_incr_pending_thread_count(void)75 dlil_incr_pending_thread_count(void)
76 {
77 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
78 lck_mtx_lock(&dlil_thread_sync_lock);
79 dlil_pending_thread_cnt++;
80 lck_mtx_unlock(&dlil_thread_sync_lock);
81 }
82
83 void
dlil_decr_pending_thread_count(void)84 dlil_decr_pending_thread_count(void)
85 {
86 LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
87 lck_mtx_lock(&dlil_thread_sync_lock);
88 VERIFY(dlil_pending_thread_cnt > 0);
89 dlil_pending_thread_cnt--;
90 if (dlil_pending_thread_cnt == 0) {
91 wakeup(&dlil_pending_thread_cnt);
92 }
93 lck_mtx_unlock(&dlil_thread_sync_lock);
94 }
95
96 boolean_t
packet_has_vlan_tag(struct mbuf * m)97 packet_has_vlan_tag(struct mbuf * m)
98 {
99 u_int tag = 0;
100
101 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
102 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
103 if (tag == 0) {
104 /* the packet is just priority-tagged, clear the bit */
105 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
106 }
107 }
108 return tag != 0;
109 }
110
111 /*
112 * Monitor functions.
113 */
114 void
if_flt_monitor_busy(struct ifnet * ifp)115 if_flt_monitor_busy(struct ifnet *ifp)
116 {
117 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
118
119 ++ifp->if_flt_busy;
120 VERIFY(ifp->if_flt_busy != 0);
121 }
122
/* Alias of if_flt_monitor_leave(): drop one busy count, wake waiters. */
void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}
128
/*
 * Wait (sleeping on if_flt_head, dropping if_flt_lock across msleep)
 * until the filter list is idle, then mark it busy for this caller.
 * Caller must hold if_flt_lock; it is held again on return.
 */
void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}
141
/*
 * Drop one busy count on the filter list; when it reaches zero, wake
 * every thread blocked in if_flt_monitor_enter().  Caller holds
 * if_flt_lock.
 */
void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	/* last busy holder wakes all waiters at once */
	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
155
156
/*
 * Allocate a zero-filled dlil_ifnet.  Z_NOFAIL means this never
 * returns NULL, so callers need not check.
 */
struct dlil_ifnet *
dlif_ifnet_alloc(void)
{
	return kalloc_type(struct dlil_ifnet, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
162
163 void
dlif_ifnet_free(struct dlil_ifnet * ifnet)164 dlif_ifnet_free(struct dlil_ifnet *ifnet)
165 {
166 if (ifnet != NULL) {
167 kfree_type(struct dlil_ifnet, ifnet);
168 }
169 }
170
/* Allocate a zero-filled interface filter record (cannot fail). */
struct ifnet_filter *
dlif_filt_alloc(void)
{
	return kalloc_type(struct ifnet_filter, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
176
177 void
dlif_filt_free(struct ifnet_filter * filt)178 dlif_filt_free(struct ifnet_filter *filt)
179 {
180 if (filt != NULL) {
181 kfree_type(struct ifnet_filter, filt);
182 }
183 }
184
/* Allocate a zero-filled if_proto record (cannot fail). */
struct if_proto *
dlif_proto_alloc(void)
{
	return kalloc_type(struct if_proto, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
190
191 void
dlif_proto_free(struct if_proto * ifproto)192 dlif_proto_free(struct if_proto *ifproto)
193 {
194 if (ifproto != NULL) {
195 kfree_type(struct if_proto, ifproto);
196 }
197 }
198
/* Allocate a zero-filled per-interface TCP stats block (cannot fail). */
struct tcpstat_local *
dlif_tcpstat_alloc(void)
{
	return kalloc_type(struct tcpstat_local, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
204
205 void
dlif_tcpstat_free(struct tcpstat_local * if_tcp_stat)206 dlif_tcpstat_free(struct tcpstat_local *if_tcp_stat)
207 {
208 if (if_tcp_stat != NULL) {
209 kfree_type(struct tcpstat_local, if_tcp_stat);
210 }
211 }
212
/* Allocate a zero-filled per-interface UDP stats block (cannot fail). */
struct udpstat_local *
dlif_udpstat_alloc(void)
{
	return kalloc_type(struct udpstat_local, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
218
219 void
dlif_udpstat_free(struct udpstat_local * if_udp_stat)220 dlif_udpstat_free(struct udpstat_local *if_udp_stat)
221 {
222 if (if_udp_stat != NULL) {
223 kfree_type(struct udpstat_local, if_udp_stat);
224 }
225 }
226
/*
 * Build (or rebuild) the AF_LINK address for `ifp' and install it as
 * ifp->if_lladdr, returning the installed ifaddr.  The sockaddr_dl is
 * laid out as the interface name followed by `ll_addr''s link-layer
 * bytes; the netmask covers only the name portion.  Storage normally
 * comes from the dl_if_lladdr area embedded in the dlil_ifnet; a
 * permanent extended area is allocated once if the address does not
 * fit.  Any previously installed ifaddr is released.  Caller must hold
 * the ifnet lock exclusively.
 */
struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa = NULL;
	struct sockaddr_dl *addr_sdl, *mask_sdl;
	char workbuf[IFNAMSIZ * 2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	/* Sizes: name length, mask covering the name, name + lladdr bytes */
	namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
		socksize = sizeof(struct sockaddr_dl);
	}
	/* round the sockaddr size up to a 32-bit boundary */
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		struct dl_if_lladdr_xtra_space *__single dl_if_lladdr_ext;

		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			/* first time through: allocate the extended area */
			dl_if_lladdr_ext = zalloc_permanent(
				sizeof(*dl_if_lladdr_ext), ZALIGN(struct ifaddr));

			ifa = &dl_if_lladdr_ext->ifa;
			ifa_lock_init(ifa);
			ifa_initref(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		} else {
			/* already on the extended area; recover it from ifa */
			dl_if_lladdr_ext = __unsafe_forge_single(
				struct dl_if_lladdr_xtra_space*, ifa);
			ifa = &dl_if_lladdr_ext->ifa;
		}

		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		bzero(dl_if_lladdr_ext->addr_sdl_bytes,
		    sizeof(dl_if_lladdr_ext->addr_sdl_bytes));
		bzero(dl_if_lladdr_ext->mask_sdl_bytes,
		    sizeof(dl_if_lladdr_ext->mask_sdl_bytes));
		addr_sdl = SDL(dl_if_lladdr_ext->addr_sdl_bytes);
		mask_sdl = SDL(dl_if_lladdr_ext->mask_sdl_bytes);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure. This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			ifa_initref(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		bzero(dl_if->dl_if_lladdr.addr_sdl_bytes,
		    sizeof(dl_if->dl_if_lladdr.addr_sdl_bytes));
		bzero(dl_if->dl_if_lladdr.mask_sdl_bytes,
		    sizeof(dl_if->dl_if_lladdr.mask_sdl_bytes));
		addr_sdl = SDL(dl_if->dl_if_lladdr.addr_sdl_bytes);
		mask_sdl = SDL(dl_if->dl_if_lladdr.mask_sdl_bytes);
	}

	/* Swap in the (possibly different) ifaddr; remember the old one */
	if (ifp->if_lladdr != ifa) {
		oifa = ifp->if_lladdr;
		ifp->if_lladdr = ifa;
	}

	/* Populate the AF_LINK sockaddr: name first, then lladdr bytes */
	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = SA(addr_sdl);
	addr_sdl->sdl_len = (u_char)socksize;
	addr_sdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, addr_sdl->sdl_data, min(namelen,
		    sizeof(addr_sdl->sdl_data)));
		addr_sdl->sdl_nlen = (u_char)namelen;
	} else {
		addr_sdl->sdl_nlen = 0;
	}
	addr_sdl->sdl_index = ifp->if_index;
	addr_sdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		addr_sdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(addr_sdl), addr_sdl->sdl_alen);
	} else {
		addr_sdl->sdl_alen = 0;
	}
	/* netmask is all-ones across the interface-name portion only */
	ifa->ifa_netmask = SA(mask_sdl);
	mask_sdl->sdl_len = (u_char)masklen;
	while (namelen > 0) {
		mask_sdl->sdl_data[--namelen] = 0xff;
	}
	IFA_UNLOCK(ifa);

	if (oifa != NULL) {
		ifa_remref(oifa);
	}

	return ifa;
}
348
349
/*
 * Allocate the per-interface protocol statistics blocks: the TCP/UDP
 * pair plus the per-AF ECN stats.  Returns 0 when the TCP/UDP pair is
 * allocated here; EINVAL when `ifp' is NULL or when the pair already
 * exists.  On a non-zero return all stats pointers on `ifp' are freed
 * and cleared.
 * NOTE(review): a second call on the same ifp therefore frees the
 * previously-allocated stats and returns EINVAL — callers appear
 * expected to invoke this exactly once per interface; confirm at the
 * call sites.
 */
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		ifp->if_tcp_stat = dlif_tcpstat_alloc();
		ifp->if_udp_stat = dlif_udpstat_alloc();

		/* stats blocks must be 64-bit aligned for atomic updates */
		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		ifp->if_ipv4_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
	}

	if (ifp->if_ipv6_stat == NULL) {
		ifp->if_ipv6_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
	}
end:
	/* error path: release anything hanging off the ifnet */
	if (ifp != NULL && ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			dlif_tcpstat_free(ifp->if_tcp_stat);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			dlif_udpstat_free(ifp->if_udp_stat);
			ifp->if_udp_stat = NULL;
		}
		/* The macro kfree_type sets the passed pointer to NULL */
		if (ifp->if_ipv4_stat != NULL) {
			kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv4_stat);
		}
		if (ifp->if_ipv6_stat != NULL) {
			kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv6_stat);
		}
	}

	return ret;
}
397
398 errno_t
dlil_if_ref(struct ifnet * ifp)399 dlil_if_ref(struct ifnet *ifp)
400 {
401 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
402
403 if (dl_if == NULL) {
404 return EINVAL;
405 }
406
407 lck_mtx_lock_spin(&dl_if->dl_if_lock);
408 ++dl_if->dl_if_refcnt;
409 if (dl_if->dl_if_refcnt == 0) {
410 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
411 /* NOTREACHED */
412 }
413 lck_mtx_unlock(&dl_if->dl_if_lock);
414
415 return 0;
416 }
417
/*
 * Drop a reference taken with dlil_if_ref().  If the caller is dropping
 * the final reference while the interface is still marked
 * IFRF_EMBRYONIC, the entry is returned to the pool via
 * _dlil_if_release().  Panics on refcount underflow; EINVAL for NULL.
 */
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool need_release = FALSE;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	switch (dl_if->dl_if_refcnt) {
	case 0:
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
		break;
	case 1:
		/* last reference going away on an embryonic interface */
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			need_release = TRUE;
		}
		break;
	default:
		break;
	}
	--dl_if->dl_if_refcnt;
	lck_mtx_unlock(&dl_if->dl_if_lock);
	/* release outside dl_if_lock: _dlil_if_release takes other locks */
	if (need_release) {
		_dlil_if_release(ifp, true);
	}
	return 0;
}
449
/*
 * Return an ifnet to the DLIL pool after detach: decrement the global
 * allocation counters, free the broadcast-address storage, re-point
 * if_name/if_xname at the dlil_ifnet's dedicated storage (the external
 * name is rewritten as "<name>?"), and, when `clear_in_use' is set,
 * clear DLIF_INUSE so the entry can be reused.
 */
void
_dlil_if_release(ifnet_t ifp, bool clear_in_use)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	/* interfaces not allocated through the KPI also count in the OS bucket */
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	kfree_data_counted_by(ifp->if_broadcast.ptr, ifp->if_broadcast.length);
	lck_mtx_lock(&dlifp->dl_if_lock);
	/* Copy the if name to the dedicated storage */
	ifp->if_name = tsnprintf(dlifp->dl_if_namestorage, sizeof(dlifp->dl_if_namestorage),
	    "%s", ifp->if_name);
	/* Reset external name (name + unit) */
	ifp->if_xname = tsnprintf(dlifp->dl_if_xnamestorage, sizeof(dlifp->dl_if_xnamestorage),
	    "%s?", ifp->if_name);
	if (clear_in_use) {
		ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
		dlifp->dl_if_flags &= ~DLIF_INUSE;
	}
	lck_mtx_unlock(&dlifp->dl_if_lock);
	ifnet_lock_done(ifp);
}
476
/* Public wrapper: release `ifp' without clearing the DLIF_INUSE flag. */
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	_dlil_if_release(ifp, false);
}
482
/* Take an additional reference on an attached protocol. */
void
if_proto_ref(struct if_proto *proto)
{
	os_atomic_inc(&proto->refcount, relaxed);
}
488
/*
 * Drop a reference on `proto'.  When the last reference goes away this
 * invokes the protocol's detached() callback, purges routes for the
 * interface/protocol pair, posts KEV_DL_PROTO_DETACHED, marks the
 * interface down if no protocols remain attached, and frees the
 * if_proto structure.
 */
void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	oldval = os_atomic_dec_orig(&proto->refcount, relaxed);
	if (oldval > 1) {
		/* other references remain; nothing more to do */
		return;
	}

	/* Notify the protocol it was detached (KPI-version dependent) */
	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	ifnet_lock_shared(ifp);

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	dlif_proto_free(proto);
}
550
/*
 * Copy up to `list_count' attached protocol family numbers into `list'
 * (when non-NULL) and return the total number of attached protocols.
 * Pass list == NULL / list_count == 0 to only count.  Caller must hold
 * the ifnet lock.
 */
__private_extern__ u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list __counted_by(list_count),
    u_int32_t list_count)
{
	u_int32_t count = 0;
	int i;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		goto done;
	}

	/* count every entry in every bucket; copy only while room remains */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		if_proto_ref_t proto;
		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
			if (list != NULL && count < list_count) {
				list[count] = proto->protocol_family;
			}
			count++;
		}
	}
done:
	return count;
}
576
/*
 * Fetch the attached protocol list under the shared ifnet lock; returns
 * the total number of attached protocols (which may exceed `count').
 */
__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *__counted_by(count) protolist, u_int32_t count)
{
	u_int32_t actual_count;
	ifnet_lock_shared(ifp);
	actual_count = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);
	return actual_count;
}
586
/* Free a protocol list buffer used with if_get_protolist(). */
__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	kfree_data_addr(list);
}
592
593 boolean_t
dlil_is_native_netif_nexus(ifnet_t ifp)594 dlil_is_native_netif_nexus(ifnet_t ifp)
595 {
596 return (ifp->if_eflags & IFEF_SKYWALK_NATIVE) && ifp->if_na != NULL;
597 }
598
599
600 /*
601 * Caller must already be holding ifnet lock.
602 */
603 struct if_proto *
find_attached_proto(struct ifnet * ifp,u_int32_t protocol_family)604 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
605 {
606 struct if_proto *proto = NULL;
607 u_int32_t i = proto_hash_value(protocol_family);
608
609 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
610
611 if (ifp->if_proto_hash != NULL) {
612 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
613 }
614
615 while (proto != NULL && proto->protocol_family != protocol_family) {
616 proto = SLIST_NEXT(proto, next_hash);
617 }
618
619 if (proto != NULL) {
620 if_proto_ref(proto);
621 }
622
623 return proto;
624 }
625
626 /*
627 * Clat routines.
628 */
629
/*
 * Return 1 when CLAT translation is needed for this packet, i.e. when
 * the destination address is not a loopback, link-local, multicast or
 * broadcast address (per the CLAT46_NEEDED/CLAT64_NEEDED macros);
 * 0 otherwise, including for unrecognized protocol families.
 */
int
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
{
	int ret = 0;
	switch (proto_family) {
	case PF_INET: {
		/*
		 * NOTE(review): unlike the PF_INET6 case below, no minimum
		 * packet-length check precedes this mtod() — presumably the
		 * callers guarantee a contiguous IPv4 header; verify at the
		 * call sites.
		 */
		struct ip *iph = mtod(m, struct ip *);
		if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
			ret = 1;
		}
		break;
	}
	case PF_INET6: {
		struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
		if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
		    CLAT64_NEEDED(&ip6h->ip6_dst)) {
			ret = 1;
		}
		break;
	}
	}

	return ret;
}
658
/*
 * @brief This routine translates an IPv4 packet to IPv6, updates the
 * protocol checksum, and also translates ICMP along with the inner
 * header.
 *
 * @param ifp Pointer to the interface
 * @param proto_family pointer to protocol family. It is updated if function
 * performs the translation successfully.
 * @param m Pointer to the pointer pointing to the packet. Needed because this
 * routine can end up changing the mbuf to a different one.
 *
 * @return 0 on success or else a negative value.
 */
errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET);
	VERIFY(IS_INTF_CLAT46(ifp));

	pbuf_t pbuf_store, *pbuf = NULL;
	struct ip *iph = NULL;
	struct in_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_addr src_storage = {};
	struct in6_addr *src = NULL;
	struct sockaddr_in6 dstsock = {};
	int error = 0;
	uint16_t off = 0;
	uint16_t tot_len = 0;
	uint16_t ip_id_val = 0;
	uint16_t ip_frag_off = 0;

	boolean_t is_frag = FALSE;
	boolean_t is_first_frag = TRUE;
	boolean_t is_last_frag = TRUE;

	/*
	 * Ensure that the incoming mbuf chain contains a valid
	 * IPv4 header in contiguous memory, or exit early.
	 */
	if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip) ||
	    ((size_t)(*m)->m_len < sizeof(struct ip) &&
	    (*m = m_pullup(*m, sizeof(struct ip))) == NULL)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	/* Snapshot the original header fields before translation rewrites them */
	iph = mtod(*m, struct ip *);
	osrc = iph->ip_src;
	odst = iph->ip_dst;
	proto = iph->ip_p;
	off = (uint16_t)(iph->ip_hl << 2);	/* IPv4 header length in bytes */
	ip_id_val = iph->ip_id;
	ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;

	tot_len = ntohs(iph->ip_len);

	/* Validate that mbuf contains IP payload equal to `iph->ip_len' */
	if ((size_t)(*m)->m_pkthdr.len < tot_len) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	pbuf_init_mbuf(&pbuf_store, *m, ifp);
	pbuf = &pbuf_store;

	/*
	 * For packets that are not first frags
	 * we only need to adjust CSUM.
	 * For 4 to 6, Fragmentation header gets appended
	 * after proto translation.
	 */
	if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
		is_frag = TRUE;

		/* If the offset is not zero, it is not first frag */
		if (ip_frag_off != 0) {
			is_first_frag = FALSE;
		}

		/* If IP_MF is set, then it is not last frag */
		if (ntohs(iph->ip_off) & IP_MF) {
			is_last_frag = FALSE;
		}
	}

	/*
	 * Translate IPv4 destination to IPv6 destination by using the
	 * prefixes learned through prior PLAT discovery.
	 */
	if ((error = nat464_synthesize_ipv6(ifp, &odst, &dstsock.sin6_addr)) != 0) {
		ip6stat.ip6s_clat464_out_v6synthfail_drop++;
		goto cleanup;
	}

	dstsock.sin6_len = sizeof(struct sockaddr_in6);
	dstsock.sin6_family = AF_INET6;

	/*
	 * Retrieve the local IPv6 CLAT46 address reserved for stateless
	 * translation.
	 */
	src = in6_selectsrc_core(&dstsock, 0, ifp, 0, &src_storage, NULL, &error,
	    NULL, NULL, TRUE);

	if (src == NULL) {
		ip6stat.ip6s_clat464_out_nov6addr_drop++;
		error = -1;
		goto cleanup;
	}

	/*
	 * Translate the IP header part first.
	 * NOTE: `nat464_translate_46' handles the situation where the value
	 * `off' is past the end of the mbuf chain that is associated with
	 * the pbuf, in a graceful manner.
	 */
	error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
	    iph->ip_ttl, src_storage, dstsock.sin6_addr, tot_len) == NT_NAT64) ? 0 : -1;

	iph = NULL; /* Invalidate iph as pbuf has been modified */

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46transfail_drop++;
		goto cleanup;
	}

	/*
	 * Translate protocol header, update checksum, checksum flags
	 * and related fields.
	 */
	error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
	    proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
		goto cleanup;
	}

	/* Now insert the IPv6 fragment header */
	if (is_frag) {
		error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);

		if (error != 0) {
			ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
			goto cleanup;
		}
	}

cleanup:
	/* Hand the (possibly replaced) mbuf back to the caller */
	if (pbuf_is_valid(pbuf)) {
		*m = pbuf->pb_mbuf;
		pbuf->pb_mbuf = NULL;
		pbuf_destroy(pbuf);
	} else {
		/* translation consumed the mbuf; nothing to return */
		error = -1;
		*m = NULL;
		ip6stat.ip6s_clat464_out_invalpbuf_drop++;
	}

	if (error == 0) {
		*proto_family = PF_INET6;
		ip6stat.ip6s_clat464_out_success++;
	}

	return error;
}
826
/*
 * @brief This routine translates an incoming IPv6 packet to IPv4,
 * updates the protocol checksum and also translates the ICMPv6 outer
 * and inner headers.
 *
 * @param ifp Pointer to the interface
 * @param proto_family pointer to protocol family. It is updated if function
 * performs the translation successfully.
 * @param m Pointer to the pointer pointing to the packet.
 *
 * @return 0 on success or else a negative value.
 */
errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET6);
	VERIFY(IS_INTF_CLAT46(ifp));

	struct ip6_hdr *ip6h = NULL;
	struct in6_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_ifaddr *ia6_clat_dst = NULL;
	struct in_ifaddr *ia4_clat_dst = NULL;
	struct in_addr *dst = NULL;
	struct in_addr src;
	int error = 0;
	uint32_t off = 0;
	u_int64_t tot_len = 0;
	uint8_t tos = 0;
	boolean_t is_first_frag = TRUE;

	/*
	 * Ensure that the incoming mbuf chain contains a valid
	 * IPv6 header in contiguous memory, or exit early.
	 */
	if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
	    ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
	    (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	ip6h = mtod(*m, struct ip6_hdr *);
	/* Validate that mbuf contains IP payload equal to ip6_plen */
	if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	/* Snapshot the original addresses before translation rewrites them */
	osrc = ip6h->ip6_src;
	odst = ip6h->ip6_dst;

	/*
	 * Retrieve the local CLAT46 reserved IPv6 address.
	 * Let the packet pass if we don't find one, as the flag
	 * may get set before IPv6 configuration has taken place.
	 */
	ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_dst == NULL) {
		goto done;
	}

	/*
	 * Check if the original dest in the packet is same as the reserved
	 * CLAT46 IPv6 address
	 */
	if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
		bool translate = false;
		pbuf_t pbuf_store, *pbuf = NULL;
		pbuf_init_mbuf(&pbuf_store, *m, ifp);
		pbuf = &pbuf_store;

		/*
		 * Retrieve the local CLAT46 IPv4 address reserved for stateless
		 * translation.
		 */
		ia4_clat_dst = inifa_ifpclatv4(ifp);
		if (ia4_clat_dst == NULL) {
			ifa_remref(&ia6_clat_dst->ia_ifa);
			ip6stat.ip6s_clat464_in_nov4addr_drop++;
			error = -1;
			goto cleanup;
		}
		ifa_remref(&ia6_clat_dst->ia_ifa);

		/* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
		dst = &ia4_clat_dst->ia_addr.sin_addr;
		error = nat464_synthesize_ipv4(ifp, &osrc, &src, &translate);
		if (error != 0) {
			ip6stat.ip6s_clat464_in_v4synthfail_drop++;
			error = -1;
			goto cleanup;
		}
		if (!translate) {
			/* no translation required */
			if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
				/* only allow icmpv6 */
				ip6stat.ip6s_clat464_in_v4synthfail_drop++;
				error = -1;
			}
			goto cleanup;
		}

		/* Gather the remaining header fields needed for translation */
		ip6h = pbuf->pb_data;
		off = sizeof(struct ip6_hdr);
		proto = ip6h->ip6_nxt;
		tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;	/* traffic class */
		tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);

		/*
		 * Translate the IP header and update the fragmentation
		 * header if needed
		 */
		error = (nat464_translate_64(pbuf, off, tos, &proto,
		    ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
		    0 : -1;

		ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64transfail_drop++;
			goto cleanup;
		}

		/*
		 * Translate protocol header, update checksum, checksum flags
		 * and related fields.
		 */
		error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
		    (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
		    NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
			goto cleanup;
		}

cleanup:
		if (ia4_clat_dst != NULL) {
			ifa_remref(&ia4_clat_dst->ia_ifa);
		}

		/* Hand the (possibly replaced) mbuf back to the caller */
		if (pbuf_is_valid(pbuf)) {
			*m = pbuf->pb_mbuf;
			pbuf->pb_mbuf = NULL;
			pbuf_destroy(pbuf);
		} else {
			error = -1;
			ip6stat.ip6s_clat464_in_invalpbuf_drop++;
		}

		if (error == 0 && translate) {
			*proto_family = PF_INET;
			ip6stat.ip6s_clat464_in_success++;
		}
	} /* CLAT traffic */

done:
	return error;
}
982
983 /*
984 * Thread management
985 */
/*
 * Tear down a dlil_threading_info that is no longer in use: destroy its
 * mutex, release the lock group, and reset every field to a quiescent
 * state.  The VERIFYs assert the instance has fully drained (empty
 * packet queue, no affinity, no driver/poller threads attached).
 */
void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
	lck_grp_free(inp->dlth_lock_grp);
	inp->dlth_lock_grp = NULL;

	inp->dlth_flags = 0;
	inp->dlth_wtot = 0;
	bzero(inp->dlth_name_storage, sizeof(inp->dlth_name_storage));
	inp->dlth_name = NULL;
	inp->dlth_ifp = NULL;
	/* the packet queue must already be empty at this point */
	VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
	qlimit(&inp->dlth_pkts) = 0;
	bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));

	VERIFY(!inp->dlth_affinity);
	inp->dlth_thread = THREAD_NULL;
	inp->dlth_strategy = NULL;
	VERIFY(inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(inp->dlth_affinity_tag == 0);
#if IFNET_INPUT_SANITY_CHK
	inp->dlth_pkts_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
1012
1013 /*
1014 * Lock management
1015 */
1016 static errno_t
_dlil_get_lock_assertion_type(ifnet_lock_assert_t what,unsigned int * type)1017 _dlil_get_lock_assertion_type(ifnet_lock_assert_t what, unsigned int *type)
1018 {
1019 switch (what) {
1020 case IFNET_LCK_ASSERT_EXCLUSIVE:
1021 *type = LCK_RW_ASSERT_EXCLUSIVE;
1022 return 0;
1023
1024 case IFNET_LCK_ASSERT_SHARED:
1025 *type = LCK_RW_ASSERT_SHARED;
1026 return 0;
1027
1028 case IFNET_LCK_ASSERT_OWNED:
1029 *type = LCK_RW_ASSERT_HELD;
1030 return 0;
1031
1032 case IFNET_LCK_ASSERT_NOTOWNED:
1033 /* nothing to do here for RW lock; bypass assert */
1034 return ENOENT;
1035
1036 default:
1037 panic("bad ifnet assert type: %d", what);
1038 /* NOTREACHED */
1039 }
1040 }
1041
/* Acquire the global DLIL interface-pool mutex. */
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}
1047
/* Release the global DLIL interface-pool mutex. */
__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}
1053
/* Assert that the caller owns the DLIL interface-pool mutex. */
__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
1059
/*
 * Assert the caller's hold state on the global ifnet_head RW lock.
 * NOTOWNED is silently skipped (no RW equivalent).
 */
__private_extern__ void
ifnet_head_lock_assert(ifnet_lock_assert_t what)
{
	unsigned int type = 0;

	if (_dlil_get_lock_assertion_type(what, &type) == 0) {
		LCK_RW_ASSERT(&ifnet_head_lock, type);
	}
}
1069
/*
 * Assert the caller's hold state on `ifp''s RW lock.  NOTOWNED is
 * silently skipped (no RW equivalent).
 */
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
	unsigned int type = 0;

	if (_dlil_get_lock_assertion_type(what, &type) == 0) {
		LCK_RW_ASSERT(&ifp->if_lock, type);
	}
}
1082
/* Take the per-interface RW lock in shared (read) mode. */
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}
1088
/* Take the per-interface RW lock in exclusive (write) mode. */
__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}
1094
/* Release the per-interface RW lock (either mode). */
__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}
1100
1101 #if INET
/* Take the per-interface INET data RW lock in shared mode. */
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}
1107
/* Take the per-interface INET data RW lock in exclusive mode. */
__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}
1113
/* Release the per-interface INET data RW lock (either mode). */
__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
1119 #endif /* INET */
1120
/* Take the per-interface INET6 data RW lock in shared mode. */
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}
1126
/* Take the per-interface INET6 data RW lock in exclusive mode. */
__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}
1132
/* Release the per-interface INET6 data RW lock (either mode). */
__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
1138
/* Take the global interface-list RW lock in shared mode. */
__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}
1144
/* Take the global interface-list RW lock in exclusive mode. */
__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}
1150
/* Release the global interface-list RW lock (either mode). */
__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}
1156
/* Assert the global interface-list RW lock is held exclusively. */
__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
1162
1163 static errno_t
if_mcasts_update_common(struct ifnet * ifp,bool sync)1164 if_mcasts_update_common(struct ifnet * ifp, bool sync)
1165 {
1166 errno_t err;
1167
1168 if (sync) {
1169 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
1170 if (err == EAFNOSUPPORT) {
1171 err = 0;
1172 }
1173 } else {
1174 ifnet_ioctl_async(ifp, SIOCADDMULTI);
1175 err = 0;
1176 }
1177 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
1178 "(err=%d)\n", if_name(ifp),
1179 (err == 0 ? "successfully restored" : "failed to restore"),
1180 ifp->if_updatemcasts, err);
1181
1182 /* just return success */
1183 return 0;
1184 }
1185
/* Restore suspended multicast memberships without blocking the caller. */
errno_t
if_mcasts_update_async(struct ifnet *ifp)
{
	return if_mcasts_update_common(ifp, false);
}
1191
/* Restore suspended multicast memberships, waiting for the ioctl to finish. */
errno_t
if_mcasts_update(struct ifnet *ifp)
{
	return if_mcasts_update_common(ifp, true);
}
1197