xref: /xnu-12377.1.9/bsd/net/dlil.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30  * support for mandatory and extensible security protections.  This notice
31  * is included in support of clause 2.2 (b) of the Apple Public License,
32  * Version 2.0.
33  */
34 #include <stddef.h>
35 #include <ptrauth.h>
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/dlil_sysctl.h>
53 #include <net/dlil_var_private.h>
54 #include <net/if_arp.h>
55 #include <net/if_var_private.h>
56 #include <net/iptap.h>
57 #include <net/pktap.h>
58 #include <net/droptap.h>
59 #include <net/nwk_wq.h>
60 #include <sys/kern_event.h>
61 #include <sys/kdebug.h>
62 #include <sys/mcache.h>
63 #include <sys/syslog.h>
64 #include <sys/protosw.h>
65 #include <sys/priv.h>
66 
67 #include <kern/assert.h>
68 #include <kern/locks.h>
69 #include <kern/sched_prim.h>
70 #include <kern/task.h>
71 #include <kern/thread.h>
72 #include <kern/uipc_domain.h>
73 #include <kern/zalloc.h>
74 #include <kern/thread_group.h>
75 
76 #include <net/kpi_protocol.h>
77 #include <net/kpi_interface.h>
78 #include <net/if_types.h>
79 #include <net/if_ipsec.h>
80 #include <net/if_llreach.h>
81 #include <net/if_utun.h>
82 #include <net/kpi_interfacefilter.h>
83 #include <net/classq/classq.h>
84 #include <net/classq/classq_sfb.h>
85 #include <net/flowhash.h>
86 #include <net/ntstat.h>
87 #if SKYWALK
88 #include <skywalk/lib/net_filter_event.h>
89 #endif /* SKYWALK */
90 #include <net/net_api_stats.h>
91 #include <net/if_ports_used.h>
92 #include <net/if_vlan_var.h>
93 #include <netinet/in.h>
94 #if INET
95 #include <netinet/in_var.h>
96 #include <netinet/igmp_var.h>
97 #include <netinet/ip_var.h>
98 #include <netinet/tcp.h>
99 #include <netinet/tcp_var.h>
100 #include <netinet/udp.h>
101 #include <netinet/udp_var.h>
102 #include <netinet/if_ether.h>
103 #include <netinet/in_pcb.h>
104 #include <netinet/in_tclass.h>
105 #include <netinet/ip.h>
106 #include <netinet/ip_icmp.h>
107 #include <netinet/icmp_var.h>
108 #endif /* INET */
109 
110 #include <net/nat464_utils.h>
111 #include <netinet6/in6_var.h>
112 #include <netinet6/nd6.h>
113 #include <netinet6/mld6_var.h>
114 #include <netinet6/scope6_var.h>
115 #include <netinet/ip6.h>
116 #include <netinet/icmp6.h>
117 #include <net/pf_pbuf.h>
118 #include <libkern/OSAtomic.h>
119 #include <libkern/tree.h>
120 
121 #include <dev/random/randomdev.h>
122 #include <machine/machine_routines.h>
123 
124 #include <mach/thread_act.h>
125 #include <mach/sdt.h>
126 
127 #if CONFIG_MACF
128 #include <sys/kauth.h>
129 #include <security/mac_framework.h>
130 #include <net/ethernet.h>
131 #include <net/firewire.h>
132 #endif
133 
134 #if PF
135 #include <net/pfvar.h>
136 #endif /* PF */
137 #include <net/pktsched/pktsched.h>
138 #include <net/pktsched/pktsched_netem.h>
139 
140 #if NECP
141 #include <net/necp.h>
142 #endif /* NECP */
143 
144 #if SKYWALK
145 #include <skywalk/packet/packet_queue.h>
146 #include <skywalk/nexus/netif/nx_netif.h>
147 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
148 #endif /* SKYWALK */
149 
150 #include <net/sockaddr_utils.h>
151 
152 #include <os/log.h>
153 
154 uint64_t if_creation_generation_count = 0;
155 
156 dlil_ifnet_queue_t dlil_ifnet_head;
157 
158 static u_int32_t net_rtref;
159 
160 static struct dlil_main_threading_info dlil_main_input_thread_info;
161 struct dlil_threading_info *__single dlil_main_input_thread;
162 
163 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
164 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
165 
166 static int ifnet_lookup(struct ifnet *);
167 static void if_purgeaddrs(struct ifnet *);
168 
169 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
170     struct mbuf *, char *);
171 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
172     struct mbuf *);
173 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
174     mbuf_t *, const struct sockaddr *, void *,
175     IFNET_FRAME_TYPE_RW_T, IFNET_LLADDR_RW_T);
176 static void ifproto_media_event(struct ifnet *, protocol_family_t,
177     const struct kev_msg *);
178 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
179     unsigned long, void *);
180 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
181     struct sockaddr_dl *, size_t);
182 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
183     const struct sockaddr_dl *, const struct sockaddr *,
184     const struct sockaddr_dl *, const struct sockaddr *);
185 
186 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
187     struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
188     boolean_t poll, struct thread *tp);
189 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
190     struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
191 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
192 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
193     protocol_family_t *);
194 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
195     const struct ifnet_demux_desc *, u_int32_t);
196 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
197 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
198 #if !XNU_TARGET_OS_OSX
199 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
200     const struct sockaddr *, IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
201     u_int32_t *, u_int32_t *);
202 #else /* XNU_TARGET_OS_OSX */
203 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
204     const struct sockaddr *,
205     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T);
206 #endif /* XNU_TARGET_OS_OSX */
207 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
208     const struct sockaddr *,
209     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
210     u_int32_t *, u_int32_t *);
211 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
212 static void ifp_if_free(struct ifnet *);
213 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
214 
215 
216 
217 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
218     const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
219 #if DEBUG || DEVELOPMENT
220 static void dlil_verify_sum16(void);
221 #endif /* DEBUG || DEVELOPMENT */
222 
223 
224 static void ifnet_detacher_thread_func(void *, wait_result_t);
225 static void ifnet_detacher_thread_cont(void *, wait_result_t);
226 static void ifnet_detach_final(struct ifnet *);
227 static void ifnet_detaching_enqueue(struct ifnet *);
228 static struct ifnet *ifnet_detaching_dequeue(void);
229 
230 static void ifnet_start_thread_func(void *, wait_result_t);
231 static void ifnet_start_thread_cont(void *, wait_result_t);
232 
233 static void ifnet_poll_thread_func(void *, wait_result_t);
234 static void ifnet_poll_thread_cont(void *, wait_result_t);
235 
236 static errno_t ifnet_enqueue_common_single(struct ifnet *, struct ifclassq *,
237     classq_pkt_t *, boolean_t, boolean_t *);
238 
239 static void ifp_src_route_copyout(struct ifnet *, struct route *);
240 static void ifp_src_route_copyin(struct ifnet *, struct route *);
241 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
242 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
243 
244 
245 /* The following are protected by dlil_ifnet_lock */
246 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
247 static u_int32_t ifnet_detaching_cnt;
248 static boolean_t ifnet_detaching_embryonic;
249 static void *ifnet_delayed_run; /* wait channel for detaching thread */
250 
251 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
252     &dlil_lck_attributes);
253 
254 static uint32_t ifnet_flowhash_seed;
255 
/*
 * Key material hashed to produce a per-interface flow hash (see
 * ifnet_calc_flowhash()); the hash is salted by ifnet_flowhash_seed.
 * Field names suggest snapshots of the corresponding ifnet state plus
 * two random values -- confirm against ifnet_calc_flowhash().
 */
struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];     /* interface name */
	uint32_t        ifk_unit;               /* interface unit */
	uint32_t        ifk_flags;              /* interface flags */
	uint32_t        ifk_eflags;             /* extended flags */
	uint32_t        ifk_capabilities;       /* capabilities */
	uint32_t        ifk_capenable;          /* enabled capabilities */
	uint32_t        ifk_output_sched_model; /* output scheduling model */
	uint32_t        ifk_rand1;              /* random salt #1 */
	uint32_t        ifk_rand2;              /* random salt #2 */
};
267 
/* Flow control entry per interface; nodes of ifnet_fc_tree (keyed by hash) */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;    /* ifnet_fc_tree linkage */
	u_int32_t       ifce_flowhash;          /* lookup key: interface flow hash */
	ifnet_ref_t     ifce_ifp;               /* referenced interface */
};
274 
275 static uint32_t ifnet_calc_flowhash(struct ifnet *);
276 static int ifce_cmp(const struct ifnet_fc_entry *,
277     const struct ifnet_fc_entry *);
278 static int ifnet_fc_add(struct ifnet *);
279 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
280 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
281 
282 /* protected by ifnet_fc_lock */
283 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
284 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
285 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
286 
287 static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
288 
289 extern void bpfdetach(struct ifnet *);
290 
291 
292 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
293     u_int32_t flags);
294 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
295     u_int32_t flags);
296 
297 
298 #if CONFIG_MACF
299 #if !XNU_TARGET_OS_OSX
300 int dlil_lladdr_ckreq = 1;
301 #else /* XNU_TARGET_OS_OSX */
302 int dlil_lladdr_ckreq = 0;
303 #endif /* XNU_TARGET_OS_OSX */
304 #endif /* CONFIG_MACF */
305 
306 
/*
 * Atomically bump the global ifnet_delay_start_disabled counter
 * (declared elsewhere; a nonzero value presumably disables delayed
 * interface start -- confirm against its consumers).
 */
static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}
312 
313 unsigned int net_rxpoll = 1;
314 unsigned int net_affinity = 1;
315 unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */
316 
317 extern u_int32_t        inject_buckets;
318 
319 void
ifnet_filter_update_tso(struct ifnet * ifp,boolean_t filter_enable)320 ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
321 {
322 	/*
323 	 * update filter count and route_generation ID to let TCP
324 	 * know it should reevalute doing TSO or not
325 	 */
326 	if (filter_enable) {
327 		OSAddAtomic(1, &ifp->if_flt_no_tso_count);
328 	} else {
329 		VERIFY(ifp->if_flt_no_tso_count != 0);
330 		OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
331 	}
332 	routegenid_update();
333 }
334 
335 os_refgrp_decl(static, if_refiogrp, "if refio refcounts", NULL);
336 os_refgrp_decl(static, if_datamovgrp, "if datamov refcounts", NULL);
337 #define IF_DATAMOV_BITS 1
338 #define IF_DATAMOV_DRAINING 1
339 
340 #if SKYWALK
341 
342 static bool net_check_compatible_if_filter(struct ifnet *ifp);
343 
344 /* if_attach_nx flags defined in os_skywalk_private.h */
345 unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
346 unsigned int if_enable_fsw_ip_netagent =
347     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
348 unsigned int if_enable_fsw_transport_netagent =
349     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
350 
351 unsigned int if_netif_all =
352     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);
353 
354 /* Configure flowswitch to use max mtu sized buffer */
355 static bool fsw_use_max_mtu_buffer = false;
356 
357 
358 static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);
359 
360 #include <skywalk/os_skywalk_private.h>
361 
362 boolean_t
ifnet_nx_noauto(ifnet_t ifp)363 ifnet_nx_noauto(ifnet_t ifp)
364 {
365 	return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
366 }
367 
/*
 * Returns TRUE when the flowswitch must not be auto-attached to this
 * interface; currently that is the case only for low-latency interfaces.
 */
boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)
{
	return ifnet_is_low_latency(ifp);
}
373 
374 boolean_t
ifnet_is_low_latency(ifnet_t ifp)375 ifnet_is_low_latency(ifnet_t ifp)
376 {
377 	return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
378 }
379 
380 boolean_t
ifnet_needs_compat(ifnet_t ifp)381 ifnet_needs_compat(ifnet_t ifp)
382 {
383 	if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
384 		return FALSE;
385 	}
386 #if !XNU_TARGET_OS_OSX
387 	/*
388 	 * To conserve memory, we plumb in the compat layer selectively; this
389 	 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
390 	 * In particular, we check for Wi-Fi Access Point.
391 	 */
392 	if (IFNET_IS_WIFI(ifp)) {
393 		/* Wi-Fi Access Point */
394 		if (strcmp(ifp->if_name, "ap") == 0) {
395 			return if_netif_all;
396 		}
397 	}
398 #else /* XNU_TARGET_OS_OSX */
399 #pragma unused(ifp)
400 #endif /* XNU_TARGET_OS_OSX */
401 	return TRUE;
402 }
403 
404 boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)405 ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
406 {
407 	if (if_is_fsw_transport_netagent_enabled()) {
408 		/* check if netagent has been manually enabled for ipsec/utun */
409 		if (ifp->if_family == IFNET_FAMILY_IPSEC) {
410 			return ipsec_interface_needs_netagent(ifp);
411 		} else if (ifp->if_family == IFNET_FAMILY_UTUN) {
412 			return utun_interface_needs_netagent(ifp);
413 		}
414 
415 		/* check ifnet no auto nexus override */
416 		if (ifnet_nx_noauto(ifp)) {
417 			return FALSE;
418 		}
419 
420 		/* check global if_attach_nx configuration */
421 		switch (ifp->if_family) {
422 		case IFNET_FAMILY_CELLULAR:
423 		case IFNET_FAMILY_ETHERNET:
424 			if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
425 				return TRUE;
426 			}
427 			break;
428 		default:
429 			break;
430 		}
431 	}
432 	return FALSE;
433 }
434 
435 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)436 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
437 {
438 #pragma unused(ifp)
439 	if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
440 		return TRUE;
441 	}
442 	return FALSE;
443 }
444 
/*
 * Returns TRUE when a netif netagent should be created; this is a global
 * policy (if_attach_nx) and does not depend on the interface itself.
 */
boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)
{
#pragma unused(ifp)
	return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
}
451 
452 static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,const char * func_str,uuid_t instance,uuid_t device)453 dlil_detach_nexus_instance(nexus_controller_t controller,
454     const char *func_str, uuid_t instance, uuid_t device)
455 {
456 	errno_t         err;
457 
458 	if (instance == NULL || uuid_is_null(instance)) {
459 		return FALSE;
460 	}
461 
462 	/* followed by the device port */
463 	if (device != NULL && !uuid_is_null(device)) {
464 		err = kern_nexus_ifdetach(controller, instance, device);
465 		if (err != 0) {
466 			DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
467 			    func_str, err);
468 		}
469 	}
470 	err = kern_nexus_controller_free_provider_instance(controller,
471 	    instance);
472 	if (err != 0) {
473 		DLIL_PRINTF("%s free_provider_instance failed %d\n",
474 		    func_str, err);
475 	}
476 	return TRUE;
477 }
478 
479 static boolean_t
dlil_detach_nexus(const char * func_str,uuid_t provider,uuid_t instance,uuid_t device)480 dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
481     uuid_t device)
482 {
483 	boolean_t               detached = FALSE;
484 	nexus_controller_t      controller = kern_nexus_shared_controller();
485 	int                     err;
486 
487 	if (dlil_detach_nexus_instance(controller, func_str, instance,
488 	    device)) {
489 		detached = TRUE;
490 	}
491 	if (provider != NULL && !uuid_is_null(provider)) {
492 		detached = TRUE;
493 		err = kern_nexus_controller_deregister_provider(controller,
494 		    provider);
495 		if (err != 0) {
496 			DLIL_PRINTF("%s deregister_provider %d\n",
497 			    func_str, err);
498 		}
499 	}
500 	return detached;
501 }
502 
/*
 * Register a nexus provider of the given type ("netif" or "flowswitch")
 * for the interface and allocate one provider instance.
 *
 * On success, *provider and *instance receive the UUIDs of the newly
 * registered provider and instance, and 0 is returned.  On failure a
 * nonzero errno is returned and a partially-registered provider has
 * been deregistered.  Note that the success path also flows through the
 * "failed" label (with err == 0).
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t          dom_prov;
	errno_t         err;
	nexus_name_t    provider_name;
	const char      *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	/* look up the default domain provider for this nexus type */
	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	/* provider name encodes the nexus type and the interface name */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* undo the provider registration; secondary errors ignored */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	return err;
}
552 
/*
 * Attach a netif nexus to the interface: create the netif provider and
 * instance, then bind it to ifp via kern_nexus_ifattach().
 *
 * Returns TRUE on success with netif_nx filled in; FALSE when the
 * interface already has a nexus attached or any step fails (in which
 * case the provider/instance created here are torn down again).
 */
static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
{
	nexus_attr_t            __single attr = NULL;
	nexus_controller_t      controller;
	errno_t                 err;
	/* bidi-indexable NULL stand-in passed where a uuid_t is expected */
	unsigned char          *empty_uuid = __unsafe_forge_bidi_indexable(unsigned char *, NULL, sizeof(uuid_t));

	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
		/* it's already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s already has nexus attached\n",
			    __func__, if_name(ifp));
			/* already attached */
		}
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}
	/* pin the nexus to this interface's index */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
	VERIFY(err == 0);

	controller = kern_nexus_shared_controller();

	/* create the netif provider and instance */
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
	if (err != 0) {
		goto failed;
	}

	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance, ifp,
	    empty_uuid, FALSE, &netif_nx->if_nif_attach);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
		    __func__, err);
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
		    netif_nx->if_nif_instance, empty_uuid);
		goto failed;
	}
	/*
	 * NOTE(review): attr is destroyed only on the failed path; it is not
	 * freed on this success return -- confirm whether ownership transfers
	 * during registration or whether this leaks the attr object.
	 */
	return TRUE;

failed:
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
608 
609 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)610 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
611 {
612 	if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
613 	    IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
614 		goto failed;
615 	}
616 	switch (ifp->if_type) {
617 	case IFT_CELLULAR:
618 	case IFT_ETHER:
619 		if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
620 			/* don't auto-attach */
621 			goto failed;
622 		}
623 		break;
624 	default:
625 		/* don't auto-attach */
626 		goto failed;
627 	}
628 	return dlil_attach_netif_nexus_common(ifp, netif_nx);
629 
630 failed:
631 	return FALSE;
632 }
633 
/*
 * Tear down the netif nexus recorded in nexus_netif: detach the attach
 * port, free the instance, and deregister the provider.
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
641 
642 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)643 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
644 {
645 	struct ifreq        ifr;
646 	int                 error;
647 
648 	bzero(&ifr, sizeof(ifr));
649 	error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
650 	if (error == 0) {
651 		*ifdm_p = ifr.ifr_devmtu;
652 	}
653 	return error;
654 }
655 
656 static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp,uint32_t * large_buf_size)657 _dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
658 {
659 	uint32_t tso_v4_mtu = 0;
660 	uint32_t tso_v6_mtu = 0;
661 
662 	if (!kernel_is_macos_or_server()) {
663 		return;
664 	}
665 
666 	/*
667 	 * Note that we are reading the real hwassist flags set by the driver
668 	 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
669 	 * hasn't been called yet.
670 	 */
671 	if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
672 		tso_v4_mtu = ifp->if_tso_v4_mtu;
673 	}
674 	if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
675 		tso_v6_mtu = ifp->if_tso_v6_mtu;
676 	}
677 
678 	/*
679 	 * If the hardware supports TSO, adjust the large buf size to match the
680 	 * supported TSO MTU size. Note that only native interfaces set TSO MTU
681 	 * size today.
682 	 * For compat, there is a 16KB limit on large buf size, so it needs to be
683 	 * bounded by NX_FSW_DEF_LARGE_BUFSIZE. Note that no compat interfaces
684 	 * set TSO MTU size today.
685 	 */
686 	if (SKYWALK_NATIVE(ifp)) {
687 		if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
688 			*large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
689 		} else {
690 			*large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
691 		}
692 		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
693 	} else {
694 		*large_buf_size = MIN(NX_FSW_DEF_LARGE_BUFSIZE, *large_buf_size);
695 	}
696 }
697 
698 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint32_t * buf_size,bool * use_multi_buflet,uint32_t * large_buf_size)699 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
700     bool *use_multi_buflet, uint32_t *large_buf_size)
701 {
702 	struct kern_pbufpool_memory_info rx_pp_info;
703 	struct kern_pbufpool_memory_info tx_pp_info;
704 	uint32_t if_max_mtu = 0;
705 	uint32_t drv_buf_size;
706 	struct ifdevmtu ifdm;
707 	int err;
708 
709 	/*
710 	 * To perform intra-stack RX aggregation flowswitch needs to use
711 	 * multi-buflet packet.
712 	 */
713 	*use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
714 
715 	*large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
716 	/*
717 	 * IP over Thunderbolt interface can deliver the largest IP packet,
718 	 * but the driver advertises the MAX MTU as only 9K.
719 	 */
720 	if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
721 		if_max_mtu = IP_MAXPACKET;
722 		goto skip_mtu_ioctl;
723 	}
724 
725 	/* determine max mtu */
726 	bzero(&ifdm, sizeof(ifdm));
727 	err = dlil_siocgifdevmtu(ifp, &ifdm);
728 	if (__improbable(err != 0)) {
729 		DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
730 		    __func__, if_name(ifp));
731 		/* use default flowswitch buffer size */
732 		if_max_mtu = NX_FSW_BUFSIZE;
733 	} else {
734 		DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
735 		    ifdm.ifdm_max, ifdm.ifdm_current);
736 		/* rdar://problem/44589731 */
737 		if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
738 	}
739 
740 skip_mtu_ioctl:
741 	if (if_max_mtu == 0) {
742 		DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
743 		    __func__, if_name(ifp));
744 		return EINVAL;
745 	}
746 	if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
747 		DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
748 		    "max bufsize(%d)\n", __func__,
749 		    if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
750 		return EINVAL;
751 	}
752 
753 	/*
754 	 * for skywalk native driver, consult the driver packet pool also.
755 	 */
756 	if (dlil_is_native_netif_nexus(ifp)) {
757 		err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
758 		    &tx_pp_info);
759 		if (err != 0) {
760 			DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
761 			    __func__, if_name(ifp));
762 			return ENXIO;
763 		}
764 		drv_buf_size = tx_pp_info.kpm_bufsize *
765 		    tx_pp_info.kpm_max_frags;
766 		if (if_max_mtu > drv_buf_size) {
767 			DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
768 			    "tx %d * %d) can't support max mtu(%d)\n", __func__,
769 			    if_name(ifp), rx_pp_info.kpm_bufsize,
770 			    rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
771 			    tx_pp_info.kpm_max_frags, if_max_mtu);
772 			return EINVAL;
773 		}
774 	} else {
775 		drv_buf_size = if_max_mtu;
776 	}
777 
778 	if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
779 		static_assert((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
780 		*use_multi_buflet = true;
781 		/* default flowswitch buffer size */
782 		*buf_size = NX_FSW_BUFSIZE;
783 		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
784 	} else {
785 		*buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
786 	}
787 	_dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
788 	ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
789 	if (*buf_size >= *large_buf_size) {
790 		*large_buf_size = 0;
791 	}
792 	return 0;
793 }
794 
/*
 * Create a flowswitch nexus and attach it to the interface's netif
 * device port.  Buffer sizing is derived from the interface MTU and
 * (for native drivers) the packet pool, via
 * _dlil_get_flowswitch_buffer_size().
 *
 * Returns TRUE on success with nexus_fsw filled in; FALSE when the
 * interface is not eligible or any step fails (partially-created
 * provider/instance state is torn down before returning).
 */
static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
{
	nexus_attr_t            __single attr = NULL;
	nexus_controller_t      controller;
	errno_t                 err = 0;
	uuid_t                  netif;
	uint32_t                buf_size = 0;
	uint32_t                large_buf_size = 0;
	bool                    multi_buflet;

	/* never auto-attach for no-auto, low-latency, or vmnet interfaces */
	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
	    IFNET_IS_VMNET(ifp)) {
		goto failed;
	}

	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
		/* not possible to attach (netif native/compat not plumbed) */
		goto failed;
	}

	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
		/* don't auto-attach */
		goto failed;
	}

	/* get the netif instance from the ifp */
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet, &large_buf_size);
	if (err != 0) {
		goto failed;
	}
	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
	ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);

	/* Configure flowswitch buffer size */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
	    large_buf_size);
	VERIFY(err == 0);

	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	/* create the flowswitch provider and instance */
	controller = kern_nexus_shared_controller();
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
	if (err != 0) {
		goto failed;
	}

	/* attach the device port */
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
		goto failed;
	}
	/*
	 * NOTE(review): attr is destroyed only on the failed path; it is not
	 * freed on this success return -- confirm whether ownership transfers
	 * during registration or whether this leaks the attr object.
	 */
	return TRUE;

failed:
	/* err == 0 here means attachment was skipped by policy, not an error */
	if (err != 0) {
		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
		    __func__, if_name(ifp), err);
	} else {
		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
		    __func__, if_name(ifp));
	}
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
893 
/*
 * Attach a flowswitch nexus to an interface that already has a netif.
 * Returns TRUE when a flowswitch was created and recorded in
 * ifp->if_nx_flowswitch; FALSE when attachment is skipped or fails.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t               attached = FALSE;
	if_nexus_flowswitch     nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	/* netif-direct interfaces bypass the flowswitch entirely */
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
		    if_name(ifp));
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));

	/*
	 * A race can happen between a thread creating a flowswitch and another thread
	 * detaching the interface (also destroying the flowswitch).
	 *
	 * ifnet_datamov_begin() is used here to force dlil_quiesce_and_detach_nexuses()
	 * (called by another thread) to wait until this function finishes so the
	 * flowswitch can be cleaned up by dlil_detach_flowswitch_nexus().
	 *
	 * If ifnet_get_ioref() is used instead, dlil_quiesce_and_detach_nexuses()
	 * would not wait (because ifp->if_nx_flowswitch isn't assigned) and the
	 * created flowswitch would be left hanging and ifnet_detach_final() would never
	 * wakeup because the existence of the flowswitch prevents the ifnet's ioref
	 * from being released.
	 */
	if (!ifnet_datamov_begin(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s not attached",
		    __func__, ifp->if_xname);
		goto done;
	}
	/* only attach when no flowswitch instance exists yet */
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
		attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
		if (attached) {
			/* publish the new flowswitch state under the ifnet lock */
			ifnet_lock_exclusive(ifp);
			ifp->if_nx_flowswitch = nexus_fsw;
			ifnet_lock_done(ifp);
		}
	}
	ifnet_datamov_end(ifp);

done:
	return attached;
}
950 
951 __attribute__((noinline))
952 static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)953 dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
954 {
955 	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
956 	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
957 }
958 
959 __attribute__((noinline))
960 static void
dlil_netif_detach_notify(ifnet_t ifp)961 dlil_netif_detach_notify(ifnet_t ifp)
962 {
963 	ifnet_detach_notify_cb_t notify = NULL;
964 	void *__single arg = NULL;
965 
966 	ifnet_get_detach_notify(ifp, &notify, &arg);
967 	if (notify == NULL) {
968 		DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
969 		return;
970 	}
971 	(*notify)(arg);
972 }
973 
974 __attribute__((noinline))
975 static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)976 dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
977 {
978 	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
979 	if_nexus_netif *nx_netif = &ifp->if_nx_netif;
980 
981 	ifnet_datamov_suspend_and_drain(ifp);
982 	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
983 		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
984 		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
985 		dlil_detach_flowswitch_nexus(nx_fsw);
986 	} else {
987 		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
988 		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
989 		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
990 	}
991 
992 	if (!uuid_is_null(nx_netif->if_nif_attach)) {
993 		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
994 		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
995 		dlil_detach_netif_nexus(nx_netif);
996 	} else {
997 		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
998 		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
999 		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
1000 	}
1001 	ifnet_datamov_resume(ifp);
1002 }
1003 
1004 boolean_t
ifnet_add_netagent(ifnet_t ifp)1005 ifnet_add_netagent(ifnet_t ifp)
1006 {
1007 	int     error;
1008 
1009 	error = kern_nexus_interface_add_netagent(ifp);
1010 	os_log(OS_LOG_DEFAULT,
1011 	    "kern_nexus_interface_add_netagent(%s) returned %d",
1012 	    ifp->if_xname, error);
1013 	return error == 0;
1014 }
1015 
1016 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1017 ifnet_remove_netagent(ifnet_t ifp)
1018 {
1019 	int     error;
1020 
1021 	error = kern_nexus_interface_remove_netagent(ifp);
1022 	os_log(OS_LOG_DEFAULT,
1023 	    "kern_nexus_interface_remove_netagent(%s) returned %d",
1024 	    ifp->if_xname, error);
1025 	return error == 0;
1026 }
1027 
1028 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1029 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1030 {
1031 	if (!ifnet_is_fully_attached(ifp)) {
1032 		return FALSE;
1033 	}
1034 	return dlil_attach_flowswitch_nexus(ifp);
1035 }
1036 
1037 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)1038 ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1039 {
1040 	if_nexus_flowswitch     nexus_fsw;
1041 
1042 	ifnet_lock_exclusive(ifp);
1043 	nexus_fsw = ifp->if_nx_flowswitch;
1044 	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1045 	ifnet_lock_done(ifp);
1046 	return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1047 	           nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1048 }
1049 
1050 void
ifnet_attach_native_flowswitch(ifnet_t ifp)1051 ifnet_attach_native_flowswitch(ifnet_t ifp)
1052 {
1053 	if (!dlil_is_native_netif_nexus(ifp)) {
1054 		/* not a native netif */
1055 		return;
1056 	}
1057 	ifnet_attach_flowswitch_nexus(ifp);
1058 }
1059 
1060 int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp,ifnet_fsw_rx_cb_t cb,void * arg)1061 ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
1062 {
1063 	lck_mtx_lock(&ifp->if_delegate_lock);
1064 	while (ifp->if_fsw_rx_cb_ref > 0) {
1065 		DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
1066 		(void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
1067 		    (PZERO + 1), __FUNCTION__, NULL);
1068 		DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
1069 	}
1070 	ifp->if_fsw_rx_cb = cb;
1071 	ifp->if_fsw_rx_cb_arg = arg;
1072 	lck_mtx_unlock(&ifp->if_delegate_lock);
1073 	return 0;
1074 }
1075 
/*
 * Look up the flowswitch RX callback and its argument, taking a reader
 * reference that the caller must drop with
 * ifnet_release_flowswitch_rx_callback().  Returns ENOENT when no
 * callback is installed.
 */
int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
{
	/*
	 * This is for avoiding the unnecessary lock acquire for interfaces
	 * not used by a redirect interface.
	 */
	if (ifp->if_fsw_rx_cb == NULL) {
		return ENOENT;
	}
	lck_mtx_lock(&ifp->if_delegate_lock);
	/* re-check under the lock: the callback may have been cleared since
	 * the unlocked fast-path test above */
	if (ifp->if_fsw_rx_cb == NULL) {
		lck_mtx_unlock(&ifp->if_delegate_lock);
		return ENOENT;
	}
	*cbp = ifp->if_fsw_rx_cb;
	*argp = ifp->if_fsw_rx_cb_arg;
	/* reference pins the callback; released via
	 * ifnet_release_flowswitch_rx_callback() */
	ifp->if_fsw_rx_cb_ref++;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1097 
1098 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)1099 ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1100 {
1101 	lck_mtx_lock(&ifp->if_delegate_lock);
1102 	if (--ifp->if_fsw_rx_cb_ref == 0) {
1103 		wakeup(&ifp->if_fsw_rx_cb_ref);
1104 	}
1105 	lck_mtx_unlock(&ifp->if_delegate_lock);
1106 }
1107 
1108 int
ifnet_set_delegate_parent(ifnet_t difp,ifnet_t parent)1109 ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
1110 {
1111 	lck_mtx_lock(&difp->if_delegate_lock);
1112 	while (difp->if_delegate_parent_ref > 0) {
1113 		DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
1114 		(void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
1115 		    (PZERO + 1), __FUNCTION__, NULL);
1116 		DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
1117 	}
1118 	difp->if_delegate_parent = parent;
1119 	lck_mtx_unlock(&difp->if_delegate_lock);
1120 	return 0;
1121 }
1122 
1123 int
ifnet_get_delegate_parent(ifnet_t difp,ifnet_t * parentp)1124 ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1125 {
1126 	lck_mtx_lock(&difp->if_delegate_lock);
1127 	if (difp->if_delegate_parent == NULL) {
1128 		lck_mtx_unlock(&difp->if_delegate_lock);
1129 		return ENOENT;
1130 	}
1131 	*parentp = difp->if_delegate_parent;
1132 	difp->if_delegate_parent_ref++;
1133 	lck_mtx_unlock(&difp->if_delegate_lock);
1134 	return 0;
1135 }
1136 
1137 void
ifnet_release_delegate_parent(ifnet_t difp)1138 ifnet_release_delegate_parent(ifnet_t difp)
1139 {
1140 	lck_mtx_lock(&difp->if_delegate_lock);
1141 	if (--difp->if_delegate_parent_ref == 0) {
1142 		wakeup(&difp->if_delegate_parent_ref);
1143 	}
1144 	lck_mtx_unlock(&difp->if_delegate_lock);
1145 }
1146 
1147 __attribute__((noinline))
1148 void
ifnet_set_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1149 ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1150 {
1151 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1152 	ifp->if_detach_notify = notify;
1153 	ifp->if_detach_notify_arg = arg;
1154 }
1155 
1156 __attribute__((noinline))
1157 void
ifnet_get_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1158 ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1159 {
1160 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1161 	*notifyp = ifp->if_detach_notify;
1162 	*argp = ifp->if_detach_notify_arg;
1163 }
1164 
1165 __attribute__((noinline))
1166 void
ifnet_set_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1167 ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1168 {
1169 	ifnet_lock_exclusive(ifp);
1170 	ifnet_set_detach_notify_locked(ifp, notify, arg);
1171 	ifnet_lock_done(ifp);
1172 }
1173 
1174 __attribute__((noinline))
1175 void
ifnet_get_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1176 ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1177 {
1178 	ifnet_lock_exclusive(ifp);
1179 	ifnet_get_detach_notify_locked(ifp, notifyp, argp);
1180 	ifnet_lock_done(ifp);
1181 }
1182 #endif /* SKYWALK */
1183 
/*
 * Sanity-check an inbound mbuf before DLIL input processing: it must
 * carry a packet header (MBUF_PKTHDR) and its recorded receive
 * interface must match `ifp' (any non-NULL rcvif is tolerated for
 * packets arriving on lo_ifp).  A violation is fatal: the kernel
 * panics with the offending mbuf pointer.
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	ifnet_ref_t _rcvif = mbuf_pkthdr_rcvif(m);                      \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}
1192 
/* Bit-rate units used to key the rx-poll parameter table below. */
#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)

/* Per-link-speed input polling watermarks. */
struct rxpoll_time_tbl {
	u_int64_t       speed;          /* downlink speed */
	u_int32_t       plowat;         /* packets low watermark */
	u_int32_t       phiwat;         /* packets high watermark */
	u_int32_t       blowat;         /* bytes low watermark */
	u_int32_t       bhiwat;         /* bytes high watermark */
};

/*
 * Rx-poll watermarks keyed by downlink speed, in increasing speed
 * order; the all-zero entry terminates the table.
 */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed =  10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024)    },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
1212 
1213 int
proto_hash_value(u_int32_t protocol_family)1214 proto_hash_value(u_int32_t protocol_family)
1215 {
1216 	/*
1217 	 * dlil_proto_unplumb_all() depends on the mapping between
1218 	 * the hash bucket index and the protocol family defined
1219 	 * here; future changes must be applied there as well.
1220 	 */
1221 	switch (protocol_family) {
1222 	case PF_INET:
1223 		return 0;
1224 	case PF_INET6:
1225 		return 1;
1226 	case PF_VLAN:
1227 		return 2;
1228 	case PF_UNSPEC:
1229 	default:
1230 		return 3;
1231 	}
1232 }
1233 
1234 __private_extern__ int
dlil_post_msg(struct ifnet * ifp,u_int32_t event_subclass,u_int32_t event_code,struct net_event_data * event_data,u_int32_t event_data_len,boolean_t suppress_generation)1235 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1236     u_int32_t event_code, struct net_event_data *event_data,
1237     u_int32_t event_data_len, boolean_t suppress_generation)
1238 {
1239 	struct net_event_data ev_data;
1240 	struct kev_msg ev_msg;
1241 
1242 	bzero(&ev_msg, sizeof(ev_msg));
1243 	bzero(&ev_data, sizeof(ev_data));
1244 	/*
1245 	 * a net event always starts with a net_event_data structure
1246 	 * but the caller can generate a simple net event or
1247 	 * provide a longer event structure to post
1248 	 */
1249 	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1250 	ev_msg.kev_class        = KEV_NETWORK_CLASS;
1251 	ev_msg.kev_subclass     = event_subclass;
1252 	ev_msg.event_code       = event_code;
1253 
1254 	if (event_data == NULL) {
1255 		event_data = &ev_data;
1256 		event_data_len = sizeof(struct net_event_data);
1257 	}
1258 
1259 	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1260 	event_data->if_family = ifp->if_family;
1261 	event_data->if_unit   = (u_int32_t)ifp->if_unit;
1262 
1263 	ev_msg.dv[0].data_length = event_data_len;
1264 	ev_msg.dv[0].data_ptr    = event_data;
1265 	ev_msg.dv[1].data_length = 0;
1266 
1267 	bool update_generation = true;
1268 	if (event_subclass == KEV_DL_SUBCLASS) {
1269 		/* Don't update interface generation for frequent link quality and state changes  */
1270 		switch (event_code) {
1271 		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1272 		case KEV_DL_RRC_STATE_CHANGED:
1273 		case KEV_DL_PRIMARY_ELECTED:
1274 			update_generation = false;
1275 			break;
1276 		default:
1277 			break;
1278 		}
1279 	}
1280 
1281 	/*
1282 	 * Some events that update generation counts might
1283 	 * want to suppress generation count.
1284 	 * One example is node presence/absence where we still
1285 	 * issue kernel event for the invocation but want to avoid
1286 	 * expensive operation of updating generation which triggers
1287 	 * NECP client updates.
1288 	 */
1289 	if (suppress_generation) {
1290 		update_generation = false;
1291 	}
1292 
1293 	return dlil_event_internal(ifp, &ev_msg, update_generation);
1294 }
1295 
1296 static void
dlil_reset_rxpoll_params(ifnet_t ifp)1297 dlil_reset_rxpoll_params(ifnet_t ifp)
1298 {
1299 	ASSERT(ifp != NULL);
1300 	ifnet_set_poll_cycle(ifp, NULL);
1301 	ifp->if_poll_update = 0;
1302 	ifp->if_poll_flags = 0;
1303 	ifp->if_poll_req = 0;
1304 	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1305 	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1306 	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1307 	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1308 	net_timerclear(&ifp->if_poll_mode_holdtime);
1309 	net_timerclear(&ifp->if_poll_mode_lasttime);
1310 	net_timerclear(&ifp->if_poll_sample_holdtime);
1311 	net_timerclear(&ifp->if_poll_sample_lasttime);
1312 	net_timerclear(&ifp->if_poll_dbg_lasttime);
1313 }
1314 
1315 
1316 #if SKYWALK
1317 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)1318 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
1319     enum net_filter_event_subsystems state)
1320 {
1321 	evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%d",
1322 	    __func__, state);
1323 
1324 	bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
1325 	if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
1326 		if_enable_fsw_transport_netagent = 1;
1327 	} else {
1328 		if_enable_fsw_transport_netagent = 0;
1329 	}
1330 	if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
1331 		kern_nexus_update_netagents();
1332 	} else if (!if_enable_fsw_transport_netagent) {
1333 		necp_update_all_clients();
1334 	}
1335 }
1336 #endif /* SKYWALK */
1337 
/*
 * One-time DLIL subsystem bootstrap, called during network stack
 * initialization.  Verifies layout/ABI invariants, parses boot-args,
 * configures the Skywalk netagent policy, initializes the dependent
 * subsystems (addresses, PF, classq, pktsched, flowadv, pktap,
 * droptap, QoS maps, low-power handler, port list), and finally
 * creates the main input thread and the detacher thread, waiting
 * until both have been scheduled at least once.
 */
void
dlil_init(void)
{
	thread_t __single thread = THREAD_NULL;

	dlil_main_input_thread = (struct dlil_threading_info *) &dlil_main_input_thread_info;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	static_assert(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	static_assert(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	static_assert(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	static_assert(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	static_assert(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	static_assert(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	static_assert(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	static_assert(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	static_assert(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	static_assert(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	static_assert(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	static_assert(CSUM_IP == IF_HWASSIST_CSUM_IP);
	static_assert(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	static_assert(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	static_assert(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	static_assert(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	static_assert(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	static_assert(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	static_assert(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	static_assert(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	static_assert(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	static_assert(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	static_assert(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	static_assert(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	static_assert(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	static_assert(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	static_assert(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	static_assert(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	static_assert(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	static_assert(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	static_assert(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	static_assert(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	static_assert(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	static_assert(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	static_assert(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	static_assert(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	static_assert(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	static_assert(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	static_assert(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	static_assert(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	static_assert(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	static_assert(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	static_assert(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	static_assert(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	static_assert(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	static_assert(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	static_assert(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	static_assert(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	static_assert(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	static_assert(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	static_assert(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	static_assert(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	static_assert(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	static_assert(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	static_assert(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	static_assert(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	static_assert(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	static_assert(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
	static_assert(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
	static_assert(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);

	static_assert(DLIL_MODIDLEN == IFNET_MODIDLEN);
	static_assert(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* boot-arg overrides for DLIL tunables */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("if_link_heuristics", &if_link_heuristics_flags, sizeof(if_link_heuristics_flags));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled.  This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	/*
	 * NOTE(review): the second arm below fires whenever the final state
	 * is "disabled" and there was no platform disable override --
	 * including the plain default-disabled case with no boot-arg; confirm
	 * "force-disabled" is the intended wording for that path.
	 */
	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
	if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */

	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize droptap interface */
	droptap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
1631 
/*
 * Attach an interface filter described by `if_filter' to `ifp' and
 * return the new filter handle through `filter_ref'.  The filter's
 * input/output/event/ioctl callbacks are suppressed on internal
 * coprocessor and management interfaces.  Returns 0 on success or
 * ENXIO when the interface is not (or no longer) attached.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	/* take an I/O reference so the interface can't finish detaching
	 * while the filter is being installed */
	if (!ifnet_get_ioref(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	filter = dlif_filt_alloc();
	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 * and for management interfaces
	 */
	if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	/* detached callback is always installed so the owner learns of removal */
	filter->filt_detached = if_filter->iff_detached;

	/* enter the filter monitor before mutating the filter list */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	/* account the attach in the global and (for non-OS filters)
	 * per-interface statistics */
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	/* drop the I/O reference taken above; the filter itself keeps
	 * the interface pinned via the attach accounting */
	ifnet_decr_iorefcnt(ifp);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		dlif_filt_free(filter);
	}

	return retval;
}
1723 
/*
 * Detach an interface filter.
 *
 * detached == 0: normal path — walk every attached ifnet looking for
 * "filter" on its filter list; when found, unlink it, adjust counters,
 * and destroy it.  Returns EINVAL if the filter is not on any interface.
 *
 * detached != 0: called from ifnet_detach_final(); the caller already
 * emptied if_flt_head, so only the counters are adjusted here before
 * the filter is destroyed (no filter-monitor protection needed — we run
 * in the detacher thread's context).
 */
static int
dlil_detach_filter_internal(interface_filter_t  filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_ref_t ifp = NULL;

		/* scan all attached interfaces for this filter */
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				/* filt_skip set means a detach is already in progress */
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;   /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				/* non-internal (third-party) filters are counted per-ifnet */
				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
					VERIFY(ifp->if_flt_non_os_count != 0);
					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
				}
				/*
				 * Decrease filter count and route_generation
				 * ID to let TCP know it should reevalute doing
				 * TSO or not.
				 */
				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
					ifnet_filter_update_tso(ifp, FALSE);
				}
				/*
				 * When we remove the bridge's interface filter,
				 * clear the field in the ifnet.
				 */
				if ((filter->filt_flags & DLIL_IFF_BRIDGE)
				    != 0) {
					ifp->if_bridge = NULL;
				}
				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	} else {
		ifnet_ref_t ifp = filter->filt_ifp;
		/*
		 * Here we are called from ifnet_detach_final(); the
		 * caller had emptied if_flt_head and we're doing an
		 * implicit filter detach because the interface is
		 * about to go away.  Make sure to adjust the counters
		 * in this case.  We don't need the protection of the
		 * filter monitor since we're called as part of the
		 * final detach in the context of the detacher thread.
		 */
		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
			VERIFY(ifp->if_flt_non_os_count != 0);
			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
		}
		/*
		 * Decrease filter count and route_generation
		 * ID to let TCP know it should reevalute doing
		 * TSO or not.
		 */
		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
			ifnet_filter_update_tso(ifp, FALSE);
		}
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/* global attach accounting; OS count tracks internal filters */
	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
	}
#if SKYWALK
	/* re-evaluate filter compatibility now that one filter is gone */
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	/* Free the filter */
	dlif_filt_free(filter);
	filter = NULL;
done:
	/* filter is non-NULL here only on the EINVAL (not-found) path */
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
1854 
1855 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)1856 dlil_detach_filter(interface_filter_t filter)
1857 {
1858 	if (filter == NULL) {
1859 		return;
1860 	}
1861 	dlil_detach_filter_internal(filter, 0);
1862 }
1863 
1864 __private_extern__ boolean_t
dlil_has_ip_filter(void)1865 dlil_has_ip_filter(void)
1866 {
1867 	boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
1868 
1869 	VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
1870 
1871 	DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
1872 	return has_filter;
1873 }
1874 
1875 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)1876 dlil_has_if_filter(struct ifnet *ifp)
1877 {
1878 	boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
1879 	DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
1880 	return has_filter;
1881 }
1882 
1883 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)1884 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
1885 {
1886 	if (p != NULL) {
1887 		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
1888 		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
1889 			return EINVAL;
1890 		}
1891 		if (p->packets_lowat != 0 &&    /* hiwat must be non-zero */
1892 		    p->packets_lowat >= p->packets_hiwat) {
1893 			return EINVAL;
1894 		}
1895 		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
1896 		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
1897 			return EINVAL;
1898 		}
1899 		if (p->bytes_lowat != 0 &&      /* hiwat must be non-zero */
1900 		    p->bytes_lowat >= p->bytes_hiwat) {
1901 			return EINVAL;
1902 		}
1903 		if (p->interval_time != 0 &&
1904 		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
1905 			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
1906 		}
1907 	}
1908 	return 0;
1909 }
1910 
/*
 * Recompute the interface's RX-poll thresholds.
 *
 * When the link rate is unknown (zero) and no explicit parameters were
 * supplied, polling is effectively disabled by zeroing the low watermarks
 * and saturating the high ones.  Otherwise, thresholds come from the
 * speed-indexed rxpoll_tbl defaults unless the caller supplied a non-zero
 * override in "p".  Caller is expected to hold the input thread's lock
 * (see dlil_rxpoll_set_params).
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* pick the highest table row whose speed does not exceed inbw */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/* a non-zero if_rxpoll_max sysctl overrides the caller's limit */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ?  if_rxpoll_max : p->packets_limit);
		/* likewise, a tuned global interval overrides the caller's */
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert the nsec values into timespec form used by the poller */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
1980 
1981 /*
1982  * Must be called on an attached ifnet (caller is expected to check.)
1983  * Caller may pass NULL for poll parameters to indicate "auto-tuning."
1984  */
/*
 * Validate and apply RX-poll parameters to an attached, RXPOLL-capable
 * interface.  "locked" indicates the caller already holds the input
 * thread's dlth_lock.  Returns ENXIO when the interface does not do
 * opportunistic polling, or EINVAL for bad parameters.
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->dlth_lock);
	}
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->dlth_lock);
	}
	return 0;
}
2022 
2023 /*
2024  * Must be called on an attached ifnet (caller is expected to check.)
2025  */
2026 errno_t
dlil_rxpoll_get_params(struct ifnet * ifp,struct ifnet_poll_params * p)2027 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2028 {
2029 	struct dlil_threading_info *inp;
2030 
2031 	VERIFY(ifp != NULL && p != NULL);
2032 	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2033 		return ENXIO;
2034 	}
2035 
2036 	bzero(p, sizeof(*p));
2037 
2038 	lck_mtx_lock(&inp->dlth_lock);
2039 	p->packets_limit = ifp->if_rxpoll_plim;
2040 	p->packets_lowat = ifp->if_rxpoll_plowat;
2041 	p->packets_hiwat = ifp->if_rxpoll_phiwat;
2042 	p->bytes_lowat = ifp->if_rxpoll_blowat;
2043 	p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2044 	p->interval_time = ifp->if_rxpoll_ival;
2045 	lck_mtx_unlock(&inp->dlth_lock);
2046 
2047 	return 0;
2048 }
2049 
2050 errno_t
ifnet_input(struct ifnet * ifp,struct mbuf * m_head,const struct ifnet_stat_increment_param * s)2051 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2052     const struct ifnet_stat_increment_param *s)
2053 {
2054 	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2055 }
2056 
2057 errno_t
ifnet_input_extended(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s)2058 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2059     struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2060 {
2061 	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2062 }
2063 
2064 errno_t
ifnet_input_poll(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s)2065 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2066     struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2067 {
2068 	return ifnet_input_common(ifp, m_head, m_tail, s,
2069 	           (m_head != NULL), TRUE);
2070 }
2071 
/*
 * Common back end for ifnet_input{,_extended,_poll}().
 *
 * Validates the caller's packet chain / stats combination, takes an IO
 * reference on the interface (except lo0), counts or verifies the chain,
 * then hands it to the interface's DLIL input function.  On any parameter
 * error the chain is freed and EINVAL returned.  "ext" means the caller
 * supplied tail + stats; "poll" means this came from a poll cycle (an
 * empty chain is then allowed, to update poll statistics).
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	/* an empty chain is only valid for poll mode; ext requires stats */
	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail given: walk the chain to find it and count pkts/bytes */
		last = m_head;
		while (m_head != NULL) {
			m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* recount the chain to cross-check the driver's stats */
			last = m_head;
			while (1) {
				m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-supplied counts */
			m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	/* a tail that doesn't terminate the chain is a driver bug */
	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	/* build a local stats copy with our (re)computed counts */
	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
		/* input disabled: drop the chain but still account for it */
		m_freem_list(m_head);

		os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
		os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);

		goto done;
	}

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

done:
	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
2200 
2201 
/*
 * Request a run of the interface's starter thread.
 *
 * "resetfc" clears the flow-controlled state (used when resuming output);
 * "ignore_delay" suppresses the delayed-start heuristic for this request.
 * No-op for interfaces that don't use the TXSTART (new output) model.
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (ignore_delay) {
		ifp->if_start_flags |= IFSF_NO_DELAY;
	}
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		/* flow controlled and not resuming: leave the thread asleep */
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	/*
	 * Wake the starter only when it is idle, and either we're resuming
	 * from flow control, or the delayed-start heuristic (ENQUEUE_MULTI)
	 * does not apply / has been satisfied.
	 */
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
2234 
2235 void
ifnet_start(struct ifnet * ifp)2236 ifnet_start(struct ifnet *ifp)
2237 {
2238 	ifnet_start_common(ifp, FALSE, FALSE);
2239 }
2240 
2241 void
ifnet_start_ignore_delay(struct ifnet * ifp)2242 ifnet_start_ignore_delay(struct ifnet *ifp)
2243 {
2244 	ifnet_start_common(ifp, FALSE, TRUE);
2245 }
2246 
/*
 * One-time setup body of the per-interface starter thread.  Names the
 * thread, optionally joins the cellular thread group, binds the lo0
 * starter to the main input thread's affinity set, then parks in
 * ifnet_start_thread_cont() which loops for the lifetime of the ifnet.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	ifnet_ref_t ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), __unsafe_null_terminated_from_indexable(thread_name));

#if CONFIG_THREAD_GROUPS
	if (IFNET_REQUIRES_CELL_GROUP(ifp)) {
		thread_group_join_cellular();
	}
#endif

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *__single tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	/* park in the continuation; first wakeup clears embryonic state */
	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2318 
/*
 * Continuation body of the starter thread.  Services if_start requests
 * by repeatedly calling the driver's if_start routine (under an IO
 * reference, without holding if_start_lock), then re-arms a wait —
 * possibly with a deadline for TBR pacing or delayed start — and blocks
 * on itself as the continuation.  Terminates when the interface is
 * detached (IFSF_TERMINATING) or the wait is interrupted.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	ifnet_ref_t ifp = v;
	struct ifclassq *ifq = ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
		goto terminate;
	}

	if (__improbable(ifp->if_start_embryonic)) {
		/* first wakeup: leave embryonic state and report ready */
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		/* delayed-start heuristic: defer until more packets queue up */
		if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
		    !IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		}
		ifp->if_start_flags &= ~IFSF_NO_DELAY;
		ifp->if_start_delayed = 0;
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
#define _IFSF_DISABLED  (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

#if SKYWALK
	/*
	 * Wakeup any waiters, e.g. any threads waiting to
	 * detach the interface from the flowswitch, etc.
	 */
	if (ifp->if_start_waiters != 0) {
		ifp->if_start_waiters = 0;
		wakeup(&ifp->if_start_waiters);
	}
#endif /* SKYWALK */
	if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts = NULL;

		/* NOTE(review): ts is always NULL here, so this check is
		 * trivially true; kept as-is to avoid behavior change. */
		if (ts == NULL) {
			/* pace the next run when the token-bucket regulator is active */
			ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
			    &ifp->if_start_cycle : NULL);
		}

		if (ts == NULL && ifp->if_start_delayed == 1) {
			/* wake up after the delayed-start timeout */
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);

		/* clear if_start_thread to allow termination to continue */
		ASSERT(ifp->if_start_thread != THREAD_NULL);
		ifp->if_start_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2464 
2465 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)2466 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2467 {
2468 	if (ts == NULL) {
2469 		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
2470 	} else {
2471 		*(&ifp->if_start_cycle) = *ts;
2472 	}
2473 
2474 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2475 		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
2476 		    if_name(ifp), ts->tv_nsec);
2477 	}
2478 }
2479 
2480 static inline void
ifnet_poll_wakeup(struct ifnet * ifp)2481 ifnet_poll_wakeup(struct ifnet *ifp)
2482 {
2483 	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
2484 
2485 	ifp->if_poll_req++;
2486 	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
2487 	    ifp->if_poll_thread != THREAD_NULL) {
2488 		wakeup_one((caddr_t)&ifp->if_poll_thread);
2489 	}
2490 }
2491 
2492 void
ifnet_poll(struct ifnet * ifp)2493 ifnet_poll(struct ifnet *ifp)
2494 {
2495 	/*
2496 	 * If the poller thread is inactive, signal it to do work.
2497 	 */
2498 	lck_mtx_lock_spin(&ifp->if_poll_lock);
2499 	ifnet_poll_wakeup(ifp);
2500 	lck_mtx_unlock(&ifp->if_poll_lock);
2501 }
2502 
/*
 * One-time setup body of the per-interface poller thread (RXPOLL-capable
 * interfaces only).  Names the thread, then parks in
 * ifnet_poll_thread_cont() after arranging a first wakeup that clears
 * the embryonic state.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	ifnet_ref_t ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, __unsafe_null_terminated_from_indexable(thread_name));

	/* assert_wait before the wakeup so the first request isn't lost */
	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2531 
/*
 * Continuation body of the poller thread.  Services poll requests by
 * invoking the driver's if_input_poll routine (under an IO reference,
 * without holding if_poll_lock) and feeding any harvested chain into
 * ifnet_input_common(); an empty harvest is still reported so poll
 * statistics stay current.  Re-arms a wait — with a deadline when a
 * poll cycle interval is configured — and blocks on itself as the
 * continuation.  Terminates when the interface is detached
 * (IF_POLLF_TERMINATING) or the wait is interrupted.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	ifnet_ref_t ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		/* first wakeup: leave embryonic state and report ready */
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		mbuf_ref_t m_head, m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		/* per-poll packet budget: configured limit or a derived default */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_get_ioref(ifp)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		&m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			/* empty harvest: still notify input path (poll stats) */
			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2698 
2699 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)2700 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2701 {
2702 	if (ts == NULL) {
2703 		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
2704 	} else {
2705 		*(&ifp->if_poll_cycle) = *ts;
2706 	}
2707 
2708 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2709 		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
2710 		    if_name(ifp), ts->tv_nsec);
2711 	}
2712 }
2713 
2714 void
ifnet_purge(struct ifnet * ifp)2715 ifnet_purge(struct ifnet *ifp)
2716 {
2717 	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
2718 		if_qflush(ifp, ifp->if_snd);
2719 	}
2720 }
2721 
2722 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)2723 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2724 {
2725 	switch (ev) {
2726 	case CLASSQ_EV_LINK_BANDWIDTH:
2727 		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
2728 			ifp->if_poll_update++;
2729 		}
2730 		break;
2731 
2732 	default:
2733 		break;
2734 	}
2735 }
2736 
2737 errno_t
ifnet_set_output_sched_model(struct ifnet * ifp,u_int32_t model)2738 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2739 {
2740 	return ifclassq_change(ifp->if_snd, model);
2741 }
2742 
2743 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2744 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2745 {
2746 	if (ifp == NULL) {
2747 		return EINVAL;
2748 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2749 		return ENXIO;
2750 	}
2751 
2752 	ifclassq_set_maxlen(ifp->if_snd, maxqlen);
2753 
2754 	return 0;
2755 }
2756 
2757 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2758 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2759 {
2760 	if (ifp == NULL || maxqlen == NULL) {
2761 		return EINVAL;
2762 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2763 		return ENXIO;
2764 	}
2765 
2766 	*maxqlen = ifclassq_get_maxlen(ifp->if_snd);
2767 
2768 	return 0;
2769 }
2770 
2771 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)2772 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2773 {
2774 	errno_t err;
2775 
2776 	if (ifp == NULL || pkts == NULL) {
2777 		err = EINVAL;
2778 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2779 		err = ENXIO;
2780 	} else {
2781 		err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
2782 		    IF_CLASSQ_ALL_GRPS, pkts, NULL);
2783 	}
2784 
2785 	return err;
2786 }
2787 
2788 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)2789 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2790     u_int32_t *pkts, u_int32_t *bytes)
2791 {
2792 	errno_t err;
2793 
2794 	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2795 	    (pkts == NULL && bytes == NULL)) {
2796 		err = EINVAL;
2797 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2798 		err = ENXIO;
2799 	} else {
2800 		err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
2801 		    pkts, bytes);
2802 	}
2803 
2804 	return err;
2805 }
2806 
2807 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2808 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2809 {
2810 	struct dlil_threading_info *inp;
2811 
2812 	if (ifp == NULL) {
2813 		return EINVAL;
2814 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2815 		return ENXIO;
2816 	}
2817 
2818 	if (maxqlen == 0) {
2819 		maxqlen = if_rcvq_maxlen;
2820 	} else if (maxqlen < IF_RCVQ_MINLEN) {
2821 		maxqlen = IF_RCVQ_MINLEN;
2822 	}
2823 
2824 	inp = ifp->if_inp;
2825 	lck_mtx_lock(&inp->dlth_lock);
2826 	qlimit(&inp->dlth_pkts) = maxqlen;
2827 	lck_mtx_unlock(&inp->dlth_lock);
2828 
2829 	return 0;
2830 }
2831 
2832 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2833 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2834 {
2835 	struct dlil_threading_info *inp;
2836 
2837 	if (ifp == NULL || maxqlen == NULL) {
2838 		return EINVAL;
2839 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2840 		return ENXIO;
2841 	}
2842 
2843 	inp = ifp->if_inp;
2844 	lck_mtx_lock(&inp->dlth_lock);
2845 	*maxqlen = qlimit(&inp->dlth_pkts);
2846 	lck_mtx_unlock(&inp->dlth_lock);
2847 	return 0;
2848 }
2849 
2850 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)2851 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
2852     uint16_t delay_timeout)
2853 {
2854 	if (delay_qlen > 0 && delay_timeout > 0) {
2855 		if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
2856 		ifp->if_start_delay_qlen = MIN(100, delay_qlen);
2857 		ifp->if_start_delay_timeout = min(20000, delay_timeout);
2858 		/* convert timeout to nanoseconds */
2859 		ifp->if_start_delay_timeout *= 1000;
2860 		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
2861 		    ifp->if_xname, (uint32_t)delay_qlen,
2862 		    (uint32_t)delay_timeout);
2863 	} else {
2864 		if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
2865 	}
2866 }
2867 
/*
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
 * buf holds the full header.
 *
 * The ECN bits are preserved.  ip_ver must be IPVERSION or IPV6_VERSION;
 * callers (ifnet_enqueue_single) have already validated the ethertype and
 * pulled up the full IP header.  Used as the Wi-Fi AP multicast-DSCP
 * interop workaround (rdar://51507725).
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *__indexable buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	/* aligned bounce buffer, large enough for either header */
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		/* copy into the bounce buffer if buf isn't safely aligned */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		/* fast path: no DSCP bits set outside the ECN mask */
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		/*
		 * Incremental header-checksum update (RFC 1141 style):
		 * add the old TOS, subtract the new one, then fold the
		 * carry back into the low 16 bits.
		 */
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		/* propagate the modified header back to the caller's buffer */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;
		ASSERT(ip_ver == IPV6_VERSION);

		/* copy into the bounce buffer if buf isn't safely aligned */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		/* fast path: DSCP already clear */
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		/* IPv6 has no header checksum to fix up */
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		/* propagate the modified header back to the caller's buffer */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
2928 
/*
 * Enqueue one packet (mbuf or native Skywalk packet) on the interface's
 * output classq (ifcq if non-NULL, else ifp->if_snd) and, unless
 * multi-packet coalescing (IFEF_ENQUEUE_MULTI) is active, kick the
 * driver's start routine.
 *
 * Before the actual enqueue this routine:
 *  - stamps the packet with a send timestamp when it doesn't carry a
 *    valid one, and clears the TS_VALID flag for classq-internal use;
 *  - updates the interface (and, under Skywalk, the nexus advisory)
 *    foreground/realtime activity timestamps;
 *  - clears DSCP bits on multicast IPv4/IPv6 packets bound for Wi-Fi
 *    infrastructure (AP interop workaround, ifnet_mcast_clear_dscp());
 *  - runs the delayed-start heuristics when IFEF_ENQUEUE_MULTI is set.
 *
 * The caller gives up ownership of the packet.  *pdrop is set by the
 * classq layer to report whether the packet was dropped.
 */
static inline errno_t
ifnet_enqueue_single(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	uint8_t *mcast_buf = NULL;       /* non-NULL => DSCP clear needed */
	uint8_t ip_ver;                  /* valid only when mcast_buf set */
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)net_uptime();
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)net_uptime();
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)net_uptime();
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)net_uptime();
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* ensure the Ethernet header is contiguous */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = mtod(p->cp_mbuf, struct ether_header *);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* not IPv4/IPv6: nothing to clear */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				eh = mtod(p->cp_mbuf, struct ether_header *);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface.  If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)net_uptime();
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)net_uptime();
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)net_uptime();
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)net_uptime();
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			/* unlike mbufs, short packets are skipped, not pulled up */
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* not IPv4/IPv6: nothing to clear */
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* still inside the sampling window */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* idle for >= 200ms: reset and disable delay */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* window expired: evaluate the heuristics */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* first enqueue: open a fresh sampling window */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
3238 
3239 static inline errno_t
ifnet_enqueue_chain(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3240 ifnet_enqueue_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3241     classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
3242     boolean_t flush, boolean_t *pdrop)
3243 {
3244 	int error;
3245 
3246 	/* enqueue the packet (caller consumes object) */
3247 	error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
3248 	    cnt, bytes, pdrop);
3249 
3250 	/*
3251 	 * Tell the driver to start dequeueing; do this even when the queue
3252 	 * for the packet is suspended (EQSUSPENDED), as the driver could still
3253 	 * be dequeueing from other unsuspended queues.
3254 	 */
3255 	if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
3256 		ifnet_start(ifp);
3257 	}
3258 	return error;
3259 }
3260 
3261 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * __sized_by (n_pkts)pkts,uint32_t n_pkts)3262 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *__sized_by(n_pkts)pkts, uint32_t n_pkts)
3263 {
3264 	ifnet_ref_t ifp = handle;
3265 	boolean_t pdrop;        /* dummy */
3266 	uint32_t i;
3267 
3268 	ASSERT(n_pkts >= 1);
3269 	for (i = 0; i < n_pkts - 1; i++) {
3270 		(void) ifnet_enqueue_single(ifp, ifp->if_snd, &pkts[i].pktsched_pkt,
3271 		    FALSE, &pdrop);
3272 	}
3273 	/* flush with the last packet */
3274 	(void) ifnet_enqueue_single(ifp, ifp->if_snd, &pkts[i].pktsched_pkt,
3275 	    TRUE, &pdrop);
3276 
3277 	return 0;
3278 }
3279 
3280 static inline errno_t
ifnet_enqueue_common_single(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)3281 ifnet_enqueue_common_single(struct ifnet *ifp, struct ifclassq *ifcq,
3282     classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
3283 {
3284 	if (ifp->if_output_netem != NULL) {
3285 		bool drop;
3286 		errno_t error;
3287 		error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
3288 		*pdrop = drop ? TRUE : FALSE;
3289 		return error;
3290 	} else {
3291 		return ifnet_enqueue_single(ifp, ifcq, pkt, flush, pdrop);
3292 	}
3293 }
3294 
3295 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)3296 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3297 {
3298 	uint32_t bytes = m_pktlen(m);
3299 	struct mbuf *tail = m;
3300 	uint32_t cnt = 1;
3301 	boolean_t pdrop;
3302 
3303 	while (tail->m_nextpkt) {
3304 		VERIFY(tail->m_flags & M_PKTHDR);
3305 		tail = tail->m_nextpkt;
3306 		cnt++;
3307 		bytes += m_pktlen(tail);
3308 	}
3309 
3310 	return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
3311 }
3312 
3313 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)3314 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3315     boolean_t *pdrop)
3316 {
3317 	classq_pkt_t pkt;
3318 
3319 	m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
3320 	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3321 	    m->m_nextpkt != NULL) {
3322 		if (m != NULL) {
3323 			m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_INVALID, NULL, 0);
3324 			*pdrop = TRUE;
3325 		}
3326 		return EINVAL;
3327 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3328 	    !ifnet_is_fully_attached(ifp)) {
3329 		/* flag tested without lock for performance */
3330 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3331 		*pdrop = TRUE;
3332 		return ENXIO;
3333 	} else if (!(ifp->if_flags & IFF_UP)) {
3334 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3335 		*pdrop = TRUE;
3336 		return ENETDOWN;
3337 	}
3338 
3339 	CLASSQ_PKT_INIT_MBUF(&pkt, m);
3340 	return ifnet_enqueue_common_single(ifp, NULL, &pkt, flush, pdrop);
3341 }
3342 
3343 errno_t
ifnet_enqueue_mbuf_chain(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3344 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
3345     struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
3346     boolean_t *pdrop)
3347 {
3348 	classq_pkt_t head, tail;
3349 
3350 	m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
3351 	ASSERT(m_head != NULL);
3352 	ASSERT((m_head->m_flags & M_PKTHDR) != 0);
3353 	ASSERT(m_tail != NULL);
3354 	ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
3355 	ASSERT(ifp != NULL);
3356 	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3357 
3358 	if (!ifnet_is_fully_attached(ifp)) {
3359 		/* flag tested without lock for performance */
3360 		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3361 		*pdrop = TRUE;
3362 		return ENXIO;
3363 	} else if (!(ifp->if_flags & IFF_UP)) {
3364 		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3365 		*pdrop = TRUE;
3366 		return ENETDOWN;
3367 	}
3368 
3369 	CLASSQ_PKT_INIT_MBUF(&head, m_head);
3370 	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3371 	return ifnet_enqueue_chain(ifp, NULL, &head, &tail, cnt, bytes,
3372 	           flush, pdrop);
3373 }
3374 
3375 #if SKYWALK
3376 errno_t
ifnet_enqueue_pkt(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3377 ifnet_enqueue_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
3378     struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3379 {
3380 	classq_pkt_t pkt;
3381 
3382 	ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
3383 
3384 	if (__improbable(ifp == NULL || kpkt == NULL)) {
3385 		if (kpkt != NULL) {
3386 			pp_free_packet(__DECONST(struct kern_pbufpool *,
3387 			    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3388 			*pdrop = TRUE;
3389 		}
3390 		return EINVAL;
3391 	} else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
3392 	    !ifnet_is_fully_attached(ifp))) {
3393 		/* flag tested without lock for performance */
3394 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3395 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3396 		*pdrop = TRUE;
3397 		return ENXIO;
3398 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3399 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3400 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3401 		*pdrop = TRUE;
3402 		return ENETDOWN;
3403 	}
3404 
3405 	CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
3406 	return ifnet_enqueue_common_single(ifp, ifcq, &pkt, flush, pdrop);
3407 }
3408 
3409 errno_t
ifnet_enqueue_pkt_chain(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3410 ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3411     struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3412     uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3413 {
3414 	classq_pkt_t head, tail;
3415 
3416 	ASSERT(k_head != NULL);
3417 	ASSERT(k_tail != NULL);
3418 	ASSERT(ifp != NULL);
3419 	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3420 
3421 	if (!ifnet_is_fully_attached(ifp)) {
3422 		/* flag tested without lock for performance */
3423 		pp_free_packet_chain(k_head, NULL);
3424 		*pdrop = TRUE;
3425 		return ENXIO;
3426 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3427 		pp_free_packet_chain(k_head, NULL);
3428 		*pdrop = TRUE;
3429 		return ENETDOWN;
3430 	}
3431 
3432 	CLASSQ_PKT_INIT_PACKET(&head, k_head);
3433 	CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
3434 	return ifnet_enqueue_chain(ifp, ifcq, &head, &tail, cnt, bytes,
3435 	           flush, pdrop);
3436 }
3437 #endif /* SKYWALK */
3438 
3439 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)3440 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3441 {
3442 	errno_t rc;
3443 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3444 
3445 	if (ifp == NULL || mp == NULL) {
3446 		return EINVAL;
3447 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3448 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3449 		return ENXIO;
3450 	}
3451 	if (!ifnet_get_ioref(ifp)) {
3452 		return ENXIO;
3453 	}
3454 
3455 #if SKYWALK
3456 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3457 #endif /* SKYWALK */
3458 	rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3459 	    &pkt, NULL, NULL, NULL, 0);
3460 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3461 	ifnet_decr_iorefcnt(ifp);
3462 	*mp = pkt.cp_mbuf;
3463 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3464 	return rc;
3465 }
3466 
3467 errno_t
ifnet_dequeue_service_class(struct ifnet * ifp,mbuf_svc_class_t sc,struct mbuf ** mp)3468 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3469     struct mbuf **mp)
3470 {
3471 	errno_t rc;
3472 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3473 
3474 	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
3475 		return EINVAL;
3476 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3477 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3478 		return ENXIO;
3479 	}
3480 	if (!ifnet_get_ioref(ifp)) {
3481 		return ENXIO;
3482 	}
3483 
3484 #if SKYWALK
3485 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3486 #endif /* SKYWALK */
3487 	rc = ifclassq_dequeue(ifp->if_snd, sc, 1,
3488 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
3489 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3490 	ifnet_decr_iorefcnt(ifp);
3491 	*mp = pkt.cp_mbuf;
3492 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3493 	return rc;
3494 }
3495 
3496 errno_t
ifnet_dequeue_multi(struct ifnet * ifp,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3497 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3498     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3499 {
3500 	errno_t rc;
3501 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3502 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3503 
3504 	if (ifp == NULL || head == NULL || pkt_limit < 1) {
3505 		return EINVAL;
3506 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3507 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3508 		return ENXIO;
3509 	}
3510 	if (!ifnet_get_ioref(ifp)) {
3511 		return ENXIO;
3512 	}
3513 
3514 #if SKYWALK
3515 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3516 #endif /* SKYWALK */
3517 	rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, pkt_limit,
3518 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
3519 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3520 	ifnet_decr_iorefcnt(ifp);
3521 	*head = pkt_head.cp_mbuf;
3522 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3523 	if (tail != NULL) {
3524 		*tail = pkt_tail.cp_mbuf;
3525 	}
3526 	return rc;
3527 }
3528 
3529 errno_t
ifnet_dequeue_multi_bytes(struct ifnet * ifp,u_int32_t byte_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3530 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3531     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3532 {
3533 	errno_t rc;
3534 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3535 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3536 
3537 	if (ifp == NULL || head == NULL || byte_limit < 1) {
3538 		return EINVAL;
3539 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3540 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3541 		return ENXIO;
3542 	}
3543 	if (!ifnet_get_ioref(ifp)) {
3544 		return ENXIO;
3545 	}
3546 
3547 #if SKYWALK
3548 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3549 #endif /* SKYWALK */
3550 	rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3551 	    byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
3552 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3553 	ifnet_decr_iorefcnt(ifp);
3554 	*head = pkt_head.cp_mbuf;
3555 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3556 	if (tail != NULL) {
3557 		*tail = pkt_tail.cp_mbuf;
3558 	}
3559 	return rc;
3560 }
3561 
3562 errno_t
ifnet_dequeue_service_class_multi(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3563 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3564     u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3565     u_int32_t *len)
3566 {
3567 	errno_t rc;
3568 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3569 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3570 
3571 	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3572 	    !MBUF_VALID_SC(sc)) {
3573 		return EINVAL;
3574 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3575 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3576 		return ENXIO;
3577 	}
3578 	if (!ifnet_get_ioref(ifp)) {
3579 		return ENXIO;
3580 	}
3581 
3582 #if SKYWALK
3583 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3584 #endif /* SKYWALK */
3585 	rc = ifclassq_dequeue(ifp->if_snd, sc, pkt_limit,
3586 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
3587 	    cnt, len, 0);
3588 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3589 	ifnet_decr_iorefcnt(ifp);
3590 	*head = pkt_head.cp_mbuf;
3591 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3592 	if (tail != NULL) {
3593 		*tail = pkt_tail.cp_mbuf;
3594 	}
3595 	return rc;
3596 }
3597 
3598 #if XNU_TARGET_OS_OSX
3599 errno_t
ifnet_framer_stub(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * dest,IFNET_LLADDR_T dest_linkaddr,IFNET_FRAME_TYPE_T frame_type,u_int32_t * pre,u_int32_t * post)3600 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3601     const struct sockaddr *dest,
3602     IFNET_LLADDR_T dest_linkaddr,
3603     IFNET_FRAME_TYPE_T frame_type,
3604     u_int32_t *pre, u_int32_t *post)
3605 {
3606 	if (pre != NULL) {
3607 		*pre = 0;
3608 	}
3609 	if (post != NULL) {
3610 		*post = 0;
3611 	}
3612 
3613 	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
3614 }
3615 #endif /* XNU_TARGET_OS_OSX */
3616 
/*
 * Post a kernel event message system-wide via kev_post_msg().
 * If ifp is set, we will also increment the generation for the
 * interface (and refresh NECP clients) so observers can detect
 * that its configuration changed.
 *
 * Returns the result of kev_post_msg().
 */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	necp_update_all_clients();
#endif /* NECP */

	return kev_post_msg(event);
}
3631 
3632 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)3633 dlil_post_sifflags_msg(struct ifnet * ifp)
3634 {
3635 	struct kev_msg ev_msg;
3636 	struct net_event_data ev_data;
3637 
3638 	bzero(&ev_data, sizeof(ev_data));
3639 	bzero(&ev_msg, sizeof(ev_msg));
3640 	ev_msg.vendor_code = KEV_VENDOR_APPLE;
3641 	ev_msg.kev_class = KEV_NETWORK_CLASS;
3642 	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3643 	ev_msg.event_code = KEV_DL_SIFFLAGS;
3644 	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3645 	ev_data.if_family = ifp->if_family;
3646 	ev_data.if_unit = (u_int32_t) ifp->if_unit;
3647 	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
3648 	ev_msg.dv[0].data_ptr = &ev_data;
3649 	ev_msg.dv[1].data_length = 0;
3650 	dlil_post_complete_msg(ifp, &ev_msg);
3651 }
3652 
3653 #define TMP_IF_PROTO_ARR_SIZE   10
/*
 * Deliver a kernel event to every consumer tied to an interface:
 * first the attached interface filters, then each attached protocol
 * (via its v1/v2 event callback), then the interface's own if_event
 * hook, and finally post the message system-wide through
 * dlil_post_complete_msg().  When update_generation is true the
 * interface generation count is bumped as part of that final post.
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters.  Each callback is
	 * invoked with if_flt_lock dropped; the busy marker below keeps
	 * the filter list stable across that window.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_get_ioref(ifp)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 * Snapshot the protocols (each with a refcnt held) into a
	 * private array; small counts use the on-stack array.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* Deliver the event to each snapshotted protocol, lock-free. */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	/* kfree_type on the heap array only; never on the stack array */
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
3753 
3754 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)3755 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3756 {
3757 	struct kev_msg kev_msg;
3758 	int result = 0;
3759 
3760 	if (ifp == NULL || event == NULL) {
3761 		return EINVAL;
3762 	}
3763 
3764 	bzero(&kev_msg, sizeof(kev_msg));
3765 	kev_msg.vendor_code = event->vendor_code;
3766 	kev_msg.kev_class = event->kev_class;
3767 	kev_msg.kev_subclass = event->kev_subclass;
3768 	kev_msg.event_code = event->event_code;
3769 	kev_msg.dv[0].data_ptr = &event->event_data;
3770 	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3771 	kev_msg.dv[1].data_length = 0;
3772 
3773 	result = dlil_event_internal(ifp, &kev_msg, TRUE);
3774 
3775 	return result;
3776 }
3777 
/* The following is used to enqueue work items for ifnet ioctl events */
static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);

/*
 * A deferred ioctl request: the interface it targets (an IO
 * reference is held on its behalf by ifnet_ioctl_async()) and the
 * ioctl command to replay from the work queue.
 */
struct ifnet_ioctl_event {
	ifnet_ref_t ifp;
	u_long ioctl_code;
};

/*
 * Work-queue wrapper; embeds the nwk_wq_entry so the callback can
 * recover the containing record via __container_of().
 */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
3790 
3791 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)3792 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
3793 {
3794 	struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
3795 	bool compare_expected;
3796 
3797 	/*
3798 	 * Get an io ref count if the interface is attached.
3799 	 * At this point it most likely is. We are taking a reference for
3800 	 * deferred processing.
3801 	 */
3802 	if (!ifnet_get_ioref(ifp)) {
3803 		os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
3804 		    "is not attached",
3805 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3806 		return;
3807 	}
3808 	switch (ioctl_code) {
3809 	case SIOCADDMULTI:
3810 		compare_expected = false;
3811 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
3812 			ifnet_decr_iorefcnt(ifp);
3813 			return;
3814 		}
3815 		break;
3816 	case SIOCDELMULTI:
3817 		compare_expected = false;
3818 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
3819 			ifnet_decr_iorefcnt(ifp);
3820 			return;
3821 		}
3822 		break;
3823 	default:
3824 		os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
3825 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3826 		return;
3827 	}
3828 
3829 	p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
3830 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
3831 
3832 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
3833 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
3834 	p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
3835 	nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
3836 }
3837 
/*
 * Work-queue callback that replays a deferred interface ioctl
 * queued by ifnet_ioctl_async().  Clears the per-op coalescing flag
 * (so another request of the same kind may be queued), invokes
 * ifnet_ioctl(), then releases the IO reference taken at queue time
 * and frees the work item.
 */
static void
ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
{
	struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
	    struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);

	ifnet_ref_t ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
	u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
	int ret = 0;

	/* Re-arm coalescing before running the ioctl. */
	switch (ioctl_code) {
	case SIOCADDMULTI:
		atomic_store(&ifp->if_mcast_add_signaled, false);
		break;
	case SIOCDELMULTI:
		atomic_store(&ifp->if_mcast_del_signaled, false);
		break;
	}
	if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ret, ioctl_code);
	} else if (dlil_verbose) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
		    "for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ioctl_code);
	}
	/* Drop the reference held since ifnet_ioctl_async(). */
	ifnet_decr_iorefcnt(ifp);
	kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
	return;
}
3868 
/*
 * Run an ioctl through the DLIL stack: the interface filters first,
 * then the protocol attached for proto_fam (if any), and finally the
 * driver's if_ioctl.  A result other than 0/EOPNOTSUPP is recorded as
 * the outcome; a hard error or EJUSTRETURN ends processing early.
 * EJUSTRETURN is mapped to 0 before returning, and ENOTSUP is
 * normalized to EOPNOTSUPP throughout.  Returns EOPNOTSUPP when the
 * interface is not attached.
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_get_ioref(ifp)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 * Filter callbacks run with if_flt_lock dropped; the busy
	 * marker keeps the filter list stable across that window.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, stop processing": report success */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
3986 
3987 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)3988 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
3989 {
3990 	errno_t error = 0;
3991 
3992 	if (ifp->if_set_bpf_tap) {
3993 		/* Get an io reference on the interface if it is attached */
3994 		if (!ifnet_get_ioref(ifp)) {
3995 			return ENXIO;
3996 		}
3997 		error = ifp->if_set_bpf_tap(ifp, mode, callback);
3998 		ifnet_decr_iorefcnt(ifp);
3999 	}
4000 	return error;
4001 }
4002 
/*
 * Resolve a multicast protocol address into a link-layer address for
 * ifp.  ll_addr (ll_len bytes) is zeroed first.  The attached
 * protocol's resolve_multi callback is tried; the driver's
 * if_check_multi hook then verifies either the resolved link-layer
 * address (on success) or the original protocol address (when the
 * protocol could not help).  Returns EOPNOTSUPP when nothing could
 * resolve/verify, or when the interface is not attached.
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_get_ioref(ifp)) {
		return result;
	}

	SOCKADDR_ZERO(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			verify = ll_addr;
		} else {
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return result;
}
4044 
/*
 * Send an ARP packet through the protocol attached for the target's
 * address family.  Fails with ENOTSUP when the interface has ARP
 * disabled (IFF_NOARP), when no matching protocol is attached, or
 * when that protocol registered no send_arp callback.  arpstat
 * request/reply counters are updated before handing off.
 */
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	if ((ifp->if_flags & IFF_NOARP) != 0) {
		result = ENOTSUP;
		goto done;
	}

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp    arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			/* a NULL target_hw marks a request as unicast */
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				if (target_hw != NULL) {
					arpstat.txurequests++;
				}
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}
done:
	return result;
}
4090 
4091 static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,const struct sockaddr_in * target_sin)4092 _is_announcement(const struct sockaddr_in * sender_sin,
4093     const struct sockaddr_in * target_sin)
4094 {
4095 	if (target_sin == NULL || sender_sin == NULL) {
4096 		return FALSE;
4097 	}
4098 
4099 	return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
4100 }
4101 
/*
 * Send an ARP packet, normally by forwarding to
 * dlil_send_arp_internal() on ifp, with two special cases:
 *
 *  - when the target is a (default) router (RTF_ROUTER in rtflags),
 *    the target sockaddr is copied and tagged with SIN_ROUTER so the
 *    protocol's send_arp callback can tell;
 *  - an ARP request whose target is an IPv4 link-local address (and
 *    which is not a self-announcement) is replicated on every
 *    attached interface marked IFEF_ARPLL, each using its own
 *    link-layer and IPv4 source addresses.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = __DECONST_SA(target_proto0);

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = SA(&target_proto_sinarp);
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = SIN(sender_proto);
	target_sin = SIN(target_proto);
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		u_int32_t       count;
		ifnet_ref_t     *__counted_by(count) ifp_list;
		u_int32_t       ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				ifnet_ref_t cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* keep the lladdr alive after dropping the lock */
				ifa_addref(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, SDL(source_hw->ifa_addr),
				    SA(&source_ip_copy), NULL,
				    target_proto);

				ifa_remref(source_hw);
				/* report the first useful outcome */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free_counted_by(ifp_list, count);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
4212 
4213 /*
4214  * Caller must hold ifnet head lock.
4215  */
4216 static int
ifnet_lookup(struct ifnet * ifp)4217 ifnet_lookup(struct ifnet *ifp)
4218 {
4219 	ifnet_ref_t _ifp;
4220 
4221 	ifnet_head_lock_assert(LCK_RW_ASSERT_HELD);
4222 	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4223 		if (_ifp == ifp) {
4224 			break;
4225 		}
4226 	}
4227 	return _ifp != NULL;
4228 }
4229 
4230 /*
4231  * Caller has to pass a non-zero refio argument to get a
4232  * IO reference count. This will prevent ifnet_detach from
4233  * being called when there are outstanding io reference counts.
4234  */
4235 int
ifnet_get_ioref(struct ifnet * ifp)4236 ifnet_get_ioref(struct ifnet *ifp)
4237 {
4238 	bool ret;
4239 
4240 	ret = ifnet_is_fully_attached(ifp);
4241 	if (ret) {
4242 		if (os_ref_retain_try(&ifp->if_refio) == false) {
4243 			/* refio became 0 which means it is detaching */
4244 			return false;
4245 		}
4246 	}
4247 
4248 	return ret;
4249 }
4250 
/*
 * Record that one more kernel thread is pending for this interface.
 * Balanced by ifnet_decr_pending_thread_count(); waiters sleeping on
 * if_threads_pending are woken there when the count reaches zero.
 */
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4258 
/*
 * Record that a pending kernel thread for this interface is no longer
 * pending; when the count drops to zero, wake anyone sleeping on
 * if_threads_pending.
 */
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4270 
/*
 * Take an additional IO reference unconditionally.
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_{attached,data_ready}() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	os_ref_retain(&ifp->if_refio);
}
4281 
/*
 * Drop an IO reference taken via ifnet_get_ioref() or
 * ifnet_incr_iorefcnt().
 */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread.  The lock is taken around the wakeup so
	 * the detach thread cannot miss it between its check and sleep.
	 */
	if (os_ref_release_relaxed(&ifp->if_refio) == 0) {
		lck_mtx_lock(&ifp->if_ref_lock);
		wakeup(&(ifp->if_refio));
		lck_mtx_unlock(&ifp->if_ref_lock);
	}
}
4295 
/*
 * Variant of ifnet_decr_iorefcnt() for callers that already hold
 * if_ref_lock (e.g. ifnet_datamov_resume()); the wakeup is issued
 * without re-taking the lock.
 */
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread.
	 */
	if (os_ref_release_relaxed(&ifp->if_refio) == 0) {
		wakeup(&(ifp->if_refio));
	}
}
4307 
/*
 * Enter the data-movement (datapath) section for ifp.  Takes an IO
 * reference plus a datamov reference; fails (returns FALSE) when the
 * interface is not attached-and-ready, e.g. while detaching or while
 * data movement is suspended.  Must be balanced by
 * ifnet_datamov_end().
 */
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	ret = ifnet_is_attached_and_ready(ifp);
	if (ret) {
		if (os_ref_retain_try(&ifp->if_refio) == false) {
			/* refio became 0 which means it is detaching */
			return false;
		}
		os_ref_retain_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
	}

	DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
	return ret;
}
4325 
/*
 * Leave the data-movement section entered via ifnet_datamov_begin().
 * Drops the datamov reference; if this was the last mover and a
 * drainer set IF_DATAMOV_DRAINING, wake the drainer.  Finally drops
 * the IO reference taken at begin time.
 */
void
ifnet_datamov_end(struct ifnet *ifp)
{
	uint32_t datamov;
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	datamov = os_ref_release_raw_relaxed_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
	if (datamov >> IF_DATAMOV_BITS == 1 && (datamov & IF_DATAMOV_DRAINING)) {
		lck_mtx_lock(&ifp->if_ref_lock);
		DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
		DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
		wakeup(&(ifp->if_datamov));
		lck_mtx_unlock(&ifp->if_ref_lock);
	}
	ifnet_decr_iorefcnt(ifp);

	DTRACE_IP1(datamov__end, struct ifnet *, ifp);
}
4346 
/*
 * Suspend data movement on ifp; if_ref_lock must be held.  Takes an
 * IO reference for the duration of the suspension and, on the first
 * suspension, clears IFRF_READY so new datapath entries via
 * ifnet_datamov_begin() are refused.  Balance with
 * ifnet_datamov_resume().
 */
static void
ifnet_datamov_suspend_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
	ifnet_incr_iorefcnt(ifp);
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
}
4357 
/*
 * Locking wrapper around ifnet_datamov_suspend_locked(); the
 * interface must be attached or in the process of detaching.
 */
static void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4366 
/*
 * Suspend data movement only when it is not suspended already.
 * Returns TRUE when this call performed the suspension (the caller
 * must later call ifnet_datamov_resume()), FALSE when the interface
 * was already suspended.
 */
boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	if (ifp->if_suspend > 0) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		return FALSE;
	}
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
	return TRUE;
}
4380 
/*
 * Wait until every in-flight data mover has left the datapath, then
 * flush the transmit queues.  Data movement must already be
 * suspended (so no new movers can enter).  Sets IF_DATAMOV_DRAINING
 * so the last mover (see ifnet_datamov_end()) wakes us, and sleeps
 * until the datamov refcount drops back to its base value.
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	os_atomic_or(&ifp->if_datamov, IF_DATAMOV_DRAINING, relaxed);
	while (os_ref_get_count_mask(&ifp->if_datamov, IF_DATAMOV_BITS) > 1) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	os_atomic_andnot(&ifp->if_datamov, IF_DATAMOV_DRAINING, relaxed);
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush(ifp, ifp->if_snd);
	}
}
4407 
/*
 * Convenience wrapper: suspend data movement on ifp and then wait
 * for all in-flight movers to drain.
 */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
4414 
/*
 * Undo one suspension taken via ifnet_datamov_suspend*().  When the
 * last suspension is lifted, IFRF_READY is restored so the datapath
 * can be entered again; the IO reference taken at suspend time is
 * dropped.
 */
void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4428 
/*
 * Attach a fully-populated if_proto to its interface: let the family
 * module's if_add_proto refine the demux descriptors, link the
 * protocol at the tail of its hash chain, hold a proto refcnt for
 * the attachment, and broadcast KEV_DL_PROTO_ATTACHED.  On success,
 * *proto_count (when non-NULL) receives the number of protocols now
 * attached.  Returns EEXIST if the family is already attached, ENXIO
 * if the interface is detaching, or the family module's error.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *__counted_by(demux_count) demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	ifnet_ref_t ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	if (!ifnet_get_ioref(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash, at the tail of its chain.
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
4508 
4509 static void
dlil_handle_proto_attach(ifnet_t ifp,protocol_family_t protocol)4510 dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
4511 {
4512 	/*
4513 	 * A protocol has been attached, mark the interface up.
4514 	 * This used to be done by configd.KernelEventMonitor, but that
4515 	 * is inherently prone to races (rdar://problem/30810208).
4516 	 */
4517 	(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
4518 	(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
4519 	dlil_post_sifflags_msg(ifp);
4520 #if SKYWALK
4521 	switch (protocol) {
4522 	case AF_INET:
4523 	case AF_INET6:
4524 		/* don't attach the flowswitch unless attaching IP */
4525 		dlil_attach_flowswitch_nexus(ifp);
4526 		break;
4527 	default:
4528 		break;
4529 	}
4530 #endif /* SKYWALK */
4531 }
4532 
/*
 * Public KPI: attach a v1 protocol handler to an interface.
 * Validates arguments, verifies the interface is still on the global
 * ifnet list, packages the v1 callbacks into an if_proto, and hands
 * off to dlil_attach_protocol().  On success the interface is marked
 * up via dlil_handle_proto_attach(); on failure the allocated
 * if_proto is released.
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto  *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		dlif_proto_free(ifproto);
	}
	return retval;
}
4594 
/*
 * ifnet_attach_protocol_v2 - attach a protocol (v2 KPI) to an interface.
 *
 * Identical in structure to ifnet_attach_protocol(), except the if_proto
 * is marked kProtoKPI_v2 and the v2 callback set is used (the v2 input
 * callback takes an mbuf chain without a separate frame header pointer).
 *
 * Returns 0 on success; EINVAL on bad arguments, ENXIO if the interface is
 * not on the global list, EEXIST if the protocol is already attached, or
 * whatever dlil_attach_protocol() returns.
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto  *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	/* NOTE(review): result used unchecked; presumably Z_NOFAIL -- confirm */
	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	/* Copy the caller-supplied v2 callback set into the if_proto */
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	/* Still under the shared head lock here; log the outcome */
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/* family-specific post-attach work (e.g. mark interface up) */
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		dlif_proto_free(ifproto);
	}
	return retval;
}
4656 
/*
 * ifnet_detach_protocol - detach a protocol from an interface.
 *
 * Under the exclusive ifnet lock: looks up the attached if_proto (the
 * lookup takes a reference), notifies the family module via if_del_proto,
 * unhooks the entry from the protocol hash, and swaps every protocol
 * callback for an ifproto_media_* stub so that any thread still holding a
 * reference to the if_proto gets a harmless ENXIO instead of calling into
 * a detached protocol.  The remaining detach work runs when the last
 * if_proto reference is released.
 *
 * Returns 0 on success, EINVAL on bad arguments, ENXIO if the protocol is
 * not attached.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int     retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	/* remove from the per-interface protocol hash bucket */
	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/* replace live callbacks with inert stubs (all return ENXIO) */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
4722 
4723 static errno_t
ifproto_media_input_v1(struct ifnet * ifp,protocol_family_t protocol,struct mbuf * packet,char * header)4724 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
4725     struct mbuf *packet, char *header)
4726 {
4727 #pragma unused(ifp, protocol, packet, header)
4728 	return ENXIO;
4729 }
4730 
4731 static errno_t
ifproto_media_input_v2(struct ifnet * ifp,protocol_family_t protocol,struct mbuf * packet)4732 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
4733     struct mbuf *packet)
4734 {
4735 #pragma unused(ifp, protocol, packet)
4736 	return ENXIO;
4737 }
4738 
4739 static errno_t
ifproto_media_preout(struct ifnet * ifp,protocol_family_t protocol,mbuf_t * packet,const struct sockaddr * dest,void * route,IFNET_FRAME_TYPE_RW_T frame_type,IFNET_LLADDR_RW_T link_layer_dest)4740 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
4741     mbuf_t *packet, const struct sockaddr *dest, void *route,
4742     IFNET_FRAME_TYPE_RW_T frame_type, IFNET_LLADDR_RW_T link_layer_dest)
4743 {
4744 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
4745 	return ENXIO;
4746 }
4747 
4748 static void
ifproto_media_event(struct ifnet * ifp,protocol_family_t protocol,const struct kev_msg * event)4749 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
4750     const struct kev_msg *event)
4751 {
4752 #pragma unused(ifp, protocol, event)
4753 }
4754 
4755 static errno_t
ifproto_media_ioctl(struct ifnet * ifp,protocol_family_t protocol,unsigned long command,void * argument)4756 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
4757     unsigned long command, void *argument)
4758 {
4759 #pragma unused(ifp, protocol, command, argument)
4760 	return ENXIO;
4761 }
4762 
4763 static errno_t
ifproto_media_resolve_multi(ifnet_t ifp,const struct sockaddr * proto_addr,struct sockaddr_dl * out_ll,size_t ll_len)4764 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
4765     struct sockaddr_dl *out_ll, size_t ll_len)
4766 {
4767 #pragma unused(ifp, proto_addr, out_ll, ll_len)
4768 	return ENXIO;
4769 }
4770 
4771 static errno_t
ifproto_media_send_arp(struct ifnet * ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4772 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
4773     const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4774     const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4775 {
4776 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
4777 	return ENXIO;
4778 }
4779 
4780 extern int if_next_index(void);
4781 extern int tcp_ecn;
4782 
4783 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)4784 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
4785 {
4786 	uint32_t sflags = 0;
4787 	int err;
4788 
4789 	if (if_flowadv) {
4790 		sflags |= PKTSCHEDF_QALG_FLOWCTL;
4791 	}
4792 
4793 	if (if_delaybased_queue) {
4794 		sflags |= PKTSCHEDF_QALG_DELAYBASED;
4795 	}
4796 
4797 	if (ifp->if_output_sched_model & IFNET_SCHED_DRIVER_MANGED_MODELS) {
4798 		VERIFY(IFNET_MODEL_IS_VALID(ifp->if_output_sched_model));
4799 		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
4800 	}
4801 	/* Inherit drop limit from the default queue */
4802 	if (ifp->if_snd != ifcq) {
4803 		IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
4804 	}
4805 	/* Initialize transmit queue(s) */
4806 	err = ifclassq_setup(ifcq, ifp, sflags);
4807 	if (err != 0) {
4808 		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4809 		    "err=%d", __func__, ifp, err);
4810 		/* NOTREACHED */
4811 	}
4812 }
4813 
/*
 * ifnet_attach - attach an ifnet to the system and make it visible.
 *
 * Inserts the interface into the global ifnet list and ifindex2ifnet[],
 * assigns an if_index and link-level address as needed, resets interface
 * statistics (preserving type/MTU/capabilities), sets up the transmit
 * classq, creates the per-interface DLIL input / start / poll kernel
 * threads, attaches IGMP/MLD info structures and (on SKYWALK builds) the
 * netif nexus, then finally marks the ifnet IFRF_ATTACHED | IFRF_READY.
 *
 * The whole attach is serialized under dlil_if_lock so the embryonic
 * interface cannot be configured while ifnet_head_lock is dropped and
 * reacquired.  Returns 0 on success; EINVAL, EEXIST, ENODEV or ENOBUFS
 * on failure (all error paths release the locks taken here).
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
#if SKYWALK
	boolean_t netif_compat;
	if_nexus_netif  nexus_netif;
#endif /* SKYWALK */
	ifnet_ref_t tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	/* The interface must still be embryonic (fresh or recycled) */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	/*
	 * If a link-level address is supplied, it must agree with the
	 * interface's address length (or define it if not yet set).
	 */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
	    PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ifp->if_proto_hash_count = PROTO_HASH_SLOTS;

	/* Interface filter list must start out empty and idle */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* A recycled (DLIF_REUSE) ifnet may carry over multicast records */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		/*
		 * Since we exhausted the list of
		 * if_index's, try to find an empty slot
		 * in ifindex2ifnet.
		 */
		if (idx == -1 && if_index >= UINT16_MAX) {
			for (int i = 1; i < if_index; i++) {
				if (ifindex2ifnet[i] == NULL &&
				    ifnet_addrs[i - 1] == NULL) {
					idx = i;
					break;
				}
			}
		}
		if (idx == -1) {
			/* no index available; undo and bail out */
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;

		/* the lladdr passed at attach time is the permanent address */
		if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
		    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
			bcopy(CONST_LLADDR(ll_addr),
			    dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
			dl_if->dl_if_permanent_ether_is_set = 1;
		}
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	ifa_addref(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Publish the interface in the global list and index table */
	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL_DM);

	/* Initialize the default transmit classq */
	dlil_ifclassq_setup(ifp, ifp->if_snd);

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			/* no dedicated input thread for this configuration */
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* bump the start thread's scheduling precedence slightly */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (dlil_is_rxpoll_input(thfunc)) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* bump the poll thread's scheduling precedence slightly */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/*
	 * Count multicast memberships carried over by a recycled ifnet;
	 * these suspended link-layer memberships will need updating.
	 */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	/* Clear throughput estimates and radio type */
	ifp->if_estimated_up_bucket = 0;
	ifp->if_estimated_down_bucket = 0;
	ifp->if_radio_type = 0;
	ifp->if_radio_channel = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	/* Head lock and ifnet lock are dropped here; dlil_if_lock still held */
	ifnet_lock_done(ifp);
	ifnet_head_done();

#if SKYWALK
	netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Final mark this ifnet as attached. */
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	os_ref_init(&ifp->if_refio, &if_refiogrp);
	os_ref_init_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp, 0);
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
#if SKYWALK
	/* the interface is fully attached; let the nexus adapter know */
	if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
		if (netif_compat) {
			if (sk_netif_compat_txmodel ==
			    NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
				ifnet_enqueue_multi_setup(ifp,
				    sk_tx_delay_qlen, sk_tx_delay_timeout);
			}
			ifp->if_nx_netif = nexus_netif;
		}
		ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
	}
#endif /* SKYWALK */
	ifnet_lock_done(ifp);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);

	os_log(OS_LOG_DEFAULT, "%s: attached%s\n", if_name(ifp),
	    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	return 0;
}
5308 
/*
 * Purge all protocol-layer addresses from the interface: IPv4 (when
 * INET is configured) and IPv6.  The link-level address is untouched.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
5317 
/*
 * ifnet_detach - begin detaching an ifnet from the system.
 *
 * Marks the interface down and IFRF_DETACHING, removes it from the global
 * ifnet list / ifindex2ifnet[] so lookups no longer find it, tears down
 * cached routes, per-interface statistics, delegated-interface state, and
 * link heuristics, drains IGMP/MLD deferred work, then queues the ifnet
 * on the detaching list for the DLIL worker thread to finish (the final
 * teardown must wait for all I/O references to drain and is done in
 * ifnet_detach_final(), out of this call's context to avoid reentrancy).
 *
 * Returns 0 on success, EINVAL if ifp is NULL or not attached, ENXIO if
 * a detach is already in progress.
 */
errno_t
ifnet_detach(ifnet_t ifp)
{
	ifnet_ref_t delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	/* stop CGA address generation for this interface */
	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}
	os_log(OS_LOG_DEFAULT, "%s detaching", if_name(ifp));

	/* Mark the interface down */
	if_down(ifp);

	/*
	 * IMPORTANT NOTE
	 *
	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
	 * or equivalently, ifnet_get_ioref(ifp, 1), can't be modified
	 * until after we've waited for all I/O references to drain
	 * in ifnet_detach_final().
	 */

	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/* tear down any packet-shaping (netem) state on the output path */
	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset CLAT46 flag */
	if_clear_eflags(ifp, IFEF_CLAT46);

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connection after the interface
	 * went down
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset router mode */
	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	/*
	 * Increment the generation count on interface deletion
	 */
	ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Force reset link heuristics */
	if (ifp->if_link_heuristics_tcall != NULL) {
		thread_call_cancel_wait(ifp->if_link_heuristics_tcall);
		thread_call_free(ifp->if_link_heuristics_tcall);
		ifp->if_link_heuristics_tcall = NULL;
	}
	if_clear_xflags(ifp, IFXF_LINK_HEURISTICS);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		kfree_type(struct if_link_status, ifp->if_link_status);
		ifp->if_link_status = NULL;
	}

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
	mld_domifdetach(ifp);

#if SKYWALK
	/* Clean up any netns tokens still pointing to to this ifnet */
	netns_ifnet_detach(ifp);
#endif /* SKYWALK */
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
5516 
5517 static void
ifnet_detaching_enqueue(struct ifnet * ifp)5518 ifnet_detaching_enqueue(struct ifnet *ifp)
5519 {
5520 	dlil_if_lock_assert();
5521 
5522 	++ifnet_detaching_cnt;
5523 	VERIFY(ifnet_detaching_cnt != 0);
5524 	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
5525 	wakeup((caddr_t)&ifnet_delayed_run);
5526 }
5527 
5528 static struct ifnet *
ifnet_detaching_dequeue(void)5529 ifnet_detaching_dequeue(void)
5530 {
5531 	ifnet_ref_t ifp;
5532 
5533 	dlil_if_lock_assert();
5534 
5535 	ifp = TAILQ_FIRST(&ifnet_detaching_head);
5536 	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5537 	if (ifp != NULL) {
5538 		VERIFY(ifnet_detaching_cnt != 0);
5539 		--ifnet_detaching_cnt;
5540 		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5541 		ifp->if_detaching_link.tqe_next = NULL;
5542 		ifp->if_detaching_link.tqe_prev = NULL;
5543 	}
5544 	return ifp;
5545 }
5546 
/*
 * Continuation body for the interface-detacher kernel thread.
 *
 * Drains the detaching list, invoking ifnet_detach_final() on each
 * queued ifnet with the dlil lock dropped around the call (detach_final
 * blocks).  When the list is empty it arms a wait on ifnet_delayed_run
 * and blocks with itself as the continuation, so control never returns
 * from this function.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	ifnet_ref_t ifp;

	dlil_if_lock();
	/* First pass only: leave embryonic state set up by the thread func */
	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* detach_final may block; don't hold the dlil lock */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	/* Arm the wait before dropping the lock to avoid a missed wakeup */
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
5589 
/*
 * Entry point for the interface-detacher kernel thread.
 *
 * Arms the initial wait on ifnet_delayed_run, marks the thread as
 * embryonic, and issues a self-wakeup so the continuation runs once
 * immediately to clear the embryonic state (and decrement the pending
 * thread count) before any real detach work arrives.  Never returns.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
5606 
/*
 * Final stage of interface detach, executed from the detacher thread
 * (see ifnet_detacher_thread_cont).  By the time this runs, the ifnet
 * has already been unlinked from ifnet_head/ifindex2ifnet with
 * IFRF_DETACHING set.  This function tears down everything that is
 * still attached: BPF, Skywalk nexuses, interface filters, addresses,
 * protocols, the starter/poller/input threads, cached routes -- then
 * clears IFRF_DETACHING, invokes the driver's if_free callback, marks
 * the dlil_ifnet slot reusable, and drops the attach reference.
 */
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct dlil_ifnet *dlifp;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

#if SKYWALK
	dlil_netif_detach_notify(ifp);
	/*
	 * Wait for the datapath to quiesce before tearing down
	 * netif/flowswitch nexuses.
	 */
	dlil_quiesce_and_detach_nexuses(ifp);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	if (os_ref_release_relaxed(&ifp->if_refio) > 0) {
		bool waited = false;

		while (os_ref_get_count(&ifp->if_refio) > 0) {
			waited = true;
			DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
			    __func__, if_name(ifp));
			(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
			    (PZERO - 1), "ifnet_ioref_wait", NULL);
		}
		if (waited) {
			DLIL_PRINTF("%s: %s IO references drained\n",
			    __func__, if_name(ifp));
		}
	}
	os_ref_release_last_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
	VERIFY(ifp->if_suspend == 0);
	ifp->if_refflags &= ~IFRF_READY;
	lck_mtx_unlock(&ifp->if_ref_lock);

#if SKYWALK
	VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
#endif /* SKYWALK */
	/* Drain and destroy send queue */
	ifclassq_teardown(ifp->if_snd);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	/* Drop if_flt_lock around each detach callback, which may block */
	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		kfree_data_sized_by(ifp->if_agentids, ifp->if_agentcount);
	}

	bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		/* proto_unplumb may block; drop the ifnet lock around it */
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	kfree_type_counted_by(struct proto_hash_entry, ifp->if_proto_hash_count, ifp->if_proto_hash);

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	ifa_remref(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself, and wait until
	 * it has exited.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_TERMINATING;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		/* wait for starter thread to terminate */
		lck_mtx_lock(&ifp->if_start_lock);
		while (ifp->if_start_thread != THREAD_NULL) {
			if (dlil_verbose) {
				DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
				    __func__,
				    if_name(ifp));
			}
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock, (PZERO - 1),
			    "ifnet_start_thread_exit", NULL);
		}
		lck_mtx_unlock(&ifp->if_start_lock);
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s starter thread termination complete",
			    __func__, if_name(ifp));
		}
	}

	/*
	 * Signal the poller thread to terminate itself, and wait until
	 * it has exited.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_flags |= IF_POLLF_TERMINATING;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		/* wait for poller thread to terminate */
		lck_mtx_lock(&ifp->if_poll_lock);
		while (ifp->if_poll_thread != THREAD_NULL) {
			if (dlil_verbose) {
				DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
				    __func__,
				    if_name(ifp));
			}
			(void) msleep(&ifp->if_poll_thread,
			    &ifp->if_poll_lock, (PZERO - 1),
			    "ifnet_poll_thread_exit", NULL);
		}
		lck_mtx_unlock(&ifp->if_poll_lock);
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s poller thread termination complete\n",
			    __func__, if_name(ifp));
		}
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->dlth_affinity) {
			struct thread *__single tp, *__single wtp, *__single ptp;

			lck_mtx_lock_spin(&inp->dlth_lock);
			wtp = inp->dlth_driver_thread;
			inp->dlth_driver_thread = THREAD_NULL;
			ptp = inp->dlth_poller_thread;
			inp->dlth_poller_thread = THREAD_NULL;
			ASSERT(inp->dlth_thread != THREAD_NULL);
			tp = inp->dlth_thread;    /* don't nullify now */
			inp->dlth_affinity_tag = 0;
			inp->dlth_affinity = FALSE;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				VERIFY(ifp->if_xflags & IFXF_LEGACY);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* if the worker thread was created, tell it to terminate */
		if (inp->dlth_thread != THREAD_NULL) {
			lck_mtx_lock_spin(&inp->dlth_lock);
			inp->dlth_flags |= DLIL_INPUT_TERMINATE;
			if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
				wakeup_one((caddr_t)&inp->dlth_flags);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			/* drop the ifnet lock while sleeping on the input thread */
			ifnet_lock_done(ifp);

			/* wait for the input thread to terminate */
			lck_mtx_lock_spin(&inp->dlth_lock);
			while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
			    == 0) {
				(void) msleep(&inp->dlth_flags, &inp->dlth_lock,
				    (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			ifnet_lock_exclusive(ifp);
		}

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
		/* clean-up poll parameters */
		VERIFY(ifp->if_poll_thread == THREAD_NULL);
		dlil_reset_rxpoll_params(ifp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	/* QoS marking get cleared */
	if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

#if SKYWALK
	/* the nexus destructor is responsible for clearing these */
	VERIFY(ifp->if_na_ops == NULL);
	VERIFY(ifp->if_na == NULL);
#endif /* SKYWALK */

	/* interface could come up with different hwassist next time */
	ifp->if_hwassist = 0;
	ifp->if_capenable = 0;

	/* promiscuous/allmulti counts need to start at zero again */
	ifp->if_pcount = 0;
	ifp->if_amcount = 0;
	ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush(ifp, ifp->if_snd);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Ignore any pending data threshold as the interface is anyways gone */
	ifp->if_data_threshold = 0;

	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	os_log(OS_LOG_DEFAULT, "%s detached", if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	/* Notify the driver last, after the ifnet is fully quiesced */
	if (if_free != NULL) {
		if_free(ifp);
	}

	ifclassq_release(&ifp->if_snd);

	/* we're fully detached, clear the "in use" bit */
	dlifp = (struct dlil_ifnet *)ifp;
	lck_mtx_lock(&dlifp->dl_if_lock);
	ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	lck_mtx_unlock(&dlifp->dl_if_lock);

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
6003 
6004 errno_t
ifp_if_output(struct ifnet * ifp,struct mbuf * m)6005 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6006 {
6007 #pragma unused(ifp)
6008 	m_freem_list(m);
6009 	return 0;
6010 }
6011 
/*
 * Start handler installed on a detached ifnet: there is no driver to
 * dequeue packets, so just purge whatever is sitting in the queues.
 */
void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
6017 
6018 static errno_t
ifp_if_input(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s,boolean_t poll,struct thread * tp)6019 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6020     struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6021     boolean_t poll, struct thread *tp)
6022 {
6023 #pragma unused(ifp, m_tail, s, poll, tp)
6024 	m_freem_list(m_head);
6025 	return ENXIO;
6026 }
6027 
6028 static void
ifp_if_input_poll(struct ifnet * ifp,u_int32_t flags,u_int32_t max_cnt,struct mbuf ** m_head,struct mbuf ** m_tail,u_int32_t * cnt,u_int32_t * len)6029 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6030     struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6031 {
6032 #pragma unused(ifp, flags, max_cnt)
6033 	if (m_head != NULL) {
6034 		*m_head = NULL;
6035 	}
6036 	if (m_tail != NULL) {
6037 		*m_tail = NULL;
6038 	}
6039 	if (cnt != NULL) {
6040 		*cnt = 0;
6041 	}
6042 	if (len != NULL) {
6043 		*len = 0;
6044 	}
6045 }
6046 
6047 static errno_t
ifp_if_ctl(struct ifnet * ifp,ifnet_ctl_cmd_t cmd,u_int32_t arglen,void * arg)6048 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6049 {
6050 #pragma unused(ifp, cmd, arglen, arg)
6051 	return EOPNOTSUPP;
6052 }
6053 
6054 static errno_t
ifp_if_demux(struct ifnet * ifp,struct mbuf * m,char * fh,protocol_family_t * pf)6055 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6056 {
6057 #pragma unused(ifp, fh, pf)
6058 	m_freem(m);
6059 	return EJUSTRETURN;
6060 }
6061 
6062 static errno_t
ifp_if_add_proto(struct ifnet * ifp,protocol_family_t pf,const struct ifnet_demux_desc * da,u_int32_t dc)6063 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6064     const struct ifnet_demux_desc *da, u_int32_t dc)
6065 {
6066 #pragma unused(ifp, pf, da, dc)
6067 	return EINVAL;
6068 }
6069 
6070 static errno_t
ifp_if_del_proto(struct ifnet * ifp,protocol_family_t pf)6071 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6072 {
6073 #pragma unused(ifp, pf)
6074 	return EINVAL;
6075 }
6076 
6077 static errno_t
ifp_if_check_multi(struct ifnet * ifp,const struct sockaddr * sa)6078 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6079 {
6080 #pragma unused(ifp, sa)
6081 	return EOPNOTSUPP;
6082 }
6083 
/*
 * Legacy framer installed on a detached ifnet; forwards to
 * ifp_if_framer_extended(), which frees the packet and returns
 * EJUSTRETURN.  The legacy framer signature differs by platform: on
 * non-macOS targets it carries pre/post length pointers, which are
 * passed through; on macOS it does not, so NULL is passed for both.
 */
#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
6102 
6103 static errno_t
ifp_if_framer_extended(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * sa,IFNET_LLADDR_T ll,IFNET_FRAME_TYPE_T t,u_int32_t * pre,u_int32_t * post)6104 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6105     const struct sockaddr *sa,
6106     IFNET_LLADDR_T ll,
6107     IFNET_FRAME_TYPE_T t,
6108     u_int32_t *pre, u_int32_t *post)
6109 {
6110 #pragma unused(ifp, sa, ll, t)
6111 	m_freem(*m);
6112 	*m = NULL;
6113 
6114 	if (pre != NULL) {
6115 		*pre = 0;
6116 	}
6117 	if (post != NULL) {
6118 		*post = 0;
6119 	}
6120 
6121 	return EJUSTRETURN;
6122 }
6123 
6124 errno_t
ifp_if_ioctl(struct ifnet * ifp,unsigned long cmd,void * arg)6125 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6126 {
6127 #pragma unused(ifp, cmd, arg)
6128 	return EOPNOTSUPP;
6129 }
6130 
6131 static errno_t
ifp_if_set_bpf_tap(struct ifnet * ifp,bpf_tap_mode tm,bpf_packet_func f)6132 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6133 {
6134 #pragma unused(ifp, tm, f)
6135 	/* XXX not sure what to do here */
6136 	return 0;
6137 }
6138 
/*
 * if_free callback installed on a detached ifnet: the driver's real
 * callback has already been invoked, so this is a no-op placeholder.
 */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}
6144 
/*
 * Event callback installed on a detached ifnet: events are ignored
 * once the driver is gone.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
6150 
6151 __private_extern__ void
dlil_proto_unplumb_all(struct ifnet * ifp)6152 dlil_proto_unplumb_all(struct ifnet *ifp)
6153 {
6154 	/*
6155 	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6156 	 * each bucket contains exactly one entry; PF_VLAN does not need an
6157 	 * explicit unplumb.
6158 	 *
6159 	 * if_proto_hash[3] is for other protocols; we expect anything
6160 	 * in this bucket to respond to the DETACHING event (which would
6161 	 * have happened by now) and do the unplumb then.
6162 	 */
6163 	(void) proto_unplumb(PF_INET, ifp);
6164 	(void) proto_unplumb(PF_INET6, ifp);
6165 }
6166 
6167 static void
ifp_src_route_copyout(struct ifnet * ifp,struct route * dst)6168 ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
6169 {
6170 	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6171 	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6172 
6173 	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));
6174 
6175 	lck_mtx_unlock(&ifp->if_cached_route_lock);
6176 }
6177 
6178 static void
ifp_src_route_copyin(struct ifnet * ifp,struct route * src)6179 ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
6180 {
6181 	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6182 	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6183 
6184 	if (ifp->if_fwd_cacheok) {
6185 		route_copyin(src, &ifp->if_src_route, sizeof(*src));
6186 	} else {
6187 		ROUTE_RELEASE(src);
6188 	}
6189 	lck_mtx_unlock(&ifp->if_cached_route_lock);
6190 }
6191 
6192 static void
ifp_src_route6_copyout(struct ifnet * ifp,struct route_in6 * dst)6193 ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
6194 {
6195 	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6196 	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6197 
6198 	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
6199 	    sizeof(*dst));
6200 
6201 	lck_mtx_unlock(&ifp->if_cached_route_lock);
6202 }
6203 
6204 static void
ifp_src_route6_copyin(struct ifnet * ifp,struct route_in6 * src)6205 ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
6206 {
6207 	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
6208 	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
6209 
6210 	if (ifp->if_fwd_cacheok) {
6211 		route_copyin((struct route *)src,
6212 		    (struct route *)&ifp->if_src_route6, sizeof(*src));
6213 	} else {
6214 		ROUTE_RELEASE(src);
6215 	}
6216 	lck_mtx_unlock(&ifp->if_cached_route_lock);
6217 }
6218 
/*
 * Return a route for src_ip scoped to ifp, preferring the interface's
 * cached source route.  If the cached route is unusable or its cached
 * destination does not match src_ip, a fresh scoped lookup is done and
 * the result is stored back into the cache; an extra RT_ADDREF keeps a
 * reference for the caller since the copyin consumes one.  Returns
 * NULL when no route is found; the caller owns a reference on any
 * non-NULL result.
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route            src_rt;
	struct sockaddr_in      *dst;

	dst = SIN(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the destination sockaddr if not yet AF_INET */
		if (dst->sin_family != AF_INET) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped(SA(dst),
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry  *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
6253 
/*
 * IPv6 counterpart of ifnet_cached_rtlookup_inet(): return a route for
 * *src_ip6 scoped to ifp, preferring the interface's cached source
 * route and refreshing the cache on a miss.  Returns NULL when no
 * route is found; the caller owns a reference on any non-NULL result.
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the destination sockaddr if not yet AF_INET6 */
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		/* NOTE(review): ro_rt should be NULL after ROUTE_RELEASE
		 * above (the inet variant VERIFYs this) — confirm. */
		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				SA(&src_rt.ro_dst), 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry  *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
6290 
/*
 * Update the interface's Link Quality Metric (LQM) state.
 *
 * lqm:    new value in [IFNET_LQM_MIN, IFNET_LQM_MAX]; normalized to a
 *         threshold value before use.  IFNET_LQM_THRESH_ABORT also
 *         schedules the TCP PCB timer to handle aborts.
 * locked: nonzero if the caller already holds the ifnet lock
 *         exclusively.
 *
 * Locking contract: the ifnet lock is always dropped before posting
 * the kernel event / calling NECP, and reacquired before returning
 * only when the caller held it (locked != 0).
 *
 * Also maintains per-state residency time and entry counters, and
 * triggers a link-heuristics / NECP client update when needed.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;
	uint64_t now, delta;
	int8_t old_lqm;
	bool need_necp_client_update;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	lqm = ifnet_lqm_normalize(lqm);
	if (lqm == IFNET_LQM_THRESH_ABORT) {
		/* kick the TCP fast timer to process LQM-abort handling */
		os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return;         /* nothing to update */
	}

	net_update_uptime();
	now = net_uptime_ms();
	ASSERT(now >= ifp->if_lqmstate_start_time);
	delta = now - ifp->if_lqmstate_start_time;

	/* Account the time spent in the outgoing LQM state */
	old_lqm = ifp->if_interface_state.lqm_state;
	switch (old_lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_time += delta;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_time += delta;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_time += delta;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_time += delta;
		break;
	default:
		break;
	}
	/* Count the entry into the new LQM state */
	switch (lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_cnt += 1;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_cnt += 1;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_cnt += 1;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_cnt += 1;
		break;
	default:
		break;
	}
	ifp->if_lqmstate_start_time = now;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Update the link heuristics
	 */
	need_necp_client_update = if_update_link_heuristic(ifp);

	/*
	 * Don't want to hold the lock when issuing kernel events or calling NECP
	 */
	ifnet_lock_done(ifp);

	if (need_necp_client_update) {
		necp_update_all_clients_immediately_if_needed(true);
	}

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
6397 
/*
 * Record a new RRC (radio resource control) state on the interface and
 * post KEV_DL_RRC_STATE_CHANGED when it actually changed.
 *
 * Called with the ifnet lock held exclusively (see if_state_update);
 * the lock is temporarily dropped while posting the kernel event and
 * reacquired before returning, so the caller's lock state is
 * preserved.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* No change and already valid: nothing to do */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	/* Restore the lock for the caller */
	ifnet_lock_exclusive(ifp);
}
6427 
/*
 * Apply an externally supplied interface state (link quality, RRC state,
 * interface availability) to the ifnet.  Only the fields flagged in
 * if_interface_state->valid_bitmask are validated and applied.
 *
 * Returns ENOTSUP if an RRC state is supplied for a non-cellular
 * interface, EINVAL for out-of-range LQM or RRC values, 0 otherwise.
 *
 * Note: if_lqm_update() and if_rrc_state_update() may temporarily drop
 * and reacquire the ifnet lock while posting kernel events.
 */
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	/* RRC state only makes sense on cellular interfaces */
	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return ENOTSUP;
	}
	/* LQM value must fall within the defined range */
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}
	/* Only IDLE and CONNECTED are accepted RRC states */
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}

	/* All inputs validated; apply each flagged field */
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			/* Remember the index so we can kick TCP after unlocking */
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}

	return 0;
}
6498 
6499 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)6500 if_get_state(struct ifnet *ifp,
6501     struct if_interface_state *if_interface_state)
6502 {
6503 	ifnet_lock_shared(ifp);
6504 
6505 	if_interface_state->valid_bitmask = 0;
6506 
6507 	if (ifp->if_interface_state.valid_bitmask &
6508 	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
6509 		if_interface_state->valid_bitmask |=
6510 		    IF_INTERFACE_STATE_RRC_STATE_VALID;
6511 		if_interface_state->rrc_state =
6512 		    ifp->if_interface_state.rrc_state;
6513 	}
6514 	if (ifp->if_interface_state.valid_bitmask &
6515 	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
6516 		if_interface_state->valid_bitmask |=
6517 		    IF_INTERFACE_STATE_LQM_STATE_VALID;
6518 		if_interface_state->lqm_state =
6519 		    ifp->if_interface_state.lqm_state;
6520 	}
6521 	if (ifp->if_interface_state.valid_bitmask &
6522 	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6523 		if_interface_state->valid_bitmask |=
6524 		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6525 		if_interface_state->interface_availability =
6526 		    ifp->if_interface_state.interface_availability;
6527 	}
6528 
6529 	ifnet_lock_done(ifp);
6530 }
6531 
6532 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)6533 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6534 {
6535 	if (conn_probe > 1) {
6536 		return EINVAL;
6537 	}
6538 	if (conn_probe == 0) {
6539 		if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6540 	} else {
6541 		if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6542 	}
6543 
6544 	os_log(OS_LOG_DEFAULT, "interface probing on %s set to %u by %s:%d",
6545 	    if_name(ifp), conn_probe, proc_best_name(current_proc()), proc_selfpid());
6546 
6547 #if NECP
6548 	necp_update_all_clients();
6549 #endif /* NECP */
6550 
6551 	tcp_probe_connectivity(ifp, conn_probe);
6552 	return 0;
6553 }
6554 
6555 /* for uuid.c */
6556 static int
get_ether_index(int * ret_other_index)6557 get_ether_index(int * ret_other_index)
6558 {
6559 	ifnet_ref_t ifp;
6560 	int en0_index = 0;
6561 	int other_en_index = 0;
6562 	int any_ether_index = 0;
6563 	short best_unit = 0;
6564 
6565 	*ret_other_index = 0;
6566 	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6567 		/*
6568 		 * find en0, or if not en0, the lowest unit en*, and if not
6569 		 * that, any ethernet
6570 		 */
6571 		ifnet_lock_shared(ifp);
6572 		if (strcmp(ifp->if_name, "en") == 0) {
6573 			if (ifp->if_unit == 0) {
6574 				/* found en0, we're done */
6575 				en0_index = ifp->if_index;
6576 				ifnet_lock_done(ifp);
6577 				break;
6578 			}
6579 			if (other_en_index == 0 || ifp->if_unit < best_unit) {
6580 				other_en_index = ifp->if_index;
6581 				best_unit = ifp->if_unit;
6582 			}
6583 		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
6584 			any_ether_index = ifp->if_index;
6585 		}
6586 		ifnet_lock_done(ifp);
6587 	}
6588 	if (en0_index == 0) {
6589 		if (other_en_index != 0) {
6590 			*ret_other_index = other_en_index;
6591 		} else if (any_ether_index != 0) {
6592 			*ret_other_index = any_ether_index;
6593 		}
6594 	}
6595 	return en0_index;
6596 }
6597 
6598 int
uuid_get_ethernet(u_int8_t * __counted_by (ETHER_ADDR_LEN)node)6599 uuid_get_ethernet(u_int8_t *__counted_by(ETHER_ADDR_LEN) node)
6600 {
6601 	static int en0_index;
6602 	ifnet_ref_t ifp;
6603 	int other_index = 0;
6604 	int the_index = 0;
6605 	int ret;
6606 
6607 	ifnet_head_lock_shared();
6608 	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
6609 		en0_index = get_ether_index(&other_index);
6610 	}
6611 	if (en0_index != 0) {
6612 		the_index = en0_index;
6613 	} else if (other_index != 0) {
6614 		the_index = other_index;
6615 	}
6616 	if (the_index != 0) {
6617 		struct dlil_ifnet *dl_if;
6618 
6619 		ifp = ifindex2ifnet[the_index];
6620 		VERIFY(ifp != NULL);
6621 		dl_if = (struct dlil_ifnet *)ifp;
6622 		if (dl_if->dl_if_permanent_ether_is_set != 0) {
6623 			/*
6624 			 * Use the permanent ethernet address if it is
6625 			 * available because it will never change.
6626 			 */
6627 			memcpy(node, dl_if->dl_if_permanent_ether,
6628 			    ETHER_ADDR_LEN);
6629 		} else {
6630 			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
6631 		}
6632 		ret = 0;
6633 	} else {
6634 		ret = -1;
6635 	}
6636 	ifnet_head_done();
6637 	return ret;
6638 }
6639 
6640 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6641 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6642     int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6643 {
6644 	struct kev_dl_node_presence kev;
6645 	struct sockaddr_dl *sdl;
6646 	struct sockaddr_in6 *sin6;
6647 	int ret = 0;
6648 
6649 	VERIFY(ifp);
6650 	VERIFY(sa);
6651 	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6652 
6653 	bzero(&kev, sizeof(kev));
6654 	sin6 = &kev.sin6_node_address;
6655 	sdl = &kev.sdl_node_address;
6656 	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6657 	kev.rssi = rssi;
6658 	kev.link_quality_metric = lqm;
6659 	kev.node_proximity_metric = npm;
6660 	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6661 
6662 	ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6663 	if (ret == 0 || ret == EEXIST) {
6664 		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6665 		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6666 		if (err != 0) {
6667 			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
6668 			    "error %d\n", __func__, err);
6669 		}
6670 	}
6671 
6672 	if (ret == EEXIST) {
6673 		ret = 0;
6674 	}
6675 	return ret;
6676 }
6677 
6678 void
dlil_node_absent(struct ifnet * ifp,struct sockaddr * sa)6679 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
6680 {
6681 	struct kev_dl_node_absence kev = {};
6682 	struct sockaddr_in6 *kev_sin6 = NULL;
6683 	struct sockaddr_dl *kev_sdl = NULL;
6684 	int error = 0;
6685 
6686 	VERIFY(ifp != NULL);
6687 	VERIFY(sa != NULL);
6688 	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6689 
6690 	kev_sin6 = &kev.sin6_node_address;
6691 	kev_sdl = &kev.sdl_node_address;
6692 
6693 	if (sa->sa_family == AF_INET6) {
6694 		/*
6695 		 * If IPv6 address is given, get the link layer
6696 		 * address from what was cached in the neighbor cache
6697 		 */
6698 		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
6699 		SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
6700 		error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
6701 	} else {
6702 		/*
6703 		 * If passed address is AF_LINK type, derive the address
6704 		 * based on the link address.
6705 		 */
6706 		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
6707 		error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
6708 	}
6709 
6710 	if (error == 0) {
6711 		kev_sdl->sdl_type = ifp->if_type;
6712 		kev_sdl->sdl_index = ifp->if_index;
6713 
6714 		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
6715 		    &kev.link_data, sizeof(kev), FALSE);
6716 	}
6717 }
6718 
6719 int
dlil_node_present_v2(struct ifnet * ifp,struct sockaddr * sa,struct sockaddr_dl * sdl,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6720 dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
6721     int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6722 {
6723 	struct kev_dl_node_presence kev = {};
6724 	struct sockaddr_dl *kev_sdl = NULL;
6725 	struct sockaddr_in6 *kev_sin6 = NULL;
6726 	int ret = 0;
6727 
6728 	VERIFY(ifp != NULL);
6729 	VERIFY(sa != NULL && sdl != NULL);
6730 	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
6731 
6732 	kev_sin6 = &kev.sin6_node_address;
6733 	kev_sdl = &kev.sdl_node_address;
6734 
6735 	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
6736 	SOCKADDR_COPY(sdl, kev_sdl, sdl->sdl_len);
6737 	kev_sdl->sdl_type = ifp->if_type;
6738 	kev_sdl->sdl_index = ifp->if_index;
6739 
6740 	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
6741 	SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
6742 
6743 	kev.rssi = rssi;
6744 	kev.link_quality_metric = lqm;
6745 	kev.node_proximity_metric = npm;
6746 	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6747 
6748 	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
6749 	if (ret == 0 || ret == EEXIST) {
6750 		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6751 		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6752 		if (err != 0) {
6753 			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
6754 		}
6755 	}
6756 
6757 	if (ret == EEXIST) {
6758 		ret = 0;
6759 	}
6760 	return ret;
6761 }
6762 
6763 const void *
dlil_ifaddr_bytes(const struct sockaddr_dl * sdl,size_t * sizep,kauth_cred_t * credp)6764 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6765     kauth_cred_t *credp)
6766 {
6767 	const u_int8_t *bytes;
6768 	size_t size;
6769 
6770 	bytes = CONST_LLADDR(sdl);
6771 	size = sdl->sdl_alen;
6772 
6773 #if CONFIG_MACF
6774 	if (dlil_lladdr_ckreq) {
6775 		switch (sdl->sdl_type) {
6776 		case IFT_ETHER:
6777 		case IFT_IEEE1394:
6778 			break;
6779 		default:
6780 			credp = NULL;
6781 			break;
6782 		}
6783 		;
6784 
6785 		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6786 			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6787 				[0] = 2
6788 			};
6789 
6790 			bytes = unspec;
6791 		}
6792 	}
6793 #else
6794 #pragma unused(credp)
6795 #endif
6796 
6797 	if (sizep != NULL) {
6798 		*sizep = size;
6799 	}
6800 	return bytes;
6801 }
6802 
6803 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])6804 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6805     u_int8_t info[DLIL_MODARGLEN])
6806 {
6807 	struct kev_dl_issues kev;
6808 	struct timeval tv;
6809 
6810 	VERIFY(ifp != NULL);
6811 	VERIFY(modid != NULL);
6812 	static_assert(sizeof(kev.modid) == DLIL_MODIDLEN);
6813 	static_assert(sizeof(kev.info) == DLIL_MODARGLEN);
6814 
6815 	bzero(&kev, sizeof(kev));
6816 
6817 	microtime(&tv);
6818 	kev.timestamp = tv.tv_sec;
6819 	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6820 	if (info != NULL) {
6821 		bcopy(info, &kev.info, DLIL_MODARGLEN);
6822 	}
6823 
6824 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6825 	    &kev.link_data, sizeof(kev), FALSE);
6826 }
6827 
6828 errno_t
ifnet_getset_opportunistic(ifnet_t ifp,u_long cmd,struct ifreq * ifr,struct proc * p)6829 ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
6830     struct proc *p)
6831 {
6832 	u_int32_t level = IFNET_THROTTLE_OFF;
6833 	errno_t result = 0;
6834 
6835 	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
6836 
6837 	if (cmd == SIOCSIFOPPORTUNISTIC) {
6838 		/*
6839 		 * XXX: Use priv_check_cred() instead of root check?
6840 		 */
6841 		if ((result = proc_suser(p)) != 0) {
6842 			return result;
6843 		}
6844 
6845 		if (ifr->ifr_opportunistic.ifo_flags ==
6846 		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
6847 			level = IFNET_THROTTLE_OPPORTUNISTIC;
6848 		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
6849 			level = IFNET_THROTTLE_OFF;
6850 		} else {
6851 			result = EINVAL;
6852 		}
6853 
6854 		if (result == 0) {
6855 			result = ifnet_set_throttle(ifp, level);
6856 		}
6857 	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
6858 		ifr->ifr_opportunistic.ifo_flags = 0;
6859 		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
6860 			ifr->ifr_opportunistic.ifo_flags |=
6861 			    IFRIFOF_BLOCK_OPPORTUNISTIC;
6862 		}
6863 	}
6864 
6865 	/*
6866 	 * Return the count of current opportunistic connections
6867 	 * over the interface.
6868 	 */
6869 	if (result == 0) {
6870 		uint32_t flags = 0;
6871 		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
6872 		    INPCB_OPPORTUNISTIC_SETCMD : 0;
6873 		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
6874 		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
6875 		ifr->ifr_opportunistic.ifo_inuse =
6876 		    udp_count_opportunistic(ifp->if_index, flags) +
6877 		    tcp_count_opportunistic(ifp->if_index, flags);
6878 	}
6879 
6880 	if (result == EALREADY) {
6881 		result = 0;
6882 	}
6883 
6884 	return result;
6885 }
6886 
6887 int
ifnet_get_throttle(struct ifnet * ifp,u_int32_t * level)6888 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
6889 {
6890 	struct ifclassq *ifq;
6891 	cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
6892 	int err = 0;
6893 
6894 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
6895 		return ENXIO;
6896 	}
6897 
6898 	*level = IFNET_THROTTLE_OFF;
6899 
6900 	ifq = ifp->if_snd;
6901 	err = ifclassq_request(ifq, CLASSQRQ_THROTTLE, &req, false);
6902 	*level = req.level;
6903 
6904 	return err;
6905 }
6906 
6907 int
ifnet_set_throttle(struct ifnet * ifp,u_int32_t level)6908 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
6909 {
6910 	struct ifclassq *ifq;
6911 	cqrq_throttle_t req = { 1, level };
6912 	int err = 0;
6913 
6914 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
6915 		return ENXIO;
6916 	}
6917 
6918 	ifq = ifp->if_snd;
6919 
6920 	switch (level) {
6921 	case IFNET_THROTTLE_OFF:
6922 	case IFNET_THROTTLE_OPPORTUNISTIC:
6923 		break;
6924 	default:
6925 		return EINVAL;
6926 	}
6927 
6928 	err = ifclassq_request(ifq, CLASSQRQ_THROTTLE, &req, false);
6929 
6930 	if (err == 0) {
6931 		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
6932 		    level);
6933 #if NECP
6934 		necp_update_all_clients();
6935 #endif /* NECP */
6936 		if (level == IFNET_THROTTLE_OFF) {
6937 			ifnet_start(ifp);
6938 		}
6939 	}
6940 
6941 	return err;
6942 }
6943 
6944 errno_t
ifnet_getset_log(ifnet_t ifp,u_long cmd,struct ifreq * ifr,struct proc * p)6945 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
6946     struct proc *p)
6947 {
6948 #pragma unused(p)
6949 	errno_t result = 0;
6950 	uint32_t flags;
6951 	int level, category, subcategory;
6952 
6953 	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
6954 
6955 	if (cmd == SIOCSIFLOG) {
6956 		if ((result = priv_check_cred(kauth_cred_get(),
6957 		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
6958 			return result;
6959 		}
6960 
6961 		level = ifr->ifr_log.ifl_level;
6962 		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
6963 			result = EINVAL;
6964 		}
6965 
6966 		flags = ifr->ifr_log.ifl_flags;
6967 		if ((flags &= IFNET_LOGF_MASK) == 0) {
6968 			result = EINVAL;
6969 		}
6970 
6971 		category = ifr->ifr_log.ifl_category;
6972 		subcategory = ifr->ifr_log.ifl_subcategory;
6973 
6974 		if (result == 0) {
6975 			result = ifnet_set_log(ifp, level, flags,
6976 			    category, subcategory);
6977 		}
6978 	} else {
6979 		result = ifnet_get_log(ifp, &level, &flags, &category,
6980 		    &subcategory);
6981 		if (result == 0) {
6982 			ifr->ifr_log.ifl_level = level;
6983 			ifr->ifr_log.ifl_flags = flags;
6984 			ifr->ifr_log.ifl_category = category;
6985 			ifr->ifr_log.ifl_subcategory = subcategory;
6986 		}
6987 	}
6988 
6989 	return result;
6990 }
6991 
6992 int
ifnet_set_log(struct ifnet * ifp,int32_t level,uint32_t flags,int32_t category,int32_t subcategory)6993 ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
6994     int32_t category, int32_t subcategory)
6995 {
6996 	int err = 0;
6997 
6998 	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
6999 	VERIFY(flags & IFNET_LOGF_MASK);
7000 
7001 	/*
7002 	 * The logging level applies to all facilities; make sure to
7003 	 * update them all with the most current level.
7004 	 */
7005 	flags |= ifp->if_log.flags;
7006 
7007 	if (ifp->if_output_ctl != NULL) {
7008 		struct ifnet_log_params l;
7009 
7010 		bzero(&l, sizeof(l));
7011 		l.level = level;
7012 		l.flags = flags;
7013 		l.flags &= ~IFNET_LOGF_DLIL;
7014 		l.category = category;
7015 		l.subcategory = subcategory;
7016 
7017 		/* Send this request to lower layers */
7018 		if (l.flags != 0) {
7019 			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
7020 			    sizeof(l), &l);
7021 		}
7022 	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
7023 		/*
7024 		 * If targeted to the lower layers without an output
7025 		 * control callback registered on the interface, just
7026 		 * silently ignore facilities other than ours.
7027 		 */
7028 		flags &= IFNET_LOGF_DLIL;
7029 		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
7030 			level = 0;
7031 		}
7032 	}
7033 
7034 	if (err == 0) {
7035 		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
7036 			ifp->if_log.flags = 0;
7037 		} else {
7038 			ifp->if_log.flags |= flags;
7039 		}
7040 
7041 		log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
7042 		    "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
7043 		    ifp->if_log.level, ifp->if_log.flags, flags,
7044 		    category, subcategory);
7045 	}
7046 
7047 	return err;
7048 }
7049 
7050 int
ifnet_get_log(struct ifnet * ifp,int32_t * level,uint32_t * flags,int32_t * category,int32_t * subcategory)7051 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7052     int32_t *category, int32_t *subcategory)
7053 {
7054 	if (level != NULL) {
7055 		*level = ifp->if_log.level;
7056 	}
7057 	if (flags != NULL) {
7058 		*flags = ifp->if_log.flags;
7059 	}
7060 	if (category != NULL) {
7061 		*category = ifp->if_log.category;
7062 	}
7063 	if (subcategory != NULL) {
7064 		*subcategory = ifp->if_log.subcategory;
7065 	}
7066 
7067 	return 0;
7068 }
7069 
7070 int
ifnet_notify_address(struct ifnet * ifp,int af)7071 ifnet_notify_address(struct ifnet *ifp, int af)
7072 {
7073 	struct ifnet_notify_address_params na;
7074 
7075 #if PF
7076 	(void) pf_ifaddr_hook(ifp);
7077 #endif /* PF */
7078 
7079 	if (ifp->if_output_ctl == NULL) {
7080 		return EOPNOTSUPP;
7081 	}
7082 
7083 	bzero(&na, sizeof(na));
7084 	na.address_family = (sa_family_t)af;
7085 
7086 	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7087 	           sizeof(na), &na);
7088 }
7089 
7090 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)7091 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7092 {
7093 	if (ifp == NULL || flowid == NULL) {
7094 		return EINVAL;
7095 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7096 	    !ifnet_is_fully_attached(ifp)) {
7097 		return ENXIO;
7098 	}
7099 
7100 	*flowid = ifp->if_flowhash;
7101 
7102 	return 0;
7103 }
7104 
7105 errno_t
ifnet_disable_output(struct ifnet * ifp)7106 ifnet_disable_output(struct ifnet *ifp)
7107 {
7108 	int err = 0;
7109 
7110 	if (ifp == NULL) {
7111 		return EINVAL;
7112 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7113 	    !ifnet_is_fully_attached(ifp)) {
7114 		return ENXIO;
7115 	}
7116 
7117 	lck_mtx_lock(&ifp->if_start_lock);
7118 	if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
7119 		ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
7120 	} else if ((err = ifnet_fc_add(ifp)) == 0) {
7121 		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7122 	}
7123 	lck_mtx_unlock(&ifp->if_start_lock);
7124 
7125 	return err;
7126 }
7127 
7128 errno_t
ifnet_enable_output(struct ifnet * ifp)7129 ifnet_enable_output(struct ifnet *ifp)
7130 {
7131 	if (ifp == NULL) {
7132 		return EINVAL;
7133 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7134 	    !ifnet_is_fully_attached(ifp)) {
7135 		return ENXIO;
7136 	}
7137 
7138 	ifnet_start_common(ifp, TRUE, FALSE);
7139 	return 0;
7140 }
7141 
7142 void
ifnet_flowadv(uint32_t flowhash)7143 ifnet_flowadv(uint32_t flowhash)
7144 {
7145 	struct ifnet_fc_entry *ifce;
7146 	ifnet_ref_t ifp;
7147 
7148 	ifce = ifnet_fc_get(flowhash);
7149 	if (ifce == NULL) {
7150 		return;
7151 	}
7152 
7153 	VERIFY(ifce->ifce_ifp != NULL);
7154 	ifp = ifce->ifce_ifp;
7155 
7156 	/* flow hash gets recalculated per attach, so check */
7157 	if (ifnet_get_ioref(ifp)) {
7158 		if (ifp->if_flowhash == flowhash) {
7159 			lck_mtx_lock_spin(&ifp->if_start_lock);
7160 			if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
7161 				ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
7162 			}
7163 			lck_mtx_unlock(&ifp->if_start_lock);
7164 			(void) ifnet_enable_output(ifp);
7165 		}
7166 		ifnet_decr_iorefcnt(ifp);
7167 	}
7168 	ifnet_fc_entry_free(ifce);
7169 }
7170 
7171 /*
7172  * Function to compare ifnet_fc_entries in ifnet flow control tree
7173  */
7174 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)7175 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7176 {
7177 	return fc1->ifce_flowhash - fc2->ifce_flowhash;
7178 }
7179 
/*
 * Register the interface in the flow-control tree, keyed by its flow
 * hash.  Returns 0 if the interface was inserted or already present,
 * EAGAIN on a flow-hash collision with a different interface.
 *
 * The lock is taken as a spin lock for the common lookup path and
 * converted to a regular mutex only when a blocking allocation is
 * needed.
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	/* Build a key entry for the RB-tree lookup */
	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	/* Z_WAITOK may block, hence the spin-to-mutex conversion above */
	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
7223 
/*
 * Look up and REMOVE the flow-control entry for the given flow hash.
 * Returns the removed entry (ownership transfers to the caller, who
 * must release it with ifnet_fc_entry_free()), or NULL if no entry
 * exists or its interface is no longer fully attached (in which case
 * the entry is freed here).
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	ifnet_ref_t ifp;

	/* Build a key entry for the RB-tree lookup */
	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	/* The entry is always removed; it is consumed by this lookup */
	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_fully_attached(ifp)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
7261 
/*
 * Return a flow-control entry to its zone allocator.
 */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
7267 
7268 static uint32_t
ifnet_calc_flowhash(struct ifnet * ifp)7269 ifnet_calc_flowhash(struct ifnet *ifp)
7270 {
7271 	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7272 	uint32_t flowhash = 0;
7273 
7274 	if (ifnet_flowhash_seed == 0) {
7275 		ifnet_flowhash_seed = RandomULong();
7276 	}
7277 
7278 	bzero(&fh, sizeof(fh));
7279 
7280 	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
7281 	fh.ifk_unit = ifp->if_unit;
7282 	fh.ifk_flags = ifp->if_flags;
7283 	fh.ifk_eflags = ifp->if_eflags;
7284 	fh.ifk_capabilities = ifp->if_capabilities;
7285 	fh.ifk_capenable = ifp->if_capenable;
7286 	fh.ifk_output_sched_model = ifp->if_output_sched_model;
7287 	fh.ifk_rand1 = RandomULong();
7288 	fh.ifk_rand2 = RandomULong();
7289 
7290 try_again:
7291 	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
7292 	if (flowhash == 0) {
7293 		/* try to get a non-zero flowhash */
7294 		ifnet_flowhash_seed = RandomULong();
7295 		goto try_again;
7296 	}
7297 
7298 	return flowhash;
7299 }
7300 
/*
 * Store (or clear, when len == 0) the network signature for the given
 * address family on the interface.
 *
 * Returns EINVAL for an unsupported family or oversized signature,
 * ENOMEM if the per-family extra data has not been allocated.
 *
 * Note: inside each case, a `break` after an early unlock exits the
 * switch directly, skipping the trailing unlock for that family.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *__sized_by(len) data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
7362 
/*
 * Copy the stored network signature for the given family into `data`.
 * On input, *len is the caller's buffer size; on output it is the
 * actual signature length.
 *
 * Returns EINVAL on bad arguments, unsupported family, or a buffer too
 * small for the stored signature; ENOENT when no signature is set;
 * ENOMEM if the per-family extra data has not been allocated.  On
 * success *flags (if non-NULL) is cleared — no flags are defined.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *__sized_by(*len) data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			/* Caller's buffer must fit the stored signature */
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			/* Caller's buffer must fit the stored signature */
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
7423 
/*
 * Install up to NAT64_MAX_NUM_PREFIXES NAT64 prefixes on the interface.
 * A zero prefix_len clears the corresponding slot; otherwise the length
 * must be one of the RFC 6052 sizes (32/40/48/56/64/96 bits) and the
 * prefix must not have embedded interface/link-local scope.
 *
 * Returns EINVAL on a bad prefix, ENOMEM if the IPv6 extra data is not
 * allocated.  NECP clients are notified if at least one prefix was set.
 */
int
ifnet_set_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		/* Scoped (interface/link-local) prefixes are rejected */
		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	/* Notify NECP outside the lock, and only when something changed */
	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
7490 
7491 int
ifnet_get_nat64prefix(struct ifnet * ifp,struct ipv6_prefix * __counted_by (NAT64_MAX_NUM_PREFIXES)prefixes)7492 ifnet_get_nat64prefix(struct ifnet *ifp,
7493     struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
7494 {
7495 	int i, found_one = 0, error = 0;
7496 
7497 	if (ifp == NULL) {
7498 		return EINVAL;
7499 	}
7500 
7501 	if_inet6data_lock_shared(ifp);
7502 
7503 	if (IN6_IFEXTRA(ifp) == NULL) {
7504 		error = ENOMEM;
7505 		goto out;
7506 	}
7507 
7508 	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
7509 		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
7510 			found_one = 1;
7511 		}
7512 	}
7513 
7514 	if (found_one == 0) {
7515 		error = ENOENT;
7516 		goto out;
7517 	}
7518 
7519 	if (prefixes) {
7520 		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
7521 		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
7522 	}
7523 
7524 out:
7525 	if_inet6data_lock_done(ifp);
7526 
7527 	return error;
7528 }
7529 
#if DEBUG || DEVELOPMENT
/*
 * Blob for sum16 verification.  Used only as an arbitrary byte pattern
 * for checksum self-tests; the leading 0x1f 0x8b bytes suggest it was
 * originally gzip output, but the content itself is never decoded.
 */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
7567 
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t       init;   /* TRUE once sumr has been filled in at run time */
	uint16_t        len;    /* number of leading sumdata bytes in this span */
	uint16_t        sumr;   /* reference, computed lazily via in_cksum_mbuf_ref() */
	uint16_t        sumrp;  /* reference, precomputed; must match sumr */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
/* Number of entries in sumtbl[] */
#define SUMTBL_MAX      ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7592 
7593 static void
dlil_verify_sum16(void)7594 dlil_verify_sum16(void)
7595 {
7596 	struct mbuf *m;
7597 	uint8_t *buf;
7598 	int n;
7599 
7600 	/* Make sure test data plus extra room for alignment fits in cluster */
7601 	static_assert((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
7602 
7603 	kprintf("DLIL: running SUM16 self-tests ... ");
7604 
7605 	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
7606 	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
7607 
7608 	buf = mtod(m, uint8_t *);               /* base address */
7609 
7610 	for (n = 0; n < SUMTBL_MAX; n++) {
7611 		uint16_t len = sumtbl[n].len;
7612 		int i;
7613 
7614 		/* Verify for all possible alignments */
7615 		for (i = 0; i < (int)sizeof(uint64_t); i++) {
7616 			uint16_t sum, sumr;
7617 			uint8_t *c;
7618 
7619 			/* Copy over test data to mbuf */
7620 			VERIFY(len <= sizeof(sumdata));
7621 			c = buf + i;
7622 			bcopy(sumdata, c, len);
7623 
7624 			/* Zero-offset test (align by data pointer) */
7625 			m->m_data = (uintptr_t)c;
7626 			m->m_len = len;
7627 			sum = m_sum16(m, 0, len);
7628 
7629 			if (!sumtbl[n].init) {
7630 				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
7631 				sumtbl[n].sumr = sumr;
7632 				sumtbl[n].init = TRUE;
7633 			} else {
7634 				sumr = sumtbl[n].sumr;
7635 			}
7636 
7637 			/* Something is horribly broken; stop now */
7638 			if (sumr != sumtbl[n].sumrp) {
7639 				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
7640 				    "for len=%d align=%d sum=0x%04x "
7641 				    "[expected=0x%04x]\n", __func__,
7642 				    len, i, sum, sumr);
7643 				/* NOTREACHED */
7644 			} else if (sum != sumr) {
7645 				panic_plain("\n%s: broken m_sum16() for len=%d "
7646 				    "align=%d sum=0x%04x [expected=0x%04x]\n",
7647 				    __func__, len, i, sum, sumr);
7648 				/* NOTREACHED */
7649 			}
7650 
7651 			/* Alignment test by offset (fixed data pointer) */
7652 			m->m_data = (uintptr_t)buf;
7653 			m->m_len = i + len;
7654 			sum = m_sum16(m, i, len);
7655 
7656 			/* Something is horribly broken; stop now */
7657 			if (sum != sumr) {
7658 				panic_plain("\n%s: broken m_sum16() for len=%d "
7659 				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
7660 				    __func__, len, i, sum, sumr);
7661 				/* NOTREACHED */
7662 			}
7663 #if INET
7664 			/* Simple sum16 contiguous buffer test by aligment */
7665 			sum = b_sum16(c, len);
7666 
7667 			/* Something is horribly broken; stop now */
7668 			if (sum != sumr) {
7669 				panic_plain("\n%s: broken b_sum16() for len=%d "
7670 				    "align=%d sum=0x%04x [expected=0x%04x]\n",
7671 				    __func__, len, i, sum, sumr);
7672 				/* NOTREACHED */
7673 			}
7674 #endif /* INET */
7675 		}
7676 	}
7677 	m_freem(m);
7678 
7679 	kprintf("PASSED\n");
7680 }
7681 #endif /* DEBUG || DEVELOPMENT */
7682 
/* Expands to a switch case returning the stringified symbol name. */
#define CASE_STRINGIFY(x) case x: return #x

/*
 * Translate a DLIL kernel-event code (KEV_DL_*) into its symbolic
 * name for logging.  Returns the empty string for codes without a
 * case below.
 */
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
		CASE_STRINGIFY(KEV_DL_SIFMTU);
		CASE_STRINGIFY(KEV_DL_SIFPHYS);
		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
		CASE_STRINGIFY(KEV_DL_ADDMULTI);
		CASE_STRINGIFY(KEV_DL_DELMULTI);
		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_OFF);
		CASE_STRINGIFY(KEV_DL_LINK_ON);
		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
		CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
		CASE_STRINGIFY(KEV_DL_ISSUES);
		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
7719 
7720 void
dlil_dt_tcall_fn(thread_call_param_t arg0,thread_call_param_t arg1)7721 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
7722 {
7723 #pragma unused(arg1)
7724 	ifnet_ref_t ifp = arg0;
7725 
7726 	if (ifnet_get_ioref(ifp)) {
7727 		nstat_ifnet_threshold_reached(ifp->if_index);
7728 		ifnet_decr_iorefcnt(ifp);
7729 	}
7730 }
7731 
7732 void
ifnet_notify_data_threshold(struct ifnet * ifp)7733 ifnet_notify_data_threshold(struct ifnet *ifp)
7734 {
7735 	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
7736 	uint64_t oldbytes = ifp->if_dt_bytes;
7737 
7738 	ASSERT(ifp->if_dt_tcall != NULL);
7739 
7740 	/*
7741 	 * If we went over the threshold, notify NetworkStatistics.
7742 	 * We rate-limit it based on the threshold interval value.
7743 	 */
7744 	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
7745 	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
7746 	    !thread_call_isactive(ifp->if_dt_tcall)) {
7747 		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
7748 		uint64_t now = mach_absolute_time(), deadline = now;
7749 		uint64_t ival;
7750 
7751 		if (tival != 0) {
7752 			nanoseconds_to_absolutetime(tival, &ival);
7753 			clock_deadline_for_periodic_event(ival, now, &deadline);
7754 			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
7755 			    deadline);
7756 		} else {
7757 			(void) thread_call_enter(ifp->if_dt_tcall);
7758 		}
7759 	}
7760 }
7761 
7762 
/*
 * Forward per-flow interface statistics to the TCP layer, which owns
 * the actual aggregation (tcp_update_stats_per_flow()).
 */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
7769 
/*
 * Atomically OR set_flags into *flags_p; returns the flag word's
 * value from before the update (OSBitOrAtomic semantics).
 */
static inline u_int32_t
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
{
	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
}
7775 
/*
 * Atomically clear clear_flags in *flags_p (AND with the complement);
 * returns the flag word's value from before the update.
 */
static inline u_int32_t
_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
{
	return (u_int32_t)OSBitAndAtomic(~clear_flags, flags_p);
}
7781 
/* Atomically set bits in if_eflags; returns the previous flag word. */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}
7787 
/*
 * Atomically clear bits in if_eflags.  Unlike if_clear_xflags(), the
 * previous flag word is discarded.
 */
__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}
7793 
/* Atomically set bits in if_xflags; returns the previous flag word. */
__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}
7799 
/* Atomically clear bits in if_xflags; returns the previous flag word. */
__private_extern__ u_int32_t
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	return _clear_flags(&interface->if_xflags, clear_flags);
}
7805 
/*
 * Bump the interface's traffic-rule generation id so that cached
 * consumers (see ifnet_sync_traffic_rule_genid()) notice the change.
 */
__private_extern__ void
ifnet_update_traffic_rule_genid(ifnet_t ifp)
{
	os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
}
7811 
7812 __private_extern__ boolean_t
ifnet_sync_traffic_rule_genid(ifnet_t ifp,uint32_t * genid)7813 ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
7814 {
7815 	if (*genid != ifp->if_traffic_rule_genid) {
7816 		*genid = ifp->if_traffic_rule_genid;
7817 		return TRUE;
7818 	}
7819 	return FALSE;
7820 }
/*
 * Publish the number of inet traffic rules on the interface and bump
 * the generation id so consumers re-read the count.
 */
__private_extern__ void
ifnet_update_inet_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_inet_traffic_rule_count, count, relaxed);
	ifnet_update_traffic_rule_genid(ifp);
}
7827 
/*
 * Publish the number of ethernet traffic rules on the interface and
 * bump the generation id so consumers re-read the count.
 */
__private_extern__ void
ifnet_update_eth_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_eth_traffic_rule_count, count, relaxed);
	ifnet_update_traffic_rule_genid(ifp);
}
7834 
#if SKYWALK
/*
 * Report whether interface filters are "compatible" (i.e. no non-OS
 * filters are present).  With a NULL ifp the answer is system-wide:
 * compatible only when the total filter attach count does not exceed
 * the OS-originated attach count.  With a specific interface, it is
 * compatible only when that interface has no non-OS filters attached.
 */
static bool
net_check_compatible_if_filter(struct ifnet *ifp)
{
	if (ifp != NULL) {
		return ifp->if_flt_non_os_count == 0;
	}
	return net_api_stats.nas_iflt_attach_count <=
	       net_api_stats.nas_iflt_attach_os_count;
}
#endif /* SKYWALK */
7851 
#if CONFIG_MBUF_MCACHE
/*
 * Account for k bytes just written into the dump buffer: shrink the
 * remaining capacity, bail out to the `done' label when exhausted,
 * and advance the write cursor.  Relies on locals c/clen/k and a
 * `done' label in the enclosing function.
 */
#define DUMP_BUF_CHK() {        \
	clen -= k;              \
	if (clen < 1)           \
	        goto done;      \
	c += k;                 \
}

#if NETWORKING
int dlil_dump_top_if_qlen(char *__counted_by(str_len), int str_len);
/*
 * Write a short human-readable report into str (capacity str_len)
 * naming the interface with the longest send-queue (ifcq) backlog and
 * the one with the longest input-thread packet queue.  Returns the
 * number of bytes written.
 */
int
dlil_dump_top_if_qlen(char *__counted_by(str_len) str, int str_len)
{
	char *c = str;
	int k, clen = str_len;
	ifnet_ref_t top_ifcq_ifp = NULL;
	uint32_t top_ifcq_len = 0;
	ifnet_ref_t top_inq_ifp = NULL;
	uint32_t top_inq_len = 0;

	/*
	 * NOTE(review): this walk uses `ifidx < if_index'; other ifindex
	 * walks in xnu commonly use `<= if_index' — confirm the highest
	 * in-use index isn't being skipped here.
	 */
	for (int ifidx = 1; ifidx < if_index; ifidx++) {
		ifnet_ref_t ifp = ifindex2ifnet[ifidx];
		struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

		if (ifp == NULL) {
			continue;
		}
		if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
			top_ifcq_len = ifp->if_snd->ifcq_len;
			top_ifcq_ifp = ifp;
		}
		if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
			top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
			top_inq_ifp = ifp;
		}
	}

	if (top_ifcq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
		    top_ifcq_len, top_ifcq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
	if (top_inq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
		    top_inq_len, top_inq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
done:
	/* Bytes consumed == original capacity minus what remains. */
	return str_len - clen;
}
#endif /* NETWORKING */
7902 #endif /* NETWORKING */
7903 #endif /* CONFIG_MBUF_MCACHE */
7904