xref: /xnu-12377.41.6/bsd/net/dlil.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30  * support for mandatory and extensible security protections.  This notice
31  * is included in support of clause 2.2 (b) of the Apple Public License,
32  * Version 2.0.
33  */
34 #include <stddef.h>
35 #include <ptrauth.h>
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/dlil_sysctl.h>
53 #include <net/dlil_var_private.h>
54 #include <net/if_arp.h>
55 #include <net/if_var_private.h>
56 #include <net/iptap.h>
57 #include <net/pktap.h>
58 #include <net/droptap.h>
59 #include <net/nwk_wq.h>
60 #include <sys/kern_event.h>
61 #include <sys/kdebug.h>
62 #include <sys/mcache.h>
63 #include <sys/syslog.h>
64 #include <sys/protosw.h>
65 #include <sys/priv.h>
66 
67 #include <kern/assert.h>
68 #include <kern/locks.h>
69 #include <kern/sched_prim.h>
70 #include <kern/task.h>
71 #include <kern/thread.h>
72 #include <kern/uipc_domain.h>
73 #include <kern/zalloc.h>
74 #include <kern/thread_group.h>
75 
76 #include <net/kpi_protocol.h>
77 #include <net/kpi_interface.h>
78 #include <net/if_types.h>
79 #include <net/if_ipsec.h>
80 #include <net/if_llreach.h>
81 #include <net/if_utun.h>
82 #include <net/kpi_interfacefilter.h>
83 #include <net/classq/classq.h>
84 #include <net/classq/classq_sfb.h>
85 #include <net/flowhash.h>
86 #include <net/ntstat.h>
87 #if SKYWALK
88 #include <skywalk/lib/net_filter_event.h>
89 #endif /* SKYWALK */
90 #include <net/net_api_stats.h>
91 #include <net/if_ports_used.h>
92 #include <net/if_vlan_var.h>
93 #include <netinet/in.h>
94 #if INET
95 #include <netinet/in_var.h>
96 #include <netinet/igmp_var.h>
97 #include <netinet/ip_var.h>
98 #include <netinet/tcp.h>
99 #include <netinet/tcp_var.h>
100 #include <netinet/udp.h>
101 #include <netinet/udp_var.h>
102 #include <netinet/if_ether.h>
103 #include <netinet/in_pcb.h>
104 #include <netinet/in_tclass.h>
105 #include <netinet/ip.h>
106 #include <netinet/ip_icmp.h>
107 #include <netinet/icmp_var.h>
108 #endif /* INET */
109 
110 #include <net/nat464_utils.h>
111 #include <netinet6/in6_var.h>
112 #include <netinet6/nd6.h>
113 #include <netinet6/mld6_var.h>
114 #include <netinet6/scope6_var.h>
115 #include <netinet/ip6.h>
116 #include <netinet/icmp6.h>
117 #include <net/pf_pbuf.h>
118 #include <libkern/OSAtomic.h>
119 #include <libkern/tree.h>
120 
121 #include <dev/random/randomdev.h>
122 #include <machine/machine_routines.h>
123 
124 #include <mach/thread_act.h>
125 #include <mach/sdt.h>
126 
127 #if CONFIG_MACF
128 #include <sys/kauth.h>
129 #include <security/mac_framework.h>
130 #include <net/ethernet.h>
131 #include <net/firewire.h>
132 #endif
133 
134 #if PF
135 #include <net/pfvar.h>
136 #endif /* PF */
137 #include <net/pktsched/pktsched.h>
138 #include <net/pktsched/pktsched_netem.h>
139 
140 #if NECP
141 #include <net/necp.h>
142 #endif /* NECP */
143 
144 #if SKYWALK
145 #include <skywalk/packet/packet_queue.h>
146 #include <skywalk/nexus/netif/nx_netif.h>
147 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
148 #endif /* SKYWALK */
149 
150 #include <net/sockaddr_utils.h>
151 
152 #include <os/log.h>
153 
/* bumped on interface creation events; readers use it as a generation ID */
uint64_t if_creation_generation_count = 0;

/* global list of dlil_ifnet instances known to DLIL */
dlil_ifnet_queue_t dlil_ifnet_head;

/* NOTE(review): presumably counts route refs held by DLIL — confirm at use sites */
static u_int32_t net_rtref;

/* the shared "main" DLIL input thread and its backing state */
static struct dlil_main_threading_info dlil_main_input_thread_info;
struct dlil_threading_info *__single dlil_main_input_thread;
162 
163 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
164 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
165 
166 static int ifnet_lookup(struct ifnet *);
167 static void if_purgeaddrs(struct ifnet *);
168 
169 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
170     struct mbuf *, char *);
171 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
172     struct mbuf *);
173 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
174     mbuf_t *, const struct sockaddr *, void *,
175     IFNET_FRAME_TYPE_RW_T, IFNET_LLADDR_RW_T);
176 static void ifproto_media_event(struct ifnet *, protocol_family_t,
177     const struct kev_msg *);
178 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
179     unsigned long, void *);
180 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
181     struct sockaddr_dl *, size_t);
182 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
183     const struct sockaddr_dl *, const struct sockaddr *,
184     const struct sockaddr_dl *, const struct sockaddr *);
185 
186 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
187     struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
188     boolean_t poll, struct thread *tp);
189 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
190     struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
191 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
192 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
193     protocol_family_t *);
194 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
195     const struct ifnet_demux_desc *, u_int32_t);
196 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
197 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
198 #if !XNU_TARGET_OS_OSX
199 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
200     const struct sockaddr *, IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
201     u_int32_t *, u_int32_t *);
202 #else /* XNU_TARGET_OS_OSX */
203 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
204     const struct sockaddr *,
205     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T);
206 #endif /* XNU_TARGET_OS_OSX */
207 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
208     const struct sockaddr *,
209     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
210     u_int32_t *, u_int32_t *);
211 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
212 static void ifp_if_free(struct ifnet *);
213 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
214 
215 
216 
217 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
218     const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
219 #if DEBUG || DEVELOPMENT
220 static void dlil_verify_sum16(void);
221 #endif /* DEBUG || DEVELOPMENT */
222 
223 
224 static void ifnet_detacher_thread_func(void *, wait_result_t);
225 static void ifnet_detacher_thread_cont(void *, wait_result_t);
226 static void ifnet_detach_final(struct ifnet *);
227 static void ifnet_detaching_enqueue(struct ifnet *);
228 static struct ifnet *ifnet_detaching_dequeue(void);
229 
230 static void ifnet_start_thread_func(void *, wait_result_t);
231 static void ifnet_start_thread_cont(void *, wait_result_t);
232 
233 static void ifnet_poll_thread_func(void *, wait_result_t);
234 static void ifnet_poll_thread_cont(void *, wait_result_t);
235 
236 static errno_t ifnet_enqueue_common_single(struct ifnet *, struct ifclassq *,
237     classq_pkt_t *, boolean_t, boolean_t *);
238 
239 static void ifp_src_route_copyout(struct ifnet *, struct route *);
240 static void ifp_src_route_copyin(struct ifnet *, struct route *);
241 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
242 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
243 
244 
/* The following are protected by dlil_ifnet_lock */
static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
static u_int32_t ifnet_detaching_cnt;
static boolean_t ifnet_detaching_embryonic;
static void *ifnet_delayed_run; /* wait channel for detaching thread */

/* serializes access to the interface flow-control tree below */
static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
    &dlil_lck_attributes);

/* seed for ifnet_calc_flowhash(); presumably randomized at init — see its setter */
static uint32_t ifnet_flowhash_seed;

/*
 * Key material consumed by ifnet_calc_flowhash(): the interface's
 * identity (name/unit), its flag words, and two random words.
 */
struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];
	uint32_t        ifk_unit;
	uint32_t        ifk_flags;
	uint32_t        ifk_eflags;
	uint32_t        ifk_capabilities;
	uint32_t        ifk_capenable;
	uint32_t        ifk_output_sched_model;
	uint32_t        ifk_rand1;
	uint32_t        ifk_rand2;
};

/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;
	u_int32_t       ifce_flowhash;      /* key: flowhash of ifce_ifp */
	ifnet_ref_t     ifce_ifp;           /* referenced interface */
};

static uint32_t ifnet_calc_flowhash(struct ifnet *);
static int ifce_cmp(const struct ifnet_fc_entry *,
    const struct ifnet_fc_entry *);
static int ifnet_fc_add(struct ifnet *);
static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
static void ifnet_fc_entry_free(struct ifnet_fc_entry *);

/* protected by ifnet_fc_lock */
RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);

/* typed zone backing struct ifnet_fc_entry allocations */
static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
288 
extern void bpfdetach(struct ifnet *);

extern uint32_t udp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);
extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
    u_int32_t flags);

#if CONFIG_MACF
/*
 * NOTE(review): name suggests this gates MAC framework checks on
 * link-layer address requests (enabled on embedded platforms,
 * disabled on macOS) — confirm at use sites.
 */
#if !XNU_TARGET_OS_OSX
int dlil_lladdr_ckreq = 1;
#else /* XNU_TARGET_OS_OSX */
int dlil_lladdr_ckreq = 0;
#endif /* XNU_TARGET_OS_OSX */
#endif /* CONFIG_MACF */
305 
306 
/*
 * Atomically bump the global ifnet_delay_start_disabled counter
 * (declared elsewhere; presumably any non-zero value disables the
 * delayed-start behavior — confirm at its readers).
 */
static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}
312 
/* global networking tunables; NOTE(review): likely sysctl-backed — confirm */
unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */

extern u_int32_t        inject_buckets;
318 
319 void
ifnet_filter_update_tso(struct ifnet * ifp,boolean_t filter_enable)320 ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
321 {
322 	/*
323 	 * update filter count and route_generation ID to let TCP
324 	 * know it should reevalute doing TSO or not
325 	 */
326 	if (filter_enable) {
327 		OSAddAtomic(1, &ifp->if_flt_no_tso_count);
328 	} else {
329 		VERIFY(ifp->if_flt_no_tso_count != 0);
330 		OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
331 	}
332 	routegenid_update();
333 }
334 
/* refcount groups for interface I/O and data-movement references */
os_refgrp_decl(static, if_refiogrp, "if refio refcounts", NULL);
os_refgrp_decl(static, if_datamovgrp, "if datamov refcounts", NULL);
/* low bit of the datamov refcount encodes the draining state */
#define IF_DATAMOV_BITS 1
#define IF_DATAMOV_DRAINING 1

#if SKYWALK

static bool net_check_compatible_if_filter(struct ifnet *ifp);

/* if_attach_nx flags defined in os_skywalk_private.h */
unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
/* derived convenience flags mirroring bits of if_attach_nx */
unsigned int if_enable_fsw_ip_netagent =
    ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
unsigned int if_enable_fsw_transport_netagent =
    ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

unsigned int if_netif_all =
    ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);

/* Configure flowswitch to use max mtu sized buffer */
static bool fsw_use_max_mtu_buffer = false;


static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);

#include <skywalk/os_skywalk_private.h>
361 
362 boolean_t
ifnet_nx_noauto(ifnet_t ifp)363 ifnet_nx_noauto(ifnet_t ifp)
364 {
365 	return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
366 }
367 
/*
 * Automatic flowswitch attachment is suppressed for low-latency
 * interfaces; delegate the check to ifnet_is_low_latency().
 */
boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)
{
	return ifnet_is_low_latency(ifp);
}
373 
374 boolean_t
ifnet_is_low_latency(ifnet_t ifp)375 ifnet_is_low_latency(ifnet_t ifp)
376 {
377 	return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
378 }
379 
/*
 * Decide whether a netif compat nexus should be plumbed for this
 * interface.  Returns FALSE when compat attachment is globally disabled;
 * on embedded platforms the Wi-Fi AP interface is attached only when
 * IF_ATTACH_NX_NETIF_ALL is also set.
 */
boolean_t
ifnet_needs_compat(ifnet_t ifp)
{
	if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
		/* compat attachment globally disabled */
		return FALSE;
	}
#if !XNU_TARGET_OS_OSX
	/*
	 * To conserve memory, we plumb in the compat layer selectively; this
	 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
	 * In particular, we check for Wi-Fi Access Point.
	 */
	if (IFNET_IS_WIFI(ifp)) {
		/* Wi-Fi Access Point */
		if (strcmp(ifp->if_name, "ap") == 0) {
			/* attach only when IF_ATTACH_NX_NETIF_ALL was set */
			return if_netif_all;
		}
	}
#else /* XNU_TARGET_OS_OSX */
#pragma unused(ifp)
#endif /* XNU_TARGET_OS_OSX */
	return TRUE;
}
403 
404 boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)405 ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
406 {
407 	if (if_is_fsw_transport_netagent_enabled()) {
408 		/* check if netagent has been manually enabled for ipsec/utun */
409 		if (ifp->if_family == IFNET_FAMILY_IPSEC) {
410 			return ipsec_interface_needs_netagent(ifp);
411 		} else if (ifp->if_family == IFNET_FAMILY_UTUN) {
412 			return utun_interface_needs_netagent(ifp);
413 		}
414 
415 		/* check ifnet no auto nexus override */
416 		if (ifnet_nx_noauto(ifp)) {
417 			return FALSE;
418 		}
419 
420 		/* check global if_attach_nx configuration */
421 		switch (ifp->if_family) {
422 		case IFNET_FAMILY_CELLULAR:
423 		case IFNET_FAMILY_ETHERNET:
424 			if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
425 				return TRUE;
426 			}
427 			break;
428 		default:
429 			break;
430 		}
431 	}
432 	return FALSE;
433 }
434 
435 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)436 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
437 {
438 #pragma unused(ifp)
439 	if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
440 		return TRUE;
441 	}
442 	return FALSE;
443 }
444 
/* Global policy check only; the interface argument is ignored. */
boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)
{
#pragma unused(ifp)
	return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
}
451 
/*
 * Detach and free a single nexus provider instance.
 *
 * Returns TRUE if an instance was present and teardown was attempted
 * (failures of the individual steps are logged, not returned), FALSE
 * when there was no instance to detach.
 */
static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,
    const char *func_str, uuid_t instance, uuid_t device)
{
	errno_t         err;

	if (instance == NULL || uuid_is_null(instance)) {
		/* nothing to detach */
		return FALSE;
	}

	/* followed by the device port */
	if (device != NULL && !uuid_is_null(device)) {
		err = kern_nexus_ifdetach(controller, instance, device);
		if (err != 0) {
			DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
			    func_str, err);
		}
	}
	err = kern_nexus_controller_free_provider_instance(controller,
	    instance);
	if (err != 0) {
		DLIL_PRINTF("%s free_provider_instance failed %d\n",
		    func_str, err);
	}
	return TRUE;
}
478 
/*
 * Tear down a nexus: its device attachment and instance (via
 * dlil_detach_nexus_instance) followed by the provider registration.
 *
 * Returns TRUE if anything was actually detached/deregistered, FALSE if
 * all supplied UUIDs were null.  Individual failures are logged but do
 * not change the return value.
 */
static boolean_t
dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
    uuid_t device)
{
	boolean_t               detached = FALSE;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	int                     err;

	/* device port and instance go first */
	if (dlil_detach_nexus_instance(controller, func_str, instance,
	    device)) {
		detached = TRUE;
	}
	if (provider != NULL && !uuid_is_null(provider)) {
		detached = TRUE;
		err = kern_nexus_controller_deregister_provider(controller,
		    provider);
		if (err != 0) {
			DLIL_PRINTF("%s deregister_provider %d\n",
			    func_str, err);
		}
	}
	return detached;
}
502 
/*
 * Register a nexus provider of the given type (netif or flowswitch) for
 * the interface and allocate one provider instance.
 *
 * On success, *provider and *instance receive the new UUIDs and 0 is
 * returned.  On failure a nonzero errno is returned; a provider that was
 * registered before the instance allocation failed is deregistered.
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t          dom_prov;
	errno_t         err;
	nexus_name_t    provider_name;
	const char      *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	/* provider name, e.g. "com.apple.netif.en0" */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* best-effort rollback of the registration; err already set */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	/* note: the success path also flows through here with err == 0 */
	return err;
}
552 
/*
 * Create and attach a netif nexus (provider + instance + host
 * attachment) for the interface.  Returns TRUE on success, FALSE if the
 * interface already has a nexus attached or any step fails (in which
 * case partially-created state is torn down).
 */
static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
{
	nexus_attr_t            __single attr = NULL;
	nexus_controller_t      controller;
	errno_t                 err;
	/* NULL uuid pointer carrying bounds metadata for -fbounds-safety */
	unsigned char          *empty_uuid = __unsafe_forge_bidi_indexable(unsigned char *, NULL, sizeof(uuid_t));

	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
		/* it's already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s already has nexus attached\n",
			    __func__, if_name(ifp));
			/* already attached */
		}
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
	VERIFY(err == 0);

	controller = kern_nexus_shared_controller();

	/* create the netif provider and instance */
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
	if (err != 0) {
		goto failed;
	}

	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance, ifp,
	    empty_uuid, FALSE, &netif_nx->if_nif_attach);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
		    __func__, err);
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
		    netif_nx->if_nif_instance, empty_uuid);
		goto failed;
	}
	/*
	 * NOTE(review): attr is only destroyed on the failure path below;
	 * confirm that registration copies/consumes the attr object,
	 * otherwise the success path leaks it.
	 */
	return TRUE;

failed:
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
608 
609 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)610 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
611 {
612 	if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
613 	    IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
614 		goto failed;
615 	}
616 	switch (ifp->if_type) {
617 	case IFT_CELLULAR:
618 	case IFT_ETHER:
619 		if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
620 			/* don't auto-attach */
621 			goto failed;
622 		}
623 		break;
624 	default:
625 		/* don't auto-attach */
626 		goto failed;
627 	}
628 	return dlil_attach_netif_nexus_common(ifp, netif_nx);
629 
630 failed:
631 	return FALSE;
632 }
633 
/*
 * Tear down the netif nexus (provider, instance, and host attachment)
 * recorded for an interface.
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
641 
642 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)643 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
644 {
645 	struct ifreq        ifr;
646 	int                 error;
647 
648 	bzero(&ifr, sizeof(ifr));
649 	error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
650 	if (error == 0) {
651 		*ifdm_p = ifr.ifr_devmtu;
652 	}
653 	return error;
654 }
655 
/*
 * Adjust the flowswitch "large" buffer size to accommodate the
 * interface's TSO MTU.  Only applies on macOS/server kernels; a no-op
 * elsewhere.  *large_buf_size is grown to cover the TSO (or GSO) MTU
 * and then clamped to the per-mode maximum.
 */
static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
{
	uint32_t tso_v4_mtu = 0;
	uint32_t tso_v6_mtu = 0;

	if (!kernel_is_macos_or_server()) {
		return;
	}

	/*
	 * Note that we are reading the real hwassist flags set by the driver
	 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
	 * hasn't been called yet.
	 */
	if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
		tso_v4_mtu = ifp->if_tso_v4_mtu;
	}
	if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
		tso_v6_mtu = ifp->if_tso_v6_mtu;
	}

	/*
	 * If the hardware supports TSO, adjust the large buf size to match the
	 * supported TSO MTU size. Note that only native interfaces set TSO MTU
	 * size today.
	 * For compat, there is a 16KB limit on large buf size, so it needs to be
	 * bounded by NX_FSW_DEF_LARGE_BUFSIZE. Note that no compat interfaces
	 * set TSO MTU size today.
	 */
	if (SKYWALK_NATIVE(ifp)) {
		if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
			*large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
		} else {
			/* no TSO MTU advertised; fall back to the GSO MTU */
			*large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
		}
		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
	} else {
		*large_buf_size = MIN(NX_FSW_DEF_LARGE_BUFSIZE, *large_buf_size);
	}
}
697 
698 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint32_t * buf_size,bool * use_multi_buflet,uint32_t * large_buf_size)699 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
700     bool *use_multi_buflet, uint32_t *large_buf_size)
701 {
702 	struct kern_pbufpool_memory_info rx_pp_info;
703 	struct kern_pbufpool_memory_info tx_pp_info;
704 	uint32_t if_max_mtu = 0;
705 	uint32_t drv_buf_size;
706 	struct ifdevmtu ifdm;
707 	int err;
708 
709 	/*
710 	 * To perform intra-stack RX aggregation flowswitch needs to use
711 	 * multi-buflet packet.
712 	 */
713 	*use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
714 
715 	*large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
716 	/*
717 	 * IP over Thunderbolt interface can deliver the largest IP packet,
718 	 * but the driver advertises the MAX MTU as only 9K.
719 	 */
720 	if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
721 		if_max_mtu = IP_MAXPACKET;
722 		goto skip_mtu_ioctl;
723 	}
724 
725 	/* determine max mtu */
726 	bzero(&ifdm, sizeof(ifdm));
727 	err = dlil_siocgifdevmtu(ifp, &ifdm);
728 	if (__improbable(err != 0)) {
729 		DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
730 		    __func__, if_name(ifp));
731 		/* use default flowswitch buffer size */
732 		if_max_mtu = NX_FSW_BUFSIZE;
733 	} else {
734 		DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
735 		    ifdm.ifdm_max, ifdm.ifdm_current);
736 		/* rdar://problem/44589731 */
737 		if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
738 	}
739 
740 skip_mtu_ioctl:
741 	if (if_max_mtu == 0) {
742 		DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
743 		    __func__, if_name(ifp));
744 		return EINVAL;
745 	}
746 	if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
747 		DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
748 		    "max bufsize(%d)\n", __func__,
749 		    if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
750 		return EINVAL;
751 	}
752 
753 	/*
754 	 * for skywalk native driver, consult the driver packet pool also.
755 	 */
756 	if (dlil_is_native_netif_nexus(ifp)) {
757 		err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
758 		    &tx_pp_info);
759 		if (err != 0) {
760 			DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
761 			    __func__, if_name(ifp));
762 			return ENXIO;
763 		}
764 		drv_buf_size = tx_pp_info.kpm_bufsize *
765 		    tx_pp_info.kpm_max_frags;
766 		if (if_max_mtu > drv_buf_size) {
767 			DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
768 			    "tx %d * %d) can't support max mtu(%d)\n", __func__,
769 			    if_name(ifp), rx_pp_info.kpm_bufsize,
770 			    rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
771 			    tx_pp_info.kpm_max_frags, if_max_mtu);
772 			return EINVAL;
773 		}
774 	} else {
775 		drv_buf_size = if_max_mtu;
776 	}
777 
778 	if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
779 		static_assert((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
780 		*use_multi_buflet = true;
781 		/* default flowswitch buffer size */
782 		*buf_size = NX_FSW_BUFSIZE;
783 		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
784 	} else {
785 		*buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
786 	}
787 	_dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
788 	ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
789 	if (*buf_size >= *large_buf_size) {
790 		*large_buf_size = 0;
791 	}
792 	return 0;
793 }
794 
/*
 * Create and attach a flowswitch nexus to an interface that already has
 * a netif (native or compat) plumbed.  Computes the buffer geometry,
 * creates the flowswitch provider/instance, and attaches it to the
 * netif's device port.  Returns TRUE on success; FALSE when the
 * interface is ineligible or any step fails (partially-created state is
 * torn down).
 */
static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
{
	nexus_attr_t            __single attr = NULL;
	nexus_controller_t      controller;
	errno_t                 err = 0;
	uuid_t                  netif;
	uint32_t                buf_size = 0;
	uint32_t                large_buf_size = 0;
	bool                    multi_buflet;

	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
	    IFNET_IS_VMNET(ifp)) {
		/* auto-attach disabled for this interface */
		goto failed;
	}

	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
		/* not possible to attach (netif native/compat not plumbed) */
		goto failed;
	}

	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
		/* don't auto-attach */
		goto failed;
	}

	/* get the netif instance from the ifp */
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	/* compute slot/large buffer sizes and multi-buflet requirement */
	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet, &large_buf_size);
	if (err != 0) {
		goto failed;
	}
	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
	ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);

	/* Configure flowswitch buffer size */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
	    large_buf_size);
	VERIFY(err == 0);

	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	/* create the flowswitch provider and instance */
	controller = kern_nexus_shared_controller();
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
	if (err != 0) {
		goto failed;
	}

	/* attach the device port */
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
		goto failed;
	}
	/*
	 * NOTE(review): attr is only destroyed on the failure path below;
	 * confirm that registration copies/consumes the attr object,
	 * otherwise the success path leaks it.
	 */
	return TRUE;

failed:
	if (err != 0) {
		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
		    __func__, if_name(ifp), err);
	} else {
		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
		    __func__, if_name(ifp));
	}
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
893 
/*
 * Attach a flowswitch nexus to the interface, guarding against a
 * concurrent interface detach via the datamov refcount (see the inline
 * comment below).  Returns TRUE only when a new flowswitch was attached
 * and recorded in ifp->if_nx_flowswitch.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t               attached = FALSE;
	if_nexus_flowswitch     nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
		    if_name(ifp));
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));

	/*
	 * A race can happen between a thread creating a flowswitch and another thread
	 * detaching the interface (also destroying the flowswitch).
	 *
	 * ifnet_datamov_begin() is used here to force dlil_quiesce_and_detach_nexuses()
	 * (called by another thread) to wait until this function finishes so the
	 * flowswitch can be cleaned up by dlil_detach_flowswitch_nexus().
	 *
	 * If ifnet_get_ioref() is used instead, dlil_quiesce_and_detach_nexuses()
	 * would not wait (because ifp->if_nx_flowswitch isn't assigned) and the
	 * created flowswitch would be left hanging and ifnet_detach_final() would never
	 * wakeup because the existence of the flowswitch prevents the ifnet's ioref
	 * from being released.
	 */
	if (!ifnet_datamov_begin(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s not attached",
		    __func__, ifp->if_xname);
		goto done;
	}
	/* only attach if no flowswitch instance exists yet */
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
		attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
		if (attached) {
			/* publish the new flowswitch state on the ifnet */
			ifnet_lock_exclusive(ifp);
			ifp->if_nx_flowswitch = nexus_fsw;
			ifnet_lock_done(ifp);
		}
	}
	ifnet_datamov_end(ifp);

done:
	return attached;
}
950 
/*
 * Detach the flowswitch nexus described by nexus_fsw, handing the
 * provider/instance/device UUID triple to dlil_detach_nexus() for
 * cleanup (see also the error path in _dlil_attach_flowswitch_nexus,
 * which performs the same call to clean up provider and instance).
 */
__attribute__((noinline))
static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
{
	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
}
958 
959 __attribute__((noinline))
960 static void
dlil_netif_detach_notify(ifnet_t ifp)961 dlil_netif_detach_notify(ifnet_t ifp)
962 {
963 	ifnet_detach_notify_cb_t notify = NULL;
964 	void *__single arg = NULL;
965 
966 	ifnet_get_detach_notify(ifp, &notify, &arg);
967 	if (notify == NULL) {
968 		DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
969 		return;
970 	}
971 	(*notify)(arg);
972 }
973 
/*
 * Quiesce data movement on ifp and tear down its Skywalk nexuses:
 * first the flowswitch (if attached), then the netif.  Attachment is
 * inferred from the stored UUIDs; the provider/instance/attach UUIDs
 * are expected to be set or cleared together, which the ASSERTs verify.
 */
__attribute__((noinline))
static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
{
	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
	if_nexus_netif *nx_netif = &ifp->if_nx_netif;

	/* block new data movement and wait for in-flight use to drain */
	ifnet_datamov_suspend_and_drain(ifp);
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		/* a non-null device UUID implies a fully-formed attachment */
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
	} else {
		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
	}

	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
	} else {
		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
	}
	/* allow data movement again after the nexuses are gone */
	ifnet_datamov_resume(ifp);
}
1003 
1004 boolean_t
ifnet_add_netagent(ifnet_t ifp)1005 ifnet_add_netagent(ifnet_t ifp)
1006 {
1007 	int     error;
1008 
1009 	error = kern_nexus_interface_add_netagent(ifp);
1010 	os_log(OS_LOG_DEFAULT,
1011 	    "kern_nexus_interface_add_netagent(%s) returned %d",
1012 	    ifp->if_xname, error);
1013 	return error == 0;
1014 }
1015 
1016 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1017 ifnet_remove_netagent(ifnet_t ifp)
1018 {
1019 	int     error;
1020 
1021 	error = kern_nexus_interface_remove_netagent(ifp);
1022 	os_log(OS_LOG_DEFAULT,
1023 	    "kern_nexus_interface_remove_netagent(%s) returned %d",
1024 	    ifp->if_xname, error);
1025 	return error == 0;
1026 }
1027 
1028 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1029 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1030 {
1031 	if (!ifnet_is_fully_attached(ifp)) {
1032 		return FALSE;
1033 	}
1034 	return dlil_attach_flowswitch_nexus(ifp);
1035 }
1036 
1037 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)1038 ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1039 {
1040 	if_nexus_flowswitch     nexus_fsw;
1041 
1042 	ifnet_lock_exclusive(ifp);
1043 	nexus_fsw = ifp->if_nx_flowswitch;
1044 	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1045 	ifnet_lock_done(ifp);
1046 	return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1047 	           nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1048 }
1049 
1050 void
ifnet_attach_native_flowswitch(ifnet_t ifp)1051 ifnet_attach_native_flowswitch(ifnet_t ifp)
1052 {
1053 	if (!dlil_is_native_netif_nexus(ifp)) {
1054 		/* not a native netif */
1055 		return;
1056 	}
1057 	ifnet_attach_flowswitch_nexus(ifp);
1058 }
1059 
/*
 * Install (or clear, with cb == NULL) the flowswitch RX callback on ifp.
 * Blocks until all outstanding users of the previous callback have
 * released their references (ifnet_release_flowswitch_rx_callback()
 * issues the wakeup when the count drops to zero), so the old
 * callback/arg pair is never replaced while in use.  Always returns 0.
 */
int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
{
	lck_mtx_lock(&ifp->if_delegate_lock);
	while (ifp->if_fsw_rx_cb_ref > 0) {
		DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
		/* sleep releases if_delegate_lock and reacquires on wakeup */
		(void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
	}
	ifp->if_fsw_rx_cb = cb;
	ifp->if_fsw_rx_cb_arg = arg;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1075 
/*
 * Fetch the flowswitch RX callback/arg pair from ifp and take a
 * reference that pins them; the caller must drop it with
 * ifnet_release_flowswitch_rx_callback().  Returns ENOENT when no
 * callback is installed.
 */
int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
{
	/*
	 * This is for avoiding the unnecessary lock acquire for interfaces
	 * not used by a redirect interface.
	 */
	if (ifp->if_fsw_rx_cb == NULL) {
		return ENOENT;
	}
	lck_mtx_lock(&ifp->if_delegate_lock);
	/* re-check under the lock; it may have been cleared in between */
	if (ifp->if_fsw_rx_cb == NULL) {
		lck_mtx_unlock(&ifp->if_delegate_lock);
		return ENOENT;
	}
	*cbp = ifp->if_fsw_rx_cb;
	*argp = ifp->if_fsw_rx_cb_arg;
	ifp->if_fsw_rx_cb_ref++;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1097 
1098 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)1099 ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1100 {
1101 	lck_mtx_lock(&ifp->if_delegate_lock);
1102 	if (--ifp->if_fsw_rx_cb_ref == 0) {
1103 		wakeup(&ifp->if_fsw_rx_cb_ref);
1104 	}
1105 	lck_mtx_unlock(&ifp->if_delegate_lock);
1106 }
1107 
/*
 * Set (or clear, with parent == NULL) the delegate parent of difp.
 * Blocks until all outstanding references to the current parent have
 * been dropped (ifnet_release_delegate_parent() issues the wakeup), so
 * the parent pointer is never swapped while someone is using it.
 * Always returns 0.
 */
int
ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
{
	lck_mtx_lock(&difp->if_delegate_lock);
	while (difp->if_delegate_parent_ref > 0) {
		DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
		/* sleep releases if_delegate_lock and reacquires on wakeup */
		(void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
	}
	difp->if_delegate_parent = parent;
	lck_mtx_unlock(&difp->if_delegate_lock);
	return 0;
}
1122 
1123 int
ifnet_get_delegate_parent(ifnet_t difp,ifnet_t * parentp)1124 ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1125 {
1126 	lck_mtx_lock(&difp->if_delegate_lock);
1127 	if (difp->if_delegate_parent == NULL) {
1128 		lck_mtx_unlock(&difp->if_delegate_lock);
1129 		return ENOENT;
1130 	}
1131 	*parentp = difp->if_delegate_parent;
1132 	difp->if_delegate_parent_ref++;
1133 	lck_mtx_unlock(&difp->if_delegate_lock);
1134 	return 0;
1135 }
1136 
1137 void
ifnet_release_delegate_parent(ifnet_t difp)1138 ifnet_release_delegate_parent(ifnet_t difp)
1139 {
1140 	lck_mtx_lock(&difp->if_delegate_lock);
1141 	if (--difp->if_delegate_parent_ref == 0) {
1142 		wakeup(&difp->if_delegate_parent_ref);
1143 	}
1144 	lck_mtx_unlock(&difp->if_delegate_lock);
1145 }
1146 
/*
 * Store the detach-notification callback/arg pair on ifp.
 * Caller must hold the ifnet lock exclusively.
 */
__attribute__((noinline))
void
ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
{
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	ifp->if_detach_notify = notify;
	ifp->if_detach_notify_arg = arg;
}
1155 
/*
 * Read back the detach-notification callback/arg pair from ifp.
 * Caller must hold the ifnet lock exclusively.
 */
__attribute__((noinline))
void
ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
{
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	*notifyp = ifp->if_detach_notify;
	*argp = ifp->if_detach_notify_arg;
}
1164 
/*
 * Locking wrapper around ifnet_set_detach_notify_locked(): takes the
 * exclusive ifnet lock for the duration of the update.
 */
__attribute__((noinline))
void
ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
{
	ifnet_lock_exclusive(ifp);
	ifnet_set_detach_notify_locked(ifp, notify, arg);
	ifnet_lock_done(ifp);
}
1173 
/*
 * Locking wrapper around ifnet_get_detach_notify_locked(): takes the
 * exclusive ifnet lock for the duration of the read.
 */
__attribute__((noinline))
void
ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
{
	ifnet_lock_exclusive(ifp);
	ifnet_get_detach_notify_locked(ifp, notifyp, argp);
	ifnet_lock_done(ifp);
}
1182 #endif /* SKYWALK */
1183 
/*
 * Sanity-check an inbound mbuf before DLIL processing: it must carry a
 * packet header (MBUF_PKTHDR) and its recorded receive interface must
 * match ifp (packets delivered via the loopback interface are exempt
 * from the rcvif match).  Any violation is fatal (panic).
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	ifnet_ref_t _rcvif = mbuf_pkthdr_rcvif(m);                      \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}
1192 
#define MBPS    (1ULL * 1000 * 1000)    /* one megabit per second */
#define GBPS    (MBPS * 1000)           /* one gigabit per second */

/*
 * Per-speed RX-poll tuning entry: packet/byte low and high watermarks
 * applied for the given downlink speed tier.
 */
struct rxpoll_time_tbl {
	u_int64_t       speed;          /* downlink speed */
	u_int32_t       plowat;         /* packets low watermark */
	u_int32_t       phiwat;         /* packets high watermark */
	u_int32_t       blowat;         /* bytes low watermark */
	u_int32_t       bhiwat;         /* bytes high watermark */
};

/* speed tiers in ascending order; terminated by an all-zero sentinel */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed =  10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024)    },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
1212 
1213 int
proto_hash_value(u_int32_t protocol_family)1214 proto_hash_value(u_int32_t protocol_family)
1215 {
1216 	/*
1217 	 * dlil_proto_unplumb_all() depends on the mapping between
1218 	 * the hash bucket index and the protocol family defined
1219 	 * here; future changes must be applied there as well.
1220 	 */
1221 	switch (protocol_family) {
1222 	case PF_INET:
1223 		return 0;
1224 	case PF_INET6:
1225 		return 1;
1226 	case PF_VLAN:
1227 		return 2;
1228 	case PF_UNSPEC:
1229 	default:
1230 		return 3;
1231 	}
1232 }
1233 
1234 __private_extern__ int
dlil_post_msg(struct ifnet * ifp,u_int32_t event_subclass,u_int32_t event_code,struct net_event_data * event_data,u_int32_t event_data_len,boolean_t suppress_generation)1235 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1236     u_int32_t event_code, struct net_event_data *event_data,
1237     u_int32_t event_data_len, boolean_t suppress_generation)
1238 {
1239 	struct net_event_data ev_data;
1240 	struct kev_msg ev_msg;
1241 
1242 	bzero(&ev_msg, sizeof(ev_msg));
1243 	bzero(&ev_data, sizeof(ev_data));
1244 	/*
1245 	 * a net event always starts with a net_event_data structure
1246 	 * but the caller can generate a simple net event or
1247 	 * provide a longer event structure to post
1248 	 */
1249 	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1250 	ev_msg.kev_class        = KEV_NETWORK_CLASS;
1251 	ev_msg.kev_subclass     = event_subclass;
1252 	ev_msg.event_code       = event_code;
1253 
1254 	if (event_data == NULL) {
1255 		event_data = &ev_data;
1256 		event_data_len = sizeof(struct net_event_data);
1257 	}
1258 
1259 	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1260 	event_data->if_family = ifp->if_family;
1261 	event_data->if_unit   = (u_int32_t)ifp->if_unit;
1262 
1263 	ev_msg.dv[0].data_length = event_data_len;
1264 	ev_msg.dv[0].data_ptr    = event_data;
1265 	ev_msg.dv[1].data_length = 0;
1266 
1267 	bool update_generation = true;
1268 	if (event_subclass == KEV_DL_SUBCLASS) {
1269 		/* Don't update interface generation for frequent link quality and state changes  */
1270 		switch (event_code) {
1271 		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1272 		case KEV_DL_RRC_STATE_CHANGED:
1273 		case KEV_DL_PRIMARY_ELECTED:
1274 			update_generation = false;
1275 			break;
1276 		default:
1277 			break;
1278 		}
1279 	}
1280 
1281 	/*
1282 	 * Some events that update generation counts might
1283 	 * want to suppress generation count.
1284 	 * One example is node presence/absence where we still
1285 	 * issue kernel event for the invocation but want to avoid
1286 	 * expensive operation of updating generation which triggers
1287 	 * NECP client updates.
1288 	 */
1289 	if (suppress_generation) {
1290 		update_generation = false;
1291 	}
1292 
1293 	return dlil_event_internal(ifp, &ev_msg, update_generation);
1294 }
1295 
/*
 * Reset all RX-poll state on ifp to its initial values: cancel the
 * poll cycle, force the input model back to IFNET_MODEL_INPUT_POLL_OFF,
 * and clear the accumulated poll statistics and timestamps.
 */
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	/* NULL cancels any programmed poll cycle */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
1314 
1315 
1316 #if SKYWALK
1317 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)1318 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
1319     enum net_filter_event_subsystems state)
1320 {
1321 	evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%d",
1322 	    __func__, state);
1323 
1324 	bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
1325 	if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
1326 		if_enable_fsw_transport_netagent = 1;
1327 	} else {
1328 		if_enable_fsw_transport_netagent = 0;
1329 	}
1330 	if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
1331 		kern_nexus_update_netagents();
1332 	} else if (!if_enable_fsw_transport_netagent) {
1333 		necp_update_all_clients();
1334 	}
1335 }
1336 #endif /* SKYWALK */
1337 
/*
 * One-time DLIL subsystem initialization, called during network stack
 * bring-up.  Verifies structure-layout invariants, reads boot-args and
 * device-tree overrides (including the Skywalk netagent policy), brings
 * up the DLIL sub-subsystems, and starts the main input and detacher
 * threads, waiting until both have been scheduled at least once.
 */
void
dlil_init(void)
{
	thread_t __single thread = THREAD_NULL;

	dlil_main_input_thread = (struct dlil_threading_info *) &dlil_main_input_thread_info;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	static_assert(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	static_assert(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	static_assert(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	static_assert(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	static_assert(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	static_assert(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	static_assert(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	static_assert(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	static_assert(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	static_assert(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	static_assert(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	static_assert(CSUM_IP == IF_HWASSIST_CSUM_IP);
	static_assert(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	static_assert(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	static_assert(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	static_assert(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	static_assert(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	static_assert(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	static_assert(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	static_assert(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	static_assert(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	static_assert(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	static_assert(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	static_assert(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	static_assert(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	static_assert(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	static_assert(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	static_assert(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	static_assert(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	static_assert(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	static_assert(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	static_assert(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	static_assert(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	static_assert(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	static_assert(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	static_assert(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	static_assert(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	static_assert(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	static_assert(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	static_assert(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	static_assert(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	static_assert(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	static_assert(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	static_assert(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	static_assert(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	static_assert(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	static_assert(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	static_assert(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	static_assert(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	static_assert(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	static_assert(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	static_assert(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	static_assert(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	static_assert(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	static_assert(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	static_assert(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	static_assert(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	static_assert(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
	static_assert(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
	static_assert(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);

	static_assert(DLIL_MODIDLEN == IFNET_MODIDLEN);
	static_assert(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* boot-arg tunables for threading/routing behavior */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("if_link_heuristics", &if_link_heuristics_flags, sizeof(if_link_heuristics_flags));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled.  This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	/*
	 * NOTE(review): the "force-disabled" branch fires whenever the
	 * netagent ends up disabled without a device-tree disable override,
	 * even when no boot-arg was supplied — confirm this asymmetry with
	 * the enable case is intentional (log-only, no functional impact).
	 */
	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
	if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */

	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize droptap interface */
	droptap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
1631 
/*
 * Attach an interface filter described by if_filter to ifp.
 *
 * On success, *filter_ref receives the newly allocated filter (the
 * caller's handle for later detach) and 0 is returned.  Returns ENXIO
 * when ifp is not in the global interface list or its I/O reference
 * cannot be taken (interface no longer attached).
 *
 * Holds the interface list lock shared for the duration so ifp cannot
 * disappear, and enters the filter monitor while manipulating
 * if_flt_head.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	if (!ifnet_get_ioref(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	/* NOTE(review): allocation result is used unchecked — presumably
	 * dlif_filt_alloc() cannot fail (blocking zone alloc); confirm. */
	filter = dlif_filt_alloc();
	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 * and for management interfaces
	 */
	if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	/* detach callback is installed unconditionally */
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	/* account the attach in global and per-interface statistics */
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		/* third-party filters are tracked per interface */
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	/* drop the I/O reference taken by ifnet_get_ioref() above */
	ifnet_decr_iorefcnt(ifp);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		dlif_filt_free(filter);
	}

	return retval;
}
1723 
/*
 * Detach an interface filter and destroy it.
 *
 * filter:   filter reference previously returned at attach time.
 * detached: zero for an explicit detach, in which case the filter is
 *           located on (and unlinked from) its interface's filter list;
 *           non-zero when called from ifnet_detach_final(), where the
 *           caller has already emptied if_flt_head.
 *
 * Returns 0 on success, or EINVAL if the filter reference could not be
 * found on any attached interface (explicit-detach case only).
 */
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_ref_t ifp = NULL;

		/* Scan every attached interface for the filter reference */
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;   /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				/* only non-internal filters are counted */
				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
					VERIFY(ifp->if_flt_non_os_count != 0);
					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
				}
				/*
				 * Decrease filter count and route_generation
				 * ID to let TCP know it should reevaluate doing
				 * TSO or not.
				 */
				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
					ifnet_filter_update_tso(ifp, FALSE);
				}
				/*
				 * When we remove the bridge's interface filter,
				 * clear the field in the ifnet.
				 */
				if ((filter->filt_flags & DLIL_IFF_BRIDGE)
				    != 0) {
					ifp->if_bridge = NULL;
				}
				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	} else {
		ifnet_ref_t ifp = filter->filt_ifp;
		/*
		 * Here we are called from ifnet_detach_final(); the
		 * caller had emptied if_flt_head and we're doing an
		 * implicit filter detach because the interface is
		 * about to go away.  Make sure to adjust the counters
		 * in this case.  We don't need the protection of the
		 * filter monitor since we're called as part of the
		 * final detach in the context of the detacher thread.
		 */
		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
			VERIFY(ifp->if_flt_non_os_count != 0);
			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
		}
		/*
		 * Decrease filter count and route_generation
		 * ID to let TCP know it should reevaluate doing
		 * TSO or not.
		 */
		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
			ifnet_filter_update_tso(ifp, FALSE);
		}
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/* Update global filter accounting before freeing the filter */
	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
	}
#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	/* Free the filter */
	dlif_filt_free(filter);
	filter = NULL;
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
1854 
1855 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)1856 dlil_detach_filter(interface_filter_t filter)
1857 {
1858 	if (filter == NULL) {
1859 		return;
1860 	}
1861 	dlil_detach_filter_internal(filter, 0);
1862 }
1863 
1864 __private_extern__ boolean_t
dlil_has_ip_filter(void)1865 dlil_has_ip_filter(void)
1866 {
1867 	boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
1868 
1869 	VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
1870 
1871 	DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
1872 	return has_filter;
1873 }
1874 
1875 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)1876 dlil_has_if_filter(struct ifnet *ifp)
1877 {
1878 	boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
1879 	DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
1880 	return has_filter;
1881 }
1882 
1883 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)1884 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
1885 {
1886 	if (p != NULL) {
1887 		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
1888 		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
1889 			return EINVAL;
1890 		}
1891 		if (p->packets_lowat != 0 &&    /* hiwat must be non-zero */
1892 		    p->packets_lowat >= p->packets_hiwat) {
1893 			return EINVAL;
1894 		}
1895 		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
1896 		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
1897 			return EINVAL;
1898 		}
1899 		if (p->bytes_lowat != 0 &&      /* hiwat must be non-zero */
1900 		    p->bytes_lowat >= p->bytes_hiwat) {
1901 			return EINVAL;
1902 		}
1903 		if (p->interval_time != 0 &&
1904 		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
1905 			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
1906 		}
1907 	}
1908 	return 0;
1909 }
1910 
/*
 * Recompute the interface's rx-poll tunables.
 *
 * p may be NULL, meaning "auto-tune everything".  When the link rate
 * is unknown (0) and no explicit parameters are given, polling is
 * effectively disabled by zeroing the low watermarks and maxing the
 * high watermarks.  Otherwise values come from rxpoll_tbl, indexed by
 * the highest table tier whose speed does not exceed the link rate,
 * unless the caller supplied a non-zero value.
 *
 * NOTE: the global sysctl overrides take precedence over the caller:
 * a non-zero if_rxpoll_max wins over p->packets_limit, and a modified
 * if_rxpoll_interval_time wins over p->interval_time.
 *
 * Caller must hold the input thread's dlth_lock (see
 * dlil_rxpoll_set_params).
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* pick the highest tier with speed <= inbw */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/* sysctl override (if_rxpoll_max != 0) beats the caller */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ?  if_rxpoll_max : p->packets_limit);
		/* likewise, a tuned if_rxpoll_interval_time beats the caller */
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert nanosecond holdtimes into timespec form */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
1980 
1981 /*
1982  * Must be called on an attached ifnet (caller is expected to check.)
1983  * Caller may pass NULL for poll parameters to indicate "auto-tuning."
1984  */
/*
 * Validate and apply rx-poll parameters for an attached, RXPOLL-capable
 * interface.  p == NULL requests auto-tuned values.  When 'locked' is
 * TRUE the caller already holds the input thread's dlth_lock; otherwise
 * it is taken (and released) here.
 *
 * Returns 0 on success, ENXIO if the interface does not support rx-poll
 * or has no input thread, or EINVAL from parameter validation.
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->dlth_lock);
	}
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->dlth_lock);
	}
	return 0;
}
2022 
2023 /*
2024  * Must be called on an attached ifnet (caller is expected to check.)
2025  */
2026 errno_t
dlil_rxpoll_get_params(struct ifnet * ifp,struct ifnet_poll_params * p)2027 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2028 {
2029 	struct dlil_threading_info *inp;
2030 
2031 	VERIFY(ifp != NULL && p != NULL);
2032 	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2033 		return ENXIO;
2034 	}
2035 
2036 	bzero(p, sizeof(*p));
2037 
2038 	lck_mtx_lock(&inp->dlth_lock);
2039 	p->packets_limit = ifp->if_rxpoll_plim;
2040 	p->packets_lowat = ifp->if_rxpoll_plowat;
2041 	p->packets_hiwat = ifp->if_rxpoll_phiwat;
2042 	p->bytes_lowat = ifp->if_rxpoll_blowat;
2043 	p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2044 	p->interval_time = ifp->if_rxpoll_ival;
2045 	lck_mtx_unlock(&inp->dlth_lock);
2046 
2047 	return 0;
2048 }
2049 
/*
 * Simple input variant: enqueue a packet chain on ifp.  No tail
 * pointer is provided and the stat increments (s) are optional.
 */
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}
2056 
/*
 * Extended input variant: the driver supplies both the tail of the
 * packet chain and the stat increments (both required; see the
 * validation in ifnet_input_common).
 */
errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}
2063 
/*
 * Polling input variant: used in opportunistic-polling mode.  An empty
 * chain (m_head == NULL) is legal here and is treated as the
 * non-extended case.
 */
errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	           (m_head != NULL), TRUE);
}
2071 
/*
 * Common backend for ifnet_input{,_extended,_poll}.
 *
 * Validates the chain/stat combination, holds an IO (datamov) refcnt on
 * the interface for the duration of the call, counts packets/bytes when
 * the caller didn't, and hands the chain to the interface's DLIL input
 * function.  On any validation failure the chain is freed and EINVAL is
 * returned.
 *
 * ext:  TRUE when driver supplied m_tail and s (extended variant).
 * poll: TRUE when called from the opportunistic-polling path, where an
 *       empty chain is allowed.
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	/* an empty chain is only valid for the polling variant */
	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail given: walk the chain to find it and count */
		last = m_head;
		while (m_head != NULL) {
			m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* re-count the chain to cross-check driver stats */
			last = m_head;
			while (1) {
				m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-provided counts */
			m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	/* normalize the stats: always pass a populated structure down */
	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	/* input administratively disabled: drop, but still count */
	if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
		m_freem_list(m_head);

		os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
		os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);

		goto done;
	}

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

done:
	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
2200 
2201 
/*
 * Kick the interface's starter thread.
 *
 * resetfc:      TRUE to clear a flow-control condition before starting
 *               (used when the lower layer resumes).
 * ignore_delay: TRUE to bypass the delayed-start heuristic
 *               (IFSF_NO_DELAY).
 *
 * No-op for interfaces without a starter thread (IFEF_TXSTART clear).
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (ignore_delay) {
		ifp->if_start_flags |= IFSF_NO_DELAY;
	}
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	/* bump the request counter so the starter loop notices new work */
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
2234 
/*
 * Request transmit service on ifp (honors flow control and the
 * delayed-start heuristic).
 */
void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, FALSE);
}
2240 
/*
 * Like ifnet_start(), but bypasses the delayed-start heuristic so the
 * starter thread services the queue immediately.
 */
void
ifnet_start_ignore_delay(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, TRUE);
}
2246 
/*
 * Entry point of the per-interface starter thread (created with an
 * extra reference by kernel_thread_start()).  Performs one-time setup
 * (thread name, optional cellular thread group, lo0 affinity binding),
 * then parks in ifnet_start_thread_cont, which is re-entered as a
 * continuation for the life of the thread.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	ifnet_ref_t ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), __unsafe_null_terminated_from_indexable(thread_name));

#if CONFIG_THREAD_GROUPS
	if (IFNET_REQUIRES_CELL_GROUP(ifp)) {
		thread_group_join_cellular();
	}
#endif

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *__single tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	/* enter the embryonic state and hand off to the continuation */
	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2318 
/*
 * Continuation body of the starter thread.  Re-entered after every
 * thread_block_parameter(); services transmit requests by repeatedly
 * invoking the driver's if_start routine (under an IO refcnt, without
 * holding if_start_lock), until no new request arrived during the last
 * pass or the interface was flow-controlled/terminated.  It then either
 * re-arms a wait (possibly with a deadline for TBR pacing or delayed
 * start) or terminates when IFSF_TERMINATING is set.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	ifnet_ref_t ifp = v;
	struct ifclassq *ifq = ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
		goto terminate;
	}

	/* first wakeup after creation: leave embryonic state only */
	if (__improbable(ifp->if_start_embryonic)) {
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		/* delayed-start heuristic: batch small queues briefly */
		if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
		    !IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		}
		ifp->if_start_flags &= ~IFSF_NO_DELAY;
		ifp->if_start_delayed = 0;
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
#define _IFSF_DISABLED  (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

#if SKYWALK
	/*
	 * Wakeup any waiters, e.g. any threads waiting to
	 * detach the interface from the flowswitch, etc.
	 */
	if (ifp->if_start_waiters != 0) {
		ifp->if_start_waiters = 0;
		wakeup(&ifp->if_start_waiters);
	}
#endif /* SKYWALK */
	if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts = NULL;

		/*
		 * NOTE(review): ts was just initialized to NULL above, so
		 * this condition is always true; the guard looks redundant.
		 */
		if (ts == NULL) {
			/* TBR pacing: re-run after if_start_cycle */
			ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
			    &ifp->if_start_cycle : NULL);
		}

		/* delayed start pending: wake up after the delay timeout */
		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		/* a zero timespec means "no deadline" */
		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);

		/* clear if_start_thread to allow termination to continue */
		ASSERT(ifp->if_start_thread != THREAD_NULL);
		ifp->if_start_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2464 
2465 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)2466 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2467 {
2468 	if (ts == NULL) {
2469 		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
2470 	} else {
2471 		*(&ifp->if_start_cycle) = *ts;
2472 	}
2473 
2474 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2475 		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
2476 		    if_name(ifp), ts->tv_nsec);
2477 	}
2478 }
2479 
2480 static inline void
ifnet_poll_wakeup(struct ifnet * ifp)2481 ifnet_poll_wakeup(struct ifnet *ifp)
2482 {
2483 	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
2484 
2485 	ifp->if_poll_req++;
2486 	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
2487 	    ifp->if_poll_thread != THREAD_NULL) {
2488 		wakeup_one((caddr_t)&ifp->if_poll_thread);
2489 	}
2490 }
2491 
/*
 * Public entry point: request a poll pass on ifp's poller thread.
 */
void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
}
2502 
/*
 * Entry point of the per-interface poller thread (RXPOLL interfaces
 * only).  Names the thread, enters the embryonic state, and hands off
 * to ifnet_poll_thread_cont, which is re-entered as a continuation for
 * the life of the thread.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	ifnet_ref_t ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, __unsafe_null_terminated_from_indexable(thread_name));

	/* enter the embryonic state and hand off to the continuation */
	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2531 
/*
 * Continuation body of the poller thread.  Re-entered after every
 * thread_block_parameter(); services poll requests by invoking the
 * driver's if_input_poll routine (under an IO refcnt, without holding
 * if_poll_lock) and feeding the harvested chain into
 * ifnet_input_common(), until no new request arrived during the last
 * pass.  It then re-arms a wait (with a deadline when if_poll_cycle is
 * non-zero) or terminates when IF_POLLF_TERMINATING is set.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	ifnet_ref_t ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	/* first wakeup after creation: leave embryonic state only */
	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		mbuf_ref_t m_head, m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		/* per-pass packet budget for the driver poll */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_get_ioref(ifp)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		&m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			/* empty poll pass: still notify the input path */
			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2698 
2699 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)2700 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2701 {
2702 	if (ts == NULL) {
2703 		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
2704 	} else {
2705 		*(&ifp->if_poll_cycle) = *ts;
2706 	}
2707 
2708 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2709 		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
2710 		    if_name(ifp), ts->tv_nsec);
2711 	}
2712 }
2713 
2714 void
ifnet_purge(struct ifnet * ifp)2715 ifnet_purge(struct ifnet *ifp)
2716 {
2717 	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
2718 		if_qflush(ifp, ifp->if_snd);
2719 	}
2720 }
2721 
2722 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)2723 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2724 {
2725 	switch (ev) {
2726 	case CLASSQ_EV_LINK_BANDWIDTH:
2727 		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
2728 			ifp->if_poll_update++;
2729 		}
2730 		break;
2731 
2732 	default:
2733 		break;
2734 	}
2735 }
2736 
2737 errno_t
ifnet_set_output_sched_model(struct ifnet * ifp,u_int32_t model)2738 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2739 {
2740 	return ifclassq_change(ifp->if_snd, model);
2741 }
2742 
2743 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2744 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2745 {
2746 	if (ifp == NULL) {
2747 		return EINVAL;
2748 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2749 		return ENXIO;
2750 	}
2751 
2752 	ifclassq_set_maxlen(ifp->if_snd, maxqlen);
2753 
2754 	return 0;
2755 }
2756 
2757 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2758 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2759 {
2760 	if (ifp == NULL || maxqlen == NULL) {
2761 		return EINVAL;
2762 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2763 		return ENXIO;
2764 	}
2765 
2766 	*maxqlen = ifclassq_get_maxlen(ifp->if_snd);
2767 
2768 	return 0;
2769 }
2770 
2771 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)2772 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2773 {
2774 	errno_t err;
2775 
2776 	if (ifp == NULL || pkts == NULL) {
2777 		err = EINVAL;
2778 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2779 		err = ENXIO;
2780 	} else {
2781 		err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
2782 		    IF_CLASSQ_ALL_GRPS, pkts, NULL);
2783 	}
2784 
2785 	return err;
2786 }
2787 
2788 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)2789 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2790     u_int32_t *pkts, u_int32_t *bytes)
2791 {
2792 	errno_t err;
2793 
2794 	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2795 	    (pkts == NULL && bytes == NULL)) {
2796 		err = EINVAL;
2797 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2798 		err = ENXIO;
2799 	} else {
2800 		err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
2801 		    pkts, bytes);
2802 	}
2803 
2804 	return err;
2805 }
2806 
2807 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2808 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2809 {
2810 	struct dlil_threading_info *inp;
2811 
2812 	if (ifp == NULL) {
2813 		return EINVAL;
2814 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2815 		return ENXIO;
2816 	}
2817 
2818 	if (maxqlen == 0) {
2819 		maxqlen = if_rcvq_maxlen;
2820 	} else if (maxqlen < IF_RCVQ_MINLEN) {
2821 		maxqlen = IF_RCVQ_MINLEN;
2822 	}
2823 
2824 	inp = ifp->if_inp;
2825 	lck_mtx_lock(&inp->dlth_lock);
2826 	qlimit(&inp->dlth_pkts) = maxqlen;
2827 	lck_mtx_unlock(&inp->dlth_lock);
2828 
2829 	return 0;
2830 }
2831 
2832 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2833 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2834 {
2835 	struct dlil_threading_info *inp;
2836 
2837 	if (ifp == NULL || maxqlen == NULL) {
2838 		return EINVAL;
2839 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2840 		return ENXIO;
2841 	}
2842 
2843 	inp = ifp->if_inp;
2844 	lck_mtx_lock(&inp->dlth_lock);
2845 	*maxqlen = qlimit(&inp->dlth_pkts);
2846 	lck_mtx_unlock(&inp->dlth_lock);
2847 	return 0;
2848 }
2849 
2850 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)2851 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
2852     uint16_t delay_timeout)
2853 {
2854 	if (delay_qlen > 0 && delay_timeout > 0) {
2855 		if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
2856 		ifp->if_start_delay_qlen = MIN(100, delay_qlen);
2857 		ifp->if_start_delay_timeout = min(20000, delay_timeout);
2858 		/* convert timeout to nanoseconds */
2859 		ifp->if_start_delay_timeout *= 1000;
2860 		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
2861 		    ifp->if_xname, (uint32_t)delay_qlen,
2862 		    (uint32_t)delay_timeout);
2863 	} else {
2864 		if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
2865 	}
2866 }
2867 
/*
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
 * buf holds the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *__indexable buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	/* aligned bounce buffer, used only when buf is misaligned */
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		/* copy header into the aligned bounce buffer if needed */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		/* fast path: no DSCP bits set (ECN bits are preserved) */
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		/*
		 * Incrementally patch the IPv4 header checksum for the
		 * TOS change (RFC 1624-style update) rather than
		 * recomputing it from scratch.
		 */
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		/* write the modified header back if we bounced it */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;
		ASSERT(ip_ver == IPV6_VERSION);

		/* copy header into the aligned bounce buffer if needed */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		/* fast path: DSCP already zero */
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		/* v6 has no header checksum; just rewrite the flow word */
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		/* write the modified header back if we bounced it */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
2928 
/*
 * Enqueue one packet (mbuf or native Skywalk packet) on the given
 * classq (or on ifp->if_snd when ifcq is NULL).  Along the way this
 * stamps a send timestamp if the packet doesn't carry one, records
 * foreground/realtime activity times, applies the Wi-Fi multicast DSCP
 * workaround, and runs the ENQUEUE_MULTI delay-start heuristics.  The
 * caller relinquishes ownership of *p; *pdrop is filled in by
 * ifclassq_enqueue() (drop indication).
 */
static inline errno_t
ifnet_enqueue_single(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	uint8_t *mcast_buf = NULL;
	uint8_t ip_ver;
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		/* stamp the current uptime if no valid timestamp present */
		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)net_uptime();
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)net_uptime();
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)net_uptime();
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)net_uptime();
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* ensure the Ethernet header is contiguous */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = mtod(p->cp_mbuf, struct ether_header *);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* non-IP multicast: no DSCP to clear */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				eh = mtod(p->cp_mbuf, struct ether_header *);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface.  If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		/* stamp the current uptime if no valid timestamp present */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)net_uptime();
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)net_uptime();
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)net_uptime();
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)net_uptime();
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			/* too short for any workaround; enqueue as-is */
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* non-IP multicast: no DSCP to clear */
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* apply the Wi-Fi multicast DSCP workaround, if armed above */
	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This is
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* still within the sampling window */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* idle for >= 200ms: reset and disable delay */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* window expired: evaluate the heuristics */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* first packet: open a new sampling window */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
3238 
3239 static inline errno_t
ifnet_enqueue_chain(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3240 ifnet_enqueue_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3241     classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
3242     boolean_t flush, boolean_t *pdrop)
3243 {
3244 	int error;
3245 
3246 	/* enqueue the packet (caller consumes object) */
3247 	error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
3248 	    cnt, bytes, pdrop);
3249 
3250 	/*
3251 	 * Tell the driver to start dequeueing; do this even when the queue
3252 	 * for the packet is suspended (EQSUSPENDED), as the driver could still
3253 	 * be dequeueing from other unsuspended queues.
3254 	 */
3255 	if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
3256 		ifnet_start(ifp);
3257 	}
3258 	return error;
3259 }
3260 
3261 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * __sized_by (n_pkts)pkts,uint32_t n_pkts)3262 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *__sized_by(n_pkts)pkts, uint32_t n_pkts)
3263 {
3264 	ifnet_ref_t ifp = handle;
3265 	boolean_t pdrop;        /* dummy */
3266 	uint32_t i;
3267 
3268 	ASSERT(n_pkts >= 1);
3269 	for (i = 0; i < n_pkts - 1; i++) {
3270 		(void) ifnet_enqueue_single(ifp, ifp->if_snd, &pkts[i].pktsched_pkt,
3271 		    FALSE, &pdrop);
3272 	}
3273 	/* flush with the last packet */
3274 	(void) ifnet_enqueue_single(ifp, ifp->if_snd, &pkts[i].pktsched_pkt,
3275 	    TRUE, &pdrop);
3276 
3277 	return 0;
3278 }
3279 
3280 static inline errno_t
ifnet_enqueue_common_single(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)3281 ifnet_enqueue_common_single(struct ifnet *ifp, struct ifclassq *ifcq,
3282     classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
3283 {
3284 	if (ifp->if_output_netem != NULL) {
3285 		bool drop;
3286 		errno_t error;
3287 		error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
3288 		*pdrop = drop ? TRUE : FALSE;
3289 		return error;
3290 	} else {
3291 		return ifnet_enqueue_single(ifp, ifcq, pkt, flush, pdrop);
3292 	}
3293 }
3294 
3295 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)3296 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3297 {
3298 	uint32_t bytes = m_pktlen(m);
3299 	struct mbuf *tail = m;
3300 	uint32_t cnt = 1;
3301 	boolean_t pdrop;
3302 
3303 	while (tail->m_nextpkt) {
3304 		VERIFY(tail->m_flags & M_PKTHDR);
3305 		tail = tail->m_nextpkt;
3306 		cnt++;
3307 		bytes += m_pktlen(tail);
3308 	}
3309 
3310 	return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
3311 }
3312 
3313 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)3314 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3315     boolean_t *pdrop)
3316 {
3317 	classq_pkt_t pkt;
3318 
3319 	m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
3320 	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3321 	    m->m_nextpkt != NULL) {
3322 		if (m != NULL) {
3323 			m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_INVALID, NULL, 0);
3324 			*pdrop = TRUE;
3325 		}
3326 		return EINVAL;
3327 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3328 	    !ifnet_is_fully_attached(ifp)) {
3329 		/* flag tested without lock for performance */
3330 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3331 		*pdrop = TRUE;
3332 		return ENXIO;
3333 	} else if (!(ifp->if_flags & IFF_UP)) {
3334 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3335 		*pdrop = TRUE;
3336 		return ENETDOWN;
3337 	}
3338 
3339 	CLASSQ_PKT_INIT_MBUF(&pkt, m);
3340 	return ifnet_enqueue_common_single(ifp, NULL, &pkt, flush, pdrop);
3341 }
3342 
3343 errno_t
ifnet_enqueue_mbuf_chain(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3344 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
3345     struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
3346     boolean_t *pdrop)
3347 {
3348 	classq_pkt_t head, tail;
3349 
3350 	m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
3351 	ASSERT(m_head != NULL);
3352 	ASSERT((m_head->m_flags & M_PKTHDR) != 0);
3353 	ASSERT(m_tail != NULL);
3354 	ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
3355 	ASSERT(ifp != NULL);
3356 	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3357 
3358 	if (!ifnet_is_fully_attached(ifp)) {
3359 		/* flag tested without lock for performance */
3360 		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3361 		*pdrop = TRUE;
3362 		return ENXIO;
3363 	} else if (!(ifp->if_flags & IFF_UP)) {
3364 		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3365 		*pdrop = TRUE;
3366 		return ENETDOWN;
3367 	}
3368 
3369 	CLASSQ_PKT_INIT_MBUF(&head, m_head);
3370 	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3371 	return ifnet_enqueue_chain(ifp, NULL, &head, &tail, cnt, bytes,
3372 	           flush, pdrop);
3373 }
3374 
3375 #if SKYWALK
3376 errno_t
ifnet_enqueue_pkt(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3377 ifnet_enqueue_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
3378     struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3379 {
3380 	classq_pkt_t pkt;
3381 
3382 	ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
3383 
3384 	if (__improbable(ifp == NULL || kpkt == NULL)) {
3385 		if (kpkt != NULL) {
3386 			pp_free_packet(__DECONST(struct kern_pbufpool *,
3387 			    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3388 			*pdrop = TRUE;
3389 		}
3390 		return EINVAL;
3391 	} else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
3392 	    !ifnet_is_fully_attached(ifp))) {
3393 		/* flag tested without lock for performance */
3394 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3395 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3396 		*pdrop = TRUE;
3397 		return ENXIO;
3398 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3399 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3400 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3401 		*pdrop = TRUE;
3402 		return ENETDOWN;
3403 	}
3404 
3405 	CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
3406 	return ifnet_enqueue_common_single(ifp, ifcq, &pkt, flush, pdrop);
3407 }
3408 
3409 errno_t
ifnet_enqueue_pkt_chain(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3410 ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3411     struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3412     uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3413 {
3414 	classq_pkt_t head, tail;
3415 
3416 	ASSERT(k_head != NULL);
3417 	ASSERT(k_tail != NULL);
3418 	ASSERT(ifp != NULL);
3419 	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3420 
3421 	if (!ifnet_is_fully_attached(ifp)) {
3422 		/* flag tested without lock for performance */
3423 		pp_free_packet_chain(k_head, NULL);
3424 		*pdrop = TRUE;
3425 		return ENXIO;
3426 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3427 		pp_free_packet_chain(k_head, NULL);
3428 		*pdrop = TRUE;
3429 		return ENETDOWN;
3430 	}
3431 
3432 	CLASSQ_PKT_INIT_PACKET(&head, k_head);
3433 	CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
3434 	return ifnet_enqueue_chain(ifp, ifcq, &head, &tail, cnt, bytes,
3435 	           flush, pdrop);
3436 }
3437 #endif /* SKYWALK */
3438 
3439 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)3440 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3441 {
3442 	errno_t rc;
3443 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3444 
3445 	if (ifp == NULL || mp == NULL) {
3446 		return EINVAL;
3447 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3448 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3449 		return ENXIO;
3450 	}
3451 	if (!ifnet_get_ioref(ifp)) {
3452 		return ENXIO;
3453 	}
3454 
3455 #if SKYWALK
3456 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3457 #endif /* SKYWALK */
3458 	rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3459 	    &pkt, NULL, NULL, NULL, 0);
3460 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3461 	ifnet_decr_iorefcnt(ifp);
3462 	*mp = pkt.cp_mbuf;
3463 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3464 	return rc;
3465 }
3466 
3467 errno_t
ifnet_dequeue_service_class(struct ifnet * ifp,mbuf_svc_class_t sc,struct mbuf ** mp)3468 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3469     struct mbuf **mp)
3470 {
3471 	errno_t rc;
3472 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3473 
3474 	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
3475 		return EINVAL;
3476 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3477 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3478 		return ENXIO;
3479 	}
3480 	if (!ifnet_get_ioref(ifp)) {
3481 		return ENXIO;
3482 	}
3483 
3484 #if SKYWALK
3485 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3486 #endif /* SKYWALK */
3487 	rc = ifclassq_dequeue(ifp->if_snd, sc, 1,
3488 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
3489 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3490 	ifnet_decr_iorefcnt(ifp);
3491 	*mp = pkt.cp_mbuf;
3492 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3493 	return rc;
3494 }
3495 
3496 errno_t
ifnet_dequeue_multi(struct ifnet * ifp,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3497 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3498     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3499 {
3500 	errno_t rc;
3501 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3502 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3503 
3504 	if (ifp == NULL || head == NULL || pkt_limit < 1) {
3505 		return EINVAL;
3506 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3507 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3508 		return ENXIO;
3509 	}
3510 	if (!ifnet_get_ioref(ifp)) {
3511 		return ENXIO;
3512 	}
3513 
3514 #if SKYWALK
3515 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3516 #endif /* SKYWALK */
3517 	rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, pkt_limit,
3518 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
3519 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3520 	ifnet_decr_iorefcnt(ifp);
3521 	*head = pkt_head.cp_mbuf;
3522 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3523 	if (tail != NULL) {
3524 		*tail = pkt_tail.cp_mbuf;
3525 	}
3526 	return rc;
3527 }
3528 
3529 errno_t
ifnet_dequeue_multi_bytes(struct ifnet * ifp,u_int32_t byte_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3530 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3531     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3532 {
3533 	errno_t rc;
3534 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3535 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3536 
3537 	if (ifp == NULL || head == NULL || byte_limit < 1) {
3538 		return EINVAL;
3539 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3540 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3541 		return ENXIO;
3542 	}
3543 	if (!ifnet_get_ioref(ifp)) {
3544 		return ENXIO;
3545 	}
3546 
3547 #if SKYWALK
3548 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3549 #endif /* SKYWALK */
3550 	rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3551 	    byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
3552 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3553 	ifnet_decr_iorefcnt(ifp);
3554 	*head = pkt_head.cp_mbuf;
3555 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3556 	if (tail != NULL) {
3557 		*tail = pkt_tail.cp_mbuf;
3558 	}
3559 	return rc;
3560 }
3561 
3562 errno_t
ifnet_dequeue_service_class_multi(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3563 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3564     u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3565     u_int32_t *len)
3566 {
3567 	errno_t rc;
3568 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3569 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3570 
3571 	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3572 	    !MBUF_VALID_SC(sc)) {
3573 		return EINVAL;
3574 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3575 	    !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3576 		return ENXIO;
3577 	}
3578 	if (!ifnet_get_ioref(ifp)) {
3579 		return ENXIO;
3580 	}
3581 
3582 #if SKYWALK
3583 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3584 #endif /* SKYWALK */
3585 	rc = ifclassq_dequeue(ifp->if_snd, sc, pkt_limit,
3586 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
3587 	    cnt, len, 0);
3588 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3589 	ifnet_decr_iorefcnt(ifp);
3590 	*head = pkt_head.cp_mbuf;
3591 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3592 	if (tail != NULL) {
3593 		*tail = pkt_tail.cp_mbuf;
3594 	}
3595 	return rc;
3596 }
3597 
#if XNU_TARGET_OS_OSX
/*
 * Adapter from the extended framer interface to a legacy framer
 * callback: legacy framers do not report header/trailer sizes, so the
 * optional pre/post outputs are zeroed before delegating.
 */
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest,
    IFNET_LLADDR_T dest_linkaddr,
    IFNET_FRAME_TYPE_T frame_type,
    u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}
	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* XNU_TARGET_OS_OSX */
3616 
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		/* bump the config generation so observers notice the change */
		ifnet_increment_generation(ifp);
	}

#if NECP
	/* let NECP clients re-evaluate policy against the new state */
	necp_update_all_clients();
#endif /* NECP */

	/* deliver the kernel event to userspace listeners */
	return kev_post_msg(event);
}
3631 
3632 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)3633 dlil_post_sifflags_msg(struct ifnet * ifp)
3634 {
3635 	struct kev_msg ev_msg;
3636 	struct net_event_data ev_data;
3637 
3638 	bzero(&ev_data, sizeof(ev_data));
3639 	bzero(&ev_msg, sizeof(ev_msg));
3640 	ev_msg.vendor_code = KEV_VENDOR_APPLE;
3641 	ev_msg.kev_class = KEV_NETWORK_CLASS;
3642 	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3643 	ev_msg.event_code = KEV_DL_SIFFLAGS;
3644 	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3645 	ev_data.if_family = ifp->if_family;
3646 	ev_data.if_unit = (u_int32_t) ifp->if_unit;
3647 	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
3648 	ev_msg.dv[0].data_ptr = &ev_data;
3649 	ev_msg.dv[1].data_length = 0;
3650 	dlil_post_complete_msg(ifp, &ev_msg);
3651 }
3652 
#define TMP_IF_PROTO_ARR_SIZE   10
/*
 * Deliver a kernel event to, in order: the interface filters, every
 * attached protocol, and finally the interface itself; then post the
 * event to userspace (bumping the interface generation if requested).
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			/* drop the mutex while calling out to the filter */
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_get_ioref(ifp)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			/* common case: the on-stack array suffices */
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		/* snapshot every attached protocol, taking a ref on each */
		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* deliver the event to each snapshotted protocol, unlocked */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
3753 
3754 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)3755 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3756 {
3757 	struct kev_msg kev_msg;
3758 	int result = 0;
3759 
3760 	if (ifp == NULL || event == NULL) {
3761 		return EINVAL;
3762 	}
3763 
3764 	bzero(&kev_msg, sizeof(kev_msg));
3765 	kev_msg.vendor_code = event->vendor_code;
3766 	kev_msg.kev_class = event->kev_class;
3767 	kev_msg.kev_subclass = event->kev_subclass;
3768 	kev_msg.event_code = event->event_code;
3769 	kev_msg.dv[0].data_ptr = &event->event_data;
3770 	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3771 	kev_msg.dv[1].data_length = 0;
3772 
3773 	result = dlil_event_internal(ifp, &kev_msg, TRUE);
3774 
3775 	return result;
3776 }
3777 
/* The following is used to enqueue work items for ifnet ioctl events */
static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);

/* Argument carried by a deferred ifnet ioctl work item */
struct ifnet_ioctl_event {
	ifnet_ref_t ifp;        /* interface; holds an I/O ref until processed */
	u_long ioctl_code;      /* ioctl to (re)issue, e.g. SIOC{ADD,DEL}MULTI */
};

/* Work-queue entry embedding the deferred ioctl argument */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
3790 
3791 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)3792 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
3793 {
3794 	struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
3795 	bool compare_expected;
3796 
3797 	/*
3798 	 * Get an io ref count if the interface is attached.
3799 	 * At this point it most likely is. We are taking a reference for
3800 	 * deferred processing.
3801 	 */
3802 	if (!ifnet_get_ioref(ifp)) {
3803 		os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
3804 		    "is not attached",
3805 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3806 		return;
3807 	}
3808 	switch (ioctl_code) {
3809 	case SIOCADDMULTI:
3810 		compare_expected = false;
3811 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
3812 			ifnet_decr_iorefcnt(ifp);
3813 			return;
3814 		}
3815 		break;
3816 	case SIOCDELMULTI:
3817 		compare_expected = false;
3818 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
3819 			ifnet_decr_iorefcnt(ifp);
3820 			return;
3821 		}
3822 		break;
3823 	default:
3824 		os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
3825 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3826 		return;
3827 	}
3828 
3829 	p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
3830 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
3831 
3832 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
3833 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
3834 	p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
3835 	nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
3836 }
3837 
3838 static void
ifnet_ioctl_event_callback(struct nwk_wq_entry * nwk_item)3839 ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
3840 {
3841 	struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
3842 	    struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);
3843 
3844 	ifnet_ref_t ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
3845 	u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
3846 	int ret = 0;
3847 
3848 	switch (ioctl_code) {
3849 	case SIOCADDMULTI:
3850 		atomic_store(&ifp->if_mcast_add_signaled, false);
3851 		break;
3852 	case SIOCDELMULTI:
3853 		atomic_store(&ifp->if_mcast_del_signaled, false);
3854 		break;
3855 	}
3856 	if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
3857 		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
3858 		    __func__, __LINE__, if_name(ifp), ret, ioctl_code);
3859 	} else if (dlil_verbose) {
3860 		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
3861 		    "for ioctl %lu",
3862 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3863 	}
3864 	ifnet_decr_iorefcnt(ifp);
3865 	kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
3866 	return;
3867 }
3868 
/*
 * Dispatch an ioctl to, in order: the interface filters, the attached
 * protocol (if proto_fam is non-zero), and the driver's if_ioctl.
 * retval stays EOPNOTSUPP until someone handles the request; a hard
 * error or EJUSTRETURN from any stage short-circuits the rest.
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;        /* "nobody handled it yet" sentinel */
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_get_ioref(ifp)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/* drop the mutex across the filter callout */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					/* normalize ENOTSUP to EOPNOTSUPP */
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, stop processing": report success */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
3986 
3987 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)3988 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
3989 {
3990 	errno_t error = 0;
3991 
3992 	if (ifp->if_set_bpf_tap) {
3993 		/* Get an io reference on the interface if it is attached */
3994 		if (!ifnet_get_ioref(ifp)) {
3995 			return ENXIO;
3996 		}
3997 		error = ifp->if_set_bpf_tap(ifp, mode, callback);
3998 		ifnet_decr_iorefcnt(ifp);
3999 	}
4000 	return error;
4001 }
4002 
/*
 * Resolve a protocol multicast address to its link-layer form: the
 * attached protocol's resolver runs first, then the driver is given a
 * chance to verify (or, absent a resolver, check) the address.
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	/* hold an I/O reference; bail if the interface is detaching */
	if (!ifnet_get_ioref(ifp)) {
		return result;
	}

	SOCKADDR_ZERO(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			/* protocol resolved it: verify the link-layer addr */
			verify = ll_addr;
		} else {
			/* no resolver: let the driver check the proto addr */
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return result;
}
4044 
4045 __private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4046 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4047     const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4048     const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4049 {
4050 	struct if_proto *proto;
4051 	errno_t result = 0;
4052 
4053 	if ((ifp->if_flags & IFF_NOARP) != 0) {
4054 		result = ENOTSUP;
4055 		goto done;
4056 	}
4057 
4058 	/* callee holds a proto refcnt upon success */
4059 	ifnet_lock_shared(ifp);
4060 	proto = find_attached_proto(ifp, target_proto->sa_family);
4061 	ifnet_lock_done(ifp);
4062 	if (proto == NULL) {
4063 		result = ENOTSUP;
4064 	} else {
4065 		proto_media_send_arp    arpp;
4066 		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4067 		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4068 		if (arpp == NULL) {
4069 			result = ENOTSUP;
4070 		} else {
4071 			switch (arpop) {
4072 			case ARPOP_REQUEST:
4073 				arpstat.txrequests++;
4074 				if (target_hw != NULL) {
4075 					arpstat.txurequests++;
4076 				}
4077 				break;
4078 			case ARPOP_REPLY:
4079 				arpstat.txreplies++;
4080 				break;
4081 			}
4082 			result = arpp(ifp, arpop, sender_hw, sender_proto,
4083 			    target_hw, target_proto);
4084 		}
4085 		if_proto_free(proto);
4086 	}
4087 done:
4088 	return result;
4089 }
4090 
4091 static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,const struct sockaddr_in * target_sin)4092 _is_announcement(const struct sockaddr_in * sender_sin,
4093     const struct sockaddr_in * target_sin)
4094 {
4095 	if (target_sin == NULL || sender_sin == NULL) {
4096 		return FALSE;
4097 	}
4098 
4099 	return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
4100 }
4101 
/*
 * Front end for sending ARP: normally delegates straight to
 * dlil_send_arp_internal() on the given interface, but an IPv4
 * link-local request (other than an announcement) is replicated on
 * every IFEF_ARPLL-capable interface with an IPv4 source address.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = __DECONST_SA(target_proto0);

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = SA(&target_proto_sinarp);
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = SIN(sender_proto);
	target_sin = SIN(target_proto);
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		u_int32_t       count;
		ifnet_ref_t     *__counted_by(count) ifp_list;
		u_int32_t       ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				ifnet_ref_t cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* keep the lladdr ifaddr alive across the send */
				ifa_addref(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, SDL(source_hw->ifa_addr),
				    SA(&source_ip_copy), NULL,
				    target_proto);

				ifa_remref(source_hw);
				/* report the first non-ENOTSUP outcome */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free_counted_by(ifp_list, count);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
4212 
4213 /*
4214  * Caller must hold ifnet head lock.
4215  */
4216 static int
ifnet_lookup(struct ifnet * ifp)4217 ifnet_lookup(struct ifnet *ifp)
4218 {
4219 	ifnet_ref_t _ifp;
4220 
4221 	ifnet_head_lock_assert(LCK_RW_ASSERT_HELD);
4222 	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4223 		if (_ifp == ifp) {
4224 			break;
4225 		}
4226 	}
4227 	return _ifp != NULL;
4228 }
4229 
4230 /*
4231  * Caller has to pass a non-zero refio argument to get a
4232  * IO reference count. This will prevent ifnet_detach from
4233  * being called when there are outstanding io reference counts.
4234  */
4235 int
ifnet_get_ioref(struct ifnet * ifp)4236 ifnet_get_ioref(struct ifnet *ifp)
4237 {
4238 	bool ret;
4239 
4240 	ret = ifnet_is_fully_attached(ifp);
4241 	if (ret) {
4242 		if (os_ref_retain_try(&ifp->if_refio) == false) {
4243 			/* refio became 0 which means it is detaching */
4244 			return false;
4245 		}
4246 	}
4247 
4248 	return ret;
4249 }
4250 
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	/* Count a thread started on behalf of ifp, under if_ref_lock */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4258 
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	/*
	 * One pending thread is done; when the count drains to zero,
	 * wake any waiter sleeping on if_threads_pending.
	 */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4270 
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_{attached,data_ready}() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	/* unconditional retain: safe only because a ref is already held */
	os_ref_retain(&ifp->if_refio);
}
4281 
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread.  The wakeup is issued under if_ref_lock,
	 * presumably to serialize with the detach thread's sleep --
	 * compare ifnet_decr_iorefcnt_locked(), which assumes the lock.
	 */
	if (os_ref_release_relaxed(&ifp->if_refio) == 0) {
		lck_mtx_lock(&ifp->if_ref_lock);
		wakeup(&(ifp->if_refio));
		lck_mtx_unlock(&ifp->if_ref_lock);
	}
}
4295 
/* Same as ifnet_decr_iorefcnt(), for callers already holding if_ref_lock */
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread.
	 */
	if (os_ref_release_relaxed(&ifp->if_refio) == 0) {
		wakeup(&(ifp->if_refio));
	}
}
4307 
/*
 * Enter the data-movement path on ifp.  Takes both an I/O reference
 * and a datamov reference; fails (returns FALSE) when the interface
 * is not attached-and-ready or is already detaching.
 */
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	ret = ifnet_is_attached_and_ready(ifp);
	if (ret) {
		if (os_ref_retain_try(&ifp->if_refio) == false) {
			/* refio became 0 which means it is detaching */
			return false;
		}
		/* count this thread in the upper bits of if_datamov */
		os_ref_retain_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
	}

	DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
	return ret;
}
4325 
/* Leave the data-movement path; pairs with ifnet_datamov_begin(). */
void
ifnet_datamov_end(struct ifnet *ifp)
{
	uint32_t datamov;
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	datamov = os_ref_release_raw_relaxed_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
	/* last datamov ref gone and a drainer flagged itself in low bits? */
	if (datamov >> IF_DATAMOV_BITS == 1 && (datamov & IF_DATAMOV_DRAINING)) {
		lck_mtx_lock(&ifp->if_ref_lock);
		DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
		DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
		wakeup(&(ifp->if_datamov));
		lck_mtx_unlock(&ifp->if_ref_lock);
	}
	/* drop the I/O ref taken in ifnet_datamov_begin() */
	ifnet_decr_iorefcnt(ifp);

	DTRACE_IP1(datamov__end, struct ifnet *, ifp);
}
4346 
/* Suspend data movement on ifp; caller holds if_ref_lock */
static void
ifnet_datamov_suspend_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
	/* hold an I/O ref for the duration of the suspension */
	ifnet_incr_iorefcnt(ifp);
	if (ifp->if_suspend++ == 0) {
		/* first suspender clears IFRF_READY to fence new data paths */
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
}
4357 
/* Suspend data movement on an attached (or detaching) interface */
static void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4366 
/*
 * Suspend data movement only when nobody has already done so.
 * Returns TRUE iff this call performed the suspension (and hence
 * the caller owes a matching ifnet_datamov_resume()).
 */
boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	if (ifp->if_suspend > 0) {
		/* already suspended by someone else */
		lck_mtx_unlock(&ifp->if_ref_lock);
		return FALSE;
	}
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
	return TRUE;
}
4380 
/*
 * Wait until every thread currently in the data-movement path has
 * left, then purge the transmit queues.  Data movement must already
 * have been suspended via ifnet_datamov_suspend*().
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	/* advertise a drainer so ifnet_datamov_end() knows to wake us */
	os_atomic_or(&ifp->if_datamov, IF_DATAMOV_DRAINING, relaxed);
	while (os_ref_get_count_mask(&ifp->if_datamov, IF_DATAMOV_BITS) > 1) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		/* sleep with if_ref_lock dropped; ifnet_datamov_end() wakes us */
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	os_atomic_andnot(&ifp->if_datamov, IF_DATAMOV_DRAINING, relaxed);
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush(ifp, ifp->if_snd);
	}
}
4407 
/* Convenience wrapper: suspend new data movement, then wait it out */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
4414 
/* Undo one ifnet_datamov_suspend(); last resumer re-enables data paths */
void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		/* last resumer restores IFRF_READY */
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	/* drop the I/O ref taken in ifnet_datamov_suspend_locked() */
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4428 
/*
 * Common back end for ifnet_attach_protocol{,_v2}(): registers proto
 * with its interface, hands the demux descriptors to the family's
 * add_proto hook, links the proto into the per-family hash, and posts
 * KEV_DL_PROTO_ATTACHED.  On success *proto_count (if given) receives
 * the number of protocols now attached.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *__counted_by(demux_count) demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	ifnet_ref_t ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	if (!ifnet_get_ioref(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		/* this protocol family is already attached */
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	/* walk to the tail of the bucket so insertion preserves order */
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
4508 
/* Post-attach housekeeping shared by the v1 and v2 attach paths */
static void
dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
{
	/*
	 * A protocol has been attached, mark the interface up.
	 * This used to be done by configd.KernelEventMonitor, but that
	 * is inherently prone to races (rdar://problem/30810208).
	 */
	(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
	(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
	dlil_post_sifflags_msg(ifp);
#if SKYWALK
	switch (protocol) {
	case AF_INET:
	case AF_INET6:
		/* don't attach the flowswitch unless attaching IP */
		dlil_attach_flowswitch_nexus(ifp);
		break;
	default:
		break;
	}
#endif /* SKYWALK */
}
4532 
/*
 * Attach a v1 protocol to an interface.  Validates arguments, copies
 * the caller's callbacks into a freshly allocated if_proto, and hands
 * it to dlil_attach_protocol().  The ifnet head lock is held shared
 * across the attach so the interface cannot vanish mid-way.
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto  *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/* success: bring the interface up, kick flowswitch etc. */
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* attach failed: release the unattached if_proto */
		dlif_proto_free(ifproto);
	}
	return retval;
}
4594 
4595 errno_t
ifnet_attach_protocol_v2(ifnet_t ifp,protocol_family_t protocol,const struct ifnet_attach_proto_param_v2 * proto_details)4596 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
4597     const struct ifnet_attach_proto_param_v2 *proto_details)
4598 {
4599 	int retval = 0;
4600 	struct if_proto  *ifproto = NULL;
4601 	uint32_t proto_count = 0;
4602 
4603 	ifnet_head_lock_shared();
4604 	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
4605 		retval = EINVAL;
4606 		goto end;
4607 	}
4608 	/* Check that the interface is in the global list */
4609 	if (!ifnet_lookup(ifp)) {
4610 		retval = ENXIO;
4611 		goto end;
4612 	}
4613 
4614 	ifproto = dlif_proto_alloc();
4615 
4616 	/* refcnt held above during lookup */
4617 	ifproto->ifp = ifp;
4618 	ifproto->protocol_family = protocol;
4619 	ifproto->proto_kpi = kProtoKPI_v2;
4620 	ifproto->kpi.v2.input = proto_details->input;
4621 	ifproto->kpi.v2.pre_output = proto_details->pre_output;
4622 	ifproto->kpi.v2.event = proto_details->event;
4623 	ifproto->kpi.v2.ioctl = proto_details->ioctl;
4624 	ifproto->kpi.v2.detached = proto_details->detached;
4625 	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
4626 	ifproto->kpi.v2.send_arp = proto_details->send_arp;
4627 
4628 	retval = dlil_attach_protocol(ifproto,
4629 	    proto_details->demux_list, proto_details->demux_count,
4630 	    &proto_count);
4631 
4632 end:
4633 	if (retval == EEXIST) {
4634 		/* already attached */
4635 		if (dlil_verbose) {
4636 			DLIL_PRINTF("%s: protocol %d already attached\n",
4637 			    ifp != NULL ? if_name(ifp) : "N/A",
4638 			    protocol);
4639 		}
4640 	} else if (retval != 0) {
4641 		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
4642 		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
4643 	} else if (dlil_verbose) {
4644 		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
4645 		    ifp != NULL ? if_name(ifp) : "N/A",
4646 		    protocol, proto_count);
4647 	}
4648 	ifnet_head_done();
4649 	if (retval == 0) {
4650 		dlil_handle_proto_attach(ifp, protocol);
4651 	} else if (ifproto != NULL) {
4652 		dlif_proto_free(ifproto);
4653 	}
4654 	return retval;
4655 }
4656 
/*
 * Detach a protocol previously attached with ifnet_attach_protocol{,_v2}.
 *
 * Removes the if_proto from the interface's protocol hash, notifies the
 * family module via if_del_proto, and redirects the protocol callbacks
 * to inert media stubs that fail with ENXIO -- presumably so that any
 * racing users of the if_proto see harmless failures rather than stale
 * callback pointers (TODO confirm).  The remaining detach steps run when
 * the last if_proto reference is released.
 *
 * Returns 0 on success, EINVAL on bad arguments, or ENXIO if the
 * protocol family is not attached to this interface.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int     retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	/* unlink from the per-interface protocol hash bucket */
	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/* replace the live callbacks with the ENXIO-returning stubs */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	/* safe to read proto here: we still hold two references on it */
	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
4722 
/*
 * Inert v1 input stub installed by ifnet_detach_protocol();
 * unconditionally rejects packets with ENXIO.
 */
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}
4730 
/*
 * Inert v2 input stub installed by ifnet_detach_protocol();
 * unconditionally rejects packets with ENXIO.
 */
static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}
4738 
/*
 * Inert pre-output stub installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    IFNET_FRAME_TYPE_RW_T frame_type, IFNET_LLADDR_RW_T link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}
4747 
/*
 * Inert event stub installed by ifnet_detach_protocol(); silently
 * discards interface events.
 */
static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}
4754 
/*
 * Inert ioctl stub installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return ENXIO;
}
4762 
/*
 * Inert multicast-resolve stub installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return ENXIO;
}
4770 
/*
 * Inert ARP-send stub installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return ENXIO;
}
4779 
4780 extern int if_next_index(void);
4781 extern int tcp_ecn;
4782 
4783 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)4784 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
4785 {
4786 	uint32_t sflags = 0;
4787 	int err;
4788 
4789 	if (if_flowadv) {
4790 		sflags |= PKTSCHEDF_QALG_FLOWCTL;
4791 	}
4792 
4793 	if (if_delaybased_queue) {
4794 		sflags |= PKTSCHEDF_QALG_DELAYBASED;
4795 	}
4796 
4797 	if (ifp->if_output_sched_model & IFNET_SCHED_DRIVER_MANGED_MODELS) {
4798 		VERIFY(IFNET_MODEL_IS_VALID(ifp->if_output_sched_model));
4799 		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
4800 	}
4801 	/* Inherit drop limit from the default queue */
4802 	if (ifp->if_snd != ifcq) {
4803 		IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
4804 	}
4805 	/* Initialize transmit queue(s) */
4806 	err = ifclassq_setup(ifcq, ifp, sflags);
4807 	if (err != 0) {
4808 		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4809 		    "err=%d", __func__, ifp, err);
4810 		/* NOTREACHED */
4811 	}
4812 }
4813 
/*
 * Final stage of bringing up an interface: publish the embryonic ifnet
 * on the global lists (ifnet_head, ifindex2ifnet[]), assign an index
 * and a link-layer address, initialize transmit/receive machinery and
 * kernel threads, and finally mark the ifnet IFRF_ATTACHED|IFRF_READY.
 *
 * Returns 0 on success; EINVAL for a NULL ifp or a link-address length
 * mismatch; EEXIST if the ifnet is already on the global list; ENODEV
 * if the family module add/del_proto callbacks are missing; ENOBUFS if
 * no interface index or link address could be allocated.
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
#if SKYWALK
	boolean_t netif_compat;
	if_nexus_netif  nexus_netif;
#endif /* SKYWALK */
	ifnet_ref_t tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	/* caller must hand us an embryonic (pre-attach) ifnet */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	/*
	 * If ll_addr is supplied it must agree with the interface's
	 * configured address length (or define it if still zero).
	 */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
	    PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ifp->if_proto_hash_count = PROTO_HASH_SLOTS;

	/* filter list must start out empty and idle */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* a recycled (DLIF_REUSE) ifnet may carry over multicast state */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		/*
		 * Since we exhausted the list of
		 * if_index's, try to find an empty slot
		 * in ifindex2ifnet.
		 */
		if (idx == -1 && if_index >= UINT16_MAX) {
			for (int i = 1; i < if_index; i++) {
				if (ifindex2ifnet[i] == NULL &&
				    ifnet_addrs[i - 1] == NULL) {
					idx = i;
					break;
				}
			}
		}
		if (idx == -1) {
			/* no index available; undo and bail */
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;

		/* the lladdr passed at attach time is the permanent address */
		if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
		    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
			bcopy(CONST_LLADDR(ll_addr),
			    dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
			dl_if->dl_if_permanent_ether_is_set = 1;
		}
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	ifa_addref(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* publish on the global list and index table */
	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL_DM);

	dlil_ifclassq_setup(ifp, ifp->if_snd);

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			/* ENODEV means no dedicated input thread was created */
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* bump the starter thread's scheduling precedence slightly */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (dlil_is_rxpoll_input(thfunc)) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* same slight precedence boost as the starter thread */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/* count link-layer memberships carried over by a recycled ifnet */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	/* Clear throughput estimates and radio type */
	ifp->if_estimated_up_bucket = 0;
	ifp->if_estimated_down_bucket = 0;
	ifp->if_radio_type = 0;
	ifp->if_radio_channel = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

#if SKYWALK
	netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Final mark this ifnet as attached. */
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	os_ref_init(&ifp->if_refio, &if_refiogrp);
	os_ref_init_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp, 0);
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
#if SKYWALK
	/* the interface is fully attached; let the nexus adapter know */
	if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
		if (netif_compat) {
			if (sk_netif_compat_txmodel ==
			    NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
				ifnet_enqueue_multi_setup(ifp,
				    sk_tx_delay_qlen, sk_tx_delay_timeout);
			}
			ifp->if_nx_netif = nexus_netif;
		}
		ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
	}
#endif /* SKYWALK */
	ifnet_lock_done(ifp);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);

	os_log(OS_LOG_DEFAULT, "%s: attached%s\n", if_name(ifp),
	    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	return 0;
}
5308 
/*
 * Purge all protocol addresses from the interface: IPv4 (when INET is
 * configured) and IPv6.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
5317 
/*
 * Begin detaching an interface: mark it IFRF_DETACHING, remove it from
 * the global ifnet list and index table so it is no longer visible to
 * lookups, reset per-interface state (router mode, delegation, cached
 * routes, stats), and enqueue the ifnet for the detach worker thread
 * which performs the final teardown once all I/O references drain.
 *
 * Returns 0 on success, EINVAL if ifp is NULL or was never attached,
 * ENXIO if a detach is already in progress.
 */
errno_t
ifnet_detach(ifnet_t ifp)
{
	ifnet_ref_t delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	/* stop CGA address generation for this interface, if present */
	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}
	os_log(OS_LOG_DEFAULT, "%s detaching", if_name(ifp));

	/* Mark the interface down */
	if_down(ifp);

	/*
	 * IMPORTANT NOTE
	 *
	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
	 * or equivalently, ifnet_get_ioref(ifp, 1), can't be modified
	 * until after we've waited for all I/O references to drain
	 * in ifnet_detach_final().
	 */

	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset CLAT46 flag */
	if_clear_eflags(ifp, IFEF_CLAT46);

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connection after the interface
	 * went down
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset router mode */
	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	/*
	 * Increment the generation count on interface deletion
	 */
	ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Force reset link heuristics */
	if (ifp->if_link_heuristics_tcall != NULL) {
		thread_call_cancel_wait(ifp->if_link_heuristics_tcall);
		thread_call_free(ifp->if_link_heuristics_tcall);
		ifp->if_link_heuristics_tcall = NULL;
	}
	if_clear_xflags(ifp, IFXF_LINK_HEURISTICS);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		kfree_type(struct if_link_status, ifp->if_link_status);
		ifp->if_link_status = NULL;
	}

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
	mld_domifdetach(ifp);

#if SKYWALK
	/* Clean up any netns tokens still pointing to to this ifnet */
	netns_ifnet_detach(ifp);
#endif /* SKYWALK */
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
5516 
/*
 * Append an interface to the global detaching queue and wake up the
 * detacher thread that drains it (see ifnet_detacher_thread_cont).
 * Caller must hold the dlil interface lock.
 */
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);       /* catch counter wrap */
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
5527 
5528 static struct ifnet *
ifnet_detaching_dequeue(void)5529 ifnet_detaching_dequeue(void)
5530 {
5531 	ifnet_ref_t ifp;
5532 
5533 	dlil_if_lock_assert();
5534 
5535 	ifp = TAILQ_FIRST(&ifnet_detaching_head);
5536 	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5537 	if (ifp != NULL) {
5538 		VERIFY(ifnet_detaching_cnt != 0);
5539 		--ifnet_detaching_cnt;
5540 		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5541 		ifp->if_detaching_link.tqe_next = NULL;
5542 		ifp->if_detaching_link.tqe_prev = NULL;
5543 	}
5544 	return ifp;
5545 }
5546 
/*
 * Continuation for the interface detacher thread: drains the global
 * detaching queue, running ifnet_detach_final() on each dequeued
 * interface, then blocks on ifnet_delayed_run with itself as the
 * continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	ifnet_ref_t ifp;

	dlil_if_lock();
	if (__improbable(ifnet_detaching_embryonic)) {
		/* first wakeup after thread creation; see the _func below */
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		/* queue drained; fall through to block again below */
		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* drop the dlil lock; detach-final can block at length */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
5589 
/*
 * Entry point for the interface detacher thread.  Arms the wait on
 * ifnet_delayed_run, marks itself embryonic, and self-wakes once so that
 * the continuation runs and decrements the pending-thread count before
 * settling into its normal block/drain cycle.  Never returns.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
5606 
5607 static void
ifnet_detach_final(struct ifnet * ifp)5608 ifnet_detach_final(struct ifnet *ifp)
5609 {
5610 	struct ifnet_filter *filter, *filter_next;
5611 	struct dlil_ifnet *dlifp;
5612 	struct ifnet_filter_head fhead;
5613 	struct dlil_threading_info *inp;
5614 	struct ifaddr *ifa;
5615 	ifnet_detached_func if_free;
5616 	int i;
5617 
5618 	/* Let BPF know we're detaching */
5619 	bpfdetach(ifp);
5620 
5621 #if SKYWALK
5622 	dlil_netif_detach_notify(ifp);
5623 	/*
5624 	 * Wait for the datapath to quiesce before tearing down
5625 	 * netif/flowswitch nexuses.
5626 	 */
5627 	dlil_quiesce_and_detach_nexuses(ifp);
5628 #endif /* SKYWALK */
5629 
5630 	lck_mtx_lock(&ifp->if_ref_lock);
5631 	if (!(ifp->if_refflags & IFRF_DETACHING)) {
5632 		panic("%s: flags mismatch (detaching not set) ifp=%p",
5633 		    __func__, ifp);
5634 		/* NOTREACHED */
5635 	}
5636 
5637 	/*
5638 	 * Wait until the existing IO references get released
5639 	 * before we proceed with ifnet_detach.  This is not a
5640 	 * common case, so block without using a continuation.
5641 	 */
5642 	if (os_ref_release_relaxed(&ifp->if_refio) > 0) {
5643 		bool waited = false;
5644 
5645 		while (os_ref_get_count(&ifp->if_refio) > 0) {
5646 			waited = true;
5647 			DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
5648 			    __func__, if_name(ifp));
5649 			(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
5650 			    (PZERO - 1), "ifnet_ioref_wait", NULL);
5651 		}
5652 		if (waited) {
5653 			DLIL_PRINTF("%s: %s IO references drained\n",
5654 			    __func__, if_name(ifp));
5655 		}
5656 	}
5657 	os_ref_release_last_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
5658 	VERIFY(ifp->if_suspend == 0);
5659 	ifp->if_refflags &= ~IFRF_READY;
5660 	lck_mtx_unlock(&ifp->if_ref_lock);
5661 
5662 #if SKYWALK
5663 	VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
5664 #endif /* SKYWALK */
5665 	/* Drain and destroy send queue */
5666 	ifclassq_teardown(ifp->if_snd);
5667 
5668 	/* Detach interface filters */
5669 	lck_mtx_lock(&ifp->if_flt_lock);
5670 	if_flt_monitor_enter(ifp);
5671 
5672 	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
5673 	fhead = ifp->if_flt_head;
5674 	TAILQ_INIT(&ifp->if_flt_head);
5675 
5676 	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
5677 		filter_next = TAILQ_NEXT(filter, filt_next);
5678 		lck_mtx_unlock(&ifp->if_flt_lock);
5679 
5680 		dlil_detach_filter_internal(filter, 1);
5681 		lck_mtx_lock(&ifp->if_flt_lock);
5682 	}
5683 	if_flt_monitor_leave(ifp);
5684 	lck_mtx_unlock(&ifp->if_flt_lock);
5685 
5686 	/* Tell upper layers to drop their network addresses */
5687 	if_purgeaddrs(ifp);
5688 
5689 	ifnet_lock_exclusive(ifp);
5690 
5691 	/* Clear agent IDs */
5692 	if (ifp->if_agentids != NULL) {
5693 		kfree_data_sized_by(ifp->if_agentids, ifp->if_agentcount);
5694 	}
5695 
5696 	bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
5697 	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
5698 
5699 	/* Unplumb all protocols */
5700 	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5701 		struct if_proto *proto;
5702 
5703 		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5704 		while (proto != NULL) {
5705 			protocol_family_t family = proto->protocol_family;
5706 			ifnet_lock_done(ifp);
5707 			proto_unplumb(family, ifp);
5708 			ifnet_lock_exclusive(ifp);
5709 			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5710 		}
5711 		/* There should not be any protocols left */
5712 		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
5713 	}
5714 	kfree_type_counted_by(struct proto_hash_entry, ifp->if_proto_hash_count, ifp->if_proto_hash);
5715 
5716 	/* Detach (permanent) link address from if_addrhead */
5717 	ifa = TAILQ_FIRST(&ifp->if_addrhead);
5718 	IFA_LOCK(ifa);
5719 	if_detach_link_ifa(ifp, ifa);
5720 	IFA_UNLOCK(ifa);
5721 
5722 	/* This interface should not be on {ifnet_head,detaching} */
5723 	VERIFY(ifp->if_link.tqe_next == NULL);
5724 	VERIFY(ifp->if_link.tqe_prev == NULL);
5725 	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5726 	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5727 	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
5728 	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
5729 
5730 	/* The slot should have been emptied */
5731 	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5732 
5733 	/* There should not be any addresses left */
5734 	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5735 
5736 	/*
5737 	 * Signal the starter thread to terminate itself, and wait until
5738 	 * it has exited.
5739 	 */
5740 	if (ifp->if_start_thread != THREAD_NULL) {
5741 		lck_mtx_lock_spin(&ifp->if_start_lock);
5742 		ifp->if_start_flags |= IFSF_TERMINATING;
5743 		wakeup_one((caddr_t)&ifp->if_start_thread);
5744 		lck_mtx_unlock(&ifp->if_start_lock);
5745 
5746 		/* wait for starter thread to terminate */
5747 		lck_mtx_lock(&ifp->if_start_lock);
5748 		while (ifp->if_start_thread != THREAD_NULL) {
5749 			if (dlil_verbose) {
5750 				DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
5751 				    __func__,
5752 				    if_name(ifp));
5753 			}
5754 			(void) msleep(&ifp->if_start_thread,
5755 			    &ifp->if_start_lock, (PZERO - 1),
5756 			    "ifnet_start_thread_exit", NULL);
5757 		}
5758 		lck_mtx_unlock(&ifp->if_start_lock);
5759 		if (dlil_verbose) {
5760 			DLIL_PRINTF("%s: %s starter thread termination complete",
5761 			    __func__, if_name(ifp));
5762 		}
5763 	}
5764 
5765 	/*
5766 	 * Signal the poller thread to terminate itself, and wait until
5767 	 * it has exited.
5768 	 */
5769 	if (ifp->if_poll_thread != THREAD_NULL) {
5770 #if SKYWALK
5771 		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5772 #endif /* SKYWALK */
5773 		lck_mtx_lock_spin(&ifp->if_poll_lock);
5774 		ifp->if_poll_flags |= IF_POLLF_TERMINATING;
5775 		wakeup_one((caddr_t)&ifp->if_poll_thread);
5776 		lck_mtx_unlock(&ifp->if_poll_lock);
5777 
5778 		/* wait for poller thread to terminate */
5779 		lck_mtx_lock(&ifp->if_poll_lock);
5780 		while (ifp->if_poll_thread != THREAD_NULL) {
5781 			if (dlil_verbose) {
5782 				DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
5783 				    __func__,
5784 				    if_name(ifp));
5785 			}
5786 			(void) msleep(&ifp->if_poll_thread,
5787 			    &ifp->if_poll_lock, (PZERO - 1),
5788 			    "ifnet_poll_thread_exit", NULL);
5789 		}
5790 		lck_mtx_unlock(&ifp->if_poll_lock);
5791 		if (dlil_verbose) {
5792 			DLIL_PRINTF("%s: %s poller thread termination complete\n",
5793 			    __func__, if_name(ifp));
5794 		}
5795 	}
5796 
5797 	/*
5798 	 * If thread affinity was set for the workloop thread, we will need
5799 	 * to tear down the affinity and release the extra reference count
5800 	 * taken at attach time.  Does not apply to lo0 or other interfaces
5801 	 * without dedicated input threads.
5802 	 */
5803 	if ((inp = ifp->if_inp) != NULL) {
5804 		VERIFY(inp != dlil_main_input_thread);
5805 
5806 		if (inp->dlth_affinity) {
5807 			struct thread *__single tp, *__single wtp, *__single ptp;
5808 
5809 			lck_mtx_lock_spin(&inp->dlth_lock);
5810 			wtp = inp->dlth_driver_thread;
5811 			inp->dlth_driver_thread = THREAD_NULL;
5812 			ptp = inp->dlth_poller_thread;
5813 			inp->dlth_poller_thread = THREAD_NULL;
5814 			ASSERT(inp->dlth_thread != THREAD_NULL);
5815 			tp = inp->dlth_thread;    /* don't nullify now */
5816 			inp->dlth_affinity_tag = 0;
5817 			inp->dlth_affinity = FALSE;
5818 			lck_mtx_unlock(&inp->dlth_lock);
5819 
5820 			/* Tear down poll thread affinity */
5821 			if (ptp != NULL) {
5822 				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
5823 				VERIFY(ifp->if_xflags & IFXF_LEGACY);
5824 				(void) dlil_affinity_set(ptp,
5825 				    THREAD_AFFINITY_TAG_NULL);
5826 				thread_deallocate(ptp);
5827 			}
5828 
5829 			/* Tear down workloop thread affinity */
5830 			if (wtp != NULL) {
5831 				(void) dlil_affinity_set(wtp,
5832 				    THREAD_AFFINITY_TAG_NULL);
5833 				thread_deallocate(wtp);
5834 			}
5835 
5836 			/* Tear down DLIL input thread affinity */
5837 			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
5838 			thread_deallocate(tp);
5839 		}
5840 
5841 		/* disassociate ifp DLIL input thread */
5842 		ifp->if_inp = NULL;
5843 
5844 		/* if the worker thread was created, tell it to terminate */
5845 		if (inp->dlth_thread != THREAD_NULL) {
5846 			lck_mtx_lock_spin(&inp->dlth_lock);
5847 			inp->dlth_flags |= DLIL_INPUT_TERMINATE;
5848 			if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
5849 				wakeup_one((caddr_t)&inp->dlth_flags);
5850 			}
5851 			lck_mtx_unlock(&inp->dlth_lock);
5852 			ifnet_lock_done(ifp);
5853 
5854 			/* wait for the input thread to terminate */
5855 			lck_mtx_lock_spin(&inp->dlth_lock);
5856 			while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
5857 			    == 0) {
5858 				(void) msleep(&inp->dlth_flags, &inp->dlth_lock,
5859 				    (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
5860 			}
5861 			lck_mtx_unlock(&inp->dlth_lock);
5862 			ifnet_lock_exclusive(ifp);
5863 		}
5864 
5865 		/* clean-up input thread state */
5866 		dlil_clean_threading_info(inp);
5867 		/* clean-up poll parameters */
5868 		VERIFY(ifp->if_poll_thread == THREAD_NULL);
5869 		dlil_reset_rxpoll_params(ifp);
5870 	}
5871 
5872 	/* The driver might unload, so point these to ourselves */
5873 	if_free = ifp->if_free;
5874 	ifp->if_output_dlil = ifp_if_output;
5875 	ifp->if_output = ifp_if_output;
5876 	ifp->if_pre_enqueue = ifp_if_output;
5877 	ifp->if_start = ifp_if_start;
5878 	ifp->if_output_ctl = ifp_if_ctl;
5879 	ifp->if_input_dlil = ifp_if_input;
5880 	ifp->if_input_poll = ifp_if_input_poll;
5881 	ifp->if_input_ctl = ifp_if_ctl;
5882 	ifp->if_ioctl = ifp_if_ioctl;
5883 	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
5884 	ifp->if_free = ifp_if_free;
5885 	ifp->if_demux = ifp_if_demux;
5886 	ifp->if_event = ifp_if_event;
5887 	ifp->if_framer_legacy = ifp_if_framer;
5888 	ifp->if_framer = ifp_if_framer_extended;
5889 	ifp->if_add_proto = ifp_if_add_proto;
5890 	ifp->if_del_proto = ifp_if_del_proto;
5891 	ifp->if_check_multi = ifp_if_check_multi;
5892 
5893 	/* wipe out interface description */
5894 	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5895 	ifp->if_desc.ifd_len = 0;
5896 	VERIFY(ifp->if_desc.ifd_desc != NULL);
5897 	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
5898 
5899 	/* there shouldn't be any delegation by now */
5900 	VERIFY(ifp->if_delegated.ifp == NULL);
5901 	VERIFY(ifp->if_delegated.type == 0);
5902 	VERIFY(ifp->if_delegated.family == 0);
5903 	VERIFY(ifp->if_delegated.subfamily == 0);
5904 	VERIFY(ifp->if_delegated.expensive == 0);
5905 	VERIFY(ifp->if_delegated.constrained == 0);
5906 	VERIFY(ifp->if_delegated.ultra_constrained == 0);
5907 
5908 	/* QoS marking get cleared */
5909 	if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
5910 	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
5911 
5912 #if SKYWALK
5913 	/* the nexus destructor is responsible for clearing these */
5914 	VERIFY(ifp->if_na_ops == NULL);
5915 	VERIFY(ifp->if_na == NULL);
5916 #endif /* SKYWALK */
5917 
5918 	/* interface could come up with different hwassist next time */
5919 	ifp->if_hwassist = 0;
5920 	ifp->if_capenable = 0;
5921 
5922 	/* promiscuous/allmulti counts need to start at zero again */
5923 	ifp->if_pcount = 0;
5924 	ifp->if_amcount = 0;
5925 	ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);
5926 
5927 	ifnet_lock_done(ifp);
5928 
5929 #if PF
5930 	/*
5931 	 * Detach this interface from packet filter, if enabled.
5932 	 */
5933 	pf_ifnet_hook(ifp, 0);
5934 #endif /* PF */
5935 
5936 	/* Filter list should be empty */
5937 	lck_mtx_lock_spin(&ifp->if_flt_lock);
5938 	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5939 	VERIFY(ifp->if_flt_busy == 0);
5940 	VERIFY(ifp->if_flt_waiters == 0);
5941 	VERIFY(ifp->if_flt_non_os_count == 0);
5942 	VERIFY(ifp->if_flt_no_tso_count == 0);
5943 	lck_mtx_unlock(&ifp->if_flt_lock);
5944 
5945 	/* Last chance to drain send queue */
5946 	if_qflush(ifp, ifp->if_snd);
5947 
5948 	/* Last chance to cleanup any cached route */
5949 	lck_mtx_lock(&ifp->if_cached_route_lock);
5950 	VERIFY(!ifp->if_fwd_cacheok);
5951 	ROUTE_RELEASE(&ifp->if_fwd_route);
5952 	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
5953 	ROUTE_RELEASE(&ifp->if_src_route);
5954 	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
5955 	ROUTE_RELEASE(&ifp->if_src_route6);
5956 	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
5957 	lck_mtx_unlock(&ifp->if_cached_route_lock);
5958 
5959 	/* Ignore any pending data threshold as the interface is anyways gone */
5960 	ifp->if_data_threshold = 0;
5961 
5962 	VERIFY(ifp->if_dt_tcall != NULL);
5963 	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
5964 
5965 	ifnet_llreach_ifdetach(ifp);
5966 
5967 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);
5968 
5969 	/*
5970 	 * Finally, mark this ifnet as detached.
5971 	 */
5972 	os_log(OS_LOG_DEFAULT, "%s detached", if_name(ifp));
5973 
5974 	lck_mtx_lock_spin(&ifp->if_ref_lock);
5975 	if (!(ifp->if_refflags & IFRF_DETACHING)) {
5976 		panic("%s: flags mismatch (detaching not set) ifp=%p",
5977 		    __func__, ifp);
5978 		/* NOTREACHED */
5979 	}
5980 	ifp->if_refflags &= ~IFRF_DETACHING;
5981 	lck_mtx_unlock(&ifp->if_ref_lock);
5982 	if (if_free != NULL) {
5983 		if_free(ifp);
5984 	}
5985 
5986 	ifclassq_release(&ifp->if_snd);
5987 
5988 	/* Remove (permanent) link address from ifnet_addrs[] */
5989 	ifnet_head_lock_exclusive();
5990 	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
5991 	ifa_remref(ifa);
5992 	ifnet_addrs[ifp->if_index - 1] = NULL;
5993 	ifnet_head_done();
5994 
5995 	/* we're fully detached, clear the "in use" bit */
5996 	dlifp = (struct dlil_ifnet *)ifp;
5997 	lck_mtx_lock(&dlifp->dl_if_lock);
5998 	ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
5999 	dlifp->dl_if_flags &= ~DLIF_INUSE;
6000 	lck_mtx_unlock(&dlifp->dl_if_lock);
6001 
6002 	/* Release reference held during ifnet attach */
6003 	ifnet_release(ifp);
6004 }
6005 
/*
 * Stub output handler installed on a detached ifnet by ifnet_detach_final();
 * silently discards the entire outbound packet chain.
 */
errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}
6013 
/*
 * Stub start handler for a detached ifnet; purges whatever is still
 * queued on the interface instead of transmitting it.
 */
void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
6019 
/*
 * Stub input handler for a detached ifnet; frees the inbound packet
 * chain and reports ENXIO (device not configured).
 */
static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}
6029 
6030 static void
ifp_if_input_poll(struct ifnet * ifp,u_int32_t flags,u_int32_t max_cnt,struct mbuf ** m_head,struct mbuf ** m_tail,u_int32_t * cnt,u_int32_t * len)6031 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6032     struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6033 {
6034 #pragma unused(ifp, flags, max_cnt)
6035 	if (m_head != NULL) {
6036 		*m_head = NULL;
6037 	}
6038 	if (m_tail != NULL) {
6039 		*m_tail = NULL;
6040 	}
6041 	if (cnt != NULL) {
6042 		*cnt = 0;
6043 	}
6044 	if (len != NULL) {
6045 		*len = 0;
6046 	}
6047 }
6048 
/*
 * Stub input/output control handler for a detached ifnet; rejects
 * every command.
 */
static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}
6055 
/*
 * Stub demux for a detached ifnet; consumes the packet and returns
 * EJUSTRETURN so the caller does not process it further.
 */
static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}
6063 
/*
 * Stub protocol-attach handler for a detached ifnet; no protocol may
 * be plumbed once the interface is gone.
 */
static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}
6071 
/*
 * Stub protocol-detach handler for a detached ifnet; there is nothing
 * left to unplumb.
 */
static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}
6078 
/*
 * Stub multicast-membership check for a detached ifnet; no address is
 * supported.
 */
static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}
6085 
/*
 * Stub legacy framer for a detached ifnet.  The signature differs by
 * platform: embedded targets carry the pre/post header-length out-params,
 * macOS does not.  Both variants simply forward to the extended framer,
 * which drops the packet.
 */
#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
6104 
6105 static errno_t
ifp_if_framer_extended(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * sa,IFNET_LLADDR_T ll,IFNET_FRAME_TYPE_T t,u_int32_t * pre,u_int32_t * post)6106 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6107     const struct sockaddr *sa,
6108     IFNET_LLADDR_T ll,
6109     IFNET_FRAME_TYPE_T t,
6110     u_int32_t *pre, u_int32_t *post)
6111 {
6112 #pragma unused(ifp, sa, ll, t)
6113 	m_freem(*m);
6114 	*m = NULL;
6115 
6116 	if (pre != NULL) {
6117 		*pre = 0;
6118 	}
6119 	if (post != NULL) {
6120 		*post = 0;
6121 	}
6122 
6123 	return EJUSTRETURN;
6124 }
6125 
/*
 * Stub ioctl handler for a detached ifnet; rejects every request.
 */
errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}
6132 
/*
 * Stub BPF tap setter for a detached ifnet; accepts and ignores the
 * request since there is no datapath left to tap.
 */
static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return 0;
}
6140 
/*
 * Stub free callback for a detached ifnet; deliberately a no-op (the
 * driver's original if_free was invoked once by ifnet_detach_final()).
 */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}
6146 
/*
 * Stub event handler for a detached ifnet; events are deliberately
 * discarded.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
6152 
/*
 * Unplumb the well-known protocol families from an interface as part of
 * detach; return values are intentionally ignored (best effort).
 */
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
6168 
/*
 * Copy the interface's cached IPv4 source route out to 'dst'.  The cache
 * lock is taken in spin mode and converted to a full mutex hold before
 * the copy.
 */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6179 
/*
 * Store 'src' back into the interface's cached IPv4 source route, but
 * only while the forwarding cache is enabled (if_fwd_cacheok); otherwise
 * the caller's route reference is simply released.
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6193 
/*
 * IPv6 counterpart of ifp_src_route_copyout(): copy the cached IPv6
 * source route out to 'dst' under the cache lock (spin, then converted
 * to a full mutex hold).
 */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6205 
/*
 * IPv6 counterpart of ifp_src_route_copyin(): write 'src' back into the
 * cached IPv6 source route only while the forwarding cache is enabled;
 * otherwise release the caller's route reference.
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6220 
/*
 * Look up an IPv4 route for src_ip scoped to ifp, preferring the
 * per-interface cached source route when it is still usable and matches
 * the address.  On a miss, perform a fresh scoped lookup and store it
 * back into the cache.  May return NULL.  A non-NULL result carries a
 * route reference (presumably dropped by the caller — confirm against
 * call sites).
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route            src_rt;
	struct sockaddr_in      *dst;

	dst = SIN(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the destination sockaddr on first use */
		if (dst->sin_family != AF_INET) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped(SA(dst),
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry  *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
6255 
/*
 * IPv6 counterpart of ifnet_cached_rtlookup_inet(): look up a route for
 * *src_ip6 scoped to ifp via the per-interface cached IPv6 source route,
 * refreshing the cache on a miss.  May return NULL; a non-NULL result
 * carries a route reference.
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the destination sockaddr on first use */
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		/*
		 * NOTE(review): the IPv4 path asserts ro_rt == NULL after
		 * ROUTE_RELEASE instead of testing it; this check is
		 * presumably always true here — confirm.
		 */
		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				SA(&src_rt.ro_dst), 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry  *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
6292 
/*
 * Update the interface's Link Quality Metric (LQM) state.  When the
 * state actually changes, per-state time/count statistics are updated,
 * link heuristics are re-evaluated, NECP clients are notified if needed,
 * and a KEV_DL_LINK_QUALITY_METRIC_CHANGED event is posted.
 *
 * 'locked' is nonzero when the caller already holds the ifnet lock
 * exclusively.  The lock is always dropped before posting the event and
 * is reacquired on return only for a caller that held it.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;
	uint64_t now, delta;
	int8_t old_lqm;
	bool need_necp_client_update;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	lqm = ifnet_lqm_normalize(lqm);
	if (lqm == IFNET_LQM_THRESH_ABORT) {
		/* have the TCP fast timer handle aborts for this bad link */
		os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return;         /* nothing to update */
	}

	/* Account the time spent in the outgoing LQM state */
	net_update_uptime();
	now = net_uptime_ms();
	ASSERT(now >= ifp->if_lqmstate_start_time);
	delta = now - ifp->if_lqmstate_start_time;

	old_lqm = ifp->if_interface_state.lqm_state;
	switch (old_lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_time += delta;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_time += delta;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_time += delta;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_time += delta;
		break;
	default:
		break;
	}
	/* Count the transition into the new LQM state */
	switch (lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_cnt += 1;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_cnt += 1;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_cnt += 1;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_cnt += 1;
		break;
	default:
		break;
	}
	ifp->if_lqmstate_start_time = now;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Update the link heuristics
	 */
	need_necp_client_update = if_update_link_heuristic(ifp);

	/*
	 * Don't want to hold the lock when issuing kernel events or calling NECP
	 */
	ifnet_lock_done(ifp);

	if (need_necp_client_update) {
		necp_update_all_clients_immediately_if_needed(true);
	}

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
6399 
/*
 * Update the interface's RRC (Radio Resource Control) state and post
 * KEV_DL_RRC_STATE_CHANGED when it changes.
 *
 * Called with the ifnet lock held exclusively (see if_state_update);
 * the lock is temporarily dropped while the kernel event is posted and
 * reacquired before return.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* no-op when the state is already valid and unchanged */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	ifnet_lock_exclusive(ifp);
}
6429 
/*
 * Apply a batch of interface state changes (link quality metric, RRC
 * state, and/or interface availability) as selected by valid_bitmask
 * in the passed-in state.  All requested changes are validated up
 * front before any of them is applied.
 *
 * Returns ENOTSUP if an RRC state is given for a non-cellular
 * interface, EINVAL for out-of-range LQM or RRC values, 0 otherwise.
 */
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	/* RRC state is only accepted on cellular interfaces */
	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return ENOTSUP;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}

	/*
	 * NOTE: both helpers below temporarily drop and reacquire the
	 * exclusive ifnet lock while posting kernel events.
	 */
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}

	return 0;
}
6500 
6501 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)6502 if_get_state(struct ifnet *ifp,
6503     struct if_interface_state *if_interface_state)
6504 {
6505 	ifnet_lock_shared(ifp);
6506 
6507 	if_interface_state->valid_bitmask = 0;
6508 
6509 	if (ifp->if_interface_state.valid_bitmask &
6510 	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
6511 		if_interface_state->valid_bitmask |=
6512 		    IF_INTERFACE_STATE_RRC_STATE_VALID;
6513 		if_interface_state->rrc_state =
6514 		    ifp->if_interface_state.rrc_state;
6515 	}
6516 	if (ifp->if_interface_state.valid_bitmask &
6517 	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
6518 		if_interface_state->valid_bitmask |=
6519 		    IF_INTERFACE_STATE_LQM_STATE_VALID;
6520 		if_interface_state->lqm_state =
6521 		    ifp->if_interface_state.lqm_state;
6522 	}
6523 	if (ifp->if_interface_state.valid_bitmask &
6524 	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6525 		if_interface_state->valid_bitmask |=
6526 		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6527 		if_interface_state->interface_availability =
6528 		    ifp->if_interface_state.interface_availability;
6529 	}
6530 
6531 	ifnet_lock_done(ifp);
6532 }
6533 
6534 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)6535 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6536 {
6537 	if (conn_probe > 1) {
6538 		return EINVAL;
6539 	}
6540 	if (conn_probe == 0) {
6541 		if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6542 	} else {
6543 		if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6544 	}
6545 
6546 	os_log(OS_LOG_DEFAULT, "interface probing on %s set to %u by %s:%d",
6547 	    if_name(ifp), conn_probe, proc_best_name(current_proc()), proc_selfpid());
6548 
6549 #if NECP
6550 	necp_update_all_clients();
6551 #endif /* NECP */
6552 
6553 	tcp_probe_connectivity(ifp, conn_probe);
6554 	return 0;
6555 }
6556 
6557 /* for uuid.c */
6558 static int
get_ether_index(int * ret_other_index)6559 get_ether_index(int * ret_other_index)
6560 {
6561 	ifnet_ref_t ifp;
6562 	int en0_index = 0;
6563 	int other_en_index = 0;
6564 	int any_ether_index = 0;
6565 	short best_unit = 0;
6566 
6567 	*ret_other_index = 0;
6568 	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6569 		/*
6570 		 * find en0, or if not en0, the lowest unit en*, and if not
6571 		 * that, any ethernet
6572 		 */
6573 		ifnet_lock_shared(ifp);
6574 		if (strcmp(ifp->if_name, "en") == 0) {
6575 			if (ifp->if_unit == 0) {
6576 				/* found en0, we're done */
6577 				en0_index = ifp->if_index;
6578 				ifnet_lock_done(ifp);
6579 				break;
6580 			}
6581 			if (other_en_index == 0 || ifp->if_unit < best_unit) {
6582 				other_en_index = ifp->if_index;
6583 				best_unit = ifp->if_unit;
6584 			}
6585 		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
6586 			any_ether_index = ifp->if_index;
6587 		}
6588 		ifnet_lock_done(ifp);
6589 	}
6590 	if (en0_index == 0) {
6591 		if (other_en_index != 0) {
6592 			*ret_other_index = other_en_index;
6593 		} else if (any_ether_index != 0) {
6594 			*ret_other_index = any_ether_index;
6595 		}
6596 	}
6597 	return en0_index;
6598 }
6599 
/*
 * Copy a 6-byte ethernet address into "node" for UUID generation,
 * preferring en0, then the lowest-unit en* interface, then any
 * ethernet-type interface (see get_ether_index()).  The en0 index is
 * cached across calls and re-resolved when the cached index no longer
 * maps to an attached interface.  The permanent (factory) MAC address
 * is used when the driver has provided one, since it never changes.
 *
 * Returns 0 on success, -1 if no suitable interface was found.
 */
int
uuid_get_ethernet(u_int8_t *__counted_by(ETHER_ADDR_LEN) node)
{
	static int en0_index;   /* cached ifindex of en0 (0 = not resolved) */
	ifnet_ref_t ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	/* Re-resolve if never looked up, or if the cached index went stale */
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
		} else {
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		}
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();
	return ret;
}
6641 
6642 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6643 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6644     int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6645 {
6646 	struct kev_dl_node_presence kev;
6647 	struct sockaddr_dl *sdl;
6648 	struct sockaddr_in6 *sin6;
6649 	int ret = 0;
6650 
6651 	VERIFY(ifp);
6652 	VERIFY(sa);
6653 	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6654 
6655 	bzero(&kev, sizeof(kev));
6656 	sin6 = &kev.sin6_node_address;
6657 	sdl = &kev.sdl_node_address;
6658 	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6659 	kev.rssi = rssi;
6660 	kev.link_quality_metric = lqm;
6661 	kev.node_proximity_metric = npm;
6662 	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6663 
6664 	ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6665 	if (ret == 0 || ret == EEXIST) {
6666 		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6667 		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6668 		if (err != 0) {
6669 			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
6670 			    "error %d\n", __func__, err);
6671 		}
6672 	}
6673 
6674 	if (ret == EEXIST) {
6675 		ret = 0;
6676 	}
6677 	return ret;
6678 }
6679 
/*
 * Notify the system that a previously-present node has disappeared
 * from the link.  Accepts either an AF_INET6 node address or an
 * AF_LINK address from which the IPv6 address is derived; when the
 * neighbor-cache removal succeeds, a KEV_DL_NODE_ABSENCE kernel event
 * carrying both address forms is posted.
 */
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;
	int error = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
		error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 *
		 * NOTE(review): in this branch the link-layer address is
		 * already known from the decomposition, so NULL is passed
		 * and the neighbor cache is not asked to fill it in.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	if (error == 0) {
		/* Stamp the link-layer address with this interface's type/index */
		kev_sdl->sdl_type = ifp->if_type;
		kev_sdl->sdl_index = ifp->if_index;

		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
		    &kev.link_data, sizeof(kev), FALSE);
	}
}
6720 
6721 int
dlil_node_present_v2(struct ifnet * ifp,struct sockaddr * sa,struct sockaddr_dl * sdl,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6722 dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
6723     int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6724 {
6725 	struct kev_dl_node_presence kev = {};
6726 	struct sockaddr_dl *kev_sdl = NULL;
6727 	struct sockaddr_in6 *kev_sin6 = NULL;
6728 	int ret = 0;
6729 
6730 	VERIFY(ifp != NULL);
6731 	VERIFY(sa != NULL && sdl != NULL);
6732 	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);
6733 
6734 	kev_sin6 = &kev.sin6_node_address;
6735 	kev_sdl = &kev.sdl_node_address;
6736 
6737 	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
6738 	SOCKADDR_COPY(sdl, kev_sdl, sdl->sdl_len);
6739 	kev_sdl->sdl_type = ifp->if_type;
6740 	kev_sdl->sdl_index = ifp->if_index;
6741 
6742 	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
6743 	SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
6744 
6745 	kev.rssi = rssi;
6746 	kev.link_quality_metric = lqm;
6747 	kev.node_proximity_metric = npm;
6748 	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6749 
6750 	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
6751 	if (ret == 0 || ret == EEXIST) {
6752 		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6753 		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6754 		if (err != 0) {
6755 			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
6756 		}
6757 	}
6758 
6759 	if (ret == EEXIST) {
6760 		ret = 0;
6761 	}
6762 	return ret;
6763 }
6764 
6765 const void *
dlil_ifaddr_bytes(const struct sockaddr_dl * sdl,size_t * sizep,kauth_cred_t * credp)6766 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6767     kauth_cred_t *credp)
6768 {
6769 	const u_int8_t *bytes;
6770 	size_t size;
6771 
6772 	bytes = CONST_LLADDR(sdl);
6773 	size = sdl->sdl_alen;
6774 
6775 #if CONFIG_MACF
6776 	if (dlil_lladdr_ckreq) {
6777 		switch (sdl->sdl_type) {
6778 		case IFT_ETHER:
6779 		case IFT_IEEE1394:
6780 			break;
6781 		default:
6782 			credp = NULL;
6783 			break;
6784 		}
6785 		;
6786 
6787 		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6788 			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6789 				[0] = 2
6790 			};
6791 
6792 			bytes = unspec;
6793 		}
6794 	}
6795 #else
6796 #pragma unused(credp)
6797 #endif
6798 
6799 	if (sizep != NULL) {
6800 		*sizep = size;
6801 	}
6802 	return bytes;
6803 }
6804 
6805 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])6806 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6807     u_int8_t info[DLIL_MODARGLEN])
6808 {
6809 	struct kev_dl_issues kev;
6810 	struct timeval tv;
6811 
6812 	VERIFY(ifp != NULL);
6813 	VERIFY(modid != NULL);
6814 	static_assert(sizeof(kev.modid) == DLIL_MODIDLEN);
6815 	static_assert(sizeof(kev.info) == DLIL_MODARGLEN);
6816 
6817 	bzero(&kev, sizeof(kev));
6818 
6819 	microtime(&tv);
6820 	kev.timestamp = tv.tv_sec;
6821 	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6822 	if (info != NULL) {
6823 		bcopy(info, &kev.info, DLIL_MODARGLEN);
6824 	}
6825 
6826 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6827 	    &kev.link_data, sizeof(kev), FALSE);
6828 }
6829 
/*
 * Handle the SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC ioctls: set
 * or get the interface's throttling ("opportunistic") level.  In both
 * directions, on success the number of opportunistic TCP/UDP
 * connections currently on the interface is reported via ifo_inuse.
 *
 * Setting requires superuser.  EALREADY from ifnet_set_throttle
 * (level unchanged) is remapped to success at the end.
 */
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		/* Map the requested flags onto a throttling level */
		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		/* Get: translate the level back into ifo_flags */
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	/* Setting the same level twice is not an error to the caller */
	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
6888 
6889 int
ifnet_get_throttle(struct ifnet * ifp,u_int32_t * level)6890 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
6891 {
6892 	struct ifclassq *ifq;
6893 	cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
6894 	int err = 0;
6895 
6896 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
6897 		return ENXIO;
6898 	}
6899 
6900 	*level = IFNET_THROTTLE_OFF;
6901 
6902 	ifq = ifp->if_snd;
6903 	err = ifclassq_request(ifq, CLASSQRQ_THROTTLE, &req, false);
6904 	*level = req.level;
6905 
6906 	return err;
6907 }
6908 
6909 int
ifnet_set_throttle(struct ifnet * ifp,u_int32_t level)6910 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
6911 {
6912 	struct ifclassq *ifq;
6913 	cqrq_throttle_t req = { 1, level };
6914 	int err = 0;
6915 
6916 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
6917 		return ENXIO;
6918 	}
6919 
6920 	ifq = ifp->if_snd;
6921 
6922 	switch (level) {
6923 	case IFNET_THROTTLE_OFF:
6924 	case IFNET_THROTTLE_OPPORTUNISTIC:
6925 		break;
6926 	default:
6927 		return EINVAL;
6928 	}
6929 
6930 	err = ifclassq_request(ifq, CLASSQRQ_THROTTLE, &req, false);
6931 
6932 	if (err == 0) {
6933 		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
6934 		    level);
6935 #if NECP
6936 		necp_update_all_clients();
6937 #endif /* NECP */
6938 		if (level == IFNET_THROTTLE_OFF) {
6939 			ifnet_start(ifp);
6940 		}
6941 	}
6942 
6943 	return err;
6944 }
6945 
6946 errno_t
ifnet_getset_log(ifnet_t ifp,u_long cmd,struct ifreq * ifr,struct proc * p)6947 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
6948     struct proc *p)
6949 {
6950 #pragma unused(p)
6951 	errno_t result = 0;
6952 	uint32_t flags;
6953 	int level, category, subcategory;
6954 
6955 	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
6956 
6957 	if (cmd == SIOCSIFLOG) {
6958 		if ((result = priv_check_cred(kauth_cred_get(),
6959 		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
6960 			return result;
6961 		}
6962 
6963 		level = ifr->ifr_log.ifl_level;
6964 		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
6965 			result = EINVAL;
6966 		}
6967 
6968 		flags = ifr->ifr_log.ifl_flags;
6969 		if ((flags &= IFNET_LOGF_MASK) == 0) {
6970 			result = EINVAL;
6971 		}
6972 
6973 		category = ifr->ifr_log.ifl_category;
6974 		subcategory = ifr->ifr_log.ifl_subcategory;
6975 
6976 		if (result == 0) {
6977 			result = ifnet_set_log(ifp, level, flags,
6978 			    category, subcategory);
6979 		}
6980 	} else {
6981 		result = ifnet_get_log(ifp, &level, &flags, &category,
6982 		    &subcategory);
6983 		if (result == 0) {
6984 			ifr->ifr_log.ifl_level = level;
6985 			ifr->ifr_log.ifl_flags = flags;
6986 			ifr->ifr_log.ifl_category = category;
6987 			ifr->ifr_log.ifl_subcategory = subcategory;
6988 		}
6989 	}
6990 
6991 	return result;
6992 }
6993 
/*
 * Set the logging level and facility flags for an interface.  The
 * request (minus the DLIL facility, which is handled here) is
 * forwarded to the driver through if_output_ctl when available;
 * without that callback, non-DLIL facilities are silently dropped.
 *
 * Returns 0, or the error from the driver's output control handler.
 */
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;    /* DLIL is handled here, not by the driver */
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 *
		 * NOTE(review): the if_output_ctl == NULL re-test above is
		 * redundant; we can only reach this branch when it is NULL.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		/* Commit the level; IFNET_LOG_DEFAULT also resets the flags */
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
		    "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags, flags,
		    category, subcategory);
	}

	return err;
}
7051 
7052 int
ifnet_get_log(struct ifnet * ifp,int32_t * level,uint32_t * flags,int32_t * category,int32_t * subcategory)7053 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7054     int32_t *category, int32_t *subcategory)
7055 {
7056 	if (level != NULL) {
7057 		*level = ifp->if_log.level;
7058 	}
7059 	if (flags != NULL) {
7060 		*flags = ifp->if_log.flags;
7061 	}
7062 	if (category != NULL) {
7063 		*category = ifp->if_log.category;
7064 	}
7065 	if (subcategory != NULL) {
7066 		*subcategory = ifp->if_log.subcategory;
7067 	}
7068 
7069 	return 0;
7070 }
7071 
7072 int
ifnet_notify_address(struct ifnet * ifp,int af)7073 ifnet_notify_address(struct ifnet *ifp, int af)
7074 {
7075 	struct ifnet_notify_address_params na;
7076 
7077 #if PF
7078 	(void) pf_ifaddr_hook(ifp);
7079 #endif /* PF */
7080 
7081 	if (ifp->if_output_ctl == NULL) {
7082 		return EOPNOTSUPP;
7083 	}
7084 
7085 	bzero(&na, sizeof(na));
7086 	na.address_family = (sa_family_t)af;
7087 
7088 	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7089 	           sizeof(na), &na);
7090 }
7091 
7092 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)7093 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7094 {
7095 	if (ifp == NULL || flowid == NULL) {
7096 		return EINVAL;
7097 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7098 	    !ifnet_is_fully_attached(ifp)) {
7099 		return ENXIO;
7100 	}
7101 
7102 	*flowid = ifp->if_flowhash;
7103 
7104 	return 0;
7105 }
7106 
7107 errno_t
ifnet_disable_output(struct ifnet * ifp)7108 ifnet_disable_output(struct ifnet *ifp)
7109 {
7110 	int err = 0;
7111 
7112 	if (ifp == NULL) {
7113 		return EINVAL;
7114 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7115 	    !ifnet_is_fully_attached(ifp)) {
7116 		return ENXIO;
7117 	}
7118 
7119 	lck_mtx_lock(&ifp->if_start_lock);
7120 	if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
7121 		ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
7122 	} else if ((err = ifnet_fc_add(ifp)) == 0) {
7123 		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7124 	}
7125 	lck_mtx_unlock(&ifp->if_start_lock);
7126 
7127 	return err;
7128 }
7129 
7130 errno_t
ifnet_enable_output(struct ifnet * ifp)7131 ifnet_enable_output(struct ifnet *ifp)
7132 {
7133 	if (ifp == NULL) {
7134 		return EINVAL;
7135 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7136 	    !ifnet_is_fully_attached(ifp)) {
7137 		return ENXIO;
7138 	}
7139 
7140 	ifnet_start_common(ifp, TRUE, FALSE);
7141 	return 0;
7142 }
7143 
/*
 * Flow-advisory callback: a lower layer signals that the flow
 * identified by "flowhash" may resume.  Looks up (and detaches) the
 * flow-control entry; if the interface still carries the same flow
 * hash, records the resume and re-enables output, then drops the
 * entry.  An I/O reference is held around the interface access.
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	ifnet_ref_t ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_get_ioref(ifp)) {
		if (ifp->if_flowhash == flowhash) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			/*
			 * If not currently flow-controlled, remember that a
			 * resume arrived (consumed by ifnet_disable_output()).
			 */
			if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
				ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
			}
			lck_mtx_unlock(&ifp->if_start_lock);
			(void) ifnet_enable_output(ifp);
		}
		ifnet_decr_iorefcnt(ifp);
	}
	/* entry was detached from the tree by ifnet_fc_get() */
	ifnet_fc_entry_free(ifce);
}
7172 
7173 /*
7174  * Function to compare ifnet_fc_entries in ifnet flow control tree
7175  */
7176 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)7177 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7178 {
7179 	return fc1->ifce_flowhash - fc2->ifce_flowhash;
7180 }
7181 
/*
 * Register the interface's current flow hash in the flow-control
 * tree so a later flow advisory (ifnet_flowadv) can find it.
 *
 * Returns 0 if the entry was added or is already present for this
 * interface; EAGAIN if a different interface collides on the same
 * flow hash.
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	/* Stack-allocated key used only for the tree lookup */
	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex (the allocation below may block) */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
7225 
/*
 * Look up and detach the flow-control entry for "flowhash" from
 * ifnet_fc_tree.  Returns the detached entry — to be released by the
 * caller via ifnet_fc_entry_free() — or NULL when no entry exists or
 * its interface is no longer fully attached.
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	ifnet_ref_t ifp;

	/* Stack-allocated key used only for the tree lookup */
	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	/* Detach; ownership passes to the caller (or freed below) */
	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_fully_attached(ifp)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
7263 
/*
 * Release a flow-control entry back to ifnet_fc_zone.  Callers detach
 * the entry from ifnet_fc_tree first (see ifnet_fc_get()).
 */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
7269 
7270 static uint32_t
ifnet_calc_flowhash(struct ifnet * ifp)7271 ifnet_calc_flowhash(struct ifnet *ifp)
7272 {
7273 	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7274 	uint32_t flowhash = 0;
7275 
7276 	if (ifnet_flowhash_seed == 0) {
7277 		ifnet_flowhash_seed = RandomULong();
7278 	}
7279 
7280 	bzero(&fh, sizeof(fh));
7281 
7282 	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
7283 	fh.ifk_unit = ifp->if_unit;
7284 	fh.ifk_flags = ifp->if_flags;
7285 	fh.ifk_eflags = ifp->if_eflags;
7286 	fh.ifk_capabilities = ifp->if_capabilities;
7287 	fh.ifk_capenable = ifp->if_capenable;
7288 	fh.ifk_output_sched_model = ifp->if_output_sched_model;
7289 	fh.ifk_rand1 = RandomULong();
7290 	fh.ifk_rand2 = RandomULong();
7291 
7292 try_again:
7293 	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
7294 	if (flowhash == 0) {
7295 		/* try to get a non-zero flowhash */
7296 		ifnet_flowhash_seed = RandomULong();
7297 		goto try_again;
7298 	}
7299 
7300 	return flowhash;
7301 }
7302 
/*
 * Install (len > 0) or clear (len == 0) the network signature of the
 * given address family on the interface.  "flags" is currently unused.
 *
 * Returns 0 on success; EINVAL for an unsupported family or a
 * signature larger than the storage; ENOMEM when the per-family
 * extra data has not been allocated for this interface.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *__sized_by(len) data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				/* break exits the switch past the common unlock below */
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				/* break exits the switch past the common unlock below */
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
7364 
/*
 * Copy the interface's network signature for the given address family
 * into "data".  On input *len is the caller's buffer size; on output
 * it is the actual signature length.  *flags, when provided, is set
 * to 0 on success.
 *
 * Returns 0 on success; EINVAL for a bad argument/family or an
 * undersized buffer; ENOMEM when the per-family extra data is
 * missing; ENOENT when no signature has been set.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *__sized_by(*len) data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			/* Reject buffers too small for the stored signature */
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			/* Reject buffers too small for the stored signature */
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
7425 
/*
 * Install up to NAT64_MAX_NUM_PREFIXES NAT64 prefixes on the
 * interface.  A slot with prefix_len == 0 is cleared; a non-zero
 * prefix_len must be one of the supported lengths (32/40/48/56/64/96
 * bits) and the prefix must not be a scoped address.  If at least one
 * prefix was set, NECP clients are told to re-evaluate.
 *
 * Returns 0 on success, EINVAL on a bad length or scoped prefix,
 * ENOMEM when the interface's inet6 extra data is missing.
 */
int
ifnet_set_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	/* Notify NECP outside the lock, and only if something changed */
	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
7492 
7493 int
ifnet_get_nat64prefix(struct ifnet * ifp,struct ipv6_prefix * __counted_by (NAT64_MAX_NUM_PREFIXES)prefixes)7494 ifnet_get_nat64prefix(struct ifnet *ifp,
7495     struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
7496 {
7497 	int i, found_one = 0, error = 0;
7498 
7499 	if (ifp == NULL) {
7500 		return EINVAL;
7501 	}
7502 
7503 	if_inet6data_lock_shared(ifp);
7504 
7505 	if (IN6_IFEXTRA(ifp) == NULL) {
7506 		error = ENOMEM;
7507 		goto out;
7508 	}
7509 
7510 	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
7511 		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
7512 			found_one = 1;
7513 		}
7514 	}
7515 
7516 	if (found_one == 0) {
7517 		error = ENOENT;
7518 		goto out;
7519 	}
7520 
7521 	if (prefixes) {
7522 		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
7523 		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
7524 	}
7525 
7526 out:
7527 	if_inet6data_lock_done(ifp);
7528 
7529 	return error;
7530 }
7531 
7532 #if DEBUG || DEVELOPMENT
/*
 * Blob for sum16 verification.  The leading 0x1f 0x8b bytes look like a
 * gzip magic number (content presumably compressed junk -- TODO confirm),
 * but the payload's meaning is irrelevant: only the raw byte values are
 * used as checksum test input by dlil_verify_sum16() below.
 */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
7569 
/*
 * Precomputed 16-bit 1's complement sums for various spans of the above
 * data.  `sumr' starts as 0 and is filled in lazily by
 * dlil_verify_sum16() on the first pass from in_cksum_mbuf_ref();
 * `sumrp' is the precomputed value that reference sum must match.
 */
static struct {
	boolean_t       init;   /* TRUE once sumr has been computed */
	uint16_t        len;    /* span length in bytes from start of sumdata */
	uint16_t        sumr;   /* reference */
	uint16_t        sumrp;  /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
/* Number of entries in sumtbl */
#define SUMTBL_MAX      ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7594 
7595 static void
dlil_verify_sum16(void)7596 dlil_verify_sum16(void)
7597 {
7598 	struct mbuf *m;
7599 	uint8_t *buf;
7600 	int n;
7601 
7602 	/* Make sure test data plus extra room for alignment fits in cluster */
7603 	static_assert((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
7604 
7605 	kprintf("DLIL: running SUM16 self-tests ... ");
7606 
7607 	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
7608 	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
7609 
7610 	buf = mtod(m, uint8_t *);               /* base address */
7611 
7612 	for (n = 0; n < SUMTBL_MAX; n++) {
7613 		uint16_t len = sumtbl[n].len;
7614 		int i;
7615 
7616 		/* Verify for all possible alignments */
7617 		for (i = 0; i < (int)sizeof(uint64_t); i++) {
7618 			uint16_t sum, sumr;
7619 			uint8_t *c;
7620 
7621 			/* Copy over test data to mbuf */
7622 			VERIFY(len <= sizeof(sumdata));
7623 			c = buf + i;
7624 			bcopy(sumdata, c, len);
7625 
7626 			/* Zero-offset test (align by data pointer) */
7627 			m->m_data = (uintptr_t)c;
7628 			m->m_len = len;
7629 			sum = m_sum16(m, 0, len);
7630 
7631 			if (!sumtbl[n].init) {
7632 				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
7633 				sumtbl[n].sumr = sumr;
7634 				sumtbl[n].init = TRUE;
7635 			} else {
7636 				sumr = sumtbl[n].sumr;
7637 			}
7638 
7639 			/* Something is horribly broken; stop now */
7640 			if (sumr != sumtbl[n].sumrp) {
7641 				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
7642 				    "for len=%d align=%d sum=0x%04x "
7643 				    "[expected=0x%04x]\n", __func__,
7644 				    len, i, sum, sumr);
7645 				/* NOTREACHED */
7646 			} else if (sum != sumr) {
7647 				panic_plain("\n%s: broken m_sum16() for len=%d "
7648 				    "align=%d sum=0x%04x [expected=0x%04x]\n",
7649 				    __func__, len, i, sum, sumr);
7650 				/* NOTREACHED */
7651 			}
7652 
7653 			/* Alignment test by offset (fixed data pointer) */
7654 			m->m_data = (uintptr_t)buf;
7655 			m->m_len = i + len;
7656 			sum = m_sum16(m, i, len);
7657 
7658 			/* Something is horribly broken; stop now */
7659 			if (sum != sumr) {
7660 				panic_plain("\n%s: broken m_sum16() for len=%d "
7661 				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
7662 				    __func__, len, i, sum, sumr);
7663 				/* NOTREACHED */
7664 			}
7665 #if INET
7666 			/* Simple sum16 contiguous buffer test by aligment */
7667 			sum = b_sum16(c, len);
7668 
7669 			/* Something is horribly broken; stop now */
7670 			if (sum != sumr) {
7671 				panic_plain("\n%s: broken b_sum16() for len=%d "
7672 				    "align=%d sum=0x%04x [expected=0x%04x]\n",
7673 				    __func__, len, i, sum, sumr);
7674 				/* NOTREACHED */
7675 			}
7676 #endif /* INET */
7677 		}
7678 	}
7679 	m_freem(m);
7680 
7681 	kprintf("PASSED\n");
7682 }
7683 #endif /* DEBUG || DEVELOPMENT */
7684 
7685 #define CASE_STRINGIFY(x) case x: return #x
7686 
7687 __private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)7688 dlil_kev_dl_code_str(u_int32_t event_code)
7689 {
7690 	switch (event_code) {
7691 		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
7692 		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
7693 		CASE_STRINGIFY(KEV_DL_SIFMTU);
7694 		CASE_STRINGIFY(KEV_DL_SIFPHYS);
7695 		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
7696 		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
7697 		CASE_STRINGIFY(KEV_DL_ADDMULTI);
7698 		CASE_STRINGIFY(KEV_DL_DELMULTI);
7699 		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
7700 		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
7701 		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
7702 		CASE_STRINGIFY(KEV_DL_LINK_OFF);
7703 		CASE_STRINGIFY(KEV_DL_LINK_ON);
7704 		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
7705 		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
7706 		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
7707 		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
7708 		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
7709 		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
7710 		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
7711 		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
7712 		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
7713 		CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
7714 		CASE_STRINGIFY(KEV_DL_ISSUES);
7715 		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
7716 	default:
7717 		break;
7718 	}
7719 	return "";
7720 }
7721 
7722 void
dlil_dt_tcall_fn(thread_call_param_t arg0,thread_call_param_t arg1)7723 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
7724 {
7725 #pragma unused(arg1)
7726 	ifnet_ref_t ifp = arg0;
7727 
7728 	if (ifnet_get_ioref(ifp)) {
7729 		nstat_ifnet_threshold_reached(ifp->if_index);
7730 		ifnet_decr_iorefcnt(ifp);
7731 	}
7732 }
7733 
/*
 * Check whether the interface has moved enough bytes (in + out) since
 * the last notification to cross its data threshold and, if so,
 * schedule the interface's thread call to notify NetworkStatistics.
 */
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 *
	 * NOTE: the order of the && operands matters.  The CAS is only
	 * attempted once the threshold test passes, and it both records
	 * the new byte count and ensures that a single thread wins the
	 * right to schedule the call for this crossing.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			/* Rate-limit: defer to the next periodic deadline */
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			/* No interval configured: notify immediately */
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
7763 
7764 
/*
 * Fold per-flow interface statistics into TCP's accounting for the
 * given interface; thin wrapper over tcp_update_stats_per_flow().
 */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
7771 
7772 static inline u_int32_t
_set_flags(u_int32_t * flags_p,u_int32_t set_flags)7773 _set_flags(u_int32_t *flags_p, u_int32_t set_flags)
7774 {
7775 	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
7776 }
7777 
7778 static inline u_int32_t
_clear_flags(u_int32_t * flags_p,u_int32_t clear_flags)7779 _clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
7780 {
7781 	return (u_int32_t)OSBitAndAtomic(~clear_flags, flags_p);
7782 }
7783 
/*
 * Atomically set bits in the interface's extended flags (if_eflags);
 * returns the result of _set_flags() (the prior flag word).
 */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}
7789 
/*
 * Atomically clear bits in the interface's extended flags (if_eflags);
 * the prior flag word is intentionally discarded.
 */
__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}
7795 
/*
 * Atomically set bits in the interface's extra flags (if_xflags);
 * returns the result of _set_flags() (the prior flag word).
 */
__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}
7801 
/*
 * Atomically clear bits in the interface's extra flags (if_xflags);
 * returns the result of _clear_flags() (the prior flag word).
 */
__private_extern__ u_int32_t
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	return _clear_flags(&interface->if_xflags, clear_flags);
}
7807 
/*
 * Bump the interface's traffic-rule generation id (relaxed atomic
 * increment) so readers can detect that the rule set changed.
 */
__private_extern__ void
ifnet_update_traffic_rule_genid(ifnet_t ifp)
{
	os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
}
7813 
7814 __private_extern__ boolean_t
ifnet_sync_traffic_rule_genid(ifnet_t ifp,uint32_t * genid)7815 ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
7816 {
7817 	if (*genid != ifp->if_traffic_rule_genid) {
7818 		*genid = ifp->if_traffic_rule_genid;
7819 		return TRUE;
7820 	}
7821 	return FALSE;
7822 }
/*
 * Publish a new count of inet traffic rules for the interface (relaxed
 * atomic store) and bump the generation id so readers pick it up.
 */
__private_extern__ void
ifnet_update_inet_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_inet_traffic_rule_count, count, relaxed);
	ifnet_update_traffic_rule_genid(ifp);
}
7829 
/*
 * Publish a new count of ethernet traffic rules for the interface
 * (relaxed atomic store) and bump the generation id so readers pick
 * it up.
 */
__private_extern__ void
ifnet_update_eth_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_eth_traffic_rule_count, count, relaxed);
	ifnet_update_traffic_rule_genid(ifp);
}
7836 
7837 #if SKYWALK
7838 static bool
net_check_compatible_if_filter(struct ifnet * ifp)7839 net_check_compatible_if_filter(struct ifnet *ifp)
7840 {
7841 	if (ifp == NULL) {
7842 		if (net_api_stats.nas_iflt_attach_count > net_api_stats.nas_iflt_attach_os_count) {
7843 			return false;
7844 		}
7845 	} else {
7846 		if (ifp->if_flt_non_os_count > 0) {
7847 			return false;
7848 		}
7849 	}
7850 	return true;
7851 }
7852 #endif /* SKYWALK */
7853 
7854 #if CONFIG_MBUF_MCACHE
/*
 * Advance the dump cursor after an scnprintf(): consume the `k' bytes
 * just written and jump to the enclosing function's `done:' label once
 * less than one byte of space remains.  Relies by name on locals
 * c/clen/k and a done: label in the caller.
 */
#define DUMP_BUF_CHK() {        \
	clen -= k;              \
	if (clen < 1)           \
	        goto done;      \
	c += k;                 \
}
7861 
7862 #if NETWORKING
int dlil_dump_top_if_qlen(char *__counted_by(str_len), int str_len);
/*
 * Scan the ifindex table and write a short human-readable report of the
 * interface with the deepest send queue (ifcq) and the one with the
 * deepest DLIL input queue into `str' (at most str_len bytes).
 * Returns the number of bytes written.
 *
 * NOTE: the locals `c', `clen' and `k' must keep these exact names --
 * they are referenced by name inside the DUMP_BUF_CHK() macro above.
 */
int
dlil_dump_top_if_qlen(char *__counted_by(str_len) str, int str_len)
{
	char *c = str;
	int k, clen = str_len;
	ifnet_ref_t top_ifcq_ifp = NULL;        /* deepest send queue seen */
	uint32_t top_ifcq_len = 0;
	ifnet_ref_t top_inq_ifp = NULL;         /* deepest input queue seen */
	uint32_t top_inq_len = 0;

	/* Single pass over the ifindex table to find both maxima */
	for (int ifidx = 1; ifidx < if_index; ifidx++) {
		ifnet_ref_t ifp = ifindex2ifnet[ifidx];
		struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

		if (ifp == NULL) {
			continue;
		}
		if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
			top_ifcq_len = ifp->if_snd->ifcq_len;
			top_ifcq_ifp = ifp;
		}
		if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
			top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
			top_inq_ifp = ifp;
		}
	}

	if (top_ifcq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
		    top_ifcq_len, top_ifcq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
	if (top_inq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
		    top_inq_len, top_inq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
done:
	return str_len - clen;
}
7904 #endif /* NETWORKING */
7905 #endif /* CONFIG_MBUF_MCACHE */
7906