xref: /xnu-11417.140.69/bsd/net/dlil.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30  * support for mandatory and extensible security protections.  This notice
31  * is included in support of clause 2.2 (b) of the Apple Public License,
32  * Version 2.0.
33  */
34 #include <stddef.h>
35 #include <ptrauth.h>
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/dlil_sysctl.h>
53 #include <net/dlil_var_private.h>
54 #include <net/if_arp.h>
55 #include <net/iptap.h>
56 #include <net/pktap.h>
57 #include <net/droptap.h>
58 #include <net/nwk_wq.h>
59 #include <sys/kern_event.h>
60 #include <sys/kdebug.h>
61 #include <sys/mcache.h>
62 #include <sys/syslog.h>
63 #include <sys/protosw.h>
64 #include <sys/priv.h>
65 
66 #include <kern/assert.h>
67 #include <kern/task.h>
68 #include <kern/thread.h>
69 #include <kern/sched_prim.h>
70 #include <kern/locks.h>
71 #include <kern/zalloc.h>
72 
73 #include <net/kpi_protocol.h>
74 #include <net/kpi_interface.h>
75 #include <net/if_types.h>
76 #include <net/if_ipsec.h>
77 #include <net/if_llreach.h>
78 #include <net/if_utun.h>
79 #include <net/kpi_interfacefilter.h>
80 #include <net/classq/classq.h>
81 #include <net/classq/classq_sfb.h>
82 #include <net/flowhash.h>
83 #include <net/ntstat.h>
84 #if SKYWALK
85 #include <skywalk/lib/net_filter_event.h>
86 #endif /* SKYWALK */
87 #include <net/net_api_stats.h>
88 #include <net/if_ports_used.h>
89 #include <net/if_vlan_var.h>
90 #include <netinet/in.h>
91 #if INET
92 #include <netinet/in_var.h>
93 #include <netinet/igmp_var.h>
94 #include <netinet/ip_var.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_var.h>
97 #include <netinet/udp.h>
98 #include <netinet/udp_var.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/in_pcb.h>
101 #include <netinet/in_tclass.h>
102 #include <netinet/ip.h>
103 #include <netinet/ip_icmp.h>
104 #include <netinet/icmp_var.h>
105 #endif /* INET */
106 
107 #include <net/nat464_utils.h>
108 #include <netinet6/in6_var.h>
109 #include <netinet6/nd6.h>
110 #include <netinet6/mld6_var.h>
111 #include <netinet6/scope6_var.h>
112 #include <netinet/ip6.h>
113 #include <netinet/icmp6.h>
114 #include <net/pf_pbuf.h>
115 #include <libkern/OSAtomic.h>
116 #include <libkern/tree.h>
117 
118 #include <dev/random/randomdev.h>
119 #include <machine/machine_routines.h>
120 
121 #include <mach/thread_act.h>
122 #include <mach/sdt.h>
123 
124 #if CONFIG_MACF
125 #include <sys/kauth.h>
126 #include <security/mac_framework.h>
127 #include <net/ethernet.h>
128 #include <net/firewire.h>
129 #endif
130 
131 #if PF
132 #include <net/pfvar.h>
133 #endif /* PF */
134 #include <net/pktsched/pktsched.h>
135 #include <net/pktsched/pktsched_netem.h>
136 
137 #if NECP
138 #include <net/necp.h>
139 #endif /* NECP */
140 
141 #if SKYWALK
142 #include <skywalk/packet/packet_queue.h>
143 #include <skywalk/nexus/netif/nx_netif.h>
144 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
145 #endif /* SKYWALK */
146 
147 #include <net/sockaddr_utils.h>
148 
149 #include <os/log.h>
150 
151 uint64_t if_creation_generation_count = 0;
152 
153 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
154 
155 dlil_ifnet_queue_t dlil_ifnet_head;
156 
157 #if DEBUG
158 unsigned int ifnet_debug = 1;    /* debugging (enabled) */
159 #else
160 unsigned int ifnet_debug;        /* debugging (disabled) */
161 #endif /* !DEBUG */
162 
163 
164 static u_int32_t net_rtref;
165 
166 static struct dlil_main_threading_info dlil_main_input_thread_info;
167 struct dlil_threading_info *__single dlil_main_input_thread;
168 
169 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
170 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
171 
172 static int ifnet_lookup(struct ifnet *);
173 static void if_purgeaddrs(struct ifnet *);
174 
175 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
176     struct mbuf *, char *);
177 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
178     struct mbuf *);
179 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
180     mbuf_t *, const struct sockaddr *, void *,
181     IFNET_FRAME_TYPE_RW_T, IFNET_LLADDR_RW_T);
182 static void ifproto_media_event(struct ifnet *, protocol_family_t,
183     const struct kev_msg *);
184 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
185     unsigned long, void *);
186 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
187     struct sockaddr_dl *, size_t);
188 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
189     const struct sockaddr_dl *, const struct sockaddr *,
190     const struct sockaddr_dl *, const struct sockaddr *);
191 
192 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
193     struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
194     boolean_t poll, struct thread *tp);
195 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
196     struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
197 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
198 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
199     protocol_family_t *);
200 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
201     const struct ifnet_demux_desc *, u_int32_t);
202 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
203 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
204 #if !XNU_TARGET_OS_OSX
205 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
206     const struct sockaddr *, IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
207     u_int32_t *, u_int32_t *);
208 #else /* XNU_TARGET_OS_OSX */
209 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
210     const struct sockaddr *,
211     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T);
212 #endif /* XNU_TARGET_OS_OSX */
213 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
214     const struct sockaddr *,
215     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
216     u_int32_t *, u_int32_t *);
217 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
218 static void ifp_if_free(struct ifnet *);
219 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
220 
221 
222 
223 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
224     const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
225 #if DEBUG || DEVELOPMENT
226 static void dlil_verify_sum16(void);
227 #endif /* DEBUG || DEVELOPMENT */
228 
229 
230 static void ifnet_detacher_thread_func(void *, wait_result_t);
231 static void ifnet_detacher_thread_cont(void *, wait_result_t);
232 static void ifnet_detach_final(struct ifnet *);
233 static void ifnet_detaching_enqueue(struct ifnet *);
234 static struct ifnet *ifnet_detaching_dequeue(void);
235 
236 static void ifnet_start_thread_func(void *, wait_result_t);
237 static void ifnet_start_thread_cont(void *, wait_result_t);
238 
239 static void ifnet_poll_thread_func(void *, wait_result_t);
240 static void ifnet_poll_thread_cont(void *, wait_result_t);
241 
242 static errno_t ifnet_enqueue_common(struct ifnet *, struct ifclassq *,
243     classq_pkt_t *, boolean_t, boolean_t *);
244 
245 static void ifp_src_route_copyout(struct ifnet *, struct route *);
246 static void ifp_src_route_copyin(struct ifnet *, struct route *);
247 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
248 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
249 
250 
251 /* The following are protected by dlil_ifnet_lock */
252 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
253 static u_int32_t ifnet_detaching_cnt;
254 static boolean_t ifnet_detaching_embryonic;
255 static void *ifnet_delayed_run; /* wait channel for detaching thread */
256 
257 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
258     &dlil_lck_attributes);
259 
260 static uint32_t ifnet_flowhash_seed;
261 
262 struct ifnet_flowhash_key {
263 	char            ifk_name[IFNAMSIZ];
264 	uint32_t        ifk_unit;
265 	uint32_t        ifk_flags;
266 	uint32_t        ifk_eflags;
267 	uint32_t        ifk_capabilities;
268 	uint32_t        ifk_capenable;
269 	uint32_t        ifk_output_sched_model;
270 	uint32_t        ifk_rand1;
271 	uint32_t        ifk_rand2;
272 };
273 
274 /* Flow control entry per interface */
275 struct ifnet_fc_entry {
276 	RB_ENTRY(ifnet_fc_entry) ifce_entry;
277 	u_int32_t       ifce_flowhash;
278 	ifnet_ref_t     ifce_ifp;
279 };
280 
281 static uint32_t ifnet_calc_flowhash(struct ifnet *);
282 static int ifce_cmp(const struct ifnet_fc_entry *,
283     const struct ifnet_fc_entry *);
284 static int ifnet_fc_add(struct ifnet *);
285 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
286 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
287 
288 /* protected by ifnet_fc_lock */
289 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
290 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
291 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
292 
293 static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
294 
295 extern void bpfdetach(struct ifnet *);
296 
297 
298 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
299     u_int32_t flags);
300 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
301     u_int32_t flags);
302 
303 
304 #if CONFIG_MACF
305 #if !XNU_TARGET_OS_OSX
306 int dlil_lladdr_ckreq = 1;
307 #else /* XNU_TARGET_OS_OSX */
308 int dlil_lladdr_ckreq = 0;
309 #endif /* XNU_TARGET_OS_OSX */
310 #endif /* CONFIG_MACF */
311 
312 
/*
 * Atomically bump the global count of requests to disable delayed
 * transmit start (ifnet_delay_start_disabled).
 */
static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}
318 
319 unsigned int net_rxpoll = 1;
320 unsigned int net_affinity = 1;
321 unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */
322 
323 extern u_int32_t        inject_buckets;
324 
325 void
ifnet_filter_update_tso(struct ifnet * ifp,boolean_t filter_enable)326 ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
327 {
328 	/*
329 	 * update filter count and route_generation ID to let TCP
330 	 * know it should reevalute doing TSO or not
331 	 */
332 	if (filter_enable) {
333 		OSAddAtomic(1, &ifp->if_flt_no_tso_count);
334 	} else {
335 		VERIFY(ifp->if_flt_no_tso_count != 0);
336 		OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
337 	}
338 	routegenid_update();
339 }
340 
341 #if SKYWALK
342 
343 static bool net_check_compatible_if_filter(struct ifnet *ifp);
344 
345 /* if_attach_nx flags defined in os_skywalk_private.h */
346 unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
347 unsigned int if_enable_fsw_ip_netagent =
348     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
349 unsigned int if_enable_fsw_transport_netagent =
350     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
351 
352 unsigned int if_netif_all =
353     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);
354 
355 /* Configure flowswitch to use max mtu sized buffer */
356 static bool fsw_use_max_mtu_buffer = false;
357 
358 
359 static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);
360 
361 #include <skywalk/os_skywalk_private.h>
362 
363 boolean_t
ifnet_nx_noauto(ifnet_t ifp)364 ifnet_nx_noauto(ifnet_t ifp)
365 {
366 	return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
367 }
368 
369 boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)370 ifnet_nx_noauto_flowswitch(ifnet_t ifp)
371 {
372 	return ifnet_is_low_latency(ifp);
373 }
374 
375 boolean_t
ifnet_is_low_latency(ifnet_t ifp)376 ifnet_is_low_latency(ifnet_t ifp)
377 {
378 	return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
379 }
380 
381 boolean_t
ifnet_needs_compat(ifnet_t ifp)382 ifnet_needs_compat(ifnet_t ifp)
383 {
384 	if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
385 		return FALSE;
386 	}
387 #if !XNU_TARGET_OS_OSX
388 	/*
389 	 * To conserve memory, we plumb in the compat layer selectively; this
390 	 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
391 	 * In particular, we check for Wi-Fi Access Point.
392 	 */
393 	if (IFNET_IS_WIFI(ifp)) {
394 		/* Wi-Fi Access Point */
395 		if (strcmp(ifp->if_name, "ap") == 0) {
396 			return if_netif_all;
397 		}
398 	}
399 #else /* XNU_TARGET_OS_OSX */
400 #pragma unused(ifp)
401 #endif /* XNU_TARGET_OS_OSX */
402 	return TRUE;
403 }
404 
405 boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)406 ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
407 {
408 	if (if_is_fsw_transport_netagent_enabled()) {
409 		/* check if netagent has been manually enabled for ipsec/utun */
410 		if (ifp->if_family == IFNET_FAMILY_IPSEC) {
411 			return ipsec_interface_needs_netagent(ifp);
412 		} else if (ifp->if_family == IFNET_FAMILY_UTUN) {
413 			return utun_interface_needs_netagent(ifp);
414 		}
415 
416 		/* check ifnet no auto nexus override */
417 		if (ifnet_nx_noauto(ifp)) {
418 			return FALSE;
419 		}
420 
421 		/* check global if_attach_nx configuration */
422 		switch (ifp->if_family) {
423 		case IFNET_FAMILY_CELLULAR:
424 		case IFNET_FAMILY_ETHERNET:
425 			if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
426 				return TRUE;
427 			}
428 			break;
429 		default:
430 			break;
431 		}
432 	}
433 	return FALSE;
434 }
435 
436 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)437 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
438 {
439 #pragma unused(ifp)
440 	if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
441 		return TRUE;
442 	}
443 	return FALSE;
444 }
445 
446 boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)447 ifnet_needs_netif_netagent(ifnet_t ifp)
448 {
449 #pragma unused(ifp)
450 	return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
451 }
452 
/*
 * Tear down a single nexus provider instance and, if present, the device
 * port attached to it.  Returns TRUE when an instance was actually freed,
 * FALSE when there was nothing to detach (NULL pointer or null UUID).
 * Errors from the detach/free calls are logged but not propagated.
 */
static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,
    const char *func_str, uuid_t instance, uuid_t device)
{
	errno_t         err;

	if (instance == NULL || uuid_is_null(instance)) {
		return FALSE;
	}

	/* detach the device port first, if one is attached */
	if (device != NULL && !uuid_is_null(device)) {
		err = kern_nexus_ifdetach(controller, instance, device);
		if (err != 0) {
			DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
			    func_str, err);
		}
	}
	/* then free the provider instance itself */
	err = kern_nexus_controller_free_provider_instance(controller,
	    instance);
	if (err != 0) {
		DLIL_PRINTF("%s free_provider_instance failed %d\n",
		    func_str, err);
	}
	return TRUE;
}
479 
/*
 * Detach and free the nexus identified by (provider, instance, device):
 * the instance (and its device port, if any) is torn down first, then the
 * provider is deregistered.  Returns TRUE if anything was detached.
 * func_str is the caller's name, used only for log messages; errors from
 * the deregister call are logged but not propagated.
 */
static boolean_t
dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
    uuid_t device)
{
	boolean_t               detached = FALSE;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	int                     err;

	if (dlil_detach_nexus_instance(controller, func_str, instance,
	    device)) {
		detached = TRUE;
	}
	if (provider != NULL && !uuid_is_null(provider)) {
		detached = TRUE;
		err = kern_nexus_controller_deregister_provider(controller,
		    provider);
		if (err != 0) {
			DLIL_PRINTF("%s deregister_provider %d\n",
			    func_str, err);
		}
	}
	return detached;
}
503 
/*
 * Register a nexus provider of the given type (netif or flowswitch),
 * named "com.apple.<type>.<ifname>", with the shared controller and
 * allocate one instance of it.  On success, *provider and *instance hold
 * the new UUIDs and 0 is returned; on failure the provider registration
 * (if it happened) is undone and the error code is returned.
 * Note: the success path intentionally falls through the "failed:" label
 * with err == 0.
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t          dom_prov;
	errno_t         err;
	nexus_name_t    provider_name;
	const char      *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* undo the registration; any secondary error is ignored */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	return err;
}
553 
554 static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp,if_nexus_netif_t netif_nx)555 dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
556 {
557 	nexus_attr_t            __single attr = NULL;
558 	nexus_controller_t      controller;
559 	errno_t                 err;
560 	unsigned char          *empty_uuid = __unsafe_forge_bidi_indexable(unsigned char *, NULL, sizeof(uuid_t));
561 
562 	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
563 		/* it's already attached */
564 		if (dlil_verbose) {
565 			DLIL_PRINTF("%s: %s already has nexus attached\n",
566 			    __func__, if_name(ifp));
567 			/* already attached */
568 		}
569 		goto failed;
570 	}
571 
572 	err = kern_nexus_attr_create(&attr);
573 	if (err != 0) {
574 		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
575 		    if_name(ifp));
576 		goto failed;
577 	}
578 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
579 	VERIFY(err == 0);
580 
581 	controller = kern_nexus_shared_controller();
582 
583 	/* create the netif provider and instance */
584 	err = dlil_create_provider_and_instance(controller,
585 	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
586 	    &netif_nx->if_nif_instance, attr);
587 	if (err != 0) {
588 		goto failed;
589 	}
590 
591 	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance, ifp,
592 	    empty_uuid, FALSE, &netif_nx->if_nif_attach);
593 	if (err != 0) {
594 		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
595 		    __func__, err);
596 		/* cleanup provider and instance */
597 		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
598 		    netif_nx->if_nif_instance, empty_uuid);
599 		goto failed;
600 	}
601 	return TRUE;
602 
603 failed:
604 	if (attr != NULL) {
605 		kern_nexus_attr_destroy(attr);
606 	}
607 	return FALSE;
608 }
609 
610 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)611 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
612 {
613 	if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
614 	    IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
615 		goto failed;
616 	}
617 	switch (ifp->if_type) {
618 	case IFT_CELLULAR:
619 	case IFT_ETHER:
620 		if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
621 			/* don't auto-attach */
622 			goto failed;
623 		}
624 		break;
625 	default:
626 		/* don't auto-attach */
627 		goto failed;
628 	}
629 	return dlil_attach_netif_nexus_common(ifp, netif_nx);
630 
631 failed:
632 	return FALSE;
633 }
634 
/*
 * Detach the netif nexus previously attached via
 * dlil_attach_netif_nexus_common(): provider, instance and the
 * interface attachment are all torn down.
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
642 
643 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)644 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
645 {
646 	struct ifreq        ifr;
647 	int                 error;
648 
649 	bzero(&ifr, sizeof(ifr));
650 	error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
651 	if (error == 0) {
652 		*ifdm_p = ifr.ifr_devmtu;
653 	}
654 	return error;
655 }
656 
/*
 * Grow (or clamp) *large_buf_size so the flowswitch can carry the
 * interface's TSO MTU.  No-op unless running on macOS or a server
 * platform (kernel_is_macos_or_server()).
 */
static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
{
	uint32_t tso_v4_mtu = 0;
	uint32_t tso_v6_mtu = 0;

	if (!kernel_is_macos_or_server()) {
		return;
	}

	/*
	 * Note that we are reading the real hwassist flags set by the driver
	 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
	 * hasn't been called yet.
	 */
	if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
		tso_v4_mtu = ifp->if_tso_v4_mtu;
	}
	if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
		tso_v6_mtu = ifp->if_tso_v6_mtu;
	}

	/*
	 * If the hardware supports TSO, adjust the large buf size to match the
	 * supported TSO MTU size. Note that only native interfaces set TSO MTU
	 * size today.
	 * For compat, there is a 16KB limit on large buf size, so it needs to be
	 * bounded by NX_FSW_DEF_LARGE_BUFSIZE. Note that no compat interfaces
	 * set TSO MTU size today.
	 */
	if (SKYWALK_NATIVE(ifp)) {
		/* prefer the driver-advertised TSO MTU; else the GSO default */
		if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
			*large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
		} else {
			*large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
		}
		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
	} else {
		*large_buf_size = MIN(NX_FSW_DEF_LARGE_BUFSIZE, *large_buf_size);
	}
}
698 
699 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint32_t * buf_size,bool * use_multi_buflet,uint32_t * large_buf_size)700 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
701     bool *use_multi_buflet, uint32_t *large_buf_size)
702 {
703 	struct kern_pbufpool_memory_info rx_pp_info;
704 	struct kern_pbufpool_memory_info tx_pp_info;
705 	uint32_t if_max_mtu = 0;
706 	uint32_t drv_buf_size;
707 	struct ifdevmtu ifdm;
708 	int err;
709 
710 	/*
711 	 * To perform intra-stack RX aggregation flowswitch needs to use
712 	 * multi-buflet packet.
713 	 */
714 	*use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
715 
716 	*large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
717 	/*
718 	 * IP over Thunderbolt interface can deliver the largest IP packet,
719 	 * but the driver advertises the MAX MTU as only 9K.
720 	 */
721 	if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
722 		if_max_mtu = IP_MAXPACKET;
723 		goto skip_mtu_ioctl;
724 	}
725 
726 	/* determine max mtu */
727 	bzero(&ifdm, sizeof(ifdm));
728 	err = dlil_siocgifdevmtu(ifp, &ifdm);
729 	if (__improbable(err != 0)) {
730 		DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
731 		    __func__, if_name(ifp));
732 		/* use default flowswitch buffer size */
733 		if_max_mtu = NX_FSW_BUFSIZE;
734 	} else {
735 		DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
736 		    ifdm.ifdm_max, ifdm.ifdm_current);
737 		/* rdar://problem/44589731 */
738 		if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
739 	}
740 
741 skip_mtu_ioctl:
742 	if (if_max_mtu == 0) {
743 		DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
744 		    __func__, if_name(ifp));
745 		return EINVAL;
746 	}
747 	if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
748 		DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
749 		    "max bufsize(%d)\n", __func__,
750 		    if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
751 		return EINVAL;
752 	}
753 
754 	/*
755 	 * for skywalk native driver, consult the driver packet pool also.
756 	 */
757 	if (dlil_is_native_netif_nexus(ifp)) {
758 		err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
759 		    &tx_pp_info);
760 		if (err != 0) {
761 			DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
762 			    __func__, if_name(ifp));
763 			return ENXIO;
764 		}
765 		drv_buf_size = tx_pp_info.kpm_bufsize *
766 		    tx_pp_info.kpm_max_frags;
767 		if (if_max_mtu > drv_buf_size) {
768 			DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
769 			    "tx %d * %d) can't support max mtu(%d)\n", __func__,
770 			    if_name(ifp), rx_pp_info.kpm_bufsize,
771 			    rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
772 			    tx_pp_info.kpm_max_frags, if_max_mtu);
773 			return EINVAL;
774 		}
775 	} else {
776 		drv_buf_size = if_max_mtu;
777 	}
778 
779 	if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
780 		_CASSERT((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
781 		*use_multi_buflet = true;
782 		/* default flowswitch buffer size */
783 		*buf_size = NX_FSW_BUFSIZE;
784 		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
785 	} else {
786 		*buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
787 	}
788 	_dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
789 	ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
790 	if (*buf_size >= *large_buf_size) {
791 		*large_buf_size = 0;
792 	}
793 	return 0;
794 }
795 
796 static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp,if_nexus_flowswitch_t nexus_fsw)797 _dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
798 {
799 	nexus_attr_t            __single attr = NULL;
800 	nexus_controller_t      controller;
801 	errno_t                 err = 0;
802 	uuid_t                  netif;
803 	uint32_t                buf_size = 0;
804 	uint32_t                large_buf_size = 0;
805 	bool                    multi_buflet;
806 
807 	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
808 	    IFNET_IS_VMNET(ifp)) {
809 		goto failed;
810 	}
811 
812 	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
813 		/* not possible to attach (netif native/compat not plumbed) */
814 		goto failed;
815 	}
816 
817 	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
818 		/* don't auto-attach */
819 		goto failed;
820 	}
821 
822 	/* get the netif instance from the ifp */
823 	err = kern_nexus_get_netif_instance(ifp, netif);
824 	if (err != 0) {
825 		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
826 		    if_name(ifp));
827 		goto failed;
828 	}
829 
830 	err = kern_nexus_attr_create(&attr);
831 	if (err != 0) {
832 		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
833 		    if_name(ifp));
834 		goto failed;
835 	}
836 
837 	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
838 	    &multi_buflet, &large_buf_size);
839 	if (err != 0) {
840 		goto failed;
841 	}
842 	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
843 	ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);
844 
845 	/* Configure flowswitch buffer size */
846 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
847 	VERIFY(err == 0);
848 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
849 	    large_buf_size);
850 	VERIFY(err == 0);
851 
852 	/*
853 	 * Configure flowswitch to use super-packet (multi-buflet).
854 	 */
855 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
856 	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
857 	VERIFY(err == 0);
858 
859 	/* create the flowswitch provider and instance */
860 	controller = kern_nexus_shared_controller();
861 	err = dlil_create_provider_and_instance(controller,
862 	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
863 	    &nexus_fsw->if_fsw_instance, attr);
864 	if (err != 0) {
865 		goto failed;
866 	}
867 
868 	/* attach the device port */
869 	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
870 	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
871 	if (err != 0) {
872 		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
873 		    __func__, err, if_name(ifp));
874 		/* cleanup provider and instance */
875 		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
876 		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
877 		goto failed;
878 	}
879 	return TRUE;
880 
881 failed:
882 	if (err != 0) {
883 		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
884 		    __func__, if_name(ifp), err);
885 	} else {
886 		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
887 		    __func__, if_name(ifp));
888 	}
889 	if (attr != NULL) {
890 		kern_nexus_attr_destroy(attr);
891 	}
892 	return FALSE;
893 }
894 
/*
 * Public entry point to auto-attach a flowswitch to an interface.
 * Serializes against interface detach via ifnet_datamov_begin()/end()
 * and publishes the new nexus UUIDs into ifp->if_nx_flowswitch under
 * the ifnet lock.  Returns TRUE if a flowswitch was attached here.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t               attached = FALSE;
	if_nexus_flowswitch     nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	/* test hook: skip interfaces configured for direct netif access */
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
		    if_name(ifp));
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));

	/*
	 * A race can happen between a thread creating a flowswitch and another thread
	 * detaching the interface (also destroying the flowswitch).
	 *
	 * ifnet_datamov_begin() is used here to force dlil_quiesce_and_detach_nexuses()
	 * (called by another thread) to wait until this function finishes so the
	 * flowswitch can be cleaned up by dlil_detach_flowswitch_nexus().
	 *
	 * If ifnet_is_attached() is used instead, dlil_quiesce_and_detach_nexuses()
	 * would not wait (because ifp->if_nx_flowswitch isn't assigned) and the
	 * created flowswitch would be left hanging and ifnet_detach_final() would never
	 * wakeup because the existence of the flowswitch prevents the ifnet's ioref
	 * from being released.
	 */
	if (!ifnet_datamov_begin(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s not attached",
		    __func__, ifp->if_xname);
		goto done;
	}
	/* only attach if no flowswitch instance exists yet */
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
		attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
		if (attached) {
			/* publish the nexus UUIDs under the ifnet lock */
			ifnet_lock_exclusive(ifp);
			ifp->if_nx_flowswitch = nexus_fsw;
			ifnet_lock_done(ifp);
		}
	}
	ifnet_datamov_end(ifp);

done:
	return attached;
}
951 
952 __attribute__((noinline))
953 static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)954 dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
955 {
956 	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
957 	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
958 }
959 
960 __attribute__((noinline))
961 static void
dlil_netif_detach_notify(ifnet_t ifp)962 dlil_netif_detach_notify(ifnet_t ifp)
963 {
964 	ifnet_detach_notify_cb_t notify = NULL;
965 	void *__single arg = NULL;
966 
967 	ifnet_get_detach_notify(ifp, &notify, &arg);
968 	if (notify == NULL) {
969 		DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
970 		return;
971 	}
972 	(*notify)(arg);
973 }
974 
__attribute__((noinline))
static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
{
	/*
	 * Drain all data movement on ifp, then detach its flowswitch and
	 * netif nexuses (in that order — the flowswitch sits on top of the
	 * netif).  The UUID fields are used as presence markers: either all
	 * of a nexus' UUIDs are set, or none are.
	 */
	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
	if_nexus_netif *nx_netif = &ifp->if_nx_netif;

	/* block new datamov activity and wait for in-flight users to drain */
	ifnet_datamov_suspend_and_drain(ifp);
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
	} else {
		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
	}

	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
	} else {
		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
	}
	ifnet_datamov_resume(ifp);
}
1004 
1005 boolean_t
ifnet_add_netagent(ifnet_t ifp)1006 ifnet_add_netagent(ifnet_t ifp)
1007 {
1008 	int     error;
1009 
1010 	error = kern_nexus_interface_add_netagent(ifp);
1011 	os_log(OS_LOG_DEFAULT,
1012 	    "kern_nexus_interface_add_netagent(%s) returned %d",
1013 	    ifp->if_xname, error);
1014 	return error == 0;
1015 }
1016 
1017 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1018 ifnet_remove_netagent(ifnet_t ifp)
1019 {
1020 	int     error;
1021 
1022 	error = kern_nexus_interface_remove_netagent(ifp);
1023 	os_log(OS_LOG_DEFAULT,
1024 	    "kern_nexus_interface_remove_netagent(%s) returned %d",
1025 	    ifp->if_xname, error);
1026 	return error == 0;
1027 }
1028 
1029 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1030 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1031 {
1032 	if (!IF_FULLY_ATTACHED(ifp)) {
1033 		return FALSE;
1034 	}
1035 	return dlil_attach_flowswitch_nexus(ifp);
1036 }
1037 
1038 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)1039 ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1040 {
1041 	if_nexus_flowswitch     nexus_fsw;
1042 
1043 	ifnet_lock_exclusive(ifp);
1044 	nexus_fsw = ifp->if_nx_flowswitch;
1045 	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1046 	ifnet_lock_done(ifp);
1047 	return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1048 	           nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1049 }
1050 
1051 void
ifnet_attach_native_flowswitch(ifnet_t ifp)1052 ifnet_attach_native_flowswitch(ifnet_t ifp)
1053 {
1054 	if (!dlil_is_native_netif_nexus(ifp)) {
1055 		/* not a native netif */
1056 		return;
1057 	}
1058 	ifnet_attach_flowswitch_nexus(ifp);
1059 }
1060 
1061 int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp,ifnet_fsw_rx_cb_t cb,void * arg)1062 ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
1063 {
1064 	lck_mtx_lock(&ifp->if_delegate_lock);
1065 	while (ifp->if_fsw_rx_cb_ref > 0) {
1066 		DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
1067 		(void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
1068 		    (PZERO + 1), __FUNCTION__, NULL);
1069 		DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
1070 	}
1071 	ifp->if_fsw_rx_cb = cb;
1072 	ifp->if_fsw_rx_cb_arg = arg;
1073 	lck_mtx_unlock(&ifp->if_delegate_lock);
1074 	return 0;
1075 }
1076 
int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
{
	/*
	 * This is for avoiding the unnecessary lock acquire for interfaces
	 * not used by a redirect interface.
	 *
	 * The unlocked read is an optimization only; the result is
	 * re-validated under if_delegate_lock below before being returned.
	 */
	if (ifp->if_fsw_rx_cb == NULL) {
		return ENOENT;
	}
	lck_mtx_lock(&ifp->if_delegate_lock);
	if (ifp->if_fsw_rx_cb == NULL) {
		lck_mtx_unlock(&ifp->if_delegate_lock);
		return ENOENT;
	}
	*cbp = ifp->if_fsw_rx_cb;
	*argp = ifp->if_fsw_rx_cb_arg;
	/*
	 * Take a reference that keeps the callback/arg pair stable;
	 * the caller must drop it via ifnet_release_flowswitch_rx_callback().
	 */
	ifp->if_fsw_rx_cb_ref++;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1098 
1099 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)1100 ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1101 {
1102 	lck_mtx_lock(&ifp->if_delegate_lock);
1103 	if (--ifp->if_fsw_rx_cb_ref == 0) {
1104 		wakeup(&ifp->if_fsw_rx_cb_ref);
1105 	}
1106 	lck_mtx_unlock(&ifp->if_delegate_lock);
1107 }
1108 
1109 int
ifnet_set_delegate_parent(ifnet_t difp,ifnet_t parent)1110 ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
1111 {
1112 	lck_mtx_lock(&difp->if_delegate_lock);
1113 	while (difp->if_delegate_parent_ref > 0) {
1114 		DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
1115 		(void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
1116 		    (PZERO + 1), __FUNCTION__, NULL);
1117 		DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
1118 	}
1119 	difp->if_delegate_parent = parent;
1120 	lck_mtx_unlock(&difp->if_delegate_lock);
1121 	return 0;
1122 }
1123 
1124 int
ifnet_get_delegate_parent(ifnet_t difp,ifnet_t * parentp)1125 ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1126 {
1127 	lck_mtx_lock(&difp->if_delegate_lock);
1128 	if (difp->if_delegate_parent == NULL) {
1129 		lck_mtx_unlock(&difp->if_delegate_lock);
1130 		return ENOENT;
1131 	}
1132 	*parentp = difp->if_delegate_parent;
1133 	difp->if_delegate_parent_ref++;
1134 	lck_mtx_unlock(&difp->if_delegate_lock);
1135 	return 0;
1136 }
1137 
1138 void
ifnet_release_delegate_parent(ifnet_t difp)1139 ifnet_release_delegate_parent(ifnet_t difp)
1140 {
1141 	lck_mtx_lock(&difp->if_delegate_lock);
1142 	if (--difp->if_delegate_parent_ref == 0) {
1143 		wakeup(&difp->if_delegate_parent_ref);
1144 	}
1145 	lck_mtx_unlock(&difp->if_delegate_lock);
1146 }
1147 
1148 __attribute__((noinline))
1149 void
ifnet_set_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1150 ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1151 {
1152 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1153 	ifp->if_detach_notify = notify;
1154 	ifp->if_detach_notify_arg = arg;
1155 }
1156 
1157 __attribute__((noinline))
1158 void
ifnet_get_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1159 ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1160 {
1161 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1162 	*notifyp = ifp->if_detach_notify;
1163 	*argp = ifp->if_detach_notify_arg;
1164 }
1165 
1166 __attribute__((noinline))
1167 void
ifnet_set_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1168 ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1169 {
1170 	ifnet_lock_exclusive(ifp);
1171 	ifnet_set_detach_notify_locked(ifp, notify, arg);
1172 	ifnet_lock_done(ifp);
1173 }
1174 
1175 __attribute__((noinline))
1176 void
ifnet_get_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1177 ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1178 {
1179 	ifnet_lock_exclusive(ifp);
1180 	ifnet_get_detach_notify_locked(ifp, notifyp, argp);
1181 	ifnet_lock_done(ifp);
1182 }
1183 #endif /* SKYWALK */
1184 
/*
 * Sanity-check an inbound mbuf: it must carry a pkthdr and its recorded
 * receive interface must match the interface it is being processed on
 * (loopback excepted).  A violation panics the system, since it
 * indicates a corrupted or misattributed mbuf.
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	ifnet_ref_t _rcvif = mbuf_pkthdr_rcvif(m);                      \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}
1193 
#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)

struct rxpoll_time_tbl {
	u_int64_t       speed;          /* downlink speed */
	u_int32_t       plowat;         /* packets low watermark */
	u_int32_t       phiwat;         /* packets high watermark */
	u_int32_t       blowat;         /* bytes low watermark */
	u_int32_t       bhiwat;         /* bytes high watermark */
};

/*
 * RX poll watermarks keyed by downlink speed; the table ends with an
 * all-zero sentinel entry.
 */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed =  10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024)    },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
1213 
1214 int
proto_hash_value(u_int32_t protocol_family)1215 proto_hash_value(u_int32_t protocol_family)
1216 {
1217 	/*
1218 	 * dlil_proto_unplumb_all() depends on the mapping between
1219 	 * the hash bucket index and the protocol family defined
1220 	 * here; future changes must be applied there as well.
1221 	 */
1222 	switch (protocol_family) {
1223 	case PF_INET:
1224 		return 0;
1225 	case PF_INET6:
1226 		return 1;
1227 	case PF_VLAN:
1228 		return 2;
1229 	case PF_UNSPEC:
1230 	default:
1231 		return 3;
1232 	}
1233 }
1234 
1235 __private_extern__ int
dlil_post_msg(struct ifnet * ifp,u_int32_t event_subclass,u_int32_t event_code,struct net_event_data * event_data,u_int32_t event_data_len,boolean_t suppress_generation)1236 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1237     u_int32_t event_code, struct net_event_data *event_data,
1238     u_int32_t event_data_len, boolean_t suppress_generation)
1239 {
1240 	struct net_event_data ev_data;
1241 	struct kev_msg ev_msg;
1242 
1243 	bzero(&ev_msg, sizeof(ev_msg));
1244 	bzero(&ev_data, sizeof(ev_data));
1245 	/*
1246 	 * a net event always starts with a net_event_data structure
1247 	 * but the caller can generate a simple net event or
1248 	 * provide a longer event structure to post
1249 	 */
1250 	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1251 	ev_msg.kev_class        = KEV_NETWORK_CLASS;
1252 	ev_msg.kev_subclass     = event_subclass;
1253 	ev_msg.event_code       = event_code;
1254 
1255 	if (event_data == NULL) {
1256 		event_data = &ev_data;
1257 		event_data_len = sizeof(struct net_event_data);
1258 	}
1259 
1260 	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1261 	event_data->if_family = ifp->if_family;
1262 	event_data->if_unit   = (u_int32_t)ifp->if_unit;
1263 
1264 	ev_msg.dv[0].data_length = event_data_len;
1265 	ev_msg.dv[0].data_ptr    = event_data;
1266 	ev_msg.dv[1].data_length = 0;
1267 
1268 	bool update_generation = true;
1269 	if (event_subclass == KEV_DL_SUBCLASS) {
1270 		/* Don't update interface generation for frequent link quality and state changes  */
1271 		switch (event_code) {
1272 		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1273 		case KEV_DL_RRC_STATE_CHANGED:
1274 		case KEV_DL_PRIMARY_ELECTED:
1275 			update_generation = false;
1276 			break;
1277 		default:
1278 			break;
1279 		}
1280 	}
1281 
1282 	/*
1283 	 * Some events that update generation counts might
1284 	 * want to suppress generation count.
1285 	 * One example is node presence/absence where we still
1286 	 * issue kernel event for the invocation but want to avoid
1287 	 * expensive operation of updating generation which triggers
1288 	 * NECP client updates.
1289 	 */
1290 	if (suppress_generation) {
1291 		update_generation = false;
1292 	}
1293 
1294 	return dlil_event_internal(ifp, &ev_msg, update_generation);
1295 }
1296 
/*
 * Reset all RX polling state on ifp back to its pristine, polling-off
 * configuration: cancel the poll cycle, clear mode/flags/request state,
 * zero the accumulated statistics and clear all poll timestamps.
 */
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	/* wipe total/per-poll/sampled statistics */
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
1315 
1316 
1317 #if SKYWALK
1318 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)1319 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
1320     enum net_filter_event_subsystems state)
1321 {
1322 	evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%d",
1323 	    __func__, state);
1324 
1325 	bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
1326 	if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
1327 		if_enable_fsw_transport_netagent = 1;
1328 	} else {
1329 		if_enable_fsw_transport_netagent = 0;
1330 	}
1331 	if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
1332 		kern_nexus_update_netagents();
1333 	} else if (!if_enable_fsw_transport_netagent) {
1334 		necp_update_all_clients();
1335 	}
1336 }
1337 #endif /* SKYWALK */
1338 
/*
 * One-time DLIL initialization, called during network stack bring-up.
 * Verifies layout/constant invariants, parses boot-args, initializes the
 * DLIL subsystems, and starts the main input and detacher kernel
 * threads, waiting until both have been scheduled at least once.
 */
void
dlil_init(void)
{
	thread_t __single thread = THREAD_NULL;

	dlil_main_input_thread = (struct dlil_threading_info *) &dlil_main_input_thread_info;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	_CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	_CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	_CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	_CASSERT(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
	_CASSERT(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
	_CASSERT(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* Boot-arg overrides for DLIL tunables. */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	PE_parse_boot_argn("if_link_heuristics", &if_link_heuristics_flags, sizeof(if_link_heuristics_flags));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled.  This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
	if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */

	dlil_allocation_zones_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize droptap interface */
	droptap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
1635 
/*
 * Attach an interface filter described by if_filter to ifp, returning
 * the new filter handle through filter_ref.  Returns 0 on success or
 * ENXIO when the interface is not (or no longer) attached.  Callbacks
 * other than iff_detached are not installed for internal-coprocessor or
 * management interfaces.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	/* takes an io ref on success; dropped via ifnet_decr_iorefcnt below */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	filter = dlif_filt_alloc();
	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 * and for management interfaces
	 */
	if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	/* attach accounting: global counters, plus non-OS count on the ifnet */
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	/* drop the io ref taken by ifnet_is_attached() above */
	ifnet_decr_iorefcnt(ifp);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		dlif_filt_free(filter);
	}

	return retval;
}
1727 
/*
 * Detach an interface filter.
 *
 * detached == 0: search every attached interface's filter list for a
 * match; on a hit, mark it filt_skip (so input/output paths bypass it),
 * do the lock dance to enter the filter monitor, unlink it, and adjust
 * the per-ifnet and global counters.
 *
 * detached != 0: implicit detach from ifnet_detach_final(); the caller
 * already emptied if_flt_head, so only the counters are adjusted here.
 *
 * In both success paths the filter's filt_detached callback is invoked
 * and the filter is freed.  Returns 0 on success, or EINVAL when the
 * filter is not attached to any interface.
 */
static int
dlil_detach_filter_internal(interface_filter_t  filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_ref_t ifp = NULL;

		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1;   /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
					VERIFY(ifp->if_flt_non_os_count != 0);
					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
				}
				/*
				 * Decrease filter count and route_generation
				 * ID to let TCP know it should reevaluate doing
				 * TSO or not.
				 */
				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
					ifnet_filter_update_tso(ifp, FALSE);
				}
				/*
				 * When we remove the bridge's interface filter,
				 * clear the field in the ifnet.
				 */
				if ((filter->filt_flags & DLIL_IFF_BRIDGE)
				    != 0) {
					ifp->if_bridge = NULL;
				}
				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	} else {
		ifnet_ref_t ifp = filter->filt_ifp;
		/*
		 * Here we are called from ifnet_detach_final(); the
		 * caller had emptied if_flt_head and we're doing an
		 * implicit filter detach because the interface is
		 * about to go away.  Make sure to adjust the counters
		 * in this case.  We don't need the protection of the
		 * filter monitor since we're called as part of the
		 * final detach in the context of the detacher thread.
		 */
		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
			VERIFY(ifp->if_flt_non_os_count != 0);
			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
		}
		/*
		 * Decrease filter count and route_generation
		 * ID to let TCP know it should reevaluate doing
		 * TSO or not.
		 */
		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
			ifnet_filter_update_tso(ifp, FALSE);
		}
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
	}
#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	/* Free the filter */
	dlif_filt_free(filter);
	filter = NULL;   /* prevents the error printf in 'done' below */
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
1858 
1859 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)1860 dlil_detach_filter(interface_filter_t filter)
1861 {
1862 	if (filter == NULL) {
1863 		return;
1864 	}
1865 	dlil_detach_filter_internal(filter, 0);
1866 }
1867 
1868 __private_extern__ boolean_t
dlil_has_ip_filter(void)1869 dlil_has_ip_filter(void)
1870 {
1871 	boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
1872 
1873 	VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
1874 
1875 	DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
1876 	return has_filter;
1877 }
1878 
1879 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)1880 dlil_has_if_filter(struct ifnet *ifp)
1881 {
1882 	boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
1883 	DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
1884 	return has_filter;
1885 }
1886 
1887 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)1888 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
1889 {
1890 	if (p != NULL) {
1891 		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
1892 		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
1893 			return EINVAL;
1894 		}
1895 		if (p->packets_lowat != 0 &&    /* hiwat must be non-zero */
1896 		    p->packets_lowat >= p->packets_hiwat) {
1897 			return EINVAL;
1898 		}
1899 		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
1900 		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
1901 			return EINVAL;
1902 		}
1903 		if (p->bytes_lowat != 0 &&      /* hiwat must be non-zero */
1904 		    p->bytes_lowat >= p->bytes_hiwat) {
1905 			return EINVAL;
1906 		}
1907 		if (p->interval_time != 0 &&
1908 		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
1909 			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
1910 		}
1911 	}
1912 	return 0;
1913 }
1914 
/*
 * Recompute the interface's RX polling parameters (packet/byte low and
 * high watermarks, per-poll packet limit, and poll interval) from the
 * current input link rate and the optional caller-supplied parameters.
 * A NULL 'p' (or a zero field within it) means "auto-tune" from the
 * static rxpoll_tbl, indexed by the highest table speed not exceeding
 * the link rate.  When the link rate is 0 and p is NULL, polling is
 * disabled (lowats zeroed, hiwats maxed so thresholds never trigger).
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* pick the last table row whose speed is <= the link rate */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/* a non-zero if_rxpoll_max sysctl overrides the caller's limit */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ?  if_rxpoll_max : p->packets_limit);
		/* a non-default if_rxpoll_interval_time overrides the caller's */
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert the hold times (ns) into timespec form for later use */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
1984 
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 *
 * Validates 'p', then applies it under the input thread lock (taken
 * here unless 'locked' says the caller already holds it).  Returns
 * ENXIO if the interface does not support RX polling, or an EINVAL
 * from validation.
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->dlth_lock);
	}
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->dlth_lock);
	}
	return 0;
}
2026 
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 *
 * Copies the interface's current RX poll parameters into '*p' under
 * the input thread lock.  Returns ENXIO if the interface does not
 * support RX polling.
 */
errno_t
dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL && p != NULL);
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}

	bzero(p, sizeof(*p));

	/* snapshot the tunables consistently under the input thread lock */
	lck_mtx_lock(&inp->dlth_lock);
	p->packets_limit = ifp->if_rxpoll_plim;
	p->packets_lowat = ifp->if_rxpoll_plowat;
	p->packets_hiwat = ifp->if_rxpoll_phiwat;
	p->bytes_lowat = ifp->if_rxpoll_blowat;
	p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
	p->interval_time = ifp->if_rxpoll_ival;
	lck_mtx_unlock(&inp->dlth_lock);

	return 0;
}
2053 
/*
 * Basic input variant: hand an mbuf chain to DLIL for 'ifp'.  The
 * chain tail is discovered by walking the chain; the stat increments
 * 's' are optional (not the extended form).
 */
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}
2060 
/*
 * Extended input variant: the driver supplies the chain tail and the
 * stat increments 's' (both mandatory; enforced in ifnet_input_common).
 */
errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}
2067 
/*
 * Poll-mode input variant: like the extended form but flagged as
 * coming from the RX poll path; an empty chain (m_head == NULL) is
 * permitted and treated as non-extended.
 */
errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	           (m_head != NULL), TRUE);
}
2075 
/*
 * Common implementation behind ifnet_input{,_extended,_poll}().
 * Validates the arguments, takes an IO (datamov) reference on the
 * interface so it cannot be detached mid-delivery, computes or adopts
 * the packet/byte counts, then hands the chain to the interface's
 * DLIL input function (ifp->if_input_dlil).  On any validation
 * failure the chain is freed and EINVAL is returned.
 *
 * ext:  's' came from the driver (extended variant); its packet count
 *       is asserted against the actual chain length.
 * poll: invoked from the RX poll path; an empty chain is then legal.
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail supplied: walk the chain, counting as we go */
		last = m_head;
		while (m_head != NULL) {
			m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* sanity mode: re-walk and re-count despite driver stats */
			last = m_head;
			while (1) {
				m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-supplied counts */
			m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	/* build a local stats copy with our authoritative counts */
	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
		/* input disabled: drop the chain but still count it */
		m_freem_list(m_head);

		os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
		os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);

		goto done;
	}

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

done:
	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
2204 
2205 
/*
 * Common helper for ifnet_start() and variants: request a run of the
 * interface's starter thread.  No-op unless the interface uses the
 * driver TX-start model (IFEF_TXSTART).
 *
 * resetfc:      clear the flow-controlled state before signalling.
 * ignore_delay: set IFSF_NO_DELAY so the starter bypasses delayed start.
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (ignore_delay) {
		ifp->if_start_flags |= IFSF_NO_DELAY;
	}
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		/* flow controlled and not resetting: don't signal */
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	/*
	 * Wake the starter only when it is idle, exists, and either we
	 * are resetting flow control, multi-enqueue batching doesn't
	 * apply, or the queue/delay state says a run is warranted.
	 */
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
2238 
/*
 * Public kick for the starter thread: does not reset flow control and
 * honors any configured start delay.
 */
void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, FALSE);
}
2244 
/*
 * Like ifnet_start(), but sets IFSF_NO_DELAY so the starter thread
 * skips the delayed-start logic for this run.
 */
void
ifnet_start_ignore_delay(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, TRUE);
}
2250 
/*
 * Entry point of the per-interface starter thread.  Names the thread,
 * performs one-time affinity setup for lo0's starter, then parks in
 * ifnet_start_thread_cont() via a continuation; it never returns here.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	ifnet_ref_t ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), __unsafe_null_terminated_from_indexable(thread_name));

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *__single tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	/* park in the continuation; all further work happens there */
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2316 
/*
 * Continuation body of the starter thread.  Each wakeup: service
 * pending start requests by repeatedly invoking the driver's start
 * routine (under an IO refcnt) until no new request arrived or the
 * interface is disabled, then re-arm the wait (possibly with a TBR or
 * delayed-start deadline) and block again on this same continuation.
 * On THREAD_INTERRUPTED or IFSF_TERMINATING, tear the thread down.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	ifnet_ref_t ifp = v;
	struct ifclassq *ifq = ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
		goto terminate;
	}

	if (__improbable(ifp->if_start_embryonic)) {
		/* first wakeup: leave embryonic state, then wait for real work */
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		/* defer this run if delayed start applies and queue is short */
		if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
		    !IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		}
		ifp->if_start_flags &= ~IFSF_NO_DELAY;
		ifp->if_start_delayed = 0;
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
#define _IFSF_DISABLED  (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

#if SKYWALK
	/*
	 * Wakeup any waiters, e.g. any threads waiting to
	 * detach the interface from the flowswitch, etc.
	 */
	if (ifp->if_start_waiters != 0) {
		ifp->if_start_waiters = 0;
		wakeup(&ifp->if_start_waiters);
	}
#endif /* SKYWALK */
	if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts = NULL;

		/*
		 * NOTE(review): 'ts' was just initialized to NULL, so this
		 * guard is always true; it appears to be a leftover from a
		 * restructuring and could be flattened.
		 */
		if (ts == NULL) {
			ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
			    &ifp->if_start_cycle : NULL);
		}

		if (ts == NULL && ifp->if_start_delayed == 1) {
			/* wake up after the delayed-start timeout */
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;      /* zero interval means no deadline */
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);

		/* clear if_start_thread to allow termination to continue */
		ASSERT(ifp->if_start_thread != THREAD_NULL);
		ifp->if_start_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2462 
2463 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)2464 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2465 {
2466 	if (ts == NULL) {
2467 		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
2468 	} else {
2469 		*(&ifp->if_start_cycle) = *ts;
2470 	}
2471 
2472 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2473 		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
2474 		    if_name(ifp), ts->tv_nsec);
2475 	}
2476 }
2477 
2478 static inline void
ifnet_poll_wakeup(struct ifnet * ifp)2479 ifnet_poll_wakeup(struct ifnet *ifp)
2480 {
2481 	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
2482 
2483 	ifp->if_poll_req++;
2484 	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
2485 	    ifp->if_poll_thread != THREAD_NULL) {
2486 		wakeup_one((caddr_t)&ifp->if_poll_thread);
2487 	}
2488 }
2489 
/*
 * Public kick for the RX poller thread: take the poll lock and signal
 * the poller (via ifnet_poll_wakeup) if it is idle.
 */
void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
}
2500 
/*
 * Entry point of the per-interface RX poller thread (RXPOLL model).
 * Names the thread, marks itself embryonic, then parks in
 * ifnet_poll_thread_cont() via a continuation; it never returns here.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	ifnet_ref_t ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, __unsafe_null_terminated_from_indexable(thread_name));

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	/* park in the continuation; all further work happens there */
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2529 
/*
 * Continuation body of the RX poller thread.  Each wakeup: while poll
 * requests are pending, call the driver's input poll routine (under an
 * IO refcnt) for up to m_lim packets and feed the result into
 * ifnet_input_common(), then re-arm the wait (using if_poll_cycle as
 * the deadline if set) and block again on this same continuation.  On
 * THREAD_INTERRUPTED or IF_POLLF_TERMINATING, tear the thread down.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	ifnet_ref_t ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		/* first wakeup: leave embryonic state, then wait for real work */
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		mbuf_ref_t m_head, m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		/* per-poll packet budget: explicit limit or derived fallback */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		&m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			/* empty poll: still notify the input path */
			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2696 
2697 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)2698 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2699 {
2700 	if (ts == NULL) {
2701 		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
2702 	} else {
2703 		*(&ifp->if_poll_cycle) = *ts;
2704 	}
2705 
2706 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2707 		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
2708 		    if_name(ifp), ts->tv_nsec);
2709 	}
2710 }
2711 
2712 void
ifnet_purge(struct ifnet * ifp)2713 ifnet_purge(struct ifnet *ifp)
2714 {
2715 	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
2716 		if_qflush_snd(ifp, false);
2717 	}
2718 }
2719 
2720 void
ifnet_update_sndq(struct ifclassq * ifq,cqev_t ev)2721 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2722 {
2723 	IFCQ_LOCK_ASSERT_HELD(ifq);
2724 
2725 	if (!(IFCQ_IS_READY(ifq))) {
2726 		return;
2727 	}
2728 
2729 	if (IFCQ_TBR_IS_ENABLED(ifq)) {
2730 		struct tb_profile tb = {
2731 			.rate = ifq->ifcq_tbr.tbr_rate_raw,
2732 			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
2733 		};
2734 		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
2735 	}
2736 
2737 	ifclassq_update(ifq, ev);
2738 }
2739 
2740 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)2741 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2742 {
2743 	switch (ev) {
2744 	case CLASSQ_EV_LINK_BANDWIDTH:
2745 		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
2746 			ifp->if_poll_update++;
2747 		}
2748 		break;
2749 
2750 	default:
2751 		break;
2752 	}
2753 }
2754 
2755 errno_t
ifnet_set_output_sched_model(struct ifnet * ifp,u_int32_t model)2756 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2757 {
2758 	struct ifclassq *ifq;
2759 	u_int32_t omodel;
2760 	errno_t err;
2761 
2762 	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
2763 		return EINVAL;
2764 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2765 		return ENXIO;
2766 	}
2767 
2768 	ifq = ifp->if_snd;
2769 	IFCQ_LOCK(ifq);
2770 	omodel = ifp->if_output_sched_model;
2771 	ifp->if_output_sched_model = model;
2772 	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
2773 		ifp->if_output_sched_model = omodel;
2774 	}
2775 	IFCQ_UNLOCK(ifq);
2776 
2777 	return err;
2778 }
2779 
2780 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2781 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2782 {
2783 	if (ifp == NULL) {
2784 		return EINVAL;
2785 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2786 		return ENXIO;
2787 	}
2788 
2789 	ifclassq_set_maxlen(ifp->if_snd, maxqlen);
2790 
2791 	return 0;
2792 }
2793 
2794 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2795 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2796 {
2797 	if (ifp == NULL || maxqlen == NULL) {
2798 		return EINVAL;
2799 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2800 		return ENXIO;
2801 	}
2802 
2803 	*maxqlen = ifclassq_get_maxlen(ifp->if_snd);
2804 
2805 	return 0;
2806 }
2807 
2808 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)2809 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2810 {
2811 	errno_t err;
2812 
2813 	if (ifp == NULL || pkts == NULL) {
2814 		err = EINVAL;
2815 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2816 		err = ENXIO;
2817 	} else {
2818 		err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
2819 		    IF_CLASSQ_ALL_GRPS, pkts, NULL);
2820 	}
2821 
2822 	return err;
2823 }
2824 
2825 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)2826 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2827     u_int32_t *pkts, u_int32_t *bytes)
2828 {
2829 	errno_t err;
2830 
2831 	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2832 	    (pkts == NULL && bytes == NULL)) {
2833 		err = EINVAL;
2834 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2835 		err = ENXIO;
2836 	} else {
2837 		err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
2838 		    pkts, bytes);
2839 	}
2840 
2841 	return err;
2842 }
2843 
2844 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2845 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2846 {
2847 	struct dlil_threading_info *inp;
2848 
2849 	if (ifp == NULL) {
2850 		return EINVAL;
2851 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2852 		return ENXIO;
2853 	}
2854 
2855 	if (maxqlen == 0) {
2856 		maxqlen = if_rcvq_maxlen;
2857 	} else if (maxqlen < IF_RCVQ_MINLEN) {
2858 		maxqlen = IF_RCVQ_MINLEN;
2859 	}
2860 
2861 	inp = ifp->if_inp;
2862 	lck_mtx_lock(&inp->dlth_lock);
2863 	qlimit(&inp->dlth_pkts) = maxqlen;
2864 	lck_mtx_unlock(&inp->dlth_lock);
2865 
2866 	return 0;
2867 }
2868 
2869 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2870 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2871 {
2872 	struct dlil_threading_info *inp;
2873 
2874 	if (ifp == NULL || maxqlen == NULL) {
2875 		return EINVAL;
2876 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2877 		return ENXIO;
2878 	}
2879 
2880 	inp = ifp->if_inp;
2881 	lck_mtx_lock(&inp->dlth_lock);
2882 	*maxqlen = qlimit(&inp->dlth_pkts);
2883 	lck_mtx_unlock(&inp->dlth_lock);
2884 	return 0;
2885 }
2886 
2887 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)2888 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
2889     uint16_t delay_timeout)
2890 {
2891 	if (delay_qlen > 0 && delay_timeout > 0) {
2892 		if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
2893 		ifp->if_start_delay_qlen = MIN(100, delay_qlen);
2894 		ifp->if_start_delay_timeout = min(20000, delay_timeout);
2895 		/* convert timeout to nanoseconds */
2896 		ifp->if_start_delay_timeout *= 1000;
2897 		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
2898 		    ifp->if_xname, (uint32_t)delay_qlen,
2899 		    (uint32_t)delay_timeout);
2900 	} else {
2901 		if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
2902 	}
2903 }
2904 
/*
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf,
 * while preserving the ECN bits.  While it's ok for buf to be not 32 bit
 * aligned, the caller must ensure that buf holds the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *__indexable buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	/* aligned bounce buffer, used only when buf is misaligned */
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		/* copy the header into lbuf so it can be accessed in place */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		/* fast path: no DSCP bits set, nothing to do */
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		/*
		 * Incremental IPv4 header checksum update (RFC 1624 style):
		 * add the delta between old and new TOS, then fold the carry.
		 */
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		/* write the modified header back if we used the bounce buffer */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;
		ASSERT(ip_ver == IPV6_VERSION);

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		/* fast path: no DSCP bits in the traffic-class field */
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		/* no checksum in the IPv6 header; just clear the bits */
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
2965 
/*
 * Enqueue a single packet (mbuf or native Skywalk packet) on the
 * interface's send classq and, unless multi-enqueue coalescing defers
 * it, kick the driver's start callback.
 *
 * ifcq selects the target classq; NULL means ifp->if_snd.  flush
 * requests an ifnet_start() after a successful enqueue.  *pdrop is
 * set by ifclassq_enqueue() to indicate whether the packet was
 * dropped.  The caller consumes the packet object in all cases.
 */
static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	uint8_t *mcast_buf = NULL;
	uint8_t ip_ver;
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)_net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)_net_uptime;
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)_net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)_net_uptime;
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* ensure the Ethernet header is contiguous */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = mtod(p->cp_mbuf, struct ether_header *);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* non-IP multicast: skip DSCP workaround */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				/* m_pullup may have replaced the mbuf */
				eh = mtod(p->cp_mbuf, struct ether_header *);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface.  If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)_net_uptime;
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)_net_uptime;
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)_net_uptime;
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)_net_uptime;
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			/* too short for an Ethernet header: skip workaround */
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* non-IP multicast: skip DSCP workaround */
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* apply the Wi-Fi multicast DSCP workaround, if armed above */
	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This is
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* still inside the current sampling window */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* idle > 200ms: reset and disable delay */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* window expired: evaluate the heuristic */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* first packet: open a new sampling window */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
3275 
3276 static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3277 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3278     classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
3279     boolean_t flush, boolean_t *pdrop)
3280 {
3281 	int error;
3282 
3283 	/* enqueue the packet (caller consumes object) */
3284 	error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
3285 	    cnt, bytes, pdrop);
3286 
3287 	/*
3288 	 * Tell the driver to start dequeueing; do this even when the queue
3289 	 * for the packet is suspended (EQSUSPENDED), as the driver could still
3290 	 * be dequeueing from other unsuspended queues.
3291 	 */
3292 	if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
3293 		ifnet_start(ifp);
3294 	}
3295 	return error;
3296 }
3297 
3298 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * __sized_by (n_pkts)pkts,uint32_t n_pkts)3299 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *__sized_by(n_pkts)pkts, uint32_t n_pkts)
3300 {
3301 	ifnet_ref_t ifp = handle;
3302 	boolean_t pdrop;        /* dummy */
3303 	uint32_t i;
3304 
3305 	ASSERT(n_pkts >= 1);
3306 	for (i = 0; i < n_pkts - 1; i++) {
3307 		(void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3308 		    FALSE, &pdrop);
3309 	}
3310 	/* flush with the last packet */
3311 	(void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3312 	    TRUE, &pdrop);
3313 
3314 	return 0;
3315 }
3316 
3317 static inline errno_t
ifnet_enqueue_common(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)3318 ifnet_enqueue_common(struct ifnet *ifp, struct ifclassq *ifcq,
3319     classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
3320 {
3321 	if (ifp->if_output_netem != NULL) {
3322 		bool drop;
3323 		errno_t error;
3324 		error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
3325 		*pdrop = drop ? TRUE : FALSE;
3326 		return error;
3327 	} else {
3328 		return ifnet_enqueue_ifclassq(ifp, ifcq, pkt, flush, pdrop);
3329 	}
3330 }
3331 
3332 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)3333 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3334 {
3335 	uint32_t bytes = m_pktlen(m);
3336 	struct mbuf *tail = m;
3337 	uint32_t cnt = 1;
3338 	boolean_t pdrop;
3339 
3340 	while (tail->m_nextpkt) {
3341 		VERIFY(tail->m_flags & M_PKTHDR);
3342 		tail = tail->m_nextpkt;
3343 		cnt++;
3344 		bytes += m_pktlen(tail);
3345 	}
3346 
3347 	return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
3348 }
3349 
3350 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)3351 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3352     boolean_t *pdrop)
3353 {
3354 	classq_pkt_t pkt;
3355 
3356 	m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
3357 	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3358 	    m->m_nextpkt != NULL) {
3359 		if (m != NULL) {
3360 			m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_INVALID, NULL, 0);
3361 			*pdrop = TRUE;
3362 		}
3363 		return EINVAL;
3364 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3365 	    !IF_FULLY_ATTACHED(ifp)) {
3366 		/* flag tested without lock for performance */
3367 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3368 		*pdrop = TRUE;
3369 		return ENXIO;
3370 	} else if (!(ifp->if_flags & IFF_UP)) {
3371 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3372 		*pdrop = TRUE;
3373 		return ENETDOWN;
3374 	}
3375 
3376 	CLASSQ_PKT_INIT_MBUF(&pkt, m);
3377 	return ifnet_enqueue_common(ifp, NULL, &pkt, flush, pdrop);
3378 }
3379 
/*
 * Enqueue a pre-counted mbuf chain (cnt packets, bytes total) on ifp's
 * send queue.  Unlike ifnet_enqueue_mbuf(), the caller guarantees
 * non-NULL ifp/m_head/m_tail, packet headers on head and tail, and a
 * TX-start interface; the ASSERTs below document those preconditions.
 * On drop, the whole chain is freed and *pdrop is set to TRUE.
 */
errno_t
ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	classq_pkt_t head, tail;

	/*
	 * NOTE(review): ifp and m_head are dereferenced here before the
	 * ASSERTs below run (and ASSERTs compile out in release builds),
	 * so the non-NULL preconditions are on the caller.
	 */
	m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
	ASSERT(m_head != NULL);
	ASSERT((m_head->m_flags & M_PKTHDR) != 0);
	ASSERT(m_tail != NULL);
	ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
	ASSERT(ifp != NULL);
	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);

	if (!IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
	return ifnet_enqueue_ifclassq_chain(ifp, NULL, &head, &tail, cnt, bytes,
	           flush, pdrop);
}
3411 
3412 #if SKYWALK
3413 static errno_t
ifnet_enqueue_pkt_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3414 ifnet_enqueue_pkt_common(struct ifnet *ifp, struct ifclassq *ifcq,
3415     struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3416 {
3417 	classq_pkt_t pkt;
3418 
3419 	ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
3420 
3421 	if (__improbable(ifp == NULL || kpkt == NULL)) {
3422 		if (kpkt != NULL) {
3423 			pp_free_packet(__DECONST(struct kern_pbufpool *,
3424 			    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3425 			*pdrop = TRUE;
3426 		}
3427 		return EINVAL;
3428 	} else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
3429 	    !IF_FULLY_ATTACHED(ifp))) {
3430 		/* flag tested without lock for performance */
3431 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3432 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3433 		*pdrop = TRUE;
3434 		return ENXIO;
3435 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3436 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3437 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3438 		*pdrop = TRUE;
3439 		return ENETDOWN;
3440 	}
3441 
3442 	CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
3443 	return ifnet_enqueue_common(ifp, ifcq, &pkt, flush, pdrop);
3444 }
3445 
3446 errno_t
ifnet_enqueue_pkt(struct ifnet * ifp,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3447 ifnet_enqueue_pkt(struct ifnet *ifp, struct __kern_packet *kpkt,
3448     boolean_t flush, boolean_t *pdrop)
3449 {
3450 	return ifnet_enqueue_pkt_common(ifp, NULL, kpkt, flush, pdrop);
3451 }
3452 
3453 errno_t
ifnet_enqueue_ifcq_pkt(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3454 ifnet_enqueue_ifcq_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
3455     struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3456 {
3457 	return ifnet_enqueue_pkt_common(ifp, ifcq, kpkt, flush, pdrop);
3458 }
3459 
3460 static errno_t
ifnet_enqueue_pkt_chain_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3461 ifnet_enqueue_pkt_chain_common(struct ifnet *ifp, struct ifclassq *ifcq,
3462     struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3463     uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3464 {
3465 	classq_pkt_t head, tail;
3466 
3467 	ASSERT(k_head != NULL);
3468 	ASSERT(k_tail != NULL);
3469 	ASSERT(ifp != NULL);
3470 	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3471 
3472 	if (!IF_FULLY_ATTACHED(ifp)) {
3473 		/* flag tested without lock for performance */
3474 		pp_free_packet_chain(k_head, NULL);
3475 		*pdrop = TRUE;
3476 		return ENXIO;
3477 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3478 		pp_free_packet_chain(k_head, NULL);
3479 		*pdrop = TRUE;
3480 		return ENETDOWN;
3481 	}
3482 
3483 	CLASSQ_PKT_INIT_PACKET(&head, k_head);
3484 	CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
3485 	return ifnet_enqueue_ifclassq_chain(ifp, ifcq, &head, &tail, cnt, bytes,
3486 	           flush, pdrop);
3487 }
3488 
3489 errno_t
ifnet_enqueue_pkt_chain(struct ifnet * ifp,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3490 ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct __kern_packet *k_head,
3491     struct __kern_packet *k_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
3492     boolean_t *pdrop)
3493 {
3494 	return ifnet_enqueue_pkt_chain_common(ifp, NULL, k_head, k_tail,
3495 	           cnt, bytes, flush, pdrop);
3496 }
3497 
3498 errno_t
ifnet_enqueue_ifcq_pkt_chain(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3499 ifnet_enqueue_ifcq_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3500     struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3501     uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3502 {
3503 	return ifnet_enqueue_pkt_chain_common(ifp, ifcq, k_head, k_tail,
3504 	           cnt, bytes, flush, pdrop);
3505 }
3506 #endif /* SKYWALK */
3507 
3508 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)3509 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3510 {
3511 	errno_t rc;
3512 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3513 
3514 	if (ifp == NULL || mp == NULL) {
3515 		return EINVAL;
3516 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3517 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3518 		return ENXIO;
3519 	}
3520 	if (!ifnet_is_attached(ifp, 1)) {
3521 		return ENXIO;
3522 	}
3523 
3524 #if SKYWALK
3525 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3526 #endif /* SKYWALK */
3527 	rc = ifclassq_dequeue(ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3528 	    &pkt, NULL, NULL, NULL, 0);
3529 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3530 	ifnet_decr_iorefcnt(ifp);
3531 	*mp = pkt.cp_mbuf;
3532 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3533 	return rc;
3534 }
3535 
3536 errno_t
ifnet_dequeue_service_class(struct ifnet * ifp,mbuf_svc_class_t sc,struct mbuf ** mp)3537 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3538     struct mbuf **mp)
3539 {
3540 	errno_t rc;
3541 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3542 
3543 	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
3544 		return EINVAL;
3545 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3546 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3547 		return ENXIO;
3548 	}
3549 	if (!ifnet_is_attached(ifp, 1)) {
3550 		return ENXIO;
3551 	}
3552 
3553 #if SKYWALK
3554 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3555 #endif /* SKYWALK */
3556 	rc = ifclassq_dequeue_sc(ifp->if_snd, sc, 1,
3557 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
3558 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3559 	ifnet_decr_iorefcnt(ifp);
3560 	*mp = pkt.cp_mbuf;
3561 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3562 	return rc;
3563 }
3564 
3565 errno_t
ifnet_dequeue_multi(struct ifnet * ifp,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3566 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3567     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3568 {
3569 	errno_t rc;
3570 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3571 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3572 
3573 	if (ifp == NULL || head == NULL || pkt_limit < 1) {
3574 		return EINVAL;
3575 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3576 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3577 		return ENXIO;
3578 	}
3579 	if (!ifnet_is_attached(ifp, 1)) {
3580 		return ENXIO;
3581 	}
3582 
3583 #if SKYWALK
3584 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3585 #endif /* SKYWALK */
3586 	rc = ifclassq_dequeue(ifp->if_snd, pkt_limit,
3587 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
3588 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3589 	ifnet_decr_iorefcnt(ifp);
3590 	*head = pkt_head.cp_mbuf;
3591 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3592 	if (tail != NULL) {
3593 		*tail = pkt_tail.cp_mbuf;
3594 	}
3595 	return rc;
3596 }
3597 
3598 errno_t
ifnet_dequeue_multi_bytes(struct ifnet * ifp,u_int32_t byte_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3599 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3600     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3601 {
3602 	errno_t rc;
3603 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3604 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3605 
3606 	if (ifp == NULL || head == NULL || byte_limit < 1) {
3607 		return EINVAL;
3608 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3609 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3610 		return ENXIO;
3611 	}
3612 	if (!ifnet_is_attached(ifp, 1)) {
3613 		return ENXIO;
3614 	}
3615 
3616 #if SKYWALK
3617 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3618 #endif /* SKYWALK */
3619 	rc = ifclassq_dequeue(ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3620 	    byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
3621 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3622 	ifnet_decr_iorefcnt(ifp);
3623 	*head = pkt_head.cp_mbuf;
3624 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3625 	if (tail != NULL) {
3626 		*tail = pkt_tail.cp_mbuf;
3627 	}
3628 	return rc;
3629 }
3630 
3631 errno_t
ifnet_dequeue_service_class_multi(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3632 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3633     u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3634     u_int32_t *len)
3635 {
3636 	errno_t rc;
3637 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3638 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3639 
3640 	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3641 	    !MBUF_VALID_SC(sc)) {
3642 		return EINVAL;
3643 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3644 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3645 		return ENXIO;
3646 	}
3647 	if (!ifnet_is_attached(ifp, 1)) {
3648 		return ENXIO;
3649 	}
3650 
3651 #if SKYWALK
3652 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3653 #endif /* SKYWALK */
3654 	rc = ifclassq_dequeue_sc(ifp->if_snd, sc, pkt_limit,
3655 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
3656 	    cnt, len, 0);
3657 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3658 	ifnet_decr_iorefcnt(ifp);
3659 	*head = pkt_head.cp_mbuf;
3660 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3661 	if (tail != NULL) {
3662 		*tail = pkt_tail.cp_mbuf;
3663 	}
3664 	return rc;
3665 }
3666 
#if XNU_TARGET_OS_OSX
/*
 * Framer adapter: reports zero bytes of prepend/append growth through the
 * optional pre/post out-parameters and delegates the actual framing to the
 * interface's legacy framer callback (if_framer_legacy).
 */
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest,
    IFNET_LLADDR_T dest_linkaddr,
    IFNET_FRAME_TYPE_T frame_type,
    u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* XNU_TARGET_OS_OSX */
3685 
/*
 * Post a kernel event message.
 * If ifp is set, we will increment the generation for the interface
 * before posting, so interested clients can detect the change.
 */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	/* give NECP clients a chance to re-evaluate after the change */
	necp_update_all_clients();
#endif /* NECP */

	return kev_post_msg(event);
}
3700 
3701 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)3702 dlil_post_sifflags_msg(struct ifnet * ifp)
3703 {
3704 	struct kev_msg ev_msg;
3705 	struct net_event_data ev_data;
3706 
3707 	bzero(&ev_data, sizeof(ev_data));
3708 	bzero(&ev_msg, sizeof(ev_msg));
3709 	ev_msg.vendor_code = KEV_VENDOR_APPLE;
3710 	ev_msg.kev_class = KEV_NETWORK_CLASS;
3711 	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3712 	ev_msg.event_code = KEV_DL_SIFFLAGS;
3713 	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3714 	ev_data.if_family = ifp->if_family;
3715 	ev_data.if_unit = (u_int32_t) ifp->if_unit;
3716 	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
3717 	ev_msg.dv[0].data_ptr = &ev_data;
3718 	ev_msg.dv[1].data_length = 0;
3719 	dlil_post_complete_msg(ifp, &ev_msg);
3720 }
3721 
#define TMP_IF_PROTO_ARR_SIZE   10
/*
 * Deliver a kernel event for an interface to every interested party, in
 * order: the attached interface filters, each attached protocol's event
 * callback, and finally the interface's own if_event handler.  The event
 * message itself is then posted via dlil_post_complete_msg(); when
 * update_generation is true the interface generation is bumped as part
 * of posting.
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	/* small on-stack array avoids a heap allocation in the common case */
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			/* drop the lock across the callback; the busy
			 * monitor above keeps the list stable */
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			/* too many protocols for the stack array; use heap */
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		/* snapshot every attached protocol, holding a ref on each */
		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* invoke each protocol's event callback without the ifnet lock */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		/* drop the snapshot reference taken above */
		if_proto_free(proto);
	}

cleanup:
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
3822 
3823 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)3824 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3825 {
3826 	struct kev_msg kev_msg;
3827 	int result = 0;
3828 
3829 	if (ifp == NULL || event == NULL) {
3830 		return EINVAL;
3831 	}
3832 
3833 	bzero(&kev_msg, sizeof(kev_msg));
3834 	kev_msg.vendor_code = event->vendor_code;
3835 	kev_msg.kev_class = event->kev_class;
3836 	kev_msg.kev_subclass = event->kev_subclass;
3837 	kev_msg.event_code = event->event_code;
3838 	kev_msg.dv[0].data_ptr = &event->event_data;
3839 	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3840 	kev_msg.dv[1].data_length = 0;
3841 
3842 	result = dlil_event_internal(ifp, &kev_msg, TRUE);
3843 
3844 	return result;
3845 }
3846 
/* The following is used to enqueue work items for ifnet ioctl events */
static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);

/* Deferred-ioctl work item: target interface plus the ioctl to issue. */
struct ifnet_ioctl_event {
	ifnet_ref_t ifp;        /* carries an I/O ref, dropped by the callback */
	u_long ioctl_code;      /* e.g. SIOCADDMULTI / SIOCDELMULTI */
};

/* Wrapper embedding the work-queue linkage; recovered via __container_of. */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
3859 
3860 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)3861 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
3862 {
3863 	struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
3864 	bool compare_expected;
3865 
3866 	/*
3867 	 * Get an io ref count if the interface is attached.
3868 	 * At this point it most likely is. We are taking a reference for
3869 	 * deferred processing.
3870 	 */
3871 	if (!ifnet_is_attached(ifp, 1)) {
3872 		os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
3873 		    "is not attached",
3874 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3875 		return;
3876 	}
3877 	switch (ioctl_code) {
3878 	case SIOCADDMULTI:
3879 		compare_expected = false;
3880 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
3881 			ifnet_decr_iorefcnt(ifp);
3882 			return;
3883 		}
3884 		break;
3885 	case SIOCDELMULTI:
3886 		compare_expected = false;
3887 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
3888 			ifnet_decr_iorefcnt(ifp);
3889 			return;
3890 		}
3891 		break;
3892 	default:
3893 		os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
3894 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3895 		return;
3896 	}
3897 
3898 	p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
3899 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
3900 
3901 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
3902 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
3903 	p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
3904 	nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
3905 }
3906 
/*
 * Work-queue callback that issues the deferred ioctl queued by
 * ifnet_ioctl_async().  Clears the coalescing flag first so a new
 * request can be signaled, issues the ioctl, then drops the I/O
 * reference taken at enqueue time and frees the work item.
 */
static void
ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
{
	struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
	    struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);

	ifnet_ref_t ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
	u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
	int ret = 0;

	/* allow the next add/delete request to be signaled */
	switch (ioctl_code) {
	case SIOCADDMULTI:
		atomic_store(&ifp->if_mcast_add_signaled, false);
		break;
	case SIOCDELMULTI:
		atomic_store(&ifp->if_mcast_del_signaled, false);
		break;
	}
	if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ret, ioctl_code);
	} else if (dlil_verbose) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
		    "for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ioctl_code);
	}
	/* release the I/O ref held since ifnet_ioctl_async() */
	ifnet_decr_iorefcnt(ifp);
	kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
	return;
}
3937 
/*
 * Issue an ioctl against an interface on behalf of a protocol family.
 * The call is offered, in order, to: the attached interface filters,
 * the protocol attached for proto_fam (if any), and finally the
 * interface's own if_ioctl handler.  The first definitive (non-zero,
 * non-EOPNOTSUPP) result stops processing; EJUSTRETURN from any layer
 * also stops processing and is mapped to 0 for the caller.
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/* drop the lock across the filter callback */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, stop" — report success to the caller */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
4055 
4056 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)4057 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4058 {
4059 	errno_t error = 0;
4060 
4061 	if (ifp->if_set_bpf_tap) {
4062 		/* Get an io reference on the interface if it is attached */
4063 		if (!ifnet_is_attached(ifp, 1)) {
4064 			return ENXIO;
4065 		}
4066 		error = ifp->if_set_bpf_tap(ifp, mode, callback);
4067 		ifnet_decr_iorefcnt(ifp);
4068 	}
4069 	return error;
4070 }
4071 
/*
 * Resolve a multicast protocol address into a link-layer address.
 * The protocol attached for proto_addr's family performs the mapping
 * via its resolve_multi callback; the interface's if_check_multi
 * (when present) then verifies either the resolved link-layer address
 * or, if the protocol couldn't help, the original protocol address.
 * ll_addr is zeroed before the resolution is attempted.
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	/* hold an I/O ref for the duration of the call */
	if (!ifnet_is_attached(ifp, 1)) {
		return result;
	}

	SOCKADDR_ZERO(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			verify = ll_addr;
		} else {
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return result;
}
4113 
/*
 * Hand an ARP request/reply to the protocol attached for the target
 * address's family.  Returns ENOTSUP when the interface is marked
 * IFF_NOARP, when no matching protocol is attached, or when that
 * protocol has no send_arp callback.  ARP transmit statistics are
 * accounted here before invoking the callback.
 */
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* interfaces that opted out of ARP never transmit it */
	if ((ifp->if_flags & IFF_NOARP) != 0) {
		result = ENOTSUP;
		goto done;
	}

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp    arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			/* account tx statistics by opcode */
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				/* a request with a known target hw address
				 * counts as a unicast request */
				if (target_hw != NULL) {
					arpstat.txurequests++;
				}
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}
done:
	return result;
}
4159 
4160 static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,const struct sockaddr_in * target_sin)4161 _is_announcement(const struct sockaddr_in * sender_sin,
4162     const struct sockaddr_in * target_sin)
4163 {
4164 	if (target_sin == NULL || sender_sin == NULL) {
4165 		return FALSE;
4166 	}
4167 
4168 	return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
4169 }
4170 
/*
 * Send an ARP packet.  Normally the packet goes out the given interface
 * via dlil_send_arp_internal().  The exception: an ARP request for an
 * IPv4 link-local target (when ipv4_ll_arp_aware is enabled) that is not
 * an announcement is replicated across every attached interface marked
 * IFEF_ARPLL, each using its own link-layer and IPv4 source addresses.
 * When the target is a router (RTF_ROUTER), that hint is propagated to
 * the protocol's send_arp callback via SIN_ROUTER.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = __DECONST_SA(target_proto0);

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = SA(&target_proto_sinarp);
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = SIN(sender_proto);
	target_sin = SIN(target_proto);
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		u_int32_t       count;
		ifnet_ref_t     *__counted_by(count) ifp_list;
		u_int32_t       ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				ifnet_ref_t cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* keep the lladdr ifaddr alive across the send */
				ifa_addref(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, SDL(source_hw->ifa_addr),
				    SA(&source_ip_copy), NULL,
				    target_proto);

				ifa_remref(source_hw);
				/* report the first meaningful result */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free_counted_by(ifp_list, count);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
4281 
4282 /*
4283  * Caller must hold ifnet head lock.
4284  */
4285 static int
ifnet_lookup(struct ifnet * ifp)4286 ifnet_lookup(struct ifnet *ifp)
4287 {
4288 	ifnet_ref_t _ifp;
4289 
4290 	ifnet_head_lock_assert(LCK_RW_ASSERT_HELD);
4291 	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4292 		if (_ifp == ifp) {
4293 			break;
4294 		}
4295 	}
4296 	return _ifp != NULL;
4297 }
4298 
4299 /*
4300  * Caller has to pass a non-zero refio argument to get a
4301  * IO reference count. This will prevent ifnet_detach from
4302  * being called when there are outstanding io reference counts.
4303  */
4304 int
ifnet_is_attached(struct ifnet * ifp,int refio)4305 ifnet_is_attached(struct ifnet *ifp, int refio)
4306 {
4307 	int ret;
4308 
4309 	lck_mtx_lock_spin(&ifp->if_ref_lock);
4310 	if ((ret = IF_FULLY_ATTACHED(ifp))) {
4311 		if (refio > 0) {
4312 			ifp->if_refio++;
4313 		}
4314 	}
4315 	lck_mtx_unlock(&ifp->if_ref_lock);
4316 
4317 	return ret;
4318 }
4319 
/* Account one more kernel thread pending startup for this interface. */
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4327 
/*
 * Account one fewer pending thread; when the count reaches zero, wake
 * any waiter sleeping on if_threads_pending.
 */
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4339 
4340 /*
4341  * Caller must ensure the interface is attached; the assumption is that
4342  * there is at least an outstanding IO reference count held already.
4343  * Most callers would call ifnet_is_{attached,data_ready}() instead.
4344  */
4345 void
ifnet_incr_iorefcnt(struct ifnet * ifp)4346 ifnet_incr_iorefcnt(struct ifnet *ifp)
4347 {
4348 	lck_mtx_lock_spin(&ifp->if_ref_lock);
4349 	VERIFY(IF_FULLY_ATTACHED(ifp));
4350 	VERIFY(ifp->if_refio > 0);
4351 	ifp->if_refio++;
4352 	lck_mtx_unlock(&ifp->if_ref_lock);
4353 }
4354 
/*
 * Drop one I/O reference with if_ref_lock already held.
 * Shared by ifnet_decr_iorefcnt() and the datamov end/resume paths.
 */
__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	/* the last I/O ref cannot go away while data movers are active */
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}
4375 
/* Drop one I/O reference, taking if_ref_lock around the update. */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4383 
4384 boolean_t
ifnet_datamov_begin(struct ifnet * ifp)4385 ifnet_datamov_begin(struct ifnet *ifp)
4386 {
4387 	boolean_t ret;
4388 
4389 	lck_mtx_lock_spin(&ifp->if_ref_lock);
4390 	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
4391 		ifp->if_refio++;
4392 		ifp->if_datamov++;
4393 	}
4394 	lck_mtx_unlock(&ifp->if_ref_lock);
4395 
4396 	DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
4397 	return ret;
4398 }
4399 
/*
 * End a data-movement section begun with ifnet_datamov_begin():
 * drop the data-mover count (waking any blocked drainers when it hits
 * zero) and release the associated I/O reference.
 */
void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
		DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);

	DTRACE_IP1(datamov__end, struct ifnet *, ifp);
}
4419 
/*
 * Suspend data movement with if_ref_lock held: take an I/O reference
 * (released by ifnet_datamov_resume) and, on the first suspension,
 * clear IFRF_READY so no new data movers may start.
 */
static void
ifnet_datamov_suspend_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
}
4430 
/* Suspend data movement on the interface (see suspend_locked above). */
void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4439 
4440 boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet * ifp)4441 ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
4442 {
4443 	lck_mtx_lock_spin(&ifp->if_ref_lock);
4444 	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
4445 	if (ifp->if_suspend > 0) {
4446 		lck_mtx_unlock(&ifp->if_ref_lock);
4447 		return FALSE;
4448 	}
4449 	ifnet_datamov_suspend_locked(ifp);
4450 	lck_mtx_unlock(&ifp->if_ref_lock);
4451 	return TRUE;
4452 }
4453 
/*
 * Block until all in-flight data movers on the interface have finished.
 * Data movement must already be suspended (if_suspend > 0, IFRF_READY
 * cleared) so no new movers can start.  After quiescing, the transmit
 * queues are flushed for TXSTART interfaces.
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		/* sleep releases if_ref_lock, reacquired on wakeup */
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush_snd(ifp, false);
	}
}
4481 
/* Convenience wrapper: suspend data movement, then wait for quiescence. */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
4488 
/*
 * Undo one suspension: when the last suspension is lifted, restore
 * IFRF_READY so data movers may start again, and drop the I/O
 * reference taken at suspend time.
 */
void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4502 
/*
 * Second stage of protocol attachment: register a caller-constructed
 * if_proto with its interface.  Rejects duplicates (EEXIST), lets the
 * family refine the demux descriptors via if_add_proto, links the
 * protocol into the per-interface hash, and posts KEV_DL_PROTO_ATTACHED.
 * On success a proto refcnt is held for the attachment; *proto_count
 * (if non-NULL) receives the number of protocols now attached.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *__counted_by(demux_count) demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	ifnet_ref_t ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	/* hold an I/O ref across the attachment */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		/* the family is already attached; release the lookup ref */
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		/* append at the tail of the hash chain */
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
4582 
4583 static void
dlil_handle_proto_attach(ifnet_t ifp,protocol_family_t protocol)4584 dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
4585 {
4586 	/*
4587 	 * A protocol has been attached, mark the interface up.
4588 	 * This used to be done by configd.KernelEventMonitor, but that
4589 	 * is inherently prone to races (rdar://problem/30810208).
4590 	 */
4591 	(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
4592 	(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
4593 	dlil_post_sifflags_msg(ifp);
4594 #if SKYWALK
4595 	switch (protocol) {
4596 	case AF_INET:
4597 	case AF_INET6:
4598 		/* don't attach the flowswitch unless attaching IP */
4599 		dlil_attach_flowswitch_nexus(ifp);
4600 		break;
4601 	default:
4602 		break;
4603 	}
4604 #endif /* SKYWALK */
4605 }
4606 
/*
 * Attach a v1-KPI protocol handler to an interface.
 *
 * Allocates an if_proto, copies the caller-supplied v1 callbacks from
 * proto_details, and hands it to dlil_attach_protocol(), which inserts
 * it into the interface's protocol hash.  On success, ownership of the
 * if_proto passes to the interface and dlil_handle_proto_attach() marks
 * the interface up; on any failure (including EEXIST) the if_proto
 * allocated here is freed below.
 *
 * Returns 0 on success; EINVAL for bad arguments; ENXIO if the
 * interface is not on the global list; EEXIST if the protocol family
 * is already attached; or another error from dlil_attach_protocol().
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto  *ifproto = NULL;
	uint32_t proto_count = 0;

	/* hold the head lock shared so ifp can't be detached underneath us */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	/* copy the caller's v1 callback table */
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/* success: the interface now owns ifproto */
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* failure: release the if_proto allocated above */
		dlif_proto_free(ifproto);
	}
	return retval;
}
4668 
/*
 * Attach a v2-KPI protocol handler to an interface.
 *
 * Identical flow to ifnet_attach_protocol(), but fills in the v2
 * callback table (kpi.v2) from the v2 parameter structure.  On
 * success, ownership of the if_proto passes to the interface; on any
 * failure (including EEXIST) the if_proto allocated here is freed.
 *
 * Returns 0 on success; EINVAL for bad arguments; ENXIO if the
 * interface is not on the global list; EEXIST if the protocol family
 * is already attached; or another error from dlil_attach_protocol().
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto  *ifproto = NULL;
	uint32_t proto_count = 0;

	/* hold the head lock shared so ifp can't be detached underneath us */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	/* copy the caller's v2 callback table */
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/* success: the interface now owns ifproto */
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* failure: release the if_proto allocated above */
		dlif_proto_free(ifproto);
	}
	return retval;
}
4730 
/*
 * Detach a protocol handler (v1 or v2) from an interface.
 *
 * Removes the if_proto from the interface's protocol hash, calls the
 * family module's del_proto hook, and swings the callback table over
 * to inert "media" stubs so that any concurrent caller still holding a
 * proto reference gets ENXIO instead of calling into a stale handler.
 * Two references are released on the way out: the one held since
 * attach, and the one taken by find_attached_proto() here; the final
 * teardown runs when the last reference is dropped.
 *
 * Returns 0 on success, EINVAL for bad arguments, or ENXIO if the
 * protocol family is not attached to this interface.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int     retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/*
	 * Replace the live callbacks with stubs that fail with ENXIO,
	 * so in-flight users holding a reference fail gracefully.
	 */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
4796 
4797 static errno_t
ifproto_media_input_v1(struct ifnet * ifp,protocol_family_t protocol,struct mbuf * packet,char * header)4798 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
4799     struct mbuf *packet, char *header)
4800 {
4801 #pragma unused(ifp, protocol, packet, header)
4802 	return ENXIO;
4803 }
4804 
4805 static errno_t
ifproto_media_input_v2(struct ifnet * ifp,protocol_family_t protocol,struct mbuf * packet)4806 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
4807     struct mbuf *packet)
4808 {
4809 #pragma unused(ifp, protocol, packet)
4810 	return ENXIO;
4811 }
4812 
4813 static errno_t
ifproto_media_preout(struct ifnet * ifp,protocol_family_t protocol,mbuf_t * packet,const struct sockaddr * dest,void * route,IFNET_FRAME_TYPE_RW_T frame_type,IFNET_LLADDR_RW_T link_layer_dest)4814 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
4815     mbuf_t *packet, const struct sockaddr *dest, void *route,
4816     IFNET_FRAME_TYPE_RW_T frame_type, IFNET_LLADDR_RW_T link_layer_dest)
4817 {
4818 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
4819 	return ENXIO;
4820 }
4821 
4822 static void
ifproto_media_event(struct ifnet * ifp,protocol_family_t protocol,const struct kev_msg * event)4823 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
4824     const struct kev_msg *event)
4825 {
4826 #pragma unused(ifp, protocol, event)
4827 }
4828 
4829 static errno_t
ifproto_media_ioctl(struct ifnet * ifp,protocol_family_t protocol,unsigned long command,void * argument)4830 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
4831     unsigned long command, void *argument)
4832 {
4833 #pragma unused(ifp, protocol, command, argument)
4834 	return ENXIO;
4835 }
4836 
4837 static errno_t
ifproto_media_resolve_multi(ifnet_t ifp,const struct sockaddr * proto_addr,struct sockaddr_dl * out_ll,size_t ll_len)4838 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
4839     struct sockaddr_dl *out_ll, size_t ll_len)
4840 {
4841 #pragma unused(ifp, proto_addr, out_ll, ll_len)
4842 	return ENXIO;
4843 }
4844 
4845 static errno_t
ifproto_media_send_arp(struct ifnet * ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4846 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
4847     const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4848     const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4849 {
4850 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
4851 	return ENXIO;
4852 }
4853 
4854 extern int if_next_index(void);
4855 extern int tcp_ecn_outbound;
4856 
4857 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)4858 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
4859 {
4860 	uint32_t sflags = 0;
4861 	int err;
4862 
4863 	if (if_flowadv) {
4864 		sflags |= PKTSCHEDF_QALG_FLOWCTL;
4865 	}
4866 
4867 	if (if_delaybased_queue) {
4868 		sflags |= PKTSCHEDF_QALG_DELAYBASED;
4869 	}
4870 
4871 	if (ifp->if_output_sched_model ==
4872 	    IFNET_SCHED_MODEL_DRIVER_MANAGED) {
4873 		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
4874 	}
4875 	/* Inherit drop limit from the default queue */
4876 	if (ifp->if_snd != ifcq) {
4877 		IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
4878 	}
4879 	/* Initialize transmit queue(s) */
4880 	err = ifclassq_setup(ifcq, ifp, sflags);
4881 	if (err != 0) {
4882 		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4883 		    "err=%d", __func__, ifp, err);
4884 		/* NOTREACHED */
4885 	}
4886 }
4887 
/*
 * Attach an ifnet to the system, making it visible to interface
 * lookups and ready for protocol attachments.
 *
 * ll_addr, if non-NULL, supplies the link-layer address; when
 * if_addrlen is already set, ll_addr->sdl_alen must match it.  The
 * whole attach is serialized under dlil_if_lock() since
 * ifnet_head_lock is dropped and reacquired before the ifnet is
 * marked IFRF_ATTACHED.  The ifnet must be in the IFRF_EMBRYONIC
 * state (freshly allocated or recycled) or we panic.
 *
 * Returns 0 on success; EINVAL for a NULL ifp or mismatched ll_addr
 * length; EEXIST if the interface is already on the ifnet list;
 * ENODEV if the interface has no family module (if_add_proto /
 * if_del_proto); ENOBUFS if no interface index or link address could
 * be allocated.
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
#if SKYWALK
	boolean_t netif_compat;
	if_nexus_netif  nexus_netif;
#endif /* SKYWALK */
	ifnet_ref_t tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	/* attach is only legal on an embryonic (not-yet-attached) ifnet */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	/* adopt or validate the link-layer address length */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
	    PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ifp->if_proto_hash_count = PROTO_HASH_SLOTS;

	/* interface filter list must start out empty and quiescent */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* a recycled (DLIF_REUSE) ifnet may carry over multicast memberships */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	/* assign an interface index if the ifnet doesn't have one yet */
	if (ifp->if_index == 0) {
		int idx = if_next_index();

		/*
		 * Since we exhausted the list of
		 * if_index's, try to find an empty slot
		 * in ifindex2ifnet.
		 */
		if (idx == -1 && if_index >= UINT16_MAX) {
			for (int i = 1; i < if_index; i++) {
				if (ifindex2ifnet[i] == NULL &&
				    ifnet_addrs[i - 1] == NULL) {
					idx = i;
					break;
				}
			}
		}
		if (idx == -1) {
			/* no index available at all */
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;

		/* the lladdr passed at attach time is the permanent address */
		if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
		    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
			bcopy(CONST_LLADDR(ll_addr),
			    dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
			dl_if->dl_if_permanent_ether_is_set = 1;
		}
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	ifa_addref(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* publish the ifnet: visible via head list and ifindex2ifnet */
	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	dlil_ifclassq_setup(ifp, ifp->if_snd);

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			/* ENODEV: no dedicated input thread for this ifnet */
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* give the starter thread a slight priority boost */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (dlil_is_rxpoll_input(thfunc)) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* give the poller thread a slight priority boost */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/* count link-layer memberships carried over by a recycled ifnet */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	/* Clear throughput estimates and radio type */
	ifp->if_estimated_up_bucket = 0;
	ifp->if_estimated_down_bucket = 0;
	ifp->if_radio_type = 0;
	ifp->if_radio_channel = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		if_set_eflags(ifp, IFEF_ECN_ENABLE);
		if_clear_eflags(ifp, IFEF_ECN_DISABLE);
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

#if SKYWALK
	netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Final mark this ifnet as attached. */
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
#if SKYWALK
	/* the interface is fully attached; let the nexus adapter know */
	if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
		if (netif_compat) {
			if (sk_netif_compat_txmodel ==
			    NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
				ifnet_enqueue_multi_setup(ifp,
				    sk_tx_delay_qlen, sk_tx_delay_timeout);
			}
			ifp->if_nx_netif = nexus_netif;
		}
		ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
	}
#endif /* SKYWALK */
	ifnet_lock_done(ifp);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);

	os_log(OS_LOG_DEFAULT, "%s: attached%s\n", if_name(ifp),
	    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	return 0;
}
5388 
/*
 * Purge all protocol addresses from an interface: IPv4 first (when
 * INET is configured), then IPv6.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
5397 
5398 errno_t
ifnet_detach(ifnet_t ifp)5399 ifnet_detach(ifnet_t ifp)
5400 {
5401 	ifnet_ref_t delegated_ifp;
5402 	struct nd_ifinfo *ndi = NULL;
5403 
5404 	if (ifp == NULL) {
5405 		return EINVAL;
5406 	}
5407 
5408 	ndi = ND_IFINFO(ifp);
5409 	if (NULL != ndi) {
5410 		ndi->cga_initialized = FALSE;
5411 	}
5412 	os_log(OS_LOG_DEFAULT, "%s detaching", if_name(ifp));
5413 
5414 	/* Mark the interface down */
5415 	if_down(ifp);
5416 
5417 	/*
5418 	 * IMPORTANT NOTE
5419 	 *
5420 	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
5421 	 * or equivalently, ifnet_is_attached(ifp, 1), can't be modified
5422 	 * until after we've waited for all I/O references to drain
5423 	 * in ifnet_detach_final().
5424 	 */
5425 
5426 	ifnet_head_lock_exclusive();
5427 	ifnet_lock_exclusive(ifp);
5428 
5429 	if (ifp->if_output_netem != NULL) {
5430 		netem_destroy(ifp->if_output_netem);
5431 		ifp->if_output_netem = NULL;
5432 	}
5433 
5434 	/*
5435 	 * Check to see if this interface has previously triggered
5436 	 * aggressive protocol draining; if so, decrement the global
5437 	 * refcnt and clear PR_AGGDRAIN on the route domain if
5438 	 * there are no more of such an interface around.
5439 	 */
5440 	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
5441 
5442 	lck_mtx_lock_spin(&ifp->if_ref_lock);
5443 	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
5444 		lck_mtx_unlock(&ifp->if_ref_lock);
5445 		ifnet_lock_done(ifp);
5446 		ifnet_head_done();
5447 		return EINVAL;
5448 	} else if (ifp->if_refflags & IFRF_DETACHING) {
5449 		/* Interface has already been detached */
5450 		lck_mtx_unlock(&ifp->if_ref_lock);
5451 		ifnet_lock_done(ifp);
5452 		ifnet_head_done();
5453 		return ENXIO;
5454 	}
5455 	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
5456 	/* Indicate this interface is being detached */
5457 	ifp->if_refflags &= ~IFRF_ATTACHED;
5458 	ifp->if_refflags |= IFRF_DETACHING;
5459 	lck_mtx_unlock(&ifp->if_ref_lock);
5460 
5461 	/* clean up flow control entry object if there's any */
5462 	if (ifp->if_eflags & IFEF_TXSTART) {
5463 		ifnet_flowadv(ifp->if_flowhash);
5464 	}
5465 
5466 	/* Reset ECN enable/disable flags */
5467 	/* Reset CLAT46 flag */
5468 	if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);
5469 
5470 	/*
5471 	 * We do not reset the TCP keep alive counters in case
5472 	 * a TCP connection stays connection after the interface
5473 	 * went down
5474 	 */
5475 	if (ifp->if_tcp_kao_cnt > 0) {
5476 		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
5477 		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
5478 	}
5479 	ifp->if_tcp_kao_max = 0;
5480 
5481 	/*
5482 	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
5483 	 * no longer be visible during lookups from this point.
5484 	 */
5485 	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
5486 	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
5487 	ifp->if_link.tqe_next = NULL;
5488 	ifp->if_link.tqe_prev = NULL;
5489 	if (ifp->if_ordered_link.tqe_next != NULL ||
5490 	    ifp->if_ordered_link.tqe_prev != NULL) {
5491 		ifnet_remove_from_ordered_list(ifp);
5492 	}
5493 	ifindex2ifnet[ifp->if_index] = NULL;
5494 
5495 	/* 18717626 - reset router mode */
5496 	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
5497 	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;
5498 
5499 	/* Record detach PC stacktrace */
5500 	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
5501 
5502 	/* Clear logging parameters */
5503 	bzero(&ifp->if_log, sizeof(ifp->if_log));
5504 
5505 	/* Clear delegated interface info (reference released below) */
5506 	delegated_ifp = ifp->if_delegated.ifp;
5507 	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
5508 
5509 	/* Reset interface state */
5510 	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5511 
5512 	/*
5513 	 * Increment the generation count on interface deletion
5514 	 */
5515 	ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);
5516 
5517 	ifnet_lock_done(ifp);
5518 	ifnet_head_done();
5519 
5520 	/* Release reference held on the delegated interface */
5521 	if (delegated_ifp != NULL) {
5522 		ifnet_release(delegated_ifp);
5523 	}
5524 
5525 	/* Reset Link Quality Metric (unless loopback [lo0]) */
5526 	if (ifp != lo_ifp) {
5527 		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
5528 	}
5529 
5530 	/* Force reset link heuristics */
5531 	if (ifp->if_link_heuristics_tcall != NULL) {
5532 		thread_call_cancel_wait(ifp->if_link_heuristics_tcall);
5533 		thread_call_free(ifp->if_link_heuristics_tcall);
5534 		ifp->if_link_heuristics_tcall = NULL;
5535 	}
5536 	if_clear_xflags(ifp, IFXF_LINK_HEURISTICS);
5537 
5538 	/* Reset TCP local statistics */
5539 	if (ifp->if_tcp_stat != NULL) {
5540 		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
5541 	}
5542 
5543 	/* Reset UDP local statistics */
5544 	if (ifp->if_udp_stat != NULL) {
5545 		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
5546 	}
5547 
5548 	/* Reset ifnet IPv4 stats */
5549 	if (ifp->if_ipv4_stat != NULL) {
5550 		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
5551 	}
5552 
5553 	/* Reset ifnet IPv6 stats */
5554 	if (ifp->if_ipv6_stat != NULL) {
5555 		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
5556 	}
5557 
5558 	/* Release memory held for interface link status report */
5559 	if (ifp->if_link_status != NULL) {
5560 		kfree_type(struct if_link_status, ifp->if_link_status);
5561 		ifp->if_link_status = NULL;
5562 	}
5563 
5564 	/* Disable forwarding cached route */
5565 	lck_mtx_lock(&ifp->if_cached_route_lock);
5566 	ifp->if_fwd_cacheok = 0;
5567 	lck_mtx_unlock(&ifp->if_cached_route_lock);
5568 
5569 	/* Disable data threshold and wait for any pending event posting */
5570 	ifp->if_data_threshold = 0;
5571 	VERIFY(ifp->if_dt_tcall != NULL);
5572 	(void) thread_call_cancel_wait(ifp->if_dt_tcall);
5573 
5574 	/*
5575 	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
5576 	 * references to the info structures and leave them attached to
5577 	 * this ifnet.
5578 	 */
5579 #if INET
5580 	igmp_domifdetach(ifp);
5581 #endif /* INET */
5582 	mld_domifdetach(ifp);
5583 
5584 #if SKYWALK
5585 	/* Clean up any netns tokens still pointing to to this ifnet */
5586 	netns_ifnet_detach(ifp);
5587 #endif /* SKYWALK */
5588 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);
5589 
5590 	/* Let worker thread take care of the rest, to avoid reentrancy */
5591 	dlil_if_lock();
5592 	ifnet_detaching_enqueue(ifp);
5593 	dlil_if_unlock();
5594 
5595 	return 0;
5596 }
5597 
/*
 * Append an ifnet to the global detaching list and wake the detacher
 * thread sleeping on &ifnet_delayed_run (see ifnet_detacher_thread_cont).
 * Caller must hold the dlil_if lock.
 */
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);	/* catch counter wraparound */
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
5608 
5609 static struct ifnet *
ifnet_detaching_dequeue(void)5610 ifnet_detaching_dequeue(void)
5611 {
5612 	ifnet_ref_t ifp;
5613 
5614 	dlil_if_lock_assert();
5615 
5616 	ifp = TAILQ_FIRST(&ifnet_detaching_head);
5617 	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5618 	if (ifp != NULL) {
5619 		VERIFY(ifnet_detaching_cnt != 0);
5620 		--ifnet_detaching_cnt;
5621 		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5622 		ifp->if_detaching_link.tqe_next = NULL;
5623 		ifp->if_detaching_link.tqe_prev = NULL;
5624 	}
5625 	return ifp;
5626 }
5627 
/*
 * Continuation body of the interface-detacher kernel thread.  Drains the
 * ifnet_detaching_head queue by calling ifnet_detach_final() on each entry
 * (with the dlil_if lock dropped around the call), then blocks again on
 * &ifnet_delayed_run with itself as the continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	ifnet_ref_t ifp;

	dlil_if_lock();
	/* first wakeup after thread creation: leave the embryonic state */
	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/*
			 * Drop the dlil_if lock across the (potentially
			 * long-blocking) final detach, then retake it to
			 * re-check the queue.
			 */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	/* Queue drained: sleep until ifnet_detaching_enqueue() wakes us */
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
5670 
/*
 * Entry point of the interface-detacher kernel thread.  Arms the wait on
 * &ifnet_delayed_run, marks itself embryonic, and issues a self-wakeup so
 * that the first pass through ifnet_detacher_thread_cont() can clear the
 * embryonic state and decrement the pending-thread count.  Never returns;
 * all subsequent execution happens in the continuation.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
5687 
/*
 * Final stage of interface detach, run from the detacher thread after
 * ifnet_detach() has removed the ifnet from the global lists and queued it
 * (see ifnet_detaching_enqueue).  Waits for all outstanding I/O references
 * to drain, tears down filters, protocols, addresses, worker threads and
 * cached state, repoints the driver callbacks at local no-op stubs (the
 * driver may unload), clears IFRF_DETACHING, and drops the attach-time
 * reference on the ifnet.
 */
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct dlil_ifnet *dlifp;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;
	bool waited = false;

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

#if SKYWALK
	dlil_netif_detach_notify(ifp);
	/*
	 * Wait for the datapath to quiesce before tearing down
	 * netif/flowswitch nexuses.
	 */
	dlil_quiesce_and_detach_nexuses(ifp);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_ref_lock);
	/* ifnet_detach() must have set IFRF_DETACHING before queueing us */
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach.  This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		waited = true;
		DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
		    __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}
	if (waited) {
		DLIL_PRINTF("%s: %s IO references drained\n",
		    __func__, if_name(ifp));
	}
	/* with if_refio at zero there can be no datapath activity left */
	VERIFY(ifp->if_datamov == 0);
	VERIFY(ifp->if_drainers == 0);
	VERIFY(ifp->if_suspend == 0);
	ifp->if_refflags &= ~IFRF_READY;
	lck_mtx_unlock(&ifp->if_ref_lock);

#if SKYWALK
	VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
#endif /* SKYWALK */
	/* Drain and destroy send queue */
	ifclassq_teardown(ifp->if_snd);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	/* snapshot the filter list, then empty it under the lock */
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		/* fetch next first: detach may free the current filter */
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		kfree_data_sized_by(ifp->if_agentids, ifp->if_agentcount);
	}

	bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			/*
			 * proto_unplumb() must run without the ifnet lock;
			 * re-read the bucket head after retaking it since
			 * the list may have changed underneath us.
			 */
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	kfree_type_counted_by(struct proto_hash_entry, ifp->if_proto_hash_count, ifp->if_proto_hash);

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	ifa_remref(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself, and wait until
	 * it has exited.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_TERMINATING;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		/* wait for starter thread to terminate */
		lck_mtx_lock(&ifp->if_start_lock);
		/* the thread clears if_start_thread on its way out */
		while (ifp->if_start_thread != THREAD_NULL) {
			if (dlil_verbose) {
				DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
				    __func__,
				    if_name(ifp));
			}
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock, (PZERO - 1),
			    "ifnet_start_thread_exit", NULL);
		}
		lck_mtx_unlock(&ifp->if_start_lock);
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s starter thread termination complete",
			    __func__, if_name(ifp));
		}
	}

	/*
	 * Signal the poller thread to terminate itself, and wait until
	 * it has exited.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_flags |= IF_POLLF_TERMINATING;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		/* wait for poller thread to terminate */
		lck_mtx_lock(&ifp->if_poll_lock);
		while (ifp->if_poll_thread != THREAD_NULL) {
			if (dlil_verbose) {
				DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
				    __func__,
				    if_name(ifp));
			}
			(void) msleep(&ifp->if_poll_thread,
			    &ifp->if_poll_lock, (PZERO - 1),
			    "ifnet_poll_thread_exit", NULL);
		}
		lck_mtx_unlock(&ifp->if_poll_lock);
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s poller thread termination complete\n",
			    __func__, if_name(ifp));
		}
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time.  Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->dlth_affinity) {
			struct thread *__single tp, *__single wtp, *__single ptp;

			/* snapshot and clear thread pointers under the lock */
			lck_mtx_lock_spin(&inp->dlth_lock);
			wtp = inp->dlth_driver_thread;
			inp->dlth_driver_thread = THREAD_NULL;
			ptp = inp->dlth_poller_thread;
			inp->dlth_poller_thread = THREAD_NULL;
			ASSERT(inp->dlth_thread != THREAD_NULL);
			tp = inp->dlth_thread;    /* don't nullify now */
			inp->dlth_affinity_tag = 0;
			inp->dlth_affinity = FALSE;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				VERIFY(ifp->if_xflags & IFXF_LEGACY);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* if the worker thread was created, tell it to terminate */
		if (inp->dlth_thread != THREAD_NULL) {
			lck_mtx_lock_spin(&inp->dlth_lock);
			inp->dlth_flags |= DLIL_INPUT_TERMINATE;
			if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
				wakeup_one((caddr_t)&inp->dlth_flags);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			/* drop the ifnet lock while sleeping on the thread */
			ifnet_lock_done(ifp);

			/* wait for the input thread to terminate */
			lck_mtx_lock_spin(&inp->dlth_lock);
			while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
			    == 0) {
				(void) msleep(&inp->dlth_flags, &inp->dlth_lock,
				    (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			ifnet_lock_exclusive(ifp);
		}

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
		/* clean-up poll parameters */
		VERIFY(ifp->if_poll_thread == THREAD_NULL);
		dlil_reset_rxpoll_params(ifp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	/* QoS marking get cleared */
	if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

#if SKYWALK
	/* the nexus destructor is responsible for clearing these */
	VERIFY(ifp->if_na_ops == NULL);
	VERIFY(ifp->if_na == NULL);
#endif /* SKYWALK */

	/* interface could come up with different hwassist next time */
	ifp->if_hwassist = 0;
	ifp->if_capenable = 0;

	/* promiscuous/allmulti counts need to start at zero again */
	ifp->if_pcount = 0;
	ifp->if_amcount = 0;
	ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush_snd(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* if_fwd_cacheok was cleared earlier in ifnet_detach() */
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Ignore any pending data threshold as the interface is anyways gone */
	ifp->if_data_threshold = 0;

	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	os_log(OS_LOG_DEFAULT, "%s detached", if_name(ifp));

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	/* invoke the driver's saved detach callback, if any */
	if (if_free != NULL) {
		if_free(ifp);
	}

	ifclassq_release(&ifp->if_snd);

	/* we're fully detached, clear the "in use" bit */
	dlifp = (struct dlil_ifnet *)ifp;
	lck_mtx_lock(&dlifp->dl_if_lock);
	ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	lck_mtx_unlock(&dlifp->dl_if_lock);

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
6082 
/*
 * Stub if_output/if_output_dlil/if_pre_enqueue handler installed on a
 * detached ifnet (see ifnet_detach_final); silently frees the whole
 * packet chain and reports success.
 */
errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}
6090 
/*
 * Stub if_start handler installed on a detached ifnet; simply purges
 * anything left in the interface queues.
 */
void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
6096 
/*
 * Stub if_input_dlil handler installed on a detached ifnet; frees the
 * inbound chain and fails with ENXIO (device not configured).
 */
static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}
6106 
6107 static void
ifp_if_input_poll(struct ifnet * ifp,u_int32_t flags,u_int32_t max_cnt,struct mbuf ** m_head,struct mbuf ** m_tail,u_int32_t * cnt,u_int32_t * len)6108 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6109     struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6110 {
6111 #pragma unused(ifp, flags, max_cnt)
6112 	if (m_head != NULL) {
6113 		*m_head = NULL;
6114 	}
6115 	if (m_tail != NULL) {
6116 		*m_tail = NULL;
6117 	}
6118 	if (cnt != NULL) {
6119 		*cnt = 0;
6120 	}
6121 	if (len != NULL) {
6122 		*len = 0;
6123 	}
6124 }
6125 
/*
 * Stub if_output_ctl/if_input_ctl handler installed on a detached ifnet;
 * rejects every control command.
 */
static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}
6132 
/*
 * Stub if_demux handler installed on a detached ifnet; frees the packet
 * and returns EJUSTRETURN so the caller does no further processing.
 */
static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}
6140 
/*
 * Stub if_add_proto handler installed on a detached ifnet; no protocol
 * may be attached anymore.
 */
static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}
6148 
/*
 * Stub if_del_proto handler installed on a detached ifnet; there are no
 * protocols left to remove.
 */
static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}
6155 
/*
 * Stub if_check_multi handler installed on a detached ifnet; multicast
 * membership checks are no longer supported.
 */
static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}
6162 
/*
 * Stub legacy framer installed on a detached ifnet; forwards to
 * ifp_if_framer_extended, which drops the packet.  The signature differs
 * per platform: the non-macOS variant carries pre/post length
 * out-parameters, the macOS variant passes NULL for both.
 */
#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
6181 
/*
 * Stub extended framer installed on a detached ifnet; frees the packet,
 * NULLs the caller's mbuf pointer, zeroes the optional pre/post header
 * lengths, and returns EJUSTRETURN so no frame is emitted.
 */
static errno_t
ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa,
    IFNET_LLADDR_T ll,
    IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
{
#pragma unused(ifp, sa, ll, t)
	m_freem(*m);
	*m = NULL;

	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return EJUSTRETURN;
}
6202 
/*
 * Stub if_ioctl handler installed on a detached ifnet; rejects all
 * ioctl requests.
 */
errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}
6209 
/*
 * Stub if_set_bpf_tap handler installed on a detached ifnet; accepts and
 * ignores the request.
 */
static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return 0;
}
6217 
/*
 * Stub if_free callback installed on a detached ifnet (the driver's real
 * callback was saved and invoked by ifnet_detach_final); does nothing.
 */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}
6223 
/*
 * Stub if_event handler installed on a detached ifnet; discards all
 * kernel events.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
6229 
/*
 * Unplumb the protocol families that are always attached per-bucket
 * (PF_INET and PF_INET6); other families are expected to detach
 * themselves in response to the DETACHING event.
 */
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
6245 
/*
 * Copy the interface's cached IPv4 source route into *dst under the
 * cached-route lock.  The spin lock is converted to a full mutex first
 * because route_copyout may take additional (blocking) locks.
 */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6256 
/*
 * Store *src back as the interface's cached IPv4 source route.  If route
 * caching has been disabled (if_fwd_cacheok cleared, e.g. during detach),
 * the route reference is released instead of being cached.
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6270 
/*
 * IPv6 counterpart of ifp_src_route_copyout: copy the cached IPv6 source
 * route into *dst under the cached-route lock.
 */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6282 
/*
 * IPv6 counterpart of ifp_src_route_copyin: cache *src as the interface's
 * IPv6 source route, or release it if caching is disabled.
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6297 
/*
 * Return a referenced rtentry for src_ip, scoped to this interface.
 * Uses the per-ifnet cached IPv4 source route when it is still usable
 * and matches src_ip; otherwise performs a fresh scoped lookup and
 * re-caches the result.  Caller owns the returned reference (may be
 * NULL if the lookup fails).
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route            src_rt;
	struct sockaddr_in      *dst;

	dst = SIN(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the sockaddr if it wasn't AF_INET yet */
		if (dst->sin_family != AF_INET) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		/* ROUTE_RELEASE above must have cleared the cached rtentry */
		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped(SA(dst),
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry  *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
6332 
/*
 * IPv6 counterpart of ifnet_cached_rtlookup_inet: return a referenced
 * rtentry for *src_ip6 scoped to this interface, consulting and
 * refreshing the per-ifnet cached IPv6 source route.  Caller owns the
 * returned reference (may be NULL).
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		/*
		 * NOTE(review): the IPv4 variant VERIFYs ro_rt == NULL here
		 * instead of testing it; after ROUTE_RELEASE the pointer is
		 * expected to be NULL either way, so this guard always takes
		 * the lookup path — confirm the asymmetry is intentional.
		 */
		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				SA(&src_rt.ro_dst), 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry  *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
6369 
/*
 * Update the interface's Link Quality Metric state.  Normalizes the raw
 * lqm value to one of the threshold edges, accumulates time/transition
 * accounting for the previous state, stores the new state, refreshes the
 * link heuristics, and (with the ifnet lock dropped) notifies NECP
 * clients and posts a KEV_DL_LINK_QUALITY_METRIC_CHANGED event.
 *
 * 'locked' indicates whether the caller already holds the ifnet lock
 * exclusively; on return the lock is in the same state the caller
 * supplied it (re-acquired at the end if 'locked' was set).
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;
	uint64_t now, delta;
	int8_t old_lqm;
	bool need_necp_client_update;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		/* quality dropped to abort level: expedite TCP cleanup */
		os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return;         /* nothing to update */
	}

	/* charge the elapsed interval to the outgoing state's timer */
	net_update_uptime();
	now = net_uptime_ms();
	ASSERT(now >= ifp->if_lqmstate_start_time);
	delta = now - ifp->if_lqmstate_start_time;

	old_lqm = ifp->if_interface_state.lqm_state;
	switch (old_lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_time += delta;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_time += delta;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_time += delta;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_time += delta;
		break;
	default:
		break;
	}
	/* count the transition into the new state */
	switch (lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_cnt += 1;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_cnt += 1;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_cnt += 1;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_cnt += 1;
		break;
	default:
		break;
	}
	ifp->if_lqmstate_start_time = now;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Update the link heuristics
	 */
	need_necp_client_update = if_update_link_heuristic(ifp);

	/*
	 * Don't want to hold the lock when issuing kernel events or calling NECP
	 */
	ifnet_lock_done(ifp);

	if (need_necp_client_update) {
		necp_update_all_clients_immediately_if_needed(true);
	}

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
6486 
/*
 * if_rrc_state_update
 *
 * Update the cellular RRC (radio resource control) state for 'ifp' and
 * post a KEV_DL_RRC_STATE_CHANGED kernel event when it changes.
 *
 * Called with the ifnet lock held exclusively; the lock is temporarily
 * dropped while the kernel event is posted and reacquired before return.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* No-op when the state is unchanged and already marked valid */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	/* Reacquire for the caller, who expects the lock held on return */
	ifnet_lock_exclusive(ifp);
}
6516 
/*
 * if_state_update
 *
 * Apply the fields of 'if_interface_state' whose bits are set in
 * valid_bitmask (LQM state, RRC state, interface availability) to 'ifp'.
 *
 * Returns ENOTSUP when an RRC state is supplied for a non-cellular
 * interface, EINVAL for out-of-range LQM or RRC values, 0 otherwise.
 * When the interface transitions to available, TCP connections over it
 * are prodded to send probes immediately (after the lock is dropped).
 */
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	/* RRC state only makes sense on cellular interfaces */
	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return ENOTSUP;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}

	/*
	 * NB: both helpers below temporarily drop and reacquire the
	 * exclusive ifnet lock while posting kernel events.
	 */
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}

	return 0;
}
6587 
6588 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)6589 if_get_state(struct ifnet *ifp,
6590     struct if_interface_state *if_interface_state)
6591 {
6592 	ifnet_lock_shared(ifp);
6593 
6594 	if_interface_state->valid_bitmask = 0;
6595 
6596 	if (ifp->if_interface_state.valid_bitmask &
6597 	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
6598 		if_interface_state->valid_bitmask |=
6599 		    IF_INTERFACE_STATE_RRC_STATE_VALID;
6600 		if_interface_state->rrc_state =
6601 		    ifp->if_interface_state.rrc_state;
6602 	}
6603 	if (ifp->if_interface_state.valid_bitmask &
6604 	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
6605 		if_interface_state->valid_bitmask |=
6606 		    IF_INTERFACE_STATE_LQM_STATE_VALID;
6607 		if_interface_state->lqm_state =
6608 		    ifp->if_interface_state.lqm_state;
6609 	}
6610 	if (ifp->if_interface_state.valid_bitmask &
6611 	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6612 		if_interface_state->valid_bitmask |=
6613 		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6614 		if_interface_state->interface_availability =
6615 		    ifp->if_interface_state.interface_availability;
6616 	}
6617 
6618 	ifnet_lock_done(ifp);
6619 }
6620 
6621 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)6622 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6623 {
6624 	if (conn_probe > 1) {
6625 		return EINVAL;
6626 	}
6627 	if (conn_probe == 0) {
6628 		if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6629 	} else {
6630 		if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6631 	}
6632 
6633 #if NECP
6634 	necp_update_all_clients();
6635 #endif /* NECP */
6636 
6637 	tcp_probe_connectivity(ifp, conn_probe);
6638 	return 0;
6639 }
6640 
6641 /* for uuid.c */
6642 static int
get_ether_index(int * ret_other_index)6643 get_ether_index(int * ret_other_index)
6644 {
6645 	ifnet_ref_t ifp;
6646 	int en0_index = 0;
6647 	int other_en_index = 0;
6648 	int any_ether_index = 0;
6649 	short best_unit = 0;
6650 
6651 	*ret_other_index = 0;
6652 	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6653 		/*
6654 		 * find en0, or if not en0, the lowest unit en*, and if not
6655 		 * that, any ethernet
6656 		 */
6657 		ifnet_lock_shared(ifp);
6658 		if (strcmp(ifp->if_name, "en") == 0) {
6659 			if (ifp->if_unit == 0) {
6660 				/* found en0, we're done */
6661 				en0_index = ifp->if_index;
6662 				ifnet_lock_done(ifp);
6663 				break;
6664 			}
6665 			if (other_en_index == 0 || ifp->if_unit < best_unit) {
6666 				other_en_index = ifp->if_index;
6667 				best_unit = ifp->if_unit;
6668 			}
6669 		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
6670 			any_ether_index = ifp->if_index;
6671 		}
6672 		ifnet_lock_done(ifp);
6673 	}
6674 	if (en0_index == 0) {
6675 		if (other_en_index != 0) {
6676 			*ret_other_index = other_en_index;
6677 		} else if (any_ether_index != 0) {
6678 			*ret_other_index = any_ether_index;
6679 		}
6680 	}
6681 	return en0_index;
6682 }
6683 
/*
 * uuid_get_ethernet
 *
 * Copy a stable ethernet address (ETHER_ADDR_LEN bytes) into 'node' for
 * UUID generation.  Prefers en0; otherwise falls back to another "en"
 * unit or any ethernet interface (see get_ether_index()).  The en0 index
 * is cached across calls in a function-local static and revalidated
 * against ifindex2ifnet on each call.
 *
 * Returns 0 on success, -1 when no suitable interface exists.
 */
int
uuid_get_ethernet(u_int8_t *__counted_by(ETHER_ADDR_LEN) node)
{
	static int en0_index;
	ifnet_ref_t ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	/* Revalidate the cached index; the interface may have detached */
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
		} else {
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		}
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();
	return ret;
}
6725 
6726 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6727 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6728     int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6729 {
6730 	struct kev_dl_node_presence kev;
6731 	struct sockaddr_dl *sdl;
6732 	struct sockaddr_in6 *sin6;
6733 	int ret = 0;
6734 
6735 	VERIFY(ifp);
6736 	VERIFY(sa);
6737 	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6738 
6739 	bzero(&kev, sizeof(kev));
6740 	sin6 = &kev.sin6_node_address;
6741 	sdl = &kev.sdl_node_address;
6742 	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6743 	kev.rssi = rssi;
6744 	kev.link_quality_metric = lqm;
6745 	kev.node_proximity_metric = npm;
6746 	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6747 
6748 	ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6749 	if (ret == 0 || ret == EEXIST) {
6750 		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6751 		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6752 		if (err != 0) {
6753 			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
6754 			    "error %d\n", __func__, err);
6755 		}
6756 	}
6757 
6758 	if (ret == EEXIST) {
6759 		ret = 0;
6760 	}
6761 	return ret;
6762 }
6763 
/*
 * dlil_node_absent
 *
 * Report that a node previously announced via dlil_node_present() is no
 * longer reachable over 'ifp'.  'sa' may be the node's IPv6 address
 * (AF_INET6), in which case the link-layer address is recovered from the
 * neighbor cache, or its link-layer address (AF_LINK), which is decomposed
 * into the IPv6 form first.  On successful removal from the neighbor
 * cache, a KEV_DL_NODE_ABSENCE kernel event is posted.
 */
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;
	int error = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
		error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 * (NB: kev_sdl is already filled by the decompose call,
		 * so NULL is passed to nd6_alt_node_absent here.)
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	if (error == 0) {
		/* Stamp the event's sdl with this interface's type/index */
		kev_sdl->sdl_type = ifp->if_type;
		kev_sdl->sdl_index = ifp->if_index;

		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
		    &kev.link_data, sizeof(kev), FALSE);
	}
}
6804 
/*
 * dlil_node_present_v2
 *
 * Variant of dlil_node_present() where the caller supplies the node's
 * IPv6 address ('sa', AF_INET6) and link-layer address ('sdl', AF_LINK)
 * separately instead of having them decomposed from a single sockaddr.
 * Registers the node with the IPv6 neighbor cache and posts a
 * KEV_DL_NODE_PRESENCE kernel event.  EEXIST (node already known) is
 * treated as success.  Returns 0 on success or an nd6 error code.
 */
int
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	/* Copy the link-layer address, stamped with our type/index */
	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	SOCKADDR_COPY(sdl, kev_sdl, sdl->sdl_len);
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0 || ret == EEXIST) {
		/* Post with the suppress-duplicate flag when already known */
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
		}
	}

	if (ret == EEXIST) {
		ret = 0;
	}
	return ret;
}
6848 
6849 const void *
dlil_ifaddr_bytes(const struct sockaddr_dl * sdl,size_t * sizep,kauth_cred_t * credp)6850 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6851     kauth_cred_t *credp)
6852 {
6853 	const u_int8_t *bytes;
6854 	size_t size;
6855 
6856 	bytes = CONST_LLADDR(sdl);
6857 	size = sdl->sdl_alen;
6858 
6859 #if CONFIG_MACF
6860 	if (dlil_lladdr_ckreq) {
6861 		switch (sdl->sdl_type) {
6862 		case IFT_ETHER:
6863 		case IFT_IEEE1394:
6864 			break;
6865 		default:
6866 			credp = NULL;
6867 			break;
6868 		}
6869 		;
6870 
6871 		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6872 			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6873 				[0] = 2
6874 			};
6875 
6876 			bytes = unspec;
6877 		}
6878 	}
6879 #else
6880 #pragma unused(credp)
6881 #endif
6882 
6883 	if (sizep != NULL) {
6884 		*sizep = size;
6885 	}
6886 	return bytes;
6887 }
6888 
6889 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])6890 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6891     u_int8_t info[DLIL_MODARGLEN])
6892 {
6893 	struct kev_dl_issues kev;
6894 	struct timeval tv;
6895 
6896 	VERIFY(ifp != NULL);
6897 	VERIFY(modid != NULL);
6898 	_CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
6899 	_CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
6900 
6901 	bzero(&kev, sizeof(kev));
6902 
6903 	microtime(&tv);
6904 	kev.timestamp = tv.tv_sec;
6905 	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6906 	if (info != NULL) {
6907 		bcopy(info, &kev.info, DLIL_MODARGLEN);
6908 	}
6909 
6910 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6911 	    &kev.link_data, sizeof(kev), FALSE);
6912 }
6913 
/*
 * ifnet_getset_opportunistic
 *
 * Handle SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC: set or get the
 * interface throttling ("opportunistic") level via ifr->ifr_opportunistic.
 * On success, ifo_inuse is also filled with the count of opportunistic
 * TCP/UDP connections currently on the interface.  EALREADY from
 * ifnet_set_throttle (level already in effect) is mapped to success.
 */
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		/* Get: translate the current level back into ifo_flags */
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	/* Level was already in effect; not an error for the caller */
	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
6972 
6973 int
ifnet_get_throttle(struct ifnet * ifp,u_int32_t * level)6974 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
6975 {
6976 	struct ifclassq *ifq;
6977 	int err = 0;
6978 
6979 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
6980 		return ENXIO;
6981 	}
6982 
6983 	*level = IFNET_THROTTLE_OFF;
6984 
6985 	ifq = ifp->if_snd;
6986 	IFCQ_LOCK(ifq);
6987 	/* Throttling works only for IFCQ, not ALTQ instances */
6988 	if (IFCQ_IS_ENABLED(ifq)) {
6989 		cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
6990 
6991 		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
6992 		*level = req.level;
6993 	}
6994 	IFCQ_UNLOCK(ifq);
6995 
6996 	return err;
6997 }
6998 
6999 int
ifnet_set_throttle(struct ifnet * ifp,u_int32_t level)7000 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
7001 {
7002 	struct ifclassq *ifq;
7003 	int err = 0;
7004 
7005 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
7006 		return ENXIO;
7007 	}
7008 
7009 	ifq = ifp->if_snd;
7010 
7011 	switch (level) {
7012 	case IFNET_THROTTLE_OFF:
7013 	case IFNET_THROTTLE_OPPORTUNISTIC:
7014 		break;
7015 	default:
7016 		return EINVAL;
7017 	}
7018 
7019 	IFCQ_LOCK(ifq);
7020 	if (IFCQ_IS_ENABLED(ifq)) {
7021 		cqrq_throttle_t req = { 1, level };
7022 
7023 		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
7024 	}
7025 	IFCQ_UNLOCK(ifq);
7026 
7027 	if (err == 0) {
7028 		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
7029 		    level);
7030 #if NECP
7031 		necp_update_all_clients();
7032 #endif /* NECP */
7033 		if (level == IFNET_THROTTLE_OFF) {
7034 			ifnet_start(ifp);
7035 		}
7036 	}
7037 
7038 	return err;
7039 }
7040 
7041 errno_t
ifnet_getset_log(ifnet_t ifp,u_long cmd,struct ifreq * ifr,struct proc * p)7042 ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
7043     struct proc *p)
7044 {
7045 #pragma unused(p)
7046 	errno_t result = 0;
7047 	uint32_t flags;
7048 	int level, category, subcategory;
7049 
7050 	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
7051 
7052 	if (cmd == SIOCSIFLOG) {
7053 		if ((result = priv_check_cred(kauth_cred_get(),
7054 		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
7055 			return result;
7056 		}
7057 
7058 		level = ifr->ifr_log.ifl_level;
7059 		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
7060 			result = EINVAL;
7061 		}
7062 
7063 		flags = ifr->ifr_log.ifl_flags;
7064 		if ((flags &= IFNET_LOGF_MASK) == 0) {
7065 			result = EINVAL;
7066 		}
7067 
7068 		category = ifr->ifr_log.ifl_category;
7069 		subcategory = ifr->ifr_log.ifl_subcategory;
7070 
7071 		if (result == 0) {
7072 			result = ifnet_set_log(ifp, level, flags,
7073 			    category, subcategory);
7074 		}
7075 	} else {
7076 		result = ifnet_get_log(ifp, &level, &flags, &category,
7077 		    &subcategory);
7078 		if (result == 0) {
7079 			ifr->ifr_log.ifl_level = level;
7080 			ifr->ifr_log.ifl_flags = flags;
7081 			ifr->ifr_log.ifl_category = category;
7082 			ifr->ifr_log.ifl_subcategory = subcategory;
7083 		}
7084 	}
7085 
7086 	return result;
7087 }
7088 
/*
 * ifnet_set_log
 *
 * Set the logging level and facility flags for 'ifp'.  The level applies
 * to all facilities at once; the flags select which facilities the
 * request targets.  Facilities other than DLIL are forwarded to the
 * driver through its output control callback when one is registered;
 * otherwise they are silently ignored.  Setting level to
 * IFNET_LOG_DEFAULT clears the stored facility flags.
 */
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		/* Default level resets the facility flags entirely */
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
		    "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags, flags,
		    category, subcategory);
	}

	return err;
}
7146 
/*
 * ifnet_get_log
 *
 * Fetch the current logging parameters of 'ifp'.  Each out-parameter is
 * optional; NULL pointers are skipped.  Always returns 0.
 */
int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL) {
		*level = ifp->if_log.level;
	}
	if (flags != NULL) {
		*flags = ifp->if_log.flags;
	}
	if (category != NULL) {
		*category = ifp->if_log.category;
	}
	if (subcategory != NULL) {
		*subcategory = ifp->if_log.subcategory;
	}

	return 0;
}
7166 
7167 int
ifnet_notify_address(struct ifnet * ifp,int af)7168 ifnet_notify_address(struct ifnet *ifp, int af)
7169 {
7170 	struct ifnet_notify_address_params na;
7171 
7172 #if PF
7173 	(void) pf_ifaddr_hook(ifp);
7174 #endif /* PF */
7175 
7176 	if (ifp->if_output_ctl == NULL) {
7177 		return EOPNOTSUPP;
7178 	}
7179 
7180 	bzero(&na, sizeof(na));
7181 	na.address_family = (sa_family_t)af;
7182 
7183 	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7184 	           sizeof(na), &na);
7185 }
7186 
7187 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)7188 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7189 {
7190 	if (ifp == NULL || flowid == NULL) {
7191 		return EINVAL;
7192 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7193 	    !IF_FULLY_ATTACHED(ifp)) {
7194 		return ENXIO;
7195 	}
7196 
7197 	*flowid = ifp->if_flowhash;
7198 
7199 	return 0;
7200 }
7201 
/*
 * ifnet_disable_output
 *
 * Flow-control the transmit path of 'ifp'.  If a resume is already
 * pending (a flow advisory arrived while controlled), both flags are
 * cleared instead, effectively cancelling each other out; otherwise the
 * interface is registered in the flow-control tree and marked
 * flow-controlled.  Returns EINVAL/ENXIO for invalid or unattached
 * interfaces, or the ifnet_fc_add() result.
 */
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err = 0;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	lck_mtx_lock(&ifp->if_start_lock);
	if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
		/* A resume raced with us; cancel both states */
		ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
	} else if ((err = ifnet_fc_add(ifp)) == 0) {
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
	}
	lck_mtx_unlock(&ifp->if_start_lock);

	return err;
}
7224 
7225 errno_t
ifnet_enable_output(struct ifnet * ifp)7226 ifnet_enable_output(struct ifnet *ifp)
7227 {
7228 	if (ifp == NULL) {
7229 		return EINVAL;
7230 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7231 	    !IF_FULLY_ATTACHED(ifp)) {
7232 		return ENXIO;
7233 	}
7234 
7235 	ifnet_start_common(ifp, TRUE, FALSE);
7236 	return 0;
7237 }
7238 
/*
 * ifnet_flowadv
 *
 * Handle a flow advisory for 'flowhash': look up (and remove) the
 * matching flow-control entry, and if its interface is still attached
 * and still carries the same flow hash, clear/resolve the flow-control
 * state and restart output.  Taking an io refcount via
 * ifnet_is_attached(ifp, 1) guards against the interface detaching
 * while we touch it.
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	ifnet_ref_t ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			/*
			 * If output is not currently flow-controlled, note
			 * that a resume arrived early (see
			 * ifnet_disable_output() for the matching logic).
			 */
			if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
				ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
			}
			lck_mtx_unlock(&ifp->if_start_lock);
			(void) ifnet_enable_output(ifp);
		}
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
7267 
7268 /*
7269  * Function to compare ifnet_fc_entries in ifnet flow control tree
7270  */
7271 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)7272 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7273 {
7274 	return fc1->ifce_flowhash - fc2->ifce_flowhash;
7275 }
7276 
/*
 * ifnet_fc_add
 *
 * Register 'ifp' in the global flow-control tree, keyed by its flow
 * hash.  Returns 0 if the interface is (now) present, or EAGAIN when a
 * different interface already occupies the same hash (collision; the
 * new entry is not added).  The tree lock is taken as a spin lock and
 * converted to a regular mutex before the blocking zalloc.
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex: zalloc below may block */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
7320 
/*
 * ifnet_fc_get
 *
 * Look up and remove the flow-control entry keyed by 'flowhash'.
 * Returns the detached entry (ownership transfers to the caller, who
 * must release it with ifnet_fc_entry_free()), or NULL when no entry
 * exists or its interface is no longer attached (in which case the
 * entry is freed here).
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	ifnet_ref_t ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
7358 
/*
 * Release a flow-control entry back to its zone.  The entry must already
 * be detached from the ifnet_fc_tree (see ifnet_fc_get()).
 */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
7364 
7365 static uint32_t
ifnet_calc_flowhash(struct ifnet * ifp)7366 ifnet_calc_flowhash(struct ifnet *ifp)
7367 {
7368 	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7369 	uint32_t flowhash = 0;
7370 
7371 	if (ifnet_flowhash_seed == 0) {
7372 		ifnet_flowhash_seed = RandomULong();
7373 	}
7374 
7375 	bzero(&fh, sizeof(fh));
7376 
7377 	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
7378 	fh.ifk_unit = ifp->if_unit;
7379 	fh.ifk_flags = ifp->if_flags;
7380 	fh.ifk_eflags = ifp->if_eflags;
7381 	fh.ifk_capabilities = ifp->if_capabilities;
7382 	fh.ifk_capenable = ifp->if_capenable;
7383 	fh.ifk_output_sched_model = ifp->if_output_sched_model;
7384 	fh.ifk_rand1 = RandomULong();
7385 	fh.ifk_rand2 = RandomULong();
7386 
7387 try_again:
7388 	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
7389 	if (flowhash == 0) {
7390 		/* try to get a non-zero flowhash */
7391 		ifnet_flowhash_seed = RandomULong();
7392 		goto try_again;
7393 	}
7394 
7395 	return flowhash;
7396 }
7397 
/*
 * ifnet_set_netsignature
 *
 * Store (or clear, when len == 0) the network signature for the given
 * address family on 'ifp'.  Returns EINVAL for an unsupported family or
 * a signature longer than the per-family storage, and ENOMEM when the
 * per-family extension area has not been allocated.  'flags' is
 * currently unused.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *__sized_by(len) data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
7459 
/*
 * ifnet_get_netsignature
 *
 * Copy the stored network signature for the given address family on
 * 'ifp' into 'data'.  On input *len is the capacity of 'data'; on
 * success it is updated to the signature length.  Returns EINVAL for
 * bad arguments, an unsupported family, or a too-small buffer; ENOENT
 * when no signature is set; ENOMEM when the per-family extension area
 * has not been allocated.  *flags (optional) is always set to 0 on
 * success.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *__sized_by(*len) data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
7520 
7521 int
ifnet_set_nat64prefix(struct ifnet * ifp,struct ipv6_prefix * __counted_by (NAT64_MAX_NUM_PREFIXES)prefixes)7522 ifnet_set_nat64prefix(struct ifnet *ifp,
7523     struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
7524 {
7525 	int i, error = 0, one_set = 0;
7526 
7527 	if_inet6data_lock_exclusive(ifp);
7528 
7529 	if (IN6_IFEXTRA(ifp) == NULL) {
7530 		error = ENOMEM;
7531 		goto out;
7532 	}
7533 
7534 	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
7535 		uint32_t prefix_len =
7536 		    prefixes[i].prefix_len;
7537 		struct in6_addr *prefix =
7538 		    &prefixes[i].ipv6_prefix;
7539 
7540 		if (prefix_len == 0) {
7541 			clat_log0((LOG_DEBUG,
7542 			    "NAT64 prefixes purged from Interface %s\n",
7543 			    if_name(ifp)));
7544 			/* Allow clearing the signature */
7545 			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
7546 			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
7547 			    sizeof(struct in6_addr));
7548 
7549 			continue;
7550 		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
7551 		    prefix_len != NAT64_PREFIX_LEN_40 &&
7552 		    prefix_len != NAT64_PREFIX_LEN_48 &&
7553 		    prefix_len != NAT64_PREFIX_LEN_56 &&
7554 		    prefix_len != NAT64_PREFIX_LEN_64 &&
7555 		    prefix_len != NAT64_PREFIX_LEN_96) {
7556 			clat_log0((LOG_DEBUG,
7557 			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
7558 			error = EINVAL;
7559 			goto out;
7560 		}
7561 
7562 		if (IN6_IS_SCOPE_EMBED(prefix)) {
7563 			clat_log0((LOG_DEBUG,
7564 			    "NAT64 prefix has interface/link local scope.\n"));
7565 			error = EINVAL;
7566 			goto out;
7567 		}
7568 
7569 		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
7570 		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
7571 		    sizeof(struct in6_addr));
7572 		clat_log0((LOG_DEBUG,
7573 		    "NAT64 prefix set to %s with prefixlen: %d\n",
7574 		    ip6_sprintf(prefix), prefix_len));
7575 		one_set = 1;
7576 	}
7577 
7578 out:
7579 	if_inet6data_lock_done(ifp);
7580 
7581 	if (error == 0 && one_set != 0) {
7582 		necp_update_all_clients();
7583 	}
7584 
7585 	return error;
7586 }
7587 
7588 int
ifnet_get_nat64prefix(struct ifnet * ifp,struct ipv6_prefix * __counted_by (NAT64_MAX_NUM_PREFIXES)prefixes)7589 ifnet_get_nat64prefix(struct ifnet *ifp,
7590     struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
7591 {
7592 	int i, found_one = 0, error = 0;
7593 
7594 	if (ifp == NULL) {
7595 		return EINVAL;
7596 	}
7597 
7598 	if_inet6data_lock_shared(ifp);
7599 
7600 	if (IN6_IFEXTRA(ifp) == NULL) {
7601 		error = ENOMEM;
7602 		goto out;
7603 	}
7604 
7605 	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
7606 		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
7607 			found_one = 1;
7608 		}
7609 	}
7610 
7611 	if (found_one == 0) {
7612 		error = ENOENT;
7613 		goto out;
7614 	}
7615 
7616 	if (prefixes) {
7617 		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
7618 		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
7619 	}
7620 
7621 out:
7622 	if_inet6data_lock_done(ifp);
7623 
7624 	return error;
7625 }
7626 
7627 #if DEBUG || DEVELOPMENT
/*
 * Blob for sum16 verification.  The leading 0x1f 0x8b bytes look like a
 * gzip magic number, so this is presumably a compressed file captured as
 * test input — the content itself is irrelevant; only the byte values
 * matter to the checksum self-test below.
 */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
7664 
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t       init;   /* TRUE once sumr has been computed at run time */
	uint16_t        len;    /* number of leading sumdata bytes summed */
	uint16_t        sumr;   /* reference; filled in by dlil_verify_sum16() */
	uint16_t        sumrp;  /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX      ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7689 
7690 static void
dlil_verify_sum16(void)7691 dlil_verify_sum16(void)
7692 {
7693 	struct mbuf *m;
7694 	uint8_t *buf;
7695 	int n;
7696 
7697 	/* Make sure test data plus extra room for alignment fits in cluster */
7698 	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);
7699 
7700 	kprintf("DLIL: running SUM16 self-tests ... ");
7701 
7702 	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
7703 	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));
7704 
7705 	buf = mtod(m, uint8_t *);               /* base address */
7706 
7707 	for (n = 0; n < SUMTBL_MAX; n++) {
7708 		uint16_t len = sumtbl[n].len;
7709 		int i;
7710 
7711 		/* Verify for all possible alignments */
7712 		for (i = 0; i < (int)sizeof(uint64_t); i++) {
7713 			uint16_t sum, sumr;
7714 			uint8_t *c;
7715 
7716 			/* Copy over test data to mbuf */
7717 			VERIFY(len <= sizeof(sumdata));
7718 			c = buf + i;
7719 			bcopy(sumdata, c, len);
7720 
7721 			/* Zero-offset test (align by data pointer) */
7722 			m->m_data = (uintptr_t)c;
7723 			m->m_len = len;
7724 			sum = m_sum16(m, 0, len);
7725 
7726 			if (!sumtbl[n].init) {
7727 				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
7728 				sumtbl[n].sumr = sumr;
7729 				sumtbl[n].init = TRUE;
7730 			} else {
7731 				sumr = sumtbl[n].sumr;
7732 			}
7733 
7734 			/* Something is horribly broken; stop now */
7735 			if (sumr != sumtbl[n].sumrp) {
7736 				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
7737 				    "for len=%d align=%d sum=0x%04x "
7738 				    "[expected=0x%04x]\n", __func__,
7739 				    len, i, sum, sumr);
7740 				/* NOTREACHED */
7741 			} else if (sum != sumr) {
7742 				panic_plain("\n%s: broken m_sum16() for len=%d "
7743 				    "align=%d sum=0x%04x [expected=0x%04x]\n",
7744 				    __func__, len, i, sum, sumr);
7745 				/* NOTREACHED */
7746 			}
7747 
7748 			/* Alignment test by offset (fixed data pointer) */
7749 			m->m_data = (uintptr_t)buf;
7750 			m->m_len = i + len;
7751 			sum = m_sum16(m, i, len);
7752 
7753 			/* Something is horribly broken; stop now */
7754 			if (sum != sumr) {
7755 				panic_plain("\n%s: broken m_sum16() for len=%d "
7756 				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
7757 				    __func__, len, i, sum, sumr);
7758 				/* NOTREACHED */
7759 			}
7760 #if INET
7761 			/* Simple sum16 contiguous buffer test by aligment */
7762 			sum = b_sum16(c, len);
7763 
7764 			/* Something is horribly broken; stop now */
7765 			if (sum != sumr) {
7766 				panic_plain("\n%s: broken b_sum16() for len=%d "
7767 				    "align=%d sum=0x%04x [expected=0x%04x]\n",
7768 				    __func__, len, i, sum, sumr);
7769 				/* NOTREACHED */
7770 			}
7771 #endif /* INET */
7772 		}
7773 	}
7774 	m_freem(m);
7775 
7776 	kprintf("PASSED\n");
7777 }
7778 #endif /* DEBUG || DEVELOPMENT */
7779 
/* Expand to a switch case that returns the stringified constant name */
#define CASE_STRINGIFY(x) case x: return #x
7781 
7782 __private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)7783 dlil_kev_dl_code_str(u_int32_t event_code)
7784 {
7785 	switch (event_code) {
7786 		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
7787 		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
7788 		CASE_STRINGIFY(KEV_DL_SIFMTU);
7789 		CASE_STRINGIFY(KEV_DL_SIFPHYS);
7790 		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
7791 		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
7792 		CASE_STRINGIFY(KEV_DL_ADDMULTI);
7793 		CASE_STRINGIFY(KEV_DL_DELMULTI);
7794 		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
7795 		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
7796 		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
7797 		CASE_STRINGIFY(KEV_DL_LINK_OFF);
7798 		CASE_STRINGIFY(KEV_DL_LINK_ON);
7799 		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
7800 		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
7801 		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
7802 		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
7803 		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
7804 		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
7805 		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
7806 		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
7807 		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
7808 		CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
7809 		CASE_STRINGIFY(KEV_DL_ISSUES);
7810 		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
7811 	default:
7812 		break;
7813 	}
7814 	return "";
7815 }
7816 
7817 void
dlil_dt_tcall_fn(thread_call_param_t arg0,thread_call_param_t arg1)7818 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
7819 {
7820 #pragma unused(arg1)
7821 	ifnet_ref_t ifp = arg0;
7822 
7823 	if (ifnet_is_attached(ifp, 1)) {
7824 		nstat_ifnet_threshold_reached(ifp->if_index);
7825 		ifnet_decr_iorefcnt(ifp);
7826 	}
7827 }
7828 
/*
 * ifnet_notify_data_threshold - arm a NetworkStatistics notification once
 * the interface has moved more than if_data_threshold bytes (rx + tx
 * combined) since the last notification.
 *
 * The OSCompareAndSwap64() on if_dt_bytes lets exactly one caller win the
 * right to arm the thread call for a given byte-count window, and the
 * thread_call_isactive() check avoids re-arming a call that is already
 * pending.  When threshold_interval is non-zero the notification is
 * deferred to the next periodic boundary; otherwise it fires immediately.
 */
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			/* rate-limited: fire on the next periodic boundary */
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			/* no interval configured: notify right away */
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
7858 
7859 
/* Thin wrapper: forward per-flow interface statistics to the TCP layer */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
7866 
7867 static inline u_int32_t
_set_flags(u_int32_t * flags_p,u_int32_t set_flags)7868 _set_flags(u_int32_t *flags_p, u_int32_t set_flags)
7869 {
7870 	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
7871 }
7872 
7873 static inline u_int32_t
_clear_flags(u_int32_t * flags_p,u_int32_t clear_flags)7874 _clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
7875 {
7876 	return (u_int32_t)OSBitAndAtomic(~clear_flags, flags_p);
7877 }
7878 
/* Atomically set bits in if_eflags; returns the previous flag word */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}
7884 
/*
 * Atomically clear bits in if_eflags.  Unlike the other flag wrappers,
 * the previous value is not returned to the caller.
 */
__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}
7890 
/* Atomically set bits in if_xflags; returns the previous flag word */
__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}
7896 
/* Atomically clear bits in if_xflags; returns the previous flag word */
__private_extern__ u_int32_t
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	return _clear_flags(&interface->if_xflags, clear_flags);
}
7902 
/*
 * Bump the interface's traffic-rule generation counter so that callers
 * using ifnet_sync_traffic_rule_genid() observe a change.
 */
__private_extern__ void
ifnet_update_traffic_rule_genid(ifnet_t ifp)
{
	os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
}
7908 
7909 __private_extern__ boolean_t
ifnet_sync_traffic_rule_genid(ifnet_t ifp,uint32_t * genid)7910 ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
7911 {
7912 	if (*genid != ifp->if_traffic_rule_genid) {
7913 		*genid = ifp->if_traffic_rule_genid;
7914 		return TRUE;
7915 	}
7916 	return FALSE;
7917 }
/*
 * Publish a new traffic-rule count for the interface and bump the
 * generation id so observers re-read their cached state.
 */
__private_extern__ void
ifnet_update_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_traffic_rule_count, count, release);
	ifnet_update_traffic_rule_genid(ifp);
}
7924 
7925 
7926 #if SKYWALK
7927 static bool
net_check_compatible_if_filter(struct ifnet * ifp)7928 net_check_compatible_if_filter(struct ifnet *ifp)
7929 {
7930 	if (ifp == NULL) {
7931 		if (net_api_stats.nas_iflt_attach_count > net_api_stats.nas_iflt_attach_os_count) {
7932 			return false;
7933 		}
7934 	} else {
7935 		if (ifp->if_flt_non_os_count > 0) {
7936 			return false;
7937 		}
7938 	}
7939 	return true;
7940 }
7941 #endif /* SKYWALK */
7942 
/*
 * Advance the dump cursor after a scnprintf() of 'k' bytes: consume 'k'
 * from the remaining capacity 'clen', bail to the function-local label
 * 'done' when the buffer is exhausted, then move the write pointer 'c'.
 * Relies on locals c/clen/k and a 'done' label in the calling function.
 */
#define DUMP_BUF_CHK() {        \
	clen -= k;              \
	if (clen < 1)           \
	        goto done;      \
	c += k;                 \
}
7949 
7950 int dlil_dump_top_if_qlen(char *__counted_by(str_len), int str_len);
/*
 * dlil_dump_top_if_qlen - write a short report into 'str' naming the
 * interface with the deepest send queue (ifcq) and the interface with the
 * deepest DLIL input queue.  Returns the number of bytes written.
 *
 * NOTE(review): the walk runs while 'ifidx < if_index', so the slot at
 * index if_index itself is never examined — confirm whether the last
 * valid ifindex2ifnet entry should be included.  The walk also takes no
 * io reference on the interfaces it inspects; presumably callers accept
 * the inherent raciness of a debug dump — verify against call sites.
 */
int
dlil_dump_top_if_qlen(char *__counted_by(str_len) str, int str_len)
{
	char *c = str;
	int k, clen = str_len;
	ifnet_ref_t top_ifcq_ifp = NULL;        /* deepest send queue so far */
	uint32_t top_ifcq_len = 0;
	ifnet_ref_t top_inq_ifp = NULL;         /* deepest input queue so far */
	uint32_t top_inq_len = 0;

	for (int ifidx = 1; ifidx < if_index; ifidx++) {
		ifnet_ref_t ifp = ifindex2ifnet[ifidx];
		struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

		if (ifp == NULL) {
			continue;
		}
		if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
			top_ifcq_len = ifp->if_snd->ifcq_len;
			top_ifcq_ifp = ifp;
		}
		if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
			top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
			top_inq_ifp = ifp;
		}
	}

	if (top_ifcq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
		    top_ifcq_len, top_ifcq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
	if (top_inq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
		    top_inq_len, top_inq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
done:
	/* bytes consumed = capacity minus what remains */
	return str_len - clen;
}
7991