xref: /xnu-11417.101.15/bsd/net/dlil.c (revision e3723e1f17661b24996789d8afc084c0c3303b26)
1 /*
2  * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30  * support for mandatory and extensible security protections.  This notice
31  * is included in support of clause 2.2 (b) of the Apple Public License,
32  * Version 2.0.
33  */
34 #include <stddef.h>
35 #include <ptrauth.h>
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/dlil_sysctl.h>
53 #include <net/dlil_var_private.h>
54 #include <net/if_arp.h>
55 #include <net/iptap.h>
56 #include <net/pktap.h>
57 #include <net/droptap.h>
58 #include <net/nwk_wq.h>
59 #include <sys/kern_event.h>
60 #include <sys/kdebug.h>
61 #include <sys/mcache.h>
62 #include <sys/syslog.h>
63 #include <sys/protosw.h>
64 #include <sys/priv.h>
65 
66 #include <kern/assert.h>
67 #include <kern/task.h>
68 #include <kern/thread.h>
69 #include <kern/sched_prim.h>
70 #include <kern/locks.h>
71 #include <kern/zalloc.h>
72 
73 #include <net/kpi_protocol.h>
74 #include <net/kpi_interface.h>
75 #include <net/if_types.h>
76 #include <net/if_ipsec.h>
77 #include <net/if_llreach.h>
78 #include <net/if_utun.h>
79 #include <net/kpi_interfacefilter.h>
80 #include <net/classq/classq.h>
81 #include <net/classq/classq_sfb.h>
82 #include <net/flowhash.h>
83 #include <net/ntstat.h>
84 #if SKYWALK
85 #include <skywalk/lib/net_filter_event.h>
86 #endif /* SKYWALK */
87 #include <net/net_api_stats.h>
88 #include <net/if_ports_used.h>
89 #include <net/if_vlan_var.h>
90 #include <netinet/in.h>
91 #if INET
92 #include <netinet/in_var.h>
93 #include <netinet/igmp_var.h>
94 #include <netinet/ip_var.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_var.h>
97 #include <netinet/udp.h>
98 #include <netinet/udp_var.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/in_pcb.h>
101 #include <netinet/in_tclass.h>
102 #include <netinet/ip.h>
103 #include <netinet/ip_icmp.h>
104 #include <netinet/icmp_var.h>
105 #endif /* INET */
106 
107 #include <net/nat464_utils.h>
108 #include <netinet6/in6_var.h>
109 #include <netinet6/nd6.h>
110 #include <netinet6/mld6_var.h>
111 #include <netinet6/scope6_var.h>
112 #include <netinet/ip6.h>
113 #include <netinet/icmp6.h>
114 #include <net/pf_pbuf.h>
115 #include <libkern/OSAtomic.h>
116 #include <libkern/tree.h>
117 
118 #include <dev/random/randomdev.h>
119 #include <machine/machine_routines.h>
120 
121 #include <mach/thread_act.h>
122 #include <mach/sdt.h>
123 
124 #if CONFIG_MACF
125 #include <sys/kauth.h>
126 #include <security/mac_framework.h>
127 #include <net/ethernet.h>
128 #include <net/firewire.h>
129 #endif
130 
131 #if PF
132 #include <net/pfvar.h>
133 #endif /* PF */
134 #include <net/pktsched/pktsched.h>
135 #include <net/pktsched/pktsched_netem.h>
136 
137 #if NECP
138 #include <net/necp.h>
139 #endif /* NECP */
140 
141 #if SKYWALK
142 #include <skywalk/packet/packet_queue.h>
143 #include <skywalk/nexus/netif/nx_netif.h>
144 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
145 #endif /* SKYWALK */
146 
147 #include <net/sockaddr_utils.h>
148 
149 #include <os/log.h>
150 
151 uint64_t if_creation_generation_count = 0;
152 
153 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
154 
155 dlil_ifnet_queue_t dlil_ifnet_head;
156 
157 #if DEBUG
158 unsigned int ifnet_debug = 1;    /* debugging (enabled) */
159 #else
160 unsigned int ifnet_debug;        /* debugging (disabled) */
161 #endif /* !DEBUG */
162 
163 
164 static u_int32_t net_rtref;
165 
166 static struct dlil_main_threading_info dlil_main_input_thread_info;
167 struct dlil_threading_info *__single dlil_main_input_thread;
168 
169 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
170 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
171 
172 static int ifnet_lookup(struct ifnet *);
173 static void if_purgeaddrs(struct ifnet *);
174 
175 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
176     struct mbuf *, char *);
177 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
178     struct mbuf *);
179 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
180     mbuf_t *, const struct sockaddr *, void *,
181     IFNET_FRAME_TYPE_RW_T, IFNET_LLADDR_RW_T);
182 static void ifproto_media_event(struct ifnet *, protocol_family_t,
183     const struct kev_msg *);
184 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
185     unsigned long, void *);
186 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
187     struct sockaddr_dl *, size_t);
188 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
189     const struct sockaddr_dl *, const struct sockaddr *,
190     const struct sockaddr_dl *, const struct sockaddr *);
191 
192 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
193     struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
194     boolean_t poll, struct thread *tp);
195 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
196     struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
197 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
198 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
199     protocol_family_t *);
200 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
201     const struct ifnet_demux_desc *, u_int32_t);
202 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
203 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
204 #if !XNU_TARGET_OS_OSX
205 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
206     const struct sockaddr *, IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
207     u_int32_t *, u_int32_t *);
208 #else /* XNU_TARGET_OS_OSX */
209 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
210     const struct sockaddr *,
211     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T);
212 #endif /* XNU_TARGET_OS_OSX */
213 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
214     const struct sockaddr *,
215     IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
216     u_int32_t *, u_int32_t *);
217 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
218 static void ifp_if_free(struct ifnet *);
219 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
220 
221 
222 
223 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
224     const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
225 #if DEBUG || DEVELOPMENT
226 static void dlil_verify_sum16(void);
227 #endif /* DEBUG || DEVELOPMENT */
228 
229 
230 static void ifnet_detacher_thread_func(void *, wait_result_t);
231 static void ifnet_detacher_thread_cont(void *, wait_result_t);
232 static void ifnet_detach_final(struct ifnet *);
233 static void ifnet_detaching_enqueue(struct ifnet *);
234 static struct ifnet *ifnet_detaching_dequeue(void);
235 
236 static void ifnet_start_thread_func(void *, wait_result_t);
237 static void ifnet_start_thread_cont(void *, wait_result_t);
238 
239 static void ifnet_poll_thread_func(void *, wait_result_t);
240 static void ifnet_poll_thread_cont(void *, wait_result_t);
241 
242 static errno_t ifnet_enqueue_common(struct ifnet *, struct ifclassq *,
243     classq_pkt_t *, boolean_t, boolean_t *);
244 
245 static void ifp_src_route_copyout(struct ifnet *, struct route *);
246 static void ifp_src_route_copyin(struct ifnet *, struct route *);
247 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
248 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
249 
250 
251 /* The following are protected by dlil_ifnet_lock */
252 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
253 static u_int32_t ifnet_detaching_cnt;
254 static boolean_t ifnet_detaching_embryonic;
255 static void *ifnet_delayed_run; /* wait channel for detaching thread */
256 
257 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
258     &dlil_lck_attributes);
259 
260 static uint32_t ifnet_flowhash_seed;
261 
/*
 * Input key material for ifnet_calc_flowhash().  Fields presumably
 * mirror the corresponding ifnet state at hash time, with ifk_rand1 /
 * ifk_rand2 acting as per-boot random salt — confirm against
 * ifnet_calc_flowhash() (defined elsewhere in this file).
 */
struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];
	uint32_t        ifk_unit;
	uint32_t        ifk_flags;
	uint32_t        ifk_eflags;
	uint32_t        ifk_capabilities;
	uint32_t        ifk_capenable;
	uint32_t        ifk_output_sched_model;
	uint32_t        ifk_rand1;
	uint32_t        ifk_rand2;
};
273 
/* Flow control entry per interface; node in ifnet_fc_tree (see RB_HEAD below) */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;    /* red-black tree linkage */
	u_int32_t       ifce_flowhash;          /* interface flow hash */
	ifnet_ref_t     ifce_ifp;               /* interface back-reference */
};
280 
281 static uint32_t ifnet_calc_flowhash(struct ifnet *);
282 static int ifce_cmp(const struct ifnet_fc_entry *,
283     const struct ifnet_fc_entry *);
284 static int ifnet_fc_add(struct ifnet *);
285 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
286 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
287 
288 /* protected by ifnet_fc_lock */
289 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
290 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
291 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
292 
293 static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
294 
295 extern void bpfdetach(struct ifnet *);
296 
297 
298 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
299     u_int32_t flags);
300 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
301     u_int32_t flags);
302 
303 
304 #if CONFIG_MACF
305 #if !XNU_TARGET_OS_OSX
306 int dlil_lladdr_ckreq = 1;
307 #else /* XNU_TARGET_OS_OSX */
308 int dlil_lladdr_ckreq = 0;
309 #endif /* XNU_TARGET_OS_OSX */
310 #endif /* CONFIG_MACF */
311 
312 
/*
 * Atomically bump the global ifnet_delay_start_disabled counter
 * (declared elsewhere; incremented here, semantics per its definition).
 */
static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}
318 
319 unsigned int net_rxpoll = 1;
320 unsigned int net_affinity = 1;
321 unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */
322 
323 extern u_int32_t        inject_buckets;
324 
325 void
ifnet_filter_update_tso(struct ifnet * ifp,boolean_t filter_enable)326 ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
327 {
328 	/*
329 	 * update filter count and route_generation ID to let TCP
330 	 * know it should reevalute doing TSO or not
331 	 */
332 	if (filter_enable) {
333 		OSAddAtomic(1, &ifp->if_flt_no_tso_count);
334 	} else {
335 		VERIFY(ifp->if_flt_no_tso_count != 0);
336 		OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
337 	}
338 	routegenid_update();
339 }
340 
341 #if SKYWALK
342 
343 static bool net_check_compatible_if_filter(struct ifnet *ifp);
344 
345 /* if_attach_nx flags defined in os_skywalk_private.h */
346 unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
347 unsigned int if_enable_fsw_ip_netagent =
348     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
349 unsigned int if_enable_fsw_transport_netagent =
350     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
351 
352 unsigned int if_netif_all =
353     ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);
354 
355 /* Configure flowswitch to use max mtu sized buffer */
356 static bool fsw_use_max_mtu_buffer = false;
357 
358 
359 static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);
360 
361 #include <skywalk/os_skywalk_private.h>
362 
363 boolean_t
ifnet_nx_noauto(ifnet_t ifp)364 ifnet_nx_noauto(ifnet_t ifp)
365 {
366 	return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
367 }
368 
369 boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)370 ifnet_nx_noauto_flowswitch(ifnet_t ifp)
371 {
372 	return ifnet_is_low_latency(ifp);
373 }
374 
375 boolean_t
ifnet_is_low_latency(ifnet_t ifp)376 ifnet_is_low_latency(ifnet_t ifp)
377 {
378 	return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
379 }
380 
/*
 * Decide whether the netif compat layer should be plumbed in for this
 * interface.  Returns FALSE when compat auto-attach is globally
 * disabled; on embedded targets, Wi-Fi Access Point interfaces ("ap")
 * only get compat when IF_ATTACH_NX_NETIF_ALL is configured.
 */
boolean_t
ifnet_needs_compat(ifnet_t ifp)
{
	if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
		return FALSE;
	}
#if !XNU_TARGET_OS_OSX
	/*
	 * To conserve memory, we plumb in the compat layer selectively; this
	 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
	 * In particular, we check for Wi-Fi Access Point.
	 */
	if (IFNET_IS_WIFI(ifp)) {
		/* Wi-Fi Access Point */
		if (strcmp(ifp->if_name, "ap") == 0) {
			return if_netif_all;
		}
	}
#else /* XNU_TARGET_OS_OSX */
#pragma unused(ifp)
#endif /* XNU_TARGET_OS_OSX */
	return TRUE;
}
404 
/*
 * Decide whether the flowswitch transport netagent should be enabled
 * for this interface.  ipsec/utun interfaces defer to their own
 * per-interface setting; otherwise only cellular and ethernet families
 * qualify, subject to the global if_attach_nx configuration.
 */
boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
{
	if (if_is_fsw_transport_netagent_enabled()) {
		/* check if netagent has been manually enabled for ipsec/utun */
		if (ifp->if_family == IFNET_FAMILY_IPSEC) {
			return ipsec_interface_needs_netagent(ifp);
		} else if (ifp->if_family == IFNET_FAMILY_UTUN) {
			return utun_interface_needs_netagent(ifp);
		}

		/* check ifnet no auto nexus override */
		if (ifnet_nx_noauto(ifp)) {
			return FALSE;
		}

		/* check global if_attach_nx configuration */
		switch (ifp->if_family) {
		case IFNET_FAMILY_CELLULAR:
		case IFNET_FAMILY_ETHERNET:
			if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
				return TRUE;
			}
			break;
		default:
			break;
		}
	}
	return FALSE;
}
435 
436 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)437 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
438 {
439 #pragma unused(ifp)
440 	if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
441 		return TRUE;
442 	}
443 	return FALSE;
444 }
445 
446 boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)447 ifnet_needs_netif_netagent(ifnet_t ifp)
448 {
449 #pragma unused(ifp)
450 	return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
451 }
452 
/*
 * Tear down a single nexus provider instance: detach its device port
 * (if one was attached), then free the instance.  Errors are logged
 * under the caller's name (func_str) but do not abort the teardown.
 * Returns TRUE if an instance was present, FALSE if nothing to detach.
 */
static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,
    const char *func_str, uuid_t instance, uuid_t device)
{
	errno_t         err;

	/* nothing was ever created */
	if (instance == NULL || uuid_is_null(instance)) {
		return FALSE;
	}

	/* followed by the device port */
	if (device != NULL && !uuid_is_null(device)) {
		err = kern_nexus_ifdetach(controller, instance, device);
		if (err != 0) {
			DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
			    func_str, err);
		}
	}
	err = kern_nexus_controller_free_provider_instance(controller,
	    instance);
	if (err != 0) {
		DLIL_PRINTF("%s free_provider_instance failed %d\n",
		    func_str, err);
	}
	return TRUE;
}
479 
/*
 * Detach and deregister the nexus identified by (provider, instance,
 * device) from the shared controller, logging under the caller's name
 * (func_str).  Returns TRUE if any teardown was attempted.
 */
static boolean_t
dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
    uuid_t device)
{
	boolean_t               detached = FALSE;
	nexus_controller_t      controller = kern_nexus_shared_controller();
	int                     err;

	/* free the instance (and its device port) first */
	if (dlil_detach_nexus_instance(controller, func_str, instance,
	    device)) {
		detached = TRUE;
	}
	/* then deregister the provider, if one was registered */
	if (provider != NULL && !uuid_is_null(provider)) {
		detached = TRUE;
		err = kern_nexus_controller_deregister_provider(controller,
		    provider);
		if (err != 0) {
			DLIL_PRINTF("%s deregister_provider %d\n",
			    func_str, err);
		}
	}
	return detached;
}
503 
/*
 * Register a nexus provider of the given type (netif or flowswitch)
 * for the interface and allocate one provider instance.  On success,
 * *provider and *instance hold the new UUIDs; on instance-allocation
 * failure the provider registration is rolled back.  Returns 0 or an
 * errno.
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t          dom_prov;
	errno_t         err;
	nexus_name_t    provider_name;
	const char      *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	/* provider name, e.g. "com.apple.netif.en0" */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* roll back the provider registration */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	/* NB: the success path also falls through here with err == 0 */
	return err;
}
553 
/*
 * Create a netif nexus provider + instance for the interface and
 * attach the nexus to it.  Returns TRUE on success, FALSE if a nexus
 * is already attached or any step fails (partially-created state is
 * cleaned up via dlil_detach_nexus()).
 */
static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
{
	nexus_attr_t            __single attr = NULL;
	nexus_controller_t      controller;
	errno_t                 err;
	unsigned char          *empty_uuid = __unsafe_forge_bidi_indexable(unsigned char *, NULL, sizeof(uuid_t));

	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
		/* it's already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s already has nexus attached\n",
			    __func__, if_name(ifp));
			/* already attached */
		}
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}
	/* bind the nexus to this interface's index */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
	VERIFY(err == 0);

	controller = kern_nexus_shared_controller();

	/* create the netif provider and instance */
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
	if (err != 0) {
		goto failed;
	}

	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance, ifp,
	    empty_uuid, FALSE, &netif_nx->if_nif_attach);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
		    __func__, err);
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
		    netif_nx->if_nif_instance, empty_uuid);
		goto failed;
	}
	/*
	 * NOTE(review): attr is not destroyed on this success path;
	 * looks like a possible leak unless ownership transfers to the
	 * nexus layer — confirm against kern_nexus_attr_create()'s
	 * contract.
	 */
	return TRUE;

failed:
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
609 
610 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)611 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
612 {
613 	if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
614 	    IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
615 		goto failed;
616 	}
617 	switch (ifp->if_type) {
618 	case IFT_CELLULAR:
619 	case IFT_ETHER:
620 		if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
621 			/* don't auto-attach */
622 			goto failed;
623 		}
624 		break;
625 	default:
626 		/* don't auto-attach */
627 		goto failed;
628 	}
629 	return dlil_attach_netif_nexus_common(ifp, netif_nx);
630 
631 failed:
632 	return FALSE;
633 }
634 
/*
 * Tear down the netif nexus previously attached to an interface
 * (provider, instance, and nexus attach handle).
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
642 
643 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)644 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
645 {
646 	struct ifreq        ifr;
647 	int                 error;
648 
649 	bzero(&ifr, sizeof(ifr));
650 	error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
651 	if (error == 0) {
652 		*ifdm_p = ifr.ifr_devmtu;
653 	}
654 	return error;
655 }
656 
/*
 * Adjust the flowswitch large buffer size to match the TSO MTU sizes
 * advertised by the driver.  No-op on non-macOS/server kernels.  For
 * native interfaces the result is capped at NX_FSW_MAX_LARGE_BUFSIZE;
 * for compat interfaces at NX_FSW_DEF_LARGE_BUFSIZE.
 */
static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
{
	uint32_t tso_v4_mtu = 0;
	uint32_t tso_v6_mtu = 0;

	if (!kernel_is_macos_or_server()) {
		return;
	}

	/*
	 * Note that we are reading the real hwassist flags set by the driver
	 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
	 * hasn't been called yet.
	 */
	if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
		tso_v4_mtu = ifp->if_tso_v4_mtu;
	}
	if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
		tso_v6_mtu = ifp->if_tso_v6_mtu;
	}

	/*
	 * If the hardware supports TSO, adjust the large buf size to match the
	 * supported TSO MTU size. Note that only native interfaces set TSO MTU
	 * size today.
	 * For compat, there is a 16KB limit on large buf size, so it needs to be
	 * bounded by NX_FSW_DEF_LARGE_BUFSIZE. Note that no compat interfaces
	 * set TSO MTU size today.
	 */
	if (SKYWALK_NATIVE(ifp)) {
		if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
			*large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
		} else {
			*large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
		}
		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
	} else {
		*large_buf_size = MIN(NX_FSW_DEF_LARGE_BUFSIZE, *large_buf_size);
	}
}
698 
699 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint32_t * buf_size,bool * use_multi_buflet,uint32_t * large_buf_size)700 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
701     bool *use_multi_buflet, uint32_t *large_buf_size)
702 {
703 	struct kern_pbufpool_memory_info rx_pp_info;
704 	struct kern_pbufpool_memory_info tx_pp_info;
705 	uint32_t if_max_mtu = 0;
706 	uint32_t drv_buf_size;
707 	struct ifdevmtu ifdm;
708 	int err;
709 
710 	/*
711 	 * To perform intra-stack RX aggregation flowswitch needs to use
712 	 * multi-buflet packet.
713 	 */
714 	*use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
715 
716 	*large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
717 	/*
718 	 * IP over Thunderbolt interface can deliver the largest IP packet,
719 	 * but the driver advertises the MAX MTU as only 9K.
720 	 */
721 	if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
722 		if_max_mtu = IP_MAXPACKET;
723 		goto skip_mtu_ioctl;
724 	}
725 
726 	/* determine max mtu */
727 	bzero(&ifdm, sizeof(ifdm));
728 	err = dlil_siocgifdevmtu(ifp, &ifdm);
729 	if (__improbable(err != 0)) {
730 		DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
731 		    __func__, if_name(ifp));
732 		/* use default flowswitch buffer size */
733 		if_max_mtu = NX_FSW_BUFSIZE;
734 	} else {
735 		DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
736 		    ifdm.ifdm_max, ifdm.ifdm_current);
737 		/* rdar://problem/44589731 */
738 		if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
739 	}
740 
741 skip_mtu_ioctl:
742 	if (if_max_mtu == 0) {
743 		DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
744 		    __func__, if_name(ifp));
745 		return EINVAL;
746 	}
747 	if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
748 		DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
749 		    "max bufsize(%d)\n", __func__,
750 		    if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
751 		return EINVAL;
752 	}
753 
754 	/*
755 	 * for skywalk native driver, consult the driver packet pool also.
756 	 */
757 	if (dlil_is_native_netif_nexus(ifp)) {
758 		err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
759 		    &tx_pp_info);
760 		if (err != 0) {
761 			DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
762 			    __func__, if_name(ifp));
763 			return ENXIO;
764 		}
765 		drv_buf_size = tx_pp_info.kpm_bufsize *
766 		    tx_pp_info.kpm_max_frags;
767 		if (if_max_mtu > drv_buf_size) {
768 			DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
769 			    "tx %d * %d) can't support max mtu(%d)\n", __func__,
770 			    if_name(ifp), rx_pp_info.kpm_bufsize,
771 			    rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
772 			    tx_pp_info.kpm_max_frags, if_max_mtu);
773 			return EINVAL;
774 		}
775 	} else {
776 		drv_buf_size = if_max_mtu;
777 	}
778 
779 	if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
780 		_CASSERT((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
781 		*use_multi_buflet = true;
782 		/* default flowswitch buffer size */
783 		*buf_size = NX_FSW_BUFSIZE;
784 		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
785 	} else {
786 		*buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
787 	}
788 	_dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
789 	ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
790 	if (*buf_size >= *large_buf_size) {
791 		*large_buf_size = 0;
792 	}
793 	return 0;
794 }
795 
/*
 * Create and attach a flowswitch nexus to an eligible interface.
 * Eligibility: auto-attach not disabled per-interface, not
 * low-latency or vmnet, netif already plumbed (IFCAP_SKYWALK), and
 * flowswitch auto-attach globally enabled.  Returns TRUE on success;
 * the failure path logs the reason and cleans up the attribute set.
 */
static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
{
	nexus_attr_t            __single attr = NULL;
	nexus_controller_t      controller;
	errno_t                 err = 0;
	uuid_t                  netif;
	uint32_t                buf_size = 0;
	uint32_t                large_buf_size = 0;
	bool                    multi_buflet;

	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
	    IFNET_IS_VMNET(ifp)) {
		goto failed;
	}

	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
		/* not possible to attach (netif native/compat not plumbed) */
		goto failed;
	}

	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
		/* don't auto-attach */
		goto failed;
	}

	/* get the netif instance from the ifp */
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	/* size the flowswitch buffers from the MTU / driver pool */
	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet, &large_buf_size);
	if (err != 0) {
		goto failed;
	}
	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
	ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);

	/* Configure flowswitch buffer size */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
	    large_buf_size);
	VERIFY(err == 0);

	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	/* create the flowswitch provider and instance */
	controller = kern_nexus_shared_controller();
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
	if (err != 0) {
		goto failed;
	}

	/* attach the device port */
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
		goto failed;
	}
	/*
	 * NOTE(review): attr is not destroyed on this success path;
	 * verify whether ownership transfers to the nexus layer or
	 * this is a leak (see dlil_attach_netif_nexus_common too).
	 */
	return TRUE;

failed:
	if (err != 0) {
		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
		    __func__, if_name(ifp), err);
	} else {
		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
		    __func__, if_name(ifp));
	}
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
894 
/*
 * Attach a flowswitch nexus to the interface if eligible: modern TX
 * model (IFEF_TXSTART) only, interface must be attached, and no
 * flowswitch instance may already be present.  Returns TRUE if a new
 * flowswitch was attached and published on the ifnet.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t               attached = FALSE;
	if_nexus_flowswitch     nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
		    if_name(ifp));
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));
	/* takes an I/O refcnt; paired with ifnet_decr_iorefcnt() below */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s not attached",
		    __func__, ifp->if_xname);
		goto done;
	}
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
		attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
		if (attached) {
			/* publish the new flowswitch state on the ifnet */
			ifnet_lock_exclusive(ifp);
			ifp->if_nx_flowswitch = nexus_fsw;
			ifnet_lock_done(ifp);
		}
	}
	ifnet_decr_iorefcnt(ifp);

done:
	return attached;
}
936 
/*
 * Tear down the flowswitch nexus previously attached to an interface
 * (provider, instance, and device attachment handle).
 */
__attribute__((noinline))
static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
{
	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
}
944 
945 __attribute__((noinline))
946 static void
dlil_netif_detach_notify(ifnet_t ifp)947 dlil_netif_detach_notify(ifnet_t ifp)
948 {
949 	ifnet_detach_notify_cb_t notify = NULL;
950 	void *__single arg = NULL;
951 
952 	ifnet_get_detach_notify(ifp, &notify, &arg);
953 	if (notify == NULL) {
954 		DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
955 		return;
956 	}
957 	(*notify)(arg);
958 }
959 
/*
 * Quiesce data movement on `ifp', detach whatever flowswitch and netif
 * nexuses are attached, then resume.  The suspend/drain must complete
 * before either detach begins, and the resume must follow both.
 */
__attribute__((noinline))
static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
{
	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
	if_nexus_netif *nx_netif = &ifp->if_nx_netif;

	ifnet_datamov_suspend_and_drain(ifp);
	/* the three flowswitch UUIDs are set and cleared together */
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
	} else {
		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
	}

	/* likewise for the netif nexus state */
	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
	} else {
		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
	}
	ifnet_datamov_resume(ifp);
}
989 
990 boolean_t
ifnet_add_netagent(ifnet_t ifp)991 ifnet_add_netagent(ifnet_t ifp)
992 {
993 	int     error;
994 
995 	error = kern_nexus_interface_add_netagent(ifp);
996 	os_log(OS_LOG_DEFAULT,
997 	    "kern_nexus_interface_add_netagent(%s) returned %d",
998 	    ifp->if_xname, error);
999 	return error == 0;
1000 }
1001 
1002 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1003 ifnet_remove_netagent(ifnet_t ifp)
1004 {
1005 	int     error;
1006 
1007 	error = kern_nexus_interface_remove_netagent(ifp);
1008 	os_log(OS_LOG_DEFAULT,
1009 	    "kern_nexus_interface_remove_netagent(%s) returned %d",
1010 	    ifp->if_xname, error);
1011 	return error == 0;
1012 }
1013 
1014 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1015 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1016 {
1017 	if (!IF_FULLY_ATTACHED(ifp)) {
1018 		return FALSE;
1019 	}
1020 	return dlil_attach_flowswitch_nexus(ifp);
1021 }
1022 
1023 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)1024 ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1025 {
1026 	if_nexus_flowswitch     nexus_fsw;
1027 
1028 	ifnet_lock_exclusive(ifp);
1029 	nexus_fsw = ifp->if_nx_flowswitch;
1030 	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1031 	ifnet_lock_done(ifp);
1032 	return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1033 	           nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1034 }
1035 
1036 void
ifnet_attach_native_flowswitch(ifnet_t ifp)1037 ifnet_attach_native_flowswitch(ifnet_t ifp)
1038 {
1039 	if (!dlil_is_native_netif_nexus(ifp)) {
1040 		/* not a native netif */
1041 		return;
1042 	}
1043 	ifnet_attach_flowswitch_nexus(ifp);
1044 }
1045 
1046 int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp,ifnet_fsw_rx_cb_t cb,void * arg)1047 ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
1048 {
1049 	lck_mtx_lock(&ifp->if_delegate_lock);
1050 	while (ifp->if_fsw_rx_cb_ref > 0) {
1051 		DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
1052 		(void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
1053 		    (PZERO + 1), __FUNCTION__, NULL);
1054 		DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
1055 	}
1056 	ifp->if_fsw_rx_cb = cb;
1057 	ifp->if_fsw_rx_cb_arg = arg;
1058 	lck_mtx_unlock(&ifp->if_delegate_lock);
1059 	return 0;
1060 }
1061 
/*
 * Look up the flowswitch RX callback on `ifp'.  On success (0) the
 * callback and its argument are returned and a reference is taken;
 * the caller must drop it with ifnet_release_flowswitch_rx_callback().
 * Returns ENOENT when no callback is installed.
 */
int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
{
	/*
	 * This is for avoiding the unnecessary lock acquire for interfaces
	 * not used by a redirect interface.
	 */
	if (ifp->if_fsw_rx_cb == NULL) {
		return ENOENT;
	}
	lck_mtx_lock(&ifp->if_delegate_lock);
	/* re-check under the lock: the unlocked read above may be stale */
	if (ifp->if_fsw_rx_cb == NULL) {
		lck_mtx_unlock(&ifp->if_delegate_lock);
		return ENOENT;
	}
	*cbp = ifp->if_fsw_rx_cb;
	*argp = ifp->if_fsw_rx_cb_arg;
	ifp->if_fsw_rx_cb_ref++;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1083 
1084 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)1085 ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1086 {
1087 	lck_mtx_lock(&ifp->if_delegate_lock);
1088 	if (--ifp->if_fsw_rx_cb_ref == 0) {
1089 		wakeup(&ifp->if_fsw_rx_cb_ref);
1090 	}
1091 	lck_mtx_unlock(&ifp->if_delegate_lock);
1092 }
1093 
1094 int
ifnet_set_delegate_parent(ifnet_t difp,ifnet_t parent)1095 ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
1096 {
1097 	lck_mtx_lock(&difp->if_delegate_lock);
1098 	while (difp->if_delegate_parent_ref > 0) {
1099 		DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
1100 		(void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
1101 		    (PZERO + 1), __FUNCTION__, NULL);
1102 		DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
1103 	}
1104 	difp->if_delegate_parent = parent;
1105 	lck_mtx_unlock(&difp->if_delegate_lock);
1106 	return 0;
1107 }
1108 
1109 int
ifnet_get_delegate_parent(ifnet_t difp,ifnet_t * parentp)1110 ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1111 {
1112 	lck_mtx_lock(&difp->if_delegate_lock);
1113 	if (difp->if_delegate_parent == NULL) {
1114 		lck_mtx_unlock(&difp->if_delegate_lock);
1115 		return ENOENT;
1116 	}
1117 	*parentp = difp->if_delegate_parent;
1118 	difp->if_delegate_parent_ref++;
1119 	lck_mtx_unlock(&difp->if_delegate_lock);
1120 	return 0;
1121 }
1122 
1123 void
ifnet_release_delegate_parent(ifnet_t difp)1124 ifnet_release_delegate_parent(ifnet_t difp)
1125 {
1126 	lck_mtx_lock(&difp->if_delegate_lock);
1127 	if (--difp->if_delegate_parent_ref == 0) {
1128 		wakeup(&difp->if_delegate_parent_ref);
1129 	}
1130 	lck_mtx_unlock(&difp->if_delegate_lock);
1131 }
1132 
1133 __attribute__((noinline))
1134 void
ifnet_set_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1135 ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1136 {
1137 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1138 	ifp->if_detach_notify = notify;
1139 	ifp->if_detach_notify_arg = arg;
1140 }
1141 
1142 __attribute__((noinline))
1143 void
ifnet_get_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1144 ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1145 {
1146 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1147 	*notifyp = ifp->if_detach_notify;
1148 	*argp = ifp->if_detach_notify_arg;
1149 }
1150 
/*
 * Register (or clear, with NULL) the detach-notification callback for
 * `ifp', taking the ifnet lock around the update.
 */
__attribute__((noinline))
void
ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
{
	ifnet_lock_exclusive(ifp);
	ifnet_set_detach_notify_locked(ifp, notify, arg);
	ifnet_lock_done(ifp);
}
1159 
/*
 * Fetch the detach-notification callback and argument for `ifp',
 * taking the ifnet lock around the read.
 */
__attribute__((noinline))
void
ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
{
	ifnet_lock_exclusive(ifp);
	ifnet_get_detach_notify_locked(ifp, notifyp, argp);
	ifnet_lock_done(ifp);
}
1168 #endif /* SKYWALK */
1169 
/*
 * Sanity-check an inbound mbuf: it must carry a packet header, and its
 * receive interface must match `ifp' (the loopback interface is exempt
 * from the rcvif match).  Panics on violation.
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	ifnet_ref_t _rcvif = mbuf_pkthdr_rcvif(m);                      \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	/* NOTREACHED */                                        \
	}                                                               \
}
1178 
/* link-speed unit multipliers */
#define MBPS    (1ULL * 1000 * 1000)
#define GBPS    (MBPS * 1000)

/* One row of RX-poll watermarks, selected by downlink speed. */
struct rxpoll_time_tbl {
	u_int64_t       speed;          /* downlink speed */
	u_int32_t       plowat;         /* packets low watermark */
	u_int32_t       phiwat;         /* packets high watermark */
	u_int32_t       blowat;         /* bytes low watermark */
	u_int32_t       bhiwat;         /* bytes high watermark */
};

/* Rows ordered by increasing speed; an all-zero row terminates the table. */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed =  10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024)    },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =   1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed =  10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024)   },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
1198 
1199 int
proto_hash_value(u_int32_t protocol_family)1200 proto_hash_value(u_int32_t protocol_family)
1201 {
1202 	/*
1203 	 * dlil_proto_unplumb_all() depends on the mapping between
1204 	 * the hash bucket index and the protocol family defined
1205 	 * here; future changes must be applied there as well.
1206 	 */
1207 	switch (protocol_family) {
1208 	case PF_INET:
1209 		return 0;
1210 	case PF_INET6:
1211 		return 1;
1212 	case PF_VLAN:
1213 		return 2;
1214 	case PF_UNSPEC:
1215 	default:
1216 		return 3;
1217 	}
1218 }
1219 
1220 __private_extern__ int
dlil_post_msg(struct ifnet * ifp,u_int32_t event_subclass,u_int32_t event_code,struct net_event_data * event_data,u_int32_t event_data_len,boolean_t suppress_generation)1221 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1222     u_int32_t event_code, struct net_event_data *event_data,
1223     u_int32_t event_data_len, boolean_t suppress_generation)
1224 {
1225 	struct net_event_data ev_data;
1226 	struct kev_msg ev_msg;
1227 
1228 	bzero(&ev_msg, sizeof(ev_msg));
1229 	bzero(&ev_data, sizeof(ev_data));
1230 	/*
1231 	 * a net event always starts with a net_event_data structure
1232 	 * but the caller can generate a simple net event or
1233 	 * provide a longer event structure to post
1234 	 */
1235 	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1236 	ev_msg.kev_class        = KEV_NETWORK_CLASS;
1237 	ev_msg.kev_subclass     = event_subclass;
1238 	ev_msg.event_code       = event_code;
1239 
1240 	if (event_data == NULL) {
1241 		event_data = &ev_data;
1242 		event_data_len = sizeof(struct net_event_data);
1243 	}
1244 
1245 	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1246 	event_data->if_family = ifp->if_family;
1247 	event_data->if_unit   = (u_int32_t)ifp->if_unit;
1248 
1249 	ev_msg.dv[0].data_length = event_data_len;
1250 	ev_msg.dv[0].data_ptr    = event_data;
1251 	ev_msg.dv[1].data_length = 0;
1252 
1253 	bool update_generation = true;
1254 	if (event_subclass == KEV_DL_SUBCLASS) {
1255 		/* Don't update interface generation for frequent link quality and state changes  */
1256 		switch (event_code) {
1257 		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1258 		case KEV_DL_RRC_STATE_CHANGED:
1259 		case KEV_DL_PRIMARY_ELECTED:
1260 			update_generation = false;
1261 			break;
1262 		default:
1263 			break;
1264 		}
1265 	}
1266 
1267 	/*
1268 	 * Some events that update generation counts might
1269 	 * want to suppress generation count.
1270 	 * One example is node presence/absence where we still
1271 	 * issue kernel event for the invocation but want to avoid
1272 	 * expensive operation of updating generation which triggers
1273 	 * NECP client updates.
1274 	 */
1275 	if (suppress_generation) {
1276 		update_generation = false;
1277 	}
1278 
1279 	return dlil_event_internal(ifp, &ev_msg, update_generation);
1280 }
1281 
/*
 * Reset all receive-polling state on `ifp' back to defaults:
 * polling off, no poll cycle, and all poll statistics/timers cleared.
 */
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);        /* clear configured poll cycle */
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	/* zero accumulated poll statistics */
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	/* and all poll-related timestamps */
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
1300 
1301 
1302 #if SKYWALK
1303 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)1304 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
1305     enum net_filter_event_subsystems state)
1306 {
1307 	evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%d",
1308 	    __func__, state);
1309 
1310 	bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
1311 	if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
1312 		if_enable_fsw_transport_netagent = 1;
1313 	} else {
1314 		if_enable_fsw_transport_netagent = 0;
1315 	}
1316 	if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
1317 		kern_nexus_update_netagents();
1318 	} else if (!if_enable_fsw_transport_netagent) {
1319 		necp_update_all_clients();
1320 	}
1321 }
1322 #endif /* SKYWALK */
1323 
/*
 * One-time initialization of the data link interface layer (DLIL).
 * Verifies compile-time layout/flag invariants, reads boot-args,
 * initializes the DLIL subsystems, and starts the main input and
 * detacher kernel threads, waiting until both have run once.
 */
void
dlil_init(void)
{
	thread_t __single thread = THREAD_NULL;

	dlil_main_input_thread = (struct dlil_threading_info *) &dlil_main_input_thread_info;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	/* ioctl logging flag/category values must match the ifnet ones */
	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	/* interface family and subfamily values must match as well */
	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	_CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	_CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	_CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	_CASSERT(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
	_CASSERT(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
	_CASSERT(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* read tunable overrides from boot-args */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	PE_parse_boot_argn("if_link_heuristics", &if_link_heuristics_flags, sizeof(if_link_heuristics_flags));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled.  This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
	if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */

	dlil_allocation_zones_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize droptap interface */
	droptap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
1620 
/*
 * Attach the interface filter described by `if_filter' to `ifp',
 * returning a reference to the new filter via `filter_ref'.
 * Returns 0 on success or ENXIO when the interface is not (or is no
 * longer) attached.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	/* takes an I/O refcount, dropped below after the filter is linked */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	filter = dlif_filt_alloc();
	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 * and for management interfaces
	 */
	if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	/* link the filter onto the interface under the filter monitor */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	/* account for the attach in the global and per-ifnet statistics */
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	ifnet_decr_iorefcnt(ifp);       /* drop ref from ifnet_is_attached() */

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		dlif_filt_free(filter);
	}

	return retval;
}
1712 
1713 static int
dlil_detach_filter_internal(interface_filter_t filter,int detached)1714 dlil_detach_filter_internal(interface_filter_t  filter, int detached)
1715 {
1716 	int retval = 0;
1717 
1718 	if (detached == 0) {
1719 		ifnet_ref_t ifp = NULL;
1720 
1721 		ifnet_head_lock_shared();
1722 		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1723 			interface_filter_t entry = NULL;
1724 
1725 			lck_mtx_lock(&ifp->if_flt_lock);
1726 			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1727 				if (entry != filter || entry->filt_skip) {
1728 					continue;
1729 				}
1730 				/*
1731 				 * We've found a match; since it's possible
1732 				 * that the thread gets blocked in the monitor,
1733 				 * we do the lock dance.  Interface should
1734 				 * not be detached since we still have a use
1735 				 * count held during filter attach.
1736 				 */
1737 				entry->filt_skip = 1;   /* skip input/output */
1738 				lck_mtx_unlock(&ifp->if_flt_lock);
1739 				ifnet_head_done();
1740 
1741 				lck_mtx_lock(&ifp->if_flt_lock);
1742 				if_flt_monitor_enter(ifp);
1743 				LCK_MTX_ASSERT(&ifp->if_flt_lock,
1744 				    LCK_MTX_ASSERT_OWNED);
1745 
1746 				/* Remove the filter from the list */
1747 				TAILQ_REMOVE(&ifp->if_flt_head, filter,
1748 				    filt_next);
1749 
1750 				if (dlil_verbose) {
1751 					DLIL_PRINTF("%s: %s filter detached\n",
1752 					    if_name(ifp), filter->filt_name);
1753 				}
1754 				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
1755 					VERIFY(ifp->if_flt_non_os_count != 0);
1756 					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
1757 				}
1758 				/*
1759 				 * Decrease filter count and route_generation
1760 				 * ID to let TCP know it should reevalute doing
1761 				 * TSO or not.
1762 				 */
1763 				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1764 					ifnet_filter_update_tso(ifp, FALSE);
1765 				}
1766 				/*
1767 				 * When we remove the bridge's interface filter,
1768 				 * clear the field in the ifnet.
1769 				 */
1770 				if ((filter->filt_flags & DLIL_IFF_BRIDGE)
1771 				    != 0) {
1772 					ifp->if_bridge = NULL;
1773 				}
1774 				if_flt_monitor_leave(ifp);
1775 				lck_mtx_unlock(&ifp->if_flt_lock);
1776 				goto destroy;
1777 			}
1778 			lck_mtx_unlock(&ifp->if_flt_lock);
1779 		}
1780 		ifnet_head_done();
1781 
1782 		/* filter parameter is not a valid filter ref */
1783 		retval = EINVAL;
1784 		goto done;
1785 	} else {
1786 		ifnet_ref_t ifp = filter->filt_ifp;
1787 		/*
1788 		 * Here we are called from ifnet_detach_final(); the
1789 		 * caller had emptied if_flt_head and we're doing an
1790 		 * implicit filter detach because the interface is
1791 		 * about to go away.  Make sure to adjust the counters
1792 		 * in this case.  We don't need the protection of the
1793 		 * filter monitor since we're called as part of the
1794 		 * final detach in the context of the detacher thread.
1795 		 */
1796 		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
1797 			VERIFY(ifp->if_flt_non_os_count != 0);
1798 			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
1799 		}
1800 		/*
1801 		 * Decrease filter count and route_generation
1802 		 * ID to let TCP know it should reevalute doing
1803 		 * TSO or not.
1804 		 */
1805 		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1806 			ifnet_filter_update_tso(ifp, FALSE);
1807 		}
1808 	}
1809 
1810 	if (dlil_verbose) {
1811 		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
1812 	}
1813 
1814 destroy:
1815 
1816 	/* Call the detached function if there is one */
1817 	if (filter->filt_detached) {
1818 		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1819 	}
1820 
1821 	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1822 	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
1823 		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
1824 	}
1825 #if SKYWALK
1826 	if (kernel_is_macos_or_server()) {
1827 		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
1828 		    net_check_compatible_if_filter(NULL));
1829 	}
1830 #endif /* SKYWALK */
1831 
1832 	/* Free the filter */
1833 	dlif_filt_free(filter);
1834 	filter = NULL;
1835 done:
1836 	if (retval != 0 && filter != NULL) {
1837 		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1838 		    filter->filt_name, retval);
1839 	}
1840 
1841 	return retval;
1842 }
1843 
1844 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)1845 dlil_detach_filter(interface_filter_t filter)
1846 {
1847 	if (filter == NULL) {
1848 		return;
1849 	}
1850 	dlil_detach_filter_internal(filter, 0);
1851 }
1852 
1853 __private_extern__ boolean_t
dlil_has_ip_filter(void)1854 dlil_has_ip_filter(void)
1855 {
1856 	boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
1857 
1858 	VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
1859 
1860 	DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
1861 	return has_filter;
1862 }
1863 
1864 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)1865 dlil_has_if_filter(struct ifnet *ifp)
1866 {
1867 	boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
1868 	DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
1869 	return has_filter;
1870 }
1871 
1872 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)1873 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
1874 {
1875 	if (p != NULL) {
1876 		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
1877 		    (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
1878 			return EINVAL;
1879 		}
1880 		if (p->packets_lowat != 0 &&    /* hiwat must be non-zero */
1881 		    p->packets_lowat >= p->packets_hiwat) {
1882 			return EINVAL;
1883 		}
1884 		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
1885 		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
1886 			return EINVAL;
1887 		}
1888 		if (p->bytes_lowat != 0 &&      /* hiwat must be non-zero */
1889 		    p->bytes_lowat >= p->bytes_hiwat) {
1890 			return EINVAL;
1891 		}
1892 		if (p->interval_time != 0 &&
1893 		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
1894 			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
1895 		}
1896 	}
1897 	return 0;
1898 }
1899 
/*
 * Recompute the interface's RX polling parameters (packet/byte low
 * and high watermarks, per-poll packet limit and poll interval)
 * from the current input link rate and/or caller-supplied values in
 * "p".  Called with the input thread lock held when coming through
 * dlil_rxpoll_set_params().
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	/*
	 * No link rate information and no explicit parameters:
	 * disable polling by zeroing the low watermarks and maxing
	 * out the high watermarks.
	 */
	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/*
		 * Pick the highest rxpoll_tbl entry whose speed does
		 * not exceed the current input link rate.
		 */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/*
		 * Note: a non-zero if_rxpoll_max global overrides any
		 * caller-supplied packets_limit; likewise, a globally
		 * modified if_rxpoll_interval_time (differing from the
		 * IF_RXPOLL_INTERVALTIME default) overrides
		 * interval_time.
		 */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ?  if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert nanosecond holdtimes into timespec form for the poller */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
1969 
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 * Validates "p" (possibly clamping its interval), then applies it
 * under the input thread lock unless the caller already holds it.
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	/* polling must be supported and the input thread must exist */
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->dlth_lock);
	}
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate.  If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values.  Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->dlth_lock);
	}
	return 0;
}
2011 
2012 /*
2013  * Must be called on an attached ifnet (caller is expected to check.)
2014  */
2015 errno_t
dlil_rxpoll_get_params(struct ifnet * ifp,struct ifnet_poll_params * p)2016 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2017 {
2018 	struct dlil_threading_info *inp;
2019 
2020 	VERIFY(ifp != NULL && p != NULL);
2021 	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2022 		return ENXIO;
2023 	}
2024 
2025 	bzero(p, sizeof(*p));
2026 
2027 	lck_mtx_lock(&inp->dlth_lock);
2028 	p->packets_limit = ifp->if_rxpoll_plim;
2029 	p->packets_lowat = ifp->if_rxpoll_plowat;
2030 	p->packets_hiwat = ifp->if_rxpoll_phiwat;
2031 	p->bytes_lowat = ifp->if_rxpoll_blowat;
2032 	p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2033 	p->interval_time = ifp->if_rxpoll_ival;
2034 	lck_mtx_unlock(&inp->dlth_lock);
2035 
2036 	return 0;
2037 }
2038 
/*
 * Legacy input: hand a packet chain to DLIL without a tail pointer;
 * counts are computed internally by ifnet_input_common().
 */
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}
2045 
/*
 * Extended input: the driver supplies the chain tail along with
 * packet/byte statistics in "s" (both required).
 */
errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}
2052 
/*
 * Input from the RX poller; the chain may be empty (m_head == NULL),
 * in which case the call is treated as non-extended.
 */
errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	           (m_head != NULL), TRUE);
}
2060 
/*
 * Common back-end for all ifnet_input*() variants: validates the
 * packet chain, computes (or trusts) packet/byte counts, and hands
 * the chain to the interface's DLIL input function.
 *
 * "ext" means the caller supplied the chain tail plus statistics;
 * "poll" means the call came from the RX poller, in which case an
 * empty chain is permitted.  Returns EINVAL (freeing the chain) on
 * bad parameters or a detached interface; otherwise the result of
 * the input function.
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	/* an empty chain is only valid for poll; ext requires stats */
	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail given: walk the chain, counting as we go */
		last = m_head;
		while (m_head != NULL) {
			m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* re-walk the chain to cross-check driver's counts */
			last = m_head;
			while (1) {
				m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-supplied statistics */
			m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	/*
	 * Build a local stats copy holding our computed counts; note
	 * that the input function below still receives "s" (which is
	 * &_s only when the caller passed none), while _s is what the
	 * disabled-input accounting path uses.
	 */
	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	/* input disabled: account for the packets, then drop them */
	if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
		m_freem_list(m_head);

		os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
		os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);

		goto done;
	}

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

done:
	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
2189 
2190 
/*
 * Common body for ifnet_start() and ifnet_start_ignore_delay():
 * record a transmit request and wake the interface's starter thread
 * unless output is flow-controlled (and "resetfc" is not set).
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (ignore_delay) {
		/* ask the starter to bypass the delayed-start logic */
		ifp->if_start_flags |= IFSF_NO_DELAY;
	}
	if (resetfc) {
		/* caller wants flow control cleared before starting */
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
2223 
/*
 * Kick the transmit starter thread for this interface.
 */
void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, FALSE);
}
2229 
/*
 * Same as ifnet_start(), but requests that any delayed-start
 * batching be bypassed for this transmit request.
 */
void
ifnet_start_ignore_delay(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, TRUE);
}
2235 
/*
 * Entry point of the per-interface transmit starter thread: names
 * the thread, optionally binds the lo0 starter to the main input
 * thread's affinity tag, then parks in the embryonic state until
 * ifnet_start_thread_cont() takes over via the continuation.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	ifnet_ref_t ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), __unsafe_null_terminated_from_indexable(thread_name));

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag.  This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *__single tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	/* all further work happens in the continuation; never returns */
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2301 
/*
 * Continuation for the transmit starter thread: services start
 * requests by invoking the driver's if_start routine in a loop,
 * then re-arms a timed or indefinite wait on if_start_thread — or
 * terminates when the interface is being detached (IFSF_TERMINATING
 * or an interrupted wait).
 */
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	ifnet_ref_t ifp = v;
	struct ifclassq *ifq = ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
		goto terminate;
	}

	/* first wakeup after creation: leave the embryonic state */
	if (__improbable(ifp->if_start_embryonic)) {
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		/*
		 * Delayed start: with IFEF_ENQUEUE_MULTI/IFEF_DELAY_START
		 * set, hold off while the send queue is still short so
		 * more packets can be batched into one driver start call.
		 */
		if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
		    !IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		}
		ifp->if_start_flags &= ~IFSF_NO_DELAY;
		ifp->if_start_delayed = 0;
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
#define _IFSF_DISABLED  (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

#if SKYWALK
	/*
	 * Wakeup any waiters, e.g. any threads waiting to
	 * detach the interface from the flowswitch, etc.
	 */
	if (ifp->if_start_waiters != 0) {
		ifp->if_start_waiters = 0;
		wakeup(&ifp->if_start_waiters);
	}
#endif /* SKYWALK */
	if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts = NULL;

		/* NOTE(review): ts is always NULL here; check is trivially true */
		if (ts == NULL) {
			/* TBR active with queued packets: wake each cycle */
			ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
			    &ifp->if_start_cycle : NULL);
		}

		/* delayed-start in effect: wake up after the delay timeout */
		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		/* a zero interval means "wait indefinitely" */
		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);

		/* clear if_start_thread to allow termination to continue */
		ASSERT(ifp->if_start_thread != THREAD_NULL);
		ifp->if_start_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2447 
2448 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)2449 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2450 {
2451 	if (ts == NULL) {
2452 		bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
2453 	} else {
2454 		*(&ifp->if_start_cycle) = *ts;
2455 	}
2456 
2457 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2458 		DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
2459 		    if_name(ifp), ts->tv_nsec);
2460 	}
2461 }
2462 
2463 static inline void
ifnet_poll_wakeup(struct ifnet * ifp)2464 ifnet_poll_wakeup(struct ifnet *ifp)
2465 {
2466 	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
2467 
2468 	ifp->if_poll_req++;
2469 	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
2470 	    ifp->if_poll_thread != THREAD_NULL) {
2471 		wakeup_one((caddr_t)&ifp->if_poll_thread);
2472 	}
2473 }
2474 
/*
 * Request an RX polling run on the interface.
 */
void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
}
2485 
/*
 * Entry point of the per-interface RX poller thread: names the
 * thread and parks in the embryonic state; all real work happens in
 * the continuation, ifnet_poll_thread_cont().
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	ifnet_ref_t ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, __unsafe_null_terminated_from_indexable(thread_name));

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	/* all further work happens in the continuation; never returns */
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2513 }
2514 
2515 __attribute__((noreturn))
2516 static void
ifnet_poll_thread_cont(void * v,wait_result_t wres)2517 ifnet_poll_thread_cont(void *v, wait_result_t wres)
2518 {
2519 	struct dlil_threading_info *inp;
2520 	ifnet_ref_t ifp = v;
2521 	struct ifnet_stat_increment_param s;
2522 	struct timespec start_time;
2523 
2524 	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
2525 
2526 	bzero(&s, sizeof(s));
2527 	net_timerclear(&start_time);
2528 
2529 	lck_mtx_lock_spin(&ifp->if_poll_lock);
2530 	if (__improbable(wres == THREAD_INTERRUPTED ||
2531 	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
2532 		goto terminate;
2533 	}
2534 
2535 	inp = ifp->if_inp;
2536 	VERIFY(inp != NULL);
2537 
2538 	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
2539 		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
2540 		lck_mtx_unlock(&ifp->if_poll_lock);
2541 		ifnet_decr_pending_thread_count(ifp);
2542 		lck_mtx_lock_spin(&ifp->if_poll_lock);
2543 		goto skip;
2544 	}
2545 
2546 	ifp->if_poll_flags |= IF_POLLF_RUNNING;
2547 
2548 	/*
2549 	 * Keep on servicing until no more request.
2550 	 */
2551 	for (;;) {
2552 		mbuf_ref_t m_head, m_tail;
2553 		u_int32_t m_lim, m_cnt, m_totlen;
2554 		u_int16_t req = ifp->if_poll_req;
2555 
2556 		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
2557 		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
2558 		lck_mtx_unlock(&ifp->if_poll_lock);
2559 
2560 		/*
2561 		 * If no longer attached, there's nothing to do;
2562 		 * else hold an IO refcnt to prevent the interface
2563 		 * from being detached (will be released below.)
2564 		 */
2565 		if (!ifnet_is_attached(ifp, 1)) {
2566 			lck_mtx_lock_spin(&ifp->if_poll_lock);
2567 			break;
2568 		}
2569 
2570 		if (dlil_verbose > 1) {
2571 			DLIL_PRINTF("%s: polling up to %d pkts, "
2572 			    "pkts avg %d max %d, wreq avg %d, "
2573 			    "bytes avg %d\n",
2574 			    if_name(ifp), m_lim,
2575 			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
2576 			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
2577 		}
2578 
2579 		/* invoke the driver's input poll routine */
2580 		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
2581 		&m_cnt, &m_totlen));
2582 
2583 		if (m_head != NULL) {
2584 			VERIFY(m_tail != NULL && m_cnt > 0);
2585 
2586 			if (dlil_verbose > 1) {
2587 				DLIL_PRINTF("%s: polled %d pkts, "
2588 				    "pkts avg %d max %d, wreq avg %d, "
2589 				    "bytes avg %d\n",
2590 				    if_name(ifp), m_cnt,
2591 				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
2592 				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
2593 			}
2594 
2595 			/* stats are required for extended variant */
2596 			s.packets_in = m_cnt;
2597 			s.bytes_in = m_totlen;
2598 
2599 			(void) ifnet_input_common(ifp, m_head, m_tail,
2600 			    &s, TRUE, TRUE);
2601 		} else {
2602 			if (dlil_verbose > 1) {
2603 				DLIL_PRINTF("%s: no packets, "
2604 				    "pkts avg %d max %d, wreq avg %d, "
2605 				    "bytes avg %d\n",
2606 				    if_name(ifp), ifp->if_rxpoll_pavg,
2607 				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
2608 				    ifp->if_rxpoll_bavg);
2609 			}
2610 
2611 			(void) ifnet_input_common(ifp, NULL, NULL,
2612 			    NULL, FALSE, TRUE);
2613 		}
2614 
2615 		/* Release the io ref count */
2616 		ifnet_decr_iorefcnt(ifp);
2617 
2618 		lck_mtx_lock_spin(&ifp->if_poll_lock);
2619 
2620 		/* if there's no pending request, we're done */
2621 		if (req == ifp->if_poll_req ||
2622 		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
2623 			break;
2624 		}
2625 	}
2626 skip:
2627 	ifp->if_poll_req = 0;
2628 	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
2629 
2630 	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
2631 		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
2632 		struct timespec *ts;
2633 
2634 		/*
2635 		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
2636 		 * until ifnet_poll() is called again.
2637 		 */
2638 		ts = &ifp->if_poll_cycle;
2639 		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
2640 			ts = NULL;
2641 		}
2642 
2643 		if (ts != NULL) {
2644 			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
2645 			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
2646 		}
2647 
2648 		(void) assert_wait_deadline(&ifp->if_poll_thread,
2649 		    THREAD_UNINT, deadline);
2650 		lck_mtx_unlock(&ifp->if_poll_lock);
2651 		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
2652 		/* NOTREACHED */
2653 	} else {
2654 terminate:
2655 		/* interface is detached (maybe while asleep)? */
2656 		ifnet_set_poll_cycle(ifp, NULL);
2657 
2658 		/* clear if_poll_thread to allow termination to continue */
2659 		ASSERT(ifp->if_poll_thread != THREAD_NULL);
2660 		ifp->if_poll_thread = THREAD_NULL;
2661 		wakeup((caddr_t)&ifp->if_poll_thread);
2662 		lck_mtx_unlock(&ifp->if_poll_lock);
2663 
2664 		if (dlil_verbose) {
2665 			DLIL_PRINTF("%s: poller thread terminated\n",
2666 			    if_name(ifp));
2667 		}
2668 
2669 		/* for the extra refcnt from kernel_thread_start() */
2670 		thread_deallocate(current_thread());
2671 		/* this is the end */
2672 		thread_terminate(current_thread());
2673 		/* NOTREACHED */
2674 	}
2675 
2676 	/* must never get here */
2677 	VERIFY(0);
2678 	/* NOTREACHED */
2679 	__builtin_unreachable();
2680 }
2681 
2682 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)2683 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2684 {
2685 	if (ts == NULL) {
2686 		bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
2687 	} else {
2688 		*(&ifp->if_poll_cycle) = *ts;
2689 	}
2690 
2691 	if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2692 		DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
2693 		    if_name(ifp), ts->tv_nsec);
2694 	}
2695 }
2696 
2697 void
ifnet_purge(struct ifnet * ifp)2698 ifnet_purge(struct ifnet *ifp)
2699 {
2700 	if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
2701 		if_qflush_snd(ifp, false);
2702 	}
2703 }
2704 
/*
 * Propagate a classq event to the interface's send queue.  If a
 * token-bucket regulator is active, re-apply its current profile so
 * it is recomputed before the queue update.  Caller must hold the
 * ifclassq lock.
 */
void
ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		return;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		/* re-apply the existing TBR rate/percent profile */
		struct tb_profile tb = {
			.rate = ifq->ifcq_tbr.tbr_rate_raw,
			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
		};
		(void) ifclassq_tbr_set(ifq, &tb, FALSE);
	}

	ifclassq_update(ifq, ev);
}
2724 
2725 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)2726 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2727 {
2728 	switch (ev) {
2729 	case CLASSQ_EV_LINK_BANDWIDTH:
2730 		if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
2731 			ifp->if_poll_update++;
2732 		}
2733 		break;
2734 
2735 	default:
2736 		break;
2737 	}
2738 }
2739 
2740 errno_t
ifnet_set_output_sched_model(struct ifnet * ifp,u_int32_t model)2741 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2742 {
2743 	struct ifclassq *ifq;
2744 	u_int32_t omodel;
2745 	errno_t err;
2746 
2747 	if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
2748 		return EINVAL;
2749 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2750 		return ENXIO;
2751 	}
2752 
2753 	ifq = ifp->if_snd;
2754 	IFCQ_LOCK(ifq);
2755 	omodel = ifp->if_output_sched_model;
2756 	ifp->if_output_sched_model = model;
2757 	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
2758 		ifp->if_output_sched_model = omodel;
2759 	}
2760 	IFCQ_UNLOCK(ifq);
2761 
2762 	return err;
2763 }
2764 
2765 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2766 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2767 {
2768 	if (ifp == NULL) {
2769 		return EINVAL;
2770 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2771 		return ENXIO;
2772 	}
2773 
2774 	ifclassq_set_maxlen(ifp->if_snd, maxqlen);
2775 
2776 	return 0;
2777 }
2778 
2779 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2780 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2781 {
2782 	if (ifp == NULL || maxqlen == NULL) {
2783 		return EINVAL;
2784 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2785 		return ENXIO;
2786 	}
2787 
2788 	*maxqlen = ifclassq_get_maxlen(ifp->if_snd);
2789 
2790 	return 0;
2791 }
2792 
2793 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)2794 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2795 {
2796 	errno_t err;
2797 
2798 	if (ifp == NULL || pkts == NULL) {
2799 		err = EINVAL;
2800 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2801 		err = ENXIO;
2802 	} else {
2803 		err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
2804 		    IF_CLASSQ_ALL_GRPS, pkts, NULL);
2805 	}
2806 
2807 	return err;
2808 }
2809 
2810 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)2811 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2812     u_int32_t *pkts, u_int32_t *bytes)
2813 {
2814 	errno_t err;
2815 
2816 	if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2817 	    (pkts == NULL && bytes == NULL)) {
2818 		err = EINVAL;
2819 	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2820 		err = ENXIO;
2821 	} else {
2822 		err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
2823 		    pkts, bytes);
2824 	}
2825 
2826 	return err;
2827 }
2828 
2829 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2830 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2831 {
2832 	struct dlil_threading_info *inp;
2833 
2834 	if (ifp == NULL) {
2835 		return EINVAL;
2836 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2837 		return ENXIO;
2838 	}
2839 
2840 	if (maxqlen == 0) {
2841 		maxqlen = if_rcvq_maxlen;
2842 	} else if (maxqlen < IF_RCVQ_MINLEN) {
2843 		maxqlen = IF_RCVQ_MINLEN;
2844 	}
2845 
2846 	inp = ifp->if_inp;
2847 	lck_mtx_lock(&inp->dlth_lock);
2848 	qlimit(&inp->dlth_pkts) = maxqlen;
2849 	lck_mtx_unlock(&inp->dlth_lock);
2850 
2851 	return 0;
2852 }
2853 
2854 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2855 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2856 {
2857 	struct dlil_threading_info *inp;
2858 
2859 	if (ifp == NULL || maxqlen == NULL) {
2860 		return EINVAL;
2861 	} else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2862 		return ENXIO;
2863 	}
2864 
2865 	inp = ifp->if_inp;
2866 	lck_mtx_lock(&inp->dlth_lock);
2867 	*maxqlen = qlimit(&inp->dlth_pkts);
2868 	lck_mtx_unlock(&inp->dlth_lock);
2869 	return 0;
2870 }
2871 
2872 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)2873 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
2874     uint16_t delay_timeout)
2875 {
2876 	if (delay_qlen > 0 && delay_timeout > 0) {
2877 		if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
2878 		ifp->if_start_delay_qlen = MIN(100, delay_qlen);
2879 		ifp->if_start_delay_timeout = min(20000, delay_timeout);
2880 		/* convert timeout to nanoseconds */
2881 		ifp->if_start_delay_timeout *= 1000;
2882 		kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
2883 		    ifp->if_xname, (uint32_t)delay_qlen,
2884 		    (uint32_t)delay_timeout);
2885 	} else {
2886 		if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
2887 	}
2888 }
2889 
2890 /*
2891  * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
2892  * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
2893  * buf holds the full header.
2894  */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *__indexable buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	/* aligned scratch copy used when buf is not suitably aligned */
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		/* Operate on an aligned local copy if the header is misaligned. */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		/* Fast path: DSCP already zero (ECN bits are preserved). */
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		/*
		 * Incrementally update the IPv4 header checksum for the
		 * TOS byte change (RFC 1624 style), then fold the carry
		 * back into the low 16 bits.
		 */
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		/* Write the modified header back if we used the local copy. */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;
		ASSERT(ip_ver == IPV6_VERSION);

		/* Operate on an aligned local copy if the header is misaligned. */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		/* Fast path: DSCP bits in the flow word already zero. */
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		/* No checksum update needed: IPv6 has no header checksum. */
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		/* Write the modified header back if we used the local copy. */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
2950 
/*
 * Enqueue one packet (mbuf or Skywalk packet) onto the given classq
 * (or, when ifcq is NULL, the interface's default send classq).
 * Along the way: stamp a timestamp on the packet if it doesn't carry
 * one, record foreground/realtime activity timestamps on the interface
 * (and in the Skywalk nexus advisory, if attached), apply the Wi-Fi
 * multicast DSCP-clearing workaround, run the IFEF_ENQUEUE_MULTI
 * start-delay heuristics, and finally kick the driver's start routine
 * when appropriate.  The packet is consumed by ifclassq_enqueue();
 * *pdrop reports whether it was dropped.
 */
static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	/* pointers into the nexus advisory region, when attached to a fsw */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0; /* non-zero once a timestamp has been taken */
	int error = 0;
	uint8_t *mcast_buf = NULL; /* non-NULL => clear DSCP after the switch */
	uint8_t ip_ver;            /* valid only when mcast_buf != NULL */
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here.  Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)_net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)_net_uptime;
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)_net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)_net_uptime;
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* make sure the Ethernet header is contiguous */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = mtod(p->cp_mbuf, struct ether_header *);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* not IP/IPv6: skip the DSCP workaround */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				eh = mtod(p->cp_mbuf, struct ether_header *);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface.  If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)_net_uptime;
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)_net_uptime;
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)_net_uptime;
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)_net_uptime;
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			/* locate the frame within the first buflet */
			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* not IP/IPv6: skip the DSCP workaround */
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* apply the Wi-Fi multicast DSCP workaround, if flagged above */
	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		/* reuse the timestamp taken above, if any */
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This is
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* still inside the sampling window */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* idle for >= 200ms: reset and disable delay */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* window expired: evaluate the heuristics */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				/* start a new sampling window */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* first packet: open the initial sampling window */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
3260 
3261 static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3262 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3263     classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
3264     boolean_t flush, boolean_t *pdrop)
3265 {
3266 	int error;
3267 
3268 	/* enqueue the packet (caller consumes object) */
3269 	error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
3270 	    cnt, bytes, pdrop);
3271 
3272 	/*
3273 	 * Tell the driver to start dequeueing; do this even when the queue
3274 	 * for the packet is suspended (EQSUSPENDED), as the driver could still
3275 	 * be dequeueing from other unsuspended queues.
3276 	 */
3277 	if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
3278 		ifnet_start(ifp);
3279 	}
3280 	return error;
3281 }
3282 
3283 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * __sized_by (n_pkts)pkts,uint32_t n_pkts)3284 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *__sized_by(n_pkts)pkts, uint32_t n_pkts)
3285 {
3286 	ifnet_ref_t ifp = handle;
3287 	boolean_t pdrop;        /* dummy */
3288 	uint32_t i;
3289 
3290 	ASSERT(n_pkts >= 1);
3291 	for (i = 0; i < n_pkts - 1; i++) {
3292 		(void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3293 		    FALSE, &pdrop);
3294 	}
3295 	/* flush with the last packet */
3296 	(void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3297 	    TRUE, &pdrop);
3298 
3299 	return 0;
3300 }
3301 
3302 static inline errno_t
ifnet_enqueue_common(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)3303 ifnet_enqueue_common(struct ifnet *ifp, struct ifclassq *ifcq,
3304     classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
3305 {
3306 	if (ifp->if_output_netem != NULL) {
3307 		bool drop;
3308 		errno_t error;
3309 		error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
3310 		*pdrop = drop ? TRUE : FALSE;
3311 		return error;
3312 	} else {
3313 		return ifnet_enqueue_ifclassq(ifp, ifcq, pkt, flush, pdrop);
3314 	}
3315 }
3316 
3317 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)3318 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3319 {
3320 	uint32_t bytes = m_pktlen(m);
3321 	struct mbuf *tail = m;
3322 	uint32_t cnt = 1;
3323 	boolean_t pdrop;
3324 
3325 	while (tail->m_nextpkt) {
3326 		VERIFY(tail->m_flags & M_PKTHDR);
3327 		tail = tail->m_nextpkt;
3328 		cnt++;
3329 		bytes += m_pktlen(tail);
3330 	}
3331 
3332 	return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
3333 }
3334 
3335 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)3336 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3337     boolean_t *pdrop)
3338 {
3339 	classq_pkt_t pkt;
3340 
3341 	m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
3342 	if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3343 	    m->m_nextpkt != NULL) {
3344 		if (m != NULL) {
3345 			m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_INVALID, NULL, 0);
3346 			*pdrop = TRUE;
3347 		}
3348 		return EINVAL;
3349 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3350 	    !IF_FULLY_ATTACHED(ifp)) {
3351 		/* flag tested without lock for performance */
3352 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3353 		*pdrop = TRUE;
3354 		return ENXIO;
3355 	} else if (!(ifp->if_flags & IFF_UP)) {
3356 		m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3357 		*pdrop = TRUE;
3358 		return ENETDOWN;
3359 	}
3360 
3361 	CLASSQ_PKT_INIT_MBUF(&pkt, m);
3362 	return ifnet_enqueue_common(ifp, NULL, &pkt, flush, pdrop);
3363 }
3364 
3365 errno_t
ifnet_enqueue_mbuf_chain(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3366 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
3367     struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
3368     boolean_t *pdrop)
3369 {
3370 	classq_pkt_t head, tail;
3371 
3372 	m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
3373 	ASSERT(m_head != NULL);
3374 	ASSERT((m_head->m_flags & M_PKTHDR) != 0);
3375 	ASSERT(m_tail != NULL);
3376 	ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
3377 	ASSERT(ifp != NULL);
3378 	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3379 
3380 	if (!IF_FULLY_ATTACHED(ifp)) {
3381 		/* flag tested without lock for performance */
3382 		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3383 		*pdrop = TRUE;
3384 		return ENXIO;
3385 	} else if (!(ifp->if_flags & IFF_UP)) {
3386 		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3387 		*pdrop = TRUE;
3388 		return ENETDOWN;
3389 	}
3390 
3391 	CLASSQ_PKT_INIT_MBUF(&head, m_head);
3392 	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3393 	return ifnet_enqueue_ifclassq_chain(ifp, NULL, &head, &tail, cnt, bytes,
3394 	           flush, pdrop);
3395 }
3396 
3397 #if SKYWALK
3398 static errno_t
ifnet_enqueue_pkt_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3399 ifnet_enqueue_pkt_common(struct ifnet *ifp, struct ifclassq *ifcq,
3400     struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3401 {
3402 	classq_pkt_t pkt;
3403 
3404 	ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
3405 
3406 	if (__improbable(ifp == NULL || kpkt == NULL)) {
3407 		if (kpkt != NULL) {
3408 			pp_free_packet(__DECONST(struct kern_pbufpool *,
3409 			    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3410 			*pdrop = TRUE;
3411 		}
3412 		return EINVAL;
3413 	} else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
3414 	    !IF_FULLY_ATTACHED(ifp))) {
3415 		/* flag tested without lock for performance */
3416 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3417 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3418 		*pdrop = TRUE;
3419 		return ENXIO;
3420 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3421 		pp_free_packet(__DECONST(struct kern_pbufpool *,
3422 		    kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3423 		*pdrop = TRUE;
3424 		return ENETDOWN;
3425 	}
3426 
3427 	CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
3428 	return ifnet_enqueue_common(ifp, ifcq, &pkt, flush, pdrop);
3429 }
3430 
errno_t
ifnet_enqueue_pkt(struct ifnet *ifp, struct __kern_packet *kpkt,
    boolean_t flush, boolean_t *pdrop)
{
	/* Enqueue a single Skywalk packet on the default send classq. */
	return ifnet_enqueue_pkt_common(ifp, NULL, kpkt, flush, pdrop);
}
3437 
errno_t
ifnet_enqueue_ifcq_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
    struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
{
	/* Enqueue a single Skywalk packet on the caller-specified classq. */
	return ifnet_enqueue_pkt_common(ifp, ifcq, kpkt, flush, pdrop);
}
3444 
3445 static errno_t
ifnet_enqueue_pkt_chain_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3446 ifnet_enqueue_pkt_chain_common(struct ifnet *ifp, struct ifclassq *ifcq,
3447     struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3448     uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3449 {
3450 	classq_pkt_t head, tail;
3451 
3452 	ASSERT(k_head != NULL);
3453 	ASSERT(k_tail != NULL);
3454 	ASSERT(ifp != NULL);
3455 	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3456 
3457 	if (!IF_FULLY_ATTACHED(ifp)) {
3458 		/* flag tested without lock for performance */
3459 		pp_free_packet_chain(k_head, NULL);
3460 		*pdrop = TRUE;
3461 		return ENXIO;
3462 	} else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3463 		pp_free_packet_chain(k_head, NULL);
3464 		*pdrop = TRUE;
3465 		return ENETDOWN;
3466 	}
3467 
3468 	CLASSQ_PKT_INIT_PACKET(&head, k_head);
3469 	CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
3470 	return ifnet_enqueue_ifclassq_chain(ifp, ifcq, &head, &tail, cnt, bytes,
3471 	           flush, pdrop);
3472 }
3473 
errno_t
ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct __kern_packet *k_head,
    struct __kern_packet *k_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	/* Enqueue a Skywalk packet chain on the default send classq. */
	return ifnet_enqueue_pkt_chain_common(ifp, NULL, k_head, k_tail,
	           cnt, bytes, flush, pdrop);
}
3482 
errno_t
ifnet_enqueue_ifcq_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
    struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
    uint32_t bytes, boolean_t flush, boolean_t *pdrop)
{
	/* Enqueue a Skywalk packet chain on the caller-specified classq. */
	return ifnet_enqueue_pkt_chain_common(ifp, ifcq, k_head, k_tail,
	           cnt, bytes, flush, pdrop);
}
3491 #endif /* SKYWALK */
3492 
3493 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)3494 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3495 {
3496 	errno_t rc;
3497 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3498 
3499 	if (ifp == NULL || mp == NULL) {
3500 		return EINVAL;
3501 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3502 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3503 		return ENXIO;
3504 	}
3505 	if (!ifnet_is_attached(ifp, 1)) {
3506 		return ENXIO;
3507 	}
3508 
3509 #if SKYWALK
3510 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3511 #endif /* SKYWALK */
3512 	rc = ifclassq_dequeue(ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3513 	    &pkt, NULL, NULL, NULL, 0);
3514 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3515 	ifnet_decr_iorefcnt(ifp);
3516 	*mp = pkt.cp_mbuf;
3517 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3518 	return rc;
3519 }
3520 
3521 errno_t
ifnet_dequeue_service_class(struct ifnet * ifp,mbuf_svc_class_t sc,struct mbuf ** mp)3522 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3523     struct mbuf **mp)
3524 {
3525 	errno_t rc;
3526 	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3527 
3528 	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
3529 		return EINVAL;
3530 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3531 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3532 		return ENXIO;
3533 	}
3534 	if (!ifnet_is_attached(ifp, 1)) {
3535 		return ENXIO;
3536 	}
3537 
3538 #if SKYWALK
3539 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3540 #endif /* SKYWALK */
3541 	rc = ifclassq_dequeue_sc(ifp->if_snd, sc, 1,
3542 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
3543 	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3544 	ifnet_decr_iorefcnt(ifp);
3545 	*mp = pkt.cp_mbuf;
3546 	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3547 	return rc;
3548 }
3549 
3550 errno_t
ifnet_dequeue_multi(struct ifnet * ifp,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3551 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3552     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3553 {
3554 	errno_t rc;
3555 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3556 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3557 
3558 	if (ifp == NULL || head == NULL || pkt_limit < 1) {
3559 		return EINVAL;
3560 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3561 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3562 		return ENXIO;
3563 	}
3564 	if (!ifnet_is_attached(ifp, 1)) {
3565 		return ENXIO;
3566 	}
3567 
3568 #if SKYWALK
3569 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3570 #endif /* SKYWALK */
3571 	rc = ifclassq_dequeue(ifp->if_snd, pkt_limit,
3572 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
3573 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3574 	ifnet_decr_iorefcnt(ifp);
3575 	*head = pkt_head.cp_mbuf;
3576 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3577 	if (tail != NULL) {
3578 		*tail = pkt_tail.cp_mbuf;
3579 	}
3580 	return rc;
3581 }
3582 
3583 errno_t
ifnet_dequeue_multi_bytes(struct ifnet * ifp,u_int32_t byte_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3584 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3585     struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3586 {
3587 	errno_t rc;
3588 	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3589 	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3590 
3591 	if (ifp == NULL || head == NULL || byte_limit < 1) {
3592 		return EINVAL;
3593 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3594 	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3595 		return ENXIO;
3596 	}
3597 	if (!ifnet_is_attached(ifp, 1)) {
3598 		return ENXIO;
3599 	}
3600 
3601 #if SKYWALK
3602 	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3603 #endif /* SKYWALK */
3604 	rc = ifclassq_dequeue(ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3605 	    byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
3606 	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3607 	ifnet_decr_iorefcnt(ifp);
3608 	*head = pkt_head.cp_mbuf;
3609 	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3610 	if (tail != NULL) {
3611 		*tail = pkt_tail.cp_mbuf;
3612 	}
3613 	return rc;
3614 }
3615 
/*
 * Dequeue up to pkt_limit packets of the given service class sc from
 * the interface's send queue.  On success *head (and optionally *tail,
 * *cnt, *len) describe the dequeued packet chain.
 *
 * Returns EINVAL on bad arguments or an invalid service class; ENXIO
 * when the interface does not use the TXSTART output model, has an
 * invalid scheduling model, or is not fully attached.
 */
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	/* take an io refcnt so the interface cannot detach underneath us */
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

#if SKYWALK
	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
	/* byte count is unconstrained here; only the packet limit applies */
	rc = ifclassq_dequeue_sc(ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
	    cnt, len, 0);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}
	return rc;
}
3651 
#if XNU_TARGET_OS_OSX
/*
 * Framer stub that adapts the extended framer interface (with pre/post
 * encapsulation sizes) onto an interface's legacy framer callback:
 * reports zero bytes of prepend/append overhead and forwards the frame
 * request to if_framer_legacy.
 */
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest,
    IFNET_LLADDR_T dest_linkaddr,
    IFNET_FRAME_TYPE_T frame_type,
    u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
#endif /* XNU_TARGET_OS_OSX */
3670 
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	/* interface state changed; have NECP clients re-evaluate */
	necp_update_all_clients();
#endif /* NECP */

	/* hand the event to the kernel event subsystem */
	return kev_post_msg(event);
}
3685 
3686 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)3687 dlil_post_sifflags_msg(struct ifnet * ifp)
3688 {
3689 	struct kev_msg ev_msg;
3690 	struct net_event_data ev_data;
3691 
3692 	bzero(&ev_data, sizeof(ev_data));
3693 	bzero(&ev_msg, sizeof(ev_msg));
3694 	ev_msg.vendor_code = KEV_VENDOR_APPLE;
3695 	ev_msg.kev_class = KEV_NETWORK_CLASS;
3696 	ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3697 	ev_msg.event_code = KEV_DL_SIFFLAGS;
3698 	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3699 	ev_data.if_family = ifp->if_family;
3700 	ev_data.if_unit = (u_int32_t) ifp->if_unit;
3701 	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
3702 	ev_msg.dv[0].data_ptr = &ev_data;
3703 	ev_msg.dv[1].data_length = 0;
3704 	dlil_post_complete_msg(ifp, &ev_msg);
3705 }
3706 
#define TMP_IF_PROTO_ARR_SIZE   10
/*
 * Deliver a kernel event to every party interested in ifp: first the
 * interface filters, then each attached protocol's event callback, then
 * the interface's own if_event handler, and finally post the message via
 * dlil_post_complete_msg() (bumping the interface generation when
 * update_generation is set).
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	/* small on-stack array avoids allocation in the common case */
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			/* drop the lock across the filter callback */
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			/* too many protocols for the stack array: use the heap */
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		/* snapshot the attached protocols, holding a ref on each */
		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* notify each protocol outside of the ifnet lock */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	/* kfree_type only when the heap fallback was actually used */
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
3807 
3808 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)3809 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3810 {
3811 	struct kev_msg kev_msg;
3812 	int result = 0;
3813 
3814 	if (ifp == NULL || event == NULL) {
3815 		return EINVAL;
3816 	}
3817 
3818 	bzero(&kev_msg, sizeof(kev_msg));
3819 	kev_msg.vendor_code = event->vendor_code;
3820 	kev_msg.kev_class = event->kev_class;
3821 	kev_msg.kev_subclass = event->kev_subclass;
3822 	kev_msg.event_code = event->event_code;
3823 	kev_msg.dv[0].data_ptr = &event->event_data;
3824 	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3825 	kev_msg.dv[1].data_length = 0;
3826 
3827 	result = dlil_event_internal(ifp, &kev_msg, TRUE);
3828 
3829 	return result;
3830 }
3831 
/* The following is used to enqueue work items for ifnet ioctl events */
static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);

/* Argument carried by a deferred interface-ioctl work item */
struct ifnet_ioctl_event {
	ifnet_ref_t ifp;        /* target interface; the enqueuer's io refcnt
	                         * is released by the callback */
	u_long ioctl_code;      /* ioctl to issue (SIOCADDMULTI/SIOCDELMULTI) */
};

/* Work-queue entry wrapping an ifnet_ioctl_event; the callback recovers
 * it from the embedded nwk_wqe via __container_of() */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
3844 
3845 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)3846 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
3847 {
3848 	struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
3849 	bool compare_expected;
3850 
3851 	/*
3852 	 * Get an io ref count if the interface is attached.
3853 	 * At this point it most likely is. We are taking a reference for
3854 	 * deferred processing.
3855 	 */
3856 	if (!ifnet_is_attached(ifp, 1)) {
3857 		os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
3858 		    "is not attached",
3859 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3860 		return;
3861 	}
3862 	switch (ioctl_code) {
3863 	case SIOCADDMULTI:
3864 		compare_expected = false;
3865 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
3866 			ifnet_decr_iorefcnt(ifp);
3867 			return;
3868 		}
3869 		break;
3870 	case SIOCDELMULTI:
3871 		compare_expected = false;
3872 		if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
3873 			ifnet_decr_iorefcnt(ifp);
3874 			return;
3875 		}
3876 		break;
3877 	default:
3878 		os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
3879 		    __func__, __LINE__, if_name(ifp), ioctl_code);
3880 		return;
3881 	}
3882 
3883 	p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
3884 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
3885 
3886 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
3887 	p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
3888 	p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
3889 	nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
3890 }
3891 
/*
 * Work-queue callback for deferred interface ioctls: clears the
 * per-ioctl "signaled" flag (allowing the next ifnet_ioctl_async() to
 * enqueue again), issues the ioctl via ifnet_ioctl(), then releases the
 * io refcnt taken by ifnet_ioctl_async() and frees the work item.
 */
static void
ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
{
	struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
	    struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);

	ifnet_ref_t ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
	u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
	int ret = 0;

	/* allow the next async request for this ioctl to be enqueued */
	switch (ioctl_code) {
	case SIOCADDMULTI:
		atomic_store(&ifp->if_mcast_add_signaled, false);
		break;
	case SIOCDELMULTI:
		atomic_store(&ifp->if_mcast_del_signaled, false);
		break;
	}
	if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ret, ioctl_code);
	} else if (dlil_verbose) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
		    "for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ioctl_code);
	}
	/* release the io refcnt taken by ifnet_ioctl_async() */
	ifnet_decr_iorefcnt(ifp);
	kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
	return;
}
3922 
/*
 * Issue an ioctl on an interface.  The request is offered, in order, to
 * the interface filters, the attached protocol (when proto_fam != 0),
 * and finally the interface's own if_ioctl handler.  A stage's status
 * only replaces retval while retval is still EOPNOTSUPP; any status
 * other than 0/EOPNOTSUPP stops further processing.  EJUSTRETURN from
 * any stage stops processing and is reported to the caller as 0.
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/* drop the lock across the filter callback */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, stop"; report it as success */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
4040 
4041 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)4042 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4043 {
4044 	errno_t error = 0;
4045 
4046 	if (ifp->if_set_bpf_tap) {
4047 		/* Get an io reference on the interface if it is attached */
4048 		if (!ifnet_is_attached(ifp, 1)) {
4049 			return ENXIO;
4050 		}
4051 		error = ifp->if_set_bpf_tap(ifp, mode, callback);
4052 		ifnet_decr_iorefcnt(ifp);
4053 	}
4054 	return error;
4055 }
4056 
/*
 * Resolve a multicast protocol address into a link-layer address.
 * The attached protocol's resolve_multi callback gets first chance;
 * the interface's if_check_multi callback then verifies the resolved
 * link-layer address (or, when the protocol could not resolve, is
 * handed the protocol address directly).
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	/* take an io refcnt; bail if the interface is not attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return result;
	}

	SOCKADDR_ZERO(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			/* protocol resolved it: verify the link-layer address */
			verify = ll_addr;
		} else {
			/* protocol could not help: offer the proto address */
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return result;
}
4098 
4099 __private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4100 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4101     const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4102     const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4103 {
4104 	struct if_proto *proto;
4105 	errno_t result = 0;
4106 
4107 	if ((ifp->if_flags & IFF_NOARP) != 0) {
4108 		result = ENOTSUP;
4109 		goto done;
4110 	}
4111 
4112 	/* callee holds a proto refcnt upon success */
4113 	ifnet_lock_shared(ifp);
4114 	proto = find_attached_proto(ifp, target_proto->sa_family);
4115 	ifnet_lock_done(ifp);
4116 	if (proto == NULL) {
4117 		result = ENOTSUP;
4118 	} else {
4119 		proto_media_send_arp    arpp;
4120 		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4121 		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4122 		if (arpp == NULL) {
4123 			result = ENOTSUP;
4124 		} else {
4125 			switch (arpop) {
4126 			case ARPOP_REQUEST:
4127 				arpstat.txrequests++;
4128 				if (target_hw != NULL) {
4129 					arpstat.txurequests++;
4130 				}
4131 				break;
4132 			case ARPOP_REPLY:
4133 				arpstat.txreplies++;
4134 				break;
4135 			}
4136 			result = arpp(ifp, arpop, sender_hw, sender_proto,
4137 			    target_hw, target_proto);
4138 		}
4139 		if_proto_free(proto);
4140 	}
4141 done:
4142 	return result;
4143 }
4144 
4145 static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,const struct sockaddr_in * target_sin)4146 _is_announcement(const struct sockaddr_in * sender_sin,
4147     const struct sockaddr_in * target_sin)
4148 {
4149 	if (target_sin == NULL || sender_sin == NULL) {
4150 		return FALSE;
4151 	}
4152 
4153 	return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
4154 }
4155 
/*
 * Send an ARP packet.  Normally the request goes to the target
 * protocol's send_arp callback on ifp (via dlil_send_arp_internal).
 * Exception: an ARP request whose target is an IPv4 link-local address
 * (and which is not a self-announcement) is instead sent on every
 * interface marked IFEF_ARPLL, using each interface's own link-layer
 * and IPv4 source addresses.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = __DECONST_SA(target_proto0);

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = SA(&target_proto_sinarp);
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces.  The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = SIN(sender_proto);
	target_sin = SIN(target_proto);
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		u_int32_t       count;
		ifnet_ref_t     *__counted_by(count) ifp_list;
		u_int32_t       ifp_on;

		/* ENOTSUP until at least one interface reports a status */
		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				ifnet_ref_t cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing.  This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* keep the link address alive across the unlock */
				ifa_addref(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, SDL(source_hw->ifa_addr),
				    SA(&source_ip_copy), NULL,
				    target_proto);

				ifa_remref(source_hw);
				/* keep the first meaningful status */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free_counted_by(ifp_list, count);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
4266 
4267 /*
4268  * Caller must hold ifnet head lock.
4269  */
4270 static int
ifnet_lookup(struct ifnet * ifp)4271 ifnet_lookup(struct ifnet *ifp)
4272 {
4273 	ifnet_ref_t _ifp;
4274 
4275 	ifnet_head_lock_assert(LCK_RW_ASSERT_HELD);
4276 	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4277 		if (_ifp == ifp) {
4278 			break;
4279 		}
4280 	}
4281 	return _ifp != NULL;
4282 }
4283 
/*
 * Caller has to pass a non-zero refio argument to get a
 * IO reference count. This will prevent ifnet_detach from
 * being called when there are outstanding io reference counts.
 *
 * Returns non-zero when the interface is fully attached; in that case,
 * if refio > 0, an io refcnt was taken that the caller must release
 * with ifnet_decr_iorefcnt().
 */
int
ifnet_is_attached(struct ifnet *ifp, int refio)
{
	int ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED(ifp))) {
		if (refio > 0) {
			ifp->if_refio++;
		}
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}
4304 
/*
 * Bump the interface's pending-thread count; paired with
 * ifnet_decr_pending_thread_count(), whose final decrement wakes any
 * waiter sleeping on if_threads_pending.
 */
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4312 
/*
 * Drop the interface's pending-thread count; when it reaches zero,
 * wake up anyone sleeping on if_threads_pending.
 */
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4324 
/*
 * Caller must ensure the interface is attached; the assumption is that
 * there is at least an outstanding IO reference count held already.
 * Most callers would call ifnet_is_{attached,data_ready}() instead.
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	/* verify the caller's claim before taking another reference */
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4339 
/*
 * Release one io refcnt with if_ref_lock already held; shared by
 * ifnet_decr_iorefcnt(), ifnet_datamov_end() and ifnet_datamov_resume().
 */
__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	/* each data-mover holds an io ref, so refio==0 implies datamov==0 */
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}
4360 
/*
 * Release an io refcnt previously obtained via ifnet_is_attached(ifp, 1)
 * or ifnet_incr_iorefcnt().
 */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4368 
/*
 * Mark the start of a data-path operation on ifp: takes an io refcnt
 * and bumps the data-mover count.  Returns FALSE (taking no reference)
 * when the interface is not fully attached and ready.
 */
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
	return ret;
}
4384 
/*
 * Mark the end of a data-path operation started with
 * ifnet_datamov_begin(): drops the data-mover count and the io refcnt,
 * and wakes ifnet_datamov_drain() waiters once the last mover is gone.
 */
void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
		DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);

	DTRACE_IP1(datamov__end, struct ifnet *, ifp);
}
4404 
/*
 * Suspend new data-path activity with if_ref_lock held: the first
 * suspender clears IFRF_READY so ifnet_datamov_begin() starts failing.
 * An io refcnt is taken and held until the matching
 * ifnet_datamov_resume().
 */
static void
ifnet_datamov_suspend_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
}
4415 
/*
 * Suspend data movement on an attached (or detaching) interface;
 * pair with ifnet_datamov_resume().
 */
void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4424 
/*
 * Suspend data movement only when nobody has suspended it yet.
 * Returns TRUE when this call performed the suspension (the caller
 * then owes a matching ifnet_datamov_resume()), FALSE when the
 * interface was already suspended.
 */
boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	if (ifp->if_suspend > 0) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		return FALSE;
	}
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
	return TRUE;
}
4438 
/*
 * Block until all in-flight data-path operations on ifp have finished.
 * Data movement must already be suspended (IFRF_READY cleared) so no
 * new movers can start.  Once quiesced, the transmit queues of TXSTART
 * interfaces are flushed.
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	/* sleep until the last mover calls ifnet_datamov_end() */
	while (ifp->if_datamov != 0) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush_snd(ifp, false);
	}
}
4466 
/* Convenience wrapper: suspend data movement, then wait for it to drain. */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
4473 
/*
 * Undo one ifnet_datamov_suspend(): the last resumer restores
 * IFRF_READY, and the io refcnt taken at suspend time is released.
 */
void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4487 
/*
 * Attach an initialized if_proto to its interface: lets the interface
 * family refine the demux descriptors via if_add_proto, inserts the
 * protocol into the interface's protocol hash, and broadcasts
 * KEV_DL_PROTO_ATTACHED.  On success *proto_count (when non-NULL)
 * receives the number of protocols now attached.  Returns EEXIST when
 * the family is already attached, ENXIO when the interface is gone.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *__counted_by(demux_count) demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	ifnet_ref_t ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	/* take an io refcnt so the interface cannot detach while we work */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		/* this protocol family is already attached */
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
4567 
/*
 * Post-attach housekeeping once a protocol has been attached to ifp:
 * marks the interface up, pushes the flag change to the driver, posts
 * the SIFFLAGS kernel event, and (Skywalk, IP only) attaches the
 * flowswitch nexus.
 */
static void
dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
{
	/*
	 * A protocol has been attached, mark the interface up.
	 * This used to be done by configd.KernelEventMonitor, but that
	 * is inherently prone to races (rdar://problem/30810208).
	 */
	(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
	(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
	dlil_post_sifflags_msg(ifp);
#if SKYWALK
	switch (protocol) {
	case AF_INET:
	case AF_INET6:
		/* don't attach the flowswitch unless attaching IP */
		dlil_attach_flowswitch_nexus(ifp);
		break;
	default:
		break;
	}
#endif /* SKYWALK */
}
4591 
/*
 * Attach a v1 protocol handler to an interface.
 *
 * Allocates an if_proto, populates it with the v1 KPI callbacks from
 * proto_details, and hands it to dlil_attach_protocol().  On success
 * dlil_attach_protocol() keeps the if_proto; on any failure (including
 * EEXIST) it is freed here by the caller.
 *
 * Returns 0 on success; EINVAL on bad arguments; ENXIO if the interface
 * is not on the global list; EEXIST if the protocol is already attached;
 * or whatever dlil_attach_protocol() reports.
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto  *ifproto = NULL;
	uint32_t proto_count = 0;

	/* Hold the head lock shared so the ifnet can't detach underneath us */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	/* on success, ownership of ifproto passes to dlil_attach_protocol() */
	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/* mark the interface up now that a protocol is attached */
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		dlif_proto_free(ifproto);
	}
	return retval;
}
4653 
/*
 * Attach a v2 protocol handler to an interface.
 *
 * Identical to ifnet_attach_protocol() except that the if_proto is
 * populated with the v2 KPI callbacks (v2 input takes packet chains
 * without a separate header pointer).  Ownership of the if_proto
 * passes to dlil_attach_protocol() on success; on any failure
 * (including EEXIST) it is freed here.
 *
 * Returns 0, EINVAL, ENXIO, EEXIST, or the dlil_attach_protocol() error.
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto  *ifproto = NULL;
	uint32_t proto_count = 0;

	/* Hold the head lock shared so the ifnet can't detach underneath us */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	/* on success, ownership of ifproto passes to dlil_attach_protocol() */
	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		/* mark the interface up now that a protocol is attached */
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		dlif_proto_free(ifproto);
	}
	return retval;
}
4715 
/*
 * Detach a protocol from an interface.
 *
 * Removes the if_proto from the interface's protocol hash, notifies the
 * family module via if_del_proto, and swaps the protocol's KPI callbacks
 * for inert media stubs so any caller still holding a reference fails
 * cleanly (ENXIO) instead of calling into a departing protocol.  The
 * final teardown happens when the last proto reference is dropped.
 *
 * Returns 0 on success, EINVAL on bad arguments, or ENXIO if the
 * protocol is not attached to this interface.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int     retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/*
	 * Point the callbacks at stubs that return ENXIO (or do nothing),
	 * for the benefit of outstanding references to this if_proto.
	 */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
4781 
4782 static errno_t
ifproto_media_input_v1(struct ifnet * ifp,protocol_family_t protocol,struct mbuf * packet,char * header)4783 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
4784     struct mbuf *packet, char *header)
4785 {
4786 #pragma unused(ifp, protocol, packet, header)
4787 	return ENXIO;
4788 }
4789 
4790 static errno_t
ifproto_media_input_v2(struct ifnet * ifp,protocol_family_t protocol,struct mbuf * packet)4791 ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
4792     struct mbuf *packet)
4793 {
4794 #pragma unused(ifp, protocol, packet)
4795 	return ENXIO;
4796 }
4797 
4798 static errno_t
ifproto_media_preout(struct ifnet * ifp,protocol_family_t protocol,mbuf_t * packet,const struct sockaddr * dest,void * route,IFNET_FRAME_TYPE_RW_T frame_type,IFNET_LLADDR_RW_T link_layer_dest)4799 ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
4800     mbuf_t *packet, const struct sockaddr *dest, void *route,
4801     IFNET_FRAME_TYPE_RW_T frame_type, IFNET_LLADDR_RW_T link_layer_dest)
4802 {
4803 #pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
4804 	return ENXIO;
4805 }
4806 
4807 static void
ifproto_media_event(struct ifnet * ifp,protocol_family_t protocol,const struct kev_msg * event)4808 ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
4809     const struct kev_msg *event)
4810 {
4811 #pragma unused(ifp, protocol, event)
4812 }
4813 
4814 static errno_t
ifproto_media_ioctl(struct ifnet * ifp,protocol_family_t protocol,unsigned long command,void * argument)4815 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
4816     unsigned long command, void *argument)
4817 {
4818 #pragma unused(ifp, protocol, command, argument)
4819 	return ENXIO;
4820 }
4821 
4822 static errno_t
ifproto_media_resolve_multi(ifnet_t ifp,const struct sockaddr * proto_addr,struct sockaddr_dl * out_ll,size_t ll_len)4823 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
4824     struct sockaddr_dl *out_ll, size_t ll_len)
4825 {
4826 #pragma unused(ifp, proto_addr, out_ll, ll_len)
4827 	return ENXIO;
4828 }
4829 
4830 static errno_t
ifproto_media_send_arp(struct ifnet * ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4831 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
4832     const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4833     const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4834 {
4835 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
4836 	return ENXIO;
4837 }
4838 
4839 extern int if_next_index(void);
4840 extern int tcp_ecn_outbound;
4841 
4842 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)4843 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
4844 {
4845 	uint32_t sflags = 0;
4846 	int err;
4847 
4848 	if (if_flowadv) {
4849 		sflags |= PKTSCHEDF_QALG_FLOWCTL;
4850 	}
4851 
4852 	if (if_delaybased_queue) {
4853 		sflags |= PKTSCHEDF_QALG_DELAYBASED;
4854 	}
4855 
4856 	if (ifp->if_output_sched_model ==
4857 	    IFNET_SCHED_MODEL_DRIVER_MANAGED) {
4858 		sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
4859 	}
4860 	/* Inherit drop limit from the default queue */
4861 	if (ifp->if_snd != ifcq) {
4862 		IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
4863 	}
4864 	/* Initialize transmit queue(s) */
4865 	err = ifclassq_setup(ifcq, ifp, sflags);
4866 	if (err != 0) {
4867 		panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4868 		    "err=%d", __func__, ifp, err);
4869 		/* NOTREACHED */
4870 	}
4871 }
4872 
/*
 * Attach an embryonic ifnet to the system.
 *
 * Assigns an interface index, installs the link-layer address, resets
 * statistics, initializes the transmit classq and the DLIL input /
 * start / poll kernel threads, attaches multicast (IGMP/MLD) state,
 * and finally marks the interface IFRF_ATTACHED | IFRF_READY.
 *
 * ll_addr, if non-NULL, supplies the link-layer address; its length
 * must match ifp->if_addrlen (or if_addrlen must be 0, in which case
 * it is taken from ll_addr->sdl_alen).
 *
 * Returns 0 on success; EINVAL (NULL ifp or address length mismatch),
 * EEXIST (already on the ifnet list), ENODEV (no family module), or
 * ENOBUFS (no free interface index, or lladdr allocation failure).
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
#if SKYWALK
	boolean_t netif_compat;
	if_nexus_netif  nexus_netif;
#endif /* SKYWALK */
	ifnet_ref_t tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	/* A valid caller hands us an ifnet still in the embryonic state */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	/* Address length comes from the caller or from the lladdr itself */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
	    PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ifp->if_proto_hash_count = PROTO_HASH_SLOTS;

	/* Interface filter list starts out empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* a recycled (DLIF_REUSE) ifnet keeps its old multicast list */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		/*
		 * Since we exhausted the list of
		 * if_index's, try to find an empty slot
		 * in ifindex2ifnet.
		 */
		if (idx == -1 && if_index >= UINT16_MAX) {
			for (int i = 1; i < if_index; i++) {
				if (ifindex2ifnet[i] == NULL &&
				    ifnet_addrs[i - 1] == NULL) {
					idx = i;
					break;
				}
			}
		}
		if (idx == -1) {
			/* no index available; undo and fail */
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;

		/* the lladdr passed at attach time is the permanent address */
		if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
		    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
			bcopy(CONST_LLADDR(ll_addr),
			    dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
			dl_if->dl_if_permanent_ether_is_set = 1;
		}
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	ifa_addref(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* publish the interface in the global list and index table */
	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	/* initialize the default transmit classq */
	dlil_ifclassq_setup(ifp, ifp->if_snd);

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			/* ENODEV means no dedicated thread; not fatal */
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* bump the start thread's scheduling precedence slightly */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (dlil_is_rxpoll_input(thfunc)) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* bump the poll thread's scheduling precedence slightly */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/* count suspended link-layer memberships kept across a reuse */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	/* Clear throughput estimates and radio type */
	ifp->if_estimated_up_bucket = 0;
	ifp->if_estimated_down_bucket = 0;
	ifp->if_radio_type = 0;
	ifp->if_radio_channel = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		if_set_eflags(ifp, IFEF_ECN_ENABLE);
		if_clear_eflags(ifp, IFEF_ECN_DISABLE);
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

#if SKYWALK
	netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that.  This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Final mark this ifnet as attached. */
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
#if SKYWALK
	/* the interface is fully attached; let the nexus adapter know */
	if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
		if (netif_compat) {
			if (sk_netif_compat_txmodel ==
			    NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
				ifnet_enqueue_multi_setup(ifp,
				    sk_tx_delay_qlen, sk_tx_delay_timeout);
			}
			ifp->if_nx_netif = nexus_netif;
		}
		ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
	}
#endif /* SKYWALK */
	ifnet_lock_done(ifp);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return 0;
}
5376 
/*
 * Purge all protocol addresses from an interface: IPv4 (when INET is
 * configured) and IPv6.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
5385 
5386 errno_t
ifnet_detach(ifnet_t ifp)5387 ifnet_detach(ifnet_t ifp)
5388 {
5389 	ifnet_ref_t delegated_ifp;
5390 	struct nd_ifinfo *ndi = NULL;
5391 
5392 	if (ifp == NULL) {
5393 		return EINVAL;
5394 	}
5395 
5396 	ndi = ND_IFINFO(ifp);
5397 	if (NULL != ndi) {
5398 		ndi->cga_initialized = FALSE;
5399 	}
5400 	os_log(OS_LOG_DEFAULT, "%s detaching", if_name(ifp));
5401 
5402 	/* Mark the interface down */
5403 	if_down(ifp);
5404 
5405 	/*
5406 	 * IMPORTANT NOTE
5407 	 *
5408 	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
5409 	 * or equivalently, ifnet_is_attached(ifp, 1), can't be modified
5410 	 * until after we've waited for all I/O references to drain
5411 	 * in ifnet_detach_final().
5412 	 */
5413 
5414 	ifnet_head_lock_exclusive();
5415 	ifnet_lock_exclusive(ifp);
5416 
5417 	if (ifp->if_output_netem != NULL) {
5418 		netem_destroy(ifp->if_output_netem);
5419 		ifp->if_output_netem = NULL;
5420 	}
5421 
5422 	/*
5423 	 * Check to see if this interface has previously triggered
5424 	 * aggressive protocol draining; if so, decrement the global
5425 	 * refcnt and clear PR_AGGDRAIN on the route domain if
5426 	 * there are no more of such an interface around.
5427 	 */
5428 	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
5429 
5430 	lck_mtx_lock_spin(&ifp->if_ref_lock);
5431 	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
5432 		lck_mtx_unlock(&ifp->if_ref_lock);
5433 		ifnet_lock_done(ifp);
5434 		ifnet_head_done();
5435 		return EINVAL;
5436 	} else if (ifp->if_refflags & IFRF_DETACHING) {
5437 		/* Interface has already been detached */
5438 		lck_mtx_unlock(&ifp->if_ref_lock);
5439 		ifnet_lock_done(ifp);
5440 		ifnet_head_done();
5441 		return ENXIO;
5442 	}
5443 	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
5444 	/* Indicate this interface is being detached */
5445 	ifp->if_refflags &= ~IFRF_ATTACHED;
5446 	ifp->if_refflags |= IFRF_DETACHING;
5447 	lck_mtx_unlock(&ifp->if_ref_lock);
5448 
5449 	/* clean up flow control entry object if there's any */
5450 	if (ifp->if_eflags & IFEF_TXSTART) {
5451 		ifnet_flowadv(ifp->if_flowhash);
5452 	}
5453 
5454 	/* Reset ECN enable/disable flags */
5455 	/* Reset CLAT46 flag */
5456 	if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);
5457 
5458 	/*
5459 	 * We do not reset the TCP keep alive counters in case
5460 	 * a TCP connection stays connection after the interface
5461 	 * went down
5462 	 */
5463 	if (ifp->if_tcp_kao_cnt > 0) {
5464 		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
5465 		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
5466 	}
5467 	ifp->if_tcp_kao_max = 0;
5468 
5469 	/*
5470 	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
5471 	 * no longer be visible during lookups from this point.
5472 	 */
5473 	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
5474 	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
5475 	ifp->if_link.tqe_next = NULL;
5476 	ifp->if_link.tqe_prev = NULL;
5477 	if (ifp->if_ordered_link.tqe_next != NULL ||
5478 	    ifp->if_ordered_link.tqe_prev != NULL) {
5479 		ifnet_remove_from_ordered_list(ifp);
5480 	}
5481 	ifindex2ifnet[ifp->if_index] = NULL;
5482 
5483 	/* 18717626 - reset router mode */
5484 	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
5485 	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;
5486 
5487 	/* Record detach PC stacktrace */
5488 	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
5489 
5490 	/* Clear logging parameters */
5491 	bzero(&ifp->if_log, sizeof(ifp->if_log));
5492 
5493 	/* Clear delegated interface info (reference released below) */
5494 	delegated_ifp = ifp->if_delegated.ifp;
5495 	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));
5496 
5497 	/* Reset interface state */
5498 	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
5499 
5500 	/*
5501 	 * Increment the generation count on interface deletion
5502 	 */
5503 	ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);
5504 
5505 	ifnet_lock_done(ifp);
5506 	ifnet_head_done();
5507 
5508 	/* Release reference held on the delegated interface */
5509 	if (delegated_ifp != NULL) {
5510 		ifnet_release(delegated_ifp);
5511 	}
5512 
5513 	/* Reset Link Quality Metric (unless loopback [lo0]) */
5514 	if (ifp != lo_ifp) {
5515 		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
5516 	}
5517 
5518 	/* Force reset link heuristics */
5519 	if (ifp->if_link_heuristics_tcall != NULL) {
5520 		thread_call_cancel_wait(ifp->if_link_heuristics_tcall);
5521 		thread_call_free(ifp->if_link_heuristics_tcall);
5522 		ifp->if_link_heuristics_tcall = NULL;
5523 	}
5524 	if_clear_xflags(ifp, IFXF_LINK_HEURISTICS);
5525 
5526 	/* Reset TCP local statistics */
5527 	if (ifp->if_tcp_stat != NULL) {
5528 		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
5529 	}
5530 
5531 	/* Reset UDP local statistics */
5532 	if (ifp->if_udp_stat != NULL) {
5533 		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
5534 	}
5535 
5536 	/* Reset ifnet IPv4 stats */
5537 	if (ifp->if_ipv4_stat != NULL) {
5538 		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
5539 	}
5540 
5541 	/* Reset ifnet IPv6 stats */
5542 	if (ifp->if_ipv6_stat != NULL) {
5543 		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
5544 	}
5545 
5546 	/* Release memory held for interface link status report */
5547 	if (ifp->if_link_status != NULL) {
5548 		kfree_type(struct if_link_status, ifp->if_link_status);
5549 		ifp->if_link_status = NULL;
5550 	}
5551 
5552 	/* Disable forwarding cached route */
5553 	lck_mtx_lock(&ifp->if_cached_route_lock);
5554 	ifp->if_fwd_cacheok = 0;
5555 	lck_mtx_unlock(&ifp->if_cached_route_lock);
5556 
5557 	/* Disable data threshold and wait for any pending event posting */
5558 	ifp->if_data_threshold = 0;
5559 	VERIFY(ifp->if_dt_tcall != NULL);
5560 	(void) thread_call_cancel_wait(ifp->if_dt_tcall);
5561 
5562 	/*
5563 	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
5564 	 * references to the info structures and leave them attached to
5565 	 * this ifnet.
5566 	 */
5567 #if INET
5568 	igmp_domifdetach(ifp);
5569 #endif /* INET */
5570 	mld_domifdetach(ifp);
5571 
5572 #if SKYWALK
5573 	/* Clean up any netns tokens still pointing to to this ifnet */
5574 	netns_ifnet_detach(ifp);
5575 #endif /* SKYWALK */
5576 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);
5577 
5578 	/* Let worker thread take care of the rest, to avoid reentrancy */
5579 	dlil_if_lock();
5580 	ifnet_detaching_enqueue(ifp);
5581 	dlil_if_unlock();
5582 
5583 	return 0;
5584 }
5585 
/*
 * Append ifp to the list of interfaces awaiting final detach and wake
 * up the detacher thread.  Caller must hold the dlil interface lock.
 */
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	/* post-increment VERIFY catches counter wrap-around */
	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	/* rouse the detacher thread blocked on ifnet_delayed_run */
	wakeup((caddr_t)&ifnet_delayed_run);
}
5596 
5597 static struct ifnet *
ifnet_detaching_dequeue(void)5598 ifnet_detaching_dequeue(void)
5599 {
5600 	ifnet_ref_t ifp;
5601 
5602 	dlil_if_lock_assert();
5603 
5604 	ifp = TAILQ_FIRST(&ifnet_detaching_head);
5605 	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5606 	if (ifp != NULL) {
5607 		VERIFY(ifnet_detaching_cnt != 0);
5608 		--ifnet_detaching_cnt;
5609 		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5610 		ifp->if_detaching_link.tqe_next = NULL;
5611 		ifp->if_detaching_link.tqe_prev = NULL;
5612 	}
5613 	return ifp;
5614 }
5615 
/*
 * Continuation body of the detacher thread.  Drains the detaching list,
 * calling ifnet_detach_final() on each interface with the dlil lock
 * dropped, then blocks on ifnet_delayed_run with itself as the
 * continuation.  thread_block() with a continuation does not return,
 * hence the VERIFY(0) tail.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	ifnet_ref_t ifp;

	dlil_if_lock();
	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* drop the dlil lock across the (blocking) final detach */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	/* sleep until ifnet_detaching_enqueue() wakes us again */
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
5658 
/*
 * Entry point of the detacher thread.  Arms the wait on ifnet_delayed_run,
 * marks itself embryonic and issues a self-wakeup so that the continuation
 * runs once immediately (to decrement the pending-thread count), then
 * blocks into ifnet_detacher_thread_cont, which never returns.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
5675 
5676 static void
ifnet_detach_final(struct ifnet * ifp)5677 ifnet_detach_final(struct ifnet *ifp)
5678 {
5679 	struct ifnet_filter *filter, *filter_next;
5680 	struct dlil_ifnet *dlifp;
5681 	struct ifnet_filter_head fhead;
5682 	struct dlil_threading_info *inp;
5683 	struct ifaddr *ifa;
5684 	ifnet_detached_func if_free;
5685 	int i;
5686 	bool waited = false;
5687 
5688 	/* Let BPF know we're detaching */
5689 	bpfdetach(ifp);
5690 
5691 #if SKYWALK
5692 	dlil_netif_detach_notify(ifp);
5693 	/*
5694 	 * Wait for the datapath to quiesce before tearing down
5695 	 * netif/flowswitch nexuses.
5696 	 */
5697 	dlil_quiesce_and_detach_nexuses(ifp);
5698 #endif /* SKYWALK */
5699 
5700 	lck_mtx_lock(&ifp->if_ref_lock);
5701 	if (!(ifp->if_refflags & IFRF_DETACHING)) {
5702 		panic("%s: flags mismatch (detaching not set) ifp=%p",
5703 		    __func__, ifp);
5704 		/* NOTREACHED */
5705 	}
5706 
5707 	/*
5708 	 * Wait until the existing IO references get released
5709 	 * before we proceed with ifnet_detach.  This is not a
5710 	 * common case, so block without using a continuation.
5711 	 */
5712 	while (ifp->if_refio > 0) {
5713 		waited = true;
5714 		DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
5715 		    __func__, if_name(ifp));
5716 		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
5717 		    (PZERO - 1), "ifnet_ioref_wait", NULL);
5718 	}
5719 	if (waited) {
5720 		DLIL_PRINTF("%s: %s IO references drained\n",
5721 		    __func__, if_name(ifp));
5722 	}
5723 	VERIFY(ifp->if_datamov == 0);
5724 	VERIFY(ifp->if_drainers == 0);
5725 	VERIFY(ifp->if_suspend == 0);
5726 	ifp->if_refflags &= ~IFRF_READY;
5727 	lck_mtx_unlock(&ifp->if_ref_lock);
5728 
5729 #if SKYWALK
5730 	VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
5731 #endif /* SKYWALK */
5732 	/* Drain and destroy send queue */
5733 	ifclassq_teardown(ifp->if_snd);
5734 
5735 	/* Detach interface filters */
5736 	lck_mtx_lock(&ifp->if_flt_lock);
5737 	if_flt_monitor_enter(ifp);
5738 
5739 	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
5740 	fhead = ifp->if_flt_head;
5741 	TAILQ_INIT(&ifp->if_flt_head);
5742 
5743 	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
5744 		filter_next = TAILQ_NEXT(filter, filt_next);
5745 		lck_mtx_unlock(&ifp->if_flt_lock);
5746 
5747 		dlil_detach_filter_internal(filter, 1);
5748 		lck_mtx_lock(&ifp->if_flt_lock);
5749 	}
5750 	if_flt_monitor_leave(ifp);
5751 	lck_mtx_unlock(&ifp->if_flt_lock);
5752 
5753 	/* Tell upper layers to drop their network addresses */
5754 	if_purgeaddrs(ifp);
5755 
5756 	ifnet_lock_exclusive(ifp);
5757 
5758 	/* Clear agent IDs */
5759 	if (ifp->if_agentids != NULL) {
5760 		kfree_data_sized_by(ifp->if_agentids, ifp->if_agentcount);
5761 	}
5762 
5763 	bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
5764 	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
5765 
5766 	/* Unplumb all protocols */
5767 	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5768 		struct if_proto *proto;
5769 
5770 		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5771 		while (proto != NULL) {
5772 			protocol_family_t family = proto->protocol_family;
5773 			ifnet_lock_done(ifp);
5774 			proto_unplumb(family, ifp);
5775 			ifnet_lock_exclusive(ifp);
5776 			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5777 		}
5778 		/* There should not be any protocols left */
5779 		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
5780 	}
5781 	kfree_type_counted_by(struct proto_hash_entry, ifp->if_proto_hash_count, ifp->if_proto_hash);
5782 
5783 	/* Detach (permanent) link address from if_addrhead */
5784 	ifa = TAILQ_FIRST(&ifp->if_addrhead);
5785 	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
5786 	IFA_LOCK(ifa);
5787 	if_detach_link_ifa(ifp, ifa);
5788 	IFA_UNLOCK(ifa);
5789 
5790 	/* Remove (permanent) link address from ifnet_addrs[] */
5791 	ifa_remref(ifa);
5792 	ifnet_addrs[ifp->if_index - 1] = NULL;
5793 
5794 	/* This interface should not be on {ifnet_head,detaching} */
5795 	VERIFY(ifp->if_link.tqe_next == NULL);
5796 	VERIFY(ifp->if_link.tqe_prev == NULL);
5797 	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5798 	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5799 	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
5800 	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
5801 
5802 	/* The slot should have been emptied */
5803 	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5804 
5805 	/* There should not be any addresses left */
5806 	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5807 
5808 	/*
5809 	 * Signal the starter thread to terminate itself, and wait until
5810 	 * it has exited.
5811 	 */
5812 	if (ifp->if_start_thread != THREAD_NULL) {
5813 		lck_mtx_lock_spin(&ifp->if_start_lock);
5814 		ifp->if_start_flags |= IFSF_TERMINATING;
5815 		wakeup_one((caddr_t)&ifp->if_start_thread);
5816 		lck_mtx_unlock(&ifp->if_start_lock);
5817 
5818 		/* wait for starter thread to terminate */
5819 		lck_mtx_lock(&ifp->if_start_lock);
5820 		while (ifp->if_start_thread != THREAD_NULL) {
5821 			if (dlil_verbose) {
5822 				DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
5823 				    __func__,
5824 				    if_name(ifp));
5825 			}
5826 			(void) msleep(&ifp->if_start_thread,
5827 			    &ifp->if_start_lock, (PZERO - 1),
5828 			    "ifnet_start_thread_exit", NULL);
5829 		}
5830 		lck_mtx_unlock(&ifp->if_start_lock);
5831 		if (dlil_verbose) {
5832 			DLIL_PRINTF("%s: %s starter thread termination complete",
5833 			    __func__, if_name(ifp));
5834 		}
5835 	}
5836 
5837 	/*
5838 	 * Signal the poller thread to terminate itself, and wait until
5839 	 * it has exited.
5840 	 */
5841 	if (ifp->if_poll_thread != THREAD_NULL) {
5842 #if SKYWALK
5843 		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5844 #endif /* SKYWALK */
5845 		lck_mtx_lock_spin(&ifp->if_poll_lock);
5846 		ifp->if_poll_flags |= IF_POLLF_TERMINATING;
5847 		wakeup_one((caddr_t)&ifp->if_poll_thread);
5848 		lck_mtx_unlock(&ifp->if_poll_lock);
5849 
5850 		/* wait for poller thread to terminate */
5851 		lck_mtx_lock(&ifp->if_poll_lock);
5852 		while (ifp->if_poll_thread != THREAD_NULL) {
5853 			if (dlil_verbose) {
5854 				DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
5855 				    __func__,
5856 				    if_name(ifp));
5857 			}
5858 			(void) msleep(&ifp->if_poll_thread,
5859 			    &ifp->if_poll_lock, (PZERO - 1),
5860 			    "ifnet_poll_thread_exit", NULL);
5861 		}
5862 		lck_mtx_unlock(&ifp->if_poll_lock);
5863 		if (dlil_verbose) {
5864 			DLIL_PRINTF("%s: %s poller thread termination complete\n",
5865 			    __func__, if_name(ifp));
5866 		}
5867 	}
5868 
5869 	/*
5870 	 * If thread affinity was set for the workloop thread, we will need
5871 	 * to tear down the affinity and release the extra reference count
5872 	 * taken at attach time.  Does not apply to lo0 or other interfaces
5873 	 * without dedicated input threads.
5874 	 */
5875 	if ((inp = ifp->if_inp) != NULL) {
5876 		VERIFY(inp != dlil_main_input_thread);
5877 
5878 		if (inp->dlth_affinity) {
5879 			struct thread *__single tp, *__single wtp, *__single ptp;
5880 
5881 			lck_mtx_lock_spin(&inp->dlth_lock);
5882 			wtp = inp->dlth_driver_thread;
5883 			inp->dlth_driver_thread = THREAD_NULL;
5884 			ptp = inp->dlth_poller_thread;
5885 			inp->dlth_poller_thread = THREAD_NULL;
5886 			ASSERT(inp->dlth_thread != THREAD_NULL);
5887 			tp = inp->dlth_thread;    /* don't nullify now */
5888 			inp->dlth_affinity_tag = 0;
5889 			inp->dlth_affinity = FALSE;
5890 			lck_mtx_unlock(&inp->dlth_lock);
5891 
5892 			/* Tear down poll thread affinity */
5893 			if (ptp != NULL) {
5894 				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
5895 				VERIFY(ifp->if_xflags & IFXF_LEGACY);
5896 				(void) dlil_affinity_set(ptp,
5897 				    THREAD_AFFINITY_TAG_NULL);
5898 				thread_deallocate(ptp);
5899 			}
5900 
5901 			/* Tear down workloop thread affinity */
5902 			if (wtp != NULL) {
5903 				(void) dlil_affinity_set(wtp,
5904 				    THREAD_AFFINITY_TAG_NULL);
5905 				thread_deallocate(wtp);
5906 			}
5907 
5908 			/* Tear down DLIL input thread affinity */
5909 			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
5910 			thread_deallocate(tp);
5911 		}
5912 
5913 		/* disassociate ifp DLIL input thread */
5914 		ifp->if_inp = NULL;
5915 
5916 		/* if the worker thread was created, tell it to terminate */
5917 		if (inp->dlth_thread != THREAD_NULL) {
5918 			lck_mtx_lock_spin(&inp->dlth_lock);
5919 			inp->dlth_flags |= DLIL_INPUT_TERMINATE;
5920 			if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
5921 				wakeup_one((caddr_t)&inp->dlth_flags);
5922 			}
5923 			lck_mtx_unlock(&inp->dlth_lock);
5924 			ifnet_lock_done(ifp);
5925 
5926 			/* wait for the input thread to terminate */
5927 			lck_mtx_lock_spin(&inp->dlth_lock);
5928 			while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
5929 			    == 0) {
5930 				(void) msleep(&inp->dlth_flags, &inp->dlth_lock,
5931 				    (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
5932 			}
5933 			lck_mtx_unlock(&inp->dlth_lock);
5934 			ifnet_lock_exclusive(ifp);
5935 		}
5936 
5937 		/* clean-up input thread state */
5938 		dlil_clean_threading_info(inp);
5939 		/* clean-up poll parameters */
5940 		VERIFY(ifp->if_poll_thread == THREAD_NULL);
5941 		dlil_reset_rxpoll_params(ifp);
5942 	}
5943 
5944 	/* The driver might unload, so point these to ourselves */
5945 	if_free = ifp->if_free;
5946 	ifp->if_output_dlil = ifp_if_output;
5947 	ifp->if_output = ifp_if_output;
5948 	ifp->if_pre_enqueue = ifp_if_output;
5949 	ifp->if_start = ifp_if_start;
5950 	ifp->if_output_ctl = ifp_if_ctl;
5951 	ifp->if_input_dlil = ifp_if_input;
5952 	ifp->if_input_poll = ifp_if_input_poll;
5953 	ifp->if_input_ctl = ifp_if_ctl;
5954 	ifp->if_ioctl = ifp_if_ioctl;
5955 	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
5956 	ifp->if_free = ifp_if_free;
5957 	ifp->if_demux = ifp_if_demux;
5958 	ifp->if_event = ifp_if_event;
5959 	ifp->if_framer_legacy = ifp_if_framer;
5960 	ifp->if_framer = ifp_if_framer_extended;
5961 	ifp->if_add_proto = ifp_if_add_proto;
5962 	ifp->if_del_proto = ifp_if_del_proto;
5963 	ifp->if_check_multi = ifp_if_check_multi;
5964 
5965 	/* wipe out interface description */
5966 	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5967 	ifp->if_desc.ifd_len = 0;
5968 	VERIFY(ifp->if_desc.ifd_desc != NULL);
5969 	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
5970 
5971 	/* there shouldn't be any delegation by now */
5972 	VERIFY(ifp->if_delegated.ifp == NULL);
5973 	VERIFY(ifp->if_delegated.type == 0);
5974 	VERIFY(ifp->if_delegated.family == 0);
5975 	VERIFY(ifp->if_delegated.subfamily == 0);
5976 	VERIFY(ifp->if_delegated.expensive == 0);
5977 	VERIFY(ifp->if_delegated.constrained == 0);
5978 	VERIFY(ifp->if_delegated.ultra_constrained == 0);
5979 
5980 	/* QoS marking get cleared */
5981 	if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
5982 	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
5983 
5984 #if SKYWALK
5985 	/* the nexus destructor is responsible for clearing these */
5986 	VERIFY(ifp->if_na_ops == NULL);
5987 	VERIFY(ifp->if_na == NULL);
5988 #endif /* SKYWALK */
5989 
5990 	/* interface could come up with different hwassist next time */
5991 	ifp->if_hwassist = 0;
5992 	ifp->if_capenable = 0;
5993 
5994 	/* promiscuous/allmulti counts need to start at zero again */
5995 	ifp->if_pcount = 0;
5996 	ifp->if_amcount = 0;
5997 	ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);
5998 
5999 	ifnet_lock_done(ifp);
6000 
6001 #if PF
6002 	/*
6003 	 * Detach this interface from packet filter, if enabled.
6004 	 */
6005 	pf_ifnet_hook(ifp, 0);
6006 #endif /* PF */
6007 
6008 	/* Filter list should be empty */
6009 	lck_mtx_lock_spin(&ifp->if_flt_lock);
6010 	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6011 	VERIFY(ifp->if_flt_busy == 0);
6012 	VERIFY(ifp->if_flt_waiters == 0);
6013 	VERIFY(ifp->if_flt_non_os_count == 0);
6014 	VERIFY(ifp->if_flt_no_tso_count == 0);
6015 	lck_mtx_unlock(&ifp->if_flt_lock);
6016 
6017 	/* Last chance to drain send queue */
6018 	if_qflush_snd(ifp, 0);
6019 
6020 	/* Last chance to cleanup any cached route */
6021 	lck_mtx_lock(&ifp->if_cached_route_lock);
6022 	VERIFY(!ifp->if_fwd_cacheok);
6023 	ROUTE_RELEASE(&ifp->if_fwd_route);
6024 	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
6025 	ROUTE_RELEASE(&ifp->if_src_route);
6026 	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
6027 	ROUTE_RELEASE(&ifp->if_src_route6);
6028 	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6029 	lck_mtx_unlock(&ifp->if_cached_route_lock);
6030 
6031 	/* Ignore any pending data threshold as the interface is anyways gone */
6032 	ifp->if_data_threshold = 0;
6033 
6034 	VERIFY(ifp->if_dt_tcall != NULL);
6035 	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
6036 
6037 	ifnet_llreach_ifdetach(ifp);
6038 
6039 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);
6040 
6041 	/*
6042 	 * Finally, mark this ifnet as detached.
6043 	 */
6044 	os_log(OS_LOG_DEFAULT, "%s detached", if_name(ifp));
6045 
6046 	lck_mtx_lock_spin(&ifp->if_ref_lock);
6047 	if (!(ifp->if_refflags & IFRF_DETACHING)) {
6048 		panic("%s: flags mismatch (detaching not set) ifp=%p",
6049 		    __func__, ifp);
6050 		/* NOTREACHED */
6051 	}
6052 	ifp->if_refflags &= ~IFRF_DETACHING;
6053 	lck_mtx_unlock(&ifp->if_ref_lock);
6054 	if (if_free != NULL) {
6055 		if_free(ifp);
6056 	}
6057 
6058 	ifclassq_release(&ifp->if_snd);
6059 
6060 	/* we're fully detached, clear the "in use" bit */
6061 	dlifp = (struct dlil_ifnet *)ifp;
6062 	lck_mtx_lock(&dlifp->dl_if_lock);
6063 	ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
6064 	dlifp->dl_if_flags &= ~DLIF_INUSE;
6065 	lck_mtx_unlock(&dlifp->dl_if_lock);
6066 
6067 	/* Release reference held during ifnet attach */
6068 	ifnet_release(ifp);
6069 }
6070 
/*
 * Stub output handler installed by ifnet_detach_final(): the driver may
 * unload, so any packets still handed to the ifnet are silently dropped.
 */
errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}
6078 
/*
 * Stub starter installed by ifnet_detach_final(); purges anything still
 * queued on the interface.
 */
void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
6084 
/*
 * Stub input handler installed by ifnet_detach_final(): frees the whole
 * inbound chain and reports ENXIO (device not configured).
 */
static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}
6094 
/*
 * Stub poll handler installed by ifnet_detach_final(): reports an empty
 * packet chain and zeroed counters through whichever out-parameters the
 * caller supplied.
 */
static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL) {
		*m_head = NULL;
	}
	if (m_tail != NULL) {
		*m_tail = NULL;
	}
	if (cnt != NULL) {
		*cnt = 0;
	}
	if (len != NULL) {
		*len = 0;
	}
}
6113 
/*
 * Stub input/output control handler installed by ifnet_detach_final();
 * rejects every command.
 */
static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}
6120 
/*
 * Stub demux installed by ifnet_detach_final(): frees the packet and
 * returns EJUSTRETURN so the caller does no further processing.
 */
static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}
6128 
/*
 * Stub protocol-attach handler installed by ifnet_detach_final();
 * no protocol may be plumbed to a detached interface.
 */
static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}
6136 
/*
 * Stub protocol-detach handler installed by ifnet_detach_final();
 * there are no protocols left to remove at this point.
 */
static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}
6143 
/*
 * Stub multicast-membership check installed by ifnet_detach_final();
 * always unsupported.
 */
static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}
6150 
#if !XNU_TARGET_OS_OSX
/*
 * Legacy stub framer installed by ifnet_detach_final().  The signature
 * differs per platform: the non-macOS variant carries pre/post header
 * length out-parameters, macOS does not.  Both forward to the extended
 * variant, which consumes the packet.
 */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
6169 
6170 static errno_t
ifp_if_framer_extended(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * sa,IFNET_LLADDR_T ll,IFNET_FRAME_TYPE_T t,u_int32_t * pre,u_int32_t * post)6171 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6172     const struct sockaddr *sa,
6173     IFNET_LLADDR_T ll,
6174     IFNET_FRAME_TYPE_T t,
6175     u_int32_t *pre, u_int32_t *post)
6176 {
6177 #pragma unused(ifp, sa, ll, t)
6178 	m_freem(*m);
6179 	*m = NULL;
6180 
6181 	if (pre != NULL) {
6182 		*pre = 0;
6183 	}
6184 	if (post != NULL) {
6185 		*post = 0;
6186 	}
6187 
6188 	return EJUSTRETURN;
6189 }
6190 
/*
 * Stub ioctl handler installed by ifnet_detach_final(); rejects every
 * request on a detached interface.
 */
errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}
6197 
/*
 * Stub BPF-tap setter installed by ifnet_detach_final(); accepts the
 * request as a no-op.
 */
static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return 0;
}
6205 
/*
 * Stub free callback installed by ifnet_detach_final() after the real
 * if_free has been captured and invoked; intentionally a no-op.
 */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}
6211 
/*
 * Stub event callback installed by ifnet_detach_final(); events on a
 * detached interface are dropped.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
6217 
/*
 * Unplumb every protocol family explicitly attached to this interface;
 * see the comment below for why only PF_INET and PF_INET6 need it.
 */
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
6233 
/*
 * Copy the interface's cached IPv4 source route into *dst.  The cache
 * lock is taken in spin mode and converted to a full mutex before the
 * copy is performed.
 */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6244 
/*
 * Install *src as the interface's cached IPv4 source route.  If route
 * caching has been disabled (if_fwd_cacheok cleared, e.g. during
 * detach), the caller's route reference is released instead of cached.
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6258 
/*
 * IPv6 counterpart of ifp_src_route_copyout(): copy the cached IPv6
 * source route into *dst under the converted cache lock.
 */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6270 
/*
 * IPv6 counterpart of ifp_src_route_copyin(): install *src as the cached
 * IPv6 source route, or release the caller's reference if caching is
 * disabled (if_fwd_cacheok cleared).
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6285 
/*
 * Look up the route to src_ip using the interface's per-ifnet IPv4
 * route cache.  On a cache miss (unusable route or different
 * destination) a fresh scoped lookup is performed and the result is
 * placed back in the cache.  Returns the rtentry (carrying a reference
 * for the caller on the miss path via the explicit RT_ADDREF), or NULL
 * if the lookup fails.
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route            src_rt;
	struct sockaddr_in      *dst;

	/* dst aliases the destination sockaddr embedded in src_rt */
	dst = SIN(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the sockaddr if it isn't already AF_INET */
		if (dst->sin_family != AF_INET) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped(SA(dst),
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry  *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
6320 
/*
 * IPv6 counterpart of ifnet_cached_rtlookup_inet(): look up the route
 * to *src_ip6 via the per-ifnet IPv6 route cache, refreshing the cache
 * with a scoped lookup on a miss.  Returns the rtentry (with a
 * reference retained for the caller on the miss path), or NULL.
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		/*
		 * NOTE(review): the IPv4 variant VERIFYs ro_rt == NULL here
		 * and looks up unconditionally; after ROUTE_RELEASE this
		 * condition should always hold — confirm the guard (rather
		 * than a VERIFY) is intentional.
		 */
		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				SA(&src_rt.ro_dst), 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry  *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
6357 
/*
 * Update the interface's Link Quality Metric state.  The raw lqm value
 * is first normalized to one of the threshold edges; time spent in the
 * previous state and a per-state transition count are accounted before
 * the new state is recorded.  The kernel event and any NECP client
 * update are issued with the ifnet lock dropped.  'locked' tells
 * whether the caller already holds the ifnet lock exclusively; in that
 * case the lock is reacquired for the caller before returning.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;
	uint64_t now, delta;
	int8_t old_lqm;
	bool need_necp_client_update;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		/* ask the TCP fast timer to handle LQM-abort processing */
		os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return;         /* nothing to update */
	}

	/* account time spent in the outgoing state */
	net_update_uptime();
	now = net_uptime_ms();
	ASSERT(now >= ifp->if_lqmstate_start_time);
	delta = now - ifp->if_lqmstate_start_time;

	old_lqm = ifp->if_interface_state.lqm_state;
	switch (old_lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_time += delta;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_time += delta;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_time += delta;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_time += delta;
		break;
	default:
		break;
	}
	/* count the transition into the incoming state */
	switch (lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_cnt += 1;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_cnt += 1;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_cnt += 1;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_cnt += 1;
		break;
	default:
		break;
	}
	ifp->if_lqmstate_start_time = now;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Update the link heuristics
	 */
	need_necp_client_update = if_update_link_heuristic(ifp);

	/*
	 * Don't want to hold the lock when issuing kernel events or calling NECP
	 */
	ifnet_lock_done(ifp);

	if (need_necp_client_update) {
		necp_update_all_clients_immediately_if_needed(true);
	}

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
6474 
/*
 * Record a new RRC state for the interface and post a
 * KEV_DL_RRC_STATE_CHANGED kernel event if it actually changed.
 *
 * Locking: called with the ifnet lock held exclusively (see
 * if_state_update).  The lock is dropped while the kernel event is
 * posted and reacquired before returning, so the caller still holds
 * it on return.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* No-op if the same state is already recorded and marked valid */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	/* Reacquire the exclusive lock for the caller */
	ifnet_lock_exclusive(ifp);
}
6504 
6505 errno_t
if_state_update(struct ifnet * ifp,struct if_interface_state * if_interface_state)6506 if_state_update(struct ifnet *ifp,
6507     struct if_interface_state *if_interface_state)
6508 {
6509 	u_short if_index_available = 0;
6510 
6511 	ifnet_lock_exclusive(ifp);
6512 
6513 	if ((ifp->if_type != IFT_CELLULAR) &&
6514 	    (if_interface_state->valid_bitmask &
6515 	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
6516 		ifnet_lock_done(ifp);
6517 		return ENOTSUP;
6518 	}
6519 	if ((if_interface_state->valid_bitmask &
6520 	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
6521 	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
6522 	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
6523 		ifnet_lock_done(ifp);
6524 		return EINVAL;
6525 	}
6526 	if ((if_interface_state->valid_bitmask &
6527 	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
6528 	    if_interface_state->rrc_state !=
6529 	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
6530 	    if_interface_state->rrc_state !=
6531 	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
6532 		ifnet_lock_done(ifp);
6533 		return EINVAL;
6534 	}
6535 
6536 	if (if_interface_state->valid_bitmask &
6537 	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
6538 		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
6539 	}
6540 	if (if_interface_state->valid_bitmask &
6541 	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
6542 		if_rrc_state_update(ifp, if_interface_state->rrc_state);
6543 	}
6544 	if (if_interface_state->valid_bitmask &
6545 	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6546 		ifp->if_interface_state.valid_bitmask |=
6547 		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6548 		ifp->if_interface_state.interface_availability =
6549 		    if_interface_state->interface_availability;
6550 
6551 		if (ifp->if_interface_state.interface_availability ==
6552 		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
6553 			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
6554 			    __func__, if_name(ifp), ifp->if_index);
6555 			if_index_available = ifp->if_index;
6556 		} else {
6557 			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
6558 			    __func__, if_name(ifp), ifp->if_index);
6559 		}
6560 	}
6561 	ifnet_lock_done(ifp);
6562 
6563 	/*
6564 	 * Check if the TCP connections going on this interface should be
6565 	 * forced to send probe packets instead of waiting for TCP timers
6566 	 * to fire. This is done on an explicit notification such as
6567 	 * SIOCSIFINTERFACESTATE which marks the interface as available.
6568 	 */
6569 	if (if_index_available > 0) {
6570 		tcp_interface_send_probe(if_index_available);
6571 	}
6572 
6573 	return 0;
6574 }
6575 
6576 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)6577 if_get_state(struct ifnet *ifp,
6578     struct if_interface_state *if_interface_state)
6579 {
6580 	ifnet_lock_shared(ifp);
6581 
6582 	if_interface_state->valid_bitmask = 0;
6583 
6584 	if (ifp->if_interface_state.valid_bitmask &
6585 	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
6586 		if_interface_state->valid_bitmask |=
6587 		    IF_INTERFACE_STATE_RRC_STATE_VALID;
6588 		if_interface_state->rrc_state =
6589 		    ifp->if_interface_state.rrc_state;
6590 	}
6591 	if (ifp->if_interface_state.valid_bitmask &
6592 	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
6593 		if_interface_state->valid_bitmask |=
6594 		    IF_INTERFACE_STATE_LQM_STATE_VALID;
6595 		if_interface_state->lqm_state =
6596 		    ifp->if_interface_state.lqm_state;
6597 	}
6598 	if (ifp->if_interface_state.valid_bitmask &
6599 	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6600 		if_interface_state->valid_bitmask |=
6601 		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6602 		if_interface_state->interface_availability =
6603 		    ifp->if_interface_state.interface_availability;
6604 	}
6605 
6606 	ifnet_lock_done(ifp);
6607 }
6608 
6609 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)6610 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6611 {
6612 	if (conn_probe > 1) {
6613 		return EINVAL;
6614 	}
6615 	if (conn_probe == 0) {
6616 		if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6617 	} else {
6618 		if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6619 	}
6620 
6621 #if NECP
6622 	necp_update_all_clients();
6623 #endif /* NECP */
6624 
6625 	tcp_probe_connectivity(ifp, conn_probe);
6626 	return 0;
6627 }
6628 
/* for uuid.c */
/*
 * Find the interface index of "en0" if it exists; otherwise report,
 * via *ret_other_index, the lowest-unit "en" interface or, failing
 * that, any IFT_ETHER interface.
 *
 * Returns en0's index, or 0 if en0 was not found (in which case
 * *ret_other_index may hold a fallback index, or 0 if none).
 *
 * Locking: walks ifnet_head, so the caller is expected to hold the
 * ifnet head lock (see uuid_get_ethernet); each ifp is examined under
 * its own shared lock.
 */
static int
get_ether_index(int * ret_other_index)
{
	ifnet_ref_t ifp;
	int en0_index = 0;
	int other_en_index = 0;
	int any_ether_index = 0;
	short best_unit = 0;

	*ret_other_index = 0;
	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
		/*
		 * find en0, or if not en0, the lowest unit en*, and if not
		 * that, any ethernet
		 */
		ifnet_lock_shared(ifp);
		if (strcmp(ifp->if_name, "en") == 0) {
			if (ifp->if_unit == 0) {
				/* found en0, we're done */
				en0_index = ifp->if_index;
				ifnet_lock_done(ifp);
				break;
			}
			if (other_en_index == 0 || ifp->if_unit < best_unit) {
				other_en_index = ifp->if_index;
				best_unit = ifp->if_unit;
			}
		} else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
			any_ether_index = ifp->if_index;
		}
		ifnet_lock_done(ifp);
	}
	/* Fall back to the best non-en0 candidate, if any */
	if (en0_index == 0) {
		if (other_en_index != 0) {
			*ret_other_index = other_en_index;
		} else if (any_ether_index != 0) {
			*ret_other_index = any_ether_index;
		}
	}
	return en0_index;
}
6671 
/*
 * Copy an Ethernet address suitable for UUID generation into `node'
 * (ETHER_ADDR_LEN bytes).  Prefers en0; falls back to another "en"
 * interface or any Ethernet interface (see get_ether_index).  The
 * permanent hardware address is used when one was recorded for the
 * interface, since it never changes.
 *
 * Returns 0 on success, -1 if no suitable interface exists.
 *
 * Note: en0_index is cached across calls in a function-local static
 * and revalidated against ifindex2ifnet[] under the head lock.
 */
int
uuid_get_ethernet(u_int8_t *__counted_by(ETHER_ADDR_LEN) node)
{
	static int en0_index;
	ifnet_ref_t ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	/* Re-resolve if never looked up or if the cached index went stale */
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
		} else {
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		}
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();
	return ret;
}
6713 
6714 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6715 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6716     int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6717 {
6718 	struct kev_dl_node_presence kev;
6719 	struct sockaddr_dl *sdl;
6720 	struct sockaddr_in6 *sin6;
6721 	int ret = 0;
6722 
6723 	VERIFY(ifp);
6724 	VERIFY(sa);
6725 	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6726 
6727 	bzero(&kev, sizeof(kev));
6728 	sin6 = &kev.sin6_node_address;
6729 	sdl = &kev.sdl_node_address;
6730 	nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6731 	kev.rssi = rssi;
6732 	kev.link_quality_metric = lqm;
6733 	kev.node_proximity_metric = npm;
6734 	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6735 
6736 	ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6737 	if (ret == 0 || ret == EEXIST) {
6738 		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6739 		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6740 		if (err != 0) {
6741 			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
6742 			    "error %d\n", __func__, err);
6743 		}
6744 	}
6745 
6746 	if (ret == EEXIST) {
6747 		ret = 0;
6748 	}
6749 	return ret;
6750 }
6751 
/*
 * Report that a previously-present node has left the link: remove it
 * from the ND6 alternative node table and, on success, post a
 * KEV_DL_NODE_ABSENCE kernel event identifying the node.
 *
 * `sa' may be either the node's AF_INET6 address (in which case the
 * link-layer address is recovered from the neighbor cache) or an
 * AF_LINK address (from which the IPv6 form is derived).
 */
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;
	int error = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
		error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	/* Only post the event if the node was actually removed */
	if (error == 0) {
		kev_sdl->sdl_type = ifp->if_type;
		kev_sdl->sdl_index = ifp->if_index;

		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
		    &kev.link_data, sizeof(kev), FALSE);
	}
}
6792 
/*
 * Variant of dlil_node_present() where the caller supplies both the
 * node's IPv6 address (`sa', AF_INET6) and its link-layer address
 * (`sdl', AF_LINK) explicitly, instead of having one derived from the
 * other.  Records the node via nd6_alt_node_present() and posts a
 * KEV_DL_NODE_PRESENCE kernel event.
 *
 * EEXIST (node already known) is mapped to 0 on return; the event is
 * still posted in that case, flagged as an update (last arg TRUE).
 */
int
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	SOCKADDR_COPY(sdl, kev_sdl, sdl->sdl_len);
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0 || ret == EEXIST) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
		}
	}

	/* An already-known node is not an error to the caller */
	if (ret == EEXIST) {
		ret = 0;
	}
	return ret;
}
6836 
6837 const void *
dlil_ifaddr_bytes(const struct sockaddr_dl * sdl,size_t * sizep,kauth_cred_t * credp)6838 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6839     kauth_cred_t *credp)
6840 {
6841 	const u_int8_t *bytes;
6842 	size_t size;
6843 
6844 	bytes = CONST_LLADDR(sdl);
6845 	size = sdl->sdl_alen;
6846 
6847 #if CONFIG_MACF
6848 	if (dlil_lladdr_ckreq) {
6849 		switch (sdl->sdl_type) {
6850 		case IFT_ETHER:
6851 		case IFT_IEEE1394:
6852 			break;
6853 		default:
6854 			credp = NULL;
6855 			break;
6856 		}
6857 		;
6858 
6859 		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6860 			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6861 				[0] = 2
6862 			};
6863 
6864 			bytes = unspec;
6865 		}
6866 	}
6867 #else
6868 #pragma unused(credp)
6869 #endif
6870 
6871 	if (sizep != NULL) {
6872 		*sizep = size;
6873 	}
6874 	return bytes;
6875 }
6876 
6877 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])6878 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6879     u_int8_t info[DLIL_MODARGLEN])
6880 {
6881 	struct kev_dl_issues kev;
6882 	struct timeval tv;
6883 
6884 	VERIFY(ifp != NULL);
6885 	VERIFY(modid != NULL);
6886 	_CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
6887 	_CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
6888 
6889 	bzero(&kev, sizeof(kev));
6890 
6891 	microtime(&tv);
6892 	kev.timestamp = tv.tv_sec;
6893 	bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6894 	if (info != NULL) {
6895 		bcopy(info, &kev.info, DLIL_MODARGLEN);
6896 	}
6897 
6898 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6899 	    &kev.link_data, sizeof(kev), FALSE);
6900 }
6901 
/*
 * Handle SIOCSIFOPPORTUNISTIC (set) and SIOCGIFOPPORTUNISTIC (get)
 * ioctls: map the ifr_opportunistic flags to/from the interface
 * throttling level, and report the number of opportunistic TCP/UDP
 * connections currently using the interface via ifo_inuse.
 *
 * Setting requires superuser.  EALREADY from ifnet_set_throttle (level
 * unchanged) is mapped to success on return, but note it suppresses
 * the ifo_inuse update below.
 */
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		/* Only "block" (throttle on) or no flags (off) are valid */
		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		/* Get: translate the current level back into flags */
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	/* A set to the already-current level is not an error */
	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
6960 
6961 int
ifnet_get_throttle(struct ifnet * ifp,u_int32_t * level)6962 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
6963 {
6964 	struct ifclassq *ifq;
6965 	int err = 0;
6966 
6967 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
6968 		return ENXIO;
6969 	}
6970 
6971 	*level = IFNET_THROTTLE_OFF;
6972 
6973 	ifq = ifp->if_snd;
6974 	IFCQ_LOCK(ifq);
6975 	/* Throttling works only for IFCQ, not ALTQ instances */
6976 	if (IFCQ_IS_ENABLED(ifq)) {
6977 		cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
6978 
6979 		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
6980 		*level = req.level;
6981 	}
6982 	IFCQ_UNLOCK(ifq);
6983 
6984 	return err;
6985 }
6986 
6987 int
ifnet_set_throttle(struct ifnet * ifp,u_int32_t level)6988 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
6989 {
6990 	struct ifclassq *ifq;
6991 	int err = 0;
6992 
6993 	if (!(ifp->if_eflags & IFEF_TXSTART)) {
6994 		return ENXIO;
6995 	}
6996 
6997 	ifq = ifp->if_snd;
6998 
6999 	switch (level) {
7000 	case IFNET_THROTTLE_OFF:
7001 	case IFNET_THROTTLE_OPPORTUNISTIC:
7002 		break;
7003 	default:
7004 		return EINVAL;
7005 	}
7006 
7007 	IFCQ_LOCK(ifq);
7008 	if (IFCQ_IS_ENABLED(ifq)) {
7009 		cqrq_throttle_t req = { 1, level };
7010 
7011 		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
7012 	}
7013 	IFCQ_UNLOCK(ifq);
7014 
7015 	if (err == 0) {
7016 		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
7017 		    level);
7018 #if NECP
7019 		necp_update_all_clients();
7020 #endif /* NECP */
7021 		if (level == IFNET_THROTTLE_OFF) {
7022 			ifnet_start(ifp);
7023 		}
7024 	}
7025 
7026 	return err;
7027 }
7028 
/*
 * Handle SIOCSIFLOG (set) and SIOCGIFLOG (get) ioctls for the
 * interface logging parameters (level, facility flags, category and
 * subcategory).  Setting requires PRIV_NET_INTERFACE_CONTROL.
 *
 * Note the set path validates level and flags independently,
 * accumulating any failure into `result' before deciding whether to
 * call ifnet_set_log(); `flags' is masked in place by the check.
 */
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}

		/* At least one recognized facility flag must be set */
		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
7076 
/*
 * Set the interface logging level, facility flags, category and
 * subcategory.  The level applies to all facilities, so the new flags
 * are merged with whatever facilities were already enabled.  Facilities
 * other than DLIL are forwarded to the driver through if_output_ctl
 * when the interface registered one; otherwise they are silently
 * dropped.  Setting the level back to IFNET_LOG_DEFAULT clears all
 * facility flags.
 *
 * Callers must pass a validated level and at least one facility flag
 * (see the VERIFYs and ifnet_getset_log).
 */
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		/* DLIL's own facility is handled here, not by the driver */
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		/* Default level resets the facility set; otherwise merge */
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
		    "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags, flags,
		    category, subcategory);
	}

	return err;
}
7134 
7135 int
ifnet_get_log(struct ifnet * ifp,int32_t * level,uint32_t * flags,int32_t * category,int32_t * subcategory)7136 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7137     int32_t *category, int32_t *subcategory)
7138 {
7139 	if (level != NULL) {
7140 		*level = ifp->if_log.level;
7141 	}
7142 	if (flags != NULL) {
7143 		*flags = ifp->if_log.flags;
7144 	}
7145 	if (category != NULL) {
7146 		*category = ifp->if_log.category;
7147 	}
7148 	if (subcategory != NULL) {
7149 		*subcategory = ifp->if_log.subcategory;
7150 	}
7151 
7152 	return 0;
7153 }
7154 
/*
 * Notify the interface driver (via its output-control callback) that
 * an address of family `af' changed on the interface, and give PF a
 * chance to update its state.  Returns EOPNOTSUPP when the driver has
 * no if_output_ctl callback; otherwise the callback's result.
 */
int
ifnet_notify_address(struct ifnet *ifp, int af)
{
	struct ifnet_notify_address_params na;

#if PF
	(void) pf_ifaddr_hook(ifp);
#endif /* PF */

	if (ifp->if_output_ctl == NULL) {
		return EOPNOTSUPP;
	}

	bzero(&na, sizeof(na));
	na.address_family = (sa_family_t)af;

	return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
	           sizeof(na), &na);
}
7174 
7175 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)7176 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7177 {
7178 	if (ifp == NULL || flowid == NULL) {
7179 		return EINVAL;
7180 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7181 	    !IF_FULLY_ATTACHED(ifp)) {
7182 		return ENXIO;
7183 	}
7184 
7185 	*flowid = ifp->if_flowhash;
7186 
7187 	return 0;
7188 }
7189 
7190 errno_t
ifnet_disable_output(struct ifnet * ifp)7191 ifnet_disable_output(struct ifnet *ifp)
7192 {
7193 	int err = 0;
7194 
7195 	if (ifp == NULL) {
7196 		return EINVAL;
7197 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7198 	    !IF_FULLY_ATTACHED(ifp)) {
7199 		return ENXIO;
7200 	}
7201 
7202 	lck_mtx_lock(&ifp->if_start_lock);
7203 	if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
7204 		ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
7205 	} else if ((err = ifnet_fc_add(ifp)) == 0) {
7206 		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7207 	}
7208 	lck_mtx_unlock(&ifp->if_start_lock);
7209 
7210 	return err;
7211 }
7212 
7213 errno_t
ifnet_enable_output(struct ifnet * ifp)7214 ifnet_enable_output(struct ifnet *ifp)
7215 {
7216 	if (ifp == NULL) {
7217 		return EINVAL;
7218 	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7219 	    !IF_FULLY_ATTACHED(ifp)) {
7220 		return ENXIO;
7221 	}
7222 
7223 	ifnet_start_common(ifp, TRUE, FALSE);
7224 	return 0;
7225 }
7226 
/*
 * Flow-advisory callback: resume output on the interface registered
 * under `flowhash' in the flow-control tree, if any.  The entry is
 * removed from the tree by ifnet_fc_get() and freed here.
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	ifnet_ref_t ifp;

	/* Lookup removes the entry from the tree; NULL if not found */
	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			/*
			 * Mark the resume as pending (unless flow control was
			 * already lifted) before restarting output.
			 */
			lck_mtx_lock_spin(&ifp->if_start_lock);
			if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
				ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
			}
			lck_mtx_unlock(&ifp->if_start_lock);
			(void) ifnet_enable_output(ifp);
		}
		/* Drop the I/O reference taken by ifnet_is_attached() */
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
7255 
7256 /*
7257  * Function to compare ifnet_fc_entries in ifnet flow control tree
7258  */
7259 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)7260 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7261 {
7262 	return fc1->ifce_flowhash - fc2->ifce_flowhash;
7263 }
7264 
/*
 * Register the interface in the flow-control tree, keyed by its flow
 * hash.  Returns 0 if the entry was added (or already present for the
 * same ifp), EAGAIN on a flow-hash collision with a different ifp.
 *
 * Locking: takes ifnet_fc_lock as a spin lock for the lookup and
 * converts it to a regular mutex before the blocking zalloc.
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer.  There can be a collision
		 * on flow hash but the probability is low.  Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
7308 
/*
 * Look up (and remove) the flow-control entry keyed by `flowhash'.
 * Returns the detached entry, or NULL when there is no entry or when
 * the associated interface is not attached (in which case the entry
 * is freed here).  The caller owns the returned entry and must free
 * it with ifnet_fc_entry_free().
 *
 * Locking: spin-locks ifnet_fc_lock for the lookup/removal, then
 * converts to a regular mutex before the attach check.
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	ifnet_ref_t ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
7346 
7347 static void
ifnet_fc_entry_free(struct ifnet_fc_entry * ifce)7348 ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
7349 {
7350 	zfree(ifnet_fc_zone, ifce);
7351 }
7352 
7353 static uint32_t
ifnet_calc_flowhash(struct ifnet * ifp)7354 ifnet_calc_flowhash(struct ifnet *ifp)
7355 {
7356 	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7357 	uint32_t flowhash = 0;
7358 
7359 	if (ifnet_flowhash_seed == 0) {
7360 		ifnet_flowhash_seed = RandomULong();
7361 	}
7362 
7363 	bzero(&fh, sizeof(fh));
7364 
7365 	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
7366 	fh.ifk_unit = ifp->if_unit;
7367 	fh.ifk_flags = ifp->if_flags;
7368 	fh.ifk_eflags = ifp->if_eflags;
7369 	fh.ifk_capabilities = ifp->if_capabilities;
7370 	fh.ifk_capenable = ifp->if_capenable;
7371 	fh.ifk_output_sched_model = ifp->if_output_sched_model;
7372 	fh.ifk_rand1 = RandomULong();
7373 	fh.ifk_rand2 = RandomULong();
7374 
7375 try_again:
7376 	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
7377 	if (flowhash == 0) {
7378 		/* try to get a non-zero flowhash */
7379 		ifnet_flowhash_seed = RandomULong();
7380 		goto try_again;
7381 	}
7382 
7383 	return flowhash;
7384 }
7385 
/*
 * Set (or, with len == 0, clear) the network signature for the given
 * address family on the interface.  Only AF_INET and AF_INET6 are
 * supported (EINVAL otherwise); EINVAL if the signature exceeds the
 * per-family storage, ENOMEM if the per-family extension data was
 * never allocated.  `flags' is currently unused.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *__sized_by(len) data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				/* Note: lock released before the break */
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				/* Note: lock released before the break */
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
7447 
/*
 * Copy the interface's network signature for the given address family
 * into `data', updating *len to the signature's actual length.  On
 * entry *len is the capacity of `data'; EINVAL if it is too small,
 * ENOENT if no signature is set, ENOMEM if the per-family extension
 * data was never allocated.  *flags, if supplied, is set to 0 on
 * success.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *__sized_by(*len) data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			/* Reject a zero-capacity or too-small buffer */
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				/* Note: lock released before the break */
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			/* Reject a zero-capacity or too-small buffer */
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				/* Note: lock released before the break */
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
7508 
/*
 * ifnet_set_nat64prefix - install (or clear) the NAT64 prefix table of an
 * interface.
 *
 * "prefixes" is an array of NAT64_MAX_NUM_PREFIXES entries.  An entry with
 * prefix_len == 0 clears the corresponding slot; otherwise the length must
 * be one of the accepted NAT64 prefix lengths (32/40/48/56/64/96 bits) and
 * the prefix must not carry embedded interface/link-local scope.  If at
 * least one prefix was installed, all NECP clients are told to re-evaluate.
 *
 * NOTE(review): unlike ifnet_get_nat64prefix(), ifp is not checked for NULL
 * here -- presumably callers guarantee a valid interface; confirm.
 *
 * Returns 0 on success, EINVAL on a malformed prefix, or ENOMEM when the
 * inet6 extension data was never allocated.
 */
int
ifnet_set_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the prefix slot */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	/* Kick NECP only after the lock is dropped and only on real change */
	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
7575 
7576 int
ifnet_get_nat64prefix(struct ifnet * ifp,struct ipv6_prefix * __counted_by (NAT64_MAX_NUM_PREFIXES)prefixes)7577 ifnet_get_nat64prefix(struct ifnet *ifp,
7578     struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
7579 {
7580 	int i, found_one = 0, error = 0;
7581 
7582 	if (ifp == NULL) {
7583 		return EINVAL;
7584 	}
7585 
7586 	if_inet6data_lock_shared(ifp);
7587 
7588 	if (IN6_IFEXTRA(ifp) == NULL) {
7589 		error = ENOMEM;
7590 		goto out;
7591 	}
7592 
7593 	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
7594 		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
7595 			found_one = 1;
7596 		}
7597 	}
7598 
7599 	if (found_one == 0) {
7600 		error = ENOENT;
7601 		goto out;
7602 	}
7603 
7604 	if (prefixes) {
7605 		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
7606 		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
7607 	}
7608 
7609 out:
7610 	if_inet6data_lock_done(ifp);
7611 
7612 	return error;
7613 }
7614 
7615 #if DEBUG || DEVELOPMENT
/*
 * Blob for sum16 verification: 325 bytes of fixed test data (appears to be
 * a gzip stream -- the content itself is irrelevant, only the bytes matter)
 * summed over the spans listed in sumtbl[] below.
 */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
7652 
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t       init;   /* TRUE once sumr has been computed/cached */
	uint16_t        len;    /* number of sumdata bytes covered by the span */
	uint16_t        sumr;   /* reference (filled by in_cksum_mbuf_ref()) */
	uint16_t        sumrp;  /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
/* Number of entries in sumtbl[] */
#define SUMTBL_MAX      ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7677 
/*
 * dlil_verify_sum16 - self-test for the 16-bit one's complement checksum
 * routines.  For every entry in sumtbl[] and every starting alignment
 * within a uint64_t, verifies that:
 *   - in_cksum_mbuf_ref() matches the precomputed reference (sumrp);
 *   - m_sum16() matches it both when alignment comes from the mbuf data
 *     pointer and when it comes from a non-zero offset;
 *   - b_sum16() (INET only) matches it on a contiguous buffer.
 * Panics on any mismatch.
 */
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	/* M_WAITOK: allocation is expected to block rather than fail here */
	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);               /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (uintptr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* First pass over each span computes and caches sumr */
			if (!sumtbl[n].init) {
				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (uintptr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by aligment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
7766 #endif /* DEBUG || DEVELOPMENT */
7767 
7768 #define CASE_STRINGIFY(x) case x: return #x
7769 
/*
 * dlil_kev_dl_code_str - map a KEV_DL_* kernel event code to its symbolic
 * name for logging.  Returns the empty string for unrecognized codes.
 */
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
		CASE_STRINGIFY(KEV_DL_SIFFLAGS);
		CASE_STRINGIFY(KEV_DL_SIFMETRICS);
		CASE_STRINGIFY(KEV_DL_SIFMTU);
		CASE_STRINGIFY(KEV_DL_SIFPHYS);
		CASE_STRINGIFY(KEV_DL_SIFMEDIA);
		CASE_STRINGIFY(KEV_DL_SIFGENERIC);
		CASE_STRINGIFY(KEV_DL_ADDMULTI);
		CASE_STRINGIFY(KEV_DL_DELMULTI);
		CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
		CASE_STRINGIFY(KEV_DL_IF_DETACHING);
		CASE_STRINGIFY(KEV_DL_IF_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_OFF);
		CASE_STRINGIFY(KEV_DL_LINK_ON);
		CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
		CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
		CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
		CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
		CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
		CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
		CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
		CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
		CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
		CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
		CASE_STRINGIFY(KEV_DL_ISSUES);
		CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
7804 
7805 void
dlil_dt_tcall_fn(thread_call_param_t arg0,thread_call_param_t arg1)7806 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
7807 {
7808 #pragma unused(arg1)
7809 	ifnet_ref_t ifp = arg0;
7810 
7811 	if (ifnet_is_attached(ifp, 1)) {
7812 		nstat_ifnet_threshold_reached(ifp->if_index);
7813 		ifnet_decr_iorefcnt(ifp);
7814 	}
7815 }
7816 
/*
 * ifnet_notify_data_threshold - arm the interface's deferred thread call
 * once total traffic (rx + tx bytes) has grown by more than
 * if_data_threshold bytes since the last notification.
 */
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 * The CAS on if_dt_bytes lets only one racing caller win for a
	 * given delta; losers simply skip scheduling.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			/* Interval configured: fire at next period boundary */
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			/* No interval configured: fire immediately */
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
7846 
7847 
/* Thin wrapper: forward per-flow interface stats to the TCP layer. */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
7854 
7855 static inline u_int32_t
_set_flags(u_int32_t * flags_p,u_int32_t set_flags)7856 _set_flags(u_int32_t *flags_p, u_int32_t set_flags)
7857 {
7858 	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
7859 }
7860 
7861 static inline u_int32_t
_clear_flags(u_int32_t * flags_p,u_int32_t clear_flags)7862 _clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
7863 {
7864 	return (u_int32_t)OSBitAndAtomic(~clear_flags, flags_p);
7865 }
7866 
/* Atomically set bits in if_eflags; returns the previous flags value. */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}
7872 
/*
 * Atomically clear bits in if_eflags.  Unlike if_clear_xflags(), the
 * previous flags value is discarded (void return).
 */
__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}
7878 
/* Atomically set bits in if_xflags; returns the previous flags value. */
__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}
7884 
/* Atomically clear bits in if_xflags; returns the previous flags value. */
__private_extern__ u_int32_t
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	return _clear_flags(&interface->if_xflags, clear_flags);
}
7890 
/* Bump the traffic-rule generation counter so observers can resync. */
__private_extern__ void
ifnet_update_traffic_rule_genid(ifnet_t ifp)
{
	os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
}
7896 
/*
 * ifnet_sync_traffic_rule_genid - compare the caller's cached generation id
 * against the interface's; on mismatch, refresh the caller's copy and
 * return TRUE (meaning "rules changed, re-read them").
 *
 * NOTE(review): reads if_traffic_rule_genid with a plain load while the
 * writer uses os_atomic_inc -- presumably a benign race for this polling
 * use; confirm.
 */
__private_extern__ boolean_t
ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
{
	if (*genid != ifp->if_traffic_rule_genid) {
		*genid = ifp->if_traffic_rule_genid;
		return TRUE;
	}
	return FALSE;
}
/*
 * ifnet_update_traffic_rule_count - publish a new traffic-rule count
 * (release-ordered store) and bump the generation id so observers resync.
 */
__private_extern__ void
ifnet_update_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_traffic_rule_count, count, release);
	ifnet_update_traffic_rule_genid(ifp);
}
7912 
7913 
7914 #if SKYWALK
7915 static bool
net_check_compatible_if_filter(struct ifnet * ifp)7916 net_check_compatible_if_filter(struct ifnet *ifp)
7917 {
7918 	if (ifp == NULL) {
7919 		if (net_api_stats.nas_iflt_attach_count > net_api_stats.nas_iflt_attach_os_count) {
7920 			return false;
7921 		}
7922 	} else {
7923 		if (ifp->if_flt_non_os_count > 0) {
7924 			return false;
7925 		}
7926 	}
7927 	return true;
7928 }
7929 #endif /* SKYWALK */
7930 
/*
 * DUMP_BUF_CHK - advance the dump cursor after a scnprintf().
 * Consumes "k" bytes from the remaining length "clen" and advances the
 * output cursor "c"; jumps to the caller-provided "done" label once the
 * buffer is exhausted.  Wrapped in do/while(0) so it expands to a single
 * statement and is safe in all contexts (e.g. an unbraced if/else).
 */
#define DUMP_BUF_CHK() do {     \
	clen -= k;              \
	if (clen < 1)           \
	        goto done;      \
	c += k;                 \
} while (0)
7937 
7938 int dlil_dump_top_if_qlen(char *__counted_by(str_len), int str_len);
7939 int
dlil_dump_top_if_qlen(char * __counted_by (str_len)str,int str_len)7940 dlil_dump_top_if_qlen(char *__counted_by(str_len) str, int str_len)
7941 {
7942 	char *c = str;
7943 	int k, clen = str_len;
7944 	ifnet_ref_t top_ifcq_ifp = NULL;
7945 	uint32_t top_ifcq_len = 0;
7946 	ifnet_ref_t top_inq_ifp = NULL;
7947 	uint32_t top_inq_len = 0;
7948 
7949 	for (int ifidx = 1; ifidx < if_index; ifidx++) {
7950 		ifnet_ref_t ifp = ifindex2ifnet[ifidx];
7951 		struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7952 
7953 		if (ifp == NULL) {
7954 			continue;
7955 		}
7956 		if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
7957 			top_ifcq_len = ifp->if_snd->ifcq_len;
7958 			top_ifcq_ifp = ifp;
7959 		}
7960 		if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
7961 			top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
7962 			top_inq_ifp = ifp;
7963 		}
7964 	}
7965 
7966 	if (top_ifcq_ifp != NULL) {
7967 		k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
7968 		    top_ifcq_len, top_ifcq_ifp->if_xname);
7969 		DUMP_BUF_CHK();
7970 	}
7971 	if (top_inq_ifp != NULL) {
7972 		k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
7973 		    top_inq_len, top_inq_ifp->if_xname);
7974 		DUMP_BUF_CHK();
7975 	}
7976 done:
7977 	return str_len - clen;
7978 }
7979