1 /*
2 * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35 #include <ptrauth.h>
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/dlil_sysctl.h>
53 #include <net/dlil_var_private.h>
54 #include <net/if_arp.h>
55 #include <net/iptap.h>
56 #include <net/pktap.h>
57 #include <net/droptap.h>
58 #include <net/nwk_wq.h>
59 #include <sys/kern_event.h>
60 #include <sys/kdebug.h>
61 #include <sys/mcache.h>
62 #include <sys/syslog.h>
63 #include <sys/protosw.h>
64 #include <sys/priv.h>
65
66 #include <kern/assert.h>
67 #include <kern/task.h>
68 #include <kern/thread.h>
69 #include <kern/sched_prim.h>
70 #include <kern/locks.h>
71 #include <kern/zalloc.h>
72
73 #include <net/kpi_protocol.h>
74 #include <net/kpi_interface.h>
75 #include <net/if_types.h>
76 #include <net/if_ipsec.h>
77 #include <net/if_llreach.h>
78 #include <net/if_utun.h>
79 #include <net/kpi_interfacefilter.h>
80 #include <net/classq/classq.h>
81 #include <net/classq/classq_sfb.h>
82 #include <net/flowhash.h>
83 #include <net/ntstat.h>
84 #if SKYWALK
85 #include <skywalk/lib/net_filter_event.h>
86 #endif /* SKYWALK */
87 #include <net/net_api_stats.h>
88 #include <net/if_ports_used.h>
89 #include <net/if_vlan_var.h>
90 #include <netinet/in.h>
91 #if INET
92 #include <netinet/in_var.h>
93 #include <netinet/igmp_var.h>
94 #include <netinet/ip_var.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_var.h>
97 #include <netinet/udp.h>
98 #include <netinet/udp_var.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/in_pcb.h>
101 #include <netinet/in_tclass.h>
102 #include <netinet/ip.h>
103 #include <netinet/ip_icmp.h>
104 #include <netinet/icmp_var.h>
105 #endif /* INET */
106
107 #include <net/nat464_utils.h>
108 #include <netinet6/in6_var.h>
109 #include <netinet6/nd6.h>
110 #include <netinet6/mld6_var.h>
111 #include <netinet6/scope6_var.h>
112 #include <netinet/ip6.h>
113 #include <netinet/icmp6.h>
114 #include <net/pf_pbuf.h>
115 #include <libkern/OSAtomic.h>
116 #include <libkern/tree.h>
117
118 #include <dev/random/randomdev.h>
119 #include <machine/machine_routines.h>
120
121 #include <mach/thread_act.h>
122 #include <mach/sdt.h>
123
124 #if CONFIG_MACF
125 #include <sys/kauth.h>
126 #include <security/mac_framework.h>
127 #include <net/ethernet.h>
128 #include <net/firewire.h>
129 #endif
130
131 #if PF
132 #include <net/pfvar.h>
133 #endif /* PF */
134 #include <net/pktsched/pktsched.h>
135 #include <net/pktsched/pktsched_netem.h>
136
137 #if NECP
138 #include <net/necp.h>
139 #endif /* NECP */
140
141 #if SKYWALK
142 #include <skywalk/packet/packet_queue.h>
143 #include <skywalk/nexus/netif/nx_netif.h>
144 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
145 #endif /* SKYWALK */
146
147 #include <net/sockaddr_utils.h>
148
149 #include <os/log.h>
150
151 uint64_t if_creation_generation_count = 0;
152
153 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
154
155 dlil_ifnet_queue_t dlil_ifnet_head;
156
157 #if DEBUG
158 unsigned int ifnet_debug = 1; /* debugging (enabled) */
159 #else
160 unsigned int ifnet_debug; /* debugging (disabled) */
161 #endif /* !DEBUG */
162
163
164 static u_int32_t net_rtref;
165
166 static struct dlil_main_threading_info dlil_main_input_thread_info;
167 struct dlil_threading_info *__single dlil_main_input_thread;
168
169 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
170 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
171
172 static int ifnet_lookup(struct ifnet *);
173 static void if_purgeaddrs(struct ifnet *);
174
175 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
176 struct mbuf *, char *);
177 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
178 struct mbuf *);
179 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
180 mbuf_t *, const struct sockaddr *, void *,
181 IFNET_FRAME_TYPE_RW_T, IFNET_LLADDR_RW_T);
182 static void ifproto_media_event(struct ifnet *, protocol_family_t,
183 const struct kev_msg *);
184 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
185 unsigned long, void *);
186 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
187 struct sockaddr_dl *, size_t);
188 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
189 const struct sockaddr_dl *, const struct sockaddr *,
190 const struct sockaddr_dl *, const struct sockaddr *);
191
192 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
193 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
194 boolean_t poll, struct thread *tp);
195 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
196 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
197 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
198 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
199 protocol_family_t *);
200 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
201 const struct ifnet_demux_desc *, u_int32_t);
202 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
203 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
204 #if !XNU_TARGET_OS_OSX
205 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
206 const struct sockaddr *, IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
207 u_int32_t *, u_int32_t *);
208 #else /* XNU_TARGET_OS_OSX */
209 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
210 const struct sockaddr *,
211 IFNET_LLADDR_T, IFNET_FRAME_TYPE_T);
212 #endif /* XNU_TARGET_OS_OSX */
213 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
214 const struct sockaddr *,
215 IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
216 u_int32_t *, u_int32_t *);
217 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
218 static void ifp_if_free(struct ifnet *);
219 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
220
221
222
223 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
224 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
225 #if DEBUG || DEVELOPMENT
226 static void dlil_verify_sum16(void);
227 #endif /* DEBUG || DEVELOPMENT */
228
229
230 static void ifnet_detacher_thread_func(void *, wait_result_t);
231 static void ifnet_detacher_thread_cont(void *, wait_result_t);
232 static void ifnet_detach_final(struct ifnet *);
233 static void ifnet_detaching_enqueue(struct ifnet *);
234 static struct ifnet *ifnet_detaching_dequeue(void);
235
236 static void ifnet_start_thread_func(void *, wait_result_t);
237 static void ifnet_start_thread_cont(void *, wait_result_t);
238
239 static void ifnet_poll_thread_func(void *, wait_result_t);
240 static void ifnet_poll_thread_cont(void *, wait_result_t);
241
242 static errno_t ifnet_enqueue_common(struct ifnet *, struct ifclassq *,
243 classq_pkt_t *, boolean_t, boolean_t *);
244
245 static void ifp_src_route_copyout(struct ifnet *, struct route *);
246 static void ifp_src_route_copyin(struct ifnet *, struct route *);
247 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
248 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
249
250
251 /* The following are protected by dlil_ifnet_lock */
252 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
253 static u_int32_t ifnet_detaching_cnt;
254 static boolean_t ifnet_detaching_embryonic;
255 static void *ifnet_delayed_run; /* wait channel for detaching thread */
256
257 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
258 &dlil_lck_attributes);
259
260 static uint32_t ifnet_flowhash_seed;
261
262 struct ifnet_flowhash_key {
263 char ifk_name[IFNAMSIZ];
264 uint32_t ifk_unit;
265 uint32_t ifk_flags;
266 uint32_t ifk_eflags;
267 uint32_t ifk_capabilities;
268 uint32_t ifk_capenable;
269 uint32_t ifk_output_sched_model;
270 uint32_t ifk_rand1;
271 uint32_t ifk_rand2;
272 };
273
274 /* Flow control entry per interface */
275 struct ifnet_fc_entry {
276 RB_ENTRY(ifnet_fc_entry) ifce_entry;
277 u_int32_t ifce_flowhash;
278 ifnet_ref_t ifce_ifp;
279 };
280
281 static uint32_t ifnet_calc_flowhash(struct ifnet *);
282 static int ifce_cmp(const struct ifnet_fc_entry *,
283 const struct ifnet_fc_entry *);
284 static int ifnet_fc_add(struct ifnet *);
285 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
286 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
287
288 /* protected by ifnet_fc_lock */
289 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
290 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
291 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
292
293 static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
294
295 extern void bpfdetach(struct ifnet *);
296
297
298 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
299 u_int32_t flags);
300 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
301 u_int32_t flags);
302
303
304 #if CONFIG_MACF
305 #if !XNU_TARGET_OS_OSX
306 int dlil_lladdr_ckreq = 1;
307 #else /* XNU_TARGET_OS_OSX */
308 int dlil_lladdr_ckreq = 0;
309 #endif /* XNU_TARGET_OS_OSX */
310 #endif /* CONFIG_MACF */
311
312
/*
 * Atomically bump the global ifnet_delay_start_disabled counter
 * (declared elsewhere); a non-zero value disables the delayed
 * interface-start behavior.
 */
static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}
318
319 unsigned int net_rxpoll = 1;
320 unsigned int net_affinity = 1;
321 unsigned int net_async = 1; /* 0: synchronous, 1: asynchronous */
322
323 extern u_int32_t inject_buckets;
324
325 void
ifnet_filter_update_tso(struct ifnet * ifp,boolean_t filter_enable)326 ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
327 {
328 /*
329 * update filter count and route_generation ID to let TCP
330 * know it should reevalute doing TSO or not
331 */
332 if (filter_enable) {
333 OSAddAtomic(1, &ifp->if_flt_no_tso_count);
334 } else {
335 VERIFY(ifp->if_flt_no_tso_count != 0);
336 OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
337 }
338 routegenid_update();
339 }
340
341 #if SKYWALK
342
343 static bool net_check_compatible_if_filter(struct ifnet *ifp);
344
345 /* if_attach_nx flags defined in os_skywalk_private.h */
346 unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
347 unsigned int if_enable_fsw_ip_netagent =
348 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
349 unsigned int if_enable_fsw_transport_netagent =
350 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
351
352 unsigned int if_netif_all =
353 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);
354
355 /* Configure flowswitch to use max mtu sized buffer */
356 static bool fsw_use_max_mtu_buffer = false;
357
358
359 static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);
360
361 #include <skywalk/os_skywalk_private.h>
362
363 boolean_t
ifnet_nx_noauto(ifnet_t ifp)364 ifnet_nx_noauto(ifnet_t ifp)
365 {
366 return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
367 }
368
/*
 * Flowswitch auto-attach is skipped for low-latency interfaces;
 * that is currently the only criterion, so simply delegate.
 */
boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)
{
	return ifnet_is_low_latency(ifp);
}
374
375 boolean_t
ifnet_is_low_latency(ifnet_t ifp)376 ifnet_is_low_latency(ifnet_t ifp)
377 {
378 return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
379 }
380
381 boolean_t
ifnet_needs_compat(ifnet_t ifp)382 ifnet_needs_compat(ifnet_t ifp)
383 {
384 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
385 return FALSE;
386 }
387 #if !XNU_TARGET_OS_OSX
388 /*
389 * To conserve memory, we plumb in the compat layer selectively; this
390 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
391 * In particular, we check for Wi-Fi Access Point.
392 */
393 if (IFNET_IS_WIFI(ifp)) {
394 /* Wi-Fi Access Point */
395 if (strcmp(ifp->if_name, "ap") == 0) {
396 return if_netif_all;
397 }
398 }
399 #else /* XNU_TARGET_OS_OSX */
400 #pragma unused(ifp)
401 #endif /* XNU_TARGET_OS_OSX */
402 return TRUE;
403 }
404
405 boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)406 ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
407 {
408 if (if_is_fsw_transport_netagent_enabled()) {
409 /* check if netagent has been manually enabled for ipsec/utun */
410 if (ifp->if_family == IFNET_FAMILY_IPSEC) {
411 return ipsec_interface_needs_netagent(ifp);
412 } else if (ifp->if_family == IFNET_FAMILY_UTUN) {
413 return utun_interface_needs_netagent(ifp);
414 }
415
416 /* check ifnet no auto nexus override */
417 if (ifnet_nx_noauto(ifp)) {
418 return FALSE;
419 }
420
421 /* check global if_attach_nx configuration */
422 switch (ifp->if_family) {
423 case IFNET_FAMILY_CELLULAR:
424 case IFNET_FAMILY_ETHERNET:
425 if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
426 return TRUE;
427 }
428 break;
429 default:
430 break;
431 }
432 }
433 return FALSE;
434 }
435
436 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)437 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
438 {
439 #pragma unused(ifp)
440 if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
441 return TRUE;
442 }
443 return FALSE;
444 }
445
/*
 * TRUE when the global if_attach_nx policy requests a netif netagent;
 * the interface argument is unused.
 */
boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)
{
#pragma unused(ifp)
	return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
}
452
/*
 * Detach and free a single nexus provider instance.
 *
 * If a device attachment UUID is supplied, the device port is detached
 * first via kern_nexus_ifdetach(), then the provider instance itself is
 * freed.  Errors are logged (attributed via func_str) but not
 * propagated; teardown proceeds regardless.
 *
 * Returns TRUE if an instance was present and teardown was attempted,
 * FALSE if there was nothing to detach.
 */
static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,
    const char *func_str, uuid_t instance, uuid_t device)
{
	errno_t err;

	if (instance == NULL || uuid_is_null(instance)) {
		return FALSE;
	}

	/* followed by the device port */
	if (device != NULL && !uuid_is_null(device)) {
		err = kern_nexus_ifdetach(controller, instance, device);
		if (err != 0) {
			DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
			    func_str, err);
		}
	}
	err = kern_nexus_controller_free_provider_instance(controller,
	    instance);
	if (err != 0) {
		DLIL_PRINTF("%s free_provider_instance failed %d\n",
		    func_str, err);
	}
	return TRUE;
}
479
/*
 * Tear down a nexus: first the provider instance (including its device
 * port, if any), then deregister the provider itself.  Returns TRUE if
 * anything was actually detached or deregistered.  Controller errors
 * are logged (attributed via func_str) but otherwise ignored.
 */
static boolean_t
dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
    uuid_t device)
{
	boolean_t detached = FALSE;
	nexus_controller_t controller = kern_nexus_shared_controller();
	int err;

	if (dlil_detach_nexus_instance(controller, func_str, instance,
	    device)) {
		detached = TRUE;
	}
	if (provider != NULL && !uuid_is_null(provider)) {
		detached = TRUE;
		err = kern_nexus_controller_deregister_provider(controller,
		    provider);
		if (err != 0) {
			DLIL_PRINTF("%s deregister_provider %d\n",
			    func_str, err);
		}
	}
	return detached;
}
503
/*
 * Register a nexus provider (netif or flowswitch, per 'type') named
 * after the interface, then allocate one provider instance.
 *
 * On success, *provider and *instance receive the new UUIDs and 0 is
 * returned.  If instance allocation fails, the just-registered provider
 * is deregistered before returning the error.
 *
 * Note: the success path deliberately falls through the 'failed:' label,
 * which only returns err (0 at that point).
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t dom_prov;
	errno_t err;
	nexus_name_t provider_name;
	const char *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	/* e.g. "com.apple.netif.en0" / "com.apple.flowswitch.en0" */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* undo the provider registration; its error is ignored */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	return err;
}
553
/*
 * Create and attach a netif nexus (provider + instance) for ifp,
 * recording the resulting UUIDs in netif_nx.  Returns TRUE on success;
 * on any failure the partially-created state is torn down and FALSE is
 * returned.
 */
static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller;
	errno_t err;
	/* zero-filled uuid buffer passed where no key/device uuid applies */
	unsigned char *empty_uuid = __unsafe_forge_bidi_indexable(unsigned char *, NULL, sizeof(uuid_t));

	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
		/* it's already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s already has nexus attached\n",
			    __func__, if_name(ifp));
		}
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}
	/* bind the netif to this interface's index */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
	VERIFY(err == 0);

	controller = kern_nexus_shared_controller();

	/* create the netif provider and instance */
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
	if (err != 0) {
		goto failed;
	}

	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance, ifp,
	    empty_uuid, FALSE, &netif_nx->if_nif_attach);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
		    __func__, err);
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
		    netif_nx->if_nif_instance, empty_uuid);
		goto failed;
	}
	/*
	 * NOTE(review): attr is only destroyed on the failure path below;
	 * the success path returns without kern_nexus_attr_destroy().
	 * Confirm whether attr ownership transfers on success or this is
	 * a leak.
	 */
	return TRUE;

failed:
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
609
610 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)611 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
612 {
613 if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
614 IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
615 goto failed;
616 }
617 switch (ifp->if_type) {
618 case IFT_CELLULAR:
619 case IFT_ETHER:
620 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
621 /* don't auto-attach */
622 goto failed;
623 }
624 break;
625 default:
626 /* don't auto-attach */
627 goto failed;
628 }
629 return dlil_attach_netif_nexus_common(ifp, netif_nx);
630
631 failed:
632 return FALSE;
633 }
634
/*
 * Tear down the netif nexus recorded in nexus_netif (provider,
 * instance and attachment); kept out-of-line via noinline.
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
642
643 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)644 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
645 {
646 struct ifreq ifr;
647 int error;
648
649 bzero(&ifr, sizeof(ifr));
650 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
651 if (error == 0) {
652 *ifdm_p = ifr.ifr_devmtu;
653 }
654 return error;
655 }
656
/*
 * Adjust the flowswitch large-buffer size to accommodate the driver's
 * advertised TSO MTUs.  In/out parameter large_buf_size is raised to
 * the max TSO MTU (native interfaces) and then clamped to the
 * appropriate flowswitch limit.  No-op unless running on macOS/server.
 */
static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
{
	uint32_t tso_v4_mtu = 0;
	uint32_t tso_v6_mtu = 0;

	if (!kernel_is_macos_or_server()) {
		return;
	}

	/*
	 * Note that we are reading the real hwassist flags set by the driver
	 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
	 * hasn't been called yet.
	 */
	if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
		tso_v4_mtu = ifp->if_tso_v4_mtu;
	}
	if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
		tso_v6_mtu = ifp->if_tso_v6_mtu;
	}

	/*
	 * If the hardware supports TSO, adjust the large buf size to match the
	 * supported TSO MTU size. Note that only native interfaces set TSO MTU
	 * size today.
	 * For compat, there is a 16KB limit on large buf size, so it needs to be
	 * bounded by NX_FSW_DEF_LARGE_BUFSIZE. Note that no compat interfaces
	 * set TSO MTU size today.
	 */
	if (SKYWALK_NATIVE(ifp)) {
		if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
			*large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
		} else {
			/* no TSO MTU advertised: fall back to GSO MTU floor */
			*large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
		}
		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
	} else {
		*large_buf_size = MIN(NX_FSW_DEF_LARGE_BUFSIZE, *large_buf_size);
	}
}
698
699 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint32_t * buf_size,bool * use_multi_buflet,uint32_t * large_buf_size)700 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
701 bool *use_multi_buflet, uint32_t *large_buf_size)
702 {
703 struct kern_pbufpool_memory_info rx_pp_info;
704 struct kern_pbufpool_memory_info tx_pp_info;
705 uint32_t if_max_mtu = 0;
706 uint32_t drv_buf_size;
707 struct ifdevmtu ifdm;
708 int err;
709
710 /*
711 * To perform intra-stack RX aggregation flowswitch needs to use
712 * multi-buflet packet.
713 */
714 *use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
715
716 *large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
717 /*
718 * IP over Thunderbolt interface can deliver the largest IP packet,
719 * but the driver advertises the MAX MTU as only 9K.
720 */
721 if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
722 if_max_mtu = IP_MAXPACKET;
723 goto skip_mtu_ioctl;
724 }
725
726 /* determine max mtu */
727 bzero(&ifdm, sizeof(ifdm));
728 err = dlil_siocgifdevmtu(ifp, &ifdm);
729 if (__improbable(err != 0)) {
730 DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
731 __func__, if_name(ifp));
732 /* use default flowswitch buffer size */
733 if_max_mtu = NX_FSW_BUFSIZE;
734 } else {
735 DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
736 ifdm.ifdm_max, ifdm.ifdm_current);
737 /* rdar://problem/44589731 */
738 if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
739 }
740
741 skip_mtu_ioctl:
742 if (if_max_mtu == 0) {
743 DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
744 __func__, if_name(ifp));
745 return EINVAL;
746 }
747 if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
748 DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
749 "max bufsize(%d)\n", __func__,
750 if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
751 return EINVAL;
752 }
753
754 /*
755 * for skywalk native driver, consult the driver packet pool also.
756 */
757 if (dlil_is_native_netif_nexus(ifp)) {
758 err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
759 &tx_pp_info);
760 if (err != 0) {
761 DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
762 __func__, if_name(ifp));
763 return ENXIO;
764 }
765 drv_buf_size = tx_pp_info.kpm_bufsize *
766 tx_pp_info.kpm_max_frags;
767 if (if_max_mtu > drv_buf_size) {
768 DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
769 "tx %d * %d) can't support max mtu(%d)\n", __func__,
770 if_name(ifp), rx_pp_info.kpm_bufsize,
771 rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
772 tx_pp_info.kpm_max_frags, if_max_mtu);
773 return EINVAL;
774 }
775 } else {
776 drv_buf_size = if_max_mtu;
777 }
778
779 if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
780 _CASSERT((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
781 *use_multi_buflet = true;
782 /* default flowswitch buffer size */
783 *buf_size = NX_FSW_BUFSIZE;
784 *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
785 } else {
786 *buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
787 }
788 _dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
789 ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
790 if (*buf_size >= *large_buf_size) {
791 *large_buf_size = 0;
792 }
793 return 0;
794 }
795
/*
 * Create and attach a flowswitch nexus for ifp, filling in nexus_fsw
 * with the provider/instance/device UUIDs on success (returns TRUE).
 *
 * Preconditions checked here: auto-attach not vetoed (per interface,
 * low-latency, or vmnet), a netif (native or compat) already plumbed
 * (IFCAP_SKYWALK), and flowswitch auto-attach globally enabled.
 *
 * On any failure, partially-created state is torn down and FALSE is
 * returned.
 */
static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller;
	errno_t err = 0;
	uuid_t netif;
	uint32_t buf_size = 0;
	uint32_t large_buf_size = 0;
	bool multi_buflet;

	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
	    IFNET_IS_VMNET(ifp)) {
		goto failed;
	}

	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
		/* not possible to attach (netif native/compat not plumbed) */
		goto failed;
	}

	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
		/* don't auto-attach */
		goto failed;
	}

	/* get the netif instance from the ifp */
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	/* compute slot/large buffer sizes and multi-buflet requirement */
	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet, &large_buf_size);
	if (err != 0) {
		goto failed;
	}
	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
	ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);

	/* Configure flowswitch buffer size */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
	    large_buf_size);
	VERIFY(err == 0);

	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	/* create the flowswitch provider and instance */
	controller = kern_nexus_shared_controller();
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
	if (err != 0) {
		goto failed;
	}

	/* attach the device port */
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
		goto failed;
	}
	/*
	 * NOTE(review): attr is only destroyed on the failure path; the
	 * success path returns without kern_nexus_attr_destroy().  Confirm
	 * whether attr ownership transfers on success or this is a leak.
	 */
	return TRUE;

failed:
	/* err == 0 here means we declined to attach rather than failed */
	if (err != 0) {
		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
		    __func__, if_name(ifp), err);
	} else {
		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
		    __func__, if_name(ifp));
	}
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
894
/*
 * Attach a flowswitch nexus to ifp if one is not already present.
 * Returns TRUE if a flowswitch was newly attached.  Interfaces using
 * the legacy TX model (no IFEF_TXSTART) are not supported.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t attached = FALSE;
	if_nexus_flowswitch nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	/* test hook: skip fsw for interfaces configured for direct netif */
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
		    if_name(ifp));
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));

	/*
	 * A race can happen between a thread creating a flowswitch and another thread
	 * detaching the interface (also destroying the flowswitch).
	 *
	 * ifnet_datamov_begin() is used here to force dlil_quiesce_and_detach_nexuses()
	 * (called by another thread) to wait until this function finishes so the
	 * flowswitch can be cleaned up by dlil_detach_flowswitch_nexus().
	 *
	 * If ifnet_is_attached() is used instead, dlil_quiesce_and_detach_nexuses()
	 * would not wait (because ifp->if_nx_flowswitch isn't assigned) and the
	 * created flowswitch would be left hanging and ifnet_detach_final() would never
	 * wakeup because the existence of the flowswitch prevents the ifnet's ioref
	 * from being released.
	 */
	if (!ifnet_datamov_begin(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s not attached",
		    __func__, ifp->if_xname);
		goto done;
	}
	/* only attach when no flowswitch instance is already recorded */
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
		attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
		if (attached) {
			/* publish the new fsw UUIDs under the ifnet lock */
			ifnet_lock_exclusive(ifp);
			ifp->if_nx_flowswitch = nexus_fsw;
			ifnet_lock_done(ifp);
		}
	}
	ifnet_datamov_end(ifp);

done:
	return attached;
}
951
/*
 * Tear down the flowswitch nexus recorded in nexus_fsw (provider,
 * instance and device attachment); kept out-of-line via noinline.
 */
__attribute__((noinline))
static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
{
	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
}
959
960 __attribute__((noinline))
961 static void
dlil_netif_detach_notify(ifnet_t ifp)962 dlil_netif_detach_notify(ifnet_t ifp)
963 {
964 ifnet_detach_notify_cb_t notify = NULL;
965 void *__single arg = NULL;
966
967 ifnet_get_detach_notify(ifp, ¬ify, &arg);
968 if (notify == NULL) {
969 DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
970 return;
971 }
972 (*notify)(arg);
973 }
974
/*
 * Quiesce data movement on ifp, then detach its flowswitch and netif
 * nexuses (in that order).  The ASSERTs enforce the invariant that a
 * nexus's provider/instance/attachment UUIDs are either all set or all
 * null.  Data movement is resumed before returning.
 */
__attribute__((noinline))
static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
{
	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
	if_nexus_netif *nx_netif = &ifp->if_nx_netif;

	/* wait for in-flight data movement to drain before teardown */
	ifnet_datamov_suspend_and_drain(ifp);
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
	} else {
		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
	}

	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
	} else {
		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
	}
	ifnet_datamov_resume(ifp);
}
1004
1005 boolean_t
ifnet_add_netagent(ifnet_t ifp)1006 ifnet_add_netagent(ifnet_t ifp)
1007 {
1008 int error;
1009
1010 error = kern_nexus_interface_add_netagent(ifp);
1011 os_log(OS_LOG_DEFAULT,
1012 "kern_nexus_interface_add_netagent(%s) returned %d",
1013 ifp->if_xname, error);
1014 return error == 0;
1015 }
1016
1017 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1018 ifnet_remove_netagent(ifnet_t ifp)
1019 {
1020 int error;
1021
1022 error = kern_nexus_interface_remove_netagent(ifp);
1023 os_log(OS_LOG_DEFAULT,
1024 "kern_nexus_interface_remove_netagent(%s) returned %d",
1025 ifp->if_xname, error);
1026 return error == 0;
1027 }
1028
1029 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1030 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1031 {
1032 if (!IF_FULLY_ATTACHED(ifp)) {
1033 return FALSE;
1034 }
1035 return dlil_attach_flowswitch_nexus(ifp);
1036 }
1037
boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)
{
	if_nexus_flowswitch nexus_fsw;

	/*
	 * Snapshot and clear the interface's flowswitch state while
	 * holding the ifnet lock exclusively, then perform the
	 * (potentially blocking) detach outside the lock using the
	 * local copy.
	 */
	ifnet_lock_exclusive(ifp);
	nexus_fsw = ifp->if_nx_flowswitch;
	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
	ifnet_lock_done(ifp);
	return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
	    nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
}
1050
1051 void
ifnet_attach_native_flowswitch(ifnet_t ifp)1052 ifnet_attach_native_flowswitch(ifnet_t ifp)
1053 {
1054 if (!dlil_is_native_netif_nexus(ifp)) {
1055 /* not a native netif */
1056 return;
1057 }
1058 ifnet_attach_flowswitch_nexus(ifp);
1059 }
1060
int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
{
	/*
	 * Install (or clear) the flowswitch RX callback and its argument.
	 * Before swapping, wait for every outstanding user of the current
	 * callback (tracked by if_fsw_rx_cb_ref and released via
	 * ifnet_release_flowswitch_rx_callback()) to drop its reference,
	 * so the old callback is guaranteed quiescent on return.
	 */
	lck_mtx_lock(&ifp->if_delegate_lock);
	while (ifp->if_fsw_rx_cb_ref > 0) {
		DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
		/* sleeps releasing if_delegate_lock; woken by last release */
		(void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
	}
	ifp->if_fsw_rx_cb = cb;
	ifp->if_fsw_rx_cb_arg = arg;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1076
int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
{
	/*
	 * Return the flowswitch RX callback and argument, taking a use
	 * count that the caller must drop with
	 * ifnet_release_flowswitch_rx_callback().
	 *
	 * This is for avoiding the unnecessary lock acquire for interfaces
	 * not used by a redirect interface.
	 */
	if (ifp->if_fsw_rx_cb == NULL) {
		return ENOENT;
	}
	lck_mtx_lock(&ifp->if_delegate_lock);
	/* re-check under the lock; the callback may have been cleared */
	if (ifp->if_fsw_rx_cb == NULL) {
		lck_mtx_unlock(&ifp->if_delegate_lock);
		return ENOENT;
	}
	*cbp = ifp->if_fsw_rx_cb;
	*argp = ifp->if_fsw_rx_cb_arg;
	/* hold a reference so a concurrent setter waits for us */
	ifp->if_fsw_rx_cb_ref++;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1098
1099 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)1100 ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1101 {
1102 lck_mtx_lock(&ifp->if_delegate_lock);
1103 if (--ifp->if_fsw_rx_cb_ref == 0) {
1104 wakeup(&ifp->if_fsw_rx_cb_ref);
1105 }
1106 lck_mtx_unlock(&ifp->if_delegate_lock);
1107 }
1108
int
ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
{
	/*
	 * Install (or clear) the delegate parent for difp.  Wait for all
	 * outstanding users of the current parent (tracked by
	 * if_delegate_parent_ref, released via
	 * ifnet_release_delegate_parent()) before swapping it.
	 */
	lck_mtx_lock(&difp->if_delegate_lock);
	while (difp->if_delegate_parent_ref > 0) {
		DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
		/* sleeps releasing if_delegate_lock; woken by last release */
		(void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
	}
	difp->if_delegate_parent = parent;
	lck_mtx_unlock(&difp->if_delegate_lock);
	return 0;
}
1123
1124 int
ifnet_get_delegate_parent(ifnet_t difp,ifnet_t * parentp)1125 ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1126 {
1127 lck_mtx_lock(&difp->if_delegate_lock);
1128 if (difp->if_delegate_parent == NULL) {
1129 lck_mtx_unlock(&difp->if_delegate_lock);
1130 return ENOENT;
1131 }
1132 *parentp = difp->if_delegate_parent;
1133 difp->if_delegate_parent_ref++;
1134 lck_mtx_unlock(&difp->if_delegate_lock);
1135 return 0;
1136 }
1137
1138 void
ifnet_release_delegate_parent(ifnet_t difp)1139 ifnet_release_delegate_parent(ifnet_t difp)
1140 {
1141 lck_mtx_lock(&difp->if_delegate_lock);
1142 if (--difp->if_delegate_parent_ref == 0) {
1143 wakeup(&difp->if_delegate_parent_ref);
1144 }
1145 lck_mtx_unlock(&difp->if_delegate_lock);
1146 }
1147
__attribute__((noinline))
void
ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
{
	/* Install the detach-notify callback; ifnet lock held exclusively. */
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	ifp->if_detach_notify = notify;
	ifp->if_detach_notify_arg = arg;
}
1156
__attribute__((noinline))
void
ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
{
	/* Read back the detach-notify callback; ifnet lock held exclusively. */
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	*notifyp = ifp->if_detach_notify;
	*argp = ifp->if_detach_notify_arg;
}
1165
__attribute__((noinline))
void
ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
{
	/* Locking wrapper around ifnet_set_detach_notify_locked(). */
	ifnet_lock_exclusive(ifp);
	ifnet_set_detach_notify_locked(ifp, notify, arg);
	ifnet_lock_done(ifp);
}
1174
__attribute__((noinline))
void
ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
{
	/* Locking wrapper around ifnet_get_detach_notify_locked(). */
	ifnet_lock_exclusive(ifp);
	ifnet_get_detach_notify_locked(ifp, notifyp, argp);
	ifnet_lock_done(ifp);
}
1183 #endif /* SKYWALK */
1184
/*
 * Sanity-check an inbound mbuf: it must carry a pkthdr and its recorded
 * receive interface must match ifp (loopback is exempt from the rcvif
 * match).  Panics on violation.
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	ifnet_ref_t _rcvif = mbuf_pkthdr_rcvif(m);                      \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}
1193
/* Link-speed units for the RX-poll watermark table below. */
#define MBPS (1ULL * 1000 * 1000)
#define GBPS (MBPS * 1000)

/*
 * Per-link-speed RX polling watermarks; a row applies to links up to
 * its speed.  The zero row terminates the table.
 */
struct rxpoll_time_tbl {
	u_int64_t speed;        /* downlink speed */
	u_int32_t plowat;       /* packets low watermark */
	u_int32_t phiwat;       /* packets high watermark */
	u_int32_t blowat;       /* bytes low watermark */
	u_int32_t bhiwat;       /* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
1213
1214 int
proto_hash_value(u_int32_t protocol_family)1215 proto_hash_value(u_int32_t protocol_family)
1216 {
1217 /*
1218 * dlil_proto_unplumb_all() depends on the mapping between
1219 * the hash bucket index and the protocol family defined
1220 * here; future changes must be applied there as well.
1221 */
1222 switch (protocol_family) {
1223 case PF_INET:
1224 return 0;
1225 case PF_INET6:
1226 return 1;
1227 case PF_VLAN:
1228 return 2;
1229 case PF_UNSPEC:
1230 default:
1231 return 3;
1232 }
1233 }
1234
1235 __private_extern__ int
dlil_post_msg(struct ifnet * ifp,u_int32_t event_subclass,u_int32_t event_code,struct net_event_data * event_data,u_int32_t event_data_len,boolean_t suppress_generation)1236 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1237 u_int32_t event_code, struct net_event_data *event_data,
1238 u_int32_t event_data_len, boolean_t suppress_generation)
1239 {
1240 struct net_event_data ev_data;
1241 struct kev_msg ev_msg;
1242
1243 bzero(&ev_msg, sizeof(ev_msg));
1244 bzero(&ev_data, sizeof(ev_data));
1245 /*
1246 * a net event always starts with a net_event_data structure
1247 * but the caller can generate a simple net event or
1248 * provide a longer event structure to post
1249 */
1250 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1251 ev_msg.kev_class = KEV_NETWORK_CLASS;
1252 ev_msg.kev_subclass = event_subclass;
1253 ev_msg.event_code = event_code;
1254
1255 if (event_data == NULL) {
1256 event_data = &ev_data;
1257 event_data_len = sizeof(struct net_event_data);
1258 }
1259
1260 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1261 event_data->if_family = ifp->if_family;
1262 event_data->if_unit = (u_int32_t)ifp->if_unit;
1263
1264 ev_msg.dv[0].data_length = event_data_len;
1265 ev_msg.dv[0].data_ptr = event_data;
1266 ev_msg.dv[1].data_length = 0;
1267
1268 bool update_generation = true;
1269 if (event_subclass == KEV_DL_SUBCLASS) {
1270 /* Don't update interface generation for frequent link quality and state changes */
1271 switch (event_code) {
1272 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1273 case KEV_DL_RRC_STATE_CHANGED:
1274 case KEV_DL_PRIMARY_ELECTED:
1275 update_generation = false;
1276 break;
1277 default:
1278 break;
1279 }
1280 }
1281
1282 /*
1283 * Some events that update generation counts might
1284 * want to suppress generation count.
1285 * One example is node presence/absence where we still
1286 * issue kernel event for the invocation but want to avoid
1287 * expensive operation of updating generation which triggers
1288 * NECP client updates.
1289 */
1290 if (suppress_generation) {
1291 update_generation = false;
1292 }
1293
1294 return dlil_event_internal(ifp, &ev_msg, update_generation);
1295 }
1296
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	/*
	 * Reset all RX-polling state on ifp: cancel the poll cycle,
	 * return to the POLL_OFF input model, and zero the poll
	 * statistics and mode/sample timestamps.
	 */
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
1315
1316
1317 #if SKYWALK
1318 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)1319 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
1320 enum net_filter_event_subsystems state)
1321 {
1322 evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%d",
1323 __func__, state);
1324
1325 bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
1326 if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
1327 if_enable_fsw_transport_netagent = 1;
1328 } else {
1329 if_enable_fsw_transport_netagent = 0;
1330 }
1331 if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
1332 kern_nexus_update_netagents();
1333 } else if (!if_enable_fsw_transport_netagent) {
1334 necp_update_all_clients();
1335 }
1336 }
1337 #endif /* SKYWALK */
1338
/*
 * One-time initialization of the Data Link Interface Layer (DLIL):
 * compile-time layout/constant checks, boot-arg processing, Skywalk
 * netagent policy selection, dependent-subsystem initialization, and
 * creation of the main DLIL input and ifnet-detacher kernel threads.
 * Must run before any interface attaches.
 */
void
dlil_init(void)
{
	thread_t __single thread = THREAD_NULL;

	/* the main input thread uses statically-allocated state */
	dlil_main_input_thread = (struct dlil_threading_info *) &dlil_main_input_thread_info;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	/* logging flag/category values must agree across the two APIs */
	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	/* interface family/subfamily constants must agree as well */
	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	_CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	_CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	_CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	_CASSERT(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
	_CASSERT(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
	_CASSERT(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* boot-args that override networking defaults */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	PE_parse_boot_argn("if_link_heuristics", &if_link_heuristics_flags, sizeof(if_link_heuristics_flags));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled. This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	/*
	 * NOTE(review): these two messages report the boot-arg overriding
	 * the device-tree default in each direction — confirm the intended
	 * conditions, as the pairing looks asymmetric.
	 */
	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
	if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */

	dlil_allocation_zones_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	/* interface list heads used throughout DLIL */
	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize droptap interface */
	droptap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
1635
/*
 * Attach an interface filter described by if_filter to ifp, returning
 * the new filter in *filter_ref.  Returns 0 on success or ENXIO when
 * the interface is not (or no longer) attached.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	/* takes an I/O refcnt on success; dropped before returning below */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	filter = dlif_filt_alloc();
	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 * and for management interfaces
	 */
	if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	/* serialize with other walkers/updaters of the filter list */
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	/* global and per-interface filter accounting */
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	/* drop the I/O refcnt taken by ifnet_is_attached() above */
	ifnet_decr_iorefcnt(ifp);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		dlif_filt_free(filter);
	}

	return retval;
}
1727
/*
 * Detach an interface filter.  Two modes:
 *   detached == 0: explicit detach via dlil_detach_filter(); validate the
 *     filter ref by scanning all interfaces, unlink it, and destroy it.
 *   detached != 0: implicit detach from ifnet_detach_final(); the caller
 *     has already emptied if_flt_head, so only adjust counters and destroy.
 * Returns 0 on success, EINVAL if the filter ref was not found.
 */
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_ref_t ifp = NULL;

		ifnet_head_lock_shared();
		/* find the interface that holds this filter */
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance. Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1; /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				/* re-acquire and enter the filter monitor */
				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
					VERIFY(ifp->if_flt_non_os_count != 0);
					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
				}
				/*
				 * Decrease filter count and route_generation
				 * ID to let TCP know it should reevalute doing
				 * TSO or not.
				 */
				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
					ifnet_filter_update_tso(ifp, FALSE);
				}
				/*
				 * When we remove the bridge's interface filter,
				 * clear the field in the ifnet.
				 */
				if ((filter->filt_flags & DLIL_IFF_BRIDGE)
				    != 0) {
					ifp->if_bridge = NULL;
				}
				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	} else {
		ifnet_ref_t ifp = filter->filt_ifp;
		/*
		 * Here we are called from ifnet_detach_final(); the
		 * caller had emptied if_flt_head and we're doing an
		 * implicit filter detach because the interface is
		 * about to go away. Make sure to adjust the counters
		 * in this case. We don't need the protection of the
		 * filter monitor since we're called as part of the
		 * final detach in the context of the detacher thread.
		 */
		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
			VERIFY(ifp->if_flt_non_os_count != 0);
			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
		}
		/*
		 * Decrease filter count and route_generation
		 * ID to let TCP know it should reevalute doing
		 * TSO or not.
		 */
		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
			ifnet_filter_update_tso(ifp, FALSE);
		}
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/* undo the accounting done at attach time */
	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
	}
#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	/* Free the filter */
	dlif_filt_free(filter);
	filter = NULL;
done:
	/*
	 * NOTE(review): on the EINVAL path 'filter' still points at the
	 * caller-supplied (unvalidated) ref when printed here — confirm
	 * this is safe for all callers.
	 */
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
1858
1859 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)1860 dlil_detach_filter(interface_filter_t filter)
1861 {
1862 if (filter == NULL) {
1863 return;
1864 }
1865 dlil_detach_filter_internal(filter, 0);
1866 }
1867
1868 __private_extern__ boolean_t
dlil_has_ip_filter(void)1869 dlil_has_ip_filter(void)
1870 {
1871 boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
1872
1873 VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
1874
1875 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
1876 return has_filter;
1877 }
1878
1879 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)1880 dlil_has_if_filter(struct ifnet *ifp)
1881 {
1882 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
1883 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
1884 return has_filter;
1885 }
1886
1887 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)1888 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
1889 {
1890 if (p != NULL) {
1891 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
1892 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
1893 return EINVAL;
1894 }
1895 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
1896 p->packets_lowat >= p->packets_hiwat) {
1897 return EINVAL;
1898 }
1899 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
1900 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
1901 return EINVAL;
1902 }
1903 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
1904 p->bytes_lowat >= p->bytes_hiwat) {
1905 return EINVAL;
1906 }
1907 if (p->interval_time != 0 &&
1908 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
1909 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
1910 }
1911 }
1912 return 0;
1913 }
1914
/*
 * Recompute and install the RX polling parameters for an interface.
 *
 * If the input link rate is unknown (0) and the caller supplied no
 * parameters, polling is effectively disabled: low watermarks go to
 * zero, high watermarks to the maximum, and the interval to its
 * minimum. Otherwise each parameter is taken from 'p' when non-zero,
 * or auto-tuned from the rxpoll_tbl entry matching the link rate.
 * Caller may pass NULL for 'p' to auto-tune everything.
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* pick the last table entry whose speed <= link rate */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/* a non-zero if_rxpoll_max sysctl overrides the caller */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ? if_rxpoll_max : p->packets_limit);
		/* likewise, a tweaked if_rxpoll_interval_time wins over 'p' */
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert nanosecond holdtimes into timespec form for the poller */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
1984
/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 * 'locked' indicates the caller already holds the input thread lock.
 * Returns 0 on success, ENXIO if polling is unsupported, or EINVAL
 * for inconsistent parameters.
 */
errno_t
dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;
	struct dlil_threading_info *inp;

	VERIFY(ifp != NULL);
	/* polling must be supported and the input thread instantiated */
	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&inp->dlth_lock);
	}
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the input thread detects a change in link rate. If the
	 * driver provides its own parameters right after a link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values. Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&inp->dlth_lock);
	}
	return 0;
}
2026
2027 /*
2028 * Must be called on an attached ifnet (caller is expected to check.)
2029 */
2030 errno_t
dlil_rxpoll_get_params(struct ifnet * ifp,struct ifnet_poll_params * p)2031 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2032 {
2033 struct dlil_threading_info *inp;
2034
2035 VERIFY(ifp != NULL && p != NULL);
2036 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2037 return ENXIO;
2038 }
2039
2040 bzero(p, sizeof(*p));
2041
2042 lck_mtx_lock(&inp->dlth_lock);
2043 p->packets_limit = ifp->if_rxpoll_plim;
2044 p->packets_lowat = ifp->if_rxpoll_plowat;
2045 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2046 p->bytes_lowat = ifp->if_rxpoll_blowat;
2047 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2048 p->interval_time = ifp->if_rxpoll_ival;
2049 lck_mtx_unlock(&inp->dlth_lock);
2050
2051 return 0;
2052 }
2053
2054 errno_t
ifnet_input(struct ifnet * ifp,struct mbuf * m_head,const struct ifnet_stat_increment_param * s)2055 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2056 const struct ifnet_stat_increment_param *s)
2057 {
2058 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2059 }
2060
2061 errno_t
ifnet_input_extended(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s)2062 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2063 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2064 {
2065 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2066 }
2067
2068 errno_t
ifnet_input_poll(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s)2069 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2070 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2071 {
2072 return ifnet_input_common(ifp, m_head, m_tail, s,
2073 (m_head != NULL), TRUE);
2074 }
2075
/*
 * Common ingress path for all ifnet_input* variants.
 *
 * Takes a chain of packets (m_head..m_tail) from a driver, validates
 * and counts it, and hands it to the interface's DLIL input function.
 * 'ext' selects the extended variant (driver-supplied stats 's' are
 * required and its packet count is trusted); 'poll' indicates the
 * call originates from the RX poller, in which case a NULL m_head is
 * a legal "no packets" notification.
 *
 * Consumes the mbuf chain in all cases (delivered or freed).
 * Returns 0 on success, EINVAL on bad parameters or a detached ifp.
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	/* NULL chain only valid from the poller; 'ext' requires stats */
	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail given: walk the chain, counting as we go */
		last = m_head;
		while (m_head != NULL) {
			m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* re-walk and re-count the chain to cross-check */
			last = m_head;
			while (1) {
				m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-supplied counts */
			m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	/* build a local stats copy with our own packet/byte counts */
	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	/* input administratively disabled: free the chain but account it */
	if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
		m_freem_list(m_head);

		os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
		os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);

		goto done;
	}

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

done:
	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
2204
2205
/*
 * Common helper to kick the interface's starter thread.
 * 'resetfc' clears a flow-control condition before deciding whether
 * to wake the starter; 'ignore_delay' sets IFSF_NO_DELAY so the
 * delayed-start heuristic is bypassed for this request.
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
{
	/* only interfaces using the starter thread model can be kicked */
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (ignore_delay) {
		ifp->if_start_flags |= IFSF_NO_DELAY;
	}
	if (resetfc) {
		/* lift any flow-control condition */
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	/* wake the starter only when it is idle and not deliberately delaying */
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
2238
2239 void
ifnet_start(struct ifnet * ifp)2240 ifnet_start(struct ifnet *ifp)
2241 {
2242 ifnet_start_common(ifp, FALSE, FALSE);
2243 }
2244
2245 void
ifnet_start_ignore_delay(struct ifnet * ifp)2246 ifnet_start_ignore_delay(struct ifnet *ifp)
2247 {
2248 ifnet_start_common(ifp, FALSE, TRUE);
2249 }
2250
/*
 * Entry point for the per-interface starter thread. Names the thread,
 * optionally binds it to the main input thread's affinity tag (lo0
 * only), then enters the embryonic state and blocks with
 * ifnet_start_thread_cont as the continuation.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	ifnet_ref_t ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), __unsafe_null_terminated_from_indexable(thread_name));

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag. This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *__single tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2316
/*
 * Continuation routine for the per-interface starter thread; runs
 * each time the thread is woken (or its wait deadline expires),
 * drives the driver's if_start routine until no more requests are
 * pending, then re-blocks on itself with thread_block_parameter().
 */
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	ifnet_ref_t ifp = v;
	struct ifclassq *ifq = ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
		goto terminate;
	}

	/* first wakeup after creation: report ready, then wait again */
	if (__improbable(ifp->if_start_embryonic)) {
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		/* delayed-start heuristic: let the queue fill up a bit first */
		if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
		    !IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		}
		ifp->if_start_flags &= ~IFSF_NO_DELAY;
		ifp->if_start_delayed = 0;
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
#define _IFSF_DISABLED (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

#if SKYWALK
	/*
	 * Wakeup any waiters, e.g. any threads waiting to
	 * detach the interface from the flowswitch, etc.
	 */
	if (ifp->if_start_waiters != 0) {
		ifp->if_start_waiters = 0;
		wakeup(&ifp->if_start_waiters);
	}
#endif /* SKYWALK */
	if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts = NULL;

		/* NOTE(review): ts is NULL here, so this check is always true */
		if (ts == NULL) {
			/* TBR active with queued packets: wake up each cycle */
			ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
			    &ifp->if_start_cycle : NULL);
		}

		/* delayed start pending: wake up after the delay timeout */
		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		/* a zero timespec means "wait forever" */
		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);

		/* clear if_start_thread to allow termination to continue */
		ASSERT(ifp->if_start_thread != THREAD_NULL);
		ifp->if_start_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2462
2463 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)2464 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2465 {
2466 if (ts == NULL) {
2467 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
2468 } else {
2469 *(&ifp->if_start_cycle) = *ts;
2470 }
2471
2472 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2473 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
2474 if_name(ifp), ts->tv_nsec);
2475 }
2476 }
2477
2478 static inline void
ifnet_poll_wakeup(struct ifnet * ifp)2479 ifnet_poll_wakeup(struct ifnet *ifp)
2480 {
2481 LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
2482
2483 ifp->if_poll_req++;
2484 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
2485 ifp->if_poll_thread != THREAD_NULL) {
2486 wakeup_one((caddr_t)&ifp->if_poll_thread);
2487 }
2488 }
2489
2490 void
ifnet_poll(struct ifnet * ifp)2491 ifnet_poll(struct ifnet *ifp)
2492 {
2493 /*
2494 * If the poller thread is inactive, signal it to do work.
2495 */
2496 lck_mtx_lock_spin(&ifp->if_poll_lock);
2497 ifnet_poll_wakeup(ifp);
2498 lck_mtx_unlock(&ifp->if_poll_lock);
2499 }
2500
/*
 * Entry point for the per-interface RX poller thread. Names the
 * thread, enters the embryonic state, and blocks with
 * ifnet_poll_thread_cont as the continuation.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	ifnet_ref_t ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, __unsafe_null_terminated_from_indexable(thread_name));

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2529
/*
 * Continuation routine for the RX poller thread: services poll
 * requests by pulling packet chains from the driver's if_input_poll
 * callback and feeding them into the DLIL input path, then re-blocks
 * on itself (possibly with a deadline derived from if_poll_cycle).
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	ifnet_ref_t ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	/* first wakeup after creation: report ready, then wait again */
	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		mbuf_ref_t m_head, m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		/* cap each poll at the configured (or derived) packet limit */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		    &m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			/* empty poll: "no packets" notification */
			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2696
2697 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)2698 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2699 {
2700 if (ts == NULL) {
2701 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
2702 } else {
2703 *(&ifp->if_poll_cycle) = *ts;
2704 }
2705
2706 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2707 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
2708 if_name(ifp), ts->tv_nsec);
2709 }
2710 }
2711
2712 void
ifnet_purge(struct ifnet * ifp)2713 ifnet_purge(struct ifnet *ifp)
2714 {
2715 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
2716 if_qflush_snd(ifp, false);
2717 }
2718 }
2719
2720 void
ifnet_update_sndq(struct ifclassq * ifq,cqev_t ev)2721 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2722 {
2723 IFCQ_LOCK_ASSERT_HELD(ifq);
2724
2725 if (!(IFCQ_IS_READY(ifq))) {
2726 return;
2727 }
2728
2729 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2730 struct tb_profile tb = {
2731 .rate = ifq->ifcq_tbr.tbr_rate_raw,
2732 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
2733 };
2734 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2735 }
2736
2737 ifclassq_update(ifq, ev);
2738 }
2739
2740 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)2741 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2742 {
2743 switch (ev) {
2744 case CLASSQ_EV_LINK_BANDWIDTH:
2745 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
2746 ifp->if_poll_update++;
2747 }
2748 break;
2749
2750 default:
2751 break;
2752 }
2753 }
2754
2755 errno_t
ifnet_set_output_sched_model(struct ifnet * ifp,u_int32_t model)2756 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2757 {
2758 struct ifclassq *ifq;
2759 u_int32_t omodel;
2760 errno_t err;
2761
2762 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
2763 return EINVAL;
2764 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2765 return ENXIO;
2766 }
2767
2768 ifq = ifp->if_snd;
2769 IFCQ_LOCK(ifq);
2770 omodel = ifp->if_output_sched_model;
2771 ifp->if_output_sched_model = model;
2772 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
2773 ifp->if_output_sched_model = omodel;
2774 }
2775 IFCQ_UNLOCK(ifq);
2776
2777 return err;
2778 }
2779
2780 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2781 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2782 {
2783 if (ifp == NULL) {
2784 return EINVAL;
2785 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2786 return ENXIO;
2787 }
2788
2789 ifclassq_set_maxlen(ifp->if_snd, maxqlen);
2790
2791 return 0;
2792 }
2793
2794 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2795 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2796 {
2797 if (ifp == NULL || maxqlen == NULL) {
2798 return EINVAL;
2799 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2800 return ENXIO;
2801 }
2802
2803 *maxqlen = ifclassq_get_maxlen(ifp->if_snd);
2804
2805 return 0;
2806 }
2807
2808 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)2809 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2810 {
2811 errno_t err;
2812
2813 if (ifp == NULL || pkts == NULL) {
2814 err = EINVAL;
2815 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2816 err = ENXIO;
2817 } else {
2818 err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
2819 IF_CLASSQ_ALL_GRPS, pkts, NULL);
2820 }
2821
2822 return err;
2823 }
2824
2825 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)2826 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2827 u_int32_t *pkts, u_int32_t *bytes)
2828 {
2829 errno_t err;
2830
2831 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2832 (pkts == NULL && bytes == NULL)) {
2833 err = EINVAL;
2834 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2835 err = ENXIO;
2836 } else {
2837 err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
2838 pkts, bytes);
2839 }
2840
2841 return err;
2842 }
2843
2844 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2845 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2846 {
2847 struct dlil_threading_info *inp;
2848
2849 if (ifp == NULL) {
2850 return EINVAL;
2851 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2852 return ENXIO;
2853 }
2854
2855 if (maxqlen == 0) {
2856 maxqlen = if_rcvq_maxlen;
2857 } else if (maxqlen < IF_RCVQ_MINLEN) {
2858 maxqlen = IF_RCVQ_MINLEN;
2859 }
2860
2861 inp = ifp->if_inp;
2862 lck_mtx_lock(&inp->dlth_lock);
2863 qlimit(&inp->dlth_pkts) = maxqlen;
2864 lck_mtx_unlock(&inp->dlth_lock);
2865
2866 return 0;
2867 }
2868
2869 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2870 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2871 {
2872 struct dlil_threading_info *inp;
2873
2874 if (ifp == NULL || maxqlen == NULL) {
2875 return EINVAL;
2876 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2877 return ENXIO;
2878 }
2879
2880 inp = ifp->if_inp;
2881 lck_mtx_lock(&inp->dlth_lock);
2882 *maxqlen = qlimit(&inp->dlth_pkts);
2883 lck_mtx_unlock(&inp->dlth_lock);
2884 return 0;
2885 }
2886
2887 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)2888 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
2889 uint16_t delay_timeout)
2890 {
2891 if (delay_qlen > 0 && delay_timeout > 0) {
2892 if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
2893 ifp->if_start_delay_qlen = MIN(100, delay_qlen);
2894 ifp->if_start_delay_timeout = min(20000, delay_timeout);
2895 /* convert timeout to nanoseconds */
2896 ifp->if_start_delay_timeout *= 1000;
2897 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
2898 ifp->if_xname, (uint32_t)delay_qlen,
2899 (uint32_t)delay_timeout);
2900 } else {
2901 if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
2902 }
2903 }
2904
/*
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
 * buf holds the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *__indexable buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	/* Aligned bounce buffer used when buf is not suitably aligned. */
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			/* Operate on an aligned copy; written back below. */
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		/* Fast path: DSCP already zero, nothing to rewrite. */
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		/*
		 * Incrementally adjust the IPv4 header checksum for the
		 * changed TOS byte, then fold the carry back in once.
		 */
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;
		ASSERT(ip_ver == IPV6_VERSION);

		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		/* Fast path: DSCP already zero, nothing to rewrite. */
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		/* IPv6 has no header checksum; just clear the DSCP field. */
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
2965
/*
 * Enqueue one packet (mbuf or native Skywalk packet) onto the interface's
 * send classq (or the supplied ifcq override).  Along the way this stamps
 * the packet timestamp, updates the foreground/realtime activity markers
 * (and, under SKYWALK, the nexus advisory copies), applies the Wi-Fi
 * multicast DSCP-clearing workaround, runs the delayed-start heuristics,
 * and finally kicks the driver's start thread.  The caller relinquishes
 * ownership of the packet regardless of the outcome; *pdrop reports
 * whether the classq dropped it.
 */
static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	/* Non-NULL iff the multicast DSCP workaround applies to this packet. */
	uint8_t *mcast_buf = NULL;
	uint8_t ip_ver;
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here. Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)_net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)_net_uptime;
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)_net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)_net_uptime;
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* Ensure at least the Ethernet header is contiguous. */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = mtod(p->cp_mbuf, struct ether_header *);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* Non-IP multicast: no DSCP to clear. */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				/* m_pullup may relocate data; refetch eh. */
				eh = mtod(p->cp_mbuf, struct ether_header *);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface. If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)_net_uptime;
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)_net_uptime;
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)_net_uptime;
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)_net_uptime;
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			/* Runt frames: skip the workaround, enqueue as-is. */
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* Non-IP multicast: no DSCP to clear. */
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This is
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* Still inside the sample window: count it. */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* Idle >= 200ms: reset and disable delaying. */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* Window expired: evaluate the heuristics. */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				/* Start a fresh sample window. */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* First enqueue: open the initial sample window. */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
3275
3276 static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3277 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3278 classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
3279 boolean_t flush, boolean_t *pdrop)
3280 {
3281 int error;
3282
3283 /* enqueue the packet (caller consumes object) */
3284 error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
3285 cnt, bytes, pdrop);
3286
3287 /*
3288 * Tell the driver to start dequeueing; do this even when the queue
3289 * for the packet is suspended (EQSUSPENDED), as the driver could still
3290 * be dequeueing from other unsuspended queues.
3291 */
3292 if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
3293 ifnet_start(ifp);
3294 }
3295 return error;
3296 }
3297
3298 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * __sized_by (n_pkts)pkts,uint32_t n_pkts)3299 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *__sized_by(n_pkts)pkts, uint32_t n_pkts)
3300 {
3301 ifnet_ref_t ifp = handle;
3302 boolean_t pdrop; /* dummy */
3303 uint32_t i;
3304
3305 ASSERT(n_pkts >= 1);
3306 for (i = 0; i < n_pkts - 1; i++) {
3307 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3308 FALSE, &pdrop);
3309 }
3310 /* flush with the last packet */
3311 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3312 TRUE, &pdrop);
3313
3314 return 0;
3315 }
3316
3317 static inline errno_t
ifnet_enqueue_common(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)3318 ifnet_enqueue_common(struct ifnet *ifp, struct ifclassq *ifcq,
3319 classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
3320 {
3321 if (ifp->if_output_netem != NULL) {
3322 bool drop;
3323 errno_t error;
3324 error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
3325 *pdrop = drop ? TRUE : FALSE;
3326 return error;
3327 } else {
3328 return ifnet_enqueue_ifclassq(ifp, ifcq, pkt, flush, pdrop);
3329 }
3330 }
3331
3332 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)3333 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3334 {
3335 uint32_t bytes = m_pktlen(m);
3336 struct mbuf *tail = m;
3337 uint32_t cnt = 1;
3338 boolean_t pdrop;
3339
3340 while (tail->m_nextpkt) {
3341 VERIFY(tail->m_flags & M_PKTHDR);
3342 tail = tail->m_nextpkt;
3343 cnt++;
3344 bytes += m_pktlen(tail);
3345 }
3346
3347 return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
3348 }
3349
3350 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)3351 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3352 boolean_t *pdrop)
3353 {
3354 classq_pkt_t pkt;
3355
3356 m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
3357 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3358 m->m_nextpkt != NULL) {
3359 if (m != NULL) {
3360 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_INVALID, NULL, 0);
3361 *pdrop = TRUE;
3362 }
3363 return EINVAL;
3364 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3365 !IF_FULLY_ATTACHED(ifp)) {
3366 /* flag tested without lock for performance */
3367 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3368 *pdrop = TRUE;
3369 return ENXIO;
3370 } else if (!(ifp->if_flags & IFF_UP)) {
3371 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3372 *pdrop = TRUE;
3373 return ENETDOWN;
3374 }
3375
3376 CLASSQ_PKT_INIT_MBUF(&pkt, m);
3377 return ifnet_enqueue_common(ifp, NULL, &pkt, flush, pdrop);
3378 }
3379
3380 errno_t
ifnet_enqueue_mbuf_chain(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3381 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
3382 struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
3383 boolean_t *pdrop)
3384 {
3385 classq_pkt_t head, tail;
3386
3387 m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
3388 ASSERT(m_head != NULL);
3389 ASSERT((m_head->m_flags & M_PKTHDR) != 0);
3390 ASSERT(m_tail != NULL);
3391 ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
3392 ASSERT(ifp != NULL);
3393 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3394
3395 if (!IF_FULLY_ATTACHED(ifp)) {
3396 /* flag tested without lock for performance */
3397 m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3398 *pdrop = TRUE;
3399 return ENXIO;
3400 } else if (!(ifp->if_flags & IFF_UP)) {
3401 m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3402 *pdrop = TRUE;
3403 return ENETDOWN;
3404 }
3405
3406 CLASSQ_PKT_INIT_MBUF(&head, m_head);
3407 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3408 return ifnet_enqueue_ifclassq_chain(ifp, NULL, &head, &tail, cnt, bytes,
3409 flush, pdrop);
3410 }
3411
3412 #if SKYWALK
3413 static errno_t
ifnet_enqueue_pkt_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3414 ifnet_enqueue_pkt_common(struct ifnet *ifp, struct ifclassq *ifcq,
3415 struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3416 {
3417 classq_pkt_t pkt;
3418
3419 ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
3420
3421 if (__improbable(ifp == NULL || kpkt == NULL)) {
3422 if (kpkt != NULL) {
3423 pp_free_packet(__DECONST(struct kern_pbufpool *,
3424 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3425 *pdrop = TRUE;
3426 }
3427 return EINVAL;
3428 } else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
3429 !IF_FULLY_ATTACHED(ifp))) {
3430 /* flag tested without lock for performance */
3431 pp_free_packet(__DECONST(struct kern_pbufpool *,
3432 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3433 *pdrop = TRUE;
3434 return ENXIO;
3435 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3436 pp_free_packet(__DECONST(struct kern_pbufpool *,
3437 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3438 *pdrop = TRUE;
3439 return ENETDOWN;
3440 }
3441
3442 CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
3443 return ifnet_enqueue_common(ifp, ifcq, &pkt, flush, pdrop);
3444 }
3445
/*
 * Enqueue a single Skywalk packet on the interface's default send queue.
 * Thin wrapper around ifnet_enqueue_pkt_common() with no classq override.
 */
errno_t
ifnet_enqueue_pkt(struct ifnet *ifp, struct __kern_packet *kpkt,
    boolean_t flush, boolean_t *pdrop)
{
	return ifnet_enqueue_pkt_common(ifp, NULL, kpkt, flush, pdrop);
}
3452
/*
 * Enqueue a single Skywalk packet on a caller-specified classq.
 * Thin wrapper around ifnet_enqueue_pkt_common().
 */
errno_t
ifnet_enqueue_ifcq_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
    struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
{
	return ifnet_enqueue_pkt_common(ifp, ifcq, kpkt, flush, pdrop);
}
3459
3460 static errno_t
ifnet_enqueue_pkt_chain_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3461 ifnet_enqueue_pkt_chain_common(struct ifnet *ifp, struct ifclassq *ifcq,
3462 struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3463 uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3464 {
3465 classq_pkt_t head, tail;
3466
3467 ASSERT(k_head != NULL);
3468 ASSERT(k_tail != NULL);
3469 ASSERT(ifp != NULL);
3470 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3471
3472 if (!IF_FULLY_ATTACHED(ifp)) {
3473 /* flag tested without lock for performance */
3474 pp_free_packet_chain(k_head, NULL);
3475 *pdrop = TRUE;
3476 return ENXIO;
3477 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3478 pp_free_packet_chain(k_head, NULL);
3479 *pdrop = TRUE;
3480 return ENETDOWN;
3481 }
3482
3483 CLASSQ_PKT_INIT_PACKET(&head, k_head);
3484 CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
3485 return ifnet_enqueue_ifclassq_chain(ifp, ifcq, &head, &tail, cnt, bytes,
3486 flush, pdrop);
3487 }
3488
/*
 * Enqueue a Skywalk packet chain on the interface's default send queue.
 * Thin wrapper around ifnet_enqueue_pkt_chain_common().
 */
errno_t
ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct __kern_packet *k_head,
    struct __kern_packet *k_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	return ifnet_enqueue_pkt_chain_common(ifp, NULL, k_head, k_tail,
	    cnt, bytes, flush, pdrop);
}
3497
/*
 * Enqueue a Skywalk packet chain on a caller-specified classq.
 * Thin wrapper around ifnet_enqueue_pkt_chain_common().
 */
errno_t
ifnet_enqueue_ifcq_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
    struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
    uint32_t bytes, boolean_t flush, boolean_t *pdrop)
{
	return ifnet_enqueue_pkt_chain_common(ifp, ifcq, k_head, k_tail,
	    cnt, bytes, flush, pdrop);
}
3506 #endif /* SKYWALK */
3507
3508 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)3509 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3510 {
3511 errno_t rc;
3512 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3513
3514 if (ifp == NULL || mp == NULL) {
3515 return EINVAL;
3516 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3517 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3518 return ENXIO;
3519 }
3520 if (!ifnet_is_attached(ifp, 1)) {
3521 return ENXIO;
3522 }
3523
3524 #if SKYWALK
3525 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3526 #endif /* SKYWALK */
3527 rc = ifclassq_dequeue(ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3528 &pkt, NULL, NULL, NULL, 0);
3529 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3530 ifnet_decr_iorefcnt(ifp);
3531 *mp = pkt.cp_mbuf;
3532 m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3533 return rc;
3534 }
3535
3536 errno_t
ifnet_dequeue_service_class(struct ifnet * ifp,mbuf_svc_class_t sc,struct mbuf ** mp)3537 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3538 struct mbuf **mp)
3539 {
3540 errno_t rc;
3541 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3542
3543 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
3544 return EINVAL;
3545 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3546 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3547 return ENXIO;
3548 }
3549 if (!ifnet_is_attached(ifp, 1)) {
3550 return ENXIO;
3551 }
3552
3553 #if SKYWALK
3554 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3555 #endif /* SKYWALK */
3556 rc = ifclassq_dequeue_sc(ifp->if_snd, sc, 1,
3557 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
3558 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3559 ifnet_decr_iorefcnt(ifp);
3560 *mp = pkt.cp_mbuf;
3561 m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3562 return rc;
3563 }
3564
3565 errno_t
ifnet_dequeue_multi(struct ifnet * ifp,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3566 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3567 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3568 {
3569 errno_t rc;
3570 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3571 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3572
3573 if (ifp == NULL || head == NULL || pkt_limit < 1) {
3574 return EINVAL;
3575 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3576 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3577 return ENXIO;
3578 }
3579 if (!ifnet_is_attached(ifp, 1)) {
3580 return ENXIO;
3581 }
3582
3583 #if SKYWALK
3584 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3585 #endif /* SKYWALK */
3586 rc = ifclassq_dequeue(ifp->if_snd, pkt_limit,
3587 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
3588 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3589 ifnet_decr_iorefcnt(ifp);
3590 *head = pkt_head.cp_mbuf;
3591 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3592 if (tail != NULL) {
3593 *tail = pkt_tail.cp_mbuf;
3594 }
3595 return rc;
3596 }
3597
3598 errno_t
ifnet_dequeue_multi_bytes(struct ifnet * ifp,u_int32_t byte_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3599 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3600 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3601 {
3602 errno_t rc;
3603 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3604 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3605
3606 if (ifp == NULL || head == NULL || byte_limit < 1) {
3607 return EINVAL;
3608 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3609 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3610 return ENXIO;
3611 }
3612 if (!ifnet_is_attached(ifp, 1)) {
3613 return ENXIO;
3614 }
3615
3616 #if SKYWALK
3617 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3618 #endif /* SKYWALK */
3619 rc = ifclassq_dequeue(ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3620 byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
3621 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3622 ifnet_decr_iorefcnt(ifp);
3623 *head = pkt_head.cp_mbuf;
3624 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3625 if (tail != NULL) {
3626 *tail = pkt_tail.cp_mbuf;
3627 }
3628 return rc;
3629 }
3630
3631 errno_t
ifnet_dequeue_service_class_multi(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3632 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3633 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3634 u_int32_t *len)
3635 {
3636 errno_t rc;
3637 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3638 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3639
3640 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3641 !MBUF_VALID_SC(sc)) {
3642 return EINVAL;
3643 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3644 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3645 return ENXIO;
3646 }
3647 if (!ifnet_is_attached(ifp, 1)) {
3648 return ENXIO;
3649 }
3650
3651 #if SKYWALK
3652 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3653 #endif /* SKYWALK */
3654 rc = ifclassq_dequeue_sc(ifp->if_snd, sc, pkt_limit,
3655 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
3656 cnt, len, 0);
3657 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3658 ifnet_decr_iorefcnt(ifp);
3659 *head = pkt_head.cp_mbuf;
3660 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3661 if (tail != NULL) {
3662 *tail = pkt_tail.cp_mbuf;
3663 }
3664 return rc;
3665 }
3666
3667 #if XNU_TARGET_OS_OSX
/*
 * Adapter shim for legacy framer callbacks: zero the optional pre/post
 * frame-length outputs (the legacy interface does not report them) and
 * delegate framing to the driver's legacy framer.
 */
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest,
    IFNET_LLADDR_T dest_linkaddr,
    IFNET_FRAME_TYPE_T frame_type,
    u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
3684 #endif /* XNU_TARGET_OS_OSX */
3685
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	/* Interface state changed: let NECP clients re-evaluate. */
	necp_update_all_clients();
#endif /* NECP */

	/* Deliver the kernel event; its return value is passed through. */
	return kev_post_msg(event);
}
3700
3701 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)3702 dlil_post_sifflags_msg(struct ifnet * ifp)
3703 {
3704 struct kev_msg ev_msg;
3705 struct net_event_data ev_data;
3706
3707 bzero(&ev_data, sizeof(ev_data));
3708 bzero(&ev_msg, sizeof(ev_msg));
3709 ev_msg.vendor_code = KEV_VENDOR_APPLE;
3710 ev_msg.kev_class = KEV_NETWORK_CLASS;
3711 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3712 ev_msg.event_code = KEV_DL_SIFFLAGS;
3713 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3714 ev_data.if_family = ifp->if_family;
3715 ev_data.if_unit = (u_int32_t) ifp->if_unit;
3716 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
3717 ev_msg.dv[0].data_ptr = &ev_data;
3718 ev_msg.dv[1].data_length = 0;
3719 dlil_post_complete_msg(ifp, &ev_msg);
3720 }
3721
3722 #define TMP_IF_PROTO_ARR_SIZE 10
/*
 * Fan an interface event out to (1) the attached interface filters,
 * (2) every protocol attached to the interface, and (3) the interface's
 * own event callback, then post the kernel event.  update_generation
 * selects whether the interface generation count is bumped as part of
 * posting.
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	/* Small on-stack array; heap-allocated only when it overflows. */
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			/* drop the lock around the callback; it may block */
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		/* Snapshot all protocols, taking a reference on each. */
		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* Deliver the event to each snapshotted protocol, unlocked. */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
3822
3823 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)3824 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3825 {
3826 struct kev_msg kev_msg;
3827 int result = 0;
3828
3829 if (ifp == NULL || event == NULL) {
3830 return EINVAL;
3831 }
3832
3833 bzero(&kev_msg, sizeof(kev_msg));
3834 kev_msg.vendor_code = event->vendor_code;
3835 kev_msg.kev_class = event->kev_class;
3836 kev_msg.kev_subclass = event->kev_subclass;
3837 kev_msg.event_code = event->event_code;
3838 kev_msg.dv[0].data_ptr = &event->event_data;
3839 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3840 kev_msg.dv[1].data_length = 0;
3841
3842 result = dlil_event_internal(ifp, &kev_msg, TRUE);
3843
3844 return result;
3845 }
3846
/* The following is used to enqueue work items for ifnet ioctl events */
static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);

/*
 * Deferred-ioctl argument: the target interface (the enqueuer holds an
 * io refcnt on it, released by the callback) and the ioctl to replay.
 */
struct ifnet_ioctl_event {
	ifnet_ref_t ifp;
	u_long ioctl_code;
};

/*
 * Work-queue item wrapper; the callback recovers this container from
 * the embedded nwk_wqe via __container_of().
 */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
3859
3860 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)3861 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
3862 {
3863 struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
3864 bool compare_expected;
3865
3866 /*
3867 * Get an io ref count if the interface is attached.
3868 * At this point it most likely is. We are taking a reference for
3869 * deferred processing.
3870 */
3871 if (!ifnet_is_attached(ifp, 1)) {
3872 os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
3873 "is not attached",
3874 __func__, __LINE__, if_name(ifp), ioctl_code);
3875 return;
3876 }
3877 switch (ioctl_code) {
3878 case SIOCADDMULTI:
3879 compare_expected = false;
3880 if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
3881 ifnet_decr_iorefcnt(ifp);
3882 return;
3883 }
3884 break;
3885 case SIOCDELMULTI:
3886 compare_expected = false;
3887 if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
3888 ifnet_decr_iorefcnt(ifp);
3889 return;
3890 }
3891 break;
3892 default:
3893 os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
3894 __func__, __LINE__, if_name(ifp), ioctl_code);
3895 return;
3896 }
3897
3898 p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
3899 Z_WAITOK | Z_ZERO | Z_NOFAIL);
3900
3901 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
3902 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
3903 p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
3904 nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
3905 }
3906
3907 static void
ifnet_ioctl_event_callback(struct nwk_wq_entry * nwk_item)3908 ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
3909 {
3910 struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
3911 struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);
3912
3913 ifnet_ref_t ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
3914 u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
3915 int ret = 0;
3916
3917 switch (ioctl_code) {
3918 case SIOCADDMULTI:
3919 atomic_store(&ifp->if_mcast_add_signaled, false);
3920 break;
3921 case SIOCDELMULTI:
3922 atomic_store(&ifp->if_mcast_del_signaled, false);
3923 break;
3924 }
3925 if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
3926 os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
3927 __func__, __LINE__, if_name(ifp), ret, ioctl_code);
3928 } else if (dlil_verbose) {
3929 os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
3930 "for ioctl %lu",
3931 __func__, __LINE__, if_name(ifp), ioctl_code);
3932 }
3933 ifnet_decr_iorefcnt(ifp);
3934 kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
3935 return;
3936 }
3937
/*
 * Dispatch an ioctl to an interface, offering it in order to:
 *   1. every attached interface filter whose protocol matches (or that
 *      is protocol-agnostic, filt_protocol == 0),
 *   2. the protocol attached for proto_fam (when proto_fam != 0),
 *   3. the driver's if_ioctl routine.
 *
 * The first result other than 0/EOPNOTSUPP wins and stops dispatch;
 * EJUSTRETURN also stops dispatch and is reported to the caller as 0.
 * ENOTSUP from any handler is normalized to EOPNOTSUPP.
 *
 * Returns EINVAL on bad arguments, EOPNOTSUPP when the interface is
 * not attached or nobody handled the request, otherwise the winning
 * handler's result.
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;	/* sentinel: "not handled yet" */
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/*
			 * Drop the filter lock across the callback; the
			 * busy state taken above keeps the list stable.
			 */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			/* pick the v1 or v2 ioctl callback as appropriate */
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, report success to the caller" */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
4055
4056 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)4057 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4058 {
4059 errno_t error = 0;
4060
4061 if (ifp->if_set_bpf_tap) {
4062 /* Get an io reference on the interface if it is attached */
4063 if (!ifnet_is_attached(ifp, 1)) {
4064 return ENXIO;
4065 }
4066 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4067 ifnet_decr_iorefcnt(ifp);
4068 }
4069 return error;
4070 }
4071
/*
 * Resolve a multicast protocol address to its link-layer form.
 *
 * ll_addr is zeroed, then the attached protocol's resolve_multi
 * callback (v1 or v2) is asked to fill it in.  If the driver exposes
 * if_check_multi, it then verifies either the resolved link-layer
 * address (on success) or the raw protocol address (when no resolver
 * ran, result still EOPNOTSUPP).
 *
 * Returns EOPNOTSUPP when the interface is not attached or nothing
 * could resolve/verify the address, otherwise the callback results.
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	/* take an io refcnt; released before returning */
	if (!ifnet_is_attached(ifp, 1)) {
		return result;
	}

	SOCKADDR_ZERO(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			verify = ll_addr;
		} else {
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return result;
}
4113
/*
 * Send one ARP packet via the protocol attached for the target's
 * address family.  Updates the arpstat tx counters (requests, unicast
 * requests when target_hw is supplied, replies) before dispatching to
 * the protocol's send_arp callback.
 *
 * Returns ENOTSUP when the interface has IFF_NOARP set, no matching
 * protocol is attached, or the protocol lacks a send_arp callback;
 * otherwise the callback's result.
 */
__private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
	struct if_proto *proto;
	errno_t result = 0;

	/* interfaces that opt out of ARP never send it */
	if ((ifp->if_flags & IFF_NOARP) != 0) {
		result = ENOTSUP;
		goto done;
	}

	/* callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, target_proto->sa_family);
	ifnet_lock_done(ifp);
	if (proto == NULL) {
		result = ENOTSUP;
	} else {
		proto_media_send_arp arpp;
		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
		if (arpp == NULL) {
			result = ENOTSUP;
		} else {
			switch (arpop) {
			case ARPOP_REQUEST:
				arpstat.txrequests++;
				/* a supplied target_hw means a unicast request */
				if (target_hw != NULL) {
					arpstat.txurequests++;
				}
				break;
			case ARPOP_REPLY:
				arpstat.txreplies++;
				break;
			}
			result = arpp(ifp, arpop, sender_hw, sender_proto,
			    target_hw, target_proto);
		}
		if_proto_free(proto);
	}
done:
	return result;
}
4159
/*
 * An ARP announcement is a request in which the sender and target
 * carry the same IPv4 address.  Returns nonzero (1) for an
 * announcement, 0 otherwise (including when either address is absent).
 */
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	if (sender_sin == NULL || target_sin == NULL) {
		return 0;
	}
	return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
}
4170
/*
 * Send an ARP packet.  Normally this dispatches to the given interface
 * via dlil_send_arp_internal().  Two special cases:
 *
 *  - If the target is a (default) router (RTF_ROUTER), the target
 *    sockaddr is copied into a sockaddr_inarp with SIN_ROUTER set so
 *    the protocol's send_arp callback can tell.
 *
 *  - If this is an ARP request for an IPv4 link-local target (and not
 *    a self-announcement, and ipv4_ll_arp_aware is enabled), the
 *    request is broadcast on every IFEF_ARPLL-marked interface that
 *    has an IPv4 source address, rather than just on ifp.
 *
 * Returns EINVAL on missing/mismatched address families; in the
 * multi-interface case, ENOTSUP unless at least one send attempt
 * produced a different result; otherwise the single send's result.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = __DECONST_SA(target_proto0);

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = SA(&target_proto_sinarp);
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces. The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = SIN(sender_proto);
	target_sin = SIN(target_proto);
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		u_int32_t count;
		ifnet_ref_t *__counted_by(count) ifp_list;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				ifnet_ref_t cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing. This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* hold the link-layer address across the send */
				ifa_addref(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, SDL(source_hw->ifa_addr),
				    SA(&source_ip_copy), NULL,
				    target_proto);

				ifa_remref(source_hw);
				/* keep the first non-ENOTSUP outcome */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free_counted_by(ifp_list, count);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
4281
4282 /*
4283 * Caller must hold ifnet head lock.
4284 */
4285 static int
ifnet_lookup(struct ifnet * ifp)4286 ifnet_lookup(struct ifnet *ifp)
4287 {
4288 ifnet_ref_t _ifp;
4289
4290 ifnet_head_lock_assert(LCK_RW_ASSERT_HELD);
4291 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4292 if (_ifp == ifp) {
4293 break;
4294 }
4295 }
4296 return _ifp != NULL;
4297 }
4298
4299 /*
4300 * Caller has to pass a non-zero refio argument to get a
4301 * IO reference count. This will prevent ifnet_detach from
4302 * being called when there are outstanding io reference counts.
4303 */
4304 int
ifnet_is_attached(struct ifnet * ifp,int refio)4305 ifnet_is_attached(struct ifnet *ifp, int refio)
4306 {
4307 int ret;
4308
4309 lck_mtx_lock_spin(&ifp->if_ref_lock);
4310 if ((ret = IF_FULLY_ATTACHED(ifp))) {
4311 if (refio > 0) {
4312 ifp->if_refio++;
4313 }
4314 }
4315 lck_mtx_unlock(&ifp->if_ref_lock);
4316
4317 return ret;
4318 }
4319
/*
 * Record that another thread servicing this interface is pending;
 * paired with ifnet_decr_pending_thread_count().
 */
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4327
/*
 * Drop one pending-thread count; wakes any waiter sleeping on
 * if_threads_pending once the count reaches zero.
 */
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4339
4340 /*
4341 * Caller must ensure the interface is attached; the assumption is that
4342 * there is at least an outstanding IO reference count held already.
4343 * Most callers would call ifnet_is_{attached,data_ready}() instead.
4344 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	/* caller must already hold an io ref, so the count cannot be 0 */
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4354
/*
 * Drop one io reference with if_ref_lock already held; when the count
 * hits zero during a detach (IFRF_DETACHING), wake the thread sleeping
 * on if_refio in ifnet_detach's drain path.
 */
__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	/* every datamov count is backed by an io ref, so refio >= datamov */
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}
4375
/* Locked wrapper around ifnet_decr_iorefcnt_locked(). */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4383
/*
 * Enter the data path: when the interface is fully attached AND ready
 * (not suspended), take both an io ref and a datamov count and return
 * TRUE.  Returns FALSE (no references taken) otherwise.  Paired with
 * ifnet_datamov_end().
 */
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
	return ret;
}
4399
/*
 * Leave the data path: drop the datamov count and the io ref taken by
 * ifnet_datamov_begin().  The last data mover wakes any drainer
 * blocked in ifnet_datamov_drain().
 */
void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
		DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);

	DTRACE_IP1(datamov__end, struct ifnet *, ifp);
}
4419
/*
 * Suspend data movement with if_ref_lock held: take an io ref (dropped
 * by ifnet_datamov_resume()) and, on the first suspension, clear
 * IFRF_READY so ifnet_datamov_begin() starts failing.
 */
static void
ifnet_datamov_suspend_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
}
4430
/* Locked wrapper around ifnet_datamov_suspend_locked(). */
void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4439
4440 boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet * ifp)4441 ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
4442 {
4443 lck_mtx_lock_spin(&ifp->if_ref_lock);
4444 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
4445 if (ifp->if_suspend > 0) {
4446 lck_mtx_unlock(&ifp->if_ref_lock);
4447 return FALSE;
4448 }
4449 ifnet_datamov_suspend_locked(ifp);
4450 lck_mtx_unlock(&ifp->if_ref_lock);
4451 return TRUE;
4452 }
4453
/*
 * Block until no thread is moving data on the interface, then purge
 * its transmit queues.  Data movement must already be suspended
 * (if_suspend > 0, IFRF_READY cleared), so no new movers can enter
 * while we sleep on if_datamov.
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush_snd(ifp, false);
	}
}
4481
/* Convenience: suspend data movement, then wait for it to quiesce. */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
4488
/*
 * Undo one ifnet_datamov_suspend(): the last resumer restores
 * IFRF_READY so the data path may run again; the io ref taken at
 * suspend time is dropped.
 */
void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4502
/*
 * Common attach path for v1/v2 protocols: validate, let the family
 * refine the demux descriptors, insert the proto at the tail of its
 * hash chain, take the attach refcnt, and post KEV_DL_PROTO_ATTACHED.
 *
 * On success *proto_count (if non-NULL) receives the number of
 * protocols now attached.  Returns EINVAL (non-PF_BRIDGE on vmnet),
 * ENXIO (interface not attached), EEXIST (family already attached), or
 * the family's if_add_proto error.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *__counted_by(demux_count) demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	ifnet_ref_t ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	/* take an io refcnt; released at ioref_done */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		ifnet_lock_done(ifp);
		/* drop the lookup refcnt before reporting the conflict */
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
4582
/*
 * Post-attach side effects common to v1/v2 protocol attach: mark the
 * interface IFF_UP, push the flag change through SIOCSIFFLAGS, and
 * post the sifflags kernel event.  Under SKYWALK, attaching IP (v4 or
 * v6) also attaches the flowswitch nexus.
 */
static void
dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
{
	/*
	 * A protocol has been attached, mark the interface up.
	 * This used to be done by configd.KernelEventMonitor, but that
	 * is inherently prone to races (rdar://problem/30810208).
	 */
	(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
	(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
	dlil_post_sifflags_msg(ifp);
#if SKYWALK
	switch (protocol) {
	case AF_INET:
	case AF_INET6:
		/* don't attach the flowswitch unless attaching IP */
		dlil_attach_flowswitch_nexus(ifp);
		break;
	default:
		break;
	}
#endif /* SKYWALK */
}
4606
/*
 * Attach a v1 protocol to an interface: copy the caller's callback
 * table into a freshly allocated if_proto and hand it to
 * dlil_attach_protocol().  On success, dlil_handle_proto_attach()
 * brings the interface up; on failure the if_proto is freed.
 *
 * Returns EINVAL on bad arguments, ENXIO when ifp is not on the global
 * list, EEXIST when the family is already attached, or the attach
 * error.
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	/* head lock held across the attach so ifp can't leave the list */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		dlif_proto_free(ifproto);
	}
	return retval;
}
4668
/*
 * v2 counterpart of ifnet_attach_protocol(); identical flow except the
 * callbacks populate the kpi.v2 table (v2 input takes mbuf chains
 * without a separate frame header pointer).
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	/* head lock held across the attach so ifp can't leave the list */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		dlif_proto_free(ifproto);
	}
	return retval;
}
4730
/*
 * Detach a protocol family from an interface: notify the family module
 * (if_del_proto), unlink the if_proto from its hash chain, swap every
 * callback for the inert ifproto_media_* stubs so racing callers hit
 * ENXIO instead of freed code, then drop both the attach refcnt and
 * the lookup refcnt.  Final teardown runs when the last proto
 * reference is released.
 *
 * Returns EINVAL on bad arguments, ENXIO when the family is not
 * attached, else 0.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/* neutralize callbacks so late callers get ENXIO stubs */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
4796
/*
 * Inert v1 input stub installed by ifnet_detach_protocol(); rejects
 * late packets with ENXIO.
 */
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}
4804
/*
 * Inert v2 input stub installed by ifnet_detach_protocol(); rejects
 * late packets with ENXIO.
 */
static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}
4812
/*
 * Inert pre-output stub installed by ifnet_detach_protocol(); rejects
 * late output attempts with ENXIO.
 */
static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    IFNET_FRAME_TYPE_RW_T frame_type, IFNET_LLADDR_RW_T link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}
4821
/*
 * Inert event stub installed by ifnet_detach_protocol(); silently
 * discards events delivered after detach.
 */
static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}
4828
4829 static errno_t
ifproto_media_ioctl(struct ifnet * ifp,protocol_family_t protocol,unsigned long command,void * argument)4830 ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
4831 unsigned long command, void *argument)
4832 {
4833 #pragma unused(ifp, protocol, command, argument)
4834 return ENXIO;
4835 }
4836
4837 static errno_t
ifproto_media_resolve_multi(ifnet_t ifp,const struct sockaddr * proto_addr,struct sockaddr_dl * out_ll,size_t ll_len)4838 ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
4839 struct sockaddr_dl *out_ll, size_t ll_len)
4840 {
4841 #pragma unused(ifp, proto_addr, out_ll, ll_len)
4842 return ENXIO;
4843 }
4844
4845 static errno_t
ifproto_media_send_arp(struct ifnet * ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4846 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
4847 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4848 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4849 {
4850 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
4851 return ENXIO;
4852 }
4853
4854 extern int if_next_index(void);
4855 extern int tcp_ecn_outbound;
4856
4857 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)4858 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
4859 {
4860 uint32_t sflags = 0;
4861 int err;
4862
4863 if (if_flowadv) {
4864 sflags |= PKTSCHEDF_QALG_FLOWCTL;
4865 }
4866
4867 if (if_delaybased_queue) {
4868 sflags |= PKTSCHEDF_QALG_DELAYBASED;
4869 }
4870
4871 if (ifp->if_output_sched_model ==
4872 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
4873 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
4874 }
4875 /* Inherit drop limit from the default queue */
4876 if (ifp->if_snd != ifcq) {
4877 IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
4878 }
4879 /* Initialize transmit queue(s) */
4880 err = ifclassq_setup(ifcq, ifp, sflags);
4881 if (err != 0) {
4882 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4883 "err=%d", __func__, ifp, err);
4884 /* NOTREACHED */
4885 }
4886 }
4887
/*
 * Attach an ifnet to the system: link it onto ifnet_head and
 * ifindex2ifnet[], allocate its protocol hash and link-level address,
 * spin up the DLIL input, starter and poller kernel threads as
 * required by the driver model, and finally mark the interface
 * IFRF_ATTACHED | IFRF_READY (clearing the embryonic state).
 *
 * ll_addr, if non-NULL, supplies the link-layer address; its sdl_alen
 * must match ifp->if_addrlen (or if_addrlen must still be 0, in which
 * case the supplied length is adopted).
 *
 * Returns 0 on success; EINVAL for a NULL ifp or an address-length
 * mismatch; EEXIST if the interface is already on ifnet_head; ENODEV
 * if the interface lacks its protocol family add/del callbacks;
 * ENOBUFS if no if_index slot or link address could be obtained.
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
#if SKYWALK
	boolean_t netif_compat;
	if_nexus_netif nexus_netif;
#endif /* SKYWALK */
	ifnet_ref_t tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	/* Adopt or validate the supplied link-layer address length */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
	    PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ifp->if_proto_hash_count = PROTO_HASH_SLOTS;

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Multicast memberships are preserved across a DLIF_REUSE recycle */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		/*
		 * Since we exhausted the list of
		 * if_index's, try to find an empty slot
		 * in ifindex2ifnet.
		 */
		if (idx == -1 && if_index >= UINT16_MAX) {
			for (int i = 1; i < if_index; i++) {
				if (ifindex2ifnet[i] == NULL &&
				    ifnet_addrs[i - 1] == NULL) {
					idx = i;
					break;
				}
			}
		}
		if (idx == -1) {
			/* no index available; bail out */
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;

		/* the lladdr passed at attach time is the permanent address */
		if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
		    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
			bcopy(CONST_LLADDR(ll_addr),
			    dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
			dl_if->dl_if_permanent_ether_is_set = 1;
		}
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	ifa_addref(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Publish the interface; visible to index/list lookups from here */
	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	dlil_ifclassq_setup(ifp, ifp->if_snd);

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			/* ENODEV: no dedicated input thread for this ifp */
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* Give the starter thread a slight importance boost */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (dlil_is_rxpoll_input(thfunc)) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* Give the poller thread a slight importance boost */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/* Count link-layer memberships carried over from a prior attach */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	/* Clear throughput estimates and radio type */
	ifp->if_estimated_up_bucket = 0;
	ifp->if_estimated_down_bucket = 0;
	ifp->if_radio_type = 0;
	ifp->if_radio_channel = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		if_set_eflags(ifp, IFEF_ECN_ENABLE);
		if_clear_eflags(ifp, IFEF_ECN_DISABLE);
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

#if SKYWALK
	netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that. This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Final mark this ifnet as attached. */
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
#if SKYWALK
	/* the interface is fully attached; let the nexus adapter know */
	if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
		if (netif_compat) {
			if (sk_netif_compat_txmodel ==
			    NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
				ifnet_enqueue_multi_setup(ifp,
				    sk_tx_delay_qlen, sk_tx_delay_timeout);
			}
			ifp->if_nx_netif = nexus_netif;
		}
		ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
	}
#endif /* SKYWALK */
	ifnet_lock_done(ifp);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);

	os_log(OS_LOG_DEFAULT, "%s: attached%s\n", if_name(ifp),
	    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	return 0;
}
5388
/*
 * Purge all IPv4 (when INET) and IPv6 network-layer addresses from
 * the interface; invoked from ifnet_detach_final() before protocols
 * are unplumbed.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
5397
/*
 * Begin detaching an interface (first phase): mark it IFRF_DETACHING,
 * unlink it from ifnet_head and ifindex2ifnet[] so it is no longer
 * visible to lookups, reset per-interface state, and hand the ifnet
 * to the detacher worker thread, which performs the final teardown
 * asynchronously via ifnet_detach_final().
 *
 * Returns 0 on success, EINVAL if ifp is NULL or not attached, and
 * ENXIO if a detach is already in progress.
 */
errno_t
ifnet_detach(ifnet_t ifp)
{
	ifnet_ref_t delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	/* Mark the interface's ND CGA state as no longer initialized */
	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}
	os_log(OS_LOG_DEFAULT, "%s detaching", if_name(ifp));

	/* Mark the interface down */
	if_down(ifp);

	/*
	 * IMPORTANT NOTE
	 *
	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
	 * or equivalently, ifnet_is_attached(ifp, 1), can't be modified
	 * until after we've waited for all I/O references to drain
	 * in ifnet_detach_final().
	 */

	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/* Tear down any attached output network emulator */
	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	/* Reset CLAT46 flag */
	if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);

	/*
	 * We do not reset the TCP keep alive counters, in case a TCP
	 * connection stays connected after the interface went down;
	 * just log when the count is unexpectedly non-zero.
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset router mode */
	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	/*
	 * Increment the generation count on interface deletion
	 */
	ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Force reset link heuristics */
	if (ifp->if_link_heuristics_tcall != NULL) {
		thread_call_cancel_wait(ifp->if_link_heuristics_tcall);
		thread_call_free(ifp->if_link_heuristics_tcall);
		ifp->if_link_heuristics_tcall = NULL;
	}
	if_clear_xflags(ifp, IFXF_LINK_HEURISTICS);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		kfree_type(struct if_link_status, ifp->if_link_status);
		ifp->if_link_status = NULL;
	}

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
	mld_domifdetach(ifp);

#if SKYWALK
	/* Clean up any netns tokens still pointing to this ifnet */
	netns_ifnet_detach(ifp);
#endif /* SKYWALK */
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
5597
/*
 * Queue an ifnet on the detaching list and wake the detacher thread,
 * which will complete the teardown via ifnet_detach_final().
 * Caller must hold the dlil ifnet lock.
 */
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	/* wake ifnet_detacher_thread_cont() */
	wakeup((caddr_t)&ifnet_delayed_run);
}
5608
5609 static struct ifnet *
ifnet_detaching_dequeue(void)5610 ifnet_detaching_dequeue(void)
5611 {
5612 ifnet_ref_t ifp;
5613
5614 dlil_if_lock_assert();
5615
5616 ifp = TAILQ_FIRST(&ifnet_detaching_head);
5617 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5618 if (ifp != NULL) {
5619 VERIFY(ifnet_detaching_cnt != 0);
5620 --ifnet_detaching_cnt;
5621 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5622 ifp->if_detaching_link.tqe_next = NULL;
5623 ifp->if_detaching_link.tqe_prev = NULL;
5624 }
5625 return ifp;
5626 }
5627
/*
 * Continuation routine for the detacher thread: drain the
 * ifnet_detaching_head queue, invoking ifnet_detach_final() on each
 * interface with the dlil lock dropped, then block on ifnet_delayed_run
 * with itself as the continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	ifnet_ref_t ifp;

	dlil_if_lock();
	if (__improbable(ifnet_detaching_embryonic)) {
		/* first wakeup after bootstrap; leave embryonic state */
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* drop the lock; ifnet_detach_final() may sleep */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	/* queue drained; sleep until ifnet_detaching_enqueue() wakes us */
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
5670
/*
 * Entry point for the detacher kernel thread: arm the wait on
 * ifnet_delayed_run, mark the thread embryonic, issue a self-wakeup
 * so the continuation runs once to clear the embryonic state, then
 * block into ifnet_detacher_thread_cont().  Never returns.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
5687
5688 static void
ifnet_detach_final(struct ifnet * ifp)5689 ifnet_detach_final(struct ifnet *ifp)
5690 {
5691 struct ifnet_filter *filter, *filter_next;
5692 struct dlil_ifnet *dlifp;
5693 struct ifnet_filter_head fhead;
5694 struct dlil_threading_info *inp;
5695 struct ifaddr *ifa;
5696 ifnet_detached_func if_free;
5697 int i;
5698 bool waited = false;
5699
5700 /* Let BPF know we're detaching */
5701 bpfdetach(ifp);
5702
5703 #if SKYWALK
5704 dlil_netif_detach_notify(ifp);
5705 /*
5706 * Wait for the datapath to quiesce before tearing down
5707 * netif/flowswitch nexuses.
5708 */
5709 dlil_quiesce_and_detach_nexuses(ifp);
5710 #endif /* SKYWALK */
5711
5712 lck_mtx_lock(&ifp->if_ref_lock);
5713 if (!(ifp->if_refflags & IFRF_DETACHING)) {
5714 panic("%s: flags mismatch (detaching not set) ifp=%p",
5715 __func__, ifp);
5716 /* NOTREACHED */
5717 }
5718
5719 /*
5720 * Wait until the existing IO references get released
5721 * before we proceed with ifnet_detach. This is not a
5722 * common case, so block without using a continuation.
5723 */
5724 while (ifp->if_refio > 0) {
5725 waited = true;
5726 DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
5727 __func__, if_name(ifp));
5728 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
5729 (PZERO - 1), "ifnet_ioref_wait", NULL);
5730 }
5731 if (waited) {
5732 DLIL_PRINTF("%s: %s IO references drained\n",
5733 __func__, if_name(ifp));
5734 }
5735 VERIFY(ifp->if_datamov == 0);
5736 VERIFY(ifp->if_drainers == 0);
5737 VERIFY(ifp->if_suspend == 0);
5738 ifp->if_refflags &= ~IFRF_READY;
5739 lck_mtx_unlock(&ifp->if_ref_lock);
5740
5741 #if SKYWALK
5742 VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
5743 #endif /* SKYWALK */
5744 /* Drain and destroy send queue */
5745 ifclassq_teardown(ifp->if_snd);
5746
5747 /* Detach interface filters */
5748 lck_mtx_lock(&ifp->if_flt_lock);
5749 if_flt_monitor_enter(ifp);
5750
5751 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
5752 fhead = ifp->if_flt_head;
5753 TAILQ_INIT(&ifp->if_flt_head);
5754
5755 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
5756 filter_next = TAILQ_NEXT(filter, filt_next);
5757 lck_mtx_unlock(&ifp->if_flt_lock);
5758
5759 dlil_detach_filter_internal(filter, 1);
5760 lck_mtx_lock(&ifp->if_flt_lock);
5761 }
5762 if_flt_monitor_leave(ifp);
5763 lck_mtx_unlock(&ifp->if_flt_lock);
5764
5765 /* Tell upper layers to drop their network addresses */
5766 if_purgeaddrs(ifp);
5767
5768 ifnet_lock_exclusive(ifp);
5769
5770 /* Clear agent IDs */
5771 if (ifp->if_agentids != NULL) {
5772 kfree_data_sized_by(ifp->if_agentids, ifp->if_agentcount);
5773 }
5774
5775 bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
5776 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
5777
5778 /* Unplumb all protocols */
5779 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5780 struct if_proto *proto;
5781
5782 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5783 while (proto != NULL) {
5784 protocol_family_t family = proto->protocol_family;
5785 ifnet_lock_done(ifp);
5786 proto_unplumb(family, ifp);
5787 ifnet_lock_exclusive(ifp);
5788 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5789 }
5790 /* There should not be any protocols left */
5791 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
5792 }
5793 kfree_type_counted_by(struct proto_hash_entry, ifp->if_proto_hash_count, ifp->if_proto_hash);
5794
5795 /* Detach (permanent) link address from if_addrhead */
5796 ifa = TAILQ_FIRST(&ifp->if_addrhead);
5797 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
5798 IFA_LOCK(ifa);
5799 if_detach_link_ifa(ifp, ifa);
5800 IFA_UNLOCK(ifa);
5801
5802 /* Remove (permanent) link address from ifnet_addrs[] */
5803 ifa_remref(ifa);
5804 ifnet_addrs[ifp->if_index - 1] = NULL;
5805
5806 /* This interface should not be on {ifnet_head,detaching} */
5807 VERIFY(ifp->if_link.tqe_next == NULL);
5808 VERIFY(ifp->if_link.tqe_prev == NULL);
5809 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5810 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5811 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
5812 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
5813
5814 /* The slot should have been emptied */
5815 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5816
5817 /* There should not be any addresses left */
5818 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5819
5820 /*
5821 * Signal the starter thread to terminate itself, and wait until
5822 * it has exited.
5823 */
5824 if (ifp->if_start_thread != THREAD_NULL) {
5825 lck_mtx_lock_spin(&ifp->if_start_lock);
5826 ifp->if_start_flags |= IFSF_TERMINATING;
5827 wakeup_one((caddr_t)&ifp->if_start_thread);
5828 lck_mtx_unlock(&ifp->if_start_lock);
5829
5830 /* wait for starter thread to terminate */
5831 lck_mtx_lock(&ifp->if_start_lock);
5832 while (ifp->if_start_thread != THREAD_NULL) {
5833 if (dlil_verbose) {
5834 DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
5835 __func__,
5836 if_name(ifp));
5837 }
5838 (void) msleep(&ifp->if_start_thread,
5839 &ifp->if_start_lock, (PZERO - 1),
5840 "ifnet_start_thread_exit", NULL);
5841 }
5842 lck_mtx_unlock(&ifp->if_start_lock);
5843 if (dlil_verbose) {
5844 DLIL_PRINTF("%s: %s starter thread termination complete",
5845 __func__, if_name(ifp));
5846 }
5847 }
5848
5849 /*
5850 * Signal the poller thread to terminate itself, and wait until
5851 * it has exited.
5852 */
5853 if (ifp->if_poll_thread != THREAD_NULL) {
5854 #if SKYWALK
5855 VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5856 #endif /* SKYWALK */
5857 lck_mtx_lock_spin(&ifp->if_poll_lock);
5858 ifp->if_poll_flags |= IF_POLLF_TERMINATING;
5859 wakeup_one((caddr_t)&ifp->if_poll_thread);
5860 lck_mtx_unlock(&ifp->if_poll_lock);
5861
5862 /* wait for poller thread to terminate */
5863 lck_mtx_lock(&ifp->if_poll_lock);
5864 while (ifp->if_poll_thread != THREAD_NULL) {
5865 if (dlil_verbose) {
5866 DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
5867 __func__,
5868 if_name(ifp));
5869 }
5870 (void) msleep(&ifp->if_poll_thread,
5871 &ifp->if_poll_lock, (PZERO - 1),
5872 "ifnet_poll_thread_exit", NULL);
5873 }
5874 lck_mtx_unlock(&ifp->if_poll_lock);
5875 if (dlil_verbose) {
5876 DLIL_PRINTF("%s: %s poller thread termination complete\n",
5877 __func__, if_name(ifp));
5878 }
5879 }
5880
5881 /*
5882 * If thread affinity was set for the workloop thread, we will need
5883 * to tear down the affinity and release the extra reference count
5884 * taken at attach time. Does not apply to lo0 or other interfaces
5885 * without dedicated input threads.
5886 */
5887 if ((inp = ifp->if_inp) != NULL) {
5888 VERIFY(inp != dlil_main_input_thread);
5889
5890 if (inp->dlth_affinity) {
5891 struct thread *__single tp, *__single wtp, *__single ptp;
5892
5893 lck_mtx_lock_spin(&inp->dlth_lock);
5894 wtp = inp->dlth_driver_thread;
5895 inp->dlth_driver_thread = THREAD_NULL;
5896 ptp = inp->dlth_poller_thread;
5897 inp->dlth_poller_thread = THREAD_NULL;
5898 ASSERT(inp->dlth_thread != THREAD_NULL);
5899 tp = inp->dlth_thread; /* don't nullify now */
5900 inp->dlth_affinity_tag = 0;
5901 inp->dlth_affinity = FALSE;
5902 lck_mtx_unlock(&inp->dlth_lock);
5903
5904 /* Tear down poll thread affinity */
5905 if (ptp != NULL) {
5906 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
5907 VERIFY(ifp->if_xflags & IFXF_LEGACY);
5908 (void) dlil_affinity_set(ptp,
5909 THREAD_AFFINITY_TAG_NULL);
5910 thread_deallocate(ptp);
5911 }
5912
5913 /* Tear down workloop thread affinity */
5914 if (wtp != NULL) {
5915 (void) dlil_affinity_set(wtp,
5916 THREAD_AFFINITY_TAG_NULL);
5917 thread_deallocate(wtp);
5918 }
5919
5920 /* Tear down DLIL input thread affinity */
5921 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
5922 thread_deallocate(tp);
5923 }
5924
5925 /* disassociate ifp DLIL input thread */
5926 ifp->if_inp = NULL;
5927
5928 /* if the worker thread was created, tell it to terminate */
5929 if (inp->dlth_thread != THREAD_NULL) {
5930 lck_mtx_lock_spin(&inp->dlth_lock);
5931 inp->dlth_flags |= DLIL_INPUT_TERMINATE;
5932 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
5933 wakeup_one((caddr_t)&inp->dlth_flags);
5934 }
5935 lck_mtx_unlock(&inp->dlth_lock);
5936 ifnet_lock_done(ifp);
5937
5938 /* wait for the input thread to terminate */
5939 lck_mtx_lock_spin(&inp->dlth_lock);
5940 while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
5941 == 0) {
5942 (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
5943 (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
5944 }
5945 lck_mtx_unlock(&inp->dlth_lock);
5946 ifnet_lock_exclusive(ifp);
5947 }
5948
5949 /* clean-up input thread state */
5950 dlil_clean_threading_info(inp);
5951 /* clean-up poll parameters */
5952 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5953 dlil_reset_rxpoll_params(ifp);
5954 }
5955
5956 /* The driver might unload, so point these to ourselves */
5957 if_free = ifp->if_free;
5958 ifp->if_output_dlil = ifp_if_output;
5959 ifp->if_output = ifp_if_output;
5960 ifp->if_pre_enqueue = ifp_if_output;
5961 ifp->if_start = ifp_if_start;
5962 ifp->if_output_ctl = ifp_if_ctl;
5963 ifp->if_input_dlil = ifp_if_input;
5964 ifp->if_input_poll = ifp_if_input_poll;
5965 ifp->if_input_ctl = ifp_if_ctl;
5966 ifp->if_ioctl = ifp_if_ioctl;
5967 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
5968 ifp->if_free = ifp_if_free;
5969 ifp->if_demux = ifp_if_demux;
5970 ifp->if_event = ifp_if_event;
5971 ifp->if_framer_legacy = ifp_if_framer;
5972 ifp->if_framer = ifp_if_framer_extended;
5973 ifp->if_add_proto = ifp_if_add_proto;
5974 ifp->if_del_proto = ifp_if_del_proto;
5975 ifp->if_check_multi = ifp_if_check_multi;
5976
5977 /* wipe out interface description */
5978 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5979 ifp->if_desc.ifd_len = 0;
5980 VERIFY(ifp->if_desc.ifd_desc != NULL);
5981 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
5982
5983 /* there shouldn't be any delegation by now */
5984 VERIFY(ifp->if_delegated.ifp == NULL);
5985 VERIFY(ifp->if_delegated.type == 0);
5986 VERIFY(ifp->if_delegated.family == 0);
5987 VERIFY(ifp->if_delegated.subfamily == 0);
5988 VERIFY(ifp->if_delegated.expensive == 0);
5989 VERIFY(ifp->if_delegated.constrained == 0);
5990 VERIFY(ifp->if_delegated.ultra_constrained == 0);
5991
5992 /* QoS marking get cleared */
5993 if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
5994 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
5995
5996 #if SKYWALK
5997 /* the nexus destructor is responsible for clearing these */
5998 VERIFY(ifp->if_na_ops == NULL);
5999 VERIFY(ifp->if_na == NULL);
6000 #endif /* SKYWALK */
6001
6002 /* interface could come up with different hwassist next time */
6003 ifp->if_hwassist = 0;
6004 ifp->if_capenable = 0;
6005
6006 /* promiscuous/allmulti counts need to start at zero again */
6007 ifp->if_pcount = 0;
6008 ifp->if_amcount = 0;
6009 ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);
6010
6011 ifnet_lock_done(ifp);
6012
6013 #if PF
6014 /*
6015 * Detach this interface from packet filter, if enabled.
6016 */
6017 pf_ifnet_hook(ifp, 0);
6018 #endif /* PF */
6019
6020 /* Filter list should be empty */
6021 lck_mtx_lock_spin(&ifp->if_flt_lock);
6022 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6023 VERIFY(ifp->if_flt_busy == 0);
6024 VERIFY(ifp->if_flt_waiters == 0);
6025 VERIFY(ifp->if_flt_non_os_count == 0);
6026 VERIFY(ifp->if_flt_no_tso_count == 0);
6027 lck_mtx_unlock(&ifp->if_flt_lock);
6028
6029 /* Last chance to drain send queue */
6030 if_qflush_snd(ifp, 0);
6031
6032 /* Last chance to cleanup any cached route */
6033 lck_mtx_lock(&ifp->if_cached_route_lock);
6034 VERIFY(!ifp->if_fwd_cacheok);
6035 ROUTE_RELEASE(&ifp->if_fwd_route);
6036 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
6037 ROUTE_RELEASE(&ifp->if_src_route);
6038 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
6039 ROUTE_RELEASE(&ifp->if_src_route6);
6040 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6041 lck_mtx_unlock(&ifp->if_cached_route_lock);
6042
6043 /* Ignore any pending data threshold as the interface is anyways gone */
6044 ifp->if_data_threshold = 0;
6045
6046 VERIFY(ifp->if_dt_tcall != NULL);
6047 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
6048
6049 ifnet_llreach_ifdetach(ifp);
6050
6051 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);
6052
6053 /*
6054 * Finally, mark this ifnet as detached.
6055 */
6056 os_log(OS_LOG_DEFAULT, "%s detached", if_name(ifp));
6057
6058 lck_mtx_lock_spin(&ifp->if_ref_lock);
6059 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6060 panic("%s: flags mismatch (detaching not set) ifp=%p",
6061 __func__, ifp);
6062 /* NOTREACHED */
6063 }
6064 ifp->if_refflags &= ~IFRF_DETACHING;
6065 lck_mtx_unlock(&ifp->if_ref_lock);
6066 if (if_free != NULL) {
6067 if_free(ifp);
6068 }
6069
6070 ifclassq_release(&ifp->if_snd);
6071
6072 /* we're fully detached, clear the "in use" bit */
6073 dlifp = (struct dlil_ifnet *)ifp;
6074 lck_mtx_lock(&dlifp->dl_if_lock);
6075 ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
6076 dlifp->dl_if_flags &= ~DLIF_INUSE;
6077 lck_mtx_unlock(&dlifp->dl_if_lock);
6078
6079 /* Release reference held during ifnet attach */
6080 ifnet_release(ifp);
6081 }
6082
6083 errno_t
ifp_if_output(struct ifnet * ifp,struct mbuf * m)6084 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6085 {
6086 #pragma unused(ifp)
6087 m_freem_list(m);
6088 return 0;
6089 }
6090
/*
 * Start handler installed on a detached ifnet; packets still sitting
 * on the send queue are purged rather than transmitted.
 */
void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
6096
6097 static errno_t
ifp_if_input(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s,boolean_t poll,struct thread * tp)6098 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6099 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6100 boolean_t poll, struct thread *tp)
6101 {
6102 #pragma unused(ifp, m_tail, s, poll, tp)
6103 m_freem_list(m_head);
6104 return ENXIO;
6105 }
6106
6107 static void
ifp_if_input_poll(struct ifnet * ifp,u_int32_t flags,u_int32_t max_cnt,struct mbuf ** m_head,struct mbuf ** m_tail,u_int32_t * cnt,u_int32_t * len)6108 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6109 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6110 {
6111 #pragma unused(ifp, flags, max_cnt)
6112 if (m_head != NULL) {
6113 *m_head = NULL;
6114 }
6115 if (m_tail != NULL) {
6116 *m_tail = NULL;
6117 }
6118 if (cnt != NULL) {
6119 *cnt = 0;
6120 }
6121 if (len != NULL) {
6122 *len = 0;
6123 }
6124 }
6125
6126 static errno_t
ifp_if_ctl(struct ifnet * ifp,ifnet_ctl_cmd_t cmd,u_int32_t arglen,void * arg)6127 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6128 {
6129 #pragma unused(ifp, cmd, arglen, arg)
6130 return EOPNOTSUPP;
6131 }
6132
6133 static errno_t
ifp_if_demux(struct ifnet * ifp,struct mbuf * m,char * fh,protocol_family_t * pf)6134 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6135 {
6136 #pragma unused(ifp, fh, pf)
6137 m_freem(m);
6138 return EJUSTRETURN;
6139 }
6140
6141 static errno_t
ifp_if_add_proto(struct ifnet * ifp,protocol_family_t pf,const struct ifnet_demux_desc * da,u_int32_t dc)6142 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6143 const struct ifnet_demux_desc *da, u_int32_t dc)
6144 {
6145 #pragma unused(ifp, pf, da, dc)
6146 return EINVAL;
6147 }
6148
6149 static errno_t
ifp_if_del_proto(struct ifnet * ifp,protocol_family_t pf)6150 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6151 {
6152 #pragma unused(ifp, pf)
6153 return EINVAL;
6154 }
6155
6156 static errno_t
ifp_if_check_multi(struct ifnet * ifp,const struct sockaddr * sa)6157 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6158 {
6159 #pragma unused(ifp, sa)
6160 return EOPNOTSUPP;
6161 }
6162
/*
 * Legacy framer stub installed on a detached ifnet; it forwards to the
 * extended framer stub, which drops the packet.  The signature differs
 * by target: the non-macOS variant carries pre/post header-length
 * output parameters, the macOS variant does not.
 */
#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
6181
6182 static errno_t
ifp_if_framer_extended(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * sa,IFNET_LLADDR_T ll,IFNET_FRAME_TYPE_T t,u_int32_t * pre,u_int32_t * post)6183 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6184 const struct sockaddr *sa,
6185 IFNET_LLADDR_T ll,
6186 IFNET_FRAME_TYPE_T t,
6187 u_int32_t *pre, u_int32_t *post)
6188 {
6189 #pragma unused(ifp, sa, ll, t)
6190 m_freem(*m);
6191 *m = NULL;
6192
6193 if (pre != NULL) {
6194 *pre = 0;
6195 }
6196 if (post != NULL) {
6197 *post = 0;
6198 }
6199
6200 return EJUSTRETURN;
6201 }
6202
6203 errno_t
ifp_if_ioctl(struct ifnet * ifp,unsigned long cmd,void * arg)6204 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6205 {
6206 #pragma unused(ifp, cmd, arg)
6207 return EOPNOTSUPP;
6208 }
6209
6210 static errno_t
ifp_if_set_bpf_tap(struct ifnet * ifp,bpf_tap_mode tm,bpf_packet_func f)6211 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6212 {
6213 #pragma unused(ifp, tm, f)
6214 /* XXX not sure what to do here */
6215 return 0;
6216 }
6217
/*
 * No-op free handler installed on a detached ifnet; the driver's real
 * if_free (saved earlier in the detach path) is invoked separately.
 */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}
6223
/*
 * No-op event handler installed on a detached ifnet; kernel events
 * delivered this late are ignored.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
6229
/*
 * Unplumb the protocol families that are implicitly plumbed on every
 * interface (PF_INET and PF_INET6).
 */
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
6245
/*
 * Snapshot the cached IPv4 source route into *dst.  The cached-route
 * lock is taken as a spin lock and immediately converted to a full
 * mutex so the copy runs with the mutex held (route_copyout may do
 * non-trivial work).
 */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6256
/*
 * Install *src as the cached IPv4 source route, but only while
 * caching is enabled (if_fwd_cacheok); otherwise the route reference
 * carried by *src is released instead of being cached.
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6270
/*
 * Snapshot the cached IPv6 source route into *dst; IPv6 counterpart
 * of ifp_src_route_copyout() with the same spin-to-mutex conversion.
 */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6282
/*
 * Install *src as the cached IPv6 source route when caching is enabled
 * (if_fwd_cacheok); otherwise release the reference carried by *src.
 * IPv6 counterpart of ifp_src_route_copyin().
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6297
/*
 * Look up a route for src_ip scoped to ifp, preferring the per-ifnet
 * cached IPv4 source route when it is still usable and matches the
 * address.  On a miss, a fresh scoped lookup is performed and the
 * result is cached.  The returned rtentry (possibly NULL) carries a
 * reference for the caller.
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = SIN(&src_rt.ro_dst);

	/* Grab a snapshot of the cached route */
	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		/* Cache miss: drop the stale route and rebuild the key */
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped(SA(dst),
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
6332
/*
 * Look up a route for src_ip6 scoped to ifp, preferring the per-ifnet
 * cached IPv6 source route when it is usable and matches the address.
 * On a miss, a fresh scoped lookup is performed and cached.  The
 * returned rtentry (possibly NULL) carries a reference for the caller.
 * IPv6 counterpart of ifnet_cached_rtlookup_inet().
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	/* Grab a snapshot of the cached route */
	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		/* Cache miss: drop the stale route and rebuild the key */
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		/* ro_rt is expected NULL after ROUTE_RELEASE; guard kept */
		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				SA(&src_rt.ro_dst), 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
6369
/*
 * Update the interface's link quality metric (LQM) state.
 *
 * The raw lqm value is first normalized to one of the discrete
 * threshold values (ABORT / MINIMALLY_VIABLE / POOR / GOOD).  If the
 * state actually changes, per-state residency time and transition
 * counters are updated and a KEV_DL_LINK_QUALITY_METRIC_CHANGED event
 * is posted.
 *
 * locked indicates whether the caller already holds the ifnet lock
 * exclusively.  The lock is always dropped around the event post and
 * NECP update, and re-acquired before returning only when the caller
 * held it on entry.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;
	uint64_t now, delta;
	int8_t old_lqm;
	bool need_necp_client_update;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		/*
		 * Quality is at/below the abort threshold: flag the TCP
		 * PCB info so the fast timer can handle LQM aborts, and
		 * schedule that timer now.
		 */
		os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return; /* nothing to update */
	}

	net_update_uptime();
	now = net_uptime_ms();
	ASSERT(now >= ifp->if_lqmstate_start_time);
	delta = now - ifp->if_lqmstate_start_time;

	/* Account the time spent in the outgoing state */
	old_lqm = ifp->if_interface_state.lqm_state;
	switch (old_lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_time += delta;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_time += delta;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_time += delta;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_time += delta;
		break;
	default:
		break;
	}
	/* Count the transition into the new state */
	switch (lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_cnt += 1;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_cnt += 1;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_cnt += 1;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_cnt += 1;
		break;
	default:
		break;
	}
	ifp->if_lqmstate_start_time = now;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Update the link heuristics
	 */
	need_necp_client_update = if_update_link_heuristic(ifp);

	/*
	 * Don't want to hold the lock when issuing kernel events or calling NECP
	 */
	ifnet_lock_done(ifp);

	if (need_necp_client_update) {
		necp_update_all_clients_immediately_if_needed(true);
	}

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
6486
/*
 * Update the cellular RRC (radio resource control) state and post a
 * KEV_DL_RRC_STATE_CHANGED event when it changes.
 *
 * Called with the ifnet lock held exclusively; the lock is dropped
 * while the kernel event is posted and re-acquired before returning.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* Nothing to do if the state is already valid and unchanged */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	ifnet_lock_exclusive(ifp);
}
6516
6517 errno_t
if_state_update(struct ifnet * ifp,struct if_interface_state * if_interface_state)6518 if_state_update(struct ifnet *ifp,
6519 struct if_interface_state *if_interface_state)
6520 {
6521 u_short if_index_available = 0;
6522
6523 ifnet_lock_exclusive(ifp);
6524
6525 if ((ifp->if_type != IFT_CELLULAR) &&
6526 (if_interface_state->valid_bitmask &
6527 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
6528 ifnet_lock_done(ifp);
6529 return ENOTSUP;
6530 }
6531 if ((if_interface_state->valid_bitmask &
6532 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
6533 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
6534 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
6535 ifnet_lock_done(ifp);
6536 return EINVAL;
6537 }
6538 if ((if_interface_state->valid_bitmask &
6539 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
6540 if_interface_state->rrc_state !=
6541 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
6542 if_interface_state->rrc_state !=
6543 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
6544 ifnet_lock_done(ifp);
6545 return EINVAL;
6546 }
6547
6548 if (if_interface_state->valid_bitmask &
6549 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6550 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
6551 }
6552 if (if_interface_state->valid_bitmask &
6553 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6554 if_rrc_state_update(ifp, if_interface_state->rrc_state);
6555 }
6556 if (if_interface_state->valid_bitmask &
6557 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6558 ifp->if_interface_state.valid_bitmask |=
6559 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6560 ifp->if_interface_state.interface_availability =
6561 if_interface_state->interface_availability;
6562
6563 if (ifp->if_interface_state.interface_availability ==
6564 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
6565 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
6566 __func__, if_name(ifp), ifp->if_index);
6567 if_index_available = ifp->if_index;
6568 } else {
6569 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
6570 __func__, if_name(ifp), ifp->if_index);
6571 }
6572 }
6573 ifnet_lock_done(ifp);
6574
6575 /*
6576 * Check if the TCP connections going on this interface should be
6577 * forced to send probe packets instead of waiting for TCP timers
6578 * to fire. This is done on an explicit notification such as
6579 * SIOCSIFINTERFACESTATE which marks the interface as available.
6580 */
6581 if (if_index_available > 0) {
6582 tcp_interface_send_probe(if_index_available);
6583 }
6584
6585 return 0;
6586 }
6587
6588 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)6589 if_get_state(struct ifnet *ifp,
6590 struct if_interface_state *if_interface_state)
6591 {
6592 ifnet_lock_shared(ifp);
6593
6594 if_interface_state->valid_bitmask = 0;
6595
6596 if (ifp->if_interface_state.valid_bitmask &
6597 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6598 if_interface_state->valid_bitmask |=
6599 IF_INTERFACE_STATE_RRC_STATE_VALID;
6600 if_interface_state->rrc_state =
6601 ifp->if_interface_state.rrc_state;
6602 }
6603 if (ifp->if_interface_state.valid_bitmask &
6604 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6605 if_interface_state->valid_bitmask |=
6606 IF_INTERFACE_STATE_LQM_STATE_VALID;
6607 if_interface_state->lqm_state =
6608 ifp->if_interface_state.lqm_state;
6609 }
6610 if (ifp->if_interface_state.valid_bitmask &
6611 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6612 if_interface_state->valid_bitmask |=
6613 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6614 if_interface_state->interface_availability =
6615 ifp->if_interface_state.interface_availability;
6616 }
6617
6618 ifnet_lock_done(ifp);
6619 }
6620
6621 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)6622 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6623 {
6624 if (conn_probe > 1) {
6625 return EINVAL;
6626 }
6627 if (conn_probe == 0) {
6628 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6629 } else {
6630 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6631 }
6632
6633 #if NECP
6634 necp_update_all_clients();
6635 #endif /* NECP */
6636
6637 tcp_probe_connectivity(ifp, conn_probe);
6638 return 0;
6639 }
6640
6641 /* for uuid.c */
6642 static int
get_ether_index(int * ret_other_index)6643 get_ether_index(int * ret_other_index)
6644 {
6645 ifnet_ref_t ifp;
6646 int en0_index = 0;
6647 int other_en_index = 0;
6648 int any_ether_index = 0;
6649 short best_unit = 0;
6650
6651 *ret_other_index = 0;
6652 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6653 /*
6654 * find en0, or if not en0, the lowest unit en*, and if not
6655 * that, any ethernet
6656 */
6657 ifnet_lock_shared(ifp);
6658 if (strcmp(ifp->if_name, "en") == 0) {
6659 if (ifp->if_unit == 0) {
6660 /* found en0, we're done */
6661 en0_index = ifp->if_index;
6662 ifnet_lock_done(ifp);
6663 break;
6664 }
6665 if (other_en_index == 0 || ifp->if_unit < best_unit) {
6666 other_en_index = ifp->if_index;
6667 best_unit = ifp->if_unit;
6668 }
6669 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
6670 any_ether_index = ifp->if_index;
6671 }
6672 ifnet_lock_done(ifp);
6673 }
6674 if (en0_index == 0) {
6675 if (other_en_index != 0) {
6676 *ret_other_index = other_en_index;
6677 } else if (any_ether_index != 0) {
6678 *ret_other_index = any_ether_index;
6679 }
6680 }
6681 return en0_index;
6682 }
6683
/*
 * Copy an Ethernet hardware address into node[] for UUID generation
 * (used by uuid.c).  Prefers en0, falling back to the best alternative
 * chosen by get_ether_index().  The permanent (factory) address is
 * used when set, since it never changes.
 *
 * Returns 0 on success, -1 when no suitable interface exists.
 */
int
uuid_get_ethernet(u_int8_t *__counted_by(ETHER_ADDR_LEN) node)
{
	/* cached en0 ifindex, revalidated against ifindex2ifnet below */
	static int en0_index;
	ifnet_ref_t ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
		} else {
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		}
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();
	return ret;
}
6725
6726 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6727 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6728 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6729 {
6730 struct kev_dl_node_presence kev;
6731 struct sockaddr_dl *sdl;
6732 struct sockaddr_in6 *sin6;
6733 int ret = 0;
6734
6735 VERIFY(ifp);
6736 VERIFY(sa);
6737 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6738
6739 bzero(&kev, sizeof(kev));
6740 sin6 = &kev.sin6_node_address;
6741 sdl = &kev.sdl_node_address;
6742 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6743 kev.rssi = rssi;
6744 kev.link_quality_metric = lqm;
6745 kev.node_proximity_metric = npm;
6746 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6747
6748 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6749 if (ret == 0 || ret == EEXIST) {
6750 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6751 &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6752 if (err != 0) {
6753 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
6754 "error %d\n", __func__, err);
6755 }
6756 }
6757
6758 if (ret == EEXIST) {
6759 ret = 0;
6760 }
6761 return ret;
6762 }
6763
/*
 * Remove a node from the IPv6 neighbor cache and, on success, post a
 * KEV_DL_NODE_ABSENCE kernel event describing the departed node.
 *
 * @param ifp	interface the node was reachable on; must be non-NULL.
 * @param sa	node address, either AF_INET6 or AF_LINK; must be non-NULL.
 */
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;
	int error = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	/* Event payload carries both address forms; fill in what we have. */
	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
		error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	if (error == 0) {
		/* Stamp the link-layer part with the interface identity
		 * before publishing the event. */
		kev_sdl->sdl_type = ifp->if_type;
		kev_sdl->sdl_index = ifp->if_index;

		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
		    &kev.link_data, sizeof(kev), FALSE);
	}
}
6804
/*
 * Record the presence of a neighbor node (v2 interface: caller supplies
 * both the IPv6 sockaddr and the link-layer sockaddr_dl separately) and
 * post a KEV_DL_NODE_PRESENCE event.
 *
 * @param ifp	interface the node is reachable on; must be non-NULL.
 * @param sa	node's AF_INET6 address.
 * @param sdl	node's AF_LINK (link-layer) address.
 * @param rssi	received signal strength indication for the node.
 * @param lqm	link quality metric.
 * @param npm	node proximity metric.
 * @param srvinfo 48-byte opaque service-information blob copied into
 *		the event payload.
 *
 * @return 0 on success (EEXIST from the neighbor-cache update is treated
 *	as success; it only changes the "delayed" flag of the posted
 *	event), otherwise an errno from nd6_alt_node_present().
 */
int
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	/* Bounded copies into the fixed-size event payload. */
	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	SOCKADDR_COPY(sdl, kev_sdl, sdl->sdl_len);
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	/* Update the neighbor cache; EEXIST means already present. */
	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0 || ret == EEXIST) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
		}
	}

	if (ret == EEXIST) {
		ret = 0;
	}
	return ret;
}
6848
6849 const void *
dlil_ifaddr_bytes(const struct sockaddr_dl * sdl,size_t * sizep,kauth_cred_t * credp)6850 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6851 kauth_cred_t *credp)
6852 {
6853 const u_int8_t *bytes;
6854 size_t size;
6855
6856 bytes = CONST_LLADDR(sdl);
6857 size = sdl->sdl_alen;
6858
6859 #if CONFIG_MACF
6860 if (dlil_lladdr_ckreq) {
6861 switch (sdl->sdl_type) {
6862 case IFT_ETHER:
6863 case IFT_IEEE1394:
6864 break;
6865 default:
6866 credp = NULL;
6867 break;
6868 }
6869 ;
6870
6871 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6872 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6873 [0] = 2
6874 };
6875
6876 bytes = unspec;
6877 }
6878 }
6879 #else
6880 #pragma unused(credp)
6881 #endif
6882
6883 if (sizep != NULL) {
6884 *sizep = size;
6885 }
6886 return bytes;
6887 }
6888
6889 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])6890 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6891 u_int8_t info[DLIL_MODARGLEN])
6892 {
6893 struct kev_dl_issues kev;
6894 struct timeval tv;
6895
6896 VERIFY(ifp != NULL);
6897 VERIFY(modid != NULL);
6898 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
6899 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
6900
6901 bzero(&kev, sizeof(kev));
6902
6903 microtime(&tv);
6904 kev.timestamp = tv.tv_sec;
6905 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6906 if (info != NULL) {
6907 bcopy(info, &kev.info, DLIL_MODARGLEN);
6908 }
6909
6910 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6911 &kev.link_data, sizeof(kev), FALSE);
6912 }
6913
/*
 * Handle SIOCSIFOPPORTUNISTIC (set) and SIOCGIFOPPORTUNISTIC (get)
 * ioctls: map the ifreq opportunistic flags to/from the interface
 * throttle level, and report the number of opportunistic TCP/UDP
 * connections currently using the interface.
 *
 * @param ifp	target interface.
 * @param cmd	SIOCSIFOPPORTUNISTIC or SIOCGIFOPPORTUNISTIC only.
 * @param ifr	ioctl request; ifr_opportunistic is read (set) or
 *		written (get, and in-use count in both directions).
 * @param p	calling process, used for the superuser check on set.
 *
 * @return 0 on success, EPERM if a non-root process tries to set,
 *	EINVAL on unrecognized flags, or an error from
 *	ifnet_set_throttle()/ifnet_get_throttle().  EALREADY from
 *	the throttle layer is mapped to 0 (already at that level).
 */
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		/* Only the block-opportunistic flag (or none) is valid. */
		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		/* Get: translate throttle level back into ifo_flags. */
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	/* "Already at that level" is not an error to the caller; note
	 * that in this case ifo_inuse is left unmodified. */
	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
6972
/*
 * Query the current transmit throttling level of @ifp.
 *
 * @param ifp	interface to query; must use the TXSTART (driver start
 *		callback) output model, otherwise ENXIO is returned.
 * @param level	out: receives IFNET_THROTTLE_OFF or the level reported
 *		by the send queue; defaults to IFNET_THROTTLE_OFF when
 *		the classq is not enabled.
 *
 * @return 0 on success, ENXIO for non-TXSTART interfaces, or an error
 *	from the classq throttle request.
 */
int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	*level = IFNET_THROTTLE_OFF;

	ifq = ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq)) {
		/* First field 0 == query (don't set); see cqrq_throttle_t. */
		cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };

		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
		*level = req.level;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}
6998
/*
 * Set the transmit throttling level of @ifp.
 *
 * @param ifp	interface to modify; must use the TXSTART output model.
 * @param level	IFNET_THROTTLE_OFF or IFNET_THROTTLE_OPPORTUNISTIC;
 *		anything else yields EINVAL.
 *
 * @return 0 on success (also notifies NECP clients and, when turning
 *	throttling off, kicks the transmit start thread), ENXIO for
 *	non-TXSTART interfaces, EINVAL for a bad level, or an error
 *	from the classq throttle request.
 */
int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return EINVAL;
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq)) {
		/* First field 1 == set; see cqrq_throttle_t. */
		cqrq_throttle_t req = { 1, level };

		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
	}
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
		    level);
#if NECP
		/* Throttle state is part of NECP interface evaluation. */
		necp_update_all_clients();
#endif /* NECP */
		if (level == IFNET_THROTTLE_OFF) {
			/* Resume transmission of any queued packets. */
			ifnet_start(ifp);
		}
	}

	return err;
}
7040
/*
 * Handle SIOCSIFLOG (set) and SIOCGIFLOG (get) ioctls, translating
 * between the ifreq log parameters and the per-interface logging state
 * managed by ifnet_set_log()/ifnet_get_log().
 *
 * @param ifp	target interface.
 * @param cmd	SIOCSIFLOG or SIOCGIFLOG only.
 * @param ifr	ioctl request; ifr_log is read (set) or written (get).
 * @param p	calling process (unused; privilege comes from the
 *		current thread's credential).
 *
 * @return 0 on success, EPERM without PRIV_NET_INTERFACE_CONTROL on
 *	set, EINVAL for an out-of-range level or empty flag mask, or
 *	an error from ifnet_set_log()/ifnet_get_log().
 */
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}

		/* Keep only known facility bits; none left means EINVAL. */
		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
7088
/*
 * Update the logging level/facilities for @ifp, propagating the request
 * to the driver through its output-control callback when one exists.
 *
 * @param ifp		target interface.
 * @param level		new level; must be within [IFNET_LOG_MIN,
 *			IFNET_LOG_MAX] (VERIFYd, validated by caller).
 * @param flags		facility bits to enable; must be non-empty
 *			within IFNET_LOGF_MASK.
 * @param category	driver-defined category, passed through.
 * @param subcategory	driver-defined subcategory, passed through.
 *
 * @return 0 on success, or the error from the driver's
 *	IFNET_CTL_SET_LOG handler.
 */
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		/* The DLIL facility is handled here, not by the driver. */
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		/* Nothing left and DLIL logging wasn't on: reset level. */
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		/* IFNET_LOG_DEFAULT clears all facilities; otherwise
		 * accumulate the newly requested ones. */
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
		    "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags, flags,
		    category, subcategory);
	}

	return err;
}
7146
7147 int
ifnet_get_log(struct ifnet * ifp,int32_t * level,uint32_t * flags,int32_t * category,int32_t * subcategory)7148 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7149 int32_t *category, int32_t *subcategory)
7150 {
7151 if (level != NULL) {
7152 *level = ifp->if_log.level;
7153 }
7154 if (flags != NULL) {
7155 *flags = ifp->if_log.flags;
7156 }
7157 if (category != NULL) {
7158 *category = ifp->if_log.category;
7159 }
7160 if (subcategory != NULL) {
7161 *subcategory = ifp->if_log.subcategory;
7162 }
7163
7164 return 0;
7165 }
7166
7167 int
ifnet_notify_address(struct ifnet * ifp,int af)7168 ifnet_notify_address(struct ifnet *ifp, int af)
7169 {
7170 struct ifnet_notify_address_params na;
7171
7172 #if PF
7173 (void) pf_ifaddr_hook(ifp);
7174 #endif /* PF */
7175
7176 if (ifp->if_output_ctl == NULL) {
7177 return EOPNOTSUPP;
7178 }
7179
7180 bzero(&na, sizeof(na));
7181 na.address_family = (sa_family_t)af;
7182
7183 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7184 sizeof(na), &na);
7185 }
7186
7187 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)7188 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7189 {
7190 if (ifp == NULL || flowid == NULL) {
7191 return EINVAL;
7192 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7193 !IF_FULLY_ATTACHED(ifp)) {
7194 return ENXIO;
7195 }
7196
7197 *flowid = ifp->if_flowhash;
7198
7199 return 0;
7200 }
7201
/*
 * Flow-control the interface: mark @ifp's transmit path as flow
 * controlled so the start thread stops draining its send queue, and
 * register it in the flow-control tree so a later flow advisory
 * (ifnet_flowadv) can resume it.
 *
 * @return EINVAL on NULL ifp, ENXIO for non-TXSTART or not-fully-
 *	attached interfaces, else the result of ifnet_fc_add() (0 or
 *	EAGAIN on flow-hash collision).
 */
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err = 0;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	lck_mtx_lock(&ifp->if_start_lock);
	if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
		/* A resume raced with this disable: cancel both the
		 * pending resume and the controlled state. */
		ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
	} else if ((err = ifnet_fc_add(ifp)) == 0) {
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
	}
	lck_mtx_unlock(&ifp->if_start_lock);

	return err;
}
7224
7225 errno_t
ifnet_enable_output(struct ifnet * ifp)7226 ifnet_enable_output(struct ifnet *ifp)
7227 {
7228 if (ifp == NULL) {
7229 return EINVAL;
7230 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7231 !IF_FULLY_ATTACHED(ifp)) {
7232 return ENXIO;
7233 }
7234
7235 ifnet_start_common(ifp, TRUE, FALSE);
7236 return 0;
7237 }
7238
/*
 * Flow-advisory callback: a queue identified by @flowhash has drained
 * enough that its interface may resume transmitting.  Look up (and
 * remove) the matching flow-control entry and re-enable output on the
 * interface it refers to.
 *
 * @param flowhash	interface flow hash recorded by
 *			ifnet_disable_output()/ifnet_fc_add().
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	ifnet_ref_t ifp;

	/* Lookup removes the entry from the tree; we own it now. */
	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			/* If already uncontrolled, note the resume as
			 * pending so a racing disable can cancel it. */
			if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
				ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
			}
			lck_mtx_unlock(&ifp->if_start_lock);
			(void) ifnet_enable_output(ifp);
		}
		/* Drop the io refcount taken by ifnet_is_attached(). */
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
7267
7268 /*
7269 * Function to compare ifnet_fc_entries in ifnet flow control tree
7270 */
7271 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)7272 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7273 {
7274 return fc1->ifce_flowhash - fc2->ifce_flowhash;
7275 }
7276
/*
 * Insert @ifp into the global flow-control tree, keyed by its flow
 * hash, so a later flow advisory can find it.
 *
 * @return 0 if inserted (or already present for the same ifp), EAGAIN
 *	when a different interface already occupies the same flow hash
 *	(hash collision; the second entry is simply not added).
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex; zalloc may block */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
7320
/*
 * Look up and remove the flow-control entry for @flowhash.
 *
 * @return the detached entry (caller owns it and must free it with
 *	ifnet_fc_entry_free()), or NULL when no entry exists or its
 *	interface is no longer attached (in which case the entry is
 *	freed here).
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	ifnet_ref_t ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	/* Detach the entry; from here it belongs to the caller. */
	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
7358
/* Return a flow-control entry to its zone. */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
7364
7365 static uint32_t
ifnet_calc_flowhash(struct ifnet * ifp)7366 ifnet_calc_flowhash(struct ifnet *ifp)
7367 {
7368 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7369 uint32_t flowhash = 0;
7370
7371 if (ifnet_flowhash_seed == 0) {
7372 ifnet_flowhash_seed = RandomULong();
7373 }
7374
7375 bzero(&fh, sizeof(fh));
7376
7377 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
7378 fh.ifk_unit = ifp->if_unit;
7379 fh.ifk_flags = ifp->if_flags;
7380 fh.ifk_eflags = ifp->if_eflags;
7381 fh.ifk_capabilities = ifp->if_capabilities;
7382 fh.ifk_capenable = ifp->if_capenable;
7383 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7384 fh.ifk_rand1 = RandomULong();
7385 fh.ifk_rand2 = RandomULong();
7386
7387 try_again:
7388 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
7389 if (flowhash == 0) {
7390 /* try to get a non-zero flowhash */
7391 ifnet_flowhash_seed = RandomULong();
7392 goto try_again;
7393 }
7394
7395 return flowhash;
7396 }
7397
/*
 * Store (or clear, when @len is 0) the per-interface network signature
 * for the given address family.
 *
 * @param ifp	target interface.
 * @param family AF_INET or AF_INET6; anything else yields EINVAL.
 * @param len	signature length in bytes; 0 clears, and anything larger
 *		than the per-family netsig buffer yields EINVAL.
 * @param flags	currently unused.
 * @param data	signature bytes, at least @len long.
 *
 * @return 0 on success, EINVAL for bad family/length, ENOMEM when the
 *	per-family extra-data area was never allocated.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *__sized_by(len) data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		/* Mirror of the AF_INET path using the v6 lock/storage. */
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
7459
/*
 * Copy out the per-interface network signature for the given family.
 *
 * @param ifp	target interface.
 * @param family AF_INET or AF_INET6.
 * @param len	in: capacity of @data; out: actual signature length.
 *		A capacity of 0 or smaller than the stored signature
 *		yields EINVAL.
 * @param flags	out (optional): always set to 0 on success.
 * @param data	buffer receiving the signature bytes.
 *
 * @return 0 on success, EINVAL on bad arguments/family, ENOENT when no
 *	signature is stored, ENOMEM when the per-family extra-data area
 *	was never allocated.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *__sized_by(*len) data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			/* Caller's buffer must fit the stored signature. */
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		/* Mirror of the AF_INET path using the v6 lock/storage. */
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
7520
/*
 * Install (or clear) the NAT64 prefixes on @ifp.
 *
 * @param ifp		target interface.
 * @param prefixes	array of NAT64_MAX_NUM_PREFIXES entries; an entry
 *			with prefix_len == 0 clears that slot, otherwise
 *			prefix_len must be one of the RFC 6052 lengths
 *			(32/40/48/56/64/96) and the prefix must not have
 *			interface/link-local scope.
 *
 * @return 0 on success (NECP clients are notified if any prefix was
 *	set), EINVAL for a bad prefix length or scoped prefix, ENOMEM
 *	when the v6 extra-data area was never allocated.  On error,
 *	slots processed before the bad entry remain modified.
 */
int
ifnet_set_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	/* NAT64 availability affects NECP policy evaluation. */
	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
7587
/*
 * Copy out the NAT64 prefixes configured on @ifp.
 *
 * @param ifp		interface to query; NULL yields EINVAL.
 * @param prefixes	optional destination array of
 *			NAT64_MAX_NUM_PREFIXES entries; when NULL, the
 *			call only reports whether any prefix exists.
 *
 * @return 0 when at least one prefix is configured, ENOENT when none
 *	is, ENOMEM when the v6 extra-data area was never allocated.
 */
int
ifnet_get_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* A slot with non-zero prefix_len means a prefix is configured. */
	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one = 1;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}
7626
7627 #if DEBUG || DEVELOPMENT
/* Blob for sum16 verification */
/* NOTE(review): the leading bytes (0x1f 0x8b 0x08) look like a gzip
 * header, so this is presumably a small compressed file used purely as
 * arbitrary test bytes — only the byte values matter to the checksum
 * self-test below. */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
7664
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
/* Each row checks one prefix length of sumdata[]; sumr is filled in
 * lazily from in_cksum_mbuf_ref() on first use and must then match the
 * precomputed sumrp, or the self-test panics. */
static struct {
	boolean_t init;
	uint16_t len;
	uint16_t sumr;  /* reference */
	uint16_t sumrp; /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7689
/*
 * Boot-time self-test (DEBUG/DEVELOPMENT only) for the 16-bit one's
 * complement checksum implementations: m_sum16() (mbuf, zero and
 * non-zero offset), in_cksum_mbuf_ref() (reference), and b_sum16()
 * (flat buffer, INET only).  Each span length in sumtbl[] is exercised
 * at every byte alignment within a uint64_t.  Any mismatch against the
 * precomputed reference panics the system.
 */
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	/* M_WAITOK: allocation blocks rather than failing. */
	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *); /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (uintptr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* First pass: derive reference sum from the
			 * independent in_cksum_mbuf_ref() implementation. */
			if (!sumtbl[n].init) {
				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (uintptr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by aligment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
7778 #endif /* DEBUG || DEVELOPMENT */
7779
/* Expand to a case label returning the constant's name as a string. */
#define CASE_STRINGIFY(x) case x: return #x

/*
 * Map a KEV_DL_* event code to its symbolic name, for logging.
 * Returns the empty string for unknown codes.
 */
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
7816
7817 void
dlil_dt_tcall_fn(thread_call_param_t arg0,thread_call_param_t arg1)7818 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
7819 {
7820 #pragma unused(arg1)
7821 ifnet_ref_t ifp = arg0;
7822
7823 if (ifnet_is_attached(ifp, 1)) {
7824 nstat_ifnet_threshold_reached(ifp->if_index);
7825 ifnet_decr_iorefcnt(ifp);
7826 }
7827 }
7828
/*
 * Check whether @ifp's cumulative traffic (rx + tx bytes) has advanced
 * past its data threshold since the last notification, and if so
 * schedule the threshold thread-call.  Notifications are rate-limited
 * to one per threshold_interval seconds (immediate when the interval
 * is 0).
 */
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	/* The CAS ensures only one racing caller records the new byte
	 * count and schedules the call. */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
7858
7859
/* Thin wrapper: forward per-flow interface stats to the TCP layer. */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
7866
/* Atomically OR @set_flags into *flags_p; returns the previous value. */
static inline u_int32_t
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
{
	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
}
7872
/* Atomically clear @clear_flags in *flags_p; returns the previous value. */
static inline u_int32_t
_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
{
	return (u_int32_t)OSBitAndAtomic(~clear_flags, flags_p);
}
7878
/* Atomically set extended-flag bits on @interface; returns old flags. */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}
7884
7885 __private_extern__ void
if_clear_eflags(ifnet_t interface,u_int32_t clear_flags)7886 if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
7887 {
7888 _clear_flags(&interface->if_eflags, clear_flags);
7889 }
7890
7891 __private_extern__ u_int32_t
if_set_xflags(ifnet_t interface,u_int32_t set_flags)7892 if_set_xflags(ifnet_t interface, u_int32_t set_flags)
7893 {
7894 return _set_flags(&interface->if_xflags, set_flags);
7895 }
7896
7897 __private_extern__ u_int32_t
if_clear_xflags(ifnet_t interface,u_int32_t clear_flags)7898 if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
7899 {
7900 return _clear_flags(&interface->if_xflags, clear_flags);
7901 }
7902
/*
 * Bump the interface's traffic-rule generation ID so readers that
 * cache it (see ifnet_sync_traffic_rule_genid) can detect a change.
 */
__private_extern__ void
ifnet_update_traffic_rule_genid(ifnet_t ifp)
{
	os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
}
7908
7909 __private_extern__ boolean_t
ifnet_sync_traffic_rule_genid(ifnet_t ifp,uint32_t * genid)7910 ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
7911 {
7912 if (*genid != ifp->if_traffic_rule_genid) {
7913 *genid = ifp->if_traffic_rule_genid;
7914 return TRUE;
7915 }
7916 return FALSE;
7917 }
/*
 * Publish a new traffic-rule count for the interface.  The release
 * store orders the count update before the generation-ID bump that
 * tells readers to re-sync.
 */
__private_extern__ void
ifnet_update_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_traffic_rule_count, count, release);
	ifnet_update_traffic_rule_genid(ifp);
}
7924
7925
7926 #if SKYWALK
7927 static bool
net_check_compatible_if_filter(struct ifnet * ifp)7928 net_check_compatible_if_filter(struct ifnet *ifp)
7929 {
7930 if (ifp == NULL) {
7931 if (net_api_stats.nas_iflt_attach_count > net_api_stats.nas_iflt_attach_os_count) {
7932 return false;
7933 }
7934 } else {
7935 if (ifp->if_flt_non_os_count > 0) {
7936 return false;
7937 }
7938 }
7939 return true;
7940 }
7941 #endif /* SKYWALK */
7942
/*
 * Account for the `k` bytes just written into the dump buffer: shrink
 * the remaining-space counter `clen` and advance the cursor `c`, or
 * jump to the invoking function's `done` label once the buffer is
 * exhausted.  Expects `c`, `k`, `clen` and a `done` label in scope.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single
 * statement in every context (e.g. an unbraced if/else); the previous
 * bare-brace form left a dangling `;` after expansion.
 */
#define DUMP_BUF_CHK() do { \
	clen -= k; \
	if (clen < 1) \
	        goto done; \
	c += k; \
} while (0)
7949
int dlil_dump_top_if_qlen(char *__counted_by(str_len), int str_len);
/*
 * Scan all attached interfaces and write, into `str` (capacity
 * str_len), a report naming the interface with the longest send
 * queue (ifcq_len) and the one with the longest input-thread packet
 * queue.  Returns the number of characters written.
 */
int
dlil_dump_top_if_qlen(char *__counted_by(str_len) str, int str_len)
{
	char *c = str;          /* current write position */
	int k, clen = str_len;  /* k: last scnprintf count; clen: space left */
	ifnet_ref_t top_ifcq_ifp = NULL;
	uint32_t top_ifcq_len = 0;
	ifnet_ref_t top_inq_ifp = NULL;
	uint32_t top_inq_len = 0;

	/*
	 * NOTE(review): loop condition is `ifidx < if_index`; confirm
	 * whether slot if_index itself is valid in ifindex2ifnet — if
	 * so, the highest-indexed interface is never examined.
	 */
	for (int ifidx = 1; ifidx < if_index; ifidx++) {
		ifnet_ref_t ifp = ifindex2ifnet[ifidx];
		/*
		 * Cast assumes the ifnet is embedded at the start of
		 * struct dlil_ifnet — TODO confirm against the struct
		 * definition.
		 */
		struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

		if (ifp == NULL) {
			continue;
		}
		/* Track the deepest send (output) queue seen so far. */
		if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
			top_ifcq_len = ifp->if_snd->ifcq_len;
			top_ifcq_ifp = ifp;
		}
		/* Track the deepest input-thread packet queue. */
		if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
			top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
			top_inq_ifp = ifp;
		}
	}

	if (top_ifcq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
		    top_ifcq_len, top_ifcq_ifp->if_xname);
		/* Advances c / shrinks clen; jumps to `done` when full. */
		DUMP_BUF_CHK();
	}
	if (top_inq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
		    top_inq_len, top_inq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
done:
	/* Space consumed = capacity minus what remains. */
	return str_len - clen;
}
7991