1 /*
2 * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35 #include <ptrauth.h>
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/dlil_sysctl.h>
53 #include <net/dlil_var_private.h>
54 #include <net/if_arp.h>
55 #include <net/if_var_private.h>
56 #include <net/iptap.h>
57 #include <net/pktap.h>
58 #include <net/droptap.h>
59 #include <net/nwk_wq.h>
60 #include <sys/kern_event.h>
61 #include <sys/kdebug.h>
62 #include <sys/mcache.h>
63 #include <sys/syslog.h>
64 #include <sys/protosw.h>
65 #include <sys/priv.h>
66
67 #include <kern/assert.h>
68 #include <kern/locks.h>
69 #include <kern/sched_prim.h>
70 #include <kern/task.h>
71 #include <kern/thread.h>
72 #include <kern/uipc_domain.h>
73 #include <kern/zalloc.h>
74 #include <kern/thread_group.h>
75
76 #include <net/kpi_protocol.h>
77 #include <net/kpi_interface.h>
78 #include <net/if_types.h>
79 #include <net/if_ipsec.h>
80 #include <net/if_llreach.h>
81 #include <net/if_utun.h>
82 #include <net/kpi_interfacefilter.h>
83 #include <net/classq/classq.h>
84 #include <net/classq/classq_sfb.h>
85 #include <net/flowhash.h>
86 #include <net/ntstat.h>
87 #if SKYWALK
88 #include <skywalk/lib/net_filter_event.h>
89 #endif /* SKYWALK */
90 #include <net/net_api_stats.h>
91 #include <net/if_ports_used.h>
92 #include <net/if_vlan_var.h>
93 #include <netinet/in.h>
94 #if INET
95 #include <netinet/in_var.h>
96 #include <netinet/igmp_var.h>
97 #include <netinet/ip_var.h>
98 #include <netinet/tcp.h>
99 #include <netinet/tcp_var.h>
100 #include <netinet/udp.h>
101 #include <netinet/udp_var.h>
102 #include <netinet/if_ether.h>
103 #include <netinet/in_pcb.h>
104 #include <netinet/in_tclass.h>
105 #include <netinet/ip.h>
106 #include <netinet/ip_icmp.h>
107 #include <netinet/icmp_var.h>
108 #endif /* INET */
109
110 #include <net/nat464_utils.h>
111 #include <netinet6/in6_var.h>
112 #include <netinet6/nd6.h>
113 #include <netinet6/mld6_var.h>
114 #include <netinet6/scope6_var.h>
115 #include <netinet/ip6.h>
116 #include <netinet/icmp6.h>
117 #include <net/pf_pbuf.h>
118 #include <libkern/OSAtomic.h>
119 #include <libkern/tree.h>
120
121 #include <dev/random/randomdev.h>
122 #include <machine/machine_routines.h>
123
124 #include <mach/thread_act.h>
125 #include <mach/sdt.h>
126
127 #if CONFIG_MACF
128 #include <sys/kauth.h>
129 #include <security/mac_framework.h>
130 #include <net/ethernet.h>
131 #include <net/firewire.h>
132 #endif
133
134 #if PF
135 #include <net/pfvar.h>
136 #endif /* PF */
137 #include <net/pktsched/pktsched.h>
138 #include <net/pktsched/pktsched_netem.h>
139
140 #if NECP
141 #include <net/necp.h>
142 #endif /* NECP */
143
144 #if SKYWALK
145 #include <skywalk/packet/packet_queue.h>
146 #include <skywalk/nexus/netif/nx_netif.h>
147 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
148 #endif /* SKYWALK */
149
150 #include <net/sockaddr_utils.h>
151
152 #include <os/log.h>
153
154 uint64_t if_creation_generation_count = 0;
155
156 dlil_ifnet_queue_t dlil_ifnet_head;
157
158 static u_int32_t net_rtref;
159
160 static struct dlil_main_threading_info dlil_main_input_thread_info;
161 struct dlil_threading_info *__single dlil_main_input_thread;
162
163 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
164 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
165
166 static int ifnet_lookup(struct ifnet *);
167 static void if_purgeaddrs(struct ifnet *);
168
169 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
170 struct mbuf *, char *);
171 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
172 struct mbuf *);
173 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
174 mbuf_t *, const struct sockaddr *, void *,
175 IFNET_FRAME_TYPE_RW_T, IFNET_LLADDR_RW_T);
176 static void ifproto_media_event(struct ifnet *, protocol_family_t,
177 const struct kev_msg *);
178 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
179 unsigned long, void *);
180 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
181 struct sockaddr_dl *, size_t);
182 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
183 const struct sockaddr_dl *, const struct sockaddr *,
184 const struct sockaddr_dl *, const struct sockaddr *);
185
186 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
187 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
188 boolean_t poll, struct thread *tp);
189 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
190 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
191 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
192 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
193 protocol_family_t *);
194 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
195 const struct ifnet_demux_desc *, u_int32_t);
196 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
197 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
198 #if !XNU_TARGET_OS_OSX
199 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
200 const struct sockaddr *, IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
201 u_int32_t *, u_int32_t *);
202 #else /* XNU_TARGET_OS_OSX */
203 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
204 const struct sockaddr *,
205 IFNET_LLADDR_T, IFNET_FRAME_TYPE_T);
206 #endif /* XNU_TARGET_OS_OSX */
207 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
208 const struct sockaddr *,
209 IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
210 u_int32_t *, u_int32_t *);
211 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
212 static void ifp_if_free(struct ifnet *);
213 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
214
215
216
217 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
218 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
219 #if DEBUG || DEVELOPMENT
220 static void dlil_verify_sum16(void);
221 #endif /* DEBUG || DEVELOPMENT */
222
223
224 static void ifnet_detacher_thread_func(void *, wait_result_t);
225 static void ifnet_detacher_thread_cont(void *, wait_result_t);
226 static void ifnet_detach_final(struct ifnet *);
227 static void ifnet_detaching_enqueue(struct ifnet *);
228 static struct ifnet *ifnet_detaching_dequeue(void);
229
230 static void ifnet_start_thread_func(void *, wait_result_t);
231 static void ifnet_start_thread_cont(void *, wait_result_t);
232
233 static void ifnet_poll_thread_func(void *, wait_result_t);
234 static void ifnet_poll_thread_cont(void *, wait_result_t);
235
236 static errno_t ifnet_enqueue_common_single(struct ifnet *, struct ifclassq *,
237 classq_pkt_t *, boolean_t, boolean_t *);
238
239 static void ifp_src_route_copyout(struct ifnet *, struct route *);
240 static void ifp_src_route_copyin(struct ifnet *, struct route *);
241 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
242 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
243
244
245 /* The following are protected by dlil_ifnet_lock */
246 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
247 static u_int32_t ifnet_detaching_cnt;
248 static boolean_t ifnet_detaching_embryonic;
249 static void *ifnet_delayed_run; /* wait channel for detaching thread */
250
251 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
252 &dlil_lck_attributes);
253
254 static uint32_t ifnet_flowhash_seed;
255
/*
 * Key material folded into an interface's flow hash (computed by
 * ifnet_calc_flowhash(), seeded with ifnet_flowhash_seed).  The two
 * random fields make the resulting hash unpredictable across boots.
 * NOTE(review): per-field semantics inferred from names; confirm
 * against ifnet_calc_flowhash().
 */
struct ifnet_flowhash_key {
	char ifk_name[IFNAMSIZ];        /* interface name */
	uint32_t ifk_unit;              /* interface unit number */
	uint32_t ifk_flags;
	uint32_t ifk_eflags;
	uint32_t ifk_capabilities;
	uint32_t ifk_capenable;
	uint32_t ifk_output_sched_model;
	uint32_t ifk_rand1;             /* random input */
	uint32_t ifk_rand2;             /* random input */
};
267
268 /* Flow control entry per interface */
/* Flow control entry per interface; nodes of ifnet_fc_tree (see ifce_cmp) */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;    /* red-black tree linkage */
	u_int32_t ifce_flowhash;                /* interface flow hash (tree key) */
	ifnet_ref_t ifce_ifp;                   /* interface this entry refers to */
};
274
275 static uint32_t ifnet_calc_flowhash(struct ifnet *);
276 static int ifce_cmp(const struct ifnet_fc_entry *,
277 const struct ifnet_fc_entry *);
278 static int ifnet_fc_add(struct ifnet *);
279 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
280 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
281
282 /* protected by ifnet_fc_lock */
283 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
284 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
285 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
286
287 static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
288
289 extern void bpfdetach(struct ifnet *);
290
291
292 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
293 u_int32_t flags);
294 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
295 u_int32_t flags);
296
297
298 #if CONFIG_MACF
299 #if !XNU_TARGET_OS_OSX
300 int dlil_lladdr_ckreq = 1;
301 #else /* XNU_TARGET_OS_OSX */
302 int dlil_lladdr_ckreq = 0;
303 #endif /* XNU_TARGET_OS_OSX */
304 #endif /* CONFIG_MACF */
305
306
/*
 * Atomically increment the global ifnet_delay_start_disabled counter
 * (declared elsewhere; presumably in dlil_var_private.h — confirm).
 * A non-zero count disables delayed-start behavior for interfaces.
 * NOTE(review): the "disables delayed start" effect is inferred from the
 * counter's name; verify at its point of use.
 */
static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}
312
313 unsigned int net_rxpoll = 1;
314 unsigned int net_affinity = 1;
315 unsigned int net_async = 1; /* 0: synchronous, 1: asynchronous */
316
317 extern u_int32_t inject_buckets;
318
/*
 * Called when an interface filter that affects TSO is attached
 * (filter_enable == TRUE) or detached (filter_enable == FALSE).
 * Maintains ifp->if_flt_no_tso_count and bumps the route generation ID
 * so TCP re-evaluates its TSO decision.
 */
void
ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
{
	/*
	 * update filter count and route_generation ID to let TCP
	 * know it should reevaluate doing TSO or not
	 */
	if (filter_enable) {
		OSAddAtomic(1, &ifp->if_flt_no_tso_count);
	} else {
		/* must be balanced: a disable without a prior enable is a bug */
		VERIFY(ifp->if_flt_no_tso_count != 0);
		OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
	}
	routegenid_update();
}
334
335 os_refgrp_decl(static, if_refiogrp, "if refio refcounts", NULL);
336 os_refgrp_decl(static, if_datamovgrp, "if datamov refcounts", NULL);
337 #define IF_DATAMOV_BITS 1
338 #define IF_DATAMOV_DRAINING 1
339
340 #if SKYWALK
341
342 static bool net_check_compatible_if_filter(struct ifnet *ifp);
343
344 /* if_attach_nx flags defined in os_skywalk_private.h */
345 unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
346 unsigned int if_enable_fsw_ip_netagent =
347 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
348 unsigned int if_enable_fsw_transport_netagent =
349 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
350
351 unsigned int if_netif_all =
352 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);
353
354 /* Configure flowswitch to use max mtu sized buffer */
355 static bool fsw_use_max_mtu_buffer = false;
356
357
358 static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);
359
360 #include <skywalk/os_skywalk_private.h>
361
362 boolean_t
ifnet_nx_noauto(ifnet_t ifp)363 ifnet_nx_noauto(ifnet_t ifp)
364 {
365 return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
366 }
367
/*
 * Return TRUE if a flowswitch should not be auto-attached to this
 * interface.  Currently this is exactly the low-latency case.
 */
boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)
{
	return ifnet_is_low_latency(ifp);
}
373
374 boolean_t
ifnet_is_low_latency(ifnet_t ifp)375 ifnet_is_low_latency(ifnet_t ifp)
376 {
377 return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
378 }
379
/*
 * Decide whether the netif compatibility (shim) layer should be plumbed
 * for this interface.  Globally gated by IF_ATTACH_NX_NETIF_COMPAT in
 * if_attach_nx; on embedded platforms, Wi-Fi "ap" (Access Point)
 * interfaces additionally require IF_ATTACH_NX_NETIF_ALL.
 */
boolean_t
ifnet_needs_compat(ifnet_t ifp)
{
	if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
		/* compat auto-attach globally disabled */
		return FALSE;
	}
#if !XNU_TARGET_OS_OSX
	/*
	 * To conserve memory, we plumb in the compat layer selectively; this
	 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
	 * In particular, we check for Wi-Fi Access Point.
	 */
	if (IFNET_IS_WIFI(ifp)) {
		/* Wi-Fi Access Point */
		if (strcmp(ifp->if_name, "ap") == 0) {
			/* "ap" only gets compat when NETIF_ALL is configured */
			return if_netif_all;
		}
	}
#else /* XNU_TARGET_OS_OSX */
#pragma unused(ifp)
#endif /* XNU_TARGET_OS_OSX */
	return TRUE;
}
403
/*
 * Decide whether a flowswitch transport netagent should be enabled for
 * this interface.  Requires the global transport-netagent switch to be
 * on; ipsec/utun interfaces defer to their own per-interface setting,
 * other families honor the nexus no-auto override and the global
 * if_attach_nx configuration (cellular/ethernet only).
 */
boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
{
	if (if_is_fsw_transport_netagent_enabled()) {
		/* check if netagent has been manually enabled for ipsec/utun */
		if (ifp->if_family == IFNET_FAMILY_IPSEC) {
			return ipsec_interface_needs_netagent(ifp);
		} else if (ifp->if_family == IFNET_FAMILY_UTUN) {
			return utun_interface_needs_netagent(ifp);
		}

		/* check ifnet no auto nexus override */
		if (ifnet_nx_noauto(ifp)) {
			return FALSE;
		}

		/* check global if_attach_nx configuration */
		switch (ifp->if_family) {
		case IFNET_FAMILY_CELLULAR:
		case IFNET_FAMILY_ETHERNET:
			if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
				return TRUE;
			}
			break;
		default:
			break;
		}
	}
	return FALSE;
}
434
435 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)436 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
437 {
438 #pragma unused(ifp)
439 if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
440 return TRUE;
441 }
442 return FALSE;
443 }
444
445 boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)446 ifnet_needs_netif_netagent(ifnet_t ifp)
447 {
448 #pragma unused(ifp)
449 return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
450 }
451
/*
 * Tear down a single nexus provider instance.
 *
 * If 'device' is a valid (non-null) UUID, the device port is detached
 * from the instance first via kern_nexus_ifdetach(); the instance is
 * then freed.  Errors are logged (prefixed with 'func_str') but not
 * propagated — teardown is best-effort.
 *
 * Returns TRUE if an instance was present and teardown was attempted,
 * FALSE if 'instance' was null (nothing to do).
 */
static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,
    const char *func_str, uuid_t instance, uuid_t device)
{
	errno_t err;

	if (instance == NULL || uuid_is_null(instance)) {
		return FALSE;
	}

	/* followed by the device port */
	if (device != NULL && !uuid_is_null(device)) {
		err = kern_nexus_ifdetach(controller, instance, device);
		if (err != 0) {
			DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
			    func_str, err);
		}
	}
	err = kern_nexus_controller_free_provider_instance(controller,
	    instance);
	if (err != 0) {
		DLIL_PRINTF("%s free_provider_instance failed %d\n",
		    func_str, err);
	}
	return TRUE;
}
478
/*
 * Tear down a nexus: free the instance (and its device attachment),
 * then deregister the provider.  All steps are best-effort; errors are
 * logged (prefixed with 'func_str') but never abort the teardown.
 *
 * Returns TRUE if either an instance or a provider existed and a
 * teardown was attempted, FALSE if there was nothing to detach.
 */
static boolean_t
dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
    uuid_t device)
{
	boolean_t detached = FALSE;
	nexus_controller_t controller = kern_nexus_shared_controller();
	int err;

	if (dlil_detach_nexus_instance(controller, func_str, instance,
	    device)) {
		detached = TRUE;
	}
	if (provider != NULL && !uuid_is_null(provider)) {
		/* counted as detached even if deregistration fails below */
		detached = TRUE;
		err = kern_nexus_controller_deregister_provider(controller,
		    provider);
		if (err != 0) {
			DLIL_PRINTF("%s deregister_provider %d\n",
			    func_str, err);
		}
	}
	return detached;
}
502
/*
 * Register a nexus provider of the given type ("netif" or "flowswitch")
 * named "com.apple.<type>.<ifname>" with the shared controller, then
 * allocate one instance of it.
 *
 * On success, *provider and *instance hold the new UUIDs and 0 is
 * returned.  On failure an errno is returned and any provider
 * registered along the way is deregistered (the deregistration's own
 * return value is intentionally ignored — best-effort cleanup).
 *
 * Note: the 'failed' label is reached on the success path as well; it
 * simply returns 'err', which is 0 when everything succeeded.
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t dom_prov;
	errno_t err;
	nexus_name_t provider_name;
	const char *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* undo the provider registration; ignore cleanup errors */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	return err;
}
552
/*
 * Create and attach a netif nexus (provider + instance) for 'ifp' and
 * record the resulting UUIDs in 'netif_nx'.
 *
 * Fails (returns FALSE) if a nexus is already attached (IFCAP_SKYWALK
 * set), or if attribute creation, provider/instance creation, or the
 * ifattach step fails; on an ifattach failure the freshly created
 * provider/instance are torn down again.  The nexus attribute object is
 * always destroyed on the failure path.  Returns TRUE on success.
 */
static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller;
	errno_t err;
	/* a forged null-but-indexable uuid buffer, used as "no uuid" argument */
	unsigned char *empty_uuid = __unsafe_forge_bidi_indexable(unsigned char *, NULL, sizeof(uuid_t));

	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
		/* it's already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s already has nexus attached\n",
			    __func__, if_name(ifp));
			/* already attached */
		}
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}
	/* bind the nexus to this interface's index */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
	VERIFY(err == 0);

	controller = kern_nexus_shared_controller();

	/* create the netif provider and instance */
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
	if (err != 0) {
		goto failed;
	}

	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance, ifp,
	    empty_uuid, FALSE, &netif_nx->if_nif_attach);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
		    __func__, err);
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
		    netif_nx->if_nif_instance, empty_uuid);
		goto failed;
	}
	return TRUE;

failed:
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
608
609 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)610 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
611 {
612 if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
613 IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
614 goto failed;
615 }
616 switch (ifp->if_type) {
617 case IFT_CELLULAR:
618 case IFT_ETHER:
619 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
620 /* don't auto-attach */
621 goto failed;
622 }
623 break;
624 default:
625 /* don't auto-attach */
626 goto failed;
627 }
628 return dlil_attach_netif_nexus_common(ifp, netif_nx);
629
630 failed:
631 return FALSE;
632 }
633
/*
 * Tear down the netif nexus recorded in 'nexus_netif' (provider,
 * instance, and attachment).  Best-effort; see dlil_detach_nexus().
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
641
642 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)643 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
644 {
645 struct ifreq ifr;
646 int error;
647
648 bzero(&ifr, sizeof(ifr));
649 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
650 if (error == 0) {
651 *ifdm_p = ifr.ifr_devmtu;
652 }
653 return error;
654 }
655
/*
 * Adjust the flowswitch's large buffer size to account for the
 * interface's TSO capabilities.  Only applies on macOS/server kernels;
 * a no-op elsewhere.  For native interfaces the size tracks the larger
 * of the TSO v4/v6 MTUs (capped at NX_FSW_MAX_LARGE_BUFSIZE); for
 * compat interfaces it is capped at NX_FSW_DEF_LARGE_BUFSIZE.
 */
static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
{
	uint32_t tso_v4_mtu = 0;
	uint32_t tso_v6_mtu = 0;

	if (!kernel_is_macos_or_server()) {
		return;
	}

	/*
	 * Note that we are reading the real hwassist flags set by the driver
	 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
	 * hasn't been called yet.
	 */
	if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
		tso_v4_mtu = ifp->if_tso_v4_mtu;
	}
	if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
		tso_v6_mtu = ifp->if_tso_v6_mtu;
	}

	/*
	 * If the hardware supports TSO, adjust the large buf size to match the
	 * supported TSO MTU size. Note that only native interfaces set TSO MTU
	 * size today.
	 * For compat, there is a 16KB limit on large buf size, so it needs to be
	 * bounded by NX_FSW_DEF_LARGE_BUFSIZE. Note that no compat interfaces
	 * set TSO MTU size today.
	 */
	if (SKYWALK_NATIVE(ifp)) {
		if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
			*large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
		} else {
			/* no TSO MTU advertised: fall back to the GSO MTU floor */
			*large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
		}
		*large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
	} else {
		*large_buf_size = MIN(NX_FSW_DEF_LARGE_BUFSIZE, *large_buf_size);
	}
}
697
698 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint32_t * buf_size,bool * use_multi_buflet,uint32_t * large_buf_size)699 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
700 bool *use_multi_buflet, uint32_t *large_buf_size)
701 {
702 struct kern_pbufpool_memory_info rx_pp_info;
703 struct kern_pbufpool_memory_info tx_pp_info;
704 uint32_t if_max_mtu = 0;
705 uint32_t drv_buf_size;
706 struct ifdevmtu ifdm;
707 int err;
708
709 /*
710 * To perform intra-stack RX aggregation flowswitch needs to use
711 * multi-buflet packet.
712 */
713 *use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
714
715 *large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
716 /*
717 * IP over Thunderbolt interface can deliver the largest IP packet,
718 * but the driver advertises the MAX MTU as only 9K.
719 */
720 if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
721 if_max_mtu = IP_MAXPACKET;
722 goto skip_mtu_ioctl;
723 }
724
725 /* determine max mtu */
726 bzero(&ifdm, sizeof(ifdm));
727 err = dlil_siocgifdevmtu(ifp, &ifdm);
728 if (__improbable(err != 0)) {
729 DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
730 __func__, if_name(ifp));
731 /* use default flowswitch buffer size */
732 if_max_mtu = NX_FSW_BUFSIZE;
733 } else {
734 DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
735 ifdm.ifdm_max, ifdm.ifdm_current);
736 /* rdar://problem/44589731 */
737 if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
738 }
739
740 skip_mtu_ioctl:
741 if (if_max_mtu == 0) {
742 DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
743 __func__, if_name(ifp));
744 return EINVAL;
745 }
746 if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
747 DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
748 "max bufsize(%d)\n", __func__,
749 if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
750 return EINVAL;
751 }
752
753 /*
754 * for skywalk native driver, consult the driver packet pool also.
755 */
756 if (dlil_is_native_netif_nexus(ifp)) {
757 err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
758 &tx_pp_info);
759 if (err != 0) {
760 DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
761 __func__, if_name(ifp));
762 return ENXIO;
763 }
764 drv_buf_size = tx_pp_info.kpm_bufsize *
765 tx_pp_info.kpm_max_frags;
766 if (if_max_mtu > drv_buf_size) {
767 DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
768 "tx %d * %d) can't support max mtu(%d)\n", __func__,
769 if_name(ifp), rx_pp_info.kpm_bufsize,
770 rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
771 tx_pp_info.kpm_max_frags, if_max_mtu);
772 return EINVAL;
773 }
774 } else {
775 drv_buf_size = if_max_mtu;
776 }
777
778 if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
779 static_assert((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
780 *use_multi_buflet = true;
781 /* default flowswitch buffer size */
782 *buf_size = NX_FSW_BUFSIZE;
783 *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
784 } else {
785 *buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
786 }
787 _dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
788 ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
789 if (*buf_size >= *large_buf_size) {
790 *large_buf_size = 0;
791 }
792 return 0;
793 }
794
/*
 * Create and attach a flowswitch nexus for 'ifp', recording the
 * provider/instance/device UUIDs in 'nexus_fsw'.
 *
 * Bails out (FALSE, with a "not attaching" log) when auto-attach is
 * disallowed (no-auto, low-latency, vmnet), when no netif is plumbed
 * (IFCAP_SKYWALK clear), or when IF_ATTACH_NX_FLOWSWITCH is not
 * configured.  Otherwise sizes the buffers via
 * _dlil_get_flowswitch_buffer_size(), creates the provider/instance and
 * attaches the device port; any failure after creation tears the nexus
 * back down.  The attribute object is destroyed on every failure path.
 * Returns TRUE on success.
 */
static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller;
	errno_t err = 0;
	uuid_t netif;
	uint32_t buf_size = 0;
	uint32_t large_buf_size = 0;
	bool multi_buflet;

	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
	    IFNET_IS_VMNET(ifp)) {
		goto failed;
	}

	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
		/* not possible to attach (netif native/compat not plumbed) */
		goto failed;
	}

	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
		/* don't auto-attach */
		goto failed;
	}

	/* get the netif instance from the ifp */
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet, &large_buf_size);
	if (err != 0) {
		goto failed;
	}
	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
	ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);

	/* Configure flowswitch buffer size */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
	    large_buf_size);
	VERIFY(err == 0);

	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	/* create the flowswitch provider and instance */
	controller = kern_nexus_shared_controller();
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
	if (err != 0) {
		goto failed;
	}

	/* attach the device port */
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
		goto failed;
	}
	return TRUE;

failed:
	if (err != 0) {
		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
		    __func__, if_name(ifp), err);
	} else {
		/* err == 0 here means we bailed on a policy check, not a failure */
		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
		    __func__, if_name(ifp));
	}
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
893
/*
 * Attach a flowswitch nexus to 'ifp' if one is not already attached.
 * Skips legacy-TX-model interfaces (no IFEF_TXSTART) and, on
 * development/debug kernels, interfaces configured for netif-direct.
 * The datamov begin/end bracket serializes against concurrent interface
 * detach (see the comment below).  Returns TRUE if a flowswitch was
 * attached by this call.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t attached = FALSE;
	if_nexus_flowswitch nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
		    if_name(ifp));
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));

	/*
	 * A race can happen between a thread creating a flowswitch and another thread
	 * detaching the interface (also destroying the flowswitch).
	 *
	 * ifnet_datamov_begin() is used here to force dlil_quiesce_and_detach_nexuses()
	 * (called by another thread) to wait until this function finishes so the
	 * flowswitch can be cleaned up by dlil_detach_flowswitch_nexus().
	 *
	 * If ifnet_get_ioref() is used instead, dlil_quiesce_and_detach_nexuses()
	 * would not wait (because ifp->if_nx_flowswitch isn't assigned) and the
	 * created flowswitch would be left hanging and ifnet_detach_final() would never
	 * wakeup because the existence of the flowswitch prevents the ifnet's ioref
	 * from being released.
	 */
	if (!ifnet_datamov_begin(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s not attached",
		    __func__, ifp->if_xname);
		goto done;
	}
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
		/* no flowswitch yet: build one and publish it under the ifnet lock */
		attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
		if (attached) {
			ifnet_lock_exclusive(ifp);
			ifp->if_nx_flowswitch = nexus_fsw;
			ifnet_lock_done(ifp);
		}
	}
	ifnet_datamov_end(ifp);

done:
	return attached;
}
950
/*
 * Tear down the flowswitch nexus recorded in 'nexus_fsw' (provider,
 * instance, and device attachment).  Best-effort; see dlil_detach_nexus().
 */
__attribute__((noinline))
static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
{
	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
}
958
959 __attribute__((noinline))
960 static void
dlil_netif_detach_notify(ifnet_t ifp)961 dlil_netif_detach_notify(ifnet_t ifp)
962 {
963 ifnet_detach_notify_cb_t notify = NULL;
964 void *__single arg = NULL;
965
966 ifnet_get_detach_notify(ifp, ¬ify, &arg);
967 if (notify == NULL) {
968 DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
969 return;
970 }
971 (*notify)(arg);
972 }
973
/*
 * Quiesce data movement on 'ifp' and tear down its attached nexuses:
 * first the flowswitch (if any), then the netif (if any).  The
 * suspend-and-drain blocks until in-flight datamov activity (including
 * a concurrent dlil_attach_flowswitch_nexus()) has completed.  The
 * ASSERTs enforce the invariant that the provider/instance/attachment
 * UUIDs for each nexus are either all set or all null.
 */
__attribute__((noinline))
static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
{
	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
	if_nexus_netif *nx_netif = &ifp->if_nx_netif;

	ifnet_datamov_suspend_and_drain(ifp);
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
	} else {
		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
	}

	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
	} else {
		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
	}
	ifnet_datamov_resume(ifp);
}
1003
1004 boolean_t
ifnet_add_netagent(ifnet_t ifp)1005 ifnet_add_netagent(ifnet_t ifp)
1006 {
1007 int error;
1008
1009 error = kern_nexus_interface_add_netagent(ifp);
1010 os_log(OS_LOG_DEFAULT,
1011 "kern_nexus_interface_add_netagent(%s) returned %d",
1012 ifp->if_xname, error);
1013 return error == 0;
1014 }
1015
1016 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1017 ifnet_remove_netagent(ifnet_t ifp)
1018 {
1019 int error;
1020
1021 error = kern_nexus_interface_remove_netagent(ifp);
1022 os_log(OS_LOG_DEFAULT,
1023 "kern_nexus_interface_remove_netagent(%s) returned %d",
1024 ifp->if_xname, error);
1025 return error == 0;
1026 }
1027
1028 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1029 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1030 {
1031 if (!ifnet_is_fully_attached(ifp)) {
1032 return FALSE;
1033 }
1034 return dlil_attach_flowswitch_nexus(ifp);
1035 }
1036
1037 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)1038 ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1039 {
1040 if_nexus_flowswitch nexus_fsw;
1041
1042 ifnet_lock_exclusive(ifp);
1043 nexus_fsw = ifp->if_nx_flowswitch;
1044 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1045 ifnet_lock_done(ifp);
1046 return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1047 nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1048 }
1049
1050 void
ifnet_attach_native_flowswitch(ifnet_t ifp)1051 ifnet_attach_native_flowswitch(ifnet_t ifp)
1052 {
1053 if (!dlil_is_native_netif_nexus(ifp)) {
1054 /* not a native netif */
1055 return;
1056 }
1057 ifnet_attach_flowswitch_nexus(ifp);
1058 }
1059
1060 int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp,ifnet_fsw_rx_cb_t cb,void * arg)1061 ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
1062 {
1063 lck_mtx_lock(&ifp->if_delegate_lock);
1064 while (ifp->if_fsw_rx_cb_ref > 0) {
1065 DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
1066 (void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
1067 (PZERO + 1), __FUNCTION__, NULL);
1068 DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
1069 }
1070 ifp->if_fsw_rx_cb = cb;
1071 ifp->if_fsw_rx_cb_arg = arg;
1072 lck_mtx_unlock(&ifp->if_delegate_lock);
1073 return 0;
1074 }
1075
/*
 * Return the flowswitch RX callback and its argument via cbp/argp, taking
 * a reference that prevents the callback from being replaced until the
 * caller drops it with ifnet_release_flowswitch_rx_callback().
 *
 * Returns ENOENT when no callback is registered.
 */
int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
{
	/*
	 * This is for avoiding the unnecessary lock acquire for interfaces
	 * not used by a redirect interface.
	 */
	if (ifp->if_fsw_rx_cb == NULL) {
		return ENOENT;
	}
	lck_mtx_lock(&ifp->if_delegate_lock);
	/* re-check under the lock; the callback may have been cleared */
	if (ifp->if_fsw_rx_cb == NULL) {
		lck_mtx_unlock(&ifp->if_delegate_lock);
		return ENOENT;
	}
	*cbp = ifp->if_fsw_rx_cb;
	*argp = ifp->if_fsw_rx_cb_arg;
	/* hold off ifnet_set_flowswitch_rx_callback() until released */
	ifp->if_fsw_rx_cb_ref++;
	lck_mtx_unlock(&ifp->if_delegate_lock);
	return 0;
}
1097
1098 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)1099 ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1100 {
1101 lck_mtx_lock(&ifp->if_delegate_lock);
1102 if (--ifp->if_fsw_rx_cb_ref == 0) {
1103 wakeup(&ifp->if_fsw_rx_cb_ref);
1104 }
1105 lck_mtx_unlock(&ifp->if_delegate_lock);
1106 }
1107
1108 int
ifnet_set_delegate_parent(ifnet_t difp,ifnet_t parent)1109 ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
1110 {
1111 lck_mtx_lock(&difp->if_delegate_lock);
1112 while (difp->if_delegate_parent_ref > 0) {
1113 DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
1114 (void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
1115 (PZERO + 1), __FUNCTION__, NULL);
1116 DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
1117 }
1118 difp->if_delegate_parent = parent;
1119 lck_mtx_unlock(&difp->if_delegate_lock);
1120 return 0;
1121 }
1122
1123 int
ifnet_get_delegate_parent(ifnet_t difp,ifnet_t * parentp)1124 ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1125 {
1126 lck_mtx_lock(&difp->if_delegate_lock);
1127 if (difp->if_delegate_parent == NULL) {
1128 lck_mtx_unlock(&difp->if_delegate_lock);
1129 return ENOENT;
1130 }
1131 *parentp = difp->if_delegate_parent;
1132 difp->if_delegate_parent_ref++;
1133 lck_mtx_unlock(&difp->if_delegate_lock);
1134 return 0;
1135 }
1136
1137 void
ifnet_release_delegate_parent(ifnet_t difp)1138 ifnet_release_delegate_parent(ifnet_t difp)
1139 {
1140 lck_mtx_lock(&difp->if_delegate_lock);
1141 if (--difp->if_delegate_parent_ref == 0) {
1142 wakeup(&difp->if_delegate_parent_ref);
1143 }
1144 lck_mtx_unlock(&difp->if_delegate_lock);
1145 }
1146
1147 __attribute__((noinline))
1148 void
ifnet_set_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1149 ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1150 {
1151 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1152 ifp->if_detach_notify = notify;
1153 ifp->if_detach_notify_arg = arg;
1154 }
1155
1156 __attribute__((noinline))
1157 void
ifnet_get_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1158 ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1159 {
1160 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1161 *notifyp = ifp->if_detach_notify;
1162 *argp = ifp->if_detach_notify_arg;
1163 }
1164
1165 __attribute__((noinline))
1166 void
ifnet_set_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1167 ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1168 {
1169 ifnet_lock_exclusive(ifp);
1170 ifnet_set_detach_notify_locked(ifp, notify, arg);
1171 ifnet_lock_done(ifp);
1172 }
1173
1174 __attribute__((noinline))
1175 void
ifnet_get_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1176 ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1177 {
1178 ifnet_lock_exclusive(ifp);
1179 ifnet_get_detach_notify_locked(ifp, notifyp, argp);
1180 ifnet_lock_done(ifp);
1181 }
1182 #endif /* SKYWALK */
1183
/*
 * Sanity-check an inbound mbuf: it must carry a pkthdr and its recorded
 * receive interface must match ifp (the loopback interface is exempt from
 * the rcvif match).  Panics on any violation.
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	ifnet_ref_t _rcvif = mbuf_pkthdr_rcvif(m);                      \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}
1192
#define MBPS (1ULL * 1000 * 1000)
#define GBPS (MBPS * 1000)

/* Per-link-speed RX polling thresholds. */
struct rxpoll_time_tbl {
	u_int64_t speed;  /* downlink speed */
	u_int32_t plowat; /* packets low watermark */
	u_int32_t phiwat; /* packets high watermark */
	u_int32_t blowat; /* bytes low watermark */
	u_int32_t bhiwat; /* bytes high watermark */
};

/* Table is ordered by ascending speed and terminated by a zero entry. */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
1212
1213 int
proto_hash_value(u_int32_t protocol_family)1214 proto_hash_value(u_int32_t protocol_family)
1215 {
1216 /*
1217 * dlil_proto_unplumb_all() depends on the mapping between
1218 * the hash bucket index and the protocol family defined
1219 * here; future changes must be applied there as well.
1220 */
1221 switch (protocol_family) {
1222 case PF_INET:
1223 return 0;
1224 case PF_INET6:
1225 return 1;
1226 case PF_VLAN:
1227 return 2;
1228 case PF_UNSPEC:
1229 default:
1230 return 3;
1231 }
1232 }
1233
/*
 * Post a KEV_NETWORK_CLASS kernel event for an interface.
 *
 * event_data may be NULL, in which case a plain net_event_data carrying
 * just the interface identity (name/family/unit) is posted.  The interface
 * generation count is normally bumped, except for frequent link-quality /
 * state-change events on KEV_DL_SUBCLASS, or when the caller passes
 * suppress_generation.  Returns the result of dlil_event_internal().
 */
__private_extern__ int
dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
    u_int32_t event_code, struct net_event_data *event_data,
    u_int32_t event_data_len, boolean_t suppress_generation)
{
	struct net_event_data ev_data;
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * a net event always starts with a net_event_data structure
	 * but the caller can generate a simple net event or
	 * provide a longer event structure to post
	 */
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = event_subclass;
	ev_msg.event_code = event_code;

	if (event_data == NULL) {
		event_data = &ev_data;
		event_data_len = sizeof(struct net_event_data);
	}

	/* stamp the interface identity into the (possibly caller-supplied) data */
	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
	event_data->if_family = ifp->if_family;
	event_data->if_unit = (u_int32_t)ifp->if_unit;

	ev_msg.dv[0].data_length = event_data_len;
	ev_msg.dv[0].data_ptr = event_data;
	ev_msg.dv[1].data_length = 0;

	bool update_generation = true;
	if (event_subclass == KEV_DL_SUBCLASS) {
		/* Don't update interface generation for frequent link quality and state changes */
		switch (event_code) {
		case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
		case KEV_DL_RRC_STATE_CHANGED:
		case KEV_DL_PRIMARY_ELECTED:
			update_generation = false;
			break;
		default:
			break;
		}
	}

	/*
	 * Some events that update generation counts might
	 * want to suppress generation count.
	 * One example is node presence/absence where we still
	 * issue kernel event for the invocation but want to avoid
	 * expensive operation of updating generation which triggers
	 * NECP client updates.
	 */
	if (suppress_generation) {
		update_generation = false;
	}

	return dlil_event_internal(ifp, &ev_msg, update_generation);
}
1295
1296 static void
dlil_reset_rxpoll_params(ifnet_t ifp)1297 dlil_reset_rxpoll_params(ifnet_t ifp)
1298 {
1299 ASSERT(ifp != NULL);
1300 ifnet_set_poll_cycle(ifp, NULL);
1301 ifp->if_poll_update = 0;
1302 ifp->if_poll_flags = 0;
1303 ifp->if_poll_req = 0;
1304 ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
1305 bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
1306 bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
1307 bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
1308 net_timerclear(&ifp->if_poll_mode_holdtime);
1309 net_timerclear(&ifp->if_poll_mode_lasttime);
1310 net_timerclear(&ifp->if_poll_sample_holdtime);
1311 net_timerclear(&ifp->if_poll_sample_lasttime);
1312 net_timerclear(&ifp->if_poll_dbg_lasttime);
1313 }
1314
1315
1316 #if SKYWALK
1317 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)1318 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
1319 enum net_filter_event_subsystems state)
1320 {
1321 evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%d",
1322 __func__, state);
1323
1324 bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
1325 if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
1326 if_enable_fsw_transport_netagent = 1;
1327 } else {
1328 if_enable_fsw_transport_netagent = 0;
1329 }
1330 if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
1331 kern_nexus_update_netagents();
1332 } else if (!if_enable_fsw_transport_netagent) {
1333 necp_update_all_clients();
1334 }
1335 }
1336 #endif /* SKYWALK */
1337
/*
 * One-time initialization of the DLIL (data link interface layer).
 *
 * Verifies compile-time layout/flag invariants, processes boot-args and
 * platform overrides (including the Skywalk netagent policy), initializes
 * the dependent networking subsystems, and finally spins up the main DLIL
 * input thread and the interface detacher thread, blocking until both have
 * been scheduled at least once.
 */
void
dlil_init(void)
{
	thread_t __single thread = THREAD_NULL;

	dlil_main_input_thread = (struct dlil_threading_info *) &dlil_main_input_thread_info;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	static_assert(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	static_assert(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	static_assert(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	static_assert(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	static_assert(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	static_assert(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	static_assert(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	static_assert(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	static_assert(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	static_assert(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	static_assert(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	static_assert(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	static_assert(CSUM_IP == IF_HWASSIST_CSUM_IP);
	static_assert(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	static_assert(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	static_assert(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	static_assert(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	static_assert(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	static_assert(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	static_assert(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	static_assert(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	static_assert(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	static_assert(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	static_assert(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	static_assert(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	static_assert(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	static_assert(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	static_assert(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	static_assert(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	static_assert(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	static_assert(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	static_assert(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	static_assert(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	static_assert(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	static_assert(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	static_assert(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	static_assert(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	static_assert(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	static_assert(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	static_assert(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	static_assert(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	static_assert(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	static_assert(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	static_assert(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	static_assert(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	static_assert(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	static_assert(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	static_assert(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	static_assert(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	static_assert(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	static_assert(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	static_assert(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	static_assert(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	static_assert(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	static_assert(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	static_assert(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	static_assert(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	static_assert(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	static_assert(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
	static_assert(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
	static_assert(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);

	static_assert(DLIL_MODIDLEN == IFNET_MODIDLEN);
	static_assert(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* boot-arg overrides for the global DLIL tunables */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("if_link_heuristics", &if_link_heuristics_flags, sizeof(if_link_heuristics_flags));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled. This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
	if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */

	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	/* global interface list heads */
	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize droptap interface */
	droptap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
1631
/*
 * Attach the interface filter described by if_filter to ifp, returning the
 * new filter reference via filter_ref.
 *
 * Returns ENXIO when the interface is not in the global list or no longer
 * holds an I/O reference; 0 on success.  Input/output/event/ioctl callbacks
 * are deliberately not installed for internal co-processor and management
 * interfaces.  Attaching a non-TSO filter bumps the filter count /
 * route_generation so TCP stops using TSO on affected connections.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	if (!ifnet_get_ioref(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	filter = dlif_filt_alloc();
	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 * and for management interfaces
	 */
	if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		/* track third-party (non-OS) filters per interface */
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	/* drop the I/O reference taken above */
	ifnet_decr_iorefcnt(ifp);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		dlif_filt_free(filter);
	}

	return retval;
}
1723
/*
 * Detach an interface filter.
 *
 * detached == 0: explicit detach — locate the filter on some interface's
 * filter list, mark it skipped, remove it, and adjust counters; returns
 * EINVAL if the reference is not found on any interface.
 *
 * detached != 0: implicit detach from ifnet_detach_final() — the list has
 * already been emptied by the caller, so only the counters are adjusted.
 *
 * In both paths the filter's filt_detached callback (if any) is invoked
 * and the filter storage is freed.
 */
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_ref_t ifp = NULL;

		/* search every attached interface for this filter ref */
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance. Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1; /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
					VERIFY(ifp->if_flt_non_os_count != 0);
					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
				}
				/*
				 * Decrease filter count and route_generation
				 * ID to let TCP know it should reevalute doing
				 * TSO or not.
				 */
				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
					ifnet_filter_update_tso(ifp, FALSE);
				}
				/*
				 * When we remove the bridge's interface filter,
				 * clear the field in the ifnet.
				 */
				if ((filter->filt_flags & DLIL_IFF_BRIDGE)
				    != 0) {
					ifp->if_bridge = NULL;
				}
				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	} else {
		ifnet_ref_t ifp = filter->filt_ifp;
		/*
		 * Here we are called from ifnet_detach_final(); the
		 * caller had emptied if_flt_head and we're doing an
		 * implicit filter detach because the interface is
		 * about to go away. Make sure to adjust the counters
		 * in this case. We don't need the protection of the
		 * filter monitor since we're called as part of the
		 * final detach in the context of the detacher thread.
		 */
		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
			VERIFY(ifp->if_flt_non_os_count != 0);
			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
		}
		/*
		 * Decrease filter count and route_generation
		 * ID to let TCP know it should reevalute doing
		 * TSO or not.
		 */
		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
			ifnet_filter_update_tso(ifp, FALSE);
		}
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
	}
#if SKYWALK
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	/* Free the filter */
	dlif_filt_free(filter);
	filter = NULL;
done:
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
1854
1855 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)1856 dlil_detach_filter(interface_filter_t filter)
1857 {
1858 if (filter == NULL) {
1859 return;
1860 }
1861 dlil_detach_filter_internal(filter, 0);
1862 }
1863
1864 __private_extern__ boolean_t
dlil_has_ip_filter(void)1865 dlil_has_ip_filter(void)
1866 {
1867 boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
1868
1869 VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
1870
1871 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
1872 return has_filter;
1873 }
1874
1875 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)1876 dlil_has_if_filter(struct ifnet *ifp)
1877 {
1878 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
1879 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
1880 return has_filter;
1881 }
1882
1883 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)1884 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
1885 {
1886 if (p != NULL) {
1887 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
1888 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
1889 return EINVAL;
1890 }
1891 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
1892 p->packets_lowat >= p->packets_hiwat) {
1893 return EINVAL;
1894 }
1895 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
1896 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
1897 return EINVAL;
1898 }
1899 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
1900 p->bytes_lowat >= p->bytes_hiwat) {
1901 return EINVAL;
1902 }
1903 if (p->interval_time != 0 &&
1904 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
1905 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
1906 }
1907 }
1908 return 0;
1909 }
1910
/*
 * Recompute the interface's RX poll tunables.
 *
 * If the link rate is unknown (0) and the caller supplied no explicit
 * parameters, polling is effectively disabled: watermarks are opened
 * wide and the interval reset to the minimum.  Otherwise the values
 * are auto-tuned from rxpoll_tbl[] based on link speed, with any
 * non-zero caller-supplied value taking precedence; note that the
 * if_rxpoll_max and if_rxpoll_interval_time sysctls, when set away
 * from their defaults, override the caller's limit/interval.
 *
 * Caller must hold the input thread lock (see dlil_rxpoll_set_params).
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		/* open the high watermarks all the way up */
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* find the highest table entry not exceeding the link rate */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/* a non-zero if_rxpoll_max sysctl overrides the caller */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ? if_rxpoll_max : p->packets_limit);
		/* likewise a non-default if_rxpoll_interval_time sysctl */
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert nanosecond holdtimes into timespec form for the poller */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
1980
1981 /*
1982 * Must be called on an attached ifnet (caller is expected to check.)
1983 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
1984 */
1985 errno_t
dlil_rxpoll_set_params(struct ifnet * ifp,struct ifnet_poll_params * p,boolean_t locked)1986 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
1987 boolean_t locked)
1988 {
1989 errno_t err;
1990 struct dlil_threading_info *inp;
1991
1992 VERIFY(ifp != NULL);
1993 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
1994 return ENXIO;
1995 }
1996 err = dlil_rxpoll_validate_params(p);
1997 if (err != 0) {
1998 return err;
1999 }
2000
2001 if (!locked) {
2002 lck_mtx_lock(&inp->dlth_lock);
2003 }
2004 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
2005 /*
2006 * Normally, we'd reset the parameters to the auto-tuned values
2007 * if the the input thread detects a change in link rate. If the
2008 * driver provides its own parameters right after a link rate
2009 * changes, but before the input thread gets to run, we want to
2010 * make sure to keep the driver's values. Clearing if_poll_update
2011 * will achieve that.
2012 */
2013 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2014 ifp->if_poll_update = 0;
2015 }
2016 dlil_rxpoll_update_params(ifp, p);
2017 if (!locked) {
2018 lck_mtx_unlock(&inp->dlth_lock);
2019 }
2020 return 0;
2021 }
2022
2023 /*
2024 * Must be called on an attached ifnet (caller is expected to check.)
2025 */
2026 errno_t
dlil_rxpoll_get_params(struct ifnet * ifp,struct ifnet_poll_params * p)2027 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2028 {
2029 struct dlil_threading_info *inp;
2030
2031 VERIFY(ifp != NULL && p != NULL);
2032 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2033 return ENXIO;
2034 }
2035
2036 bzero(p, sizeof(*p));
2037
2038 lck_mtx_lock(&inp->dlth_lock);
2039 p->packets_limit = ifp->if_rxpoll_plim;
2040 p->packets_lowat = ifp->if_rxpoll_plowat;
2041 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2042 p->bytes_lowat = ifp->if_rxpoll_blowat;
2043 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2044 p->interval_time = ifp->if_rxpoll_ival;
2045 lck_mtx_unlock(&inp->dlth_lock);
2046
2047 return 0;
2048 }
2049
/*
 * Basic input KPI: hand a packet chain to DLIL.  No chain tail is
 * supplied, so ifnet_input_common() walks the chain to find the tail
 * and recompute packet/byte counts.
 */
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}
2056
/*
 * Extended input KPI: the driver supplies the chain tail and the stat
 * increments ('s' is mandatory here), sparing DLIL the chain walk.
 */
errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}
2063
/*
 * Input entry point used by the RX poller.  m_head may be NULL (an
 * empty poll), in which case the call is treated as non-extended.
 */
errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	           (m_head != NULL), TRUE);
}
2071
/*
 * Common ingress path for all ifnet_input*() variants.
 *
 * Validates the packet chain and optional statistics, takes a datamov
 * (IO) reference on the interface so it cannot be detached while the
 * chain is in flight, then dispatches the chain to the interface's
 * DLIL input function.
 *
 * m_head/m_tail: the packet chain; when m_tail is NULL the chain is
 *     walked here to find the tail and count packets/bytes.
 * s:    per-call stat increments; required when 'ext' is set.
 * ext:  extended variant -- driver supplied tail and stats.
 * poll: invoked from the RX poller; m_head may then be NULL.
 *
 * Returns 0 on success or EINVAL on bad parameters / detached
 * interface (the chain, if any, is freed on error).
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	/* a NULL chain is only legal for polling; ext requires stats */
	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail supplied: walk the chain, tagging and counting */
		last = m_head;
		while (m_head != NULL) {
			m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* sanity mode: re-walk even though the driver counted */
			last = m_head;
			while (1) {
				m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-supplied counts */
			m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	/* build a local copy of the stats with authoritative counts */
	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
		/* input disabled: drop the chain but still account for it */
		m_freem_list(m_head);

		os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
		os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);

		goto done;
	}

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

done:
	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
2200
2201
/*
 * Kick the interface's transmit starter thread.
 *
 * resetfc:      clear any flow-control state before deciding to wake.
 * ignore_delay: mark the next start pass as not subject to the
 *               IFEF_DELAY_START batching delay.
 *
 * No-op for interfaces without a starter thread (IFEF_TXSTART unset).
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (ignore_delay) {
		ifp->if_start_flags |= IFSF_NO_DELAY;
	}
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		/* flow controlled and not resetting: don't wake the thread */
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	ifp->if_start_req++;
	/*
	 * Wake the starter only when it is idle and either we are
	 * resetting flow control, multi-enqueue batching is off, or
	 * the delay threshold has been reached / delaying is inactive.
	 */
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
2234
/* Request a transmit pass; honors flow control and start delay. */
void
ifnet_start(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, FALSE);
}
2240
/* Request a transmit pass that bypasses the delayed-start batching. */
void
ifnet_start_ignore_delay(struct ifnet *ifp)
{
	ifnet_start_common(ifp, FALSE, TRUE);
}
2246
/*
 * Entry point of the per-interface transmit starter thread.
 *
 * Sets the thread name, optionally joins the cellular thread group,
 * performs lo0's one-time affinity association with the main input
 * thread, then parks in the embryonic state until the first wakeup is
 * processed by ifnet_start_thread_cont().  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	ifnet_ref_t ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), __unsafe_null_terminated_from_indexable(thread_name));

#if CONFIG_THREAD_GROUPS
	/* cellular interfaces account their work to the cellular group */
	if (IFNET_REQUIRES_CELL_GROUP(ifp)) {
		thread_group_join_cellular();
	}
#endif

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag. This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *__single tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	/* park until the continuation runs; first pass clears embryonic */
	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2318
2319 __attribute__((noreturn))
2320 static void
ifnet_start_thread_cont(void * v,wait_result_t wres)2321 ifnet_start_thread_cont(void *v, wait_result_t wres)
2322 {
2323 ifnet_ref_t ifp = v;
2324 struct ifclassq *ifq = ifp->if_snd;
2325
2326 lck_mtx_lock_spin(&ifp->if_start_lock);
2327 if (__improbable(wres == THREAD_INTERRUPTED ||
2328 (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
2329 goto terminate;
2330 }
2331
2332 if (__improbable(ifp->if_start_embryonic)) {
2333 ifp->if_start_embryonic = 0;
2334 lck_mtx_unlock(&ifp->if_start_lock);
2335 ifnet_decr_pending_thread_count(ifp);
2336 lck_mtx_lock_spin(&ifp->if_start_lock);
2337 goto skip;
2338 }
2339
2340 ifp->if_start_active = 1;
2341
2342 /*
2343 * Keep on servicing until no more request.
2344 */
2345 for (;;) {
2346 u_int32_t req = ifp->if_start_req;
2347 if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
2348 !IFCQ_IS_EMPTY(ifq) &&
2349 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2350 ifp->if_start_delayed == 0 &&
2351 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2352 (ifp->if_eflags & IFEF_DELAY_START)) {
2353 ifp->if_start_delayed = 1;
2354 ifnet_start_delayed++;
2355 break;
2356 }
2357 ifp->if_start_flags &= ~IFSF_NO_DELAY;
2358 ifp->if_start_delayed = 0;
2359 lck_mtx_unlock(&ifp->if_start_lock);
2360
2361 /*
2362 * If no longer attached, don't call start because ifp
2363 * is being destroyed; else hold an IO refcnt to
2364 * prevent the interface from being detached (will be
2365 * released below.)
2366 */
2367 if (!ifnet_datamov_begin(ifp)) {
2368 lck_mtx_lock_spin(&ifp->if_start_lock);
2369 break;
2370 }
2371
2372 /* invoke the driver's start routine */
2373 ((*ifp->if_start)(ifp));
2374
2375 /*
2376 * Release the io ref count taken above.
2377 */
2378 ifnet_datamov_end(ifp);
2379
2380 lck_mtx_lock_spin(&ifp->if_start_lock);
2381
2382 /*
2383 * If there's no pending request or if the
2384 * interface has been disabled, we're done.
2385 */
2386 #define _IFSF_DISABLED (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
2387 if (req == ifp->if_start_req ||
2388 (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
2389 break;
2390 }
2391 }
2392 skip:
2393 ifp->if_start_req = 0;
2394 ifp->if_start_active = 0;
2395
2396 #if SKYWALK
2397 /*
2398 * Wakeup any waiters, e.g. any threads waiting to
2399 * detach the interface from the flowswitch, etc.
2400 */
2401 if (ifp->if_start_waiters != 0) {
2402 ifp->if_start_waiters = 0;
2403 wakeup(&ifp->if_start_waiters);
2404 }
2405 #endif /* SKYWALK */
2406 if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
2407 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
2408 struct timespec delay_start_ts;
2409 struct timespec *ts = NULL;
2410
2411 if (ts == NULL) {
2412 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2413 &ifp->if_start_cycle : NULL);
2414 }
2415
2416 if (ts == NULL && ifp->if_start_delayed == 1) {
2417 delay_start_ts.tv_sec = 0;
2418 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2419 ts = &delay_start_ts;
2420 }
2421
2422 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
2423 ts = NULL;
2424 }
2425
2426 if (__improbable(ts != NULL)) {
2427 clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
2428 (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
2429 }
2430
2431 (void) assert_wait_deadline(&ifp->if_start_thread,
2432 THREAD_UNINT, deadline);
2433 lck_mtx_unlock(&ifp->if_start_lock);
2434 (void) thread_block_parameter(ifnet_start_thread_cont, ifp);
2435 /* NOTREACHED */
2436 } else {
2437 terminate:
2438 /* interface is detached? */
2439 ifnet_set_start_cycle(ifp, NULL);
2440
2441 /* clear if_start_thread to allow termination to continue */
2442 ASSERT(ifp->if_start_thread != THREAD_NULL);
2443 ifp->if_start_thread = THREAD_NULL;
2444 wakeup((caddr_t)&ifp->if_start_thread);
2445 lck_mtx_unlock(&ifp->if_start_lock);
2446
2447 if (dlil_verbose) {
2448 DLIL_PRINTF("%s: starter thread terminated\n",
2449 if_name(ifp));
2450 }
2451
2452 /* for the extra refcnt from kernel_thread_start() */
2453 thread_deallocate(current_thread());
2454 /* this is the end */
2455 thread_terminate(current_thread());
2456 /* NOTREACHED */
2457 }
2458
2459 /* must never get here */
2460 VERIFY(0);
2461 /* NOTREACHED */
2462 __builtin_unreachable();
2463 }
2464
2465 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)2466 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2467 {
2468 if (ts == NULL) {
2469 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
2470 } else {
2471 *(&ifp->if_start_cycle) = *ts;
2472 }
2473
2474 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2475 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
2476 if_name(ifp), ts->tv_nsec);
2477 }
2478 }
2479
/*
 * Record a poll request and wake the poller thread if it is idle.
 * Caller must hold if_poll_lock.
 */
static inline void
ifnet_poll_wakeup(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);

	ifp->if_poll_req++;
	/* only wake when the poller exists and is not already running */
	if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
	    ifp->if_poll_thread != THREAD_NULL) {
		wakeup_one((caddr_t)&ifp->if_poll_thread);
	}
}
2491
/* Public kick for the RX poller thread. */
void
ifnet_poll(struct ifnet *ifp)
{
	/*
	 * If the poller thread is inactive, signal it to do work.
	 */
	lck_mtx_lock_spin(&ifp->if_poll_lock);
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
}
2502
/*
 * Entry point of the per-interface RX poller thread.
 *
 * Names the thread, parks it in the embryonic state and issues a
 * self-wakeup so that ifnet_poll_thread_cont() runs once to finish
 * initialization.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	ifnet_ref_t ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, __unsafe_null_terminated_from_indexable(thread_name));

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2531
/*
 * Continuation body of the RX poller thread.
 *
 * Services poll requests by invoking the driver's if_input_poll
 * routine under an IO reference and pushing the harvested chain into
 * ifnet_input_common(), then re-arms a (possibly timed) wait and
 * blocks on itself as its own continuation.  Terminates the thread
 * when IF_POLLF_TERMINATING is set (interface detach) or the wait was
 * interrupted.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	ifnet_ref_t ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		/* first wakeup: leave embryonic state, account the thread */
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		mbuf_ref_t m_head, m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		/* per-poll packet budget: explicit limit or a derived cap */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_get_ioref(ifp)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		&m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			/* empty poll still goes through the common path */
			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2698
2699 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)2700 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2701 {
2702 if (ts == NULL) {
2703 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
2704 } else {
2705 *(&ifp->if_poll_cycle) = *ts;
2706 }
2707
2708 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2709 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
2710 if_name(ifp), ts->tv_nsec);
2711 }
2712 }
2713
2714 void
ifnet_purge(struct ifnet * ifp)2715 ifnet_purge(struct ifnet *ifp)
2716 {
2717 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
2718 if_qflush(ifp, ifp->if_snd);
2719 }
2720 }
2721
2722 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)2723 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2724 {
2725 switch (ev) {
2726 case CLASSQ_EV_LINK_BANDWIDTH:
2727 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
2728 ifp->if_poll_update++;
2729 }
2730 break;
2731
2732 default:
2733 break;
2734 }
2735 }
2736
/*
 * Change the output scheduling model of the interface's send queue.
 * Thin wrapper around ifclassq_change().
 */
errno_t
ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
{
	return ifclassq_change(ifp->if_snd, model);
}
2742
2743 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2744 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2745 {
2746 if (ifp == NULL) {
2747 return EINVAL;
2748 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2749 return ENXIO;
2750 }
2751
2752 ifclassq_set_maxlen(ifp->if_snd, maxqlen);
2753
2754 return 0;
2755 }
2756
2757 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2758 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2759 {
2760 if (ifp == NULL || maxqlen == NULL) {
2761 return EINVAL;
2762 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2763 return ENXIO;
2764 }
2765
2766 *maxqlen = ifclassq_get_maxlen(ifp->if_snd);
2767
2768 return 0;
2769 }
2770
2771 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)2772 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2773 {
2774 errno_t err;
2775
2776 if (ifp == NULL || pkts == NULL) {
2777 err = EINVAL;
2778 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2779 err = ENXIO;
2780 } else {
2781 err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
2782 IF_CLASSQ_ALL_GRPS, pkts, NULL);
2783 }
2784
2785 return err;
2786 }
2787
2788 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)2789 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2790 u_int32_t *pkts, u_int32_t *bytes)
2791 {
2792 errno_t err;
2793
2794 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2795 (pkts == NULL && bytes == NULL)) {
2796 err = EINVAL;
2797 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2798 err = ENXIO;
2799 } else {
2800 err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
2801 pkts, bytes);
2802 }
2803
2804 return err;
2805 }
2806
2807 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2808 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2809 {
2810 struct dlil_threading_info *inp;
2811
2812 if (ifp == NULL) {
2813 return EINVAL;
2814 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2815 return ENXIO;
2816 }
2817
2818 if (maxqlen == 0) {
2819 maxqlen = if_rcvq_maxlen;
2820 } else if (maxqlen < IF_RCVQ_MINLEN) {
2821 maxqlen = IF_RCVQ_MINLEN;
2822 }
2823
2824 inp = ifp->if_inp;
2825 lck_mtx_lock(&inp->dlth_lock);
2826 qlimit(&inp->dlth_pkts) = maxqlen;
2827 lck_mtx_unlock(&inp->dlth_lock);
2828
2829 return 0;
2830 }
2831
2832 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2833 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2834 {
2835 struct dlil_threading_info *inp;
2836
2837 if (ifp == NULL || maxqlen == NULL) {
2838 return EINVAL;
2839 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2840 return ENXIO;
2841 }
2842
2843 inp = ifp->if_inp;
2844 lck_mtx_lock(&inp->dlth_lock);
2845 *maxqlen = qlimit(&inp->dlth_pkts);
2846 lck_mtx_unlock(&inp->dlth_lock);
2847 return 0;
2848 }
2849
2850 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)2851 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
2852 uint16_t delay_timeout)
2853 {
2854 if (delay_qlen > 0 && delay_timeout > 0) {
2855 if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
2856 ifp->if_start_delay_qlen = MIN(100, delay_qlen);
2857 ifp->if_start_delay_timeout = min(20000, delay_timeout);
2858 /* convert timeout to nanoseconds */
2859 ifp->if_start_delay_timeout *= 1000;
2860 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
2861 ifp->if_xname, (uint32_t)delay_qlen,
2862 (uint32_t)delay_timeout);
2863 } else {
2864 if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
2865 }
2866 }
2867
2868 /*
2869 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
2870 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
2871 * buf holds the full header.
2872 */
2873 static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t * __indexable buf,uint8_t ip_ver)2874 ifnet_mcast_clear_dscp(uint8_t *__indexable buf, uint8_t ip_ver)
2875 {
2876 struct ip *ip;
2877 struct ip6_hdr *ip6;
2878 uint8_t lbuf[64] __attribute__((aligned(8)));
2879 uint8_t *p = buf;
2880
2881 if (ip_ver == IPVERSION) {
2882 uint8_t old_tos;
2883 uint32_t sum;
2884
2885 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
2886 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
2887 bcopy(buf, lbuf, sizeof(struct ip));
2888 p = lbuf;
2889 }
2890 ip = (struct ip *)(void *)p;
2891 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
2892 return;
2893 }
2894
2895 DTRACE_IP1(clear__v4, struct ip *, ip);
2896 old_tos = ip->ip_tos;
2897 ip->ip_tos &= IPTOS_ECN_MASK;
2898 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
2899 sum = (sum >> 16) + (sum & 0xffff);
2900 ip->ip_sum = (uint16_t)(sum & 0xffff);
2901
2902 if (__improbable(p == lbuf)) {
2903 bcopy(lbuf, buf, sizeof(struct ip));
2904 }
2905 } else {
2906 uint32_t flow;
2907 ASSERT(ip_ver == IPV6_VERSION);
2908
2909 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
2910 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
2911 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
2912 p = lbuf;
2913 }
2914 ip6 = (struct ip6_hdr *)(void *)p;
2915 flow = ntohl(ip6->ip6_flow);
2916 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
2917 return;
2918 }
2919
2920 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
2921 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
2922
2923 if (__improbable(p == lbuf)) {
2924 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
2925 }
2926 }
2927 }
2928
/*
 * Enqueue one packet (mbuf or native Skywalk packet) on ifcq, or on the
 * interface's default send queue when ifcq is NULL.  Along the way this:
 *   - stamps the packet with a nanosecond uptime timestamp if it does
 *     not already carry a valid one;
 *   - updates the interface's (and, for flowswitch-attached interfaces,
 *     the nexus advisory's) foreground/realtime activity timestamps;
 *   - applies the Wi-Fi-infrastructure multicast DSCP-clearing
 *     workaround;
 *   - runs the delayed start-callback heuristics when the driver opted
 *     into IFEF_ENQUEUE_MULTI, and kicks the driver's start thread
 *     otherwise.
 *
 * The caller relinquishes ownership of *p; *pdrop is set by
 * ifclassq_enqueue() to indicate whether the packet was dropped.
 * NOTE(review): the early ENOMEM returns after a failed m_pullup()
 * leave *pdrop untouched — confirm callers treat ENOMEM as a drop.
 */
static inline errno_t
ifnet_enqueue_single(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	/* nexus advisory timestamp slots; NULL unless flowswitch-attached */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;	/* non-zero once nanouptime has been taken */
	int error = 0;
	uint8_t *mcast_buf = NULL;	/* non-NULL => clear DSCP below */
	uint8_t ip_ver;
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here. Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		/* stamp with current uptime unless a valid timestamp exists */
		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)net_uptime();
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)net_uptime();
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)net_uptime();
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)net_uptime();
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* ensure the Ethernet header is contiguous */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = mtod(p->cp_mbuf, struct ether_header *);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* non-IP multicast: skip the workaround */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			/* make the full Ethernet + IP header contiguous */
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				eh = mtod(p->cp_mbuf, struct ether_header *);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface. If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		/* stamp with current uptime unless a valid timestamp exists */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)net_uptime();
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)net_uptime();
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)net_uptime();
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)net_uptime();
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			/* locate the frame start inside the first buflet */
			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			/* runt frame: skip the workaround, enqueue as-is */
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* non-IP multicast: skip the workaround */
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* apply the multicast DSCP-clearing workaround, if armed above */
	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		/* timestamp may not have been taken in the switch above */
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This is
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* still inside the sampling window */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* idle > 200ms: reset and disable delaying */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* window expired: evaluate the heuristics */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* first packet: open a fresh sampling window */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
3238
3239 static inline errno_t
ifnet_enqueue_chain(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3240 ifnet_enqueue_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3241 classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
3242 boolean_t flush, boolean_t *pdrop)
3243 {
3244 int error;
3245
3246 /* enqueue the packet (caller consumes object) */
3247 error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
3248 cnt, bytes, pdrop);
3249
3250 /*
3251 * Tell the driver to start dequeueing; do this even when the queue
3252 * for the packet is suspended (EQSUSPENDED), as the driver could still
3253 * be dequeueing from other unsuspended queues.
3254 */
3255 if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
3256 ifnet_start(ifp);
3257 }
3258 return error;
3259 }
3260
3261 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * __sized_by (n_pkts)pkts,uint32_t n_pkts)3262 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *__sized_by(n_pkts)pkts, uint32_t n_pkts)
3263 {
3264 ifnet_ref_t ifp = handle;
3265 boolean_t pdrop; /* dummy */
3266 uint32_t i;
3267
3268 ASSERT(n_pkts >= 1);
3269 for (i = 0; i < n_pkts - 1; i++) {
3270 (void) ifnet_enqueue_single(ifp, ifp->if_snd, &pkts[i].pktsched_pkt,
3271 FALSE, &pdrop);
3272 }
3273 /* flush with the last packet */
3274 (void) ifnet_enqueue_single(ifp, ifp->if_snd, &pkts[i].pktsched_pkt,
3275 TRUE, &pdrop);
3276
3277 return 0;
3278 }
3279
3280 static inline errno_t
ifnet_enqueue_common_single(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)3281 ifnet_enqueue_common_single(struct ifnet *ifp, struct ifclassq *ifcq,
3282 classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
3283 {
3284 if (ifp->if_output_netem != NULL) {
3285 bool drop;
3286 errno_t error;
3287 error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
3288 *pdrop = drop ? TRUE : FALSE;
3289 return error;
3290 } else {
3291 return ifnet_enqueue_single(ifp, ifcq, pkt, flush, pdrop);
3292 }
3293 }
3294
3295 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)3296 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3297 {
3298 uint32_t bytes = m_pktlen(m);
3299 struct mbuf *tail = m;
3300 uint32_t cnt = 1;
3301 boolean_t pdrop;
3302
3303 while (tail->m_nextpkt) {
3304 VERIFY(tail->m_flags & M_PKTHDR);
3305 tail = tail->m_nextpkt;
3306 cnt++;
3307 bytes += m_pktlen(tail);
3308 }
3309
3310 return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
3311 }
3312
3313 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)3314 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3315 boolean_t *pdrop)
3316 {
3317 classq_pkt_t pkt;
3318
3319 m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
3320 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3321 m->m_nextpkt != NULL) {
3322 if (m != NULL) {
3323 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_INVALID, NULL, 0);
3324 *pdrop = TRUE;
3325 }
3326 return EINVAL;
3327 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3328 !ifnet_is_fully_attached(ifp)) {
3329 /* flag tested without lock for performance */
3330 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3331 *pdrop = TRUE;
3332 return ENXIO;
3333 } else if (!(ifp->if_flags & IFF_UP)) {
3334 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3335 *pdrop = TRUE;
3336 return ENETDOWN;
3337 }
3338
3339 CLASSQ_PKT_INIT_MBUF(&pkt, m);
3340 return ifnet_enqueue_common_single(ifp, NULL, &pkt, flush, pdrop);
3341 }
3342
3343 errno_t
ifnet_enqueue_mbuf_chain(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3344 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
3345 struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
3346 boolean_t *pdrop)
3347 {
3348 classq_pkt_t head, tail;
3349
3350 m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
3351 ASSERT(m_head != NULL);
3352 ASSERT((m_head->m_flags & M_PKTHDR) != 0);
3353 ASSERT(m_tail != NULL);
3354 ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
3355 ASSERT(ifp != NULL);
3356 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3357
3358 if (!ifnet_is_fully_attached(ifp)) {
3359 /* flag tested without lock for performance */
3360 m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3361 *pdrop = TRUE;
3362 return ENXIO;
3363 } else if (!(ifp->if_flags & IFF_UP)) {
3364 m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3365 *pdrop = TRUE;
3366 return ENETDOWN;
3367 }
3368
3369 CLASSQ_PKT_INIT_MBUF(&head, m_head);
3370 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
3371 return ifnet_enqueue_chain(ifp, NULL, &head, &tail, cnt, bytes,
3372 flush, pdrop);
3373 }
3374
3375 #if SKYWALK
3376 errno_t
ifnet_enqueue_pkt(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3377 ifnet_enqueue_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
3378 struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3379 {
3380 classq_pkt_t pkt;
3381
3382 ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
3383
3384 if (__improbable(ifp == NULL || kpkt == NULL)) {
3385 if (kpkt != NULL) {
3386 pp_free_packet(__DECONST(struct kern_pbufpool *,
3387 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3388 *pdrop = TRUE;
3389 }
3390 return EINVAL;
3391 } else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
3392 !ifnet_is_fully_attached(ifp))) {
3393 /* flag tested without lock for performance */
3394 pp_free_packet(__DECONST(struct kern_pbufpool *,
3395 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3396 *pdrop = TRUE;
3397 return ENXIO;
3398 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3399 pp_free_packet(__DECONST(struct kern_pbufpool *,
3400 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3401 *pdrop = TRUE;
3402 return ENETDOWN;
3403 }
3404
3405 CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
3406 return ifnet_enqueue_common_single(ifp, ifcq, &pkt, flush, pdrop);
3407 }
3408
3409 errno_t
ifnet_enqueue_pkt_chain(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3410 ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3411 struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3412 uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3413 {
3414 classq_pkt_t head, tail;
3415
3416 ASSERT(k_head != NULL);
3417 ASSERT(k_tail != NULL);
3418 ASSERT(ifp != NULL);
3419 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3420
3421 if (!ifnet_is_fully_attached(ifp)) {
3422 /* flag tested without lock for performance */
3423 pp_free_packet_chain(k_head, NULL);
3424 *pdrop = TRUE;
3425 return ENXIO;
3426 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3427 pp_free_packet_chain(k_head, NULL);
3428 *pdrop = TRUE;
3429 return ENETDOWN;
3430 }
3431
3432 CLASSQ_PKT_INIT_PACKET(&head, k_head);
3433 CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
3434 return ifnet_enqueue_chain(ifp, ifcq, &head, &tail, cnt, bytes,
3435 flush, pdrop);
3436 }
3437 #endif /* SKYWALK */
3438
3439 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)3440 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3441 {
3442 errno_t rc;
3443 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3444
3445 if (ifp == NULL || mp == NULL) {
3446 return EINVAL;
3447 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3448 !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3449 return ENXIO;
3450 }
3451 if (!ifnet_get_ioref(ifp)) {
3452 return ENXIO;
3453 }
3454
3455 #if SKYWALK
3456 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3457 #endif /* SKYWALK */
3458 rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3459 &pkt, NULL, NULL, NULL, 0);
3460 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3461 ifnet_decr_iorefcnt(ifp);
3462 *mp = pkt.cp_mbuf;
3463 m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3464 return rc;
3465 }
3466
3467 errno_t
ifnet_dequeue_service_class(struct ifnet * ifp,mbuf_svc_class_t sc,struct mbuf ** mp)3468 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3469 struct mbuf **mp)
3470 {
3471 errno_t rc;
3472 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3473
3474 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
3475 return EINVAL;
3476 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3477 !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3478 return ENXIO;
3479 }
3480 if (!ifnet_get_ioref(ifp)) {
3481 return ENXIO;
3482 }
3483
3484 #if SKYWALK
3485 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3486 #endif /* SKYWALK */
3487 rc = ifclassq_dequeue(ifp->if_snd, sc, 1,
3488 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
3489 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3490 ifnet_decr_iorefcnt(ifp);
3491 *mp = pkt.cp_mbuf;
3492 m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3493 return rc;
3494 }
3495
3496 errno_t
ifnet_dequeue_multi(struct ifnet * ifp,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3497 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3498 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3499 {
3500 errno_t rc;
3501 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3502 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3503
3504 if (ifp == NULL || head == NULL || pkt_limit < 1) {
3505 return EINVAL;
3506 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3507 !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3508 return ENXIO;
3509 }
3510 if (!ifnet_get_ioref(ifp)) {
3511 return ENXIO;
3512 }
3513
3514 #if SKYWALK
3515 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3516 #endif /* SKYWALK */
3517 rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, pkt_limit,
3518 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
3519 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3520 ifnet_decr_iorefcnt(ifp);
3521 *head = pkt_head.cp_mbuf;
3522 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3523 if (tail != NULL) {
3524 *tail = pkt_tail.cp_mbuf;
3525 }
3526 return rc;
3527 }
3528
3529 errno_t
ifnet_dequeue_multi_bytes(struct ifnet * ifp,u_int32_t byte_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3530 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3531 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3532 {
3533 errno_t rc;
3534 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3535 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3536
3537 if (ifp == NULL || head == NULL || byte_limit < 1) {
3538 return EINVAL;
3539 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3540 !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3541 return ENXIO;
3542 }
3543 if (!ifnet_get_ioref(ifp)) {
3544 return ENXIO;
3545 }
3546
3547 #if SKYWALK
3548 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3549 #endif /* SKYWALK */
3550 rc = ifclassq_dequeue(ifp->if_snd, MBUF_SC_UNSPEC, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3551 byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
3552 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3553 ifnet_decr_iorefcnt(ifp);
3554 *head = pkt_head.cp_mbuf;
3555 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3556 if (tail != NULL) {
3557 *tail = pkt_tail.cp_mbuf;
3558 }
3559 return rc;
3560 }
3561
3562 errno_t
ifnet_dequeue_service_class_multi(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)3563 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3564 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3565 u_int32_t *len)
3566 {
3567 errno_t rc;
3568 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
3569 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
3570
3571 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3572 !MBUF_VALID_SC(sc)) {
3573 return EINVAL;
3574 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3575 !IFNET_MODEL_IS_VALID(ifp->if_output_sched_model)) {
3576 return ENXIO;
3577 }
3578 if (!ifnet_get_ioref(ifp)) {
3579 return ENXIO;
3580 }
3581
3582 #if SKYWALK
3583 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3584 #endif /* SKYWALK */
3585 rc = ifclassq_dequeue(ifp->if_snd, sc, pkt_limit,
3586 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
3587 cnt, len, 0);
3588 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
3589 ifnet_decr_iorefcnt(ifp);
3590 *head = pkt_head.cp_mbuf;
3591 m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
3592 if (tail != NULL) {
3593 *tail = pkt_tail.cp_mbuf;
3594 }
3595 return rc;
3596 }
3597
3598 #if XNU_TARGET_OS_OSX
3599 errno_t
ifnet_framer_stub(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * dest,IFNET_LLADDR_T dest_linkaddr,IFNET_FRAME_TYPE_T frame_type,u_int32_t * pre,u_int32_t * post)3600 ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3601 const struct sockaddr *dest,
3602 IFNET_LLADDR_T dest_linkaddr,
3603 IFNET_FRAME_TYPE_T frame_type,
3604 u_int32_t *pre, u_int32_t *post)
3605 {
3606 if (pre != NULL) {
3607 *pre = 0;
3608 }
3609 if (post != NULL) {
3610 *post = 0;
3611 }
3612
3613 return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
3614 }
3615 #endif /* XNU_TARGET_OS_OSX */
3616
3617 /* If ifp is set, we will increment the generation for the interface */
3618 int
dlil_post_complete_msg(struct ifnet * ifp,struct kev_msg * event)3619 dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
3620 {
3621 if (ifp != NULL) {
3622 ifnet_increment_generation(ifp);
3623 }
3624
3625 #if NECP
3626 necp_update_all_clients();
3627 #endif /* NECP */
3628
3629 return kev_post_msg(event);
3630 }
3631
3632 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)3633 dlil_post_sifflags_msg(struct ifnet * ifp)
3634 {
3635 struct kev_msg ev_msg;
3636 struct net_event_data ev_data;
3637
3638 bzero(&ev_data, sizeof(ev_data));
3639 bzero(&ev_msg, sizeof(ev_msg));
3640 ev_msg.vendor_code = KEV_VENDOR_APPLE;
3641 ev_msg.kev_class = KEV_NETWORK_CLASS;
3642 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3643 ev_msg.event_code = KEV_DL_SIFFLAGS;
3644 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3645 ev_data.if_family = ifp->if_family;
3646 ev_data.if_unit = (u_int32_t) ifp->if_unit;
3647 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
3648 ev_msg.dv[0].data_ptr = &ev_data;
3649 ev_msg.dv[1].data_length = 0;
3650 dlil_post_complete_msg(ifp, &ev_msg);
3651 }
3652
/* protocols per interface we handle without a heap allocation */
#define TMP_IF_PROTO_ARR_SIZE 10
/*
 * Deliver a kernel event to everything attached to ifp: first the
 * interface filters, then every attached protocol's event callback,
 * then the interface's own if_event hook, and finally post the event
 * system-wide via dlil_post_complete_msg().
 *
 * Protocol callbacks are invoked without the ifnet lock held; each
 * proto is individually referenced into a temporary array (stack array
 * for <= TMP_IF_PROTO_ARR_SIZE protocols, heap otherwise) so it cannot
 * disappear while the lock is dropped.
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			/* drop the lock across the callback; the busy
			 * marker above keeps the list stable */
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_get_ioref(ifp)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			/* small list: the stack array suffices */
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				/* NOTE(review): cleanup kfrees a NULL
				 * pointer here -- presumably kfree_type
				 * tolerates NULL; confirm */
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		/* take a ref on every proto while the lock is held */
		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* invoke each proto's event callback, lock dropped */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		/* release the ref taken above */
		if_proto_free(proto);
	}

cleanup:
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
3753
3754 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)3755 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3756 {
3757 struct kev_msg kev_msg;
3758 int result = 0;
3759
3760 if (ifp == NULL || event == NULL) {
3761 return EINVAL;
3762 }
3763
3764 bzero(&kev_msg, sizeof(kev_msg));
3765 kev_msg.vendor_code = event->vendor_code;
3766 kev_msg.kev_class = event->kev_class;
3767 kev_msg.kev_subclass = event->kev_subclass;
3768 kev_msg.event_code = event->event_code;
3769 kev_msg.dv[0].data_ptr = &event->event_data;
3770 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3771 kev_msg.dv[1].data_length = 0;
3772
3773 result = dlil_event_internal(ifp, &kev_msg, TRUE);
3774
3775 return result;
3776 }
3777
/*
 * The following is used to enqueue work items for ifnet ioctl events:
 * ifnet_ioctl_async() queues one on the network work queue and
 * ifnet_ioctl_event_callback() later runs it.
 */
static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);

/* Arguments carried by a deferred ioctl work item. */
struct ifnet_ioctl_event {
	ifnet_ref_t ifp;	/* io-ref'd until the callback runs */
	u_long ioctl_code;	/* SIOCADDMULTI or SIOCDELMULTI */
};

/* Work-queue entry embedding the deferred ioctl arguments. */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
3790
3791 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)3792 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
3793 {
3794 struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
3795 bool compare_expected;
3796
3797 /*
3798 * Get an io ref count if the interface is attached.
3799 * At this point it most likely is. We are taking a reference for
3800 * deferred processing.
3801 */
3802 if (!ifnet_get_ioref(ifp)) {
3803 os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
3804 "is not attached",
3805 __func__, __LINE__, if_name(ifp), ioctl_code);
3806 return;
3807 }
3808 switch (ioctl_code) {
3809 case SIOCADDMULTI:
3810 compare_expected = false;
3811 if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
3812 ifnet_decr_iorefcnt(ifp);
3813 return;
3814 }
3815 break;
3816 case SIOCDELMULTI:
3817 compare_expected = false;
3818 if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
3819 ifnet_decr_iorefcnt(ifp);
3820 return;
3821 }
3822 break;
3823 default:
3824 os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
3825 __func__, __LINE__, if_name(ifp), ioctl_code);
3826 return;
3827 }
3828
3829 p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
3830 Z_WAITOK | Z_ZERO | Z_NOFAIL);
3831
3832 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
3833 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
3834 p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
3835 nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
3836 }
3837
/*
 * Work-queue callback for ifnet_ioctl_async(): issues the deferred ioctl,
 * then releases the I/O reference and frees the work item that
 * ifnet_ioctl_async() took/allocated.
 */
static void
ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
{
	struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
	    struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);

	ifnet_ref_t ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
	u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
	int ret = 0;

	/*
	 * Clear the coalescing flag before issuing the ioctl, so a request
	 * arriving while ifnet_ioctl() runs schedules a fresh work item
	 * rather than being lost.
	 */
	switch (ioctl_code) {
	case SIOCADDMULTI:
		atomic_store(&ifp->if_mcast_add_signaled, false);
		break;
	case SIOCDELMULTI:
		atomic_store(&ifp->if_mcast_del_signaled, false);
		break;
	}
	if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ret, ioctl_code);
	} else if (dlil_verbose) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
		    "for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ioctl_code);
	}
	/* Balance the ioref taken in ifnet_ioctl_async() */
	ifnet_decr_iorefcnt(ifp);
	kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
	return;
}
3868
/*
 * Issue an ioctl on an interface, giving the attached filters, then the
 * protocol (when proto_fam != 0), and finally the driver a chance to
 * handle it.  EOPNOTSUPP answers are skipped in favor of later handlers;
 * EJUSTRETURN lets a handler claim the ioctl, stop further processing,
 * and is mapped to 0 before returning to the caller.
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;	/* "no one has handled it yet" */
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_get_ioref(ifp)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/* drop the lock across the callback; it may block */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means handled-and-stop; callers see success */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
3986
3987 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)3988 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
3989 {
3990 errno_t error = 0;
3991
3992 if (ifp->if_set_bpf_tap) {
3993 /* Get an io reference on the interface if it is attached */
3994 if (!ifnet_get_ioref(ifp)) {
3995 return ENXIO;
3996 }
3997 error = ifp->if_set_bpf_tap(ifp, mode, callback);
3998 ifnet_decr_iorefcnt(ifp);
3999 }
4000 return error;
4001 }
4002
/*
 * Resolve a multicast protocol address into a link-layer address in
 * `ll_addr' (zeroed first): ask the attached protocol for the mapping,
 * then let the driver verify the resulting (or, with no mapping, the
 * original protocol) address via if_check_multi.
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	if (!ifnet_get_ioref(ifp)) {
		return result;
	}

	SOCKADDR_ZERO(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			/* protocol produced a link-layer address: check it */
			verify = ll_addr;
		} else {
			/* no mapping; let the driver judge the proto address */
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return result;
}
4044
4045 __private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4046 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4047 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4048 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4049 {
4050 struct if_proto *proto;
4051 errno_t result = 0;
4052
4053 if ((ifp->if_flags & IFF_NOARP) != 0) {
4054 result = ENOTSUP;
4055 goto done;
4056 }
4057
4058 /* callee holds a proto refcnt upon success */
4059 ifnet_lock_shared(ifp);
4060 proto = find_attached_proto(ifp, target_proto->sa_family);
4061 ifnet_lock_done(ifp);
4062 if (proto == NULL) {
4063 result = ENOTSUP;
4064 } else {
4065 proto_media_send_arp arpp;
4066 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4067 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4068 if (arpp == NULL) {
4069 result = ENOTSUP;
4070 } else {
4071 switch (arpop) {
4072 case ARPOP_REQUEST:
4073 arpstat.txrequests++;
4074 if (target_hw != NULL) {
4075 arpstat.txurequests++;
4076 }
4077 break;
4078 case ARPOP_REPLY:
4079 arpstat.txreplies++;
4080 break;
4081 }
4082 result = arpp(ifp, arpop, sender_hw, sender_proto,
4083 target_hw, target_proto);
4084 }
4085 if_proto_free(proto);
4086 }
4087 done:
4088 return result;
4089 }
4090
4091 static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,const struct sockaddr_in * target_sin)4092 _is_announcement(const struct sockaddr_in * sender_sin,
4093 const struct sockaddr_in * target_sin)
4094 {
4095 if (target_sin == NULL || sender_sin == NULL) {
4096 return FALSE;
4097 }
4098
4099 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
4100 }
4101
/*
 * Send an ARP packet.  Normally forwards straight to
 * dlil_send_arp_internal() on `ifp'; the exception is an ARP request for
 * an IPv4 link-local target (when ipv4_ll_arp_aware is set and this is
 * not an announcement), which is replicated across every attached
 * interface marked IFEF_ARPLL, each using its own source addresses.
 * RTF_ROUTER is communicated to the protocol via SIN_ROUTER in a local
 * copy of the target address.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = __DECONST_SA(target_proto0);

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = SA(&target_proto_sinarp);
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces. The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = SIN(sender_proto);
	target_sin = SIN(target_proto);
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		u_int32_t count;
		ifnet_ref_t *__counted_by(count) ifp_list;
		u_int32_t ifp_on;

		/* stays ENOTSUP unless at least one interface accepts it */
		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				ifnet_ref_t cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing. This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* hold the lladdr across the unlocked send */
				ifa_addref(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, SDL(source_hw->ifa_addr),
				    SA(&source_ip_copy), NULL,
				    target_proto);

				ifa_remref(source_hw);
				/* keep the first non-ENOTSUP result */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free_counted_by(ifp_list, count);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
4212
4213 /*
4214 * Caller must hold ifnet head lock.
4215 */
4216 static int
ifnet_lookup(struct ifnet * ifp)4217 ifnet_lookup(struct ifnet *ifp)
4218 {
4219 ifnet_ref_t _ifp;
4220
4221 ifnet_head_lock_assert(LCK_RW_ASSERT_HELD);
4222 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4223 if (_ifp == ifp) {
4224 break;
4225 }
4226 }
4227 return _ifp != NULL;
4228 }
4229
4230 /*
4231 * Caller has to pass a non-zero refio argument to get a
4232 * IO reference count. This will prevent ifnet_detach from
4233 * being called when there are outstanding io reference counts.
4234 */
4235 int
ifnet_get_ioref(struct ifnet * ifp)4236 ifnet_get_ioref(struct ifnet *ifp)
4237 {
4238 bool ret;
4239
4240 ret = ifnet_is_fully_attached(ifp);
4241 if (ret) {
4242 if (os_ref_retain_try(&ifp->if_refio) == false) {
4243 /* refio became 0 which means it is detaching */
4244 return false;
4245 }
4246 }
4247
4248 return ret;
4249 }
4250
/*
 * Note that a worker thread for `ifp' has been created but has not yet
 * started; balanced by ifnet_decr_pending_thread_count().
 */
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4258
/*
 * A pending worker thread has started; wake anyone sleeping on
 * if_threads_pending once the count drains to zero.
 */
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4270
4271 /*
4272 * Caller must ensure the interface is attached; the assumption is that
4273 * there is at least an outstanding IO reference count held already.
4274 * Most callers would call ifnet_is_{attached,data_ready}() instead.
4275 */
4276 void
ifnet_incr_iorefcnt(struct ifnet * ifp)4277 ifnet_incr_iorefcnt(struct ifnet *ifp)
4278 {
4279 os_ref_retain(&ifp->if_refio);
4280 }
4281
/*
 * Drop an I/O reference taken with ifnet_get_ioref()/ifnet_incr_iorefcnt().
 */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread.
	 */
	if (os_ref_release_relaxed(&ifp->if_refio) == 0) {
		/* the lock serializes the wakeup against the detach sleeper */
		lck_mtx_lock(&ifp->if_ref_lock);
		wakeup(&(ifp->if_refio));
		lck_mtx_unlock(&ifp->if_ref_lock);
	}
}
4295
/*
 * Same as ifnet_decr_iorefcnt(), for callers already holding if_ref_lock.
 */
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread.
	 */
	if (os_ref_release_relaxed(&ifp->if_refio) == 0) {
		wakeup(&(ifp->if_refio));
	}
}
4307
/*
 * Enter a data-movement section on `ifp': takes an I/O reference plus a
 * datamov reference.  Fails (returns FALSE) when the interface is not
 * attached and ready, or its refio already dropped to 0 (detaching).
 * Must be balanced with ifnet_datamov_end().
 */
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	ret = ifnet_is_attached_and_ready(ifp);
	if (ret) {
		if (os_ref_retain_try(&ifp->if_refio) == false) {
			/* refio became 0 which means it is detaching */
			return false;
		}
		os_ref_retain_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
	}

	DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
	return ret;
}
4325
/*
 * Leave a data-movement section: drop the datamov reference, wake any
 * drainer once we were the last mover, then drop the I/O reference.
 */
void
ifnet_datamov_end(struct ifnet *ifp)
{
	uint32_t datamov;
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	datamov = os_ref_release_raw_relaxed_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
	/* upper bits hold the count, low bits the IF_DATAMOV_* flags */
	if (datamov >> IF_DATAMOV_BITS == 1 && (datamov & IF_DATAMOV_DRAINING)) {
		lck_mtx_lock(&ifp->if_ref_lock);
		DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
		DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
		wakeup(&(ifp->if_datamov));
		lck_mtx_unlock(&ifp->if_ref_lock);
	}
	ifnet_decr_iorefcnt(ifp);

	DTRACE_IP1(datamov__end, struct ifnet *, ifp);
}
4346
/*
 * Suspend data movement with if_ref_lock held: hold an I/O reference for
 * the duration of the suspension and, on the first suspension, clear
 * IFRF_READY so new datamov sections fail to start.
 */
static void
ifnet_datamov_suspend_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
	ifnet_incr_iorefcnt(ifp);
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
}
4357
/*
 * Lock-taking wrapper around ifnet_datamov_suspend_locked().
 */
static void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4366
4367 boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet * ifp)4368 ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
4369 {
4370 lck_mtx_lock_spin(&ifp->if_ref_lock);
4371 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
4372 if (ifp->if_suspend > 0) {
4373 lck_mtx_unlock(&ifp->if_ref_lock);
4374 return FALSE;
4375 }
4376 ifnet_datamov_suspend_locked(ifp);
4377 lck_mtx_unlock(&ifp->if_ref_lock);
4378 return TRUE;
4379 }
4380
/*
 * Wait until every in-flight data-movement section on `ifp' has ended,
 * then flush the transmit queue(s).  Data movement must already be
 * suspended (see ifnet_datamov_suspend*), so no new sections can begin
 * while we sleep.
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	/* tell ifnet_datamov_end() to wake us when the last mover leaves */
	os_atomic_or(&ifp->if_datamov, IF_DATAMOV_DRAINING, relaxed);
	while (os_ref_get_count_mask(&ifp->if_datamov, IF_DATAMOV_BITS) > 1) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	os_atomic_andnot(&ifp->if_datamov, IF_DATAMOV_DRAINING, relaxed);
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush(ifp, ifp->if_snd);
	}
}
4407
/*
 * Convenience: suspend data movement, then wait for it to quiesce.
 * Balanced by ifnet_datamov_resume().
 */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
4414
/*
 * Undo one suspension; when the last suspension is lifted, restore
 * IFRF_READY so data-movement sections may start again, and drop the
 * I/O reference held by the matching suspend.
 */
void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4428
/*
 * Second stage of protocol attach, shared by the v1/v2 entry points.
 * Rejects duplicates (EEXIST), lets the interface family refine the
 * demux descriptors, links `proto' into if_proto_hash, and posts
 * KEV_DL_PROTO_ATTACHED.  On success, *proto_count (when non-NULL) is
 * set to the number of protocols attached afterwards.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *__counted_by(demux_count) demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	ifnet_ref_t ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	if (!ifnet_get_ioref(ifp)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		/* already attached; drop the lookup reference */
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
4508
/*
 * Post-processing after a successful protocol attach: mark the interface
 * IFF_UP, kick the driver via SIOCSIFFLAGS, broadcast the flags change,
 * and (Skywalk) attach a flowswitch when an IP protocol attaches.
 */
static void
dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
{
	/*
	 * A protocol has been attached, mark the interface up.
	 * This used to be done by configd.KernelEventMonitor, but that
	 * is inherently prone to races (rdar://problem/30810208).
	 */
	(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
	(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
	dlil_post_sifflags_msg(ifp);
#if SKYWALK
	switch (protocol) {
	case AF_INET:
	case AF_INET6:
		/* don't attach the flowswitch unless attaching IP */
		dlil_attach_flowswitch_nexus(ifp);
		break;
	default:
		break;
	}
#endif /* SKYWALK */
}
4532
/*
 * Attach a v1 protocol KPI to `ifp'.  Validates the arguments and the
 * interface's membership in the global list, builds an if_proto from the
 * caller's callbacks, and hands off to dlil_attach_protocol().  On
 * success the interface is also brought up (dlil_handle_proto_attach()).
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* attach failed; release the unlinked if_proto */
		dlif_proto_free(ifproto);
	}
	return retval;
}
4594
/*
 * Attach a v2 protocol KPI to `ifp'.  Same flow as ifnet_attach_protocol()
 * but filling in the v2 callback union.
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* attach failed; release the unlinked if_proto */
		dlif_proto_free(ifproto);
	}
	return retval;
}
4656
/*
 * Detach `proto_family' from `ifp'.  Unhooks the protocol from the hash
 * and points its callbacks at the ifproto_media_* placeholders so racing
 * users fail with ENXIO instead of calling into freed state, then drops
 * both the lookup and the attach references; final teardown happens when
 * the last proto reference goes away.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/* neutralize the callbacks while holders still reference proto */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
4722
/*
 * Placeholder v1 input handler installed by ifnet_detach_protocol();
 * rejects everything with ENXIO after the protocol has detached.
 */
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}
4730
/*
 * Placeholder v2 input handler installed by ifnet_detach_protocol();
 * rejects everything with ENXIO after the protocol has detached.
 */
static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}
4738
/*
 * Placeholder pre-output handler installed by ifnet_detach_protocol();
 * fails with ENXIO after the protocol has detached.
 */
static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    IFNET_FRAME_TYPE_RW_T frame_type, IFNET_LLADDR_RW_T link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}
4747
/*
 * Placeholder event handler installed by ifnet_detach_protocol();
 * silently discards events after the protocol has detached.
 */
static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}
4754
/*
 * Placeholder ioctl handler installed by ifnet_detach_protocol();
 * fails with ENXIO after the protocol has detached.
 */
static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return ENXIO;
}
4762
/*
 * Placeholder multicast-resolve handler installed by
 * ifnet_detach_protocol(); fails with ENXIO after the protocol detached.
 */
static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return ENXIO;
}
4770
/*
 * Placeholder send_arp handler installed by ifnet_detach_protocol();
 * fails with ENXIO after the protocol has detached.
 */
static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return ENXIO;
}
4779
4780 extern int if_next_index(void);
4781 extern int tcp_ecn;
4782
4783 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)4784 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
4785 {
4786 uint32_t sflags = 0;
4787 int err;
4788
4789 if (if_flowadv) {
4790 sflags |= PKTSCHEDF_QALG_FLOWCTL;
4791 }
4792
4793 if (if_delaybased_queue) {
4794 sflags |= PKTSCHEDF_QALG_DELAYBASED;
4795 }
4796
4797 if (ifp->if_output_sched_model & IFNET_SCHED_DRIVER_MANGED_MODELS) {
4798 VERIFY(IFNET_MODEL_IS_VALID(ifp->if_output_sched_model));
4799 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
4800 }
4801 /* Inherit drop limit from the default queue */
4802 if (ifp->if_snd != ifcq) {
4803 IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
4804 }
4805 /* Initialize transmit queue(s) */
4806 err = ifclassq_setup(ifcq, ifp, sflags);
4807 if (err != 0) {
4808 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4809 "err=%d", __func__, ifp, err);
4810 /* NOTREACHED */
4811 }
4812 }
4813
/*
 * ifnet_attach: attach an interface to the system, making it visible to
 * lookups (ifnet_head, ifindex2ifnet[]) and starting its I/O threads.
 *
 * Parameters:
 *   ifp     interface to attach; must be in the embryonic state
 *           (IFRF_EMBRYONIC), i.e. freshly allocated or recycled.
 *   ll_addr optional link-layer address; if non-NULL, its sdl_alen must
 *           match ifp->if_addrlen (or if_addrlen must be 0, in which
 *           case it is inherited from ll_addr).
 *
 * Returns:
 *   0       success; the ifnet is marked IFRF_ATTACHED | IFRF_READY.
 *   EINVAL  NULL ifp, or link-layer address length mismatch.
 *   EEXIST  ifnet is already on the ifnet_head list.
 *   ENODEV  interface lacks if_add_proto/if_del_proto handlers.
 *   ENOBUFS no free if_index slot, or link address allocation failed.
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
#if SKYWALK
	boolean_t netif_compat;
	if_nexus_netif nexus_netif;
#endif /* SKYWALK */
	ifnet_ref_t tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	/* Attach is only legal from the embryonic state */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	/* Validate (or inherit) the link-layer address length */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
	    PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ifp->if_proto_hash_count = PROTO_HASH_SLOTS;

	/* Filter list must be empty and idle at attach time */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * A recycled (DLIF_REUSE) ifnet may carry over suspended multicast
	 * memberships; only a fresh ifnet gets its list (re)initialized.
	 */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	/* Assign an interface index if the caller did not provide one */
	if (ifp->if_index == 0) {
		int idx = if_next_index();

		/*
		 * Since we exhausted the list of
		 * if_index's, try to find an empty slot
		 * in ifindex2ifnet.
		 */
		if (idx == -1 && if_index >= UINT16_MAX) {
			for (int i = 1; i < if_index; i++) {
				if (ifindex2ifnet[i] == NULL &&
				    ifnet_addrs[i - 1] == NULL) {
					idx = i;
					break;
				}
			}
		}
		if (idx == -1) {
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;

		/* the lladdr passed at attach time is the permanent address */
		if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
		    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
			bcopy(CONST_LLADDR(ll_addr),
			    dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
			dl_if->dl_if_permanent_ether_is_set = 1;
		}
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	ifa_addref(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Publish into the global lookup structures */
	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL_DM);

	dlil_ifclassq_setup(ifp, ifp->if_snd);

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			/* ENODEV: no dedicated thread needed; not an error */
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* give the starter thread a slight scheduling boost */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (dlil_is_rxpoll_input(thfunc)) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* give the poller thread a slight scheduling boost */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/*
	 * Count link-layer multicast memberships that survived a recycle
	 * (DLIF_REUSE); drivers re-apply them via if_updatemcasts.
	 */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	/* Clear throughput estimates and radio type */
	ifp->if_estimated_up_bucket = 0;
	ifp->if_estimated_down_bucket = 0;
	ifp->if_radio_type = 0;
	ifp->if_radio_channel = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			/* non-fatal; interface still attaches without QoS marking */
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

#if SKYWALK
	netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that. This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Final mark this ifnet as attached. */
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	os_ref_init(&ifp->if_refio, &if_refiogrp);
	os_ref_init_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp, 0);
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
#if SKYWALK
	/* the interface is fully attached; let the nexus adapter know */
	if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
		if (netif_compat) {
			if (sk_netif_compat_txmodel ==
			    NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
				ifnet_enqueue_multi_setup(ifp,
				    sk_tx_delay_qlen, sk_tx_delay_timeout);
			}
			ifp->if_nx_netif = nexus_netif;
		}
		ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
	}
#endif /* SKYWALK */
	ifnet_lock_done(ifp);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);

	os_log(OS_LOG_DEFAULT, "%s: attached%s\n", if_name(ifp),
	    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	return 0;
}
5308
/*
 * Tell the upper layers to drop all of their network-layer addresses
 * for this interface (IPv4 first when INET is configured, then IPv6).
 * Used during interface detach.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
5317
/*
 * ifnet_detach: begin detaching an interface from the system.
 *
 * Marks the interface down and IFRF_DETACHING, removes it from the
 * global lookup structures (ifnet_head, ifindex2ifnet[]), resets a
 * number of per-interface states, and finally enqueues the ifnet for
 * the detacher worker thread, which performs the remainder of the
 * teardown in ifnet_detach_final() (to avoid reentrancy here).
 *
 * Returns:
 *   0       detach initiated; completion is asynchronous.
 *   EINVAL  NULL ifp, or interface was never attached.
 *   ENXIO   interface is already being detached.
 */
errno_t
ifnet_detach(ifnet_t ifp)
{
	ifnet_ref_t delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	/* Stop IPv6 CGA use for this interface before teardown */
	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}
	os_log(OS_LOG_DEFAULT, "%s detaching", if_name(ifp));

	/* Mark the interface down */
	if_down(ifp);

	/*
	 * IMPORTANT NOTE
	 *
	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
	 * or equivalently, ifnet_get_ioref(ifp, 1), can't be modified
	 * until after we've waited for all I/O references to drain
	 * in ifnet_detach_final().
	 */

	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/* Tear down any network emulation (netem) attached to output */
	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	/* Transition ATTACHED -> DETACHING exactly once */
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset CLAT46 flag */
	if_clear_eflags(ifp, IFEF_CLAT46);

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connection after the interface
	 * went down
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset router mode */
	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	/*
	 * Increment the generation count on interface deletion
	 */
	ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Force reset link heuristics */
	if (ifp->if_link_heuristics_tcall != NULL) {
		thread_call_cancel_wait(ifp->if_link_heuristics_tcall);
		thread_call_free(ifp->if_link_heuristics_tcall);
		ifp->if_link_heuristics_tcall = NULL;
	}
	if_clear_xflags(ifp, IFXF_LINK_HEURISTICS);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		kfree_type(struct if_link_status, ifp->if_link_status);
		ifp->if_link_status = NULL;
	}

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
	mld_domifdetach(ifp);

#if SKYWALK
	/* Clean up any netns tokens still pointing to to this ifnet */
	netns_ifnet_detach(ifp);
#endif /* SKYWALK */
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
5516
/*
 * Append an ifnet to the tail of the detaching queue and wake the
 * detacher worker thread. Caller must hold the dlil_if lock.
 */
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);  /* guard against counter wrap */
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	wakeup((caddr_t)&ifnet_delayed_run);
}
5527
5528 static struct ifnet *
ifnet_detaching_dequeue(void)5529 ifnet_detaching_dequeue(void)
5530 {
5531 ifnet_ref_t ifp;
5532
5533 dlil_if_lock_assert();
5534
5535 ifp = TAILQ_FIRST(&ifnet_detaching_head);
5536 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5537 if (ifp != NULL) {
5538 VERIFY(ifnet_detaching_cnt != 0);
5539 --ifnet_detaching_cnt;
5540 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5541 ifp->if_detaching_link.tqe_next = NULL;
5542 ifp->if_detaching_link.tqe_prev = NULL;
5543 }
5544 return ifp;
5545 }
5546
/*
 * Continuation body of the detacher worker thread. Drains the queue of
 * detaching interfaces, calling ifnet_detach_final() on each (with the
 * dlil_if lock dropped around the call), then blocks on ifnet_delayed_run
 * with itself as the continuation — this function never returns.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	ifnet_ref_t ifp;

	dlil_if_lock();
	/* first wakeup only: acknowledge thread startup (see thread_func) */
	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constrain so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* drop the lock: ifnet_detach_final() may block */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	/* queue drained; sleep until the next ifnet_detaching_enqueue() */
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
5589
/*
 * Entry point of the detacher worker thread. Arms the wait on
 * ifnet_delayed_run, marks itself embryonic, and issues a self-wakeup
 * so the continuation runs once immediately to complete startup
 * (the continuation clears the embryonic flag and decrements the
 * pending-thread count). Never returns.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
5606
5607 static void
ifnet_detach_final(struct ifnet * ifp)5608 ifnet_detach_final(struct ifnet *ifp)
5609 {
5610 struct ifnet_filter *filter, *filter_next;
5611 struct dlil_ifnet *dlifp;
5612 struct ifnet_filter_head fhead;
5613 struct dlil_threading_info *inp;
5614 struct ifaddr *ifa;
5615 ifnet_detached_func if_free;
5616 int i;
5617
5618 /* Let BPF know we're detaching */
5619 bpfdetach(ifp);
5620
5621 #if SKYWALK
5622 dlil_netif_detach_notify(ifp);
5623 /*
5624 * Wait for the datapath to quiesce before tearing down
5625 * netif/flowswitch nexuses.
5626 */
5627 dlil_quiesce_and_detach_nexuses(ifp);
5628 #endif /* SKYWALK */
5629
5630 lck_mtx_lock(&ifp->if_ref_lock);
5631 if (!(ifp->if_refflags & IFRF_DETACHING)) {
5632 panic("%s: flags mismatch (detaching not set) ifp=%p",
5633 __func__, ifp);
5634 /* NOTREACHED */
5635 }
5636
5637 /*
5638 * Wait until the existing IO references get released
5639 * before we proceed with ifnet_detach. This is not a
5640 * common case, so block without using a continuation.
5641 */
5642 if (os_ref_release_relaxed(&ifp->if_refio) > 0) {
5643 bool waited = false;
5644
5645 while (os_ref_get_count(&ifp->if_refio) > 0) {
5646 waited = true;
5647 DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
5648 __func__, if_name(ifp));
5649 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
5650 (PZERO - 1), "ifnet_ioref_wait", NULL);
5651 }
5652 if (waited) {
5653 DLIL_PRINTF("%s: %s IO references drained\n",
5654 __func__, if_name(ifp));
5655 }
5656 }
5657 os_ref_release_last_mask(&ifp->if_datamov, IF_DATAMOV_BITS, &if_datamovgrp);
5658 VERIFY(ifp->if_suspend == 0);
5659 ifp->if_refflags &= ~IFRF_READY;
5660 lck_mtx_unlock(&ifp->if_ref_lock);
5661
5662 #if SKYWALK
5663 VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
5664 #endif /* SKYWALK */
5665 /* Drain and destroy send queue */
5666 ifclassq_teardown(ifp->if_snd);
5667
5668 /* Detach interface filters */
5669 lck_mtx_lock(&ifp->if_flt_lock);
5670 if_flt_monitor_enter(ifp);
5671
5672 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
5673 fhead = ifp->if_flt_head;
5674 TAILQ_INIT(&ifp->if_flt_head);
5675
5676 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
5677 filter_next = TAILQ_NEXT(filter, filt_next);
5678 lck_mtx_unlock(&ifp->if_flt_lock);
5679
5680 dlil_detach_filter_internal(filter, 1);
5681 lck_mtx_lock(&ifp->if_flt_lock);
5682 }
5683 if_flt_monitor_leave(ifp);
5684 lck_mtx_unlock(&ifp->if_flt_lock);
5685
5686 /* Tell upper layers to drop their network addresses */
5687 if_purgeaddrs(ifp);
5688
5689 ifnet_lock_exclusive(ifp);
5690
5691 /* Clear agent IDs */
5692 if (ifp->if_agentids != NULL) {
5693 kfree_data_sized_by(ifp->if_agentids, ifp->if_agentcount);
5694 }
5695
5696 bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
5697 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
5698
5699 /* Unplumb all protocols */
5700 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5701 struct if_proto *proto;
5702
5703 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5704 while (proto != NULL) {
5705 protocol_family_t family = proto->protocol_family;
5706 ifnet_lock_done(ifp);
5707 proto_unplumb(family, ifp);
5708 ifnet_lock_exclusive(ifp);
5709 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5710 }
5711 /* There should not be any protocols left */
5712 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
5713 }
5714 kfree_type_counted_by(struct proto_hash_entry, ifp->if_proto_hash_count, ifp->if_proto_hash);
5715
5716 /* Detach (permanent) link address from if_addrhead */
5717 ifa = TAILQ_FIRST(&ifp->if_addrhead);
5718 IFA_LOCK(ifa);
5719 if_detach_link_ifa(ifp, ifa);
5720 IFA_UNLOCK(ifa);
5721
5722 /* This interface should not be on {ifnet_head,detaching} */
5723 VERIFY(ifp->if_link.tqe_next == NULL);
5724 VERIFY(ifp->if_link.tqe_prev == NULL);
5725 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5726 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5727 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
5728 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
5729
5730 /* The slot should have been emptied */
5731 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5732
5733 /* There should not be any addresses left */
5734 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5735
5736 /*
5737 * Signal the starter thread to terminate itself, and wait until
5738 * it has exited.
5739 */
5740 if (ifp->if_start_thread != THREAD_NULL) {
5741 lck_mtx_lock_spin(&ifp->if_start_lock);
5742 ifp->if_start_flags |= IFSF_TERMINATING;
5743 wakeup_one((caddr_t)&ifp->if_start_thread);
5744 lck_mtx_unlock(&ifp->if_start_lock);
5745
5746 /* wait for starter thread to terminate */
5747 lck_mtx_lock(&ifp->if_start_lock);
5748 while (ifp->if_start_thread != THREAD_NULL) {
5749 if (dlil_verbose) {
5750 DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
5751 __func__,
5752 if_name(ifp));
5753 }
5754 (void) msleep(&ifp->if_start_thread,
5755 &ifp->if_start_lock, (PZERO - 1),
5756 "ifnet_start_thread_exit", NULL);
5757 }
5758 lck_mtx_unlock(&ifp->if_start_lock);
5759 if (dlil_verbose) {
5760 DLIL_PRINTF("%s: %s starter thread termination complete",
5761 __func__, if_name(ifp));
5762 }
5763 }
5764
5765 /*
5766 * Signal the poller thread to terminate itself, and wait until
5767 * it has exited.
5768 */
5769 if (ifp->if_poll_thread != THREAD_NULL) {
5770 #if SKYWALK
5771 VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5772 #endif /* SKYWALK */
5773 lck_mtx_lock_spin(&ifp->if_poll_lock);
5774 ifp->if_poll_flags |= IF_POLLF_TERMINATING;
5775 wakeup_one((caddr_t)&ifp->if_poll_thread);
5776 lck_mtx_unlock(&ifp->if_poll_lock);
5777
5778 /* wait for poller thread to terminate */
5779 lck_mtx_lock(&ifp->if_poll_lock);
5780 while (ifp->if_poll_thread != THREAD_NULL) {
5781 if (dlil_verbose) {
5782 DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
5783 __func__,
5784 if_name(ifp));
5785 }
5786 (void) msleep(&ifp->if_poll_thread,
5787 &ifp->if_poll_lock, (PZERO - 1),
5788 "ifnet_poll_thread_exit", NULL);
5789 }
5790 lck_mtx_unlock(&ifp->if_poll_lock);
5791 if (dlil_verbose) {
5792 DLIL_PRINTF("%s: %s poller thread termination complete\n",
5793 __func__, if_name(ifp));
5794 }
5795 }
5796
5797 /*
5798 * If thread affinity was set for the workloop thread, we will need
5799 * to tear down the affinity and release the extra reference count
5800 * taken at attach time. Does not apply to lo0 or other interfaces
5801 * without dedicated input threads.
5802 */
5803 if ((inp = ifp->if_inp) != NULL) {
5804 VERIFY(inp != dlil_main_input_thread);
5805
5806 if (inp->dlth_affinity) {
5807 struct thread *__single tp, *__single wtp, *__single ptp;
5808
5809 lck_mtx_lock_spin(&inp->dlth_lock);
5810 wtp = inp->dlth_driver_thread;
5811 inp->dlth_driver_thread = THREAD_NULL;
5812 ptp = inp->dlth_poller_thread;
5813 inp->dlth_poller_thread = THREAD_NULL;
5814 ASSERT(inp->dlth_thread != THREAD_NULL);
5815 tp = inp->dlth_thread; /* don't nullify now */
5816 inp->dlth_affinity_tag = 0;
5817 inp->dlth_affinity = FALSE;
5818 lck_mtx_unlock(&inp->dlth_lock);
5819
5820 /* Tear down poll thread affinity */
5821 if (ptp != NULL) {
5822 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
5823 VERIFY(ifp->if_xflags & IFXF_LEGACY);
5824 (void) dlil_affinity_set(ptp,
5825 THREAD_AFFINITY_TAG_NULL);
5826 thread_deallocate(ptp);
5827 }
5828
5829 /* Tear down workloop thread affinity */
5830 if (wtp != NULL) {
5831 (void) dlil_affinity_set(wtp,
5832 THREAD_AFFINITY_TAG_NULL);
5833 thread_deallocate(wtp);
5834 }
5835
5836 /* Tear down DLIL input thread affinity */
5837 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
5838 thread_deallocate(tp);
5839 }
5840
5841 /* disassociate ifp DLIL input thread */
5842 ifp->if_inp = NULL;
5843
5844 /* if the worker thread was created, tell it to terminate */
5845 if (inp->dlth_thread != THREAD_NULL) {
5846 lck_mtx_lock_spin(&inp->dlth_lock);
5847 inp->dlth_flags |= DLIL_INPUT_TERMINATE;
5848 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
5849 wakeup_one((caddr_t)&inp->dlth_flags);
5850 }
5851 lck_mtx_unlock(&inp->dlth_lock);
5852 ifnet_lock_done(ifp);
5853
5854 /* wait for the input thread to terminate */
5855 lck_mtx_lock_spin(&inp->dlth_lock);
5856 while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
5857 == 0) {
5858 (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
5859 (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
5860 }
5861 lck_mtx_unlock(&inp->dlth_lock);
5862 ifnet_lock_exclusive(ifp);
5863 }
5864
5865 /* clean-up input thread state */
5866 dlil_clean_threading_info(inp);
5867 /* clean-up poll parameters */
5868 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5869 dlil_reset_rxpoll_params(ifp);
5870 }
5871
5872 /* The driver might unload, so point these to ourselves */
5873 if_free = ifp->if_free;
5874 ifp->if_output_dlil = ifp_if_output;
5875 ifp->if_output = ifp_if_output;
5876 ifp->if_pre_enqueue = ifp_if_output;
5877 ifp->if_start = ifp_if_start;
5878 ifp->if_output_ctl = ifp_if_ctl;
5879 ifp->if_input_dlil = ifp_if_input;
5880 ifp->if_input_poll = ifp_if_input_poll;
5881 ifp->if_input_ctl = ifp_if_ctl;
5882 ifp->if_ioctl = ifp_if_ioctl;
5883 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
5884 ifp->if_free = ifp_if_free;
5885 ifp->if_demux = ifp_if_demux;
5886 ifp->if_event = ifp_if_event;
5887 ifp->if_framer_legacy = ifp_if_framer;
5888 ifp->if_framer = ifp_if_framer_extended;
5889 ifp->if_add_proto = ifp_if_add_proto;
5890 ifp->if_del_proto = ifp_if_del_proto;
5891 ifp->if_check_multi = ifp_if_check_multi;
5892
5893 /* wipe out interface description */
5894 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5895 ifp->if_desc.ifd_len = 0;
5896 VERIFY(ifp->if_desc.ifd_desc != NULL);
5897 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
5898
5899 /* there shouldn't be any delegation by now */
5900 VERIFY(ifp->if_delegated.ifp == NULL);
5901 VERIFY(ifp->if_delegated.type == 0);
5902 VERIFY(ifp->if_delegated.family == 0);
5903 VERIFY(ifp->if_delegated.subfamily == 0);
5904 VERIFY(ifp->if_delegated.expensive == 0);
5905 VERIFY(ifp->if_delegated.constrained == 0);
5906 VERIFY(ifp->if_delegated.ultra_constrained == 0);
5907
5908 /* QoS marking get cleared */
5909 if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
5910 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
5911
5912 #if SKYWALK
5913 /* the nexus destructor is responsible for clearing these */
5914 VERIFY(ifp->if_na_ops == NULL);
5915 VERIFY(ifp->if_na == NULL);
5916 #endif /* SKYWALK */
5917
5918 /* interface could come up with different hwassist next time */
5919 ifp->if_hwassist = 0;
5920 ifp->if_capenable = 0;
5921
5922 /* promiscuous/allmulti counts need to start at zero again */
5923 ifp->if_pcount = 0;
5924 ifp->if_amcount = 0;
5925 ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);
5926
5927 ifnet_lock_done(ifp);
5928
5929 #if PF
5930 /*
5931 * Detach this interface from packet filter, if enabled.
5932 */
5933 pf_ifnet_hook(ifp, 0);
5934 #endif /* PF */
5935
5936 /* Filter list should be empty */
5937 lck_mtx_lock_spin(&ifp->if_flt_lock);
5938 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
5939 VERIFY(ifp->if_flt_busy == 0);
5940 VERIFY(ifp->if_flt_waiters == 0);
5941 VERIFY(ifp->if_flt_non_os_count == 0);
5942 VERIFY(ifp->if_flt_no_tso_count == 0);
5943 lck_mtx_unlock(&ifp->if_flt_lock);
5944
5945 /* Last chance to drain send queue */
5946 if_qflush(ifp, ifp->if_snd);
5947
5948 /* Last chance to cleanup any cached route */
5949 lck_mtx_lock(&ifp->if_cached_route_lock);
5950 VERIFY(!ifp->if_fwd_cacheok);
5951 ROUTE_RELEASE(&ifp->if_fwd_route);
5952 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
5953 ROUTE_RELEASE(&ifp->if_src_route);
5954 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
5955 ROUTE_RELEASE(&ifp->if_src_route6);
5956 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
5957 lck_mtx_unlock(&ifp->if_cached_route_lock);
5958
5959 /* Ignore any pending data threshold as the interface is anyways gone */
5960 ifp->if_data_threshold = 0;
5961
5962 VERIFY(ifp->if_dt_tcall != NULL);
5963 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
5964
5965 ifnet_llreach_ifdetach(ifp);
5966
5967 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);
5968
5969 /*
5970 * Finally, mark this ifnet as detached.
5971 */
5972 os_log(OS_LOG_DEFAULT, "%s detached", if_name(ifp));
5973
5974 lck_mtx_lock_spin(&ifp->if_ref_lock);
5975 if (!(ifp->if_refflags & IFRF_DETACHING)) {
5976 panic("%s: flags mismatch (detaching not set) ifp=%p",
5977 __func__, ifp);
5978 /* NOTREACHED */
5979 }
5980 ifp->if_refflags &= ~IFRF_DETACHING;
5981 lck_mtx_unlock(&ifp->if_ref_lock);
5982 if (if_free != NULL) {
5983 if_free(ifp);
5984 }
5985
5986 ifclassq_release(&ifp->if_snd);
5987
5988 /* Remove (permanent) link address from ifnet_addrs[] */
5989 ifnet_head_lock_exclusive();
5990 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
5991 ifa_remref(ifa);
5992 ifnet_addrs[ifp->if_index - 1] = NULL;
5993 ifnet_head_done();
5994
5995 /* we're fully detached, clear the "in use" bit */
5996 dlifp = (struct dlil_ifnet *)ifp;
5997 lck_mtx_lock(&dlifp->dl_if_lock);
5998 ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
5999 dlifp->dl_if_flags &= ~DLIF_INUSE;
6000 lck_mtx_unlock(&dlifp->dl_if_lock);
6001
6002 /* Release reference held during ifnet attach */
6003 ifnet_release(ifp);
6004 }
6005
/*
 * Stub if_output handler installed on a detached ifnet (see the
 * "driver might unload" pointer swap in the detach path above).
 * Any packet chain handed in is simply freed.
 */
errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	/* consume the entire chain; callers expect the mbufs to be taken */
	m_freem_list(m);
	return 0;
}
6013
/*
 * Stub if_start handler for a detached ifnet: drain whatever is still
 * sitting on the interface's send queue instead of transmitting it.
 */
void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
6019
/*
 * Stub if_input_dlil handler for a detached ifnet: drop the inbound
 * chain and report the interface as gone (ENXIO).
 */
static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	/* m_head is the start of the chain; freeing it frees everything */
	m_freem_list(m_head);
	return ENXIO;
}
6029
/*
 * Stub if_input_poll handler for a detached ifnet: report an empty
 * harvest.  Each out-parameter is individually optional, so only the
 * non-NULL ones are zeroed.
 */
static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL) {
		*m_head = NULL;
	}
	if (m_tail != NULL) {
		*m_tail = NULL;
	}
	if (cnt != NULL) {
		*cnt = 0;
	}
	if (len != NULL) {
		*len = 0;
	}
}
6048
/*
 * Stub if_output_ctl/if_input_ctl handler for a detached ifnet:
 * reject every control command.
 */
static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}
6055
/*
 * Stub if_demux handler for a detached ifnet: free the packet and
 * return EJUSTRETURN so the caller does not touch it again.
 */
static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}
6063
/*
 * Stub if_add_proto handler for a detached ifnet: no protocol may be
 * plumbed onto an interface that is going away.
 */
static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}
6071
/*
 * Stub if_del_proto handler for a detached ifnet: nothing is plumbed,
 * so there is nothing to remove.
 */
static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}
6078
/*
 * Stub if_check_multi handler for a detached ifnet: no multicast
 * address is acceptable.
 */
static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}
6085
/*
 * Stub legacy framer for a detached ifnet.  The signature differs by
 * platform (the embedded variant carries pre/post length pointers);
 * both variants forward to ifp_if_framer_extended, which drops the
 * packet.
 */
#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	/* macOS variant has no pre/post out-parameters to propagate */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
6104
6105 static errno_t
ifp_if_framer_extended(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * sa,IFNET_LLADDR_T ll,IFNET_FRAME_TYPE_T t,u_int32_t * pre,u_int32_t * post)6106 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6107 const struct sockaddr *sa,
6108 IFNET_LLADDR_T ll,
6109 IFNET_FRAME_TYPE_T t,
6110 u_int32_t *pre, u_int32_t *post)
6111 {
6112 #pragma unused(ifp, sa, ll, t)
6113 m_freem(*m);
6114 *m = NULL;
6115
6116 if (pre != NULL) {
6117 *pre = 0;
6118 }
6119 if (post != NULL) {
6120 *post = 0;
6121 }
6122
6123 return EJUSTRETURN;
6124 }
6125
/*
 * Stub if_ioctl handler for a detached ifnet: all ioctls are refused.
 */
errno_t
ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
{
#pragma unused(ifp, cmd, arg)
	return EOPNOTSUPP;
}
6132
/*
 * Stub if_set_bpf_tap handler for a detached ifnet.  Accepts the
 * request without acting on it.
 */
static errno_t
ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
{
#pragma unused(ifp, tm, f)
	/* XXX not sure what to do here */
	return 0;
}
6140
/*
 * Stub if_free handler for a detached ifnet: intentionally a no-op,
 * since the real driver's free routine may already be unloaded.
 */
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
}
6146
/*
 * Stub if_event handler for a detached ifnet: events are ignored.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
}
6152
6153 __private_extern__ void
dlil_proto_unplumb_all(struct ifnet * ifp)6154 dlil_proto_unplumb_all(struct ifnet *ifp)
6155 {
6156 /*
6157 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6158 * each bucket contains exactly one entry; PF_VLAN does not need an
6159 * explicit unplumb.
6160 *
6161 * if_proto_hash[3] is for other protocols; we expect anything
6162 * in this bucket to respond to the DETACHING event (which would
6163 * have happened by now) and do the unplumb then.
6164 */
6165 (void) proto_unplumb(PF_INET, ifp);
6166 (void) proto_unplumb(PF_INET6, ifp);
6167 }
6168
/*
 * Copy the interface's cached IPv4 source route into *dst under the
 * cached-route lock.  The spin lock is converted to a full mutex
 * because route_copyout may block.
 */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6179
/*
 * Store *src back into the interface's cached IPv4 source route,
 * consuming the caller's reference.  If forwarding-cache use has been
 * disabled (if_fwd_cacheok cleared, e.g. during detach), the route is
 * released instead of cached.
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6193
/*
 * IPv6 counterpart of ifp_src_route_copyout: copy the cached IPv6
 * source route into *dst under the cached-route lock.
 */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6205
/*
 * IPv6 counterpart of ifp_src_route_copyin: cache *src (consuming the
 * caller's reference) unless forwarding-cache use is disabled, in
 * which case the route is released.
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6220
/*
 * Look up a route to src_ip using the interface's cached IPv4 source
 * route, refreshing the cache on a miss.  Returns a route with one
 * reference held for the caller, or NULL if no route was found.
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = SIN(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	/* cache miss: stale route or destination changed */
	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		if (dst->sin_family != AF_INET) {
			/* (re)initialize the sockaddr for an AF_INET lookup */
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		/* scoped lookup confined to this interface */
		src_rt.ro_rt = rtalloc1_scoped(SA(dst),
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
6255
/*
 * IPv6 counterpart of ifnet_cached_rtlookup_inet: look up a route to
 * *src_ip6 via the interface's cached IPv6 source route, refreshing
 * the cache on a miss.  Returns a route with one reference held for
 * the caller, or NULL.
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	/* cache miss: stale route or destination changed */
	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			/* (re)initialize the sockaddr for an AF_INET6 lookup */
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		/*
		 * NOTE(review): the IPv4 twin asserts ro_rt == NULL here
		 * instead of testing it; after ROUTE_RELEASE this check
		 * appears to always hold — confirm before changing.
		 */
		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				SA(&src_rt.ro_dst), 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
6292
/*
 * Update the interface's link quality metric (LQM) state.
 *
 * lqm:    new metric, must lie in [IFNET_LQM_MIN, IFNET_LQM_MAX];
 *         normalized to a threshold value before use.
 * locked: nonzero if the caller already holds the ifnet lock
 *         exclusively.  Note that the lock is dropped across the
 *         kernel-event post and NECP update below even when the
 *         caller held it, and is reacquired before returning.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;
	uint64_t now, delta;
	int8_t old_lqm;
	bool need_necp_client_update;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	lqm = ifnet_lqm_normalize(lqm);
	if (lqm == IFNET_LQM_THRESH_ABORT) {
		/* schedule the fast inpcb timer to act on the abort */
		os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return; /* nothing to update */
	}

	/* account the time spent in the outgoing LQM state */
	net_update_uptime();
	now = net_uptime_ms();
	ASSERT(now >= ifp->if_lqmstate_start_time);
	delta = now - ifp->if_lqmstate_start_time;

	old_lqm = ifp->if_interface_state.lqm_state;
	switch (old_lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_time += delta;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_time += delta;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_time += delta;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_time += delta;
		break;
	default:
		break;
	}
	/* count the transition into the new LQM state */
	switch (lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_cnt += 1;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_cnt += 1;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_cnt += 1;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_cnt += 1;
		break;
	default:
		break;
	}
	ifp->if_lqmstate_start_time = now;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Update the link heuristics
	 */
	need_necp_client_update = if_update_link_heuristic(ifp);

	/*
	 * Don't want to hold the lock when issuing kernel events or calling NECP
	 */
	ifnet_lock_done(ifp);

	if (need_necp_client_update) {
		necp_update_all_clients_immediately_if_needed(true);
	}

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
6399
/*
 * Update the interface's radio resource control (RRC) state and post
 * KEV_DL_RRC_STATE_CHANGED.  Called with the ifnet lock held
 * exclusively; the lock is dropped across the kernel-event post and
 * reacquired before returning.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* no-op if the state is already valid and unchanged */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	ifnet_lock_exclusive(ifp);
}
6429
/*
 * Apply a caller-supplied interface state (LQM, RRC, availability) to
 * the ifnet.  Validates each field flagged in valid_bitmask before
 * applying any of them.  Returns ENOTSUP for RRC updates on
 * non-cellular interfaces, EINVAL for out-of-range values, 0 on
 * success.
 */
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	/* RRC state is only meaningful on cellular interfaces */
	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return ENOTSUP;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}

	/* lqm/rrc helpers are invoked with the ifnet lock held */
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}

	return 0;
}
6500
6501 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)6502 if_get_state(struct ifnet *ifp,
6503 struct if_interface_state *if_interface_state)
6504 {
6505 ifnet_lock_shared(ifp);
6506
6507 if_interface_state->valid_bitmask = 0;
6508
6509 if (ifp->if_interface_state.valid_bitmask &
6510 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6511 if_interface_state->valid_bitmask |=
6512 IF_INTERFACE_STATE_RRC_STATE_VALID;
6513 if_interface_state->rrc_state =
6514 ifp->if_interface_state.rrc_state;
6515 }
6516 if (ifp->if_interface_state.valid_bitmask &
6517 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6518 if_interface_state->valid_bitmask |=
6519 IF_INTERFACE_STATE_LQM_STATE_VALID;
6520 if_interface_state->lqm_state =
6521 ifp->if_interface_state.lqm_state;
6522 }
6523 if (ifp->if_interface_state.valid_bitmask &
6524 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6525 if_interface_state->valid_bitmask |=
6526 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6527 if_interface_state->interface_availability =
6528 ifp->if_interface_state.interface_availability;
6529 }
6530
6531 ifnet_lock_done(ifp);
6532 }
6533
6534 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)6535 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6536 {
6537 if (conn_probe > 1) {
6538 return EINVAL;
6539 }
6540 if (conn_probe == 0) {
6541 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6542 } else {
6543 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6544 }
6545
6546 os_log(OS_LOG_DEFAULT, "interface probing on %s set to %u by %s:%d",
6547 if_name(ifp), conn_probe, proc_best_name(current_proc()), proc_selfpid());
6548
6549 #if NECP
6550 necp_update_all_clients();
6551 #endif /* NECP */
6552
6553 tcp_probe_connectivity(ifp, conn_probe);
6554 return 0;
6555 }
6556
6557 /* for uuid.c */
6558 static int
get_ether_index(int * ret_other_index)6559 get_ether_index(int * ret_other_index)
6560 {
6561 ifnet_ref_t ifp;
6562 int en0_index = 0;
6563 int other_en_index = 0;
6564 int any_ether_index = 0;
6565 short best_unit = 0;
6566
6567 *ret_other_index = 0;
6568 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6569 /*
6570 * find en0, or if not en0, the lowest unit en*, and if not
6571 * that, any ethernet
6572 */
6573 ifnet_lock_shared(ifp);
6574 if (strcmp(ifp->if_name, "en") == 0) {
6575 if (ifp->if_unit == 0) {
6576 /* found en0, we're done */
6577 en0_index = ifp->if_index;
6578 ifnet_lock_done(ifp);
6579 break;
6580 }
6581 if (other_en_index == 0 || ifp->if_unit < best_unit) {
6582 other_en_index = ifp->if_index;
6583 best_unit = ifp->if_unit;
6584 }
6585 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
6586 any_ether_index = ifp->if_index;
6587 }
6588 ifnet_lock_done(ifp);
6589 }
6590 if (en0_index == 0) {
6591 if (other_en_index != 0) {
6592 *ret_other_index = other_en_index;
6593 } else if (any_ether_index != 0) {
6594 *ret_other_index = any_ether_index;
6595 }
6596 }
6597 return en0_index;
6598 }
6599
/*
 * Fetch a stable 6-byte ethernet address for UUID generation
 * (consumed by uuid.c).  Prefers en0, then a fallback interface from
 * get_ether_index(); prefers the permanent MAC over the current one
 * when available, since it never changes.  Returns 0 on success, -1
 * if no suitable interface exists.
 */
int
uuid_get_ethernet(u_int8_t *__counted_by(ETHER_ADDR_LEN) node)
{
	/* cached en0 index; revalidated against ifindex2ifnet below */
	static int en0_index;
	ifnet_ref_t ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	/* refresh the cache if unset or the cached slot went away */
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
		} else {
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		}
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();
	return ret;
}
6641
6642 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6643 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6644 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6645 {
6646 struct kev_dl_node_presence kev;
6647 struct sockaddr_dl *sdl;
6648 struct sockaddr_in6 *sin6;
6649 int ret = 0;
6650
6651 VERIFY(ifp);
6652 VERIFY(sa);
6653 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6654
6655 bzero(&kev, sizeof(kev));
6656 sin6 = &kev.sin6_node_address;
6657 sdl = &kev.sdl_node_address;
6658 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6659 kev.rssi = rssi;
6660 kev.link_quality_metric = lqm;
6661 kev.node_proximity_metric = npm;
6662 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6663
6664 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6665 if (ret == 0 || ret == EEXIST) {
6666 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6667 &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6668 if (err != 0) {
6669 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
6670 "error %d\n", __func__, err);
6671 }
6672 }
6673
6674 if (ret == EEXIST) {
6675 ret = 0;
6676 }
6677 return ret;
6678 }
6679
/*
 * Report a neighbor node as gone: remove it from the nd6 alternate
 * node table and, on success, post KEV_DL_NODE_ABSENCE.  sa may be
 * either the node's AF_INET6 address or its AF_LINK address.
 */
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;
	int error = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
		error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	if (error == 0) {
		/* stamp the event with this interface's identity */
		kev_sdl->sdl_type = ifp->if_type;
		kev_sdl->sdl_index = ifp->if_index;

		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
		    &kev.link_data, sizeof(kev), FALSE);
	}
}
6720
/*
 * Variant of dlil_node_present() where the caller supplies both the
 * node's AF_INET6 address (sa) and its AF_LINK address (sdl)
 * explicitly, instead of having them derived.  Records the node via
 * nd6_alt_node_present() and posts KEV_DL_NODE_PRESENCE.  Returns 0
 * on success (EEXIST is treated as success; the event is then posted
 * with the suppress-duplicate flag set).
 */
int
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	SOCKADDR_COPY(sdl, kev_sdl, sdl->sdl_len);
	/* stamp the event with this interface's identity */
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0 || ret == EEXIST) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
		}
	}

	/* an already-known node is not an error to our callers */
	if (ret == EEXIST) {
		ret = 0;
	}
	return ret;
}
6764
/*
 * Return a pointer to the raw link-layer address bytes of sdl, storing
 * the byte count (sdl_alen) in *sizep when non-NULL.
 *
 * With CONFIG_MACF and dlil_lladdr_ckreq enabled, Ethernet and FireWire
 * addresses are subject to a MAC credential check; if the caller's
 * credential fails mac_system_check_info("net.link.addr"), a fixed
 * EUI64-sized placeholder (first byte = 2) is returned instead of the
 * real address.  credp is forced to NULL for other interface types, so
 * those are returned unchecked.  Note *sizep still receives the real
 * sdl_alen even when the placeholder is substituted.
 */
const void *
dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
    kauth_cred_t *credp)
{
	const u_int8_t *bytes;
	size_t size;

	bytes = CONST_LLADDR(sdl);
	size = sdl->sdl_alen;

#if CONFIG_MACF
	if (dlil_lladdr_ckreq) {
		/* Only Ethernet/FireWire link addresses go through the check */
		switch (sdl->sdl_type) {
		case IFT_ETHER:
		case IFT_IEEE1394:
			break;
		default:
			credp = NULL;
			break;
		}
		;

		if (credp && mac_system_check_info(*credp, "net.link.addr")) {
			/* check failed: hand back an anonymized address */
			static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
				[0] = 2
			};

			bytes = unspec;
		}
	}
#else
#pragma unused(credp)
#endif

	if (sizep != NULL) {
		*sizep = size;
	}
	return bytes;
}
6804
6805 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])6806 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6807 u_int8_t info[DLIL_MODARGLEN])
6808 {
6809 struct kev_dl_issues kev;
6810 struct timeval tv;
6811
6812 VERIFY(ifp != NULL);
6813 VERIFY(modid != NULL);
6814 static_assert(sizeof(kev.modid) == DLIL_MODIDLEN);
6815 static_assert(sizeof(kev.info) == DLIL_MODARGLEN);
6816
6817 bzero(&kev, sizeof(kev));
6818
6819 microtime(&tv);
6820 kev.timestamp = tv.tv_sec;
6821 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6822 if (info != NULL) {
6823 bcopy(info, &kev.info, DLIL_MODARGLEN);
6824 }
6825
6826 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6827 &kev.link_data, sizeof(kev), FALSE);
6828 }
6829
/*
 * Handle the SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC ioctls.
 *
 * Set: requires superuser; maps ifo_flags to a throttle level
 * (IFRIFOF_BLOCK_OPPORTUNISTIC -> opportunistic, 0 -> off, anything
 * else -> EINVAL) and applies it via ifnet_set_throttle().
 * Get: reads the current throttle level and reflects it in ifo_flags.
 * On success (either direction), ifo_inuse is filled with the count of
 * opportunistic TCP+UDP connections over the interface.  EALREADY is
 * mapped to success before returning.
 */
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		/* SETCMD tells the PCB walk this is a state change */
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
6888
6889 int
ifnet_get_throttle(struct ifnet * ifp,u_int32_t * level)6890 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
6891 {
6892 struct ifclassq *ifq;
6893 cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
6894 int err = 0;
6895
6896 if (!(ifp->if_eflags & IFEF_TXSTART)) {
6897 return ENXIO;
6898 }
6899
6900 *level = IFNET_THROTTLE_OFF;
6901
6902 ifq = ifp->if_snd;
6903 err = ifclassq_request(ifq, CLASSQRQ_THROTTLE, &req, false);
6904 *level = req.level;
6905
6906 return err;
6907 }
6908
6909 int
ifnet_set_throttle(struct ifnet * ifp,u_int32_t level)6910 ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
6911 {
6912 struct ifclassq *ifq;
6913 cqrq_throttle_t req = { 1, level };
6914 int err = 0;
6915
6916 if (!(ifp->if_eflags & IFEF_TXSTART)) {
6917 return ENXIO;
6918 }
6919
6920 ifq = ifp->if_snd;
6921
6922 switch (level) {
6923 case IFNET_THROTTLE_OFF:
6924 case IFNET_THROTTLE_OPPORTUNISTIC:
6925 break;
6926 default:
6927 return EINVAL;
6928 }
6929
6930 err = ifclassq_request(ifq, CLASSQRQ_THROTTLE, &req, false);
6931
6932 if (err == 0) {
6933 DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
6934 level);
6935 #if NECP
6936 necp_update_all_clients();
6937 #endif /* NECP */
6938 if (level == IFNET_THROTTLE_OFF) {
6939 ifnet_start(ifp);
6940 }
6941 }
6942
6943 return err;
6944 }
6945
/*
 * Handle the SIOCSIFLOG / SIOCGIFLOG ioctls.
 *
 * Set: requires PRIV_NET_INTERFACE_CONTROL; validates the level against
 * [IFNET_LOG_MIN, IFNET_LOG_MAX], masks the flags with IFNET_LOGF_MASK
 * (rejecting an empty result with EINVAL), then applies the parameters
 * via ifnet_set_log().  Get: copies the interface's current logging
 * parameters back into the ifreq.
 */
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}

		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
6993
/*
 * Apply logging parameters to an interface.  The requested facility
 * flags are merged with those already enabled, and facilities other
 * than IFNET_LOGF_DLIL are forwarded to the driver through its output
 * control callback when one is registered.  Setting the level to
 * IFNET_LOG_DEFAULT clears all facility flags.
 */
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		l.flags &= ~IFNET_LOGF_DLIL;	/* DLIL facility is ours */
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		/* assignment intentional: record the new level, then flags */
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
		    "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags, flags,
		    category, subcategory);
	}

	return err;
}
7051
7052 int
ifnet_get_log(struct ifnet * ifp,int32_t * level,uint32_t * flags,int32_t * category,int32_t * subcategory)7053 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
7054 int32_t *category, int32_t *subcategory)
7055 {
7056 if (level != NULL) {
7057 *level = ifp->if_log.level;
7058 }
7059 if (flags != NULL) {
7060 *flags = ifp->if_log.flags;
7061 }
7062 if (category != NULL) {
7063 *category = ifp->if_log.category;
7064 }
7065 if (subcategory != NULL) {
7066 *subcategory = ifp->if_log.subcategory;
7067 }
7068
7069 return 0;
7070 }
7071
7072 int
ifnet_notify_address(struct ifnet * ifp,int af)7073 ifnet_notify_address(struct ifnet *ifp, int af)
7074 {
7075 struct ifnet_notify_address_params na;
7076
7077 #if PF
7078 (void) pf_ifaddr_hook(ifp);
7079 #endif /* PF */
7080
7081 if (ifp->if_output_ctl == NULL) {
7082 return EOPNOTSUPP;
7083 }
7084
7085 bzero(&na, sizeof(na));
7086 na.address_family = (sa_family_t)af;
7087
7088 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7089 sizeof(na), &na);
7090 }
7091
7092 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)7093 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7094 {
7095 if (ifp == NULL || flowid == NULL) {
7096 return EINVAL;
7097 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7098 !ifnet_is_fully_attached(ifp)) {
7099 return ENXIO;
7100 }
7101
7102 *flowid = ifp->if_flowhash;
7103
7104 return 0;
7105 }
7106
7107 errno_t
ifnet_disable_output(struct ifnet * ifp)7108 ifnet_disable_output(struct ifnet *ifp)
7109 {
7110 int err = 0;
7111
7112 if (ifp == NULL) {
7113 return EINVAL;
7114 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7115 !ifnet_is_fully_attached(ifp)) {
7116 return ENXIO;
7117 }
7118
7119 lck_mtx_lock(&ifp->if_start_lock);
7120 if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
7121 ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
7122 } else if ((err = ifnet_fc_add(ifp)) == 0) {
7123 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
7124 }
7125 lck_mtx_unlock(&ifp->if_start_lock);
7126
7127 return err;
7128 }
7129
7130 errno_t
ifnet_enable_output(struct ifnet * ifp)7131 ifnet_enable_output(struct ifnet *ifp)
7132 {
7133 if (ifp == NULL) {
7134 return EINVAL;
7135 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7136 !ifnet_is_fully_attached(ifp)) {
7137 return ENXIO;
7138 }
7139
7140 ifnet_start_common(ifp, TRUE, FALSE);
7141 return 0;
7142 }
7143
/*
 * Flow-advisory entry point: the flow identified by flowhash may resume
 * transmitting.  Looks up (and detaches) the matching entry in the
 * ifnet flow-control tree; if the owning interface is still reachable
 * via an I/O reference and its flow hash still matches, marks a resume
 * as pending (unless flow control was already cleared) and re-enables
 * output.  The detached entry is freed before returning.
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	ifnet_ref_t ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_get_ioref(ifp)) {
		if (ifp->if_flowhash == flowhash) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
				ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
			}
			lck_mtx_unlock(&ifp->if_start_lock);
			(void) ifnet_enable_output(ifp);
		}
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
7172
7173 /*
7174 * Function to compare ifnet_fc_entries in ifnet flow control tree
7175 */
7176 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)7177 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7178 {
7179 return fc1->ifce_flowhash - fc2->ifce_flowhash;
7180 }
7181
/*
 * Register the interface in the flow-control tree, keyed by its flow
 * hash.  Returns 0 if the entry was added or is already present for
 * this ifp, and EAGAIN when a different interface already occupies the
 * same flow hash (hash collision; we refuse to add a second entry).
 * Takes ifnet_fc_lock internally.
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex: zalloc below may block */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
7225
/*
 * Look up and detach the flow-control entry for the given flow hash.
 * Returns the detached entry (the caller must release it with
 * ifnet_fc_entry_free()), or NULL when no entry exists or the owning
 * interface is no longer fully attached (in which case the entry is
 * freed here).  Takes ifnet_fc_lock internally.
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	ifnet_ref_t ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_fully_attached(ifp)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
7263
/* Return a flow-control entry to its zone. */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
7269
7270 static uint32_t
ifnet_calc_flowhash(struct ifnet * ifp)7271 ifnet_calc_flowhash(struct ifnet *ifp)
7272 {
7273 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
7274 uint32_t flowhash = 0;
7275
7276 if (ifnet_flowhash_seed == 0) {
7277 ifnet_flowhash_seed = RandomULong();
7278 }
7279
7280 bzero(&fh, sizeof(fh));
7281
7282 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
7283 fh.ifk_unit = ifp->if_unit;
7284 fh.ifk_flags = ifp->if_flags;
7285 fh.ifk_eflags = ifp->if_eflags;
7286 fh.ifk_capabilities = ifp->if_capabilities;
7287 fh.ifk_capenable = ifp->if_capenable;
7288 fh.ifk_output_sched_model = ifp->if_output_sched_model;
7289 fh.ifk_rand1 = RandomULong();
7290 fh.ifk_rand2 = RandomULong();
7291
7292 try_again:
7293 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
7294 if (flowhash == 0) {
7295 /* try to get a non-zero flowhash */
7296 ifnet_flowhash_seed = RandomULong();
7297 goto try_again;
7298 }
7299
7300 return flowhash;
7301 }
7302
/*
 * Install (or clear, when len == 0) the per-address-family network
 * signature on an interface.  Only AF_INET and AF_INET6 are supported.
 * Returns EINVAL for an unknown family or an oversized signature, and
 * ENOMEM when the per-family extension area has not been allocated.
 * flags is currently unused.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *__sized_by(len) data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
7364
/*
 * Copy out the per-address-family network signature of an interface.
 * On input, *len is the capacity of data; on success it is updated to
 * the signature's actual length.  Returns EINVAL for bad arguments, an
 * unknown family or an undersized buffer; ENOENT when no signature is
 * set; ENOMEM when the per-family extension area is missing.  *flags is
 * set to 0 on success when the caller supplied it.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *__sized_by(*len) data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			/* caller's buffer must hold the whole signature */
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			/* caller's buffer must hold the whole signature */
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
7425
/*
 * Install up to NAT64_MAX_NUM_PREFIXES NAT64 prefixes on an interface.
 * A slot with prefix_len == 0 clears that slot.  Each non-empty prefix
 * must use one of the standard NAT64 prefix lengths (32/40/48/56/64/96
 * bits, RFC 6052) and must not be scope-embedded; otherwise EINVAL.
 * Returns ENOMEM when the inet6 extension area is missing.  NECP
 * clients are notified when at least one prefix was installed.
 */
int
ifnet_set_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
7492
7493 int
ifnet_get_nat64prefix(struct ifnet * ifp,struct ipv6_prefix * __counted_by (NAT64_MAX_NUM_PREFIXES)prefixes)7494 ifnet_get_nat64prefix(struct ifnet *ifp,
7495 struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
7496 {
7497 int i, found_one = 0, error = 0;
7498
7499 if (ifp == NULL) {
7500 return EINVAL;
7501 }
7502
7503 if_inet6data_lock_shared(ifp);
7504
7505 if (IN6_IFEXTRA(ifp) == NULL) {
7506 error = ENOMEM;
7507 goto out;
7508 }
7509
7510 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
7511 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
7512 found_one = 1;
7513 }
7514 }
7515
7516 if (found_one == 0) {
7517 error = ENOENT;
7518 goto out;
7519 }
7520
7521 if (prefixes) {
7522 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
7523 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
7524 }
7525
7526 out:
7527 if_inet6data_lock_done(ifp);
7528
7529 return error;
7530 }
7531
7532 #if DEBUG || DEVELOPMENT
7533 /* Blob for sum16 verification */
/*
 * Opaque test vector fed to the checksum self-test below.  The bytes
 * happen to begin with a gzip header, but the contents are arbitrary —
 * only their checksums matter.
 */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
7569
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;         /* sumr has been computed at run time */
	uint16_t len;           /* number of bytes of sumdata to checksum */
	uint16_t sumr;          /* reference */
	uint16_t sumrp;         /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
7593 #define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7594
/*
 * Boot-time self-test for the 16-bit one's-complement checksum routines
 * (m_sum16(), in_cksum_mbuf_ref() and, when INET is built, b_sum16()):
 * for each test length in sumtbl and every byte alignment within an
 * 8-byte window, verify the computed sums against the precomputed
 * references, panicking on any mismatch.
 */
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	static_assert((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);               /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (uintptr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* Reference sum is computed lazily, once per length */
			if (!sumtbl[n].init) {
				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (uintptr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by aligment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
7683 #endif /* DEBUG || DEVELOPMENT */
7684
7685 #define CASE_STRINGIFY(x) case x: return #x
7686
/*
 * Map a KEV_DL_* kernel-event code to its symbolic name, or the empty
 * string for an unrecognized code.
 */
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
7721
7722 void
dlil_dt_tcall_fn(thread_call_param_t arg0,thread_call_param_t arg1)7723 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
7724 {
7725 #pragma unused(arg1)
7726 ifnet_ref_t ifp = arg0;
7727
7728 if (ifnet_get_ioref(ifp)) {
7729 nstat_ifnet_threshold_reached(ifp->if_index);
7730 ifnet_decr_iorefcnt(ifp);
7731 }
7732 }
7733
/*
 * Data-path hook that rate-limits "data threshold reached"
 * notifications to NetworkStatistics.  When the combined rx+tx byte
 * count has advanced past if_data_threshold since the last recorded
 * mark, atomically record the new mark (a lost CAS race simply means
 * another thread is handling it) and schedule the notification thread
 * call — delayed to the next threshold_interval boundary, or
 * immediately when no interval is configured.
 */
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
7763
7764
/* Forward per-flow interface statistics to the TCP layer. */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
7771
/* Atomically OR set_flags into *flags_p; returns the previous value. */
static inline u_int32_t
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
{
	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
}
7777
/* Atomically clear clear_flags in *flags_p; returns the previous value. */
static inline u_int32_t
_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
{
	return (u_int32_t)OSBitAndAtomic(~clear_flags, flags_p);
}
7783
/* Atomically set bits in if_eflags; returns the previous flag value. */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}
7789
/* Atomically clear bits in if_eflags (previous value is discarded). */
__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}
7795
/* Atomically set bits in if_xflags; returns the previous flag value. */
__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}
7801
/* Atomically clear bits in if_xflags; returns the previous flag value. */
__private_extern__ u_int32_t
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	return _clear_flags(&interface->if_xflags, clear_flags);
}
7807
/* Bump the interface's traffic-rule generation counter (relaxed order). */
__private_extern__ void
ifnet_update_traffic_rule_genid(ifnet_t ifp)
{
	os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
}
7813
7814 __private_extern__ boolean_t
ifnet_sync_traffic_rule_genid(ifnet_t ifp,uint32_t * genid)7815 ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
7816 {
7817 if (*genid != ifp->if_traffic_rule_genid) {
7818 *genid = ifp->if_traffic_rule_genid;
7819 return TRUE;
7820 }
7821 return FALSE;
7822 }
/* Publish the new inet traffic-rule count and bump the generation id. */
__private_extern__ void
ifnet_update_inet_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_inet_traffic_rule_count, count, relaxed);
	ifnet_update_traffic_rule_genid(ifp);
}
7829
/* Publish the new ethernet traffic-rule count and bump the generation id. */
__private_extern__ void
ifnet_update_eth_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	os_atomic_store(&ifp->if_eth_traffic_rule_count, count, relaxed);
	ifnet_update_traffic_rule_genid(ifp);
}
7836
7837 #if SKYWALK
7838 static bool
net_check_compatible_if_filter(struct ifnet * ifp)7839 net_check_compatible_if_filter(struct ifnet *ifp)
7840 {
7841 if (ifp == NULL) {
7842 if (net_api_stats.nas_iflt_attach_count > net_api_stats.nas_iflt_attach_os_count) {
7843 return false;
7844 }
7845 } else {
7846 if (ifp->if_flt_non_os_count > 0) {
7847 return false;
7848 }
7849 }
7850 return true;
7851 }
7852 #endif /* SKYWALK */
7853
7854 #if CONFIG_MBUF_MCACHE
/*
 * Advance the dump cursor after a scnprintf() that wrote k bytes:
 * shrink the remaining capacity (clen) and move the cursor (c); jump to
 * the caller's "done" label once the buffer is effectively full.
 * Relies on locals c, clen, k and a done: label in the caller.
 */
#define DUMP_BUF_CHK() { \
	clen -= k; \
	if (clen < 1) \
		goto done; \
	c += k; \
}
7861
7862 #if NETWORKING
int dlil_dump_top_if_qlen(char *__counted_by(str_len), int str_len);
/*
 * Debug dump helper: scan all attached interfaces and report, into str,
 * the interface with the deepest send queue (ifcq_len) and the one with
 * the deepest DLIL input queue.  Returns the number of bytes written.
 */
int
dlil_dump_top_if_qlen(char *__counted_by(str_len) str, int str_len)
{
	char *c = str;
	int k, clen = str_len;
	ifnet_ref_t top_ifcq_ifp = NULL;
	uint32_t top_ifcq_len = 0;
	ifnet_ref_t top_inq_ifp = NULL;
	uint32_t top_inq_len = 0;

	/* track the maxima across all interface indices */
	for (int ifidx = 1; ifidx < if_index; ifidx++) {
		ifnet_ref_t ifp = ifindex2ifnet[ifidx];
		struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

		if (ifp == NULL) {
			continue;
		}
		if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
			top_ifcq_len = ifp->if_snd->ifcq_len;
			top_ifcq_ifp = ifp;
		}
		if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
			top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
			top_inq_ifp = ifp;
		}
	}

	if (top_ifcq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
		    top_ifcq_len, top_ifcq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
	if (top_inq_ifp != NULL) {
		k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
		    top_inq_len, top_inq_ifp->if_xname);
		DUMP_BUF_CHK();
	}
done:
	return str_len - clen;
}
7904 #endif /* NETWORKING */
7905 #endif /* CONFIG_MBUF_MCACHE */
7906