1 /*
2 * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35 #include <ptrauth.h>
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/dlil_sysctl.h>
53 #include <net/dlil_var_private.h>
54 #include <net/if_arp.h>
55 #include <net/iptap.h>
56 #include <net/pktap.h>
57 #include <net/droptap.h>
58 #include <net/nwk_wq.h>
59 #include <sys/kern_event.h>
60 #include <sys/kdebug.h>
61 #include <sys/mcache.h>
62 #include <sys/syslog.h>
63 #include <sys/protosw.h>
64 #include <sys/priv.h>
65
66 #include <kern/assert.h>
67 #include <kern/task.h>
68 #include <kern/thread.h>
69 #include <kern/sched_prim.h>
70 #include <kern/locks.h>
71 #include <kern/zalloc.h>
72
73 #include <net/kpi_protocol.h>
74 #include <net/kpi_interface.h>
75 #include <net/if_types.h>
76 #include <net/if_ipsec.h>
77 #include <net/if_llreach.h>
78 #include <net/if_utun.h>
79 #include <net/kpi_interfacefilter.h>
80 #include <net/classq/classq.h>
81 #include <net/classq/classq_sfb.h>
82 #include <net/flowhash.h>
83 #include <net/ntstat.h>
84 #if SKYWALK
85 #include <skywalk/lib/net_filter_event.h>
86 #endif /* SKYWALK */
87 #include <net/net_api_stats.h>
88 #include <net/if_ports_used.h>
89 #include <net/if_vlan_var.h>
90 #include <netinet/in.h>
91 #if INET
92 #include <netinet/in_var.h>
93 #include <netinet/igmp_var.h>
94 #include <netinet/ip_var.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_var.h>
97 #include <netinet/udp.h>
98 #include <netinet/udp_var.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/in_pcb.h>
101 #include <netinet/in_tclass.h>
102 #include <netinet/ip.h>
103 #include <netinet/ip_icmp.h>
104 #include <netinet/icmp_var.h>
105 #endif /* INET */
106
107 #include <net/nat464_utils.h>
108 #include <netinet6/in6_var.h>
109 #include <netinet6/nd6.h>
110 #include <netinet6/mld6_var.h>
111 #include <netinet6/scope6_var.h>
112 #include <netinet/ip6.h>
113 #include <netinet/icmp6.h>
114 #include <net/pf_pbuf.h>
115 #include <libkern/OSAtomic.h>
116 #include <libkern/tree.h>
117
118 #include <dev/random/randomdev.h>
119 #include <machine/machine_routines.h>
120
121 #include <mach/thread_act.h>
122 #include <mach/sdt.h>
123
124 #if CONFIG_MACF
125 #include <sys/kauth.h>
126 #include <security/mac_framework.h>
127 #include <net/ethernet.h>
128 #include <net/firewire.h>
129 #endif
130
131 #if PF
132 #include <net/pfvar.h>
133 #endif /* PF */
134 #include <net/pktsched/pktsched.h>
135 #include <net/pktsched/pktsched_netem.h>
136
137 #if NECP
138 #include <net/necp.h>
139 #endif /* NECP */
140
141 #if SKYWALK
142 #include <skywalk/packet/packet_queue.h>
143 #include <skywalk/nexus/netif/nx_netif.h>
144 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
145 #endif /* SKYWALK */
146
147 #include <net/sockaddr_utils.h>
148
149 #include <os/log.h>
150
151 uint64_t if_creation_generation_count = 0;
152
153 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
154
155 dlil_ifnet_queue_t dlil_ifnet_head;
156
157 #if DEBUG
158 unsigned int ifnet_debug = 1; /* debugging (enabled) */
159 #else
160 unsigned int ifnet_debug; /* debugging (disabled) */
161 #endif /* !DEBUG */
162
163
164 static u_int32_t net_rtref;
165
166 static struct dlil_main_threading_info dlil_main_input_thread_info;
167 struct dlil_threading_info *__single dlil_main_input_thread;
168
169 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
170 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
171
172 static int ifnet_lookup(struct ifnet *);
173 static void if_purgeaddrs(struct ifnet *);
174
175 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
176 struct mbuf *, char *);
177 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
178 struct mbuf *);
179 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
180 mbuf_t *, const struct sockaddr *, void *,
181 IFNET_FRAME_TYPE_RW_T, IFNET_LLADDR_RW_T);
182 static void ifproto_media_event(struct ifnet *, protocol_family_t,
183 const struct kev_msg *);
184 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
185 unsigned long, void *);
186 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
187 struct sockaddr_dl *, size_t);
188 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
189 const struct sockaddr_dl *, const struct sockaddr *,
190 const struct sockaddr_dl *, const struct sockaddr *);
191
192 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
193 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
194 boolean_t poll, struct thread *tp);
195 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
196 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
197 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
198 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
199 protocol_family_t *);
200 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
201 const struct ifnet_demux_desc *, u_int32_t);
202 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
203 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
204 #if !XNU_TARGET_OS_OSX
205 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
206 const struct sockaddr *, IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
207 u_int32_t *, u_int32_t *);
208 #else /* XNU_TARGET_OS_OSX */
209 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
210 const struct sockaddr *,
211 IFNET_LLADDR_T, IFNET_FRAME_TYPE_T);
212 #endif /* XNU_TARGET_OS_OSX */
213 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
214 const struct sockaddr *,
215 IFNET_LLADDR_T, IFNET_FRAME_TYPE_T,
216 u_int32_t *, u_int32_t *);
217 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
218 static void ifp_if_free(struct ifnet *);
219 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
220
221
222
223 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
224 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
225 #if DEBUG || DEVELOPMENT
226 static void dlil_verify_sum16(void);
227 #endif /* DEBUG || DEVELOPMENT */
228
229
230 static void ifnet_detacher_thread_func(void *, wait_result_t);
231 static void ifnet_detacher_thread_cont(void *, wait_result_t);
232 static void ifnet_detach_final(struct ifnet *);
233 static void ifnet_detaching_enqueue(struct ifnet *);
234 static struct ifnet *ifnet_detaching_dequeue(void);
235
236 static void ifnet_start_thread_func(void *, wait_result_t);
237 static void ifnet_start_thread_cont(void *, wait_result_t);
238
239 static void ifnet_poll_thread_func(void *, wait_result_t);
240 static void ifnet_poll_thread_cont(void *, wait_result_t);
241
242 static errno_t ifnet_enqueue_common(struct ifnet *, struct ifclassq *,
243 classq_pkt_t *, boolean_t, boolean_t *);
244
245 static void ifp_src_route_copyout(struct ifnet *, struct route *);
246 static void ifp_src_route_copyin(struct ifnet *, struct route *);
247 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
248 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
249
250
251 /* The following are protected by dlil_ifnet_lock */
252 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
253 static u_int32_t ifnet_detaching_cnt;
254 static boolean_t ifnet_detaching_embryonic;
255 static void *ifnet_delayed_run; /* wait channel for detaching thread */
256
257 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
258 &dlil_lck_attributes);
259
260 static uint32_t ifnet_flowhash_seed;
261
/*
 * Input key folded into an interface's flow hash (see ifnet_calc_flowhash()).
 * The two random fields salt the hash so values are not predictable.
 */
struct ifnet_flowhash_key {
	char            ifk_name[IFNAMSIZ];     /* interface name */
	uint32_t        ifk_unit;               /* interface unit number */
	uint32_t        ifk_flags;              /* if_flags snapshot */
	uint32_t        ifk_eflags;             /* extended flags snapshot */
	uint32_t        ifk_capabilities;       /* capabilities snapshot */
	uint32_t        ifk_capenable;          /* enabled capabilities */
	uint32_t        ifk_output_sched_model; /* output scheduling model */
	uint32_t        ifk_rand1;              /* random salt #1 */
	uint32_t        ifk_rand2;              /* random salt #2 */
};
273
/* Flow control entry per interface */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;    /* ifnet_fc_tree linkage */
	u_int32_t ifce_flowhash;                /* key: ifnet_calc_flowhash() */
	ifnet_ref_t ifce_ifp;                   /* interface for this entry */
};
280
281 static uint32_t ifnet_calc_flowhash(struct ifnet *);
282 static int ifce_cmp(const struct ifnet_fc_entry *,
283 const struct ifnet_fc_entry *);
284 static int ifnet_fc_add(struct ifnet *);
285 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
286 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
287
288 /* protected by ifnet_fc_lock */
289 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
290 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
291 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
292
293 static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT);
294
295 extern void bpfdetach(struct ifnet *);
296
297
298 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
299 u_int32_t flags);
300 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
301 u_int32_t flags);
302
303
304 #if CONFIG_MACF
305 #if !XNU_TARGET_OS_OSX
306 int dlil_lladdr_ckreq = 1;
307 #else /* XNU_TARGET_OS_OSX */
308 int dlil_lladdr_ckreq = 0;
309 #endif /* XNU_TARGET_OS_OSX */
310 #endif /* CONFIG_MACF */
311
312
/*
 * Atomically bump the global ifnet_delay_start_disabled counter (declared
 * elsewhere).  NOTE(review): presumably a non-zero count disables the
 * delayed-start behavior of interface output — confirm against the readers
 * of this counter.
 */
static inline void
ifnet_delay_start_disabled_increment(void)
{
	OSIncrementAtomic(&ifnet_delay_start_disabled);
}
318
319 unsigned int net_rxpoll = 1;
320 unsigned int net_affinity = 1;
321 unsigned int net_async = 1; /* 0: synchronous, 1: asynchronous */
322
323 extern u_int32_t inject_buckets;
324
325 void
ifnet_filter_update_tso(struct ifnet * ifp,boolean_t filter_enable)326 ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
327 {
328 /*
329 * update filter count and route_generation ID to let TCP
330 * know it should reevalute doing TSO or not
331 */
332 if (filter_enable) {
333 OSAddAtomic(1, &ifp->if_flt_no_tso_count);
334 } else {
335 VERIFY(ifp->if_flt_no_tso_count != 0);
336 OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
337 }
338 routegenid_update();
339 }
340
341 #if SKYWALK
342
343 static bool net_check_compatible_if_filter(struct ifnet *ifp);
344
345 /* if_attach_nx flags defined in os_skywalk_private.h */
346 unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
347 unsigned int if_enable_fsw_ip_netagent =
348 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
349 unsigned int if_enable_fsw_transport_netagent =
350 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);
351
352 unsigned int if_netif_all =
353 ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);
354
355 /* Configure flowswitch to use max mtu sized buffer */
356 static bool fsw_use_max_mtu_buffer = false;
357
358
359 static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);
360
361 #include <skywalk/os_skywalk_private.h>
362
363 boolean_t
ifnet_nx_noauto(ifnet_t ifp)364 ifnet_nx_noauto(ifnet_t ifp)
365 {
366 return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
367 }
368
369 boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)370 ifnet_nx_noauto_flowswitch(ifnet_t ifp)
371 {
372 return ifnet_is_low_latency(ifp);
373 }
374
375 boolean_t
ifnet_is_low_latency(ifnet_t ifp)376 ifnet_is_low_latency(ifnet_t ifp)
377 {
378 return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
379 }
380
381 boolean_t
ifnet_needs_compat(ifnet_t ifp)382 ifnet_needs_compat(ifnet_t ifp)
383 {
384 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
385 return FALSE;
386 }
387 #if !XNU_TARGET_OS_OSX
388 /*
389 * To conserve memory, we plumb in the compat layer selectively; this
390 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
391 * In particular, we check for Wi-Fi Access Point.
392 */
393 if (IFNET_IS_WIFI(ifp)) {
394 /* Wi-Fi Access Point */
395 if (strcmp(ifp->if_name, "ap") == 0) {
396 return if_netif_all;
397 }
398 }
399 #else /* XNU_TARGET_OS_OSX */
400 #pragma unused(ifp)
401 #endif /* XNU_TARGET_OS_OSX */
402 return TRUE;
403 }
404
405 boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)406 ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
407 {
408 if (if_is_fsw_transport_netagent_enabled()) {
409 /* check if netagent has been manually enabled for ipsec/utun */
410 if (ifp->if_family == IFNET_FAMILY_IPSEC) {
411 return ipsec_interface_needs_netagent(ifp);
412 } else if (ifp->if_family == IFNET_FAMILY_UTUN) {
413 return utun_interface_needs_netagent(ifp);
414 }
415
416 /* check ifnet no auto nexus override */
417 if (ifnet_nx_noauto(ifp)) {
418 return FALSE;
419 }
420
421 /* check global if_attach_nx configuration */
422 switch (ifp->if_family) {
423 case IFNET_FAMILY_CELLULAR:
424 case IFNET_FAMILY_ETHERNET:
425 if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
426 return TRUE;
427 }
428 break;
429 default:
430 break;
431 }
432 }
433 return FALSE;
434 }
435
436 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)437 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
438 {
439 #pragma unused(ifp)
440 if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
441 return TRUE;
442 }
443 return FALSE;
444 }
445
446 boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)447 ifnet_needs_netif_netagent(ifnet_t ifp)
448 {
449 #pragma unused(ifp)
450 return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
451 }
452
453 static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,const char * func_str,uuid_t instance,uuid_t device)454 dlil_detach_nexus_instance(nexus_controller_t controller,
455 const char *func_str, uuid_t instance, uuid_t device)
456 {
457 errno_t err;
458
459 if (instance == NULL || uuid_is_null(instance)) {
460 return FALSE;
461 }
462
463 /* followed by the device port */
464 if (device != NULL && !uuid_is_null(device)) {
465 err = kern_nexus_ifdetach(controller, instance, device);
466 if (err != 0) {
467 DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
468 func_str, err);
469 }
470 }
471 err = kern_nexus_controller_free_provider_instance(controller,
472 instance);
473 if (err != 0) {
474 DLIL_PRINTF("%s free_provider_instance failed %d\n",
475 func_str, err);
476 }
477 return TRUE;
478 }
479
480 static boolean_t
dlil_detach_nexus(const char * func_str,uuid_t provider,uuid_t instance,uuid_t device)481 dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
482 uuid_t device)
483 {
484 boolean_t detached = FALSE;
485 nexus_controller_t controller = kern_nexus_shared_controller();
486 int err;
487
488 if (dlil_detach_nexus_instance(controller, func_str, instance,
489 device)) {
490 detached = TRUE;
491 }
492 if (provider != NULL && !uuid_is_null(provider)) {
493 detached = TRUE;
494 err = kern_nexus_controller_deregister_provider(controller,
495 provider);
496 if (err != 0) {
497 DLIL_PRINTF("%s deregister_provider %d\n",
498 func_str, err);
499 }
500 }
501 return detached;
502 }
503
/*
 * Register a nexus provider named "com.apple.<type>.<ifname>" under the
 * default domain provider for `type' (netif or flowswitch), then allocate
 * one instance of it.  On success *provider and *instance are filled in;
 * on failure neither is left registered/allocated.  Returns 0 or errno.
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t dom_prov;
	errno_t err;
	nexus_name_t provider_name;
	const char *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* undo the registration; secondary error deliberately ignored */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	/* NB: the success path also falls through here with err == 0 */
	return err;
}
553
/*
 * Create a netif nexus provider + instance for `ifp' and attach the
 * interface to it, recording the identifiers in `netif_nx'.  Returns TRUE
 * on success; on failure all partially created state is torn down and
 * FALSE is returned.
 */
static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller;
	errno_t err;
	/* NULL uuid argument forged to satisfy bounds-checked pointer ABI */
	unsigned char *empty_uuid = __unsafe_forge_bidi_indexable(unsigned char *, NULL, sizeof(uuid_t));

	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
		/* it's already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s already has nexus attached\n",
			    __func__, if_name(ifp));
			/* already attached */
		}
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}
	/* bind the nexus to this interface's index */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
	VERIFY(err == 0);

	controller = kern_nexus_shared_controller();

	/* create the netif provider and instance */
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
	if (err != 0) {
		goto failed;
	}

	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance, ifp,
	    empty_uuid, FALSE, &netif_nx->if_nif_attach);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
		    __func__, err);
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
		    netif_nx->if_nif_instance, empty_uuid);
		goto failed;
	}
	/*
	 * NOTE(review): the success path returns without calling
	 * kern_nexus_attr_destroy(attr) — verify whether the attributes are
	 * consumed by registration or leaked here (the failure path does
	 * destroy them).
	 */
	return TRUE;

failed:
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
609
610 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)611 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
612 {
613 if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
614 IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) {
615 goto failed;
616 }
617 switch (ifp->if_type) {
618 case IFT_CELLULAR:
619 case IFT_ETHER:
620 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
621 /* don't auto-attach */
622 goto failed;
623 }
624 break;
625 default:
626 /* don't auto-attach */
627 goto failed;
628 }
629 return dlil_attach_netif_nexus_common(ifp, netif_nx);
630
631 failed:
632 return FALSE;
633 }
634
/*
 * Tear down the netif nexus recorded in `nexus_netif' (device port,
 * instance, then provider).  Errors are logged by the callee.
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
642
643 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)644 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
645 {
646 struct ifreq ifr;
647 int error;
648
649 bzero(&ifr, sizeof(ifr));
650 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
651 if (error == 0) {
652 *ifdm_p = ifr.ifr_devmtu;
653 }
654 return error;
655 }
656
657 static inline void
_dlil_adjust_large_buf_size_for_tso(ifnet_t ifp,uint32_t * large_buf_size)658 _dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size)
659 {
660 uint32_t tso_v4_mtu = 0;
661 uint32_t tso_v6_mtu = 0;
662
663 if (!kernel_is_macos_or_server()) {
664 return;
665 }
666
667 /*
668 * Note that we are reading the real hwassist flags set by the driver
669 * and not the adjusted ones because nx_netif_host_adjust_if_capabilities()
670 * hasn't been called yet.
671 */
672 if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) {
673 tso_v4_mtu = ifp->if_tso_v4_mtu;
674 }
675 if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) {
676 tso_v6_mtu = ifp->if_tso_v6_mtu;
677 }
678
679 /*
680 * If the hardware supports TSO, adjust the large buf size to match the
681 * supported TSO MTU size. Note that only native interfaces set TSO MTU
682 * size today.
683 * For compat, there is a 16KB limit on large buf size, so it needs to be
684 * bounded by NX_FSW_DEF_LARGE_BUFSIZE. Note that no compat interfaces
685 * set TSO MTU size today.
686 */
687 if (SKYWALK_NATIVE(ifp)) {
688 if (tso_v4_mtu != 0 || tso_v6_mtu != 0) {
689 *large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu);
690 } else {
691 *large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu);
692 }
693 *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size);
694 } else {
695 *large_buf_size = MIN(NX_FSW_DEF_LARGE_BUFSIZE, *large_buf_size);
696 }
697 }
698
699 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint32_t * buf_size,bool * use_multi_buflet,uint32_t * large_buf_size)700 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size,
701 bool *use_multi_buflet, uint32_t *large_buf_size)
702 {
703 struct kern_pbufpool_memory_info rx_pp_info;
704 struct kern_pbufpool_memory_info tx_pp_info;
705 uint32_t if_max_mtu = 0;
706 uint32_t drv_buf_size;
707 struct ifdevmtu ifdm;
708 int err;
709
710 /*
711 * To perform intra-stack RX aggregation flowswitch needs to use
712 * multi-buflet packet.
713 */
714 *use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED();
715
716 *large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0;
717 /*
718 * IP over Thunderbolt interface can deliver the largest IP packet,
719 * but the driver advertises the MAX MTU as only 9K.
720 */
721 if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
722 if_max_mtu = IP_MAXPACKET;
723 goto skip_mtu_ioctl;
724 }
725
726 /* determine max mtu */
727 bzero(&ifdm, sizeof(ifdm));
728 err = dlil_siocgifdevmtu(ifp, &ifdm);
729 if (__improbable(err != 0)) {
730 DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
731 __func__, if_name(ifp));
732 /* use default flowswitch buffer size */
733 if_max_mtu = NX_FSW_BUFSIZE;
734 } else {
735 DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
736 ifdm.ifdm_max, ifdm.ifdm_current);
737 /* rdar://problem/44589731 */
738 if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
739 }
740
741 skip_mtu_ioctl:
742 if (if_max_mtu == 0) {
743 DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
744 __func__, if_name(ifp));
745 return EINVAL;
746 }
747 if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
748 DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
749 "max bufsize(%d)\n", __func__,
750 if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
751 return EINVAL;
752 }
753
754 /*
755 * for skywalk native driver, consult the driver packet pool also.
756 */
757 if (dlil_is_native_netif_nexus(ifp)) {
758 err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
759 &tx_pp_info);
760 if (err != 0) {
761 DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
762 __func__, if_name(ifp));
763 return ENXIO;
764 }
765 drv_buf_size = tx_pp_info.kpm_bufsize *
766 tx_pp_info.kpm_max_frags;
767 if (if_max_mtu > drv_buf_size) {
768 DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
769 "tx %d * %d) can't support max mtu(%d)\n", __func__,
770 if_name(ifp), rx_pp_info.kpm_bufsize,
771 rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
772 tx_pp_info.kpm_max_frags, if_max_mtu);
773 return EINVAL;
774 }
775 } else {
776 drv_buf_size = if_max_mtu;
777 }
778
779 if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
780 _CASSERT((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
781 *use_multi_buflet = true;
782 /* default flowswitch buffer size */
783 *buf_size = NX_FSW_BUFSIZE;
784 *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size);
785 } else {
786 *buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
787 }
788 _dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size);
789 ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE);
790 if (*buf_size >= *large_buf_size) {
791 *large_buf_size = 0;
792 }
793 return 0;
794 }
795
/*
 * Create a flowswitch nexus provider + instance for `ifp' and attach it to
 * the interface's netif, recording the identifiers in `nexus_fsw'.
 * Eligibility: not opted out, not low-latency, not VMNET, netif already
 * plumbed (IFCAP_SKYWALK), and globally enabled.  Returns TRUE on success;
 * on failure partially created state is torn down and FALSE is returned.
 */
static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
{
	nexus_attr_t __single attr = NULL;
	nexus_controller_t controller;
	errno_t err = 0;
	uuid_t netif;
	uint32_t buf_size = 0;
	uint32_t large_buf_size = 0;
	bool multi_buflet;

	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
	    IFNET_IS_VMNET(ifp)) {
		goto failed;
	}

	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
		/* not possible to attach (netif native/compat not plumbed) */
		goto failed;
	}

	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
		/* don't auto-attach */
		goto failed;
	}

	/* get the netif instance from the ifp */
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	/* compute the buffer geometry from MTU/TSO/pool constraints */
	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet, &large_buf_size);
	if (err != 0) {
		goto failed;
	}
	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));
	ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE);

	/* Configure flowswitch buffer size */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
	    large_buf_size);
	VERIFY(err == 0);

	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	/* create the flowswitch provider and instance */
	controller = kern_nexus_shared_controller();
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
	if (err != 0) {
		goto failed;
	}

	/* attach the device port */
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
		goto failed;
	}
	/*
	 * NOTE(review): as in dlil_attach_netif_nexus_common(), the success
	 * path does not destroy `attr' — confirm ownership semantics.
	 */
	return TRUE;

failed:
	/* err == 0 here means we declined to attach rather than failed */
	if (err != 0) {
		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
		    __func__, if_name(ifp), err);
	} else {
		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
		    __func__, if_name(ifp));
	}
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
894
/*
 * Attach a flowswitch nexus to `ifp' if one isn't recorded yet.  Holds an
 * I/O reference on the interface across the attach and publishes the new
 * nexus identifiers under the ifnet lock.  Returns TRUE if an attach
 * happened here.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t attached = FALSE;
	if_nexus_flowswitch nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s\n", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model\n",
		    if_name(ifp));
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));
	/* refcnt arg 1: take an I/O ref, paired with ifnet_decr_iorefcnt() */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s not attached",
		    __func__, ifp->if_xname);
		goto done;
	}
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance)) {
		attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
		if (attached) {
			/* publish the new nexus state under the ifnet lock */
			ifnet_lock_exclusive(ifp);
			ifp->if_nx_flowswitch = nexus_fsw;
			ifnet_lock_done(ifp);
		}
	}
	ifnet_decr_iorefcnt(ifp);

done:
	return attached;
}
936
/*
 * Tear down the flowswitch nexus recorded in `nexus_fsw' (device port,
 * instance, then provider).  Errors are logged by the callee.
 */
__attribute__((noinline))
static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
{
	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
}
944
945 __attribute__((noinline))
946 static void
dlil_netif_detach_notify(ifnet_t ifp)947 dlil_netif_detach_notify(ifnet_t ifp)
948 {
949 ifnet_detach_notify_cb_t notify = NULL;
950 void *__single arg = NULL;
951
952 ifnet_get_detach_notify(ifp, ¬ify, &arg);
953 if (notify == NULL) {
954 DTRACE_SKYWALK1(no__notify, ifnet_t, ifp);
955 return;
956 }
957 (*notify)(arg);
958 }
959
/*
 * Quiesce the interface's datapath, then tear down any attached flowswitch
 * and netif nexuses (flowswitch first, since it sits on top of the netif),
 * and resume the datapath.  The ASSERTs enforce that the nexus identifiers
 * are either all set or all null for each nexus.
 */
__attribute__((noinline))
static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
{
	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
	if_nexus_netif *nx_netif = &ifp->if_nx_netif;

	/* block and drain data movement while nexuses are torn down */
	ifnet_datamov_suspend_and_drain(ifp);
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
	} else {
		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
	}

	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
	} else {
		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
	}
	ifnet_datamov_resume(ifp);
}
989
990 boolean_t
ifnet_add_netagent(ifnet_t ifp)991 ifnet_add_netagent(ifnet_t ifp)
992 {
993 int error;
994
995 error = kern_nexus_interface_add_netagent(ifp);
996 os_log(OS_LOG_DEFAULT,
997 "kern_nexus_interface_add_netagent(%s) returned %d",
998 ifp->if_xname, error);
999 return error == 0;
1000 }
1001
1002 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1003 ifnet_remove_netagent(ifnet_t ifp)
1004 {
1005 int error;
1006
1007 error = kern_nexus_interface_remove_netagent(ifp);
1008 os_log(OS_LOG_DEFAULT,
1009 "kern_nexus_interface_remove_netagent(%s) returned %d",
1010 ifp->if_xname, error);
1011 return error == 0;
1012 }
1013
1014 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1015 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1016 {
1017 if (!IF_FULLY_ATTACHED(ifp)) {
1018 return FALSE;
1019 }
1020 return dlil_attach_flowswitch_nexus(ifp);
1021 }
1022
1023 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)1024 ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1025 {
1026 if_nexus_flowswitch nexus_fsw;
1027
1028 ifnet_lock_exclusive(ifp);
1029 nexus_fsw = ifp->if_nx_flowswitch;
1030 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1031 ifnet_lock_done(ifp);
1032 return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1033 nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1034 }
1035
1036 void
ifnet_attach_native_flowswitch(ifnet_t ifp)1037 ifnet_attach_native_flowswitch(ifnet_t ifp)
1038 {
1039 if (!dlil_is_native_netif_nexus(ifp)) {
1040 /* not a native netif */
1041 return;
1042 }
1043 ifnet_attach_flowswitch_nexus(ifp);
1044 }
1045
1046 int
ifnet_set_flowswitch_rx_callback(ifnet_t ifp,ifnet_fsw_rx_cb_t cb,void * arg)1047 ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg)
1048 {
1049 lck_mtx_lock(&ifp->if_delegate_lock);
1050 while (ifp->if_fsw_rx_cb_ref > 0) {
1051 DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
1052 (void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
1053 (PZERO + 1), __FUNCTION__, NULL);
1054 DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
1055 }
1056 ifp->if_fsw_rx_cb = cb;
1057 ifp->if_fsw_rx_cb_arg = arg;
1058 lck_mtx_unlock(&ifp->if_delegate_lock);
1059 return 0;
1060 }
1061
1062 int
ifnet_get_flowswitch_rx_callback(ifnet_t ifp,ifnet_fsw_rx_cb_t * cbp,void ** argp)1063 ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp)
1064 {
1065 /*
1066 * This is for avoiding the unnecessary lock acquire for interfaces
1067 * not used by a redirect interface.
1068 */
1069 if (ifp->if_fsw_rx_cb == NULL) {
1070 return ENOENT;
1071 }
1072 lck_mtx_lock(&ifp->if_delegate_lock);
1073 if (ifp->if_fsw_rx_cb == NULL) {
1074 lck_mtx_unlock(&ifp->if_delegate_lock);
1075 return ENOENT;
1076 }
1077 *cbp = ifp->if_fsw_rx_cb;
1078 *argp = ifp->if_fsw_rx_cb_arg;
1079 ifp->if_fsw_rx_cb_ref++;
1080 lck_mtx_unlock(&ifp->if_delegate_lock);
1081 return 0;
1082 }
1083
1084 void
ifnet_release_flowswitch_rx_callback(ifnet_t ifp)1085 ifnet_release_flowswitch_rx_callback(ifnet_t ifp)
1086 {
1087 lck_mtx_lock(&ifp->if_delegate_lock);
1088 if (--ifp->if_fsw_rx_cb_ref == 0) {
1089 wakeup(&ifp->if_fsw_rx_cb_ref);
1090 }
1091 lck_mtx_unlock(&ifp->if_delegate_lock);
1092 }
1093
1094 int
ifnet_set_delegate_parent(ifnet_t difp,ifnet_t parent)1095 ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent)
1096 {
1097 lck_mtx_lock(&difp->if_delegate_lock);
1098 while (difp->if_delegate_parent_ref > 0) {
1099 DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
1100 (void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
1101 (PZERO + 1), __FUNCTION__, NULL);
1102 DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
1103 }
1104 difp->if_delegate_parent = parent;
1105 lck_mtx_unlock(&difp->if_delegate_lock);
1106 return 0;
1107 }
1108
1109 int
ifnet_get_delegate_parent(ifnet_t difp,ifnet_t * parentp)1110 ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp)
1111 {
1112 lck_mtx_lock(&difp->if_delegate_lock);
1113 if (difp->if_delegate_parent == NULL) {
1114 lck_mtx_unlock(&difp->if_delegate_lock);
1115 return ENOENT;
1116 }
1117 *parentp = difp->if_delegate_parent;
1118 difp->if_delegate_parent_ref++;
1119 lck_mtx_unlock(&difp->if_delegate_lock);
1120 return 0;
1121 }
1122
1123 void
ifnet_release_delegate_parent(ifnet_t difp)1124 ifnet_release_delegate_parent(ifnet_t difp)
1125 {
1126 lck_mtx_lock(&difp->if_delegate_lock);
1127 if (--difp->if_delegate_parent_ref == 0) {
1128 wakeup(&difp->if_delegate_parent_ref);
1129 }
1130 lck_mtx_unlock(&difp->if_delegate_lock);
1131 }
1132
1133 __attribute__((noinline))
1134 void
ifnet_set_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1135 ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1136 {
1137 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1138 ifp->if_detach_notify = notify;
1139 ifp->if_detach_notify_arg = arg;
1140 }
1141
1142 __attribute__((noinline))
1143 void
ifnet_get_detach_notify_locked(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1144 ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1145 {
1146 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
1147 *notifyp = ifp->if_detach_notify;
1148 *argp = ifp->if_detach_notify_arg;
1149 }
1150
1151 __attribute__((noinline))
1152 void
ifnet_set_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t notify,void * arg)1153 ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg)
1154 {
1155 ifnet_lock_exclusive(ifp);
1156 ifnet_set_detach_notify_locked(ifp, notify, arg);
1157 ifnet_lock_done(ifp);
1158 }
1159
1160 __attribute__((noinline))
1161 void
ifnet_get_detach_notify(ifnet_t ifp,ifnet_detach_notify_cb_t * notifyp,void ** argp)1162 ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp)
1163 {
1164 ifnet_lock_exclusive(ifp);
1165 ifnet_get_detach_notify_locked(ifp, notifyp, argp);
1166 ifnet_lock_done(ifp);
1167 }
1168 #endif /* SKYWALK */
1169
/*
 * Sanity-check an inbound mbuf before DLIL input processing: the mbuf
 * must carry a pkthdr, and its recorded receive interface must match
 * the interface it is being input on (the loopback interface accepts
 * packets with any rcvif).  A violation panics the system.
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	ifnet_ref_t _rcvif = mbuf_pkthdr_rcvif(m);                      \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}
1178
/* link-speed units, in bits per second */
#define MBPS (1ULL * 1000 * 1000)
#define GBPS (MBPS * 1000)

/* per-speed-tier RX poll watermarks */
struct rxpoll_time_tbl {
	u_int64_t speed; /* downlink speed */
	u_int32_t plowat; /* packets low watermark */
	u_int32_t phiwat; /* packets high watermark */
	u_int32_t blowat; /* bytes low watermark */
	u_int32_t bhiwat; /* bytes high watermark */
};

/*
 * Default RX poll watermarks indexed by downlink speed; the entry with
 * speed == 0 terminates the table.  Consulted by
 * dlil_rxpoll_update_params() when the caller does not supply values.
 */
static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};
1198
1199 int
proto_hash_value(u_int32_t protocol_family)1200 proto_hash_value(u_int32_t protocol_family)
1201 {
1202 /*
1203 * dlil_proto_unplumb_all() depends on the mapping between
1204 * the hash bucket index and the protocol family defined
1205 * here; future changes must be applied there as well.
1206 */
1207 switch (protocol_family) {
1208 case PF_INET:
1209 return 0;
1210 case PF_INET6:
1211 return 1;
1212 case PF_VLAN:
1213 return 2;
1214 case PF_UNSPEC:
1215 default:
1216 return 3;
1217 }
1218 }
1219
1220 __private_extern__ int
dlil_post_msg(struct ifnet * ifp,u_int32_t event_subclass,u_int32_t event_code,struct net_event_data * event_data,u_int32_t event_data_len,boolean_t suppress_generation)1221 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1222 u_int32_t event_code, struct net_event_data *event_data,
1223 u_int32_t event_data_len, boolean_t suppress_generation)
1224 {
1225 struct net_event_data ev_data;
1226 struct kev_msg ev_msg;
1227
1228 bzero(&ev_msg, sizeof(ev_msg));
1229 bzero(&ev_data, sizeof(ev_data));
1230 /*
1231 * a net event always starts with a net_event_data structure
1232 * but the caller can generate a simple net event or
1233 * provide a longer event structure to post
1234 */
1235 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1236 ev_msg.kev_class = KEV_NETWORK_CLASS;
1237 ev_msg.kev_subclass = event_subclass;
1238 ev_msg.event_code = event_code;
1239
1240 if (event_data == NULL) {
1241 event_data = &ev_data;
1242 event_data_len = sizeof(struct net_event_data);
1243 }
1244
1245 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1246 event_data->if_family = ifp->if_family;
1247 event_data->if_unit = (u_int32_t)ifp->if_unit;
1248
1249 ev_msg.dv[0].data_length = event_data_len;
1250 ev_msg.dv[0].data_ptr = event_data;
1251 ev_msg.dv[1].data_length = 0;
1252
1253 bool update_generation = true;
1254 if (event_subclass == KEV_DL_SUBCLASS) {
1255 /* Don't update interface generation for frequent link quality and state changes */
1256 switch (event_code) {
1257 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1258 case KEV_DL_RRC_STATE_CHANGED:
1259 case KEV_DL_PRIMARY_ELECTED:
1260 update_generation = false;
1261 break;
1262 default:
1263 break;
1264 }
1265 }
1266
1267 /*
1268 * Some events that update generation counts might
1269 * want to suppress generation count.
1270 * One example is node presence/absence where we still
1271 * issue kernel event for the invocation but want to avoid
1272 * expensive operation of updating generation which triggers
1273 * NECP client updates.
1274 */
1275 if (suppress_generation) {
1276 update_generation = false;
1277 }
1278
1279 return dlil_event_internal(ifp, &ev_msg, update_generation);
1280 }
1281
/*
 * Reset all RX-poll state on an interface back to its defaults:
 * cancel the poll cycle, force interrupt (poll-off) mode, and clear
 * every accumulated statistic and timestamp.
 */
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	/* cancel any scheduled poll cycle and clear mode/state flags */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	/* zero the accumulated poller statistics */
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	/* and all poll-related timestamps */
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
1300
1301
1302 #if SKYWALK
1303 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)1304 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
1305 enum net_filter_event_subsystems state)
1306 {
1307 evhlog(debug, "%s: eventhandler saw event type=net_filter_event_state event_code=0x%d",
1308 __func__, state);
1309
1310 bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent;
1311 if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) {
1312 if_enable_fsw_transport_netagent = 1;
1313 } else {
1314 if_enable_fsw_transport_netagent = 0;
1315 }
1316 if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) {
1317 kern_nexus_update_netagents();
1318 } else if (!if_enable_fsw_transport_netagent) {
1319 necp_update_all_clients();
1320 }
1321 }
1322 #endif /* SKYWALK */
1323
/*
 * One-time initialization of the DLIL (data link interface layer):
 * compile-time layout/flag checks, boot-arg processing, Skywalk
 * netagent policy selection, subsystem initialization, and creation
 * of the main input and detacher kernel threads.  Returns only after
 * every created thread has been scheduled at least once.
 */
void
dlil_init(void)
{
	thread_t __single thread = THREAD_NULL;

	dlil_main_input_thread = (struct dlil_threading_info *) &dlil_main_input_thread_info;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	_CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	_CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	_CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	_CASSERT(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET);
	_CASSERT(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL);
	_CASSERT(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* tunables that may be overridden via boot-args */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	PE_parse_boot_argn("if_link_heuristics", &if_link_heuristics_flags, sizeof(if_link_heuristics_flags));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled. This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	/* report when the boot-arg overrode the device-tree setting */
	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
	if (kernel_is_macos_or_server() && if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */

	dlil_allocation_zones_init();
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	/* global interface list heads */
	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize droptap interface */
	droptap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
1620
/*
 * Attach an interface filter to ifp and return a reference to it via
 * filter_ref.  The filter's callbacks are copied from if_filter, except
 * that input/output/event/ioctl callbacks are withheld for internal
 * coprocessor and management interfaces.  Returns 0 on success, ENXIO
 * if the interface is not (or no longer) attached.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	/* take an I/O reference; released below once the filter is linked */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	filter = dlif_filt_alloc();
	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 * and for management interfaces
	 */
	if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	/* detach callback is always installed */
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		/* track third-party (non-OS) filters per interface */
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if SKYWALK
	/* re-evaluate whether remaining filters are Skywalk-compatible */
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	/* drop the I/O reference taken by ifnet_is_attached() above */
	ifnet_decr_iorefcnt(ifp);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		dlif_filt_free(filter);
	}

	return retval;
}
1712
/*
 * Detach an interface filter.  With detached == 0 this is an explicit
 * detach: the filter is located on its interface's list (validating the
 * reference), unlinked under the filter monitor, and destroyed; EINVAL
 * is returned if the filter is not found on any interface.  With
 * detached != 0 we are called from ifnet_detach_final() after the list
 * has already been emptied, so only the bookkeeping and destruction
 * steps run.  In both cases the filter's filt_detached callback fires
 * and the filter memory is freed.
 */
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_ref_t ifp = NULL;

		/* search every attached interface for this filter */
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1; /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				/* re-acquire and enter the filter monitor */
				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
					VERIFY(ifp->if_flt_non_os_count != 0);
					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
				}
				/*
				 * Decrease filter count and route_generation
				 * ID to let TCP know it should reevalute doing
				 * TSO or not.
				 */
				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
					ifnet_filter_update_tso(ifp, FALSE);
				}
				/*
				 * When we remove the bridge's interface filter,
				 * clear the field in the ifnet.
				 */
				if ((filter->filt_flags & DLIL_IFF_BRIDGE)
				    != 0) {
					ifp->if_bridge = NULL;
				}
				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	} else {
		ifnet_ref_t ifp = filter->filt_ifp;
		/*
		 * Here we are called from ifnet_detach_final(); the
		 * caller had emptied if_flt_head and we're doing an
		 * implicit filter detach because the interface is
		 * about to go away. Make sure to adjust the counters
		 * in this case. We don't need the protection of the
		 * filter monitor since we're called as part of the
		 * final detach in the context of the detacher thread.
		 */
		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
			VERIFY(ifp->if_flt_non_os_count != 0);
			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
		}
		/*
		 * Decrease filter count and route_generation
		 * ID to let TCP know it should reevalute doing
		 * TSO or not.
		 */
		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
			ifnet_filter_update_tso(ifp, FALSE);
		}
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	/* undo the attach-time statistics accounting */
	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
	}
#if SKYWALK
	/* re-evaluate whether remaining filters are Skywalk-compatible */
	if (kernel_is_macos_or_server()) {
		net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
		    net_check_compatible_if_filter(NULL));
	}
#endif /* SKYWALK */

	/* Free the filter */
	dlif_filt_free(filter);
	filter = NULL;
done:
	/* NOTE(review): filter is NULL on the destroy path, so this
	 * message can only fire for the EINVAL path where filter was
	 * never freed. */
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
1843
1844 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)1845 dlil_detach_filter(interface_filter_t filter)
1846 {
1847 if (filter == NULL) {
1848 return;
1849 }
1850 dlil_detach_filter_internal(filter, 0);
1851 }
1852
1853 __private_extern__ boolean_t
dlil_has_ip_filter(void)1854 dlil_has_ip_filter(void)
1855 {
1856 boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0);
1857
1858 VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count);
1859
1860 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
1861 return has_filter;
1862 }
1863
1864 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)1865 dlil_has_if_filter(struct ifnet *ifp)
1866 {
1867 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
1868 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
1869 return has_filter;
1870 }
1871
1872 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)1873 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
1874 {
1875 if (p != NULL) {
1876 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
1877 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
1878 return EINVAL;
1879 }
1880 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
1881 p->packets_lowat >= p->packets_hiwat) {
1882 return EINVAL;
1883 }
1884 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
1885 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
1886 return EINVAL;
1887 }
1888 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
1889 p->bytes_lowat >= p->bytes_hiwat) {
1890 return EINVAL;
1891 }
1892 if (p->interval_time != 0 &&
1893 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
1894 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
1895 }
1896 }
1897 return 0;
1898 }
1899
/*
 * Recompute the RX polling parameters of an interface.
 *
 * When the input link rate is zero and the caller supplied no parameters,
 * polling is effectively disabled: low watermarks go to 0, high watermarks
 * to UINT32_MAX, and the interval to the minimum.  Otherwise parameters
 * are auto-tuned from the rxpoll_tbl entry matching the link rate, with
 * non-zero caller-supplied values taking precedence (subject to the
 * global overrides noted inline below).
 *
 * Expected to run with the input thread lock held when called via
 * dlil_rxpoll_set_params() — see the assertion there.
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;    /* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* select the highest table entry not exceeding the link rate */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/* a non-zero global if_rxpoll_max overrides the caller's limit */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ? if_rxpoll_max : p->packets_limit);
		/* a non-default if_rxpoll_interval_time likewise wins */
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert the nanosecond hold times into timespec form on the ifnet */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
1969
1970 /*
1971 * Must be called on an attached ifnet (caller is expected to check.)
1972 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
1973 */
1974 errno_t
dlil_rxpoll_set_params(struct ifnet * ifp,struct ifnet_poll_params * p,boolean_t locked)1975 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
1976 boolean_t locked)
1977 {
1978 errno_t err;
1979 struct dlil_threading_info *inp;
1980
1981 VERIFY(ifp != NULL);
1982 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
1983 return ENXIO;
1984 }
1985 err = dlil_rxpoll_validate_params(p);
1986 if (err != 0) {
1987 return err;
1988 }
1989
1990 if (!locked) {
1991 lck_mtx_lock(&inp->dlth_lock);
1992 }
1993 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
1994 /*
1995 * Normally, we'd reset the parameters to the auto-tuned values
1996 * if the the input thread detects a change in link rate. If the
1997 * driver provides its own parameters right after a link rate
1998 * changes, but before the input thread gets to run, we want to
1999 * make sure to keep the driver's values. Clearing if_poll_update
2000 * will achieve that.
2001 */
2002 if (p != NULL && !locked && ifp->if_poll_update != 0) {
2003 ifp->if_poll_update = 0;
2004 }
2005 dlil_rxpoll_update_params(ifp, p);
2006 if (!locked) {
2007 lck_mtx_unlock(&inp->dlth_lock);
2008 }
2009 return 0;
2010 }
2011
2012 /*
2013 * Must be called on an attached ifnet (caller is expected to check.)
2014 */
2015 errno_t
dlil_rxpoll_get_params(struct ifnet * ifp,struct ifnet_poll_params * p)2016 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2017 {
2018 struct dlil_threading_info *inp;
2019
2020 VERIFY(ifp != NULL && p != NULL);
2021 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
2022 return ENXIO;
2023 }
2024
2025 bzero(p, sizeof(*p));
2026
2027 lck_mtx_lock(&inp->dlth_lock);
2028 p->packets_limit = ifp->if_rxpoll_plim;
2029 p->packets_lowat = ifp->if_rxpoll_plowat;
2030 p->packets_hiwat = ifp->if_rxpoll_phiwat;
2031 p->bytes_lowat = ifp->if_rxpoll_blowat;
2032 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
2033 p->interval_time = ifp->if_rxpoll_ival;
2034 lck_mtx_unlock(&inp->dlth_lock);
2035
2036 return 0;
2037 }
2038
2039 errno_t
ifnet_input(struct ifnet * ifp,struct mbuf * m_head,const struct ifnet_stat_increment_param * s)2040 ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2041 const struct ifnet_stat_increment_param *s)
2042 {
2043 return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
2044 }
2045
2046 errno_t
ifnet_input_extended(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s)2047 ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2048 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2049 {
2050 return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
2051 }
2052
2053 errno_t
ifnet_input_poll(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s)2054 ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
2055 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2056 {
2057 return ifnet_input_common(ifp, m_head, m_tail, s,
2058 (m_head != NULL), TRUE);
2059 }
2060
/*
 * Common ingress path shared by ifnet_input(), ifnet_input_extended()
 * and ifnet_input_poll().
 *
 * m_head/m_tail: head (and, for the extended variant, tail) of the
 *     mbuf packet chain; m_head may be NULL only in the polling case.
 * s:    driver-supplied statistics; required when 'ext' is TRUE.
 * ext:  extended variant — trust s->packets_in/bytes_in instead of
 *       walking the chain (unless sanity checking is enabled).
 * poll: invoked from the polling path.
 *
 * On any parameter error the chain is freed and EINVAL is returned.
 * Otherwise an IO refcnt is held across the call into the interface's
 * dlil input function to keep ifp from being detached.
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	/* non-polling callers must supply packets; ext callers must supply stats */
	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail given: walk the chain to find it and count pkts/bytes */
		last = m_head;
		while (m_head != NULL) {
			m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* recount the chain to cross-check the driver's stats */
			last = m_head;
			while (1) {
				m_add_hdr_crumb_interface_input(last, ifp->if_index, false);
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-provided counts */
			m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_add_hdr_crumb_interface_input(m_head, ifp->if_index, true);
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	/* normalize into a local stats block with our computed counts */
	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	/* input disabled: account the packets, then drop them */
	if (ifp->if_xflags & IFXF_DISABLE_INPUT) {
		m_freem_list(m_head);

		os_atomic_add(&ifp->if_data.ifi_ipackets, _s.packets_in, relaxed);
		os_atomic_add(&ifp->if_data.ifi_ibytes, _s.bytes_in, relaxed);

		goto done;
	}

	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

done:
	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
2189
2190
/*
 * Request a run of the interface's starter thread.
 *
 * resetfc: clear the flow-controlled state before deciding to wake.
 * ignore_delay: set IFSF_NO_DELAY so the starter skips delayed-start
 *     batching on its next pass.
 *
 * No-op for interfaces without a starter thread (IFEF_TXSTART clear),
 * or when the interface is currently flow controlled and resetfc is
 * not requested.
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc, boolean_t ignore_delay)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (ignore_delay) {
		ifp->if_start_flags |= IFSF_NO_DELAY;
	}
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	/* record the request even if we don't wake the thread now */
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
2223
2224 void
ifnet_start(struct ifnet * ifp)2225 ifnet_start(struct ifnet *ifp)
2226 {
2227 ifnet_start_common(ifp, FALSE, FALSE);
2228 }
2229
2230 void
ifnet_start_ignore_delay(struct ifnet * ifp)2231 ifnet_start_ignore_delay(struct ifnet *ifp)
2232 {
2233 ifnet_start_common(ifp, FALSE, TRUE);
2234 }
2235
/*
 * Entry point of the per-interface starter thread (created via
 * kernel_thread_start elsewhere).  Names the thread, optionally sets
 * up affinity for lo0, then parks in the embryonic state and hands
 * control to ifnet_start_thread_cont() as a thread continuation.
 * Never returns.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	ifnet_ref_t ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), __unsafe_null_terminated_from_indexable(thread_name));

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag. This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *__single tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	/* park in the embryonic state and hand off to the continuation */
	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2301
/*
 * Continuation body of the starter thread: services start requests by
 * invoking the driver's if_start routine until no more requests are
 * pending, then blocks again on if_start_thread with an optional
 * deadline (TBR cycle or delayed-start timeout).  Terminates when
 * IFSF_TERMINATING is set or the wait is interrupted.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	ifnet_ref_t ifp = v;
	struct ifclassq *ifq = ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
		goto terminate;
	}

	if (__improbable(ifp->if_start_embryonic)) {
		/* first wakeup: leave embryonic state and report readiness */
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		u_int32_t req = ifp->if_start_req;
		/* batch small queues: defer the start until more packets queue up */
		if ((ifp->if_start_flags & IFSF_NO_DELAY) == 0 &&
		    !IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		}
		ifp->if_start_flags &= ~IFSF_NO_DELAY;
		ifp->if_start_delayed = 0;
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
#define _IFSF_DISABLED  (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

#if SKYWALK
	/*
	 * Wakeup any waiters, e.g. any threads waiting to
	 * detach the interface from the flowswitch, etc.
	 */
	if (ifp->if_start_waiters != 0) {
		ifp->if_start_waiters = 0;
		wakeup(&ifp->if_start_waiters);
	}
#endif /* SKYWALK */
	if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts = NULL;

		/* TBR active with queued packets: wake again on the restart cycle */
		if (ts == NULL) {
			ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
			    &ifp->if_start_cycle : NULL);
		}

		/* delayed start pending: wake after the configured timeout */
		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		/* a zero timespec means "no deadline" */
		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);

		/* clear if_start_thread to allow termination to continue */
		ASSERT(ifp->if_start_thread != THREAD_NULL);
		ifp->if_start_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2447
2448 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)2449 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2450 {
2451 if (ts == NULL) {
2452 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
2453 } else {
2454 *(&ifp->if_start_cycle) = *ts;
2455 }
2456
2457 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2458 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
2459 if_name(ifp), ts->tv_nsec);
2460 }
2461 }
2462
2463 static inline void
ifnet_poll_wakeup(struct ifnet * ifp)2464 ifnet_poll_wakeup(struct ifnet *ifp)
2465 {
2466 LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
2467
2468 ifp->if_poll_req++;
2469 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
2470 ifp->if_poll_thread != THREAD_NULL) {
2471 wakeup_one((caddr_t)&ifp->if_poll_thread);
2472 }
2473 }
2474
2475 void
ifnet_poll(struct ifnet * ifp)2476 ifnet_poll(struct ifnet *ifp)
2477 {
2478 /*
2479 * If the poller thread is inactive, signal it to do work.
2480 */
2481 lck_mtx_lock_spin(&ifp->if_poll_lock);
2482 ifnet_poll_wakeup(ifp);
2483 lck_mtx_unlock(&ifp->if_poll_lock);
2484 }
2485
/*
 * Entry point of the per-interface poller thread.  Names the thread,
 * parks in the embryonic state, and hands control to
 * ifnet_poll_thread_cont() as a thread continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	ifnet_ref_t ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, __unsafe_null_terminated_from_indexable(thread_name));

	/* park in the embryonic state and hand off to the continuation */
	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2514
/*
 * Continuation body of the poller thread: services poll requests by
 * calling the driver's if_input_poll routine and feeding any harvested
 * packets into ifnet_input_common(), until no more requests are
 * pending.  Then blocks again on if_poll_thread, with a deadline taken
 * from if_poll_cycle when one is configured.  Terminates when
 * IF_POLLF_TERMINATING is set or the wait is interrupted.  Never
 * returns.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	ifnet_ref_t ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		/* first wakeup: leave embryonic state and report readiness */
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		mbuf_ref_t m_head, m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		u_int16_t req = ifp->if_poll_req;

		/* per-poll packet limit: explicit limit, or a derived default */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		    &m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			/* empty poll: still notify the input path */
			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
2681
2682 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)2683 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
2684 {
2685 if (ts == NULL) {
2686 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
2687 } else {
2688 *(&ifp->if_poll_cycle) = *ts;
2689 }
2690
2691 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
2692 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
2693 if_name(ifp), ts->tv_nsec);
2694 }
2695 }
2696
2697 void
ifnet_purge(struct ifnet * ifp)2698 ifnet_purge(struct ifnet *ifp)
2699 {
2700 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
2701 if_qflush_snd(ifp, false);
2702 }
2703 }
2704
2705 void
ifnet_update_sndq(struct ifclassq * ifq,cqev_t ev)2706 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
2707 {
2708 IFCQ_LOCK_ASSERT_HELD(ifq);
2709
2710 if (!(IFCQ_IS_READY(ifq))) {
2711 return;
2712 }
2713
2714 if (IFCQ_TBR_IS_ENABLED(ifq)) {
2715 struct tb_profile tb = {
2716 .rate = ifq->ifcq_tbr.tbr_rate_raw,
2717 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
2718 };
2719 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
2720 }
2721
2722 ifclassq_update(ifq, ev);
2723 }
2724
2725 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)2726 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
2727 {
2728 switch (ev) {
2729 case CLASSQ_EV_LINK_BANDWIDTH:
2730 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
2731 ifp->if_poll_update++;
2732 }
2733 break;
2734
2735 default:
2736 break;
2737 }
2738 }
2739
2740 errno_t
ifnet_set_output_sched_model(struct ifnet * ifp,u_int32_t model)2741 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
2742 {
2743 struct ifclassq *ifq;
2744 u_int32_t omodel;
2745 errno_t err;
2746
2747 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
2748 return EINVAL;
2749 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2750 return ENXIO;
2751 }
2752
2753 ifq = ifp->if_snd;
2754 IFCQ_LOCK(ifq);
2755 omodel = ifp->if_output_sched_model;
2756 ifp->if_output_sched_model = model;
2757 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
2758 ifp->if_output_sched_model = omodel;
2759 }
2760 IFCQ_UNLOCK(ifq);
2761
2762 return err;
2763 }
2764
2765 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2766 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2767 {
2768 if (ifp == NULL) {
2769 return EINVAL;
2770 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2771 return ENXIO;
2772 }
2773
2774 ifclassq_set_maxlen(ifp->if_snd, maxqlen);
2775
2776 return 0;
2777 }
2778
2779 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2780 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2781 {
2782 if (ifp == NULL || maxqlen == NULL) {
2783 return EINVAL;
2784 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2785 return ENXIO;
2786 }
2787
2788 *maxqlen = ifclassq_get_maxlen(ifp->if_snd);
2789
2790 return 0;
2791 }
2792
2793 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)2794 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
2795 {
2796 errno_t err;
2797
2798 if (ifp == NULL || pkts == NULL) {
2799 err = EINVAL;
2800 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2801 err = ENXIO;
2802 } else {
2803 err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
2804 IF_CLASSQ_ALL_GRPS, pkts, NULL);
2805 }
2806
2807 return err;
2808 }
2809
2810 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)2811 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
2812 u_int32_t *pkts, u_int32_t *bytes)
2813 {
2814 errno_t err;
2815
2816 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
2817 (pkts == NULL && bytes == NULL)) {
2818 err = EINVAL;
2819 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
2820 err = ENXIO;
2821 } else {
2822 err = ifclassq_get_len(ifp->if_snd, sc, IF_CLASSQ_ALL_GRPS,
2823 pkts, bytes);
2824 }
2825
2826 return err;
2827 }
2828
2829 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)2830 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
2831 {
2832 struct dlil_threading_info *inp;
2833
2834 if (ifp == NULL) {
2835 return EINVAL;
2836 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2837 return ENXIO;
2838 }
2839
2840 if (maxqlen == 0) {
2841 maxqlen = if_rcvq_maxlen;
2842 } else if (maxqlen < IF_RCVQ_MINLEN) {
2843 maxqlen = IF_RCVQ_MINLEN;
2844 }
2845
2846 inp = ifp->if_inp;
2847 lck_mtx_lock(&inp->dlth_lock);
2848 qlimit(&inp->dlth_pkts) = maxqlen;
2849 lck_mtx_unlock(&inp->dlth_lock);
2850
2851 return 0;
2852 }
2853
2854 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)2855 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
2856 {
2857 struct dlil_threading_info *inp;
2858
2859 if (ifp == NULL || maxqlen == NULL) {
2860 return EINVAL;
2861 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
2862 return ENXIO;
2863 }
2864
2865 inp = ifp->if_inp;
2866 lck_mtx_lock(&inp->dlth_lock);
2867 *maxqlen = qlimit(&inp->dlth_pkts);
2868 lck_mtx_unlock(&inp->dlth_lock);
2869 return 0;
2870 }
2871
2872 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)2873 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
2874 uint16_t delay_timeout)
2875 {
2876 if (delay_qlen > 0 && delay_timeout > 0) {
2877 if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
2878 ifp->if_start_delay_qlen = MIN(100, delay_qlen);
2879 ifp->if_start_delay_timeout = min(20000, delay_timeout);
2880 /* convert timeout to nanoseconds */
2881 ifp->if_start_delay_timeout *= 1000;
2882 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
2883 ifp->if_xname, (uint32_t)delay_qlen,
2884 (uint32_t)delay_timeout);
2885 } else {
2886 if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
2887 }
2888 }
2889
2890 /*
2891 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
2892 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
2893 * buf holds the full header.
2894 */
2895 static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t * __indexable buf,uint8_t ip_ver)2896 ifnet_mcast_clear_dscp(uint8_t *__indexable buf, uint8_t ip_ver)
2897 {
2898 struct ip *ip;
2899 struct ip6_hdr *ip6;
2900 uint8_t lbuf[64] __attribute__((aligned(8)));
2901 uint8_t *p = buf;
2902
2903 if (ip_ver == IPVERSION) {
2904 uint8_t old_tos;
2905 uint32_t sum;
2906
2907 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
2908 DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
2909 bcopy(buf, lbuf, sizeof(struct ip));
2910 p = lbuf;
2911 }
2912 ip = (struct ip *)(void *)p;
2913 if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
2914 return;
2915 }
2916
2917 DTRACE_IP1(clear__v4, struct ip *, ip);
2918 old_tos = ip->ip_tos;
2919 ip->ip_tos &= IPTOS_ECN_MASK;
2920 sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
2921 sum = (sum >> 16) + (sum & 0xffff);
2922 ip->ip_sum = (uint16_t)(sum & 0xffff);
2923
2924 if (__improbable(p == lbuf)) {
2925 bcopy(lbuf, buf, sizeof(struct ip));
2926 }
2927 } else {
2928 uint32_t flow;
2929 ASSERT(ip_ver == IPV6_VERSION);
2930
2931 if (__improbable(!IP_HDR_ALIGNED_P(p))) {
2932 DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
2933 bcopy(buf, lbuf, sizeof(struct ip6_hdr));
2934 p = lbuf;
2935 }
2936 ip6 = (struct ip6_hdr *)(void *)p;
2937 flow = ntohl(ip6->ip6_flow);
2938 if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
2939 return;
2940 }
2941
2942 DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
2943 ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);
2944
2945 if (__improbable(p == lbuf)) {
2946 bcopy(lbuf, buf, sizeof(struct ip6_hdr));
2947 }
2948 }
2949 }
2950
/*
 * Enqueue a single packet (mbuf or native Skywalk packet) onto the given
 * classq (or the interface's default if_snd when ifcq is NULL).  On the way
 * in this routine stamps the packet with an uptime timestamp, updates the
 * foreground/realtime activity timestamps, applies the Wi-Fi multicast DSCP
 * workaround, and maintains the enqueue-multi (delayed start) heuristics.
 * The caller relinquishes ownership of the packet in all cases; *pdrop
 * reports whether it was dropped.
 */
static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	/* non-NULL iff the packet is a Wi-Fi multicast needing DSCP clearing */
	uint8_t *mcast_buf = NULL;
	uint8_t ip_ver;
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here. Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)_net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)_net_uptime;
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)_net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)_net_uptime;
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* make sure the Ethernet header is contiguous */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = mtod(p->cp_mbuf, struct ether_header *);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/*
				 * Not an IP(v6) packet; exit the switch with
				 * mcast_buf left NULL so no DSCP clearing
				 * is attempted below.
				 */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				/* m_pullup may have moved the data */
				eh = mtod(p->cp_mbuf, struct ether_header *);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface. If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)_net_uptime;
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)_net_uptime;
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)_net_uptime;
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)_net_uptime;
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			/* locate the frame within the first buflet */
			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* apply the Wi-Fi multicast DSCP workaround, if flagged above */
	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		/* timestamp may not have been taken above; take it now */
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* still inside the current window: count it */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* idle > 200ms: reset window and disable delay */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* window expired: decide based on the count */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* first packet: open a new measurement window */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
3260
3261 static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3262 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, struct ifclassq *ifcq,
3263 classq_pkt_t *head, classq_pkt_t *tail, uint32_t cnt, uint32_t bytes,
3264 boolean_t flush, boolean_t *pdrop)
3265 {
3266 int error;
3267
3268 /* enqueue the packet (caller consumes object) */
3269 error = ifclassq_enqueue(ifcq != NULL ? ifcq : ifp->if_snd, head, tail,
3270 cnt, bytes, pdrop);
3271
3272 /*
3273 * Tell the driver to start dequeueing; do this even when the queue
3274 * for the packet is suspended (EQSUSPENDED), as the driver could still
3275 * be dequeueing from other unsuspended queues.
3276 */
3277 if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
3278 ifnet_start(ifp);
3279 }
3280 return error;
3281 }
3282
3283 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * __sized_by (n_pkts)pkts,uint32_t n_pkts)3284 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *__sized_by(n_pkts)pkts, uint32_t n_pkts)
3285 {
3286 ifnet_ref_t ifp = handle;
3287 boolean_t pdrop; /* dummy */
3288 uint32_t i;
3289
3290 ASSERT(n_pkts >= 1);
3291 for (i = 0; i < n_pkts - 1; i++) {
3292 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3293 FALSE, &pdrop);
3294 }
3295 /* flush with the last packet */
3296 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
3297 TRUE, &pdrop);
3298
3299 return 0;
3300 }
3301
3302 static inline errno_t
ifnet_enqueue_common(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)3303 ifnet_enqueue_common(struct ifnet *ifp, struct ifclassq *ifcq,
3304 classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
3305 {
3306 if (ifp->if_output_netem != NULL) {
3307 bool drop;
3308 errno_t error;
3309 error = netem_enqueue(ifp->if_output_netem, pkt, &drop);
3310 *pdrop = drop ? TRUE : FALSE;
3311 return error;
3312 } else {
3313 return ifnet_enqueue_ifclassq(ifp, ifcq, pkt, flush, pdrop);
3314 }
3315 }
3316
3317 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)3318 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3319 {
3320 uint32_t bytes = m_pktlen(m);
3321 struct mbuf *tail = m;
3322 uint32_t cnt = 1;
3323 boolean_t pdrop;
3324
3325 while (tail->m_nextpkt) {
3326 VERIFY(tail->m_flags & M_PKTHDR);
3327 tail = tail->m_nextpkt;
3328 cnt++;
3329 bytes += m_pktlen(tail);
3330 }
3331
3332 return ifnet_enqueue_mbuf_chain(ifp, m, tail, cnt, bytes, TRUE, &pdrop);
3333 }
3334
3335 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)3336 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3337 boolean_t *pdrop)
3338 {
3339 classq_pkt_t pkt;
3340
3341 m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
3342 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3343 m->m_nextpkt != NULL) {
3344 if (m != NULL) {
3345 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_INVALID, NULL, 0);
3346 *pdrop = TRUE;
3347 }
3348 return EINVAL;
3349 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3350 !IF_FULLY_ATTACHED(ifp)) {
3351 /* flag tested without lock for performance */
3352 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
3353 *pdrop = TRUE;
3354 return ENXIO;
3355 } else if (!(ifp->if_flags & IFF_UP)) {
3356 m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
3357 *pdrop = TRUE;
3358 return ENETDOWN;
3359 }
3360
3361 CLASSQ_PKT_INIT_MBUF(&pkt, m);
3362 return ifnet_enqueue_common(ifp, NULL, &pkt, flush, pdrop);
3363 }
3364
/*
 * Enqueue a pre-counted chain of mbufs for output.  Unlike
 * ifnet_enqueue_mbuf(), the preconditions here (non-NULL ifp/head/tail,
 * packet-header mbufs, TXSTART interface) are enforced with ASSERTs rather
 * than runtime checks; only attach/up state is checked at runtime.
 * On failure the entire chain is dropped and *pdrop is set.
 */
errno_t
ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	classq_pkt_t head, tail;

	/* record the output interface in the crumb trail of the whole chain */
	m_add_hdr_crumb_interface_output(m_head, ifp->if_index, true);
	ASSERT(m_head != NULL);
	ASSERT((m_head->m_flags & M_PKTHDR) != 0);
	ASSERT(m_tail != NULL);
	ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
	ASSERT(ifp != NULL);
	ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);

	if (!IF_FULLY_ATTACHED(ifp)) {
		/* flag tested without lock for performance */
		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_ATTACHED, NULL, 0);
		*pdrop = TRUE;
		return ENXIO;
	} else if (!(ifp->if_flags & IFF_UP)) {
		m_drop_list(m_head, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_ENQUEUE_IF_NOT_UP, NULL, 0);
		*pdrop = TRUE;
		return ENETDOWN;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
	return ifnet_enqueue_ifclassq_chain(ifp, NULL, &head, &tail, cnt, bytes,
	    flush, pdrop);
}
3396
3397 #if SKYWALK
3398 static errno_t
ifnet_enqueue_pkt_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)3399 ifnet_enqueue_pkt_common(struct ifnet *ifp, struct ifclassq *ifcq,
3400 struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
3401 {
3402 classq_pkt_t pkt;
3403
3404 ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
3405
3406 if (__improbable(ifp == NULL || kpkt == NULL)) {
3407 if (kpkt != NULL) {
3408 pp_free_packet(__DECONST(struct kern_pbufpool *,
3409 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3410 *pdrop = TRUE;
3411 }
3412 return EINVAL;
3413 } else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
3414 !IF_FULLY_ATTACHED(ifp))) {
3415 /* flag tested without lock for performance */
3416 pp_free_packet(__DECONST(struct kern_pbufpool *,
3417 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3418 *pdrop = TRUE;
3419 return ENXIO;
3420 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3421 pp_free_packet(__DECONST(struct kern_pbufpool *,
3422 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
3423 *pdrop = TRUE;
3424 return ENETDOWN;
3425 }
3426
3427 CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
3428 return ifnet_enqueue_common(ifp, ifcq, &pkt, flush, pdrop);
3429 }
3430
errno_t
ifnet_enqueue_pkt(struct ifnet *ifp, struct __kern_packet *kpkt,
    boolean_t flush, boolean_t *pdrop)
{
	/* enqueue onto the interface's default send queue (ifcq == NULL) */
	return ifnet_enqueue_pkt_common(ifp, NULL, kpkt, flush, pdrop);
}
3437
errno_t
ifnet_enqueue_ifcq_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
    struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
{
	/* enqueue onto an explicit, caller-supplied classq */
	return ifnet_enqueue_pkt_common(ifp, ifcq, kpkt, flush, pdrop);
}
3444
3445 static errno_t
ifnet_enqueue_pkt_chain_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)3446 ifnet_enqueue_pkt_chain_common(struct ifnet *ifp, struct ifclassq *ifcq,
3447 struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
3448 uint32_t bytes, boolean_t flush, boolean_t *pdrop)
3449 {
3450 classq_pkt_t head, tail;
3451
3452 ASSERT(k_head != NULL);
3453 ASSERT(k_tail != NULL);
3454 ASSERT(ifp != NULL);
3455 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
3456
3457 if (!IF_FULLY_ATTACHED(ifp)) {
3458 /* flag tested without lock for performance */
3459 pp_free_packet_chain(k_head, NULL);
3460 *pdrop = TRUE;
3461 return ENXIO;
3462 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
3463 pp_free_packet_chain(k_head, NULL);
3464 *pdrop = TRUE;
3465 return ENETDOWN;
3466 }
3467
3468 CLASSQ_PKT_INIT_PACKET(&head, k_head);
3469 CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
3470 return ifnet_enqueue_ifclassq_chain(ifp, ifcq, &head, &tail, cnt, bytes,
3471 flush, pdrop);
3472 }
3473
errno_t
ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct __kern_packet *k_head,
    struct __kern_packet *k_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
    boolean_t *pdrop)
{
	/* chain enqueue onto the interface's default send queue */
	return ifnet_enqueue_pkt_chain_common(ifp, NULL, k_head, k_tail,
	    cnt, bytes, flush, pdrop);
}
3482
errno_t
ifnet_enqueue_ifcq_pkt_chain(struct ifnet *ifp, struct ifclassq *ifcq,
    struct __kern_packet *k_head, struct __kern_packet *k_tail, uint32_t cnt,
    uint32_t bytes, boolean_t flush, boolean_t *pdrop)
{
	/* chain enqueue onto an explicit, caller-supplied classq */
	return ifnet_enqueue_pkt_chain_common(ifp, ifcq, k_head, k_tail,
	    cnt, bytes, flush, pdrop);
}
3491 #endif /* SKYWALK */
3492
3493 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)3494 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3495 {
3496 errno_t rc;
3497 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
3498
3499 if (ifp == NULL || mp == NULL) {
3500 return EINVAL;
3501 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3502 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
3503 return ENXIO;
3504 }
3505 if (!ifnet_is_attached(ifp, 1)) {
3506 return ENXIO;
3507 }
3508
3509 #if SKYWALK
3510 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
3511 #endif /* SKYWALK */
3512 rc = ifclassq_dequeue(ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3513 &pkt, NULL, NULL, NULL, 0);
3514 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
3515 ifnet_decr_iorefcnt(ifp);
3516 *mp = pkt.cp_mbuf;
3517 m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
3518 return rc;
3519 }
3520
/*
 * Dequeue a single mbuf of the given service class from the interface's
 * send queue.  Same preconditions as ifnet_dequeue(), plus a valid
 * service class.
 */
errno_t
ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
    struct mbuf **mp)
{
	errno_t rc;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	/* take an io ref across the dequeue; released below */
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

#if SKYWALK
	/* mbuf dequeue is only valid for non-native interfaces */
	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
	rc = ifclassq_dequeue_sc(ifp->if_snd, sc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL, 0);
	VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*mp = pkt.cp_mbuf;
	m_add_hdr_crumb_interface_output(*mp, ifp->if_index, false);
	return rc;
}
3549
/*
 * Dequeue up to pkt_limit mbufs from the interface's send queue.
 * Optionally returns the chain tail, packet count and total bytes via
 * tail/cnt/len (each may be NULL except head).
 */
errno_t
ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || pkt_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	/* take an io ref across the dequeue; released below */
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

#if SKYWALK
	/* mbuf dequeue is only valid for non-native interfaces */
	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
	rc = ifclassq_dequeue(ifp->if_snd, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len, 0);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}
	return rc;
}
3582
/*
 * Dequeue mbufs from the interface's send queue up to a byte budget
 * (rather than a packet count).  Optionally returns the chain tail,
 * packet count and total bytes via tail/cnt/len.
 */
errno_t
ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
    struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || byte_limit < 1) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	/* take an io ref across the dequeue; released below */
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

#if SKYWALK
	/* mbuf dequeue is only valid for non-native interfaces */
	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
	rc = ifclassq_dequeue(ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
	    byte_limit, &pkt_head, &pkt_tail, cnt, len, 0);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}
	return rc;
}
3615
/*
 * Dequeue up to pkt_limit mbufs of the given service class from the
 * interface's send queue.  Optionally returns the chain tail, packet
 * count and total bytes via tail/cnt/len.
 */
errno_t
ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
    u_int32_t *len)
{
	errno_t rc;
	classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
	classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);

	if (ifp == NULL || head == NULL || pkt_limit < 1 ||
	    !MBUF_VALID_SC(sc)) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
		return ENXIO;
	}
	/* take an io ref across the dequeue; released below */
	if (!ifnet_is_attached(ifp, 1)) {
		return ENXIO;
	}

#if SKYWALK
	/* mbuf dequeue is only valid for non-native interfaces */
	ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
	rc = ifclassq_dequeue_sc(ifp->if_snd, sc, pkt_limit,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
	    cnt, len, 0);
	VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
	ifnet_decr_iorefcnt(ifp);
	*head = pkt_head.cp_mbuf;
	m_add_hdr_crumb_interface_output(*head, ifp->if_index, false);
	if (tail != NULL) {
		*tail = pkt_tail.cp_mbuf;
	}
	return rc;
}
3651
3652 #if XNU_TARGET_OS_OSX
/*
 * Adapter for drivers that registered the legacy framer callback: the
 * legacy interface does not report prepend/append byte counts, so zero
 * them for callers that ask, then forward to the legacy framer.
 */
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest,
    IFNET_LLADDR_T dest_linkaddr,
    IFNET_FRAME_TYPE_T frame_type,
    u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
3669 #endif /* XNU_TARGET_OS_OSX */
3670
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	/* interface state changed; prompt NECP clients to re-evaluate */
	necp_update_all_clients();
#endif /* NECP */

	/* broadcast the kernel event to registered listeners */
	return kev_post_msg(event);
}
3685
3686 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)3687 dlil_post_sifflags_msg(struct ifnet * ifp)
3688 {
3689 struct kev_msg ev_msg;
3690 struct net_event_data ev_data;
3691
3692 bzero(&ev_data, sizeof(ev_data));
3693 bzero(&ev_msg, sizeof(ev_msg));
3694 ev_msg.vendor_code = KEV_VENDOR_APPLE;
3695 ev_msg.kev_class = KEV_NETWORK_CLASS;
3696 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3697 ev_msg.event_code = KEV_DL_SIFFLAGS;
3698 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3699 ev_data.if_family = ifp->if_family;
3700 ev_data.if_unit = (u_int32_t) ifp->if_unit;
3701 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
3702 ev_msg.dv[0].data_ptr = &ev_data;
3703 ev_msg.dv[1].data_length = 0;
3704 dlil_post_complete_msg(ifp, &ev_msg);
3705 }
3706
3707 #define TMP_IF_PROTO_ARR_SIZE 10
/*
 * Deliver a kernel event to everything attached to the interface: first
 * the interface filters, then every registered protocol, then the
 * interface's own event handler, and finally post the event system-wide.
 * Protocol references are collected into a temporary array (stack-based
 * for small counts) because the ifnet lock cannot be held across the
 * protocol callbacks.
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			/* drop the lock across the callback; the busy
			 * marker above keeps the list stable */
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			/* small enough: use the on-stack array */
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				/*
				 * NOTE(review): on this path cleanup calls
				 * kfree_type() with a NULL array — assumed
				 * to be a no-op; verify.
				 */
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		/* snapshot all protocols, taking a ref on each */
		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* deliver the event to each protocol, dropping our refs as we go */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		if_proto_free(proto);
	}

cleanup:
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
3807
3808 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)3809 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
3810 {
3811 struct kev_msg kev_msg;
3812 int result = 0;
3813
3814 if (ifp == NULL || event == NULL) {
3815 return EINVAL;
3816 }
3817
3818 bzero(&kev_msg, sizeof(kev_msg));
3819 kev_msg.vendor_code = event->vendor_code;
3820 kev_msg.kev_class = event->kev_class;
3821 kev_msg.kev_subclass = event->kev_subclass;
3822 kev_msg.event_code = event->event_code;
3823 kev_msg.dv[0].data_ptr = &event->event_data;
3824 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
3825 kev_msg.dv[1].data_length = 0;
3826
3827 result = dlil_event_internal(ifp, &kev_msg, TRUE);
3828
3829 return result;
3830 }
3831
3832 /* The following is used to enqueue work items for ifnet ioctl events */
3833 static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);
3834
/*
 * Argument carried by a deferred ioctl work item: the target interface
 * (an io reference is held on it until the callback runs) and the
 * ioctl to replay from the work-queue thread.
 */
struct ifnet_ioctl_event {
	ifnet_ref_t ifp;
	u_long ioctl_code;
};
3839
/*
 * Work-queue wrapper for an ifnet_ioctl_event; the embedded nwk_wqe
 * must come first so __container_of() can recover this structure in
 * the callback.
 */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
3844
3845 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)3846 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
3847 {
3848 struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
3849 bool compare_expected;
3850
3851 /*
3852 * Get an io ref count if the interface is attached.
3853 * At this point it most likely is. We are taking a reference for
3854 * deferred processing.
3855 */
3856 if (!ifnet_is_attached(ifp, 1)) {
3857 os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
3858 "is not attached",
3859 __func__, __LINE__, if_name(ifp), ioctl_code);
3860 return;
3861 }
3862 switch (ioctl_code) {
3863 case SIOCADDMULTI:
3864 compare_expected = false;
3865 if (!atomic_compare_exchange_strong(&ifp->if_mcast_add_signaled, &compare_expected, true)) {
3866 ifnet_decr_iorefcnt(ifp);
3867 return;
3868 }
3869 break;
3870 case SIOCDELMULTI:
3871 compare_expected = false;
3872 if (!atomic_compare_exchange_strong(&ifp->if_mcast_del_signaled, &compare_expected, true)) {
3873 ifnet_decr_iorefcnt(ifp);
3874 return;
3875 }
3876 break;
3877 default:
3878 os_log(OS_LOG_DEFAULT, "%s:%d %s unknown ioctl %lu",
3879 __func__, __LINE__, if_name(ifp), ioctl_code);
3880 return;
3881 }
3882
3883 p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
3884 Z_WAITOK | Z_ZERO | Z_NOFAIL);
3885
3886 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
3887 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
3888 p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
3889 nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
3890 }
3891
/*
 * Work-queue callback: replay the ioctl scheduled by ifnet_ioctl_async(),
 * then release the io reference taken there and free the work item.
 */
static void
ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
{
	struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
	    struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);

	ifnet_ref_t ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
	u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
	int ret = 0;

	/*
	 * Clear the "signaled" flag BEFORE issuing the ioctl so that a
	 * concurrent ifnet_ioctl_async() can schedule a fresh pass rather
	 * than being coalesced into this one.
	 */
	switch (ioctl_code) {
	case SIOCADDMULTI:
		atomic_store(&ifp->if_mcast_add_signaled, false);
		break;
	case SIOCDELMULTI:
		atomic_store(&ifp->if_mcast_del_signaled, false);
		break;
	}
	if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ret, ioctl_code);
	} else if (dlil_verbose) {
		os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
		    "for ioctl %lu",
		    __func__, __LINE__, if_name(ifp), ioctl_code);
	}
	/* drop the io ref taken by ifnet_ioctl_async() */
	ifnet_decr_iorefcnt(ifp);
	kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
	return;
}
3922
/*
 * Dispatch an ioctl for an interface: interface filters first, then the
 * attached protocol (if proto_fam is non-zero), then the interface
 * driver itself.
 *
 * Convention: retval stays EOPNOTSUPP while nobody has handled the
 * ioctl; the first handler returning anything other than ENOTSUP /
 * EOPNOTSUPP settles the result (EJUSTRETURN always wins and is mapped
 * to 0 on return).
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/*
			 * Drop if_flt_lock across the callback; the busy
			 * marker set above keeps the list stable meanwhile.
			 */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, report success to the caller" */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	ifnet_decr_iorefcnt(ifp);

	return retval;
}
4040
4041 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)4042 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
4043 {
4044 errno_t error = 0;
4045
4046 if (ifp->if_set_bpf_tap) {
4047 /* Get an io reference on the interface if it is attached */
4048 if (!ifnet_is_attached(ifp, 1)) {
4049 return ENXIO;
4050 }
4051 error = ifp->if_set_bpf_tap(ifp, mode, callback);
4052 ifnet_decr_iorefcnt(ifp);
4053 }
4054 return error;
4055 }
4056
/*
 * Resolve a multicast protocol address into a link-layer address.
 * The attached protocol gets first crack via its resolve_multi
 * callback; the interface's if_check_multi then verifies either the
 * resolved link-layer address (on success) or the protocol address
 * (when the protocol offered no resolver).
 */
errno_t
dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
    struct sockaddr *ll_addr, size_t ll_len)
{
	errno_t result = EOPNOTSUPP;
	struct if_proto *proto;
	const struct sockaddr *verify;
	proto_media_resolve_multi resolvep;

	/* take an io ref; bail if the interface is not attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return result;
	}

	SOCKADDR_ZERO(ll_addr, ll_len);

	/* Call the protocol first; callee holds a proto refcnt upon success */
	ifnet_lock_shared(ifp);
	proto = find_attached_proto(ifp, proto_addr->sa_family);
	ifnet_lock_done(ifp);
	if (proto != NULL) {
		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
		if (resolvep != NULL) {
			result = resolvep(ifp, proto_addr, SDL(ll_addr), ll_len);
		}
		if_proto_free(proto);
	}

	/* Let the interface verify the multicast address */
	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
		if (result == 0) {
			verify = ll_addr;
		} else {
			verify = proto_addr;
		}
		result = ifp->if_check_multi(ifp, verify);
	}

	ifnet_decr_iorefcnt(ifp);
	return result;
}
4098
4099 __private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4100 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
4101 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4102 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4103 {
4104 struct if_proto *proto;
4105 errno_t result = 0;
4106
4107 if ((ifp->if_flags & IFF_NOARP) != 0) {
4108 result = ENOTSUP;
4109 goto done;
4110 }
4111
4112 /* callee holds a proto refcnt upon success */
4113 ifnet_lock_shared(ifp);
4114 proto = find_attached_proto(ifp, target_proto->sa_family);
4115 ifnet_lock_done(ifp);
4116 if (proto == NULL) {
4117 result = ENOTSUP;
4118 } else {
4119 proto_media_send_arp arpp;
4120 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
4121 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
4122 if (arpp == NULL) {
4123 result = ENOTSUP;
4124 } else {
4125 switch (arpop) {
4126 case ARPOP_REQUEST:
4127 arpstat.txrequests++;
4128 if (target_hw != NULL) {
4129 arpstat.txurequests++;
4130 }
4131 break;
4132 case ARPOP_REPLY:
4133 arpstat.txreplies++;
4134 break;
4135 }
4136 result = arpp(ifp, arpop, sender_hw, sender_proto,
4137 target_hw, target_proto);
4138 }
4139 if_proto_free(proto);
4140 }
4141 done:
4142 return result;
4143 }
4144
4145 static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,const struct sockaddr_in * target_sin)4146 _is_announcement(const struct sockaddr_in * sender_sin,
4147 const struct sockaddr_in * target_sin)
4148 {
4149 if (target_sin == NULL || sender_sin == NULL) {
4150 return FALSE;
4151 }
4152
4153 return sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
4154 }
4155
/*
 * Front end for dlil_send_arp_internal().  Normally sends on the given
 * interface, but an IPv4 link-local ARP request (other than an
 * announcement) is broadcast on every IFEF_ARPLL-capable interface,
 * each using its own source hardware and IPv4 addresses.  RTF_ROUTER
 * targets are flagged with SIN_ROUTER so the protocol callback can
 * tell the target is a (default) router.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = __DECONST_SA(target_proto0);

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		SOCKADDR_COPY(target_proto, &target_proto_sinarp, sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = SA(&target_proto_sinarp);
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces. The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = SIN(sender_proto);
	target_sin = SIN(target_proto);
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		u_int32_t count;
		ifnet_ref_t *__counted_by(count) ifp_list;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				ifnet_ref_t cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing. This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						SOCKADDR_COPY(SIN(source_ip->ifa_addr), &source_ip_copy, sizeof(source_ip_copy));
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* keep the lladdr alive after dropping the lock */
				ifa_addref(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, SDL(source_hw->ifa_addr),
				    SA(&source_ip_copy), NULL,
				    target_proto);

				ifa_remref(source_hw);
				/* report success if any interface accepted it */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free_counted_by(ifp_list, count);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
4266
4267 /*
4268 * Caller must hold ifnet head lock.
4269 */
4270 static int
ifnet_lookup(struct ifnet * ifp)4271 ifnet_lookup(struct ifnet *ifp)
4272 {
4273 ifnet_ref_t _ifp;
4274
4275 ifnet_head_lock_assert(LCK_RW_ASSERT_HELD);
4276 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
4277 if (_ifp == ifp) {
4278 break;
4279 }
4280 }
4281 return _ifp != NULL;
4282 }
4283
4284 /*
4285 * Caller has to pass a non-zero refio argument to get a
4286 * IO reference count. This will prevent ifnet_detach from
4287 * being called when there are outstanding io reference counts.
4288 */
4289 int
ifnet_is_attached(struct ifnet * ifp,int refio)4290 ifnet_is_attached(struct ifnet *ifp, int refio)
4291 {
4292 int ret;
4293
4294 lck_mtx_lock_spin(&ifp->if_ref_lock);
4295 if ((ret = IF_FULLY_ATTACHED(ifp))) {
4296 if (refio > 0) {
4297 ifp->if_refio++;
4298 }
4299 }
4300 lck_mtx_unlock(&ifp->if_ref_lock);
4301
4302 return ret;
4303 }
4304
/*
 * Account for a kernel thread that has been created for this interface
 * but has not started yet; paired with ifnet_decr_pending_thread_count().
 */
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4312
/*
 * Drop the pending-thread count taken by ifnet_incr_pending_thread_count();
 * wake up any waiter (sleeping on &ifp->if_threads_pending) once the
 * last pending thread has checked in.
 */
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4324
4325 /*
4326 * Caller must ensure the interface is attached; the assumption is that
4327 * there is at least an outstanding IO reference count held already.
4328 * Most callers would call ifnet_is_{attached,data_ready}() instead.
4329 */
/*
 * Take an additional io reference; valid only while the interface is
 * fully attached and the caller already holds at least one io ref
 * (both conditions are VERIFYed).
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4339
/*
 * Drop one io reference with if_ref_lock already held; common tail for
 * ifnet_decr_iorefcnt(), ifnet_datamov_end() and ifnet_datamov_resume().
 */
__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	/* every datamov ref is also an io ref, so refio can't hit 0 first */
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}
4360
/*
 * Drop one io reference (locking wrapper around
 * ifnet_decr_iorefcnt_locked()).
 */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4368
/*
 * Enter the data-movement path: takes both an io reference and a
 * datamov reference when the interface is fully attached AND ready
 * (i.e. not suspended).  Returns FALSE without side effects otherwise.
 * Paired with ifnet_datamov_end().
 */
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	DTRACE_IP2(datamov__begin, struct ifnet *, ifp, boolean_t, ret);
	return ret;
}
4384
/*
 * Leave the data-movement path: drops the datamov and io references
 * taken by ifnet_datamov_begin(), waking any drainers blocked in
 * ifnet_datamov_drain() when the last mover leaves.
 */
void
ifnet_datamov_end(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_datamov > 0);
	/*
	 * if there's no more thread moving data, wakeup any
	 * drainers that's blocked waiting for this.
	 */
	if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
		DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
		DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
		wakeup(&(ifp->if_datamov));
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);

	DTRACE_IP1(datamov__end, struct ifnet *, ifp);
}
4404
/*
 * Suspend data movement with if_ref_lock held: takes an io reference
 * (released by ifnet_datamov_resume()) and, on the first suspension,
 * clears IFRF_READY so new ifnet_datamov_begin() calls fail.
 */
static void
ifnet_datamov_suspend_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
	ifp->if_refio++;
	if (ifp->if_suspend++ == 0) {
		VERIFY(ifp->if_refflags & IFRF_READY);
		ifp->if_refflags &= ~IFRF_READY;
	}
}
4415
/*
 * Suspend data movement (locking wrapper around
 * ifnet_datamov_suspend_locked()); balanced by ifnet_datamov_resume().
 */
void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4424
4425 boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet * ifp)4426 ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
4427 {
4428 lck_mtx_lock_spin(&ifp->if_ref_lock);
4429 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
4430 if (ifp->if_suspend > 0) {
4431 lck_mtx_unlock(&ifp->if_ref_lock);
4432 return FALSE;
4433 }
4434 ifnet_datamov_suspend_locked(ifp);
4435 lck_mtx_unlock(&ifp->if_ref_lock);
4436 return TRUE;
4437 }
4438
/*
 * Wait until all threads currently in the data-movement path have left
 * (if_datamov reaches 0), then flush the transmit queue(s).  Data
 * movement must already be suspended by the caller.
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		/* woken by the last ifnet_datamov_end() */
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush_snd(ifp, false);
	}
}
4466
/*
 * Convenience wrapper: suspend data movement, then wait for in-flight
 * movers to drain.  Balanced by ifnet_datamov_resume().
 */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
4473
/*
 * Undo one ifnet_datamov_suspend(): drop the suspend count and, when
 * the last suspension is lifted, restore IFRF_READY; also releases the
 * io reference taken at suspend time.
 */
void
ifnet_datamov_resume(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	if (--ifp->if_suspend == 0) {
		VERIFY(!(ifp->if_refflags & IFRF_READY));
		ifp->if_refflags |= IFRF_READY;
	}
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
4487
/*
 * Common backend for ifnet_attach_protocol{,_v2}(): insert a fully
 * initialized if_proto into the interface's protocol hash, let the
 * family module refine the demux descriptors, and post
 * KEV_DL_PROTO_ATTACHED.  On success *proto_count (if non-NULL) gets
 * the number of protocols now attached.  Returns EEXIST when the
 * family is already attached, ENXIO when the interface is detaching,
 * EINVAL for non-PF_BRIDGE on vmnet.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *__counted_by(demux_count) demux_list, u_int32_t demux_count,
    uint32_t *proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	ifnet_ref_t ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	/* hold an io ref so the interface can't detach under us */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		/* family already attached; drop the lookup ref and fail */
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash
	 */
	/* walk to the tail of the bucket so insertion preserves order */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
4567
/*
 * Post-attach housekeeping shared by the v1 and v2 attach paths:
 * bring the interface up and, on Skywalk builds, attach the
 * flowswitch nexus for IP protocols.
 */
static void
dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
{
	/*
	 * A protocol has been attached, mark the interface up.
	 * This used to be done by configd.KernelEventMonitor, but that
	 * is inherently prone to races (rdar://problem/30810208).
	 */
	(void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
	(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
	dlil_post_sifflags_msg(ifp);
#if SKYWALK
	switch (protocol) {
	case AF_INET:
	case AF_INET6:
		/* don't attach the flowswitch unless attaching IP */
		dlil_attach_flowswitch_nexus(ifp);
		break;
	default:
		break;
	}
#endif /* SKYWALK */
}
4591
/*
 * Attach a v1 protocol to an interface: validate arguments, build an
 * if_proto from the caller's v1 callbacks, and hand it to
 * dlil_attach_protocol().  On success the interface is also marked up
 * via dlil_handle_proto_attach().
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	/* head lock held across the lookup AND the attach to pin ifp */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* attach failed: release the if_proto allocated above */
		dlif_proto_free(ifproto);
	}
	return retval;
}
4653
/*
 * Attach a v2 protocol to an interface; identical flow to
 * ifnet_attach_protocol() except the if_proto is populated from the
 * v2 callback table.
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	/* head lock held across the lookup AND the attach to pin ifp */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = dlif_proto_alloc();

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* attach failed: release the if_proto allocated above */
		dlif_proto_free(ifproto);
	}
	return retval;
}
4715
/*
 * Detach a protocol family from an interface: remove its if_proto from
 * the hash, neutralize its callbacks with the ifproto_media_* stubs so
 * racing users fail with ENXIO, and drop both the attach reference and
 * the lookup reference.  The final teardown happens when the last
 * proto reference goes away (see if_proto_free()).
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/* replace live callbacks with stubs before any refs are dropped */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
4781
4782 static errno_t
ifproto_media_input_v1(struct ifnet * ifp,protocol_family_t protocol,struct mbuf * packet,char * header)4783 ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
4784 struct mbuf *packet, char *header)
4785 {
4786 #pragma unused(ifp, protocol, packet, header)
4787 return ENXIO;
4788 }
4789
/*
 * Stub v2 input handler installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}
4797
/*
 * Stub pre_output handler installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    IFNET_FRAME_TYPE_RW_T frame_type, IFNET_LLADDR_RW_T link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}
4806
/*
 * Stub event handler installed by ifnet_detach_protocol();
 * silently discards the event.
 */
static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}
4813
/*
 * Stub ioctl handler installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return ENXIO;
}
4821
/*
 * Stub resolve_multi handler installed by ifnet_detach_protocol();
 * always fails with ENXIO.
 */
static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return ENXIO;
}
4829
4830 static errno_t
ifproto_media_send_arp(struct ifnet * ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)4831 ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
4832 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
4833 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
4834 {
4835 #pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
4836 return ENXIO;
4837 }
4838
4839 extern int if_next_index(void);
4840 extern int tcp_ecn_outbound;
4841
4842 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)4843 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
4844 {
4845 uint32_t sflags = 0;
4846 int err;
4847
4848 if (if_flowadv) {
4849 sflags |= PKTSCHEDF_QALG_FLOWCTL;
4850 }
4851
4852 if (if_delaybased_queue) {
4853 sflags |= PKTSCHEDF_QALG_DELAYBASED;
4854 }
4855
4856 if (ifp->if_output_sched_model ==
4857 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
4858 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
4859 }
4860 /* Inherit drop limit from the default queue */
4861 if (ifp->if_snd != ifcq) {
4862 IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
4863 }
4864 /* Initialize transmit queue(s) */
4865 err = ifclassq_setup(ifcq, ifp, sflags);
4866 if (err != 0) {
4867 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
4868 "err=%d", __func__, ifp, err);
4869 /* NOTREACHED */
4870 }
4871 }
4872
/*
 * ifnet_attach: attach an embryonic ifnet to the system, making it
 * visible to lookups (ifnet_head / ifindex2ifnet) and starting its
 * I/O kernel threads.  `ll_addr', when non-NULL, supplies the
 * link-layer address (and, for Ethernet, the permanent address).
 *
 * Returns 0 on success; EINVAL on a NULL ifp or a link-address length
 * mismatch; EEXIST if the ifnet is already on ifnet_head; ENODEV if
 * the interface lacks add/del protocol callbacks; ENOBUFS when no
 * if_index slot or link address can be allocated.  Unrecoverable
 * internal failures panic.
 */
errno_t
ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
{
#if SKYWALK
	boolean_t netif_compat;
	if_nexus_netif nexus_netif;
#endif /* SKYWALK */
	ifnet_ref_t tmp_if;
	struct ifaddr *ifa;
	struct if_data_internal if_data_saved;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	struct dlil_threading_info *dl_inp;
	thread_continue_t thfunc = NULL;
	int err;

	if (ifp == NULL) {
		return EINVAL;
	}

	/*
	 * Serialize ifnet attach using dlil_ifnet_lock, in order to
	 * prevent the interface from being configured while it is
	 * embryonic, as ifnet_head_lock is dropped and reacquired
	 * below prior to marking the ifnet with IFRF_ATTACHED.
	 */
	dlil_if_lock();
	ifnet_head_lock_exclusive();
	/* Verify we aren't already on the list */
	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
		if (tmp_if == ifp) {
			ifnet_head_done();
			dlil_if_unlock();
			return EEXIST;
		}
	}

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
		panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	ifnet_lock_exclusive(ifp);

	/* Sanity check */
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_threads_pending == 0);

	/*
	 * If a link address was supplied, either adopt its length (when
	 * the interface did not declare one) or insist it matches.
	 */
	if (ll_addr != NULL) {
		if (ifp->if_addrlen == 0) {
			ifp->if_addrlen = ll_addr->sdl_alen;
		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return EINVAL;
		}
	}

	/*
	 * Allow interfaces without protocol families to attach
	 * only if they have the necessary fields filled out.
	 */
	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
		DLIL_PRINTF("%s: Attempt to attach interface without "
		    "family module - %d\n", __func__, ifp->if_family);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENODEV;
	}

	/* Allocate protocol hash table */
	VERIFY(ifp->if_proto_hash == NULL);
	ifp->if_proto_hash = kalloc_type(struct proto_hash_entry,
	    PROTO_HASH_SLOTS, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	ifp->if_proto_hash_count = PROTO_HASH_SLOTS;

	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	TAILQ_INIT(&ifp->if_flt_head);
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Recycled (DLIF_REUSE) ifnets may carry suspended multicast
	 * memberships; only a fresh ifnet gets its list reinitialized. */
	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
		LIST_INIT(&ifp->if_multiaddrs);
	}

	VERIFY(ifp->if_allhostsinm == NULL);
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
	TAILQ_INIT(&ifp->if_addrhead);

	if (ifp->if_index == 0) {
		int idx = if_next_index();

		/*
		 * Since we exhausted the list of
		 * if_index's, try to find an empty slot
		 * in ifindex2ifnet.
		 */
		if (idx == -1 && if_index >= UINT16_MAX) {
			for (int i = 1; i < if_index; i++) {
				if (ifindex2ifnet[i] == NULL &&
				    ifnet_addrs[i - 1] == NULL) {
					idx = i;
					break;
				}
			}
		}
		if (idx == -1) {
			/* No index available at all */
			ifp->if_index = 0;
			ifnet_lock_done(ifp);
			ifnet_head_done();
			dlil_if_unlock();
			return ENOBUFS;
		}
		ifp->if_index = (uint16_t)idx;

		/* the lladdr passed at attach time is the permanent address */
		if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
		    ll_addr->sdl_alen == ETHER_ADDR_LEN) {
			bcopy(CONST_LLADDR(ll_addr),
			    dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
			dl_if->dl_if_permanent_ether_is_set = 1;
		}
	}
	/* There should not be anything occupying this slot */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* allocate (if needed) and initialize a link address */
	ifa = dlil_alloc_lladdr(ifp, ll_addr);
	if (ifa == NULL) {
		ifnet_lock_done(ifp);
		ifnet_head_done();
		dlil_if_unlock();
		return ENOBUFS;
	}

	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
	ifnet_addrs[ifp->if_index - 1] = ifa;

	/* make this address the first on the list */
	IFA_LOCK(ifa);
	/* hold a reference for ifnet_addrs[] */
	ifa_addref(ifa);
	/* if_attach_link_ifa() holds a reference for ifa_link */
	if_attach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Publish the ifnet: visible to lookups from here on */
	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
	ifindex2ifnet[ifp->if_index] = ifp;

	/* Hold a reference to the underlying dlil_ifnet */
	ifnet_reference(ifp);

	/* Clear stats (save and restore other fields that we care) */
	if_data_saved = ifp->if_data;
	bzero(&ifp->if_data, sizeof(ifp->if_data));
	ifp->if_data.ifi_type = if_data_saved.ifi_type;
	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
	ifnet_touch_lastchange(ifp);

	VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
	    ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);

	dlil_ifclassq_setup(ifp, ifp->if_snd);

	/* Sanity checks on the input thread storage */
	dl_inp = &dl_if->dl_if_inpstorage;
	bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
	VERIFY(dl_inp->dlth_flags == 0);
	VERIFY(dl_inp->dlth_wtot == 0);
	VERIFY(dl_inp->dlth_ifp == NULL);
	VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
	VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
	VERIFY(!dl_inp->dlth_affinity);
	VERIFY(ifp->if_inp == NULL);
	VERIFY(dl_inp->dlth_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_strategy == NULL);
	VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(dl_inp->dlth_affinity_tag == 0);

#if IFNET_INPUT_SANITY_CHK
	VERIFY(dl_inp->dlth_pkts_cnt == 0);
#endif /* IFNET_INPUT_SANITY_CHK */

	VERIFY(ifp->if_poll_thread == THREAD_NULL);
	dlil_reset_rxpoll_params(ifp);
	/*
	 * A specific DLIL input thread is created per non-loopback interface.
	 */
	if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
		ifp->if_inp = dl_inp;
		ifnet_incr_pending_thread_count(ifp);
		err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
		if (err == ENODEV) {
			/* ENODEV means no dedicated input thread is needed */
			VERIFY(thfunc == NULL);
			ifnet_decr_pending_thread_count(ifp);
		} else if (err != 0) {
			panic_plain("%s: ifp=%p couldn't get an input thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
	}
	/*
	 * If the driver supports the new transmit model, calculate flow hash
	 * and create a workloop starter thread to invoke the if_start callback
	 * where the packets may be dequeued and transmitted.
	 */
	if (ifp->if_eflags & IFEF_TXSTART) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		ifp->if_flowhash = ifnet_calc_flowhash(ifp);
		VERIFY(ifp->if_flowhash != 0);
		VERIFY(ifp->if_start_thread == THREAD_NULL);

		ifnet_set_start_cycle(ifp, NULL);
		ifp->if_start_active = 0;
		ifp->if_start_req = 0;
		ifp->if_start_flags = 0;
		VERIFY(ifp->if_start != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_start_thread_func,
		    ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
			panic_plain("%s: "
			    "ifp=%p couldn't get a start thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* Slightly boost the starter thread's precedence */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_start_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	} else {
		ifp->if_flowhash = 0;
	}

	/* Reset polling parameters */
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	VERIFY(ifp->if_poll_thread == THREAD_NULL);

	/*
	 * If the driver supports the new receive model, create a poller
	 * thread to invoke if_input_poll callback where the packets may
	 * be dequeued from the driver and processed for reception.
	 * if the interface is netif compat then the poller thread is
	 * managed by netif.
	 */
	if (dlil_is_rxpoll_input(thfunc)) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		VERIFY(ifp->if_input_poll != NULL);
		VERIFY(ifp->if_input_ctl != NULL);
		ifnet_incr_pending_thread_count(ifp);
		if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
		    &ifp->if_poll_thread)) != KERN_SUCCESS) {
			panic_plain("%s: ifp=%p couldn't get a poll thread; "
			    "err=%d", __func__, ifp, err);
			/* NOTREACHED */
		}
		/* Slightly boost the poller thread's precedence */
		bzero(&info, sizeof(info));
		info.importance = 1;
		kret = thread_policy_set(ifp->if_poll_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
	}

	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	VERIFY(ifp->if_desc.ifd_len == 0);
	VERIFY(ifp->if_desc.ifd_desc != NULL);

	/* Record attach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);

	/* Count suspended AF_LINK/AF_UNSPEC memberships on a reused ifnet */
	ifp->if_updatemcasts = 0;
	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
		struct ifmultiaddr *ifma;
		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			IFMA_LOCK(ifma);
			if (ifma->ifma_addr->sa_family == AF_LINK ||
			    ifma->ifma_addr->sa_family == AF_UNSPEC) {
				ifp->if_updatemcasts++;
			}
			IFMA_UNLOCK(ifma);
		}

		DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
		    "membership(s)\n", if_name(ifp),
		    ifp->if_updatemcasts);
	}

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear foreground/realtime activity timestamps */
	ifp->if_fg_sendts = 0;
	ifp->if_rt_sendts = 0;

	/* Clear throughput estimates and radio type */
	ifp->if_estimated_up_bucket = 0;
	ifp->if_estimated_down_bucket = 0;
	ifp->if_radio_type = 0;
	ifp->if_radio_channel = 0;

	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);
	VERIFY(ifp->if_delegated.ultra_constrained == 0);

	VERIFY(ifp->if_agentids == NULL);
	VERIFY(ifp->if_agentcount == 0);

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
	ifp->if_interface_state.interface_availability =
	    IF_INTERFACE_STATE_INTERFACE_AVAILABLE;

	/* Initialize Link Quality Metric (loopback [lo0] is always good) */
	if (ifp == lo_ifp) {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_LQM_STATE_VALID;
	} else {
		ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
	}

	/*
	 * Enable ECN capability on this interface depending on the
	 * value of ECN global setting
	 */
	if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
		if_set_eflags(ifp, IFEF_ECN_ENABLE);
		if_clear_eflags(ifp, IFEF_ECN_DISABLE);
	}

	/*
	 * Built-in Cyclops always on policy for WiFi infra
	 */
	if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
		errno_t error;

		error = if_set_qosmarking_mode(ifp,
		    IFRTYPE_QOSMARKING_FASTLANE);
		if (error != 0) {
			DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
			    __func__, ifp->if_xname, error);
		} else {
			if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
#if (DEVELOPMENT || DEBUG)
			DLIL_PRINTF("%s fastlane enabled on %s\n",
			    __func__, ifp->if_xname);
#endif /* (DEVELOPMENT || DEBUG) */
		}
	}

	ifnet_lock_done(ifp);
	ifnet_head_done();

#if SKYWALK
	netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_cached_route_lock);
	/* Enable forwarding cached route */
	ifp->if_fwd_cacheok = 1;
	/* Clean up any existing cached routes */
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));

	/*
	 * Allocate and attach IGMPv3/MLDv2 interface specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * the ifnet is detached, since the network-layer records still
	 * refer to the info structures even after that. This also
	 * makes it possible for them to still function after the ifnet
	 * is recycled or reattached.
	 */
#if INET
	if (IGMP_IFINFO(ifp) == NULL) {
		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
		VERIFY(IGMP_IFINFO(ifp) != NULL);
	} else {
		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
		igmp_domifreattach(IGMP_IFINFO(ifp));
	}
#endif /* INET */
	if (MLD_IFINFO(ifp) == NULL) {
		MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
		VERIFY(MLD_IFINFO(ifp) != NULL);
	} else {
		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
		mld_domifreattach(MLD_IFINFO(ifp));
	}

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);

	/*
	 * Wait for the created kernel threads for I/O to get
	 * scheduled and run at least once before we proceed
	 * to mark interface as attached.
	 */
	lck_mtx_lock(&ifp->if_ref_lock);
	while (ifp->if_threads_pending != 0) {
		DLIL_PRINTF("%s: Waiting for all kernel threads created for "
		    "interface %s to get scheduled at least once.\n",
		    __func__, ifp->if_xname);
		(void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
		    __func__, NULL);
		LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
	DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
	    "at least once. Proceeding.\n", __func__, ifp->if_xname);

	/* Finally mark this ifnet as attached. */
	ifnet_lock_exclusive(ifp);
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
	lck_mtx_unlock(&ifp->if_ref_lock);
	if (net_rtref) {
		/* boot-args override; enable idle notification */
		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
		    IFRF_IDLE_NOTIFY);
	} else {
		/* apply previous request(s) to set the idle flags, if any */
		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
		    ifp->if_idle_new_flags_mask);
	}
#if SKYWALK
	/* the interface is fully attached; let the nexus adapter know */
	if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
		if (netif_compat) {
			if (sk_netif_compat_txmodel ==
			    NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
				ifnet_enqueue_multi_setup(ifp,
				    sk_tx_delay_qlen, sk_tx_delay_timeout);
			}
			ifp->if_nx_netif = nexus_netif;
		}
		ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
	}
#endif /* SKYWALK */
	ifnet_lock_done(ifp);
	dlil_if_unlock();

#if PF
	/*
	 * Attach packet filter to this interface, if enabled.
	 */
	pf_ifnet_hook(ifp, 1);
#endif /* PF */

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
	}

	return 0;
}
5376
/*
 * if_purgeaddrs: tell the upper layers to drop all of their network
 * addresses on this interface (IPv4 when INET is built, and IPv6).
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
5385
/*
 * ifnet_detach: begin detaching an attached ifnet.  Marks the ifnet
 * IFRF_DETACHING, removes it from the global lists so it is no longer
 * visible to lookups, resets per-interface state, and hands the final
 * teardown off to the detacher worker thread (to avoid reentrancy).
 *
 * Returns 0 on success, EINVAL if ifp is NULL or not attached, and
 * ENXIO if a detach is already in progress.
 */
errno_t
ifnet_detach(ifnet_t ifp)
{
	ifnet_ref_t delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	/* Invalidate ND6 CGA state before taking the interface down */
	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}
	os_log(OS_LOG_DEFAULT, "%s detaching", if_name(ifp));

	/* Mark the interface down */
	if_down(ifp);

	/*
	 * IMPORTANT NOTE
	 *
	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
	 * or equivalently, ifnet_is_attached(ifp, 1), can't be modified
	 * until after we've waited for all I/O references to drain
	 * in ifnet_detach_final().
	 */

	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	/* Reset CLAT46 flag */
	if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connected after the interface
	 * went down
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset router mode */
	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	/*
	 * Increment the generation count on interface deletion
	 */
	ifp->if_creation_generation_id = os_atomic_inc(&if_creation_generation_count, relaxed);

	ifnet_lock_done(ifp);
	ifnet_head_done();

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Force reset link heuristics */
	if (ifp->if_link_heuristics_tcall != NULL) {
		thread_call_cancel_wait(ifp->if_link_heuristics_tcall);
		thread_call_free(ifp->if_link_heuristics_tcall);
		ifp->if_link_heuristics_tcall = NULL;
	}
	if_clear_xflags(ifp, IFXF_LINK_HEURISTICS);

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		kfree_type(struct if_link_status, ifp->if_link_status);
		ifp->if_link_status = NULL;
	}

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
	mld_domifdetach(ifp);

#if SKYWALK
	/* Clean up any netns tokens still pointing to this ifnet */
	netns_ifnet_detach(ifp);
#endif /* SKYWALK */
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
5585
5586 static void
ifnet_detaching_enqueue(struct ifnet * ifp)5587 ifnet_detaching_enqueue(struct ifnet *ifp)
5588 {
5589 dlil_if_lock_assert();
5590
5591 ++ifnet_detaching_cnt;
5592 VERIFY(ifnet_detaching_cnt != 0);
5593 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
5594 wakeup((caddr_t)&ifnet_delayed_run);
5595 }
5596
5597 static struct ifnet *
ifnet_detaching_dequeue(void)5598 ifnet_detaching_dequeue(void)
5599 {
5600 ifnet_ref_t ifp;
5601
5602 dlil_if_lock_assert();
5603
5604 ifp = TAILQ_FIRST(&ifnet_detaching_head);
5605 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
5606 if (ifp != NULL) {
5607 VERIFY(ifnet_detaching_cnt != 0);
5608 --ifnet_detaching_cnt;
5609 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
5610 ifp->if_detaching_link.tqe_next = NULL;
5611 ifp->if_detaching_link.tqe_prev = NULL;
5612 }
5613 return ifp;
5614 }
5615
/*
 * Continuation body of the detacher worker thread: drain the queue of
 * ifnets awaiting final teardown, then block again on ifnet_delayed_run
 * with this function as the continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	ifnet_ref_t ifp;

	dlil_if_lock();
	/* First pass after startup: leave the embryonic state exactly once */
	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constraint so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* drop the lock across the (blocking) final detach */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	/* Queue drained: sleep until the next enqueue wakes us */
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
5658
/*
 * Entry point of the detacher worker thread.  Arms the wait channel,
 * marks itself embryonic, and self-wakes once so the continuation runs
 * immediately and clears the pending-thread count.  Never returns.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
5675
5676 static void
ifnet_detach_final(struct ifnet * ifp)5677 ifnet_detach_final(struct ifnet *ifp)
5678 {
5679 struct ifnet_filter *filter, *filter_next;
5680 struct dlil_ifnet *dlifp;
5681 struct ifnet_filter_head fhead;
5682 struct dlil_threading_info *inp;
5683 struct ifaddr *ifa;
5684 ifnet_detached_func if_free;
5685 int i;
5686 bool waited = false;
5687
5688 /* Let BPF know we're detaching */
5689 bpfdetach(ifp);
5690
5691 #if SKYWALK
5692 dlil_netif_detach_notify(ifp);
5693 /*
5694 * Wait for the datapath to quiesce before tearing down
5695 * netif/flowswitch nexuses.
5696 */
5697 dlil_quiesce_and_detach_nexuses(ifp);
5698 #endif /* SKYWALK */
5699
5700 lck_mtx_lock(&ifp->if_ref_lock);
5701 if (!(ifp->if_refflags & IFRF_DETACHING)) {
5702 panic("%s: flags mismatch (detaching not set) ifp=%p",
5703 __func__, ifp);
5704 /* NOTREACHED */
5705 }
5706
5707 /*
5708 * Wait until the existing IO references get released
5709 * before we proceed with ifnet_detach. This is not a
5710 * common case, so block without using a continuation.
5711 */
5712 while (ifp->if_refio > 0) {
5713 waited = true;
5714 DLIL_PRINTF("%s: %s waiting for IO references to drain\n",
5715 __func__, if_name(ifp));
5716 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
5717 (PZERO - 1), "ifnet_ioref_wait", NULL);
5718 }
5719 if (waited) {
5720 DLIL_PRINTF("%s: %s IO references drained\n",
5721 __func__, if_name(ifp));
5722 }
5723 VERIFY(ifp->if_datamov == 0);
5724 VERIFY(ifp->if_drainers == 0);
5725 VERIFY(ifp->if_suspend == 0);
5726 ifp->if_refflags &= ~IFRF_READY;
5727 lck_mtx_unlock(&ifp->if_ref_lock);
5728
5729 #if SKYWALK
5730 VERIFY(LIST_EMPTY(&ifp->if_netns_tokens));
5731 #endif /* SKYWALK */
5732 /* Drain and destroy send queue */
5733 ifclassq_teardown(ifp->if_snd);
5734
5735 /* Detach interface filters */
5736 lck_mtx_lock(&ifp->if_flt_lock);
5737 if_flt_monitor_enter(ifp);
5738
5739 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
5740 fhead = ifp->if_flt_head;
5741 TAILQ_INIT(&ifp->if_flt_head);
5742
5743 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
5744 filter_next = TAILQ_NEXT(filter, filt_next);
5745 lck_mtx_unlock(&ifp->if_flt_lock);
5746
5747 dlil_detach_filter_internal(filter, 1);
5748 lck_mtx_lock(&ifp->if_flt_lock);
5749 }
5750 if_flt_monitor_leave(ifp);
5751 lck_mtx_unlock(&ifp->if_flt_lock);
5752
5753 /* Tell upper layers to drop their network addresses */
5754 if_purgeaddrs(ifp);
5755
5756 ifnet_lock_exclusive(ifp);
5757
5758 /* Clear agent IDs */
5759 if (ifp->if_agentids != NULL) {
5760 kfree_data_sized_by(ifp->if_agentids, ifp->if_agentcount);
5761 }
5762
5763 bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
5764 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
5765
5766 /* Unplumb all protocols */
5767 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
5768 struct if_proto *proto;
5769
5770 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5771 while (proto != NULL) {
5772 protocol_family_t family = proto->protocol_family;
5773 ifnet_lock_done(ifp);
5774 proto_unplumb(family, ifp);
5775 ifnet_lock_exclusive(ifp);
5776 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
5777 }
5778 /* There should not be any protocols left */
5779 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
5780 }
5781 kfree_type_counted_by(struct proto_hash_entry, ifp->if_proto_hash_count, ifp->if_proto_hash);
5782
5783 /* Detach (permanent) link address from if_addrhead */
5784 ifa = TAILQ_FIRST(&ifp->if_addrhead);
5785 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
5786 IFA_LOCK(ifa);
5787 if_detach_link_ifa(ifp, ifa);
5788 IFA_UNLOCK(ifa);
5789
5790 /* Remove (permanent) link address from ifnet_addrs[] */
5791 ifa_remref(ifa);
5792 ifnet_addrs[ifp->if_index - 1] = NULL;
5793
5794 /* This interface should not be on {ifnet_head,detaching} */
5795 VERIFY(ifp->if_link.tqe_next == NULL);
5796 VERIFY(ifp->if_link.tqe_prev == NULL);
5797 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
5798 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
5799 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
5800 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
5801
5802 /* The slot should have been emptied */
5803 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
5804
5805 /* There should not be any addresses left */
5806 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
5807
5808 /*
5809 * Signal the starter thread to terminate itself, and wait until
5810 * it has exited.
5811 */
5812 if (ifp->if_start_thread != THREAD_NULL) {
5813 lck_mtx_lock_spin(&ifp->if_start_lock);
5814 ifp->if_start_flags |= IFSF_TERMINATING;
5815 wakeup_one((caddr_t)&ifp->if_start_thread);
5816 lck_mtx_unlock(&ifp->if_start_lock);
5817
5818 /* wait for starter thread to terminate */
5819 lck_mtx_lock(&ifp->if_start_lock);
5820 while (ifp->if_start_thread != THREAD_NULL) {
5821 if (dlil_verbose) {
5822 DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
5823 __func__,
5824 if_name(ifp));
5825 }
5826 (void) msleep(&ifp->if_start_thread,
5827 &ifp->if_start_lock, (PZERO - 1),
5828 "ifnet_start_thread_exit", NULL);
5829 }
5830 lck_mtx_unlock(&ifp->if_start_lock);
5831 if (dlil_verbose) {
5832 DLIL_PRINTF("%s: %s starter thread termination complete",
5833 __func__, if_name(ifp));
5834 }
5835 }
5836
5837 /*
5838 * Signal the poller thread to terminate itself, and wait until
5839 * it has exited.
5840 */
5841 if (ifp->if_poll_thread != THREAD_NULL) {
5842 #if SKYWALK
5843 VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5844 #endif /* SKYWALK */
5845 lck_mtx_lock_spin(&ifp->if_poll_lock);
5846 ifp->if_poll_flags |= IF_POLLF_TERMINATING;
5847 wakeup_one((caddr_t)&ifp->if_poll_thread);
5848 lck_mtx_unlock(&ifp->if_poll_lock);
5849
5850 /* wait for poller thread to terminate */
5851 lck_mtx_lock(&ifp->if_poll_lock);
5852 while (ifp->if_poll_thread != THREAD_NULL) {
5853 if (dlil_verbose) {
5854 DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
5855 __func__,
5856 if_name(ifp));
5857 }
5858 (void) msleep(&ifp->if_poll_thread,
5859 &ifp->if_poll_lock, (PZERO - 1),
5860 "ifnet_poll_thread_exit", NULL);
5861 }
5862 lck_mtx_unlock(&ifp->if_poll_lock);
5863 if (dlil_verbose) {
5864 DLIL_PRINTF("%s: %s poller thread termination complete\n",
5865 __func__, if_name(ifp));
5866 }
5867 }
5868
5869 /*
5870 * If thread affinity was set for the workloop thread, we will need
5871 * to tear down the affinity and release the extra reference count
5872 * taken at attach time. Does not apply to lo0 or other interfaces
5873 * without dedicated input threads.
5874 */
5875 if ((inp = ifp->if_inp) != NULL) {
5876 VERIFY(inp != dlil_main_input_thread);
5877
5878 if (inp->dlth_affinity) {
5879 struct thread *__single tp, *__single wtp, *__single ptp;
5880
5881 lck_mtx_lock_spin(&inp->dlth_lock);
5882 wtp = inp->dlth_driver_thread;
5883 inp->dlth_driver_thread = THREAD_NULL;
5884 ptp = inp->dlth_poller_thread;
5885 inp->dlth_poller_thread = THREAD_NULL;
5886 ASSERT(inp->dlth_thread != THREAD_NULL);
5887 tp = inp->dlth_thread; /* don't nullify now */
5888 inp->dlth_affinity_tag = 0;
5889 inp->dlth_affinity = FALSE;
5890 lck_mtx_unlock(&inp->dlth_lock);
5891
5892 /* Tear down poll thread affinity */
5893 if (ptp != NULL) {
5894 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
5895 VERIFY(ifp->if_xflags & IFXF_LEGACY);
5896 (void) dlil_affinity_set(ptp,
5897 THREAD_AFFINITY_TAG_NULL);
5898 thread_deallocate(ptp);
5899 }
5900
5901 /* Tear down workloop thread affinity */
5902 if (wtp != NULL) {
5903 (void) dlil_affinity_set(wtp,
5904 THREAD_AFFINITY_TAG_NULL);
5905 thread_deallocate(wtp);
5906 }
5907
5908 /* Tear down DLIL input thread affinity */
5909 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
5910 thread_deallocate(tp);
5911 }
5912
5913 /* disassociate ifp DLIL input thread */
5914 ifp->if_inp = NULL;
5915
5916 /* if the worker thread was created, tell it to terminate */
5917 if (inp->dlth_thread != THREAD_NULL) {
5918 lck_mtx_lock_spin(&inp->dlth_lock);
5919 inp->dlth_flags |= DLIL_INPUT_TERMINATE;
5920 if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
5921 wakeup_one((caddr_t)&inp->dlth_flags);
5922 }
5923 lck_mtx_unlock(&inp->dlth_lock);
5924 ifnet_lock_done(ifp);
5925
5926 /* wait for the input thread to terminate */
5927 lck_mtx_lock_spin(&inp->dlth_lock);
5928 while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
5929 == 0) {
5930 (void) msleep(&inp->dlth_flags, &inp->dlth_lock,
5931 (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
5932 }
5933 lck_mtx_unlock(&inp->dlth_lock);
5934 ifnet_lock_exclusive(ifp);
5935 }
5936
5937 /* clean-up input thread state */
5938 dlil_clean_threading_info(inp);
5939 /* clean-up poll parameters */
5940 VERIFY(ifp->if_poll_thread == THREAD_NULL);
5941 dlil_reset_rxpoll_params(ifp);
5942 }
5943
5944 /* The driver might unload, so point these to ourselves */
5945 if_free = ifp->if_free;
5946 ifp->if_output_dlil = ifp_if_output;
5947 ifp->if_output = ifp_if_output;
5948 ifp->if_pre_enqueue = ifp_if_output;
5949 ifp->if_start = ifp_if_start;
5950 ifp->if_output_ctl = ifp_if_ctl;
5951 ifp->if_input_dlil = ifp_if_input;
5952 ifp->if_input_poll = ifp_if_input_poll;
5953 ifp->if_input_ctl = ifp_if_ctl;
5954 ifp->if_ioctl = ifp_if_ioctl;
5955 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
5956 ifp->if_free = ifp_if_free;
5957 ifp->if_demux = ifp_if_demux;
5958 ifp->if_event = ifp_if_event;
5959 ifp->if_framer_legacy = ifp_if_framer;
5960 ifp->if_framer = ifp_if_framer_extended;
5961 ifp->if_add_proto = ifp_if_add_proto;
5962 ifp->if_del_proto = ifp_if_del_proto;
5963 ifp->if_check_multi = ifp_if_check_multi;
5964
5965 /* wipe out interface description */
5966 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
5967 ifp->if_desc.ifd_len = 0;
5968 VERIFY(ifp->if_desc.ifd_desc != NULL);
5969 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
5970
5971 /* there shouldn't be any delegation by now */
5972 VERIFY(ifp->if_delegated.ifp == NULL);
5973 VERIFY(ifp->if_delegated.type == 0);
5974 VERIFY(ifp->if_delegated.family == 0);
5975 VERIFY(ifp->if_delegated.subfamily == 0);
5976 VERIFY(ifp->if_delegated.expensive == 0);
5977 VERIFY(ifp->if_delegated.constrained == 0);
5978 VERIFY(ifp->if_delegated.ultra_constrained == 0);
5979
5980 /* QoS marking get cleared */
5981 if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
5982 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
5983
5984 #if SKYWALK
5985 /* the nexus destructor is responsible for clearing these */
5986 VERIFY(ifp->if_na_ops == NULL);
5987 VERIFY(ifp->if_na == NULL);
5988 #endif /* SKYWALK */
5989
5990 /* interface could come up with different hwassist next time */
5991 ifp->if_hwassist = 0;
5992 ifp->if_capenable = 0;
5993
5994 /* promiscuous/allmulti counts need to start at zero again */
5995 ifp->if_pcount = 0;
5996 ifp->if_amcount = 0;
5997 ifp->if_flags &= ~(IFF_PROMISC | IFF_ALLMULTI);
5998
5999 ifnet_lock_done(ifp);
6000
6001 #if PF
6002 /*
6003 * Detach this interface from packet filter, if enabled.
6004 */
6005 pf_ifnet_hook(ifp, 0);
6006 #endif /* PF */
6007
6008 /* Filter list should be empty */
6009 lck_mtx_lock_spin(&ifp->if_flt_lock);
6010 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6011 VERIFY(ifp->if_flt_busy == 0);
6012 VERIFY(ifp->if_flt_waiters == 0);
6013 VERIFY(ifp->if_flt_non_os_count == 0);
6014 VERIFY(ifp->if_flt_no_tso_count == 0);
6015 lck_mtx_unlock(&ifp->if_flt_lock);
6016
6017 /* Last chance to drain send queue */
6018 if_qflush_snd(ifp, 0);
6019
6020 /* Last chance to cleanup any cached route */
6021 lck_mtx_lock(&ifp->if_cached_route_lock);
6022 VERIFY(!ifp->if_fwd_cacheok);
6023 ROUTE_RELEASE(&ifp->if_fwd_route);
6024 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
6025 ROUTE_RELEASE(&ifp->if_src_route);
6026 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
6027 ROUTE_RELEASE(&ifp->if_src_route6);
6028 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
6029 lck_mtx_unlock(&ifp->if_cached_route_lock);
6030
6031 /* Ignore any pending data threshold as the interface is anyways gone */
6032 ifp->if_data_threshold = 0;
6033
6034 VERIFY(ifp->if_dt_tcall != NULL);
6035 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
6036
6037 ifnet_llreach_ifdetach(ifp);
6038
6039 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);
6040
6041 /*
6042 * Finally, mark this ifnet as detached.
6043 */
6044 os_log(OS_LOG_DEFAULT, "%s detached", if_name(ifp));
6045
6046 lck_mtx_lock_spin(&ifp->if_ref_lock);
6047 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6048 panic("%s: flags mismatch (detaching not set) ifp=%p",
6049 __func__, ifp);
6050 /* NOTREACHED */
6051 }
6052 ifp->if_refflags &= ~IFRF_DETACHING;
6053 lck_mtx_unlock(&ifp->if_ref_lock);
6054 if (if_free != NULL) {
6055 if_free(ifp);
6056 }
6057
6058 ifclassq_release(&ifp->if_snd);
6059
6060 /* we're fully detached, clear the "in use" bit */
6061 dlifp = (struct dlil_ifnet *)ifp;
6062 lck_mtx_lock(&dlifp->dl_if_lock);
6063 ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
6064 dlifp->dl_if_flags &= ~DLIF_INUSE;
6065 lck_mtx_unlock(&dlifp->dl_if_lock);
6066
6067 /* Release reference held during ifnet attach */
6068 ifnet_release(ifp);
6069 }
6070
6071 errno_t
ifp_if_output(struct ifnet * ifp,struct mbuf * m)6072 ifp_if_output(struct ifnet *ifp, struct mbuf *m)
6073 {
6074 #pragma unused(ifp)
6075 m_freem_list(m);
6076 return 0;
6077 }
6078
void
ifp_if_start(struct ifnet *ifp)
{
	/*
	 * Stub start callback for a detached ifnet: drain whatever is
	 * still queued on the interface.
	 */
	ifnet_purge(ifp);
}
6084
6085 static errno_t
ifp_if_input(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s,boolean_t poll,struct thread * tp)6086 ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
6087 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
6088 boolean_t poll, struct thread *tp)
6089 {
6090 #pragma unused(ifp, m_tail, s, poll, tp)
6091 m_freem_list(m_head);
6092 return ENXIO;
6093 }
6094
6095 static void
ifp_if_input_poll(struct ifnet * ifp,u_int32_t flags,u_int32_t max_cnt,struct mbuf ** m_head,struct mbuf ** m_tail,u_int32_t * cnt,u_int32_t * len)6096 ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
6097 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
6098 {
6099 #pragma unused(ifp, flags, max_cnt)
6100 if (m_head != NULL) {
6101 *m_head = NULL;
6102 }
6103 if (m_tail != NULL) {
6104 *m_tail = NULL;
6105 }
6106 if (cnt != NULL) {
6107 *cnt = 0;
6108 }
6109 if (len != NULL) {
6110 *len = 0;
6111 }
6112 }
6113
6114 static errno_t
ifp_if_ctl(struct ifnet * ifp,ifnet_ctl_cmd_t cmd,u_int32_t arglen,void * arg)6115 ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
6116 {
6117 #pragma unused(ifp, cmd, arglen, arg)
6118 return EOPNOTSUPP;
6119 }
6120
6121 static errno_t
ifp_if_demux(struct ifnet * ifp,struct mbuf * m,char * fh,protocol_family_t * pf)6122 ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
6123 {
6124 #pragma unused(ifp, fh, pf)
6125 m_freem(m);
6126 return EJUSTRETURN;
6127 }
6128
6129 static errno_t
ifp_if_add_proto(struct ifnet * ifp,protocol_family_t pf,const struct ifnet_demux_desc * da,u_int32_t dc)6130 ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
6131 const struct ifnet_demux_desc *da, u_int32_t dc)
6132 {
6133 #pragma unused(ifp, pf, da, dc)
6134 return EINVAL;
6135 }
6136
6137 static errno_t
ifp_if_del_proto(struct ifnet * ifp,protocol_family_t pf)6138 ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
6139 {
6140 #pragma unused(ifp, pf)
6141 return EINVAL;
6142 }
6143
6144 static errno_t
ifp_if_check_multi(struct ifnet * ifp,const struct sockaddr * sa)6145 ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
6146 {
6147 #pragma unused(ifp, sa)
6148 return EOPNOTSUPP;
6149 }
6150
/*
 * Legacy framing callback installed on a detached ifnet; forwards to
 * ifp_if_framer_extended().  The signature is platform-conditional:
 * non-macOS targets carry the extra pre/post header-space parameters,
 * macOS passes NULL for both.
 */
#if !XNU_TARGET_OS_OSX
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, IFNET_LLADDR_T ll, IFNET_FRAME_TYPE_T t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
6169
6170 static errno_t
ifp_if_framer_extended(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * sa,IFNET_LLADDR_T ll,IFNET_FRAME_TYPE_T t,u_int32_t * pre,u_int32_t * post)6171 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
6172 const struct sockaddr *sa,
6173 IFNET_LLADDR_T ll,
6174 IFNET_FRAME_TYPE_T t,
6175 u_int32_t *pre, u_int32_t *post)
6176 {
6177 #pragma unused(ifp, sa, ll, t)
6178 m_freem(*m);
6179 *m = NULL;
6180
6181 if (pre != NULL) {
6182 *pre = 0;
6183 }
6184 if (post != NULL) {
6185 *post = 0;
6186 }
6187
6188 return EJUSTRETURN;
6189 }
6190
6191 errno_t
ifp_if_ioctl(struct ifnet * ifp,unsigned long cmd,void * arg)6192 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
6193 {
6194 #pragma unused(ifp, cmd, arg)
6195 return EOPNOTSUPP;
6196 }
6197
6198 static errno_t
ifp_if_set_bpf_tap(struct ifnet * ifp,bpf_tap_mode tm,bpf_packet_func f)6199 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
6200 {
6201 #pragma unused(ifp, tm, f)
6202 /* XXX not sure what to do here */
6203 return 0;
6204 }
6205
static void
ifp_if_free(struct ifnet *ifp)
{
#pragma unused(ifp)
	/* Intentionally empty: placeholder free callback for detached ifnets. */
}
6211
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
#pragma unused(ifp, e)
	/* Intentionally empty: events on a detached ifnet are ignored. */
}
6217
6218 __private_extern__ void
dlil_proto_unplumb_all(struct ifnet * ifp)6219 dlil_proto_unplumb_all(struct ifnet *ifp)
6220 {
6221 /*
6222 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
6223 * each bucket contains exactly one entry; PF_VLAN does not need an
6224 * explicit unplumb.
6225 *
6226 * if_proto_hash[3] is for other protocols; we expect anything
6227 * in this bucket to respond to the DETACHING event (which would
6228 * have happened by now) and do the unplumb then.
6229 */
6230 (void) proto_unplumb(PF_INET, ifp);
6231 (void) proto_unplumb(PF_INET6, ifp);
6232 }
6233
/*
 * Copy the interface's cached IPv4 source route into *dst via
 * route_copyout(), serialized by if_cached_route_lock.  The lock is
 * taken in spin mode and converted to full mutex mode before the copy
 * (route_copyout() is not called while spinning).
 */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6244
/*
 * Store *src back into the interface's cached IPv4 source route via
 * route_copyin(), but only while forwarding-cache use is permitted
 * (if_fwd_cacheok); otherwise the route reference is simply released.
 * Serialized by if_cached_route_lock (spin-acquired, then converted).
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6258
/*
 * IPv6 counterpart of ifp_src_route_copyout(): copy the cached
 * if_src_route6 into *dst under if_cached_route_lock.  The struct
 * route_in6 is passed to route_copyout() through a struct route cast,
 * matching how the cache is stored.
 */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6270
/*
 * IPv6 counterpart of ifp_src_route_copyin(): store *src into the
 * cached if_src_route6 when if_fwd_cacheok allows it, otherwise drop
 * the reference.  Serialized by if_cached_route_lock.
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
6285
/*
 * Look up a scoped IPv4 route on ifp for src_ip, using the interface's
 * cached if_src_route as a fast path.
 *
 * The cached route is copied out first; if it is still usable and its
 * destination matches src_ip, it is returned as-is.  Otherwise the stale
 * copy is released, a fresh rtalloc1_scoped() lookup is done, and the
 * result is stored back into the cache (the copy-in consumes one
 * reference, so an extra RT_ADDREF is taken for the value returned to
 * the caller).  May return NULL if no route is found.
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = SIN(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		/* (Re)initialize the destination sockaddr if needed */
		if (dst->sin_family != AF_INET) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		/* ROUTE_RELEASE above must have cleared the entry */
		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped(SA(dst),
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
6320
/*
 * IPv6 counterpart of ifnet_cached_rtlookup_inet(): look up a scoped
 * route on ifp for *src_ip6 using the cached if_src_route6 fast path.
 *
 * On a cache miss (unusable route or address mismatch) the stale copy
 * is released, the destination sockaddr is rebuilt (including scope id
 * derived from ifp), a scoped lookup is performed, and the result is
 * stored back in the cache with an extra reference retained for the
 * caller.  May return NULL if no route is found.
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		/* (Re)initialize the destination sockaddr if needed */
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			SOCKADDR_ZERO(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		/* ro_rt is NULL after ROUTE_RELEASE; do a fresh lookup */
		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				SA(&src_rt.ro_dst), 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
6357
/*
 * Update the interface's link quality metric (LQM) state.
 *
 * The raw lqm value is normalized to one of the threshold "edge" values
 * (ABORT, MINIMALLY_VIABLE, POOR, GOOD).  If the normalized state
 * differs from the current one, per-state dwell-time and transition
 * counters are updated, the new state is recorded, link heuristics are
 * refreshed, and KEV_DL_LINK_QUALITY_METRIC_CHANGED is posted.
 *
 * 'locked' says whether the caller already holds the ifnet lock
 * exclusively.  Either way the lock is dropped around the kevent/NECP
 * calls and returned to the caller in the same state it arrived in.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;
	uint64_t now, delta;
	int8_t old_lqm;
	bool need_necp_client_update;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		/* Abort-level quality: flag TCP and kick the fast timer */
		os_atomic_or(&tcbinfo.ipi_flags, INPCBINFO_HANDLE_LQM_ABORT, relaxed);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return; /* nothing to update */
	}

	/* Account the time spent in the outgoing state */
	net_update_uptime();
	now = net_uptime_ms();
	ASSERT(now >= ifp->if_lqmstate_start_time);
	delta = now - ifp->if_lqmstate_start_time;

	old_lqm = ifp->if_interface_state.lqm_state;
	switch (old_lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_time += delta;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_time += delta;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_time += delta;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_time += delta;
		break;
	default:
		break;
	}
	/* Count the transition into the incoming state */
	switch (lqm) {
	case IFNET_LQM_THRESH_GOOD:
		ifp->if_lqm_good_cnt += 1;
		break;
	case IFNET_LQM_THRESH_POOR:
		ifp->if_lqm_poor_cnt += 1;
		break;
	case IFNET_LQM_THRESH_MINIMALLY_VIABLE:
		ifp->if_lqm_min_viable_cnt += 1;
		break;
	case IFNET_LQM_THRESH_BAD:
		ifp->if_lqm_bad_cnt += 1;
		break;
	default:
		break;
	}
	ifp->if_lqmstate_start_time = now;

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Update the link heuristics
	 */
	need_necp_client_update = if_update_link_heuristic(ifp);

	/*
	 * Don't want to hold the lock when issuing kernel events or calling NECP
	 */
	ifnet_lock_done(ifp);

	if (need_necp_client_update) {
		necp_update_all_clients_immediately_if_needed(true);
	}

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
6474
/*
 * Record a new RRC (radio resource control) state on ifp and post
 * KEV_DL_RRC_STATE_CHANGED when it actually changes.
 *
 * Locking: entered with the ifnet lock held exclusively (see
 * if_state_update()); the lock is dropped around the kevent post and
 * reacquired before returning, so callers must not assume interface
 * state stayed stable across this call.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* No-op when the state is unchanged and already marked valid */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	ifnet_lock_exclusive(ifp);
}
6504
6505 errno_t
if_state_update(struct ifnet * ifp,struct if_interface_state * if_interface_state)6506 if_state_update(struct ifnet *ifp,
6507 struct if_interface_state *if_interface_state)
6508 {
6509 u_short if_index_available = 0;
6510
6511 ifnet_lock_exclusive(ifp);
6512
6513 if ((ifp->if_type != IFT_CELLULAR) &&
6514 (if_interface_state->valid_bitmask &
6515 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
6516 ifnet_lock_done(ifp);
6517 return ENOTSUP;
6518 }
6519 if ((if_interface_state->valid_bitmask &
6520 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
6521 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
6522 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
6523 ifnet_lock_done(ifp);
6524 return EINVAL;
6525 }
6526 if ((if_interface_state->valid_bitmask &
6527 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
6528 if_interface_state->rrc_state !=
6529 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
6530 if_interface_state->rrc_state !=
6531 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
6532 ifnet_lock_done(ifp);
6533 return EINVAL;
6534 }
6535
6536 if (if_interface_state->valid_bitmask &
6537 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6538 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
6539 }
6540 if (if_interface_state->valid_bitmask &
6541 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6542 if_rrc_state_update(ifp, if_interface_state->rrc_state);
6543 }
6544 if (if_interface_state->valid_bitmask &
6545 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6546 ifp->if_interface_state.valid_bitmask |=
6547 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6548 ifp->if_interface_state.interface_availability =
6549 if_interface_state->interface_availability;
6550
6551 if (ifp->if_interface_state.interface_availability ==
6552 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
6553 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
6554 __func__, if_name(ifp), ifp->if_index);
6555 if_index_available = ifp->if_index;
6556 } else {
6557 os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
6558 __func__, if_name(ifp), ifp->if_index);
6559 }
6560 }
6561 ifnet_lock_done(ifp);
6562
6563 /*
6564 * Check if the TCP connections going on this interface should be
6565 * forced to send probe packets instead of waiting for TCP timers
6566 * to fire. This is done on an explicit notification such as
6567 * SIOCSIFINTERFACESTATE which marks the interface as available.
6568 */
6569 if (if_index_available > 0) {
6570 tcp_interface_send_probe(if_index_available);
6571 }
6572
6573 return 0;
6574 }
6575
6576 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)6577 if_get_state(struct ifnet *ifp,
6578 struct if_interface_state *if_interface_state)
6579 {
6580 ifnet_lock_shared(ifp);
6581
6582 if_interface_state->valid_bitmask = 0;
6583
6584 if (ifp->if_interface_state.valid_bitmask &
6585 IF_INTERFACE_STATE_RRC_STATE_VALID) {
6586 if_interface_state->valid_bitmask |=
6587 IF_INTERFACE_STATE_RRC_STATE_VALID;
6588 if_interface_state->rrc_state =
6589 ifp->if_interface_state.rrc_state;
6590 }
6591 if (ifp->if_interface_state.valid_bitmask &
6592 IF_INTERFACE_STATE_LQM_STATE_VALID) {
6593 if_interface_state->valid_bitmask |=
6594 IF_INTERFACE_STATE_LQM_STATE_VALID;
6595 if_interface_state->lqm_state =
6596 ifp->if_interface_state.lqm_state;
6597 }
6598 if (ifp->if_interface_state.valid_bitmask &
6599 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
6600 if_interface_state->valid_bitmask |=
6601 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6602 if_interface_state->interface_availability =
6603 ifp->if_interface_state.interface_availability;
6604 }
6605
6606 ifnet_lock_done(ifp);
6607 }
6608
6609 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)6610 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
6611 {
6612 if (conn_probe > 1) {
6613 return EINVAL;
6614 }
6615 if (conn_probe == 0) {
6616 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6617 } else {
6618 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
6619 }
6620
6621 #if NECP
6622 necp_update_all_clients();
6623 #endif /* NECP */
6624
6625 tcp_probe_connectivity(ifp, conn_probe);
6626 return 0;
6627 }
6628
6629 /* for uuid.c */
6630 static int
get_ether_index(int * ret_other_index)6631 get_ether_index(int * ret_other_index)
6632 {
6633 ifnet_ref_t ifp;
6634 int en0_index = 0;
6635 int other_en_index = 0;
6636 int any_ether_index = 0;
6637 short best_unit = 0;
6638
6639 *ret_other_index = 0;
6640 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
6641 /*
6642 * find en0, or if not en0, the lowest unit en*, and if not
6643 * that, any ethernet
6644 */
6645 ifnet_lock_shared(ifp);
6646 if (strcmp(ifp->if_name, "en") == 0) {
6647 if (ifp->if_unit == 0) {
6648 /* found en0, we're done */
6649 en0_index = ifp->if_index;
6650 ifnet_lock_done(ifp);
6651 break;
6652 }
6653 if (other_en_index == 0 || ifp->if_unit < best_unit) {
6654 other_en_index = ifp->if_index;
6655 best_unit = ifp->if_unit;
6656 }
6657 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
6658 any_ether_index = ifp->if_index;
6659 }
6660 ifnet_lock_done(ifp);
6661 }
6662 if (en0_index == 0) {
6663 if (other_en_index != 0) {
6664 *ret_other_index = other_en_index;
6665 } else if (any_ether_index != 0) {
6666 *ret_other_index = any_ether_index;
6667 }
6668 }
6669 return en0_index;
6670 }
6671
6672 int
uuid_get_ethernet(u_int8_t * __counted_by (ETHER_ADDR_LEN)node)6673 uuid_get_ethernet(u_int8_t *__counted_by(ETHER_ADDR_LEN) node)
6674 {
6675 static int en0_index;
6676 ifnet_ref_t ifp;
6677 int other_index = 0;
6678 int the_index = 0;
6679 int ret;
6680
6681 ifnet_head_lock_shared();
6682 if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
6683 en0_index = get_ether_index(&other_index);
6684 }
6685 if (en0_index != 0) {
6686 the_index = en0_index;
6687 } else if (other_index != 0) {
6688 the_index = other_index;
6689 }
6690 if (the_index != 0) {
6691 struct dlil_ifnet *dl_if;
6692
6693 ifp = ifindex2ifnet[the_index];
6694 VERIFY(ifp != NULL);
6695 dl_if = (struct dlil_ifnet *)ifp;
6696 if (dl_if->dl_if_permanent_ether_is_set != 0) {
6697 /*
6698 * Use the permanent ethernet address if it is
6699 * available because it will never change.
6700 */
6701 memcpy(node, dl_if->dl_if_permanent_ether,
6702 ETHER_ADDR_LEN);
6703 } else {
6704 memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
6705 }
6706 ret = 0;
6707 } else {
6708 ret = -1;
6709 }
6710 ifnet_head_done();
6711 return ret;
6712 }
6713
6714 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])6715 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
6716 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
6717 {
6718 struct kev_dl_node_presence kev;
6719 struct sockaddr_dl *sdl;
6720 struct sockaddr_in6 *sin6;
6721 int ret = 0;
6722
6723 VERIFY(ifp);
6724 VERIFY(sa);
6725 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6726
6727 bzero(&kev, sizeof(kev));
6728 sin6 = &kev.sin6_node_address;
6729 sdl = &kev.sdl_node_address;
6730 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
6731 kev.rssi = rssi;
6732 kev.link_quality_metric = lqm;
6733 kev.node_proximity_metric = npm;
6734 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
6735
6736 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
6737 if (ret == 0 || ret == EEXIST) {
6738 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
6739 &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
6740 if (err != 0) {
6741 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
6742 "error %d\n", __func__, err);
6743 }
6744 }
6745
6746 if (ret == EEXIST) {
6747 ret = 0;
6748 }
6749 return ret;
6750 }
6751
6752 void
dlil_node_absent(struct ifnet * ifp,struct sockaddr * sa)6753 dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
6754 {
6755 struct kev_dl_node_absence kev = {};
6756 struct sockaddr_in6 *kev_sin6 = NULL;
6757 struct sockaddr_dl *kev_sdl = NULL;
6758 int error = 0;
6759
6760 VERIFY(ifp != NULL);
6761 VERIFY(sa != NULL);
6762 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
6763
6764 kev_sin6 = &kev.sin6_node_address;
6765 kev_sdl = &kev.sdl_node_address;
6766
6767 if (sa->sa_family == AF_INET6) {
6768 /*
6769 * If IPv6 address is given, get the link layer
6770 * address from what was cached in the neighbor cache
6771 */
6772 VERIFY(sa->sa_len <= sizeof(*kev_sin6));
6773 SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);
6774 error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
6775 } else {
6776 /*
6777 * If passed address is AF_LINK type, derive the address
6778 * based on the link address.
6779 */
6780 nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
6781 error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
6782 }
6783
6784 if (error == 0) {
6785 kev_sdl->sdl_type = ifp->if_type;
6786 kev_sdl->sdl_index = ifp->if_index;
6787
6788 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
6789 &kev.link_data, sizeof(kev), FALSE);
6790 }
6791 }
6792
/*
 * Report that a neighbor node is present (v2: the caller supplies both
 * the IPv6 address and the link-layer address, rather than having the
 * latter derived).  Updates the ND cache via nd6_alt_node_present()
 * and, when the node is new (0) or already known (EEXIST), posts a
 * KEV_DL_NODE_PRESENCE event carrying the addresses, RSSI, link
 * quality metric, proximity metric and service-info blob.  EEXIST is
 * mapped to 0 before returning to the caller.
 */
int
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	SOCKADDR_COPY(sdl, kev_sdl, sdl->sdl_len);
	/* Stamp with this interface's type/index */
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	SOCKADDR_COPY(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0 || ret == EEXIST) {
		/* final flag is TRUE when the node was already present (EEXIST) */
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
		}
	}

	/* A node we already knew about is not an error to the caller */
	if (ret == EEXIST) {
		ret = 0;
	}
	return ret;
}
6836
6837 const void *
dlil_ifaddr_bytes(const struct sockaddr_dl * sdl,size_t * sizep,kauth_cred_t * credp)6838 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
6839 kauth_cred_t *credp)
6840 {
6841 const u_int8_t *bytes;
6842 size_t size;
6843
6844 bytes = CONST_LLADDR(sdl);
6845 size = sdl->sdl_alen;
6846
6847 #if CONFIG_MACF
6848 if (dlil_lladdr_ckreq) {
6849 switch (sdl->sdl_type) {
6850 case IFT_ETHER:
6851 case IFT_IEEE1394:
6852 break;
6853 default:
6854 credp = NULL;
6855 break;
6856 }
6857 ;
6858
6859 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
6860 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
6861 [0] = 2
6862 };
6863
6864 bytes = unspec;
6865 }
6866 }
6867 #else
6868 #pragma unused(credp)
6869 #endif
6870
6871 if (sizep != NULL) {
6872 *sizep = size;
6873 }
6874 return bytes;
6875 }
6876
6877 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])6878 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
6879 u_int8_t info[DLIL_MODARGLEN])
6880 {
6881 struct kev_dl_issues kev;
6882 struct timeval tv;
6883
6884 VERIFY(ifp != NULL);
6885 VERIFY(modid != NULL);
6886 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
6887 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
6888
6889 bzero(&kev, sizeof(kev));
6890
6891 microtime(&tv);
6892 kev.timestamp = tv.tv_sec;
6893 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
6894 if (info != NULL) {
6895 bcopy(info, &kev.info, DLIL_MODARGLEN);
6896 }
6897
6898 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
6899 &kev.link_data, sizeof(kev), FALSE);
6900 }
6901
/*
 * Handle the SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC ioctls.
 *
 * Set: requires superuser.  IFRIFOF_BLOCK_OPPORTUNISTIC maps to
 * IFNET_THROTTLE_OPPORTUNISTIC, 0 maps to IFNET_THROTTLE_OFF, and any
 * other flag value is EINVAL.  Get: reflects the current throttle
 * level back into ifo_flags.  On success, both paths also report the
 * number of opportunistic TCP+UDP connections on the interface via
 * ifo_inuse.
 */
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	/* "Already at that level" is not an error to userspace */
	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
6960
/*
 * Fetch the current transmit throttling level of @ifp into @level.
 * Only meaningful for interfaces using the new TX model
 * (IFEF_TXSTART); ENXIO otherwise.  The value is obtained from the
 * send class queue via a CLASSQRQ_THROTTLE query.
 */
int
ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	/* Default when the classq isn't enabled */
	*level = IFNET_THROTTLE_OFF;

	ifq = ifp->if_snd;
	IFCQ_LOCK(ifq);
	/* Throttling works only for IFCQ, not ALTQ instances */
	if (IFCQ_IS_ENABLED(ifq)) {
		/* first field 0 = query (cf. ifnet_set_throttle, which passes 1) */
		cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };

		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
		*level = req.level;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}
6986
/*
 * Set the transmit throttling level of @ifp.  Only
 * IFNET_THROTTLE_OFF and IFNET_THROTTLE_OPPORTUNISTIC are accepted;
 * ENXIO for non-TXSTART interfaces, EINVAL for any other level.  On
 * success, NECP clients are notified (when built in) and output is
 * restarted if throttling was just turned off.
 */
int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = ifp->if_snd;

	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return EINVAL;
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq)) {
		/* first field 1 = set request */
		cqrq_throttle_t req = { 1, level };

		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
	}
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
		    level);
#if NECP
		necp_update_all_clients();
#endif /* NECP */
		/* Un-throttling: kick the starter so queued packets drain */
		if (level == IFNET_THROTTLE_OFF) {
			ifnet_start(ifp);
		}
	}

	return err;
}
7028
/*
 * Handle the SIOCSIFLOG / SIOCGIFLOG ioctls.  Setting requires the
 * PRIV_NET_INTERFACE_CONTROL privilege; the level must lie in
 * [IFNET_LOG_MIN, IFNET_LOG_MAX] and at least one bit within
 * IFNET_LOGF_MASK must be set.  Getting copies the interface's current
 * logging parameters back into the ifreq.
 */
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}

		/* Discard bits outside the mask; all-zero is invalid */
		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
7076
/*
 * Apply a logging level and facility flags to @ifp, and forward the
 * request (minus the DLIL facility bit) to the driver when it has
 * registered an output control callback.  Setting the level to
 * IFNET_LOG_DEFAULT clears all facility flags; otherwise the new
 * flags are OR-ed into the existing set.
 */
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		/* The DLIL facility is handled here, not by the driver */
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		/* Default level resets the facility set entirely */
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=0x%x "
		    "arg=0x%x, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags, flags,
		    category, subcategory);
	}

	return err;
}
7134
/*
 * Copy the interface's current logging parameters into whichever of
 * the out-pointers are non-NULL.  Always succeeds.
 */
int
ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
    int32_t *category, int32_t *subcategory)
{
	if (level != NULL) {
		*level = ifp->if_log.level;
	}
	if (flags != NULL) {
		*flags = ifp->if_log.flags;
	}
	if (category != NULL) {
		*category = ifp->if_log.category;
	}
	if (subcategory != NULL) {
		*subcategory = ifp->if_log.subcategory;
	}

	return 0;
}
7154
7155 int
ifnet_notify_address(struct ifnet * ifp,int af)7156 ifnet_notify_address(struct ifnet *ifp, int af)
7157 {
7158 struct ifnet_notify_address_params na;
7159
7160 #if PF
7161 (void) pf_ifaddr_hook(ifp);
7162 #endif /* PF */
7163
7164 if (ifp->if_output_ctl == NULL) {
7165 return EOPNOTSUPP;
7166 }
7167
7168 bzero(&na, sizeof(na));
7169 na.address_family = (sa_family_t)af;
7170
7171 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
7172 sizeof(na), &na);
7173 }
7174
7175 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)7176 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
7177 {
7178 if (ifp == NULL || flowid == NULL) {
7179 return EINVAL;
7180 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7181 !IF_FULLY_ATTACHED(ifp)) {
7182 return ENXIO;
7183 }
7184
7185 *flowid = ifp->if_flowhash;
7186
7187 return 0;
7188 }
7189
/*
 * Mark @ifp flow-controlled so output is suppressed until a flow
 * advisory re-enables it.  If a resume was already pending, the two
 * events cancel out and both flags are cleared.  Otherwise the
 * interface is registered in the flow-control tree (ifnet_fc_add) so
 * a later ifnet_flowadv() can find it.
 */
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err = 0;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	lck_mtx_lock(&ifp->if_start_lock);
	if (ifp->if_start_flags & IFSF_FLOW_RESUME_PENDING) {
		/* A resume raced in; drop both states */
		ifp->if_start_flags &= ~(IFSF_FLOW_RESUME_PENDING | IFSF_FLOW_CONTROLLED);
	} else if ((err = ifnet_fc_add(ifp)) == 0) {
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
	}
	lck_mtx_unlock(&ifp->if_start_lock);

	return err;
}
7212
7213 errno_t
ifnet_enable_output(struct ifnet * ifp)7214 ifnet_enable_output(struct ifnet *ifp)
7215 {
7216 if (ifp == NULL) {
7217 return EINVAL;
7218 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
7219 !IF_FULLY_ATTACHED(ifp)) {
7220 return ENXIO;
7221 }
7222
7223 ifnet_start_common(ifp, TRUE, FALSE);
7224 return 0;
7225 }
7226
/*
 * Flow advisory: a queue that previously reported congestion for
 * @flowhash has drained.  Look up (and remove) the flow-control entry;
 * if the owning interface is still attached and its flow hash still
 * matches, note a pending resume (when not currently marked
 * flow-controlled) and kick output.  The entry is always freed here.
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	ifnet_ref_t ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			if ((ifp->if_start_flags & IFSF_FLOW_CONTROLLED) == 0) {
				ifp->if_start_flags |= IFSF_FLOW_RESUME_PENDING;
			}
			lck_mtx_unlock(&ifp->if_start_lock);
			(void) ifnet_enable_output(ifp);
		}
		/* drop the I/O refcount taken by ifnet_is_attached(, 1) */
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
7255
7256 /*
7257 * Function to compare ifnet_fc_entries in ifnet flow control tree
7258 */
7259 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)7260 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
7261 {
7262 return fc1->ifce_flowhash - fc2->ifce_flowhash;
7263 }
7264
/*
 * Register @ifp in the global flow-control tree, keyed by its flow
 * hash.  Returns 0 when an entry for this interface already exists or
 * after inserting a fresh one; EAGAIN when a different interface owns
 * the same hash (collision — the new entry is not added).
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex (zalloc may block) */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
7308
/*
 * Look up the flow-control entry for @flowhash and remove it from the
 * tree.  Returns NULL when no entry exists, or when the owning
 * interface is no longer attached (the entry is freed in that case).
 * On success the caller owns the returned entry and must release it
 * with ifnet_fc_entry_free().
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	ifnet_ref_t ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	/* Hand the entry to the caller regardless of what follows */
	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
7346
/* Release a flow-control entry back to its zone. */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
7352
/*
 * Compute a non-zero flow hash for @ifp from its name, unit, flags,
 * capabilities and two fresh random words, using the lazily-seeded
 * global ifnet_flowhash_seed.  Reseeds and retries until the result is
 * non-zero, so 0 can serve as "no hash".
 */
static uint32_t
ifnet_calc_flowhash(struct ifnet *ifp)
{
	struct ifnet_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	/* Lazy one-time seeding (benign race: any seed value works) */
	if (ifnet_flowhash_seed == 0) {
		ifnet_flowhash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	(void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
	fh.ifk_unit = ifp->if_unit;
	fh.ifk_flags = ifp->if_flags;
	fh.ifk_eflags = ifp->if_eflags;
	fh.ifk_capabilities = ifp->if_capabilities;
	fh.ifk_capenable = ifp->if_capenable;
	fh.ifk_output_sched_model = ifp->if_output_sched_model;
	fh.ifk_rand1 = RandomULong();
	fh.ifk_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		ifnet_flowhash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;
}
7385
/*
 * Install (or, with len == 0, clear) the network signature for the
 * given address family on @ifp.  The signature must fit the per-family
 * storage (EINVAL otherwise); ENOMEM when the per-family extension
 * area has not been allocated.  @flags is currently unused.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *__sized_by(len) data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
7447
/*
 * Copy the network signature for the given address family into @data.
 * On entry *len is the caller's buffer size; it must be non-zero and
 * at least the stored signature length (EINVAL otherwise).  On return
 * *len is the actual signature length.  ENOENT when no signature is
 * set, ENOMEM when the per-family extension area is absent.  @flags,
 * when non-NULL, is always set to 0 on success.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *__sized_by(*len) data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
7508
/*
 * Install up to NAT64_MAX_NUM_PREFIXES NAT64 prefixes on @ifp.  A zero
 * prefix_len clears that slot; otherwise the length must be one of the
 * well-known NAT64 prefix lengths (32/40/48/56/64/96 bits, cf. RFC
 * 6052) and the prefix must not carry embedded interface/link-local
 * scope.  NECP clients are notified when at least one prefix was set.
 */
int
ifnet_set_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	/* Notify NECP outside the lock */
	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
7575
/*
 * Copy the interface's NAT64 prefix table into @prefixes (when
 * non-NULL).  ENOENT when no prefix slot is populated; ENOMEM when the
 * inet6 extension area is absent; EINVAL on a NULL interface.
 */
int
ifnet_get_nat64prefix(struct ifnet *ifp,
    struct ipv6_prefix *__counted_by(NAT64_MAX_NUM_PREFIXES) prefixes)
{
	int i, found_one = 0, error = 0;

	if (ifp == NULL) {
		return EINVAL;
	}

	if_inet6data_lock_shared(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	/* At least one slot must be populated to report success */
	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
			found_one = 1;
		}
	}

	if (found_one == 0) {
		error = ENOENT;
		goto out;
	}

	if (prefixes) {
		bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
		    sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
	}

out:
	if_inet6data_lock_done(ifp);

	return error;
}
7614
7615 #if DEBUG || DEVELOPMENT
/*
 * Blob for sum16 verification.  (Appears to begin with a gzip header,
 * 0x1f 0x8b — the content itself is irrelevant; only the byte values
 * matter for the checksum self-test below.)
 */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};
7652
/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;         /* sumr filled in lazily on first test pass */
	uint16_t len;           /* span length in bytes, from start of sumdata */
	uint16_t sumr;          /* reference */
	uint16_t sumrp;         /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
7677
/*
 * Boot-time self-test of the checksum primitives.  For every reference
 * span in sumtbl and every alignment within a 64-bit word, verify that
 * m_sum16() (by data pointer and by offset) and b_sum16() agree with
 * in_cksum_mbuf_ref() and with the precomputed reference sums; any
 * mismatch panics the system.
 */
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *);       /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (uintptr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* First pass: compute the reference sum once per span */
			if (!sumtbl[n].init) {
				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/* Something is horribly broken; stop now */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (uintptr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by aligment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
7766 #endif /* DEBUG || DEVELOPMENT */
7767
#define CASE_STRINGIFY(x) case x: return #x

/*
 * Map a KEV_DL_* event code to its symbolic name for logging; returns
 * the empty string for unrecognized codes.
 */
__private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)
{
	switch (event_code) {
	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
	CASE_STRINGIFY(KEV_DL_SIFMTU);
	CASE_STRINGIFY(KEV_DL_SIFPHYS);
	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
	CASE_STRINGIFY(KEV_DL_ADDMULTI);
	CASE_STRINGIFY(KEV_DL_DELMULTI);
	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_OFF);
	CASE_STRINGIFY(KEV_DL_LINK_ON);
	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
	CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
	CASE_STRINGIFY(KEV_DL_ISSUES);
	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
	default:
		break;
	}
	return "";
}
7804
/*
 * Thread-call callback for the per-interface data-threshold timer:
 * notify the network statistics layer that the byte threshold was
 * crossed.  ifnet_is_attached(ifp, 1) takes an I/O refcount that keeps
 * the ifnet alive across the notification; it is dropped afterward.
 */
void
dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
	ifnet_ref_t ifp = arg0;

	if (ifnet_is_attached(ifp, 1)) {
		nstat_ifnet_threshold_reached(ifp->if_index);
		ifnet_decr_iorefcnt(ifp);
	}
}
7816
/*
 * Called as traffic counters advance: if the interface moved more than
 * if_data_threshold bytes (rx + tx combined) since the last
 * notification, arm the data-threshold thread call — rate-limited to
 * one firing per threshold_interval seconds.  The compare-and-swap on
 * if_dt_bytes ensures only one caller arms the call per crossing.
 */
void
ifnet_notify_data_threshold(struct ifnet *ifp)
{
	uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
	uint64_t oldbytes = ifp->if_dt_bytes;

	ASSERT(ifp->if_dt_tcall != NULL);

	/*
	 * If we went over the threshold, notify NetworkStatistics.
	 * We rate-limit it based on the threshold interval value.
	 */
	if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
	    OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
	    !thread_call_isactive(ifp->if_dt_tcall)) {
		uint64_t tival = (threshold_interval * NSEC_PER_SEC);
		uint64_t now = mach_absolute_time(), deadline = now;
		uint64_t ival;

		if (tival != 0) {
			/* Periodic deadline keeps firings threshold_interval apart */
			nanoseconds_to_absolutetime(tival, &ival);
			clock_deadline_for_periodic_event(ival, now, &deadline);
			(void) thread_call_enter_delayed(ifp->if_dt_tcall,
			    deadline);
		} else {
			(void) thread_call_enter(ifp->if_dt_tcall);
		}
	}
}
7846
7847
/* Thin wrapper: forward per-flow interface statistics to TCP accounting. */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
7854
/*
 * Atomically OR @set_flags into *@flags_p; returns the previous flag
 * word (OSBitOrAtomic semantics).
 */
static inline u_int32_t
_set_flags(u_int32_t *flags_p, u_int32_t set_flags)
{
	return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
}

/*
 * Atomically clear @clear_flags in *@flags_p; returns the previous
 * flag word.
 */
static inline u_int32_t
_clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
{
	return (u_int32_t)OSBitAndAtomic(~clear_flags, flags_p);
}

/* Atomically set bits in if_eflags; returns the pre-update value. */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}

/* Atomically clear bits in if_eflags (previous value discarded). */
__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}

/* Atomically set bits in if_xflags; returns the pre-update value. */
__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}

/* Atomically clear bits in if_xflags; returns the pre-update value. */
__private_extern__ u_int32_t
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	return _clear_flags(&interface->if_xflags, clear_flags);
}
7890
/*
 * Bump the interface's traffic-rule generation counter so that readers
 * comparing against a cached value (see ifnet_sync_traffic_rule_genid())
 * can detect that the rule state changed.  Relaxed ordering: callers
 * that need to publish data before the bump supply their own ordering
 * (see the release store in ifnet_update_traffic_rule_count()).
 */
__private_extern__ void
ifnet_update_traffic_rule_genid(ifnet_t ifp)
{
	os_atomic_inc(&ifp->if_traffic_rule_genid, relaxed);
}
7896
7897 __private_extern__ boolean_t
ifnet_sync_traffic_rule_genid(ifnet_t ifp,uint32_t * genid)7898 ifnet_sync_traffic_rule_genid(ifnet_t ifp, uint32_t *genid)
7899 {
7900 if (*genid != ifp->if_traffic_rule_genid) {
7901 *genid = ifp->if_traffic_rule_genid;
7902 return TRUE;
7903 }
7904 return FALSE;
7905 }
/*
 * Publish a new traffic-rule count for the interface and then bump the
 * generation id so pollers notice the change.
 */
__private_extern__ void
ifnet_update_traffic_rule_count(ifnet_t ifp, uint32_t count)
{
	/* release store: make the new count visible before the genid bump */
	os_atomic_store(&ifp->if_traffic_rule_count, count, release);
	ifnet_update_traffic_rule_genid(ifp);
}
7912
7913
#if SKYWALK
/*
 * Report whether interface filters are "compatible":
 *  - ifp == NULL: global check — true only when the total filter attach
 *    count does not exceed the count attached through the OS path.
 *  - ifp != NULL: per-interface check — true only when that interface
 *    has no non-OS filters attached.
 */
static bool
net_check_compatible_if_filter(struct ifnet *ifp)
{
	if (ifp == NULL) {
		return net_api_stats.nas_iflt_attach_count <=
		       net_api_stats.nas_iflt_attach_os_count;
	}
	return ifp->if_flt_non_os_count == 0;
}
#endif /* SKYWALK */
7930
/*
 * Advance the dump-buffer cursor after an scnprintf() that reported k
 * bytes: consume k from the remaining length (clen) and move the write
 * pointer (c); once the buffer is exhausted, bail out to the enclosing
 * function's "done" label.
 *
 * Fix: wrap the multi-statement body in do { } while (0) so the macro
 * expands to exactly one statement — the old bare { } block plus the
 * caller's trailing ';' formed two statements, which breaks inside an
 * unbraced if/else.  Also brace the single-statement if body.
 */
#define DUMP_BUF_CHK() do {						\
	clen -= k;							\
	if (clen < 1) {							\
		goto done;						\
	}								\
	c += k;								\
} while (0)
7937
7938 int dlil_dump_top_if_qlen(char *__counted_by(str_len), int str_len);
7939 int
dlil_dump_top_if_qlen(char * __counted_by (str_len)str,int str_len)7940 dlil_dump_top_if_qlen(char *__counted_by(str_len) str, int str_len)
7941 {
7942 char *c = str;
7943 int k, clen = str_len;
7944 ifnet_ref_t top_ifcq_ifp = NULL;
7945 uint32_t top_ifcq_len = 0;
7946 ifnet_ref_t top_inq_ifp = NULL;
7947 uint32_t top_inq_len = 0;
7948
7949 for (int ifidx = 1; ifidx < if_index; ifidx++) {
7950 ifnet_ref_t ifp = ifindex2ifnet[ifidx];
7951 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7952
7953 if (ifp == NULL) {
7954 continue;
7955 }
7956 if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
7957 top_ifcq_len = ifp->if_snd->ifcq_len;
7958 top_ifcq_ifp = ifp;
7959 }
7960 if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
7961 top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
7962 top_inq_ifp = ifp;
7963 }
7964 }
7965
7966 if (top_ifcq_ifp != NULL) {
7967 k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
7968 top_ifcq_len, top_ifcq_ifp->if_xname);
7969 DUMP_BUF_CHK();
7970 }
7971 if (top_inq_ifp != NULL) {
7972 k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
7973 top_inq_len, top_inq_ifp->if_xname);
7974 DUMP_BUF_CHK();
7975 }
7976 done:
7977 return str_len - clen;
7978 }
7979