1 /*
2 * Copyright (c) 1999-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34 #include <stddef.h>
35 #include <ptrauth.h>
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/domain.h>
44 #include <sys/user.h>
45 #include <sys/random.h>
46 #include <sys/socketvar.h>
47 #include <net/if_dl.h>
48 #include <net/if.h>
49 #include <net/route.h>
50 #include <net/if_var.h>
51 #include <net/dlil.h>
52 #include <net/if_arp.h>
53 #include <net/iptap.h>
54 #include <net/pktap.h>
55 #include <net/nwk_wq.h>
56 #include <sys/kern_event.h>
57 #include <sys/kdebug.h>
58 #include <sys/mcache.h>
59 #include <sys/syslog.h>
60 #include <sys/protosw.h>
61 #include <sys/priv.h>
62
63 #include <kern/assert.h>
64 #include <kern/task.h>
65 #include <kern/thread.h>
66 #include <kern/sched_prim.h>
67 #include <kern/locks.h>
68 #include <kern/zalloc.h>
69
70 #include <net/kpi_protocol.h>
71 #include <net/if_types.h>
72 #include <net/if_ipsec.h>
73 #include <net/if_llreach.h>
74 #include <net/if_utun.h>
75 #include <net/kpi_interfacefilter.h>
76 #include <net/classq/classq.h>
77 #include <net/classq/classq_sfb.h>
78 #include <net/flowhash.h>
79 #include <net/ntstat.h>
80 #if defined(SKYWALK) && defined(XNU_TARGET_OS_OSX)
81 #include <skywalk/lib/net_filter_event.h>
82 #endif /* SKYWALK && XNU_TARGET_OS_OSX */
83 #include <net/if_llatbl.h>
84 #include <net/net_api_stats.h>
85 #include <net/if_ports_used.h>
86 #include <net/if_vlan_var.h>
87 #include <netinet/in.h>
88 #if INET
89 #include <netinet/in_var.h>
90 #include <netinet/igmp_var.h>
91 #include <netinet/ip_var.h>
92 #include <netinet/tcp.h>
93 #include <netinet/tcp_var.h>
94 #include <netinet/udp.h>
95 #include <netinet/udp_var.h>
96 #include <netinet/if_ether.h>
97 #include <netinet/in_pcb.h>
98 #include <netinet/in_tclass.h>
99 #include <netinet/ip.h>
100 #include <netinet/ip_icmp.h>
101 #include <netinet/icmp_var.h>
102 #endif /* INET */
103
104 #include <net/nat464_utils.h>
105 #include <netinet6/in6_var.h>
106 #include <netinet6/nd6.h>
107 #include <netinet6/mld6_var.h>
108 #include <netinet6/scope6_var.h>
109 #include <netinet/ip6.h>
110 #include <netinet/icmp6.h>
111 #include <net/pf_pbuf.h>
112 #include <libkern/OSAtomic.h>
113 #include <libkern/tree.h>
114
115 #include <dev/random/randomdev.h>
116 #include <machine/machine_routines.h>
117
118 #include <mach/thread_act.h>
119 #include <mach/sdt.h>
120
121 #if CONFIG_MACF
122 #include <sys/kauth.h>
123 #include <security/mac_framework.h>
124 #include <net/ethernet.h>
125 #include <net/firewire.h>
126 #endif
127
128 #if PF
129 #include <net/pfvar.h>
130 #endif /* PF */
131 #include <net/pktsched/pktsched.h>
132 #include <net/pktsched/pktsched_netem.h>
133
134 #if NECP
135 #include <net/necp.h>
136 #endif /* NECP */
137
138 #if SKYWALK
139 #include <skywalk/packet/packet_queue.h>
140 #include <skywalk/nexus/netif/nx_netif.h>
141 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
142 #endif /* SKYWALK */
143
144 #include <os/log.h>
145
146 #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
147 #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
148 #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
149 #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
150 #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
151
152 #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
153 #define MAX_LINKADDR 4 /* LONGWORDS */
154
155 #if 1
156 #define DLIL_PRINTF printf
157 #else
158 #define DLIL_PRINTF kprintf
159 #endif
160
161 #define IF_DATA_REQUIRE_ALIGNED_64(f) \
162 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
163
164 #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
165 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
166
/*
 * Protocol KPI versions; stored in if_proto.proto_kpi to select which
 * member of the kpi union below is valid for an attached protocol.
 */
enum {
	kProtoKPI_v1 = 1,
	kProtoKPI_v2 = 2
};
171
172 /*
173 * List of if_proto structures in if_proto_hash[] is protected by
174 * the ifnet lock. The rest of the fields are initialized at protocol
175 * attach time and never change, thus no lock required as long as
176 * a reference to it is valid, via if_proto_ref().
177 */
struct if_proto {
	SLIST_ENTRY(if_proto) next_hash;        /* if_proto_hash[] chain linkage */
	u_int32_t refcount;                     /* refs; see if_proto_ref()/if_proto_free() */
	u_int32_t detached;                     /* detach state flag -- NOTE(review): exact semantics set elsewhere; confirm */
	struct ifnet *ifp;                      /* interface this protocol is attached to */
	protocol_family_t protocol_family;      /* attached protocol family */
	int proto_kpi;                          /* kProtoKPI_v1 or kProtoKPI_v2; selects union member below */
	union {
		/* valid when proto_kpi == kProtoKPI_v1 */
		struct {
			proto_media_input input;                /* packet input callback */
			proto_media_preout pre_output;          /* pre-output callback */
			proto_media_event event;                /* interface event callback */
			proto_media_ioctl ioctl;                /* ioctl callback */
			proto_media_detached detached;          /* detach-complete callback */
			proto_media_resolve_multi resolve_multi; /* multicast address resolver */
			proto_media_send_arp send_arp;          /* ARP transmit callback */
		} v1;
		/* valid when proto_kpi == kProtoKPI_v2 (chained-mbuf input) */
		struct {
			proto_media_input_v2 input;             /* packet input callback (v2) */
			proto_media_preout pre_output;          /* pre-output callback */
			proto_media_event event;                /* interface event callback */
			proto_media_ioctl ioctl;                /* ioctl callback */
			proto_media_detached detached;          /* detach-complete callback */
			proto_media_resolve_multi resolve_multi; /* multicast address resolver */
			proto_media_send_arp send_arp;          /* ARP transmit callback */
		} v2;
	} kpi;
};
206
207 SLIST_HEAD(proto_hash_entry, if_proto);
208
209 #define DLIL_SDLDATALEN \
210 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
211
/*
 * DLIL's private wrapper around the public ifnet.  Interfaces are
 * allocated (and recycled; see DLIF_REUSE) as dlil_ifnet so that DLIL
 * bookkeeping travels with each ifnet.  dl_if must remain the first
 * member: DLIL_TO_IFP()/IFP_TO_DLIL() cast directly between the two.
 */
struct dlil_ifnet {
	struct ifnet dl_if; /* public ifnet; MUST be first member */
	/*
	 * DLIL private fields, protected by dl_if_lock
	 */
	decl_lck_mtx_data(, dl_if_lock);
	TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet_head link */
	u_int32_t dl_if_flags;  /* DLIF_* flags (below) */
	u_int32_t dl_if_refcnt; /* refcnt */
	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
	void *dl_if_uniqueid;   /* unique interface id */
	size_t dl_if_uniqueid_len;      /* length of the unique id */
	char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
	char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
	struct {
		struct ifaddr ifa;      /* lladdr ifa */
		u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
		u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
	} dl_if_lladdr;                 /* link-level address + mask storage */
	u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
	u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */
	u_int8_t dl_if_permanent_ether_is_set; /* nonzero once permanent addr recorded */
	u_int8_t dl_if_unused;          /* padding */
	struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
	ctrace_t dl_if_attach;          /* attach PC stacktrace */
	ctrace_t dl_if_detach;          /* detach PC stacktrace */
};
239
240 /* Values for dl_if_flags (private to DLIL) */
241 #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */
242 #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */
243 #define DLIF_DEBUG 0x4 /* has debugging info */
244
245 #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
246
247 /* For gdb */
248 __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
249
/*
 * Debug variant of dlil_ifnet (used when ifnet_debug is enabled; see
 * DLIF_DEBUG).  Adds reference hold/release counters and circular
 * caller-trace histories for leak hunting.
 */
struct dlil_ifnet_dbg {
	struct dlil_ifnet dldbg_dlif;           /* embedded dlil_ifnet */
	u_int16_t dldbg_if_refhold_cnt;         /* # ifnet references */
	u_int16_t dldbg_if_refrele_cnt;         /* # ifnet releases */
	/*
	 * Circular lists of ifnet_{reference,release} callers.
	 */
	ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
	ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
};
260
261 #define DLIL_TO_IFP(s) (&s->dl_if)
262 #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
263
/*
 * One attached interface filter; linked per-interface through filt_next.
 */
struct ifnet_filter {
	TAILQ_ENTRY(ifnet_filter) filt_next;    /* filter list linkage */
	u_int32_t filt_skip;                    /* skip flag -- NOTE(review): set/read elsewhere; confirm semantics */
	u_int32_t filt_flags;                   /* filter flags */
	ifnet_t filt_ifp;                       /* interface the filter is attached to */
	const char *filt_name;                  /* filter name */
	void *filt_cookie;                      /* client-supplied opaque context */
	protocol_family_t filt_protocol;        /* protocol family the filter applies to */
	iff_input_func filt_input;              /* input filter callback */
	iff_output_func filt_output;            /* output filter callback */
	iff_event_func filt_event;              /* event filter callback */
	iff_ioctl_func filt_ioctl;              /* ioctl filter callback */
	iff_detached_func filt_detached;        /* detach-notify callback */
};
278
279 struct proto_input_entry;
280
281 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
282
283 static LCK_ATTR_DECLARE(dlil_lck_attributes, 0, 0);
284
285 static LCK_GRP_DECLARE(dlil_lock_group, "DLIL internal locks");
286 LCK_GRP_DECLARE(ifnet_lock_group, "ifnet locks");
287 static LCK_GRP_DECLARE(ifnet_head_lock_group, "ifnet head lock");
288 static LCK_GRP_DECLARE(ifnet_snd_lock_group, "ifnet snd locks");
289 static LCK_GRP_DECLARE(ifnet_rcv_lock_group, "ifnet rcv locks");
290
291 LCK_ATTR_DECLARE(ifnet_lock_attr, 0, 0);
292 static LCK_RW_DECLARE_ATTR(ifnet_head_lock, &ifnet_head_lock_group,
293 &dlil_lck_attributes);
294 static LCK_MTX_DECLARE_ATTR(dlil_ifnet_lock, &dlil_lock_group,
295 &dlil_lck_attributes);
296
297 #if DEBUG
298 static unsigned int ifnet_debug = 1; /* debugging (enabled) */
299 #else
300 static unsigned int ifnet_debug; /* debugging (disabled) */
301 #endif /* !DEBUG */
302 static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
303 static unsigned int dlif_bufsize; /* size of dlif_size + headroom */
304 static struct zone *dlif_zone; /* zone for dlil_ifnet */
305 #define DLIF_ZONE_NAME "ifnet" /* zone name */
306
307 static ZONE_DEFINE(dlif_filt_zone, "ifnet_filter",
308 sizeof(struct ifnet_filter), ZC_ZFREE_CLEARMEM);
309
310 static ZONE_DEFINE(dlif_phash_zone, "ifnet_proto_hash",
311 sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS, ZC_ZFREE_CLEARMEM);
312
313 static ZONE_DEFINE(dlif_proto_zone, "ifnet_proto",
314 sizeof(struct if_proto), ZC_ZFREE_CLEARMEM);
315
316 static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
317 static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */
318 static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
319 #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
320
321 static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
322 static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */
323 static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
324 #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
325
326 static u_int32_t net_rtref;
327
328 static struct dlil_main_threading_info dlil_main_input_thread_info;
329 __private_extern__ struct dlil_threading_info *dlil_main_input_thread =
330 (struct dlil_threading_info *)&dlil_main_input_thread_info;
331
332 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
333 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
334 static void dlil_if_trace(struct dlil_ifnet *, int);
335 static void if_proto_ref(struct if_proto *);
336 static void if_proto_free(struct if_proto *);
337 static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
338 static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
339 u_int32_t list_count);
340 static void _dlil_if_release(ifnet_t ifp, bool clear_in_use);
341 static void if_flt_monitor_busy(struct ifnet *);
342 static void if_flt_monitor_unbusy(struct ifnet *);
343 static void if_flt_monitor_enter(struct ifnet *);
344 static void if_flt_monitor_leave(struct ifnet *);
345 static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
346 char **, protocol_family_t);
347 static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
348 protocol_family_t);
349 static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
350 const struct sockaddr_dl *);
351 static int ifnet_lookup(struct ifnet *);
352 static void if_purgeaddrs(struct ifnet *);
353
354 static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
355 struct mbuf *, char *);
356 static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
357 struct mbuf *);
358 static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
359 mbuf_t *, const struct sockaddr *, void *, char *, char *);
360 static void ifproto_media_event(struct ifnet *, protocol_family_t,
361 const struct kev_msg *);
362 static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
363 unsigned long, void *);
364 static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
365 struct sockaddr_dl *, size_t);
366 static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
367 const struct sockaddr_dl *, const struct sockaddr *,
368 const struct sockaddr_dl *, const struct sockaddr *);
369
370 static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
371 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
372 boolean_t poll, struct thread *tp);
373 static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
374 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
375 static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
376 static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
377 protocol_family_t *);
378 static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
379 const struct ifnet_demux_desc *, u_int32_t);
380 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
381 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
382 #if !XNU_TARGET_OS_OSX
383 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
384 const struct sockaddr *, const char *, const char *,
385 u_int32_t *, u_int32_t *);
386 #else /* XNU_TARGET_OS_OSX */
387 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
388 const struct sockaddr *, const char *, const char *);
389 #endif /* XNU_TARGET_OS_OSX */
390 static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
391 const struct sockaddr *, const char *, const char *,
392 u_int32_t *, u_int32_t *);
393 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
394 static void ifp_if_free(struct ifnet *);
395 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
396 static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
397 static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
398
399 static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *,
400 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
401 boolean_t, struct thread *);
402 static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *,
403 struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *,
404 boolean_t, struct thread *);
405
406 static void dlil_main_input_thread_func(void *, wait_result_t);
407 static void dlil_main_input_thread_cont(void *, wait_result_t);
408
409 static void dlil_input_thread_func(void *, wait_result_t);
410 static void dlil_input_thread_cont(void *, wait_result_t);
411
412 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
413 static void dlil_rxpoll_input_thread_cont(void *, wait_result_t);
414
415 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *,
416 thread_continue_t *);
417 static void dlil_terminate_input_thread(struct dlil_threading_info *);
418 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
419 struct dlil_threading_info *, struct ifnet *, boolean_t);
420 static boolean_t dlil_input_stats_sync(struct ifnet *,
421 struct dlil_threading_info *);
422 static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
423 u_int32_t, ifnet_model_t, boolean_t);
424 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
425 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
426 static int dlil_is_clat_needed(protocol_family_t, mbuf_t );
427 static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
428 static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
429 #if DEBUG || DEVELOPMENT
430 static void dlil_verify_sum16(void);
431 #endif /* DEBUG || DEVELOPMENT */
432 static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
433 protocol_family_t);
434 static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
435 protocol_family_t);
436
437 static void dlil_incr_pending_thread_count(void);
438 static void dlil_decr_pending_thread_count(void);
439
440 static void ifnet_detacher_thread_func(void *, wait_result_t);
441 static void ifnet_detacher_thread_cont(void *, wait_result_t);
442 static void ifnet_detach_final(struct ifnet *);
443 static void ifnet_detaching_enqueue(struct ifnet *);
444 static struct ifnet *ifnet_detaching_dequeue(void);
445
446 static void ifnet_start_thread_func(void *, wait_result_t);
447 static void ifnet_start_thread_cont(void *, wait_result_t);
448
449 static void ifnet_poll_thread_func(void *, wait_result_t);
450 static void ifnet_poll_thread_cont(void *, wait_result_t);
451
452 static errno_t ifnet_enqueue_common(struct ifnet *, struct ifclassq *,
453 classq_pkt_t *, boolean_t, boolean_t *);
454
455 static void ifp_src_route_copyout(struct ifnet *, struct route *);
456 static void ifp_src_route_copyin(struct ifnet *, struct route *);
457 static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
458 static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
459
460 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
461 static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
462 static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
463 static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
464 static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
465 static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
466 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
467 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
468 static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
469 static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
470 static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
471
472 struct chain_len_stats tx_chain_len_stats;
473 static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
474
475 #if TEST_INPUT_THREAD_TERMINATION
476 static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
477 #endif /* TEST_INPUT_THREAD_TERMINATION */
478
479 /* The following are protected by dlil_ifnet_lock */
480 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
481 static u_int32_t ifnet_detaching_cnt;
482 static boolean_t ifnet_detaching_embryonic;
483 static void *ifnet_delayed_run; /* wait channel for detaching thread */
484
485 static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group,
486 &dlil_lck_attributes);
487
488 static uint32_t ifnet_flowhash_seed;
489
/*
 * Key material hashed when computing an interface's flow hash (see
 * ifnet_calc_flowhash()).  Fields mirror interface identity/state;
 * ifk_rand1/ifk_rand2 presumably add per-boot randomness together with
 * ifnet_flowhash_seed -- confirm in ifnet_calc_flowhash().
 */
struct ifnet_flowhash_key {
	char ifk_name[IFNAMSIZ];        /* interface name */
	uint32_t ifk_unit;              /* interface unit number */
	uint32_t ifk_flags;             /* interface flags */
	uint32_t ifk_eflags;            /* extended interface flags */
	uint32_t ifk_capabilities;      /* capability bits */
	uint32_t ifk_capenable;         /* enabled capability bits */
	uint32_t ifk_output_sched_model; /* output scheduling model */
	uint32_t ifk_rand1;             /* random salt */
	uint32_t ifk_rand2;             /* random salt */
};
501
/* Flow control entry per interface; node of ifnet_fc_tree (ifnet_fc_lock) */
struct ifnet_fc_entry {
	RB_ENTRY(ifnet_fc_entry) ifce_entry;    /* ifnet_fc_tree linkage */
	u_int32_t ifce_flowhash;                /* flow hash of the interface */
	struct ifnet *ifce_ifp;                 /* interface back-pointer */
};
508
509 static uint32_t ifnet_calc_flowhash(struct ifnet *);
510 static int ifce_cmp(const struct ifnet_fc_entry *,
511 const struct ifnet_fc_entry *);
512 static int ifnet_fc_add(struct ifnet *);
513 static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
514 static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
515
516 /* protected by ifnet_fc_lock */
517 RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
518 RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
519 RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
520
521 static ZONE_DEFINE(ifnet_fc_zone, "ifnet_fc_zone",
522 sizeof(struct ifnet_fc_entry), ZC_ZFREE_CLEARMEM);
523
524 extern void bpfdetach(struct ifnet *);
525 extern void proto_input_run(void);
526
527 extern uint32_t udp_count_opportunistic(unsigned int ifindex,
528 u_int32_t flags);
529 extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
530 u_int32_t flags);
531
532 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
533
534 #if CONFIG_MACF
535 #if !XNU_TARGET_OS_OSX
536 int dlil_lladdr_ckreq = 1;
537 #else /* XNU_TARGET_OS_OSX */
538 int dlil_lladdr_ckreq = 0;
539 #endif /* XNU_TARGET_OS_OSX */
540 #endif /* CONFIG_MACF */
541
542 #if DEBUG
543 int dlil_verbose = 1;
544 #else
545 int dlil_verbose = 0;
546 #endif /* DEBUG */
547 #if IFNET_INPUT_SANITY_CHK
548 /* sanity checking of input packet lists received */
549 static u_int32_t dlil_input_sanity_check = 0;
550 #endif /* IFNET_INPUT_SANITY_CHK */
551 /* rate limit debug messages */
552 struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };
553
554 SYSCTL_DECL(_net_link_generic_system);
555
556 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
557 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
558
559 #define IF_SNDQ_MINLEN 32
560 u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
561 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
562 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
563 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
564
565 #define IF_RCVQ_MINLEN 32
566 #define IF_RCVQ_MAXLEN 256
567 u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
568 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
569 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
570 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
571
572 #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
573 u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
574 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
575 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
576 "ilog2 of EWMA decay rate of avg inbound packets");
577
578 #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
579 #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
580 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
581 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
582 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
583 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
584 "Q", "input poll mode freeze time");
585
586 #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
587 #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
588 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
589 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
590 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
591 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
592 "Q", "input poll sampling time");
593
594 static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
595 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
596 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
597 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
598 "Q", "input poll interval (time)");
599
600 #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
601 u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
602 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
603 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
604 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
605
606 #define IF_RXPOLL_WLOWAT 10
607 static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
608 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
609 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat,
610 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
611 "I", "input poll wakeup low watermark");
612
613 #define IF_RXPOLL_WHIWAT 100
614 static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
615 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
616 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat,
617 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
618 "I", "input poll wakeup high watermark");
619
620 static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
621 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
622 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
623 "max packets per poll call");
624
625 u_int32_t if_rxpoll = 1;
626 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
627 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
628 sysctl_rxpoll, "I", "enable opportunistic input polling");
629
630 #if TEST_INPUT_THREAD_TERMINATION
631 static u_int32_t if_input_thread_termination_spin = 0;
632 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
633 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
634 &if_input_thread_termination_spin, 0,
635 sysctl_input_thread_termination_spin,
636 "I", "input thread termination spin limit");
637 #endif /* TEST_INPUT_THREAD_TERMINATION */
638
639 static u_int32_t cur_dlil_input_threads = 0;
640 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
641 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
642 "Current number of DLIL input threads");
643
644 #if IFNET_INPUT_SANITY_CHK
645 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
646 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
647 "Turn on sanity checking in DLIL input");
648 #endif /* IFNET_INPUT_SANITY_CHK */
649
650 static u_int32_t if_flowadv = 1;
651 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
652 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
653 "enable flow-advisory mechanism");
654
655 static u_int32_t if_delaybased_queue = 1;
656 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
657 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
658 "enable delay based dynamic queue sizing");
659
660 static uint64_t hwcksum_in_invalidated = 0;
661 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
662 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
663 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
664
665 uint32_t hwcksum_dbg = 0;
666 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
667 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
668 "enable hardware cksum debugging");
669
670 u_int32_t ifnet_start_delayed = 0;
671 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
672 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
673 "number of times start was delayed");
674
675 u_int32_t ifnet_delay_start_disabled = 0;
676 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
677 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
678 "number of times start was delayed");
679
680 static inline void
ifnet_delay_start_disabled_increment(void)681 ifnet_delay_start_disabled_increment(void)
682 {
683 OSIncrementAtomic(&ifnet_delay_start_disabled);
684 }
685
686 #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
687 #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
688 #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
689 #define HWCKSUM_DBG_MASK \
690 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
691 HWCKSUM_DBG_FINALIZE_FORCED)
692
693 static uint32_t hwcksum_dbg_mode = 0;
694 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
695 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
696 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
697
698 static uint64_t hwcksum_dbg_partial_forced = 0;
699 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
700 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
701 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
702
703 static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
704 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
705 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
706 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
707
708 static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
709 SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
710 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
711 &hwcksum_dbg_partial_rxoff_forced, 0,
712 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
713 "forced partial cksum rx offset");
714
715 static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
716 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
717 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
718 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
719 "adjusted partial cksum rx offset");
720
721 static uint64_t hwcksum_dbg_verified = 0;
722 SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
723 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
724 &hwcksum_dbg_verified, "packets verified for having good checksum");
725
/* Debug counter: packets whose driver-computed checksum was wrong. */
static uint64_t hwcksum_dbg_bad_cksum = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");

/* Debug counter: packets whose receive checksum offset was invalid. */
static uint64_t hwcksum_dbg_bad_rxoff = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");

/* Debug counter: packets whose receive checksum offset was adjusted. */
static uint64_t hwcksum_dbg_adjusted = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");

/* Debug counter: headers finalized by the checksum debug machinery. */
static uint64_t hwcksum_dbg_finalized_hdr = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_hdr, "finalized headers");

/* Debug counter: payloads finalized by the checksum debug machinery. */
static uint64_t hwcksum_dbg_finalized_data = 0;
SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
    &hwcksum_dbg_finalized_data, "finalized payloads");

/* Global switch: transmit hardware checksum offload (1 = enabled). */
uint32_t hwcksum_tx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
    "enable transmit hardware checksum offload");

/* Global switch: receive hardware checksum offload (1 = enabled). */
uint32_t hwcksum_rx = 1;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
    "enable receive hardware checksum offload");

/* Read-only export of the transmit chain length statistics. */
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
    sysctl_tx_chain_len_stats, "S", "");

uint32_t tx_chain_len_count = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");

static uint32_t threshold_notify = 1; /* enable/disable */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");

static uint32_t threshold_interval = 2; /* in seconds */
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");

#if (DEVELOPMENT || DEBUG)
static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
#endif /* DEVELOPMENT || DEBUG */

/* Aggregate networking API usage statistics, exported read-only. */
struct net_api_stats net_api_stats;
SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &net_api_stats, net_api_stats, "");

/* Debug switch for wake-packet logging. */
uint32_t net_wake_pkt_debug = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, wake_pkt_debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &net_wake_pkt_debug, 0, "");

static void log_hexdump(void *data, size_t len);

unsigned int net_rxpoll = 1;
unsigned int net_affinity = 1;
unsigned int net_async = 1;     /* 0: synchronous, 1: asynchronous */

static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);

extern u_int32_t inject_buckets;

/* DLIL data threshold thread call */
static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
803
804 void
ifnet_filter_update_tso(struct ifnet * ifp,boolean_t filter_enable)805 ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable)
806 {
807 /*
808 * update filter count and route_generation ID to let TCP
809 * know it should reevalute doing TSO or not
810 */
811 if (filter_enable) {
812 OSAddAtomic(1, &ifp->if_flt_no_tso_count);
813 } else {
814 VERIFY(ifp->if_flt_no_tso_count != 0);
815 OSAddAtomic(-1, &ifp->if_flt_no_tso_count);
816 }
817 routegenid_update();
818 }
819
#if SKYWALK

#if defined(XNU_TARGET_OS_OSX)
/* forward declaration; checks legacy interface-filter compatibility */
static bool net_check_compatible_if_filter(struct ifnet *ifp);
#endif /* XNU_TARGET_OS_OSX */

/* if_attach_nx flags defined in os_skywalk_private.h */
static unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT;
/* non-zero when the default attach flags enable the flowswitch IP netagent */
unsigned int if_enable_fsw_ip_netagent =
    ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);
/* non-zero when the default attach flags enable the flowswitch transport netagent */
unsigned int if_enable_fsw_transport_netagent =
    ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

/* non-zero when netif is plumbed for all interfaces (see ifnet_needs_compat) */
unsigned int if_netif_all =
    ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0);

/* Configure flowswitch to use max mtu sized buffer */
static bool fsw_use_max_mtu_buffer = false;
838
839 #if (DEVELOPMENT || DEBUG)
840 static int
841 if_attach_nx_sysctl SYSCTL_HANDLER_ARGS
842 {
843 #pragma unused(oidp, arg1, arg2)
844 unsigned int new_value;
845 int changed;
846 int error = sysctl_io_number(req, if_attach_nx, sizeof(if_attach_nx),
847 &new_value, &changed);
848 if (error) {
849 return error;
850 }
851 if (changed) {
852 if ((new_value & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) !=
853 (if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT)) {
854 return ENOTSUP;
855 }
856 if_attach_nx = new_value;
857 }
858 return 0;
859 }
860
/* Expose if_attach_nx via sysctl (DEVELOPMENT/DEBUG kernels only). */
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, if_attach_nx,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, &if_attach_nx_sysctl, "IU", "attach nexus");

#endif /* DEVELOPMENT || DEBUG */
866
867 static int
868 if_enable_fsw_transport_netagent_sysctl SYSCTL_HANDLER_ARGS
869 {
870 #pragma unused(oidp, arg1, arg2)
871 unsigned int new_value;
872 int changed;
873 int error;
874
875 error = sysctl_io_number(req, if_enable_fsw_transport_netagent,
876 sizeof(if_enable_fsw_transport_netagent),
877 &new_value, &changed);
878 if (error == 0 && changed != 0) {
879 if (new_value != 0 && new_value != 1) {
880 /* only allow 0 or 1 */
881 error = EINVAL;
882 } else if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
883 /* netagent can be enabled/disabled */
884 if_enable_fsw_transport_netagent = new_value;
885 if (new_value == 0) {
886 kern_nexus_deregister_netagents();
887 } else {
888 kern_nexus_register_netagents();
889 }
890 } else {
891 /* netagent can't be enabled */
892 error = ENOTSUP;
893 }
894 }
895 return error;
896 }
897
/* Runtime control for the flowswitch transport netagent. */
SYSCTL_PROC(_net_link_generic_system, OID_AUTO, enable_netagent,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, &if_enable_fsw_transport_netagent_sysctl, "IU",
    "enable flowswitch netagent");

/* forward declaration; definition follows further below */
static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw);

#include <skywalk/os_skywalk_private.h>
906
907 boolean_t
ifnet_nx_noauto(ifnet_t ifp)908 ifnet_nx_noauto(ifnet_t ifp)
909 {
910 return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0;
911 }
912
913 boolean_t
ifnet_nx_noauto_flowswitch(ifnet_t ifp)914 ifnet_nx_noauto_flowswitch(ifnet_t ifp)
915 {
916 return ifnet_is_low_latency(ifp);
917 }
918
919 boolean_t
ifnet_is_low_latency(ifnet_t ifp)920 ifnet_is_low_latency(ifnet_t ifp)
921 {
922 return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0;
923 }
924
/*
 * Decide whether a compat netif should be plumbed for this interface.
 * Returns FALSE when IF_ATTACH_NX_NETIF_COMPAT is globally disabled.
 * On non-macOS targets, Wi-Fi access-point interfaces (name "ap")
 * only get compat treatment when if_netif_all is set; all other
 * interfaces (and everything on macOS) return TRUE.
 */
boolean_t
ifnet_needs_compat(ifnet_t ifp)
{
	if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
		return FALSE;
	}
#if !XNU_TARGET_OS_OSX
	/*
	 * To conserve memory, we plumb in the compat layer selectively; this
	 * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL.
	 * In particular, we check for Wi-Fi Access Point.
	 */
	if (IFNET_IS_WIFI(ifp)) {
		/* Wi-Fi Access Point */
		if (ifp->if_name[0] == 'a' && ifp->if_name[1] == 'p' &&
		    ifp->if_name[2] == '\0') {
			return if_netif_all;
		}
	}
#else /* XNU_TARGET_OS_OSX */
#pragma unused(ifp)
#endif /* XNU_TARGET_OS_OSX */
	return TRUE;
}
949
950 boolean_t
ifnet_needs_fsw_transport_netagent(ifnet_t ifp)951 ifnet_needs_fsw_transport_netagent(ifnet_t ifp)
952 {
953 if (if_is_fsw_transport_netagent_enabled()) {
954 /* check if netagent has been manually enabled for ipsec/utun */
955 if (ifp->if_family == IFNET_FAMILY_IPSEC) {
956 return ipsec_interface_needs_netagent(ifp);
957 } else if (ifp->if_family == IFNET_FAMILY_UTUN) {
958 return utun_interface_needs_netagent(ifp);
959 }
960
961 /* check ifnet no auto nexus override */
962 if (ifnet_nx_noauto(ifp)) {
963 return FALSE;
964 }
965
966 /* check global if_attach_nx configuration */
967 switch (ifp->if_family) {
968 case IFNET_FAMILY_CELLULAR:
969 case IFNET_FAMILY_ETHERNET:
970 if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) {
971 return TRUE;
972 }
973 break;
974 default:
975 break;
976 }
977 }
978 return FALSE;
979 }
980
981 boolean_t
ifnet_needs_fsw_ip_netagent(ifnet_t ifp)982 ifnet_needs_fsw_ip_netagent(ifnet_t ifp)
983 {
984 #pragma unused(ifp)
985 if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) {
986 return TRUE;
987 }
988 return FALSE;
989 }
990
991 boolean_t
ifnet_needs_netif_netagent(ifnet_t ifp)992 ifnet_needs_netif_netagent(ifnet_t ifp)
993 {
994 #pragma unused(ifp)
995 return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0;
996 }
997
998 static boolean_t
dlil_detach_nexus_instance(nexus_controller_t controller,const char * func_str,uuid_t instance,uuid_t device)999 dlil_detach_nexus_instance(nexus_controller_t controller,
1000 const char *func_str, uuid_t instance, uuid_t device)
1001 {
1002 errno_t err;
1003
1004 if (instance == NULL || uuid_is_null(instance)) {
1005 return FALSE;
1006 }
1007
1008 /* followed by the device port */
1009 if (device != NULL && !uuid_is_null(device)) {
1010 err = kern_nexus_ifdetach(controller, instance, device);
1011 if (err != 0) {
1012 DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n",
1013 func_str, err);
1014 }
1015 }
1016 err = kern_nexus_controller_free_provider_instance(controller,
1017 instance);
1018 if (err != 0) {
1019 DLIL_PRINTF("%s free_provider_instance failed %d\n",
1020 func_str, err);
1021 }
1022 return TRUE;
1023 }
1024
1025 static boolean_t
dlil_detach_nexus(const char * func_str,uuid_t provider,uuid_t instance,uuid_t device)1026 dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance,
1027 uuid_t device)
1028 {
1029 boolean_t detached = FALSE;
1030 nexus_controller_t controller = kern_nexus_shared_controller();
1031 int err;
1032
1033 if (dlil_detach_nexus_instance(controller, func_str, instance,
1034 device)) {
1035 detached = TRUE;
1036 }
1037 if (provider != NULL && !uuid_is_null(provider)) {
1038 detached = TRUE;
1039 err = kern_nexus_controller_deregister_provider(controller,
1040 provider);
1041 if (err != 0) {
1042 DLIL_PRINTF("%s deregister_provider %d\n",
1043 func_str, err);
1044 }
1045 }
1046 return detached;
1047 }
1048
/*
 * Register a nexus provider of the given type (netif or flowswitch)
 * for the interface and allocate one instance of it. On success,
 * *provider and *instance carry the new UUIDs and 0 is returned.
 * On failure a non-zero errno is returned and nothing remains
 * registered (the provider is deregistered if instance allocation
 * fails after registration succeeded).
 */
static errno_t
dlil_create_provider_and_instance(nexus_controller_t controller,
    nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance,
    nexus_attr_t attr)
{
	uuid_t dom_prov;
	errno_t err;
	nexus_name_t provider_name;
	const char *type_name =
	    (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch";
	struct kern_nexus_init init;

	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
	if (err != 0) {
		DLIL_PRINTF("%s can't get %s provider, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}

	/* provider name is e.g. "com.apple.netif.en0" */
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
	if (err != 0) {
		DLIL_PRINTF("%s register %s provider failed, error %d\n",
		    __func__, type_name, err);
		goto failed;
	}
	bzero(&init, sizeof(init));
	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
	if (err != 0) {
		DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n",
		    __func__, type_name, err);
		/* undo the registration above before bailing out */
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
		goto failed;
	}
failed:
	/* note: also reached on success; err is 0 in that case */
	return err;
}
1098
/*
 * Create a netif nexus (provider + instance) for 'ifp' and attach it.
 * Returns TRUE on success with the provider/instance/attach UUIDs
 * stored in 'netif_nx'; FALSE if the interface already has Skywalk
 * capability or any step fails (partially-created state is torn down).
 *
 * NOTE(review): 'attr' is only destroyed on the failure path; this
 * presumably relies on register_provider copying the attributes —
 * verify against kern_nexus_attr ownership semantics.
 */
static boolean_t
dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller;
	errno_t err;

	if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) {
		/* it's already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s already has nexus attached\n",
			    __func__, if_name(ifp));
			/* already attached */
		}
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}
	/* bind the nexus to this interface's index */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
	VERIFY(err == 0);

	controller = kern_nexus_shared_controller();

	/* create the netif provider and instance */
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
	if (err != 0) {
		goto failed;
	}
	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance,
	    ifp, NULL, FALSE, &netif_nx->if_nif_attach);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach %d\n",
		    __func__, err);
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, netif_nx->if_nif_provider,
		    netif_nx->if_nif_instance, NULL);
		goto failed;
	}
	return TRUE;

failed:
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
1152
1153 static boolean_t
dlil_attach_netif_compat_nexus(ifnet_t ifp,if_nexus_netif_t netif_nx)1154 dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx)
1155 {
1156 if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) ||
1157 IFNET_IS_VMNET(ifp)) {
1158 goto failed;
1159 }
1160 switch (ifp->if_type) {
1161 case IFT_CELLULAR:
1162 case IFT_ETHER:
1163 if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) {
1164 /* don't auto-attach */
1165 goto failed;
1166 }
1167 break;
1168 default:
1169 /* don't auto-attach */
1170 goto failed;
1171 }
1172 return dlil_attach_netif_nexus_common(ifp, netif_nx);
1173
1174 failed:
1175 return FALSE;
1176 }
1177
1178 static boolean_t
dlil_is_native_netif_nexus(ifnet_t ifp)1179 dlil_is_native_netif_nexus(ifnet_t ifp)
1180 {
1181 return (ifp->if_eflags & IFEF_SKYWALK_NATIVE) && ifp->if_na != NULL;
1182 }
1183
/*
 * Tear down the netif nexus described by 'nexus_netif': detaches the
 * ifattach handle and frees/deregisters the provider instance.
 */
__attribute__((noinline))
static void
dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif)
{
	dlil_detach_nexus(__func__, nexus_netif->if_nif_provider,
	    nexus_netif->if_nif_instance, nexus_netif->if_nif_attach);
}
1191
1192 static inline int
dlil_siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)1193 dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
1194 {
1195 struct ifreq ifr;
1196 int error;
1197
1198 bzero(&ifr, sizeof(ifr));
1199 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
1200 if (error == 0) {
1201 *ifdm_p = ifr.ifr_devmtu;
1202 }
1203 return error;
1204 }
1205
1206 static inline int
_dlil_get_flowswitch_buffer_size(ifnet_t ifp,uuid_t netif,uint64_t * buf_size,bool * use_multi_buflet)1207 _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint64_t *buf_size,
1208 bool *use_multi_buflet)
1209 {
1210 struct kern_pbufpool_memory_info rx_pp_info;
1211 struct kern_pbufpool_memory_info tx_pp_info;
1212 uint32_t if_max_mtu = 0;
1213 uint32_t drv_buf_size;
1214 struct ifdevmtu ifdm;
1215 int err;
1216
1217 /*
1218 * To perform intra-stack RX aggregation flowswitch needs to use
1219 * multi-buflet packet.
1220 */
1221 *use_multi_buflet = (sk_fsw_rx_agg_tcp != 0);
1222
1223 /*
1224 * IP over Thunderbolt interface can deliver the largest IP packet,
1225 * but the driver advertises the MAX MTU as only 9K.
1226 */
1227 if (IFNET_IS_THUNDERBOLT_IP(ifp)) {
1228 if_max_mtu = IP_MAXPACKET;
1229 goto skip_mtu_ioctl;
1230 }
1231
1232 /* determine max mtu */
1233 bzero(&ifdm, sizeof(ifdm));
1234 err = dlil_siocgifdevmtu(ifp, &ifdm);
1235 if (__improbable(err != 0)) {
1236 DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n",
1237 __func__, if_name(ifp));
1238 /* use default flowswitch buffer size */
1239 if_max_mtu = NX_FSW_BUFSIZE;
1240 } else {
1241 DLIL_PRINTF("%s: %s %d %d\n", __func__, if_name(ifp),
1242 ifdm.ifdm_max, ifdm.ifdm_current);
1243 /* rdar://problem/44589731 */
1244 if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current);
1245 }
1246
1247 skip_mtu_ioctl:
1248 if (if_max_mtu == 0) {
1249 DLIL_PRINTF("%s: can't determine MAX MTU for %s\n",
1250 __func__, if_name(ifp));
1251 return EINVAL;
1252 }
1253 if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) {
1254 DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch "
1255 "max bufsize(%d)\n", __func__,
1256 if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE);
1257 return EINVAL;
1258 }
1259
1260 /*
1261 * for skywalk native driver, consult the driver packet pool also.
1262 */
1263 if (dlil_is_native_netif_nexus(ifp)) {
1264 err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
1265 &tx_pp_info);
1266 if (err != 0) {
1267 DLIL_PRINTF("%s: can't get pbufpool info for %s\n",
1268 __func__, if_name(ifp));
1269 return ENXIO;
1270 }
1271 drv_buf_size = tx_pp_info.kpm_bufsize *
1272 tx_pp_info.kpm_max_frags;
1273 if (if_max_mtu > drv_buf_size) {
1274 DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, "
1275 "tx %d * %d) can't support max mtu(%d)\n", __func__,
1276 if_name(ifp), rx_pp_info.kpm_bufsize,
1277 rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize,
1278 tx_pp_info.kpm_max_frags, if_max_mtu);
1279 return EINVAL;
1280 }
1281 } else {
1282 drv_buf_size = if_max_mtu;
1283 }
1284
1285 if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) {
1286 _CASSERT((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET);
1287 *use_multi_buflet = true;
1288 /* default flowswitch buffer size */
1289 *buf_size = NX_FSW_BUFSIZE;
1290 } else {
1291 *buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE);
1292 }
1293 return 0;
1294 }
1295
/*
 * Create a flowswitch nexus for 'ifp' and attach it to the interface's
 * existing netif instance. Preconditions: auto-attach not disabled for
 * the interface, netif already plumbed (IFCAP_SKYWALK), and the global
 * IF_ATTACH_NX_FLOWSWITCH flag set. On success the provider/instance/
 * device UUIDs are stored in 'nexus_fsw' and TRUE is returned; any
 * failure tears down partially-created state and returns FALSE.
 */
static boolean_t
_dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw)
{
	nexus_attr_t attr = NULL;
	nexus_controller_t controller;
	errno_t err = 0;
	uuid_t netif;
	uint64_t buf_size = 0;
	bool multi_buflet;

	if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) ||
	    IFNET_IS_VMNET(ifp)) {
		goto failed;
	}

	if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) {
		/* not possible to attach (netif native/compat not plumbed) */
		goto failed;
	}

	if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) {
		/* don't auto-attach */
		goto failed;
	}

	/* get the netif instance from the ifp */
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	err = kern_nexus_attr_create(&attr);
	if (err != 0) {
		DLIL_PRINTF("%s: nexus attr create for %s\n", __func__,
		    if_name(ifp));
		goto failed;
	}

	/* buffer size derived from driver MTU / packet-pool capabilities */
	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet);
	if (err != 0) {
		goto failed;
	}
	ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE));

	/* Configure flowswitch buffer size */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);

	/*
	 * Configure flowswitch to use super-packet (multi-buflet).
	 */
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
	VERIFY(err == 0);

	/* create the flowswitch provider and instance */
	controller = kern_nexus_shared_controller();
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
	if (err != 0) {
		goto failed;
	}

	/* attach the device port */
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
		goto failed;
	}
	return TRUE;

failed:
	/* err == 0 here means a policy check (not an API call) bailed out */
	if (err != 0) {
		DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n",
		    __func__, if_name(ifp), err);
	} else {
		DLIL_PRINTF("%s: not attaching flowswitch to %s\n",
		    __func__, if_name(ifp));
	}
	if (attr != NULL) {
		kern_nexus_attr_destroy(attr);
	}
	return FALSE;
}
1389
/*
 * Attach a flowswitch nexus to 'ifp' if one is not already attached.
 * The nexus is built outside the ifnet lock, then published into
 * ifp->if_nx_flowswitch under the lock; if the interface stopped being
 * fully attached in the meantime, the freshly-built nexus is torn down.
 * Returns TRUE when the flowswitch ends up attached.
 */
static boolean_t
dlil_attach_flowswitch_nexus(ifnet_t ifp)
{
	boolean_t attached;
	if_nexus_flowswitch nexus_fsw;

#if (DEVELOPMENT || DEBUG)
	if (skywalk_netif_direct_allowed(if_name(ifp))) {
		DLIL_PRINTF("skip attaching fsw to %s", if_name(ifp));
		return FALSE;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * flowswitch attachment is not supported for interface using the
	 * legacy model (IFNET_INIT_LEGACY)
	 */
	if ((ifp->if_eflags & IFEF_TXSTART) == 0) {
		DLIL_PRINTF("skip attaching fsw to %s using legacy TX model",
		    if_name(ifp));
		return FALSE;
	}

	/* a non-null instance UUID means a flowswitch is already in place */
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance) == 0) {
		/* it's already attached */
		return FALSE;
	}
	bzero(&nexus_fsw, sizeof(nexus_fsw));
	attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
	if (attached) {
		ifnet_lock_exclusive(ifp);
		if (!IF_FULLY_ATTACHED(ifp)) {
			/* interface is going away */
			attached = FALSE;
		} else {
			ifp->if_nx_flowswitch = nexus_fsw;
		}
		ifnet_lock_done(ifp);
		if (!attached) {
			/* clean up flowswitch nexus */
			dlil_detach_flowswitch_nexus(&nexus_fsw);
		}
	}
	return attached;
}
1435
/*
 * Tear down the flowswitch nexus described by 'nexus_fsw': detaches
 * the device port and frees/deregisters the provider instance.
 */
__attribute__((noinline))
static void
dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw)
{
	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
}
1443
/*
 * Quiesce data movement on 'ifp' and detach both its flowswitch and
 * netif nexuses (when present), clearing the corresponding state in
 * the ifnet. Data movement is resumed before returning. The ASSERTs
 * enforce that the three UUIDs of each nexus are populated or cleared
 * together.
 */
__attribute__((noinline))
static void
dlil_quiesce_and_detach_nexuses(ifnet_t ifp)
{
	if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch;
	if_nexus_netif *nx_netif = &ifp->if_nx_netif;

	/* block and drain traffic while the nexuses are removed */
	ifnet_datamov_suspend_and_drain(ifp);
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
		bzero(nx_fsw, sizeof(*nx_fsw));
	} else {
		ASSERT(uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(uuid_is_null(nx_fsw->if_fsw_instance));
		DTRACE_IP1(fsw__not__attached, ifnet_t, ifp);
	}

	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
		bzero(nx_netif, sizeof(*nx_netif));
	} else {
		ASSERT(uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(uuid_is_null(nx_netif->if_nif_instance));
		DTRACE_IP1(netif__not__attached, ifnet_t, ifp);
	}
	ifnet_datamov_resume(ifp);
}
1475
1476 boolean_t
ifnet_add_netagent(ifnet_t ifp)1477 ifnet_add_netagent(ifnet_t ifp)
1478 {
1479 int error;
1480
1481 error = kern_nexus_interface_add_netagent(ifp);
1482 os_log(OS_LOG_DEFAULT,
1483 "kern_nexus_interface_add_netagent(%s) returned %d",
1484 ifp->if_xname, error);
1485 return error == 0;
1486 }
1487
1488 boolean_t
ifnet_remove_netagent(ifnet_t ifp)1489 ifnet_remove_netagent(ifnet_t ifp)
1490 {
1491 int error;
1492
1493 error = kern_nexus_interface_remove_netagent(ifp);
1494 os_log(OS_LOG_DEFAULT,
1495 "kern_nexus_interface_remove_netagent(%s) returned %d",
1496 ifp->if_xname, error);
1497 return error == 0;
1498 }
1499
1500 boolean_t
ifnet_attach_flowswitch_nexus(ifnet_t ifp)1501 ifnet_attach_flowswitch_nexus(ifnet_t ifp)
1502 {
1503 if (!IF_FULLY_ATTACHED(ifp)) {
1504 return FALSE;
1505 }
1506 return dlil_attach_flowswitch_nexus(ifp);
1507 }
1508
1509 boolean_t
ifnet_detach_flowswitch_nexus(ifnet_t ifp)1510 ifnet_detach_flowswitch_nexus(ifnet_t ifp)
1511 {
1512 if_nexus_flowswitch nexus_fsw;
1513
1514 ifnet_lock_exclusive(ifp);
1515 nexus_fsw = ifp->if_nx_flowswitch;
1516 bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1517 ifnet_lock_done(ifp);
1518 return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
1519 nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1520 }
1521
1522 boolean_t
ifnet_attach_netif_nexus(ifnet_t ifp)1523 ifnet_attach_netif_nexus(ifnet_t ifp)
1524 {
1525 boolean_t nexus_attached;
1526 if_nexus_netif nexus_netif;
1527
1528 if (!IF_FULLY_ATTACHED(ifp)) {
1529 return FALSE;
1530 }
1531 nexus_attached = dlil_attach_netif_nexus_common(ifp, &nexus_netif);
1532 if (nexus_attached) {
1533 ifnet_lock_exclusive(ifp);
1534 ifp->if_nx_netif = nexus_netif;
1535 ifnet_lock_done(ifp);
1536 }
1537 return nexus_attached;
1538 }
1539
1540 boolean_t
ifnet_detach_netif_nexus(ifnet_t ifp)1541 ifnet_detach_netif_nexus(ifnet_t ifp)
1542 {
1543 if_nexus_netif nexus_netif;
1544
1545 ifnet_lock_exclusive(ifp);
1546 nexus_netif = ifp->if_nx_netif;
1547 bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
1548 ifnet_lock_done(ifp);
1549
1550 return dlil_detach_nexus(__func__, nexus_netif.if_nif_provider,
1551 nexus_netif.if_nif_instance, nexus_netif.if_nif_attach);
1552 }
1553
1554 #endif /* SKYWALK */
1555
/*
 * Sanity-check an inbound mbuf: it must carry a packet header and its
 * receive interface must match 'ifp' (loopback excepted); panics on
 * violation.
 */
#define DLIL_INPUT_CHECK(m, ifp) {                                      \
	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);                    \
	if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) ||       \
	    !(mbuf_flags(m) & MBUF_PKTHDR)) {                           \
	        panic_plain("%s: invalid mbuf %p\n", __func__, m);      \
	        /* NOTREACHED */                                        \
	}                                                               \
}

/*
 * Exponentially-weighted moving average: fold 'new' into 'old' with a
 * power-of-two decay factor; a zero 'old' is seeded directly with 'new'.
 */
#define DLIL_EWMA(old, new, decay) do {                                 \
	u_int32_t _avg;                                                 \
	if ((_avg = (old)) > 0)                                         \
	        _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
	else                                                            \
	        _avg = (new);                                           \
	(old) = _avg;                                                   \
} while (0)

#define MBPS (1ULL * 1000 * 1000)
#define GBPS (MBPS * 1000)

/* Per-link-speed RX poll watermarks; table is terminated by speed == 0. */
struct rxpoll_time_tbl {
	u_int64_t speed;        /* downlink speed */
	u_int32_t plowat;       /* packets low watermark */
	u_int32_t phiwat;       /* packets high watermark */
	u_int32_t blowat;       /* bytes low watermark */
	u_int32_t bhiwat;       /* bytes high watermark */
};

static struct rxpoll_time_tbl rxpoll_tbl[] = {
	{ .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) },
	{ .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) },
	{ .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 }
};

/* Protects dlil_pending_thread_cnt (startup thread accounting). */
static LCK_MTX_DECLARE_ATTR(dlil_thread_sync_lock, &dlil_lock_group,
    &dlil_lck_attributes);
static uint32_t dlil_pending_thread_cnt = 0;
1597
/*
 * Increment the count of DLIL threads still pending startup. Must be
 * called without dlil_thread_sync_lock held (asserted).
 */
static void
dlil_incr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	dlil_pending_thread_cnt++;
	lck_mtx_unlock(&dlil_thread_sync_lock);
}
1606
/*
 * Decrement the count of DLIL threads pending startup, waking any
 * waiter once the count drains to zero. Must be called without
 * dlil_thread_sync_lock held (asserted).
 */
static void
dlil_decr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	dlil_pending_thread_cnt--;
	if (dlil_pending_thread_cnt == 0) {
		/* notify whoever is sleeping on the counter draining */
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
}
1619
1620 int
proto_hash_value(u_int32_t protocol_family)1621 proto_hash_value(u_int32_t protocol_family)
1622 {
1623 /*
1624 * dlil_proto_unplumb_all() depends on the mapping between
1625 * the hash bucket index and the protocol family defined
1626 * here; future changes must be applied there as well.
1627 */
1628 switch (protocol_family) {
1629 case PF_INET:
1630 return 0;
1631 case PF_INET6:
1632 return 1;
1633 case PF_VLAN:
1634 return 2;
1635 case PF_802154:
1636 return 3;
1637 case PF_UNSPEC:
1638 default:
1639 return 4;
1640 }
1641 }
1642
1643 /*
1644 * Caller must already be holding ifnet lock.
1645 */
1646 static struct if_proto *
find_attached_proto(struct ifnet * ifp,u_int32_t protocol_family)1647 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
1648 {
1649 struct if_proto *proto = NULL;
1650 u_int32_t i = proto_hash_value(protocol_family);
1651
1652 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1653
1654 if (ifp->if_proto_hash != NULL) {
1655 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
1656 }
1657
1658 while (proto != NULL && proto->protocol_family != protocol_family) {
1659 proto = SLIST_NEXT(proto, next_hash);
1660 }
1661
1662 if (proto != NULL) {
1663 if_proto_ref(proto);
1664 }
1665
1666 return proto;
1667 }
1668
/*
 * Take a reference on an attached protocol entry; released via
 * if_proto_free().
 */
static void
if_proto_ref(struct if_proto *proto)
{
	atomic_add_32(&proto->refcount, 1);
}
1674
1675 extern void if_rtproto_del(struct ifnet *ifp, int protocol);
1676
/*
 * Drop a reference on an attached protocol entry. When the last
 * reference goes away: invoke the protocol's detached callback,
 * purge routes for the interface/protocol pair, post a
 * KEV_DL_PROTO_DETACHED event carrying the number of protocols still
 * attached, mark the interface down if that number reached zero, and
 * finally free the entry.
 */
static void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	/* atomic_add_32_ov returns the pre-decrement value */
	oldval = atomic_add_32_ov(&proto->refcount, -1);
	if (oldval > 1) {
		return;
	}

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	ifnet_lock_shared(ifp);

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	zfree(dlif_proto_zone, proto);
}
1738
1739 __private_extern__ void
ifnet_lock_assert(struct ifnet * ifp,ifnet_lock_assert_t what)1740 ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
1741 {
1742 #if !MACH_ASSERT
1743 #pragma unused(ifp)
1744 #endif
1745 unsigned int type = 0;
1746 int ass = 1;
1747
1748 switch (what) {
1749 case IFNET_LCK_ASSERT_EXCLUSIVE:
1750 type = LCK_RW_ASSERT_EXCLUSIVE;
1751 break;
1752
1753 case IFNET_LCK_ASSERT_SHARED:
1754 type = LCK_RW_ASSERT_SHARED;
1755 break;
1756
1757 case IFNET_LCK_ASSERT_OWNED:
1758 type = LCK_RW_ASSERT_HELD;
1759 break;
1760
1761 case IFNET_LCK_ASSERT_NOTOWNED:
1762 /* nothing to do here for RW lock; bypass assert */
1763 ass = 0;
1764 break;
1765
1766 default:
1767 panic("bad ifnet assert type: %d", what);
1768 /* NOTREACHED */
1769 }
1770 if (ass) {
1771 LCK_RW_ASSERT(&ifp->if_lock, type);
1772 }
1773 }
1774
1775 __private_extern__ void
ifnet_lock_shared(struct ifnet * ifp)1776 ifnet_lock_shared(struct ifnet *ifp)
1777 {
1778 lck_rw_lock_shared(&ifp->if_lock);
1779 }
1780
1781 __private_extern__ void
ifnet_lock_exclusive(struct ifnet * ifp)1782 ifnet_lock_exclusive(struct ifnet *ifp)
1783 {
1784 lck_rw_lock_exclusive(&ifp->if_lock);
1785 }
1786
1787 __private_extern__ void
ifnet_lock_done(struct ifnet * ifp)1788 ifnet_lock_done(struct ifnet *ifp)
1789 {
1790 lck_rw_done(&ifp->if_lock);
1791 }
1792
1793 #if INET
1794 __private_extern__ void
if_inetdata_lock_shared(struct ifnet * ifp)1795 if_inetdata_lock_shared(struct ifnet *ifp)
1796 {
1797 lck_rw_lock_shared(&ifp->if_inetdata_lock);
1798 }
1799
1800 __private_extern__ void
if_inetdata_lock_exclusive(struct ifnet * ifp)1801 if_inetdata_lock_exclusive(struct ifnet *ifp)
1802 {
1803 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1804 }
1805
1806 __private_extern__ void
if_inetdata_lock_done(struct ifnet * ifp)1807 if_inetdata_lock_done(struct ifnet *ifp)
1808 {
1809 lck_rw_done(&ifp->if_inetdata_lock);
1810 }
1811 #endif
1812
/*
 * Acquire the interface's IPv6 data RW lock in shared mode.
 */
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}
1818
/*
 * Acquire the interface's IPv6 data RW lock in exclusive mode.
 */
__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}
1824
/*
 * Release the interface's IPv6 data RW lock.
 */
__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}
1830
/*
 * Acquire the global interface list lock in shared mode.
 */
__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}
1836
/*
 * Acquire the global interface list lock in exclusive mode.
 */
__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}
1842
/*
 * Release the global interface list lock.
 */
__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}
1848
/*
 * Assert that the global interface list lock is held exclusively.
 */
__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
1854
1855 /*
1856 * dlil_ifp_protolist
1857 * - get the list of protocols attached to the interface, or just the number
1858 * of attached protocols
1859 * - if the number returned is greater than 'list_count', truncation occurred
1860 *
1861 * Note:
1862 * - caller must already be holding ifnet lock.
1863 */
1864 static u_int32_t
dlil_ifp_protolist(struct ifnet * ifp,protocol_family_t * list,u_int32_t list_count)1865 dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1866 u_int32_t list_count)
1867 {
1868 u_int32_t count = 0;
1869 int i;
1870
1871 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1872
1873 if (ifp->if_proto_hash == NULL) {
1874 goto done;
1875 }
1876
1877 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1878 struct if_proto *proto;
1879 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1880 if (list != NULL && count < list_count) {
1881 list[count] = proto->protocol_family;
1882 }
1883 count++;
1884 }
1885 }
1886 done:
1887 return count;
1888 }
1889
1890 __private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp,u_int32_t * protolist,u_int32_t count)1891 if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1892 {
1893 ifnet_lock_shared(ifp);
1894 count = dlil_ifp_protolist(ifp, protolist, count);
1895 ifnet_lock_done(ifp);
1896 return count;
1897 }
1898
/*
 * Free a protocol list previously returned to a caller.
 * NOTE(review): assumes the list was allocated with kalloc_data or
 * equivalent so kfree_data_addr() can recover its size — confirm
 * against the allocating caller.
 */
__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	kfree_data_addr(list);
}
1904
1905 __private_extern__ int
dlil_post_msg(struct ifnet * ifp,u_int32_t event_subclass,u_int32_t event_code,struct net_event_data * event_data,u_int32_t event_data_len,boolean_t suppress_generation)1906 dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1907 u_int32_t event_code, struct net_event_data *event_data,
1908 u_int32_t event_data_len, boolean_t suppress_generation)
1909 {
1910 struct net_event_data ev_data;
1911 struct kev_msg ev_msg;
1912
1913 bzero(&ev_msg, sizeof(ev_msg));
1914 bzero(&ev_data, sizeof(ev_data));
1915 /*
1916 * a net event always starts with a net_event_data structure
1917 * but the caller can generate a simple net event or
1918 * provide a longer event structure to post
1919 */
1920 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1921 ev_msg.kev_class = KEV_NETWORK_CLASS;
1922 ev_msg.kev_subclass = event_subclass;
1923 ev_msg.event_code = event_code;
1924
1925 if (event_data == NULL) {
1926 event_data = &ev_data;
1927 event_data_len = sizeof(struct net_event_data);
1928 }
1929
1930 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1931 event_data->if_family = ifp->if_family;
1932 event_data->if_unit = (u_int32_t)ifp->if_unit;
1933
1934 ev_msg.dv[0].data_length = event_data_len;
1935 ev_msg.dv[0].data_ptr = event_data;
1936 ev_msg.dv[1].data_length = 0;
1937
1938 bool update_generation = true;
1939 if (event_subclass == KEV_DL_SUBCLASS) {
1940 /* Don't update interface generation for frequent link quality and state changes */
1941 switch (event_code) {
1942 case KEV_DL_LINK_QUALITY_METRIC_CHANGED:
1943 case KEV_DL_RRC_STATE_CHANGED:
1944 case KEV_DL_PRIMARY_ELECTED:
1945 update_generation = false;
1946 break;
1947 default:
1948 break;
1949 }
1950 }
1951
1952 /*
1953 * Some events that update generation counts might
1954 * want to suppress generation count.
1955 * One example is node presence/absence where we still
1956 * issue kernel event for the invocation but want to avoid
1957 * expensive operation of updating generation which triggers
1958 * NECP client updates.
1959 */
1960 if (suppress_generation) {
1961 update_generation = false;
1962 }
1963
1964 return dlil_event_internal(ifp, &ev_msg, update_generation);
1965 }
1966
/*
 * Allocate the per-interface local protocol statistics:
 * tcpstat_local, udpstat_local (from their dedicated zones, with
 * manual 64-bit alignment) and the IPv4/IPv6 ECN stat structures.
 *
 * Returns 0 on success. Returns EINVAL if ifp is NULL, or if the
 * TCP/UDP stat pointers were not both NULL on entry — in the latter
 * case any stats already hanging off ifp are freed on the way out.
 */
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;
	void *buf, *base, **pbuf;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		/* allocate tcpstat_local structure */
		buf = zalloc_flags(dlif_tcpstat_zone,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
		    ((intptr_t)buf + dlif_tcpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_tcp_stat = base;

		/* allocate udpstat_local structure */
		buf = zalloc_flags(dlif_udpstat_zone,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);

		/* Get the 64-bit aligned base address for this object */
		base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
		    sizeof(u_int64_t));
		VERIFY(((intptr_t)base + dlif_udpstat_size) <=
		    ((intptr_t)buf + dlif_udpstat_bufsize));

		/*
		 * Wind back a pointer size from the aligned base and
		 * save the original address so we can free it later.
		 */
		pbuf = (void **)((intptr_t)base - sizeof(void *));
		*pbuf = buf;
		ifp->if_udp_stat = base;

		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		ifp->if_ipv4_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
	}

	if (ifp->if_ipv6_stat == NULL) {
		ifp->if_ipv6_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
	}
end:
	/* on failure, tear down whatever is attached to ifp */
	if (ifp != NULL && ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			/* recover the original zone address stashed just
			 * below the aligned base before freeing */
			pbuf = (void **)
			    ((intptr_t)ifp->if_tcp_stat - sizeof(void *));
			zfree(dlif_tcpstat_zone, *pbuf);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			pbuf = (void **)
			    ((intptr_t)ifp->if_udp_stat - sizeof(void *));
			zfree(dlif_udpstat_zone, *pbuf);
			ifp->if_udp_stat = NULL;
		}
		/* The macro kfree_type sets the passed pointer to NULL */
		if (ifp->if_ipv4_stat != NULL) {
			kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv4_stat);
		}
		if (ifp->if_ipv6_stat != NULL) {
			kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv6_stat);
		}
	}

	return ret;
}
2052
/*
 * Reset all opportunistic-polling state on ifp to its defaults:
 * clear the poll cycle, flags, mode, statistics and timers.
 */
static void
dlil_reset_rxpoll_params(ifnet_t ifp)
{
	ASSERT(ifp != NULL);
	ifnet_set_poll_cycle(ifp, NULL);
	ifp->if_poll_update = 0;
	ifp->if_poll_flags = 0;
	ifp->if_poll_req = 0;
	ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF;
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
	net_timerclear(&ifp->if_poll_mode_holdtime);
	net_timerclear(&ifp->if_poll_mode_lasttime);
	net_timerclear(&ifp->if_poll_sample_holdtime);
	net_timerclear(&ifp->if_poll_sample_lasttime);
	net_timerclear(&ifp->if_poll_dbg_lasttime);
}
2071
/*
 * Set up the threading info 'inp' for an input thread and, unless the
 * interface uses the synchronous (netif) strategy, create and start
 * the kernel thread that services it.
 *
 * ifp == NULL selects the main DLIL input thread (dlil_init time only).
 * On return, *thfunc (if non-NULL) holds the chosen thread function,
 * or NULL when the synchronous strategy was selected.
 *
 * Returns 0 on success, ENODEV for the synchronous (no-thread) case;
 * panics if thread creation itself fails.
 */
static int
dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp,
    thread_continue_t *thfunc)
{
	boolean_t dlil_rxpoll_input;
	thread_continue_t func = NULL;
	u_int32_t limit;
	int error = 0;

	dlil_rxpoll_input = (ifp != NULL && net_rxpoll &&
	    (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY));

	/* default strategy utilizes the DLIL worker thread */
	inp->dlth_strategy = dlil_input_async;

	/* NULL ifp indicates the main input thread, called at dlil_init time */
	if (ifp == NULL) {
		/*
		 * Main input thread only.
		 */
		func = dlil_main_input_thread_func;
		VERIFY(inp == dlil_main_input_thread);
		(void) strlcat(inp->dlth_name,
		    "main_input", DLIL_THREADNAME_LEN);
	} else if (dlil_rxpoll_input) {
		/*
		 * Legacy (non-netif) hybrid polling.
		 */
		func = dlil_rxpoll_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
	} else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) {
		/*
		 * Asynchronous strategy.
		 */
		func = dlil_input_thread_func;
		VERIFY(inp != dlil_main_input_thread);
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
	} else {
		/*
		 * Synchronous strategy if there's a netif below and
		 * the device isn't capable of hybrid polling.
		 */
		ASSERT(func == NULL);
		ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
		VERIFY(inp != dlil_main_input_thread);
		ASSERT(!inp->dlth_affinity);
		inp->dlth_strategy = dlil_input_sync;
	}
	VERIFY(inp->dlth_thread == THREAD_NULL);

	/* let caller know */
	if (thfunc != NULL) {
		*thfunc = func;
	}

	/* per-thread lock group so contention is attributable by name */
	inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name, LCK_GRP_ATTR_NULL);
	lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, &dlil_lck_attributes);

	inp->dlth_ifp = ifp; /* NULL for main input thread */
	/*
	 * For interfaces that support opportunistic polling, set the
	 * low and high watermarks for outstanding inbound packets/bytes.
	 * Also define freeze times for transitioning between modes
	 * and updating the average.
	 */
	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
		if (ifp->if_xflags & IFXF_LEGACY) {
			(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
		}
	} else {
		/* no polling: effectively unlimited input queue */
		limit = (u_int32_t)-1;
	}

	_qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF);
	if (inp == dlil_main_input_thread) {
		struct dlil_main_threading_info *inpm =
		    (struct dlil_main_threading_info *)inp;
		_qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
	}

	/* synchronous strategy: no thread to start */
	if (func == NULL) {
		ASSERT(!(ifp->if_xflags & IFXF_LEGACY));
		ASSERT(error == 0);
		error = ENODEV;
		goto done;
	}

	error = kernel_thread_start(func, inp, &inp->dlth_thread);
	if (error == KERN_SUCCESS) {
		thread_precedence_policy_data_t info;
		__unused kern_return_t kret;

		bzero(&info, sizeof(info));
		info.importance = 0;
		kret = thread_policy_set(inp->dlth_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		ASSERT(kret == KERN_SUCCESS);
		/*
		 * We create an affinity set so that the matching workloop
		 * thread or the starter thread (for loopback) can be
		 * scheduled on the same processor set as the input thread.
		 */
		if (net_affinity) {
			struct thread *tp = inp->dlth_thread;
			u_int32_t tag;
			/*
			 * Randomize to reduce the probability
			 * of affinity tag namespace collision.
			 */
			read_frandom(&tag, sizeof(tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
				inp->dlth_affinity_tag = tag;
				inp->dlth_affinity = TRUE;
			}
		}
	} else if (inp == dlil_main_input_thread) {
		panic_plain("%s: couldn't create main input thread", __func__);
		/* NOTREACHED */
	} else {
		panic_plain("%s: couldn't create %s input thread", __func__,
		    if_name(ifp));
		/* NOTREACHED */
	}
	OSAddAtomic(1, &cur_dlil_input_threads);

done:
	return error;
}
2206
2207 #if TEST_INPUT_THREAD_TERMINATION
2208 static int
2209 sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
2210 {
2211 #pragma unused(arg1, arg2)
2212 uint32_t i;
2213 int err;
2214
2215 i = if_input_thread_termination_spin;
2216
2217 err = sysctl_handle_int(oidp, &i, 0, req);
2218 if (err != 0 || req->newptr == USER_ADDR_NULL) {
2219 return err;
2220 }
2221
2222 if (net_rxpoll == 0) {
2223 return ENXIO;
2224 }
2225
2226 if_input_thread_termination_spin = i;
2227 return err;
2228 }
2229 #endif /* TEST_INPUT_THREAD_TERMINATION */
2230
/*
 * Tear down a threading info structure after its input thread has
 * terminated: destroy the lock and group, and reset every field to
 * its pristine state. VERIFYs document the invariants expected at
 * teardown (empty queue, no affinity, no driver/poller threads).
 */
static void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
	lck_grp_free(inp->dlth_lock_grp);
	inp->dlth_lock_grp = NULL;

	inp->dlth_flags = 0;
	inp->dlth_wtot = 0;
	bzero(inp->dlth_name, sizeof(inp->dlth_name));
	inp->dlth_ifp = NULL;
	VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
	qlimit(&inp->dlth_pkts) = 0;
	bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));

	VERIFY(!inp->dlth_affinity);
	inp->dlth_thread = THREAD_NULL;
	inp->dlth_strategy = NULL;
	VERIFY(inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(inp->dlth_affinity_tag == 0);
#if IFNET_INPUT_SANITY_CHK
	inp->dlth_pkts_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
2256
/*
 * Terminate the calling input thread (never the main input thread):
 * drain its packet queue, signal termination completion to the
 * waiter, drop the thread reference taken at creation, and
 * self-terminate. Does not return.
 */
static void
dlil_terminate_input_thread(struct dlil_threading_info *inp)
{
	struct ifnet *ifp = inp->dlth_ifp;
	classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);

	VERIFY(current_thread() == inp->dlth_thread);
	VERIFY(inp != dlil_main_input_thread);

	OSAddAtomic(-1, &cur_dlil_input_threads);

#if TEST_INPUT_THREAD_TERMINATION
	{ /* do something useless that won't get optimized away */
		uint32_t v = 1;
		for (uint32_t i = 0;
		    i < if_input_thread_termination_spin;
		    i++) {
			v = (i + 1) * v;
		}
		DLIL_PRINTF("the value is %d\n", v);
	}
#endif /* TEST_INPUT_THREAD_TERMINATION */

	/* detach pending packets, then flag completion and wake the waiter */
	lck_mtx_lock_spin(&inp->dlth_lock);
	_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
	VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
	inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->dlth_flags);
	lck_mtx_unlock(&inp->dlth_lock);

	/* free up pending packets */
	if (pkt.cp_mbuf != NULL) {
		mbuf_freem_list(pkt.cp_mbuf);
	}

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());

	if (dlil_verbose) {
		DLIL_PRINTF("%s: input thread terminated\n",
		    if_name(ifp));
	}

	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
2304
2305 static kern_return_t
dlil_affinity_set(struct thread * tp,u_int32_t tag)2306 dlil_affinity_set(struct thread *tp, u_int32_t tag)
2307 {
2308 thread_affinity_policy_data_t policy;
2309
2310 bzero(&policy, sizeof(policy));
2311 policy.affinity_tag = tag;
2312 return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
2313 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
2314 }
2315
2316 #if defined(SKYWALK) && defined(XNU_TARGET_OS_OSX)
2317 static void
dlil_filter_event(struct eventhandler_entry_arg arg __unused,enum net_filter_event_subsystems state)2318 dlil_filter_event(struct eventhandler_entry_arg arg __unused,
2319 enum net_filter_event_subsystems state)
2320 {
2321 if (state == 0) {
2322 if_enable_fsw_transport_netagent = 1;
2323 } else {
2324 if_enable_fsw_transport_netagent = 0;
2325 }
2326 kern_nexus_update_netagents();
2327 }
2328 #endif /* SKYWALK && XNU_TARGET_OS_OSX */
2329
/*
 * One-time initialization of the data link interface layer (DLIL):
 * verifies layout/flag invariants via compile-time asserts, reads
 * boot-args, sets up allocation zones and global lists, initializes
 * dependent subsystems, and starts the main input and detacher
 * threads, waiting until both have been scheduled at least once.
 */
void
dlil_init(void)
{
	thread_t thread = THREAD_NULL;

	/*
	 * The following fields must be 64-bit aligned for atomic operations.
	 */
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);

	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
	 */
	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
	_CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);

	/*
	 * ... as well as the mbuf checksum flags counterparts.
	 */
	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
	_CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);

	/*
	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
	 */
	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);

	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);

	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);

	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
	_CASSERT(IFRTYPE_FAMILY_6LOWPAN == IFNET_FAMILY_6LOWPAN);
	_CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN);
	_CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC);

	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
	_CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
	_CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
	_CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY);
	_CASSERT(IFRTYPE_SUBFAMILY_DEFAULT == IFNET_SUBFAMILY_DEFAULT);

	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);

	/* boot-arg overrides for DLIL tunables */
	PE_parse_boot_argn("net_affinity", &net_affinity,
	    sizeof(net_affinity));

	PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof(net_rxpoll));

	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof(net_rtref));

	PE_parse_boot_argn("net_async", &net_async, sizeof(net_async));

	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof(ifnet_debug));

	VERIFY(dlil_pending_thread_cnt == 0);
#if SKYWALK
	boolean_t pe_enable_fsw_transport_netagent = FALSE;
	boolean_t pe_disable_fsw_transport_netagent = FALSE;
	boolean_t enable_fsw_netagent =
	    (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) ||
	    (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0);

	/*
	 * Check the device tree to see if Skywalk netagent has been explicitly
	 * enabled or disabled. This can be overridden via if_attach_nx below.
	 * Note that the property is a 0-length key, and so checking for the
	 * presence itself is enough (no need to check for the actual value of
	 * the retrieved variable.)
	 */
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));

	/*
	 * These two are mutually exclusive, i.e. they both can be absent,
	 * but only one can be present at a time, and so we assert to make
	 * sure it is correct.
	 */
	VERIFY((!pe_enable_fsw_transport_netagent &&
	    !pe_disable_fsw_transport_netagent) ||
	    (pe_enable_fsw_transport_netagent ^
	    pe_disable_fsw_transport_netagent));

	if (pe_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is enabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_ENABLED;
	} else if (pe_disable_fsw_transport_netagent) {
		kprintf("SK: netagent is disabled via an override for "
		    "this platform\n");
		if_attach_nx = SKYWALK_NETWORKING_DISABLED;
	} else {
		kprintf("SK: netagent is %s by default for this platform\n",
		    (enable_fsw_netagent ? "enabled" : "disabled"));
		if_attach_nx = IF_ATTACH_NX_DEFAULT;
	}

	/*
	 * Now see if there's a boot-arg override.
	 */
	(void) PE_parse_boot_argn("if_attach_nx", &if_attach_nx,
	    sizeof(if_attach_nx));
	if_enable_fsw_transport_netagent =
	    ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0);

	if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0);

	if (pe_disable_fsw_transport_netagent &&
	    if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-enabled\n");
	} else if (!pe_disable_fsw_transport_netagent &&
	    !if_enable_fsw_transport_netagent) {
		kprintf("SK: netagent is force-disabled\n");
	}
#ifdef XNU_TARGET_OS_OSX
	if (if_enable_fsw_transport_netagent) {
		net_filter_event_register(dlil_filter_event);
	}
#endif /* XNU_TARGET_OS_OSX */

#if (DEVELOPMENT || DEBUG)
	(void) PE_parse_boot_argn("fsw_use_max_mtu_buffer",
	    &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer));
#endif /* (DEVELOPMENT || DEBUG) */

#endif /* SKYWALK */
	dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) :
	    sizeof(struct dlil_ifnet_dbg);
	/* Enforce 64-bit alignment for dlil_ifnet structure */
	dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t));
	dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);

	dlif_tcpstat_size = sizeof(struct tcpstat_local);
	/* Enforce 64-bit alignment for tcpstat_local structure */
	dlif_tcpstat_bufsize =
	    dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_tcpstat_bufsize = (uint32_t)
	    P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t));
	dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
	    dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);

	dlif_udpstat_size = sizeof(struct udpstat_local);
	/* Enforce 64-bit alignment for udpstat_local structure */
	dlif_udpstat_bufsize =
	    dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t);
	dlif_udpstat_bufsize = (uint32_t)
	    P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t));
	dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
	    dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);

	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);

	/* global interface lists */
	TAILQ_INIT(&dlil_ifnet_head);
	TAILQ_INIT(&ifnet_head);
	TAILQ_INIT(&ifnet_detaching_head);
	TAILQ_INIT(&ifnet_ordered_head);

	/* Initialize interface address subsystem */
	ifa_init();

#if PF
	/* Initialize the packet filter */
	pfinit();
#endif /* PF */

	/* Initialize queue algorithms */
	classq_init();

	/* Initialize packet schedulers */
	pktsched_init();

	/* Initialize flow advisory subsystem */
	flowadv_init();

	/* Initialize the pktap virtual interface */
	pktap_init();

	/* Initialize the service class to dscp map */
	net_qos_map_init();

	/* Initialize the interface low power mode event handler */
	if_low_power_evhdlr_init();

	/* Initialize the interface offload port list subsystem */
	if_ports_used_init();

#if DEBUG || DEVELOPMENT
	/* Run self-tests */
	dlil_verify_sum16();
#endif /* DEBUG || DEVELOPMENT */

	/*
	 * Create and start up the main DLIL input thread and the interface
	 * detacher threads once everything is initialized.
	 */
	dlil_incr_pending_thread_count();
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);

	/*
	 * Create ifnet detacher thread.
	 * When an interface gets detached, part of the detach processing
	 * is delayed. The interface is added to delayed detach list
	 * and this thread is woken up to call ifnet_detach_final
	 * on these interfaces.
	 */
	dlil_incr_pending_thread_count();
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
		/* NOTREACHED */
	}
	thread_deallocate(thread);

	/*
	 * Wait for the created kernel threads for dlil to get
	 * scheduled and run at least once before we proceed
	 */
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the create dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
}
2643
/*
 * Mark the interface filter list busy; caller must hold if_flt_lock.
 * The VERIFY catches wrap-around of the busy count.
 */
static void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	++ifp->if_flt_busy;
	VERIFY(ifp->if_flt_busy != 0);
}
2652
/*
 * Drop one busy reference on the filter list; identical to
 * if_flt_monitor_leave (kept as a separate name for readability
 * at call sites that pair with if_flt_monitor_busy).
 */
static void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}
2658
/*
 * Enter the filter-list monitor: wait until no other thread holds
 * it busy, then take the busy reference. Caller must hold
 * if_flt_lock; msleep drops and reacquires it while waiting.
 */
static void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	while (ifp->if_flt_busy) {
		++ifp->if_flt_waiters;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}
	if_flt_monitor_busy(ifp);
}
2671
/*
 * Leave the filter-list monitor: drop the busy reference and, when
 * it reaches zero, wake any threads blocked in if_flt_monitor_enter.
 * Caller must hold if_flt_lock.
 */
static void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	--ifp->if_flt_busy;

	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}
2685
/*
 * Attach an interface filter described by 'if_filter' to ifp and
 * return the opaque filter reference through 'filter_ref'.
 *
 * Callbacks are not installed for internal coproc interfaces.
 * Attaching a non-TSO-capable filter bumps route_generation so TCP
 * stops offloading segmentation on existing connections.
 *
 * Returns 0 on success, ENXIO if the interface is not in the global
 * list or no longer attached.
 */
__private_extern__ int
dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
    interface_filter_t *filter_ref, u_int32_t flags)
{
	int retval = 0;
	struct ifnet_filter *filter = NULL;

	ifnet_head_lock_shared();

	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto done;
	}
	/* takes an io refcnt on success; released below */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		retval = ENXIO;
		goto done;
	}

	filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/* refcnt held above during lookup */
	filter->filt_flags = flags;
	filter->filt_ifp = ifp;
	filter->filt_cookie = if_filter->iff_cookie;
	filter->filt_name = if_filter->iff_name;
	filter->filt_protocol = if_filter->iff_protocol;
	/*
	 * Do not install filter callbacks for internal coproc interface
	 */
	if (!IFNET_IS_INTCOPROC(ifp)) {
		filter->filt_input = if_filter->iff_input;
		filter->filt_output = if_filter->iff_output;
		filter->filt_event = if_filter->iff_event;
		filter->filt_ioctl = if_filter->iff_ioctl;
	}
	filter->filt_detached = if_filter->iff_detached;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);

	*filter_ref = filter;

	/*
	 * Bump filter count and route_generation ID to let TCP
	 * know it shouldn't do TSO on this connection
	 */
	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
		ifnet_filter_update_tso(ifp, TRUE);
	}
	/* account for the attachment in the global API statistics */
	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count);
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
	} else {
		OSAddAtomic(1, &ifp->if_flt_non_os_count);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

#if defined(SKYWALK) && defined(XNU_TARGET_OS_OSX)
	net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
	    net_check_compatible_if_filter(NULL));
#endif /* SKYWALK && XNU_TARGET_OS_OSX */

	if (dlil_verbose) {
		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp),
		    if_filter->iff_name);
	}
	/* release the io refcnt taken by ifnet_is_attached() above */
	ifnet_decr_iorefcnt(ifp);

done:
	ifnet_head_done();
	if (retval != 0 && ifp != NULL) {
		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
		    if_name(ifp), if_filter->iff_name, retval);
	}
	if (retval != 0 && filter != NULL) {
		zfree(dlif_filt_zone, filter);
	}

	return retval;
}
2775
/*
 * Detach an interface filter.
 *
 * detached == 0: normal detach path.  Search every attached interface
 * for the filter; if found, mark it skipped, remove it from the list
 * under the filter monitor, and destroy it.  Returns EINVAL if the
 * filter reference is not found on any interface.
 *
 * detached != 0: implicit detach from ifnet_detach_final(); the caller
 * has already emptied if_flt_head, so only the counters are adjusted
 * before the filter is destroyed.
 */
static int
dlil_detach_filter_internal(interface_filter_t filter, int detached)
{
	int retval = 0;

	if (detached == 0) {
		ifnet_t ifp = NULL;

		ifnet_head_lock_shared();
		/* scan all interfaces for the matching filter entry */
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			interface_filter_t entry = NULL;

			lck_mtx_lock(&ifp->if_flt_lock);
			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
				if (entry != filter || entry->filt_skip) {
					continue;
				}
				/*
				 * We've found a match; since it's possible
				 * that the thread gets blocked in the monitor,
				 * we do the lock dance.  Interface should
				 * not be detached since we still have a use
				 * count held during filter attach.
				 */
				entry->filt_skip = 1; /* skip input/output */
				lck_mtx_unlock(&ifp->if_flt_lock);
				ifnet_head_done();

				lck_mtx_lock(&ifp->if_flt_lock);
				if_flt_monitor_enter(ifp);
				LCK_MTX_ASSERT(&ifp->if_flt_lock,
				    LCK_MTX_ASSERT_OWNED);

				/* Remove the filter from the list */
				TAILQ_REMOVE(&ifp->if_flt_head, filter,
				    filt_next);

				if (dlil_verbose) {
					DLIL_PRINTF("%s: %s filter detached\n",
					    if_name(ifp), filter->filt_name);
				}
				if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
					VERIFY(ifp->if_flt_non_os_count != 0);
					OSAddAtomic(-1, &ifp->if_flt_non_os_count);
				}
				/*
				 * Decrease filter count and route_generation
				 * ID to let TCP know it should reevaluate doing
				 * TSO or not.
				 */
				if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
					ifnet_filter_update_tso(ifp, FALSE);
				}
				if_flt_monitor_leave(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				goto destroy;
			}
			lck_mtx_unlock(&ifp->if_flt_lock);
		}
		ifnet_head_done();

		/* filter parameter is not a valid filter ref */
		retval = EINVAL;
		goto done;
	} else {
		struct ifnet *ifp = filter->filt_ifp;
		/*
		 * Here we are called from ifnet_detach_final(); the
		 * caller had emptied if_flt_head and we're doing an
		 * implicit filter detach because the interface is
		 * about to go away.  Make sure to adjust the counters
		 * in this case.  We don't need the protection of the
		 * filter monitor since we're called as part of the
		 * final detach in the context of the detacher thread.
		 */
		if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) {
			VERIFY(ifp->if_flt_non_os_count != 0);
			OSAddAtomic(-1, &ifp->if_flt_non_os_count);
		}
		/*
		 * Decrease filter count and route_generation
		 * ID to let TCP know it should reevaluate doing
		 * TSO or not.
		 */
		if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
			ifnet_filter_update_tso(ifp, FALSE);
		}
	}

	if (dlil_verbose) {
		DLIL_PRINTF("%s filter detached\n", filter->filt_name);
	}

destroy:

	/* Call the detached function if there is one */
	if (filter->filt_detached) {
		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
	}

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
	if (filter->filt_flags & DLIL_IFF_INTERNAL) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0);
	}
#if defined(SKYWALK) && defined(XNU_TARGET_OS_OSX)
	/* re-evaluate global filter compatibility for Skywalk interfaces */
	net_filter_event_mark(NET_FILTER_EVENT_INTERFACE,
	    net_check_compatible_if_filter(NULL));
#endif /* SKYWALK && XNU_TARGET_OS_OSX */

	/* Free the filter */
	zfree(dlif_filt_zone, filter);
	filter = NULL;
done:
	/*
	 * NOTE(review): filter is NULLed after zfree above, so this
	 * diagnostic only fires on the EINVAL path where filter was
	 * never freed.
	 */
	if (retval != 0 && filter != NULL) {
		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
		    filter->filt_name, retval);
	}

	return retval;
}
2896
2897 __private_extern__ void
dlil_detach_filter(interface_filter_t filter)2898 dlil_detach_filter(interface_filter_t filter)
2899 {
2900 if (filter == NULL) {
2901 return;
2902 }
2903 dlil_detach_filter_internal(filter, 0);
2904 }
2905
2906 __private_extern__ boolean_t
dlil_has_ip_filter(void)2907 dlil_has_ip_filter(void)
2908 {
2909 boolean_t has_filter = (net_api_stats.nas_ipf_add_count > 0);
2910 DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter);
2911 return has_filter;
2912 }
2913
2914 __private_extern__ boolean_t
dlil_has_if_filter(struct ifnet * ifp)2915 dlil_has_if_filter(struct ifnet *ifp)
2916 {
2917 boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head);
2918 DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter);
2919 return has_filter;
2920 }
2921
/*
 * Signal an input thread that work is pending.  Caller holds
 * dlth_lock.  The WAITING flag is set unconditionally; the actual
 * wakeup is issued only if the thread is not already RUNNING,
 * since a running thread will notice the flag on its next loop.
 */
static inline void
dlil_input_wakeup(struct dlil_threading_info *inp)
{
	LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);

	inp->dlth_flags |= DLIL_INPUT_WAITING;
	if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
		/* dlth_wtot feeds the rxpoll wakeup-rate EWMA */
		inp->dlth_wtot++;
		wakeup_one((caddr_t)&inp->dlth_flags);
	}
}
2933
/*
 * Bootstrap entry for the main DLIL input thread.  Runs exactly once:
 * it marks the thread embryonic, self-wakes so the continuation runs
 * immediately to clear that state, then blocks with
 * dlil_main_input_thread_cont as the continuation.  All subsequent
 * work happens in the continuation; this function never returns.
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct dlil_threading_info *inp = v;

	VERIFY(inp == dlil_main_input_thread);
	VERIFY(inp->dlth_ifp == NULL);
	VERIFY(current_thread() == inp->dlth_thread);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	/* assert_wait must precede the wakeup to avoid losing it */
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
2956
2957 /*
2958 * Main input thread:
2959 *
2960 * a) handles all inbound packets for lo0
2961 * b) handles all inbound packets for interfaces with no dedicated
2962 * input thread (e.g. anything but Ethernet/PDP or those that support
2963 * opportunistic polling.)
2964 * c) protocol registrations
2965 * d) packet injections
2966 */
/*
 * Continuation body of the main input thread.  Each invocation drains
 * the pending packet queues (lo0 and non-dedicated interfaces) and
 * runs any pending protocol registrations, looping until no work
 * remains, then re-arms the wait and blocks on itself as the
 * continuation.  Never returns.
 */
__attribute__((noreturn))
static void
dlil_main_input_thread_cont(void *v, wait_result_t wres)
{
	/* v is a dlil_main_threading_info; inp aliases its embedded info */
	struct dlil_main_threading_info *inpm = v;
	struct dlil_threading_info *inp = v;

	/* main input thread is uninterruptible */
	VERIFY(wres != THREAD_INTERRUPTED);
	lck_mtx_lock_spin(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE |
	    DLIL_INPUT_RUNNING)));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL, *m_loop = NULL;
		u_int32_t m_cnt, m_cnt_loop;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t proto_req;
		boolean_t embryonic;

		/* consume the wakeup; new work re-sets this flag */
		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
		}

		proto_req = (inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));

		/* Packets for non-dedicated interfaces other than lo0 */
		m_cnt = qlen(&inp->dlth_pkts);
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		/* Packets exclusive to lo0 */
		m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
		_getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL);
		m_loop = pkt.cp_mbuf;

		inp->dlth_wtot = 0;

		/* process the dequeued chains without holding the lock */
		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			/* first pass only: report thread fully started */
			dlil_decr_pending_thread_count();
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m_loop != NULL)) {
			dlil_input_packet_list_extended(lo_ifp, m_loop,
			    m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, IFNET_MODEL_INPUT_POLL_OFF);
		}

		if (__improbable(proto_req)) {
			proto_input_run();
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		/* main input thread cannot be terminated */
		VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE));
		/* exit the loop only when no other flag is pending */
		if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_main_input_thread_cont, inp);

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
3053
3054 /*
3055 * Input thread for interfaces with legacy input model.
3056 */
/*
 * Bootstrap entry for a per-interface legacy-model input thread.
 * Names the thread after its interface, marks it embryonic,
 * self-wakes, and blocks with dlil_input_thread_cont as the
 * continuation.  Never returns.
 */
__attribute__((noreturn))
static void
dlil_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL);
	/* legacy threads never coexist with an active rxpoll model */
	VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF ||
	    !(ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->dlth_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_%s", ifp->if_xname);
	thread_set_thread_name(inp->dlth_thread, thread_name);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	/* assert_wait must precede the wakeup to avoid losing it */
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
3091
/*
 * Continuation body of a per-interface legacy input thread.  Drains
 * the interface's packet queue in a loop until no work remains, then
 * either terminates (if asked, or if the wait was interrupted) or
 * re-arms the wait and blocks on itself.  Never returns.
 */
__attribute__((noreturn))
static void
dlil_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	lck_mtx_lock_spin(&inp->dlth_lock);
	/* unlike the main thread, this one can be torn down */
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify = FALSE;
		boolean_t embryonic;
		u_int32_t m_cnt;

		/* consume the wakeup; new work re-sets this flag */
		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
		}

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Packets for this interface */
		m_cnt = qlen(&inp->dlth_pkts);
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;

		inp->dlth_wtot = 0;

#if SKYWALK
		/*
		 * If this interface is attached to a netif nexus,
		 * the stats are already incremented there; otherwise
		 * do it here.
		 */
		if (!(ifp->if_capabilities & IFCAP_SKYWALK))
#endif /* SKYWALK */
		notify = dlil_input_stats_sync(ifp, inp);

		/* process the dequeued chain without holding the lock */
		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			/* first pass only: report thread fully started */
			ifnet_decr_pending_thread_count(ifp);
		}

		if (__improbable(notify)) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m,
			    m_cnt, ifp->if_poll_mode);
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		/* exit when nothing but RUNNING/TERMINATE is pending */
		if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;

	if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
terminate:
		lck_mtx_unlock(&inp->dlth_lock);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
		lck_mtx_unlock(&inp->dlth_lock);
		(void) thread_block_parameter(dlil_input_thread_cont, inp);
		/* NOTREACHED */
	}

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
3195
3196 /*
3197 * Input thread for interfaces with opportunistic polling input model.
3198 */
/*
 * Bootstrap entry for a per-interface opportunistic-polling input
 * thread.  Requires IFEF_RXPOLL and IFXF_LEGACY on the interface.
 * Names the thread, marks it embryonic, self-wakes, and blocks with
 * dlil_rxpoll_input_thread_cont as the continuation.  Never returns.
 */
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;

	VERIFY(inp != dlil_main_input_thread);
	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) &&
	    (ifp->if_xflags & IFXF_LEGACY));
	VERIFY(current_thread() == inp->dlth_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "dlil_input_poll_%s", ifp->if_xname);
	thread_set_thread_name(inp->dlth_thread, thread_name);

	lck_mtx_lock(&inp->dlth_lock);
	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING)));
	/* assert_wait must precede the wakeup to avoid losing it */
	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);
	(void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp);
	/* NOTREACHED */
	__builtin_unreachable();
}
3230
/*
 * Continuation body of an opportunistic-polling input thread.  In
 * addition to draining the interface's packet queue, it samples
 * inbound packet/byte/wakeup rates over if_poll_sample_holdtime,
 * maintains EWMAs of those rates, and switches the interface between
 * IFNET_MODEL_INPUT_POLL_ON and _OFF when the averages cross the
 * configured watermarks, notifying the driver of each transition via
 * if_input_ctl.  Terminates if asked; otherwise re-arms the wait and
 * blocks on itself.  Never returns.
 */
__attribute__((noreturn))
static void
dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp = v;
	struct ifnet *ifp = inp->dlth_ifp;
	struct timespec ts;

	lck_mtx_lock_spin(&inp->dlth_lock);
	/* this thread can be torn down, unlike the main input thread */
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (inp->dlth_flags & DLIL_INPUT_TERMINATE))) {
		goto terminate;
	}

	VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING));
	inp->dlth_flags |= DLIL_INPUT_RUNNING;

	while (1) {
		struct mbuf *m = NULL;
		uint32_t m_cnt, poll_req = 0;
		uint64_t m_size = 0;
		ifnet_model_t mode;
		struct timespec now, delta;
		classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
		boolean_t notify;
		boolean_t embryonic;
		uint64_t ival;

		/* consume the wakeup; new work re-sets this flag */
		inp->dlth_flags &= ~DLIL_INPUT_WAITING;

		if (__improbable(embryonic =
		    (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) {
			inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC;
			/* first pass: skip sampling, just finish startup */
			goto skip;
		}

		/* clamp the poll interval to the supported minimum */
		if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
			ival = IF_RXPOLL_INTERVALTIME_MIN;
		}

		/* Link parameters changed? */
		if (ifp->if_poll_update != 0) {
			ifp->if_poll_update = 0;
			(void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
		}

		/* Current operating mode */
		mode = ifp->if_poll_mode;

		/*
		 * Protocol registration and injection must always use
		 * the main input thread; in theory the latter can utilize
		 * the corresponding input thread where the packet arrived
		 * on, but that requires our knowing the interface in advance
		 * (and the benefits might not be worth the trouble.)
		 */
		VERIFY(!(inp->dlth_flags &
		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)));

		/* Total count of all packets */
		m_cnt = qlen(&inp->dlth_pkts);

		/* Total bytes of all packets */
		m_size = qsize(&inp->dlth_pkts);

		/* Packets for this interface */
		_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
		m = pkt.cp_mbuf;
		VERIFY(m != NULL || m_cnt == 0);

		nanouptime(&now);
		if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
			*(&ifp->if_poll_sample_lasttime) = *(&now);
		}

		net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
		if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
			u_int32_t ptot, btot;

			/* Accumulate statistics for current sampling */
			PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

			/* keep accumulating until the hold time elapses */
			if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
				goto skip;
			}

			*(&ifp->if_poll_sample_lasttime) = *(&now);

			/* Calculate min/max of inbound bytes */
			btot = (u_int32_t)ifp->if_poll_sstats.bytes;
			if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
				ifp->if_rxpoll_bmin = btot;
			}
			if (btot > ifp->if_rxpoll_bmax) {
				ifp->if_rxpoll_bmax = btot;
			}

			/* Calculate EWMA of inbound bytes */
			DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

			/* Calculate min/max of inbound packets */
			ptot = (u_int32_t)ifp->if_poll_sstats.packets;
			if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
				ifp->if_rxpoll_pmin = ptot;
			}
			if (ptot > ifp->if_rxpoll_pmax) {
				ifp->if_rxpoll_pmax = ptot;
			}

			/* Calculate EWMA of inbound packets */
			DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

			/* Reset sampling statistics */
			PKTCNTR_CLEAR(&ifp->if_poll_sstats);

			/* Calculate EWMA of wakeup requests */
			DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot,
			    if_rxpoll_decay);
			inp->dlth_wtot = 0;

			if (dlil_verbose) {
				/* rate-limited debug dump of the averages */
				if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
					*(&ifp->if_poll_dbg_lasttime) = *(&now);
				}
				net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
				if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
					*(&ifp->if_poll_dbg_lasttime) = *(&now);
					DLIL_PRINTF("%s: [%s] pkts avg %d max %d "
					    "limits [%d/%d], wreq avg %d "
					    "limits [%d/%d], bytes avg %d "
					    "limits [%d/%d]\n", if_name(ifp),
					    (ifp->if_poll_mode ==
					    IFNET_MODEL_INPUT_POLL_ON) ?
					    "ON" : "OFF", ifp->if_rxpoll_pavg,
					    ifp->if_rxpoll_pmax,
					    ifp->if_rxpoll_plowat,
					    ifp->if_rxpoll_phiwat,
					    ifp->if_rxpoll_wavg,
					    ifp->if_rxpoll_wlowat,
					    ifp->if_rxpoll_whiwat,
					    ifp->if_rxpoll_bavg,
					    ifp->if_rxpoll_blowat,
					    ifp->if_rxpoll_bhiwat);
				}
			}

			/* Perform mode transition, if necessary */
			if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
				*(&ifp->if_poll_mode_lasttime) = *(&now);
			}

			/* honor the mode hold time to avoid flapping */
			net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
			if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
				goto skip;
			}

			/*
			 * Turn polling off when both averages drop below the
			 * low watermarks; turn it on when packets exceed the
			 * high watermark and either bytes or wakeups do too.
			 */
			if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
			    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
				mode = IFNET_MODEL_INPUT_POLL_OFF;
			} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
			    (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat ||
			    ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) &&
			    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
				mode = IFNET_MODEL_INPUT_POLL_ON;
			}

			if (mode != ifp->if_poll_mode) {
				ifp->if_poll_mode = mode;
				*(&ifp->if_poll_mode_lasttime) = *(&now);
				/* request a driver downcall below */
				poll_req++;
			}
		}
skip:
		notify = dlil_input_stats_sync(ifp, inp);

		/* process outside the lock from here on */
		lck_mtx_unlock(&inp->dlth_lock);

		if (__improbable(embryonic)) {
			/* first pass only: report thread fully started */
			ifnet_decr_pending_thread_count(ifp);
		}

		if (__improbable(notify)) {
			ifnet_notify_data_threshold(ifp);
		}

		/*
		 * If there's a mode change and interface is still attached,
		 * perform a downcall to the driver for the new mode.  Also
		 * hold an IO refcnt on the interface to prevent it from
		 * being detached (will be released below.)
		 */
		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
			struct ifnet_model_params p = {
				.model = mode, .reserved = { 0 }
			};
			errno_t err;

			if (dlil_verbose) {
				DLIL_PRINTF("%s: polling is now %s, "
				    "pkts avg %d max %d limits [%d/%d], "
				    "wreq avg %d limits [%d/%d], "
				    "bytes avg %d limits [%d/%d]\n",
				    if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat,
				    ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}

			/* tell the driver about the new input model */
			if ((err = ((*ifp->if_input_ctl)(ifp,
			    IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) {
				DLIL_PRINTF("%s: error setting polling mode "
				    "to %s (%d)\n", if_name(ifp),
				    (mode == IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", err);
			}

			switch (mode) {
			case IFNET_MODEL_INPUT_POLL_OFF:
				ifnet_set_poll_cycle(ifp, NULL);
				ifp->if_rxpoll_offreq++;
				if (err != 0) {
					ifp->if_rxpoll_offerr++;
				}
				break;

			case IFNET_MODEL_INPUT_POLL_ON:
				/* arm the poll cycle and kick off polling */
				net_nsectimer(&ival, &ts);
				ifnet_set_poll_cycle(ifp, &ts);
				ifnet_poll(ifp);
				ifp->if_rxpoll_onreq++;
				if (err != 0) {
					ifp->if_rxpoll_onerr++;
				}
				break;

			default:
				VERIFY(0);
				/* NOTREACHED */
			}

			/* Release the IO refcnt */
			ifnet_decr_iorefcnt(ifp);
		}

		/*
		 * NOTE warning %%% attention !!!!
		 * We should think about putting some thread starvation
		 * safeguards if we deal with long chains of packets.
		 */
		if (__probable(m != NULL)) {
			dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
		}

		lck_mtx_lock_spin(&inp->dlth_lock);
		VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING);
		/* exit when nothing but RUNNING/TERMINATE is pending */
		if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING |
		    DLIL_INPUT_TERMINATE))) {
			break;
		}
	}

	inp->dlth_flags &= ~DLIL_INPUT_RUNNING;

	if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) {
terminate:
		lck_mtx_unlock(&inp->dlth_lock);
		dlil_terminate_input_thread(inp);
		/* NOTREACHED */
	} else {
		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
		lck_mtx_unlock(&inp->dlth_lock);
		(void) thread_block_parameter(dlil_rxpoll_input_thread_cont,
		    inp);
		/* NOTREACHED */
	}

	VERIFY(0);	/* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
3516
3517 errno_t
dlil_rxpoll_validate_params(struct ifnet_poll_params * p)3518 dlil_rxpoll_validate_params(struct ifnet_poll_params *p)
3519 {
3520 if (p != NULL) {
3521 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
3522 (p->packets_lowat != 0 && p->packets_hiwat == 0)) {
3523 return EINVAL;
3524 }
3525 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
3526 p->packets_lowat >= p->packets_hiwat) {
3527 return EINVAL;
3528 }
3529 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
3530 (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) {
3531 return EINVAL;
3532 }
3533 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
3534 p->bytes_lowat >= p->bytes_hiwat) {
3535 return EINVAL;
3536 }
3537 if (p->interval_time != 0 &&
3538 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) {
3539 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
3540 }
3541 }
3542 return 0;
3543 }
3544
/*
 * Recompute the interface's polling watermarks and interval.  With a
 * zero link rate and no caller-supplied parameters, polling is
 * disabled (zero low watermarks, maximal high watermarks).  Otherwise
 * the rxpoll_tbl entry matching the link rate provides auto-tuned
 * defaults, each of which the caller's non-zero parameter overrides.
 * Caller is expected to hold the input thread's dlth_lock (see
 * dlil_rxpoll_set_params).
 */
void
dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p)
{
	u_int64_t sample_holdtime, inbw;

	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
		sample_holdtime = 0;	/* polling is disabled */
		ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat =
		    ifp->if_rxpoll_blowat = 0;
		ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat =
		    ifp->if_rxpoll_bhiwat = (u_int32_t)-1;
		ifp->if_rxpoll_plim = 0;
		ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
	} else {
		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
		u_int64_t ival;
		unsigned int n, i;

		/* pick the highest table entry whose speed <= inbw */
		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
			if (inbw < rxpoll_tbl[i].speed) {
				break;
			}
			n = i;
		}
		/* auto-tune if caller didn't specify a value */
		plowat = ((p == NULL || p->packets_lowat == 0) ?
		    rxpoll_tbl[n].plowat : p->packets_lowat);
		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
		blowat = ((p == NULL || p->bytes_lowat == 0) ?
		    rxpoll_tbl[n].blowat : p->bytes_lowat);
		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
		/* sysctl-set limits take precedence over caller values */
		plim = ((p == NULL || p->packets_limit == 0 ||
		    if_rxpoll_max != 0) ? if_rxpoll_max : p->packets_limit);
		ival = ((p == NULL || p->interval_time == 0 ||
		    if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ?
		    if_rxpoll_interval_time : p->interval_time);

		VERIFY(plowat != 0 && phiwat != 0);
		VERIFY(blowat != 0 && bhiwat != 0);
		VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);

		sample_holdtime = if_rxpoll_sample_holdtime;
		ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat;
		ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat;
		ifp->if_rxpoll_plowat = plowat;
		ifp->if_rxpoll_phiwat = phiwat;
		ifp->if_rxpoll_blowat = blowat;
		ifp->if_rxpoll_bhiwat = bhiwat;
		ifp->if_rxpoll_plim = plim;
		ifp->if_rxpoll_ival = ival;
	}

	/* convert nanosecond hold times into timespecs on the ifnet */
	net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime);
	net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: speed %llu bps, sample per %llu nsec, "
		    "poll interval %llu nsec, pkts per poll %u, "
		    "pkt limits [%u/%u], wreq limits [%u/%u], "
		    "bytes limits [%u/%u]\n", if_name(ifp),
		    inbw, sample_holdtime, ifp->if_rxpoll_ival,
		    ifp->if_rxpoll_plim, ifp->if_rxpoll_plowat,
		    ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wlowat,
		    ifp->if_rxpoll_whiwat, ifp->if_rxpoll_blowat,
		    ifp->if_rxpoll_bhiwat);
	}
}
3614
3615 /*
3616 * Must be called on an attached ifnet (caller is expected to check.)
3617 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
3618 */
3619 errno_t
dlil_rxpoll_set_params(struct ifnet * ifp,struct ifnet_poll_params * p,boolean_t locked)3620 dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
3621 boolean_t locked)
3622 {
3623 errno_t err;
3624 struct dlil_threading_info *inp;
3625
3626 VERIFY(ifp != NULL);
3627 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
3628 return ENXIO;
3629 }
3630 err = dlil_rxpoll_validate_params(p);
3631 if (err != 0) {
3632 return err;
3633 }
3634
3635 if (!locked) {
3636 lck_mtx_lock(&inp->dlth_lock);
3637 }
3638 LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED);
3639 /*
3640 * Normally, we'd reset the parameters to the auto-tuned values
3641 * if the the input thread detects a change in link rate. If the
3642 * driver provides its own parameters right after a link rate
3643 * changes, but before the input thread gets to run, we want to
3644 * make sure to keep the driver's values. Clearing if_poll_update
3645 * will achieve that.
3646 */
3647 if (p != NULL && !locked && ifp->if_poll_update != 0) {
3648 ifp->if_poll_update = 0;
3649 }
3650 dlil_rxpoll_update_params(ifp, p);
3651 if (!locked) {
3652 lck_mtx_unlock(&inp->dlth_lock);
3653 }
3654 return 0;
3655 }
3656
3657 /*
3658 * Must be called on an attached ifnet (caller is expected to check.)
3659 */
3660 errno_t
dlil_rxpoll_get_params(struct ifnet * ifp,struct ifnet_poll_params * p)3661 dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
3662 {
3663 struct dlil_threading_info *inp;
3664
3665 VERIFY(ifp != NULL && p != NULL);
3666 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) {
3667 return ENXIO;
3668 }
3669
3670 bzero(p, sizeof(*p));
3671
3672 lck_mtx_lock(&inp->dlth_lock);
3673 p->packets_limit = ifp->if_rxpoll_plim;
3674 p->packets_lowat = ifp->if_rxpoll_plowat;
3675 p->packets_hiwat = ifp->if_rxpoll_phiwat;
3676 p->bytes_lowat = ifp->if_rxpoll_blowat;
3677 p->bytes_hiwat = ifp->if_rxpoll_bhiwat;
3678 p->interval_time = ifp->if_rxpoll_ival;
3679 lck_mtx_unlock(&inp->dlth_lock);
3680
3681 return 0;
3682 }
3683
/*
 * Basic packet-input KPI: enqueue a chain of mbufs (counted by
 * walking the chain, no caller-supplied tail or stats required).
 */
errno_t
ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
    const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE);
}
3690
/*
 * Extended packet-input KPI: the caller supplies the chain's tail
 * and the stat increments, avoiding a walk of the chain (ext=TRUE).
 */
errno_t
ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE);
}
3697
/*
 * Poll-mode packet-input KPI (poll=TRUE): used by drivers operating
 * in the opportunistic polling model.  An empty chain (m_head NULL)
 * is permitted here, in which case ext is passed as FALSE.
 */
errno_t
ifnet_input_poll(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
{
	return ifnet_input_common(ifp, m_head, m_tail, s,
	    (m_head != NULL), TRUE);
}
3705
/*
 * Common ingress path shared by all ifnet_input* variants.
 *
 * Validates the packet chain, computes (or verifies) packet/byte
 * counts, takes an IO reference on the interface, and passes the
 * chain to the interface's DLIL input function.
 *
 * "ext" means the caller supplied driver statistics ("s") and a chain
 * tail (extended variant); "poll" means the chain originated from the
 * RX poller, in which case an empty chain is legal and acts as a
 * doorbell.  Returns EINVAL (after freeing the chain) on bad
 * parameters or a detached interface.
 */
static errno_t
ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
{
	dlil_input_func input_func;
	struct ifnet_stat_increment_param _s;
	u_int32_t m_cnt = 0, m_size = 0;
	struct mbuf *last;
	errno_t err = 0;

	/* a NULL chain is only valid when polling; ext requires stats */
	if ((m_head == NULL && !poll) || (s == NULL && ext)) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
	VERIFY(m_tail == NULL || ext);
	VERIFY(s != NULL || !ext);

	/*
	 * Drop the packet(s) if the parameters are invalid, or if the
	 * interface is no longer attached; else hold an IO refcnt to
	 * prevent it from being detached (will be released below.)
	 */
	if (ifp == NULL || (ifp != lo_ifp && !ifnet_datamov_begin(ifp))) {
		if (m_head != NULL) {
			mbuf_freem_list(m_head);
		}
		return EINVAL;
	}

	input_func = ifp->if_input_dlil;
	VERIFY(input_func != NULL);

	if (m_tail == NULL) {
		/* no tail supplied: walk the chain to find it and count */
		last = m_head;
		while (m_head != NULL) {
#if IFNET_INPUT_SANITY_CHK
			if (__improbable(dlil_input_sanity_check != 0)) {
				DLIL_INPUT_CHECK(last, ifp);
			}
#endif /* IFNET_INPUT_SANITY_CHK */
			m_cnt++;
			m_size += m_length(last);
			if (mbuf_nextpkt(last) == NULL) {
				break;
			}
			last = mbuf_nextpkt(last);
		}
		m_tail = last;
	} else {
#if IFNET_INPUT_SANITY_CHK
		if (__improbable(dlil_input_sanity_check != 0)) {
			/* re-walk the chain to cross-check driver's counts */
			last = m_head;
			while (1) {
				DLIL_INPUT_CHECK(last, ifp);
				m_cnt++;
				m_size += m_length(last);
				if (mbuf_nextpkt(last) == NULL) {
					break;
				}
				last = mbuf_nextpkt(last);
			}
		} else {
			/* trust the driver-supplied statistics */
			m_cnt = s->packets_in;
			m_size = s->bytes_in;
			last = m_tail;
		}
#else
		m_cnt = s->packets_in;
		m_size = s->bytes_in;
		last = m_tail;
#endif /* IFNET_INPUT_SANITY_CHK */
	}

	if (last != m_tail) {
		panic_plain("%s: invalid input packet chain for %s, "
		    "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
		    m_tail, last);
	}

	/*
	 * Assert packet count only for the extended variant, for backwards
	 * compatibility, since this came directly from the device driver.
	 * Relax this assertion for input bytes, as the driver may have
	 * included the link-layer headers in the computation; hence
	 * m_size is just an approximation.
	 */
	if (ext && s->packets_in != m_cnt) {
		panic_plain("%s: input packet count mismatch for %s, "
		    "%d instead of %d\n", __func__, if_name(ifp),
		    s->packets_in, m_cnt);
	}

	if (s == NULL) {
		bzero(&_s, sizeof(_s));
		s = &_s;
	} else {
		_s = *s;
	}
	_s.packets_in = m_cnt;
	_s.bytes_in = m_size;

	/*
	 * NOTE(review): "s" (not &_s) is passed down, so the locally
	 * recomputed packets_in/bytes_in stored into _s above only reach
	 * the input function when the caller passed s == NULL (s then
	 * aliases &_s); otherwise the caller's original stats are used
	 * and the _s updates are dead stores — confirm this is intended.
	 */
	err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());

	if (ifp != lo_ifp) {
		/* Release the IO refcnt */
		ifnet_datamov_end(ifp);
	}

	return err;
}
3820
3821 #if SKYWALK
/*
 * Atomically install "fn" as the DLIL input handler of the interface,
 * but only if the current handler is still the default
 * dlil_input_handler; returns EBUSY when another handler is already
 * installed.  ptrauth_nop_cast strips/applies pointer authentication
 * without re-signing, so the raw compare-and-swap operates on the
 * signed function pointers directly.
 */
errno_t
dlil_set_input_handler(struct ifnet *ifp, dlil_input_func fn)
{
	return atomic_test_set_ptr(&ifp->if_input_dlil,
	           ptrauth_nop_cast(void *, &dlil_input_handler),
	           ptrauth_nop_cast(void *, fn)) ? 0 : EBUSY;
}
3829
/*
 * Restore the default dlil_input_handler as the interface's input
 * handler.  The compare value is re-read on every iteration, so the
 * loop retries until the swap succeeds regardless of which handler
 * currently occupies the slot.
 */
void
dlil_reset_input_handler(struct ifnet *ifp)
{
	while (!atomic_test_set_ptr(&ifp->if_input_dlil,
	    ptrauth_nop_cast(void *, ifp->if_input_dlil),
	    ptrauth_nop_cast(void *, &dlil_input_handler))) {
		;
	}
}
/*
 * Atomically install "fn" as the DLIL output handler of the interface,
 * but only if the current handler is still the default
 * dlil_output_handler; returns EBUSY when another handler is already
 * installed.
 */
errno_t
dlil_set_output_handler(struct ifnet *ifp, dlil_output_func fn)
{
	return atomic_test_set_ptr(&ifp->if_output_dlil,
	           ptrauth_nop_cast(void *, &dlil_output_handler),
	           ptrauth_nop_cast(void *, fn)) ? 0 : EBUSY;
}
3846
/*
 * Restore the default dlil_output_handler as the interface's output
 * handler; retries the compare-and-swap (re-reading the current
 * handler each time) until it succeeds.
 */
void
dlil_reset_output_handler(struct ifnet *ifp)
{
	while (!atomic_test_set_ptr(&ifp->if_output_dlil,
	    ptrauth_nop_cast(void *, ifp->if_output_dlil),
	    ptrauth_nop_cast(void *, &dlil_output_handler))) {
		;
	}
}
3856 #endif /* SKYWALK */
3857
3858 errno_t
dlil_output_handler(struct ifnet * ifp,struct mbuf * m)3859 dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
3860 {
3861 return ifp->if_output(ifp, m);
3862 }
3863
3864 errno_t
dlil_input_handler(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,const struct ifnet_stat_increment_param * s,boolean_t poll,struct thread * tp)3865 dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
3866 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
3867 boolean_t poll, struct thread *tp)
3868 {
3869 struct dlil_threading_info *inp = ifp->if_inp;
3870
3871 if (__improbable(inp == NULL)) {
3872 inp = dlil_main_input_thread;
3873 }
3874
3875 return inp->dlth_strategy(inp, ifp, m_head, m_tail, s, poll, tp);
3876 }
3877
/*
 * Asynchronous input strategy: enqueue the packet chain on the input
 * thread's receive queue, wake that thread, and return immediately;
 * the packets are processed later in the input thread's context.
 */
static errno_t
dlil_input_async(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;

	/*
	 * If there is a matching DLIL input thread associated with an
	 * affinity set, associate this thread with the same set. We
	 * will only do this once.
	 */
	lck_mtx_lock_spin(&inp->dlth_lock);
	if (inp != dlil_main_input_thread && inp->dlth_affinity && tp != NULL &&
	    ((!poll && inp->dlth_driver_thread == THREAD_NULL) ||
	    (poll && inp->dlth_poller_thread == THREAD_NULL))) {
		u_int32_t tag = inp->dlth_affinity_tag;

		/* record the caller as the driver or poller thread */
		if (poll) {
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_poller_thread = tp;
		} else {
			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
		}
		/* drop the spin lock around the affinity call */
		lck_mtx_unlock(&inp->dlth_lock);

		/* Associate the current thread with the new affinity tag */
		(void) dlil_affinity_set(tp, tag);

		/*
		 * Take a reference on the current thread; during detach,
		 * we will need to refer to it in order to tear down its
		 * affinity.
		 */
		thread_reference(tp);
		lck_mtx_lock_spin(&inp->dlth_lock);
	}

	VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));

	/*
	 * Because of loopbacked multicast we cannot stuff the ifp in
	 * the rcvif of the packet header: loopback (lo0) packets use a
	 * dedicated list so that we can later associate them with lo_ifp
	 * on their way up the stack. Packets for other interfaces without
	 * dedicated input threads go to the regular list.
	 */
	if (m_head != NULL) {
		classq_pkt_t head, tail;
		CLASSQ_PKT_INIT_MBUF(&head, m_head);
		CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
		if (inp == dlil_main_input_thread && ifp == lo_ifp) {
			struct dlil_main_threading_info *inpm =
			    (struct dlil_main_threading_info *)inp;
			_addq_multi(&inpm->lo_rcvq_pkts, &head, &tail,
			    m_cnt, m_size);
		} else {
			_addq_multi(&inp->dlth_pkts, &head, &tail,
			    m_cnt, m_size);
		}
	}

#if IFNET_INPUT_SANITY_CHK
	/* optionally re-walk the chain and verify the caller's counts */
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);
	/*
	 * If we're using the main input thread, synchronize the
	 * stats now since we have the interface context. All
	 * other cases involving dedicated input threads will
	 * have their stats synchronized there.
	 */
	if (inp == dlil_main_input_thread) {
		notify = dlil_input_stats_sync(ifp, inp);
	}

	dlil_input_wakeup(inp);
	lck_mtx_unlock(&inp->dlth_lock);

	/* notify after dropping the lock (avoids holding it across upcall) */
	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	return 0;
}
3990
/*
 * Synchronous input strategy: enqueue the chain on the input thread's
 * queue, then immediately drain that queue and process the packets in
 * the caller's context rather than waking the input thread.  Never
 * used for the main input thread.
 */
static errno_t
dlil_input_sync(struct dlil_threading_info *inp,
    struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    const struct ifnet_stat_increment_param *s, boolean_t poll,
    struct thread *tp)
{
#pragma unused(tp)
	u_int32_t m_cnt = s->packets_in;
	u_int32_t m_size = s->bytes_in;
	boolean_t notify = FALSE;
	classq_pkt_t head, tail;

	ASSERT(inp != dlil_main_input_thread);

	/* XXX: should we just assert instead? */
	if (__improbable(m_head == NULL)) {
		return 0;
	}

	CLASSQ_PKT_INIT_MBUF(&head, m_head);
	CLASSQ_PKT_INIT_MBUF(&tail, m_tail);

	lck_mtx_lock_spin(&inp->dlth_lock);
	_addq_multi(&inp->dlth_pkts, &head, &tail, m_cnt, m_size);

#if IFNET_INPUT_SANITY_CHK
	/* optionally re-walk the chain and verify the caller's counts */
	if (__improbable(dlil_input_sanity_check != 0)) {
		u_int32_t count = 0, size = 0;
		struct mbuf *m0;

		for (m0 = m_head; m0; m0 = mbuf_nextpkt(m0)) {
			size += m_length(m0);
			count++;
		}

		if (count != m_cnt) {
			panic_plain("%s: invalid total packet count %u "
			    "(expected %u)\n", if_name(ifp), count, m_cnt);
			/* NOTREACHED */
			__builtin_unreachable();
		} else if (size != m_size) {
			panic_plain("%s: invalid total packet size %u "
			    "(expected %u)\n", if_name(ifp), size, m_size);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		inp->dlth_pkts_cnt += m_cnt;
	}
#endif /* IFNET_INPUT_SANITY_CHK */

	dlil_input_stats_add(s, inp, ifp, poll);

	/* drain everything queued so far, not just the chain added above */
	m_cnt = qlen(&inp->dlth_pkts);
	_getq_all(&inp->dlth_pkts, &head, NULL, NULL, NULL);

#if SKYWALK
	/*
	 * If this interface is attached to a netif nexus,
	 * the stats are already incremented there; otherwise
	 * do it here.
	 */
	if (!(ifp->if_capabilities & IFCAP_SKYWALK))
#endif /* SKYWALK */
	notify = dlil_input_stats_sync(ifp, inp);

	lck_mtx_unlock(&inp->dlth_lock);

	if (notify) {
		ifnet_notify_data_threshold(ifp);
	}

	/*
	 * NOTE warning %%% attention !!!!
	 * We should think about putting some thread starvation
	 * safeguards if we deal with long chains of packets.
	 */
	if (head.cp_mbuf != NULL) {
		dlil_input_packet_list_extended(NULL, head.cp_mbuf,
		    m_cnt, ifp->if_poll_mode);
	}

	return 0;
}
4075
4076 #if SKYWALK
/*
 * Atomically redirect the interface's if_output to "fn", but only if
 * if_output still holds the saved original (if_save_output); returns
 * EBUSY when someone else has already redirected it.
 */
errno_t
ifnet_set_output_handler(struct ifnet *ifp, ifnet_output_func fn)
{
	return atomic_test_set_ptr(&ifp->if_output,
	           ptrauth_nop_cast(void *, ifp->if_save_output),
	           ptrauth_nop_cast(void *, fn)) ? 0 : EBUSY;
}
4084
/*
 * Restore the saved original output routine (if_save_output) into
 * if_output; retries the compare-and-swap (re-reading the current
 * value each time) until it succeeds.
 */
void
ifnet_reset_output_handler(struct ifnet *ifp)
{
	while (!atomic_test_set_ptr(&ifp->if_output,
	    ptrauth_nop_cast(void *, ifp->if_output),
	    ptrauth_nop_cast(void *, ifp->if_save_output))) {
		;
	}
}
4094
/*
 * Atomically redirect the interface's if_start to "fn", but only if
 * if_start still holds the saved original (if_save_start); returns
 * EBUSY when someone else has already redirected it.
 */
errno_t
ifnet_set_start_handler(struct ifnet *ifp, ifnet_start_func fn)
{
	return atomic_test_set_ptr(&ifp->if_start,
	           ptrauth_nop_cast(void *, ifp->if_save_start),
	           ptrauth_nop_cast(void *, fn)) ? 0 : EBUSY;
}
4102
/*
 * Restore the saved original start routine (if_save_start) into
 * if_start; retries the compare-and-swap (re-reading the current
 * value each time) until it succeeds.
 */
void
ifnet_reset_start_handler(struct ifnet *ifp)
{
	while (!atomic_test_set_ptr(&ifp->if_start,
	    ptrauth_nop_cast(void *, ifp->if_start),
	    ptrauth_nop_cast(void *, ifp->if_save_start))) {
		;
	}
}
4112 #endif /* SKYWALK */
4113
/*
 * Kick the transmit starter thread of an interface, if needed.
 *
 * "resetfc" clears flow-control state before signalling (used when
 * flow control from below is being lifted); otherwise a flow
 * controlled interface is left untouched.  No-op for interfaces
 * without a starter thread (IFEF_TXSTART not set).
 */
static void
ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
{
	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return;
	}
	/*
	 * If the starter thread is inactive, signal it to do work,
	 * unless the interface is being flow controlled from below,
	 * e.g. a virtual interface being flow controlled by a real
	 * network interface beneath it, or it's been disabled via
	 * a call to ifnet_disable_output().
	 */
	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (resetfc) {
		ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
	} else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
		lck_mtx_unlock(&ifp->if_start_lock);
		return;
	}
	/* record the request; the starter thread consumes if_start_req */
	ifp->if_start_req++;
	if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
	    (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
	    IFCQ_LEN(ifp->if_snd) >= ifp->if_start_delay_qlen ||
	    ifp->if_start_delayed == 0)) {
		(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	}
	lck_mtx_unlock(&ifp->if_start_lock);
}
4143
4144 void
ifnet_start(struct ifnet * ifp)4145 ifnet_start(struct ifnet *ifp)
4146 {
4147 ifnet_start_common(ifp, FALSE);
4148 }
4149
/*
 * Entry point of the per-interface starter thread.  Names the thread,
 * optionally binds the lo0 starter to the main input thread's
 * affinity set, marks itself embryonic, and parks in
 * ifnet_start_thread_cont via a continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct ifnet *ifp = v;
	char thread_name[MAXTHREADNAMESIZE];

	/* Construct the name for this thread, and then apply it. */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_start_%s", ifp->if_xname);
#if SKYWALK
	/* override name for native Skywalk interface */
	if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		(void) snprintf(thread_name, sizeof(thread_name),
		    "skywalk_doorbell_%s_tx", ifp->if_xname);
	}
#endif /* SKYWALK */
	ASSERT(ifp->if_start_thread == current_thread());
	thread_set_thread_name(current_thread(), thread_name);

	/*
	 * Treat the dedicated starter thread for lo0 as equivalent to
	 * the driver workloop thread; if net_affinity is enabled for
	 * the main input thread, associate this starter thread to it
	 * by binding them with the same affinity tag. This is done
	 * only once (as we only have one lo_ifp which never goes away.)
	 */
	if (ifp == lo_ifp) {
		struct dlil_threading_info *inp = dlil_main_input_thread;
		struct thread *tp = current_thread();
#if SKYWALK
		/* native skywalk loopback not yet implemented */
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */

		lck_mtx_lock(&inp->dlth_lock);
		if (inp->dlth_affinity) {
			u_int32_t tag = inp->dlth_affinity_tag;

			VERIFY(inp->dlth_driver_thread == THREAD_NULL);
			VERIFY(inp->dlth_poller_thread == THREAD_NULL);
			inp->dlth_driver_thread = tp;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Associate this thread with the affinity tag */
			(void) dlil_affinity_set(tp, tag);
		} else {
			lck_mtx_unlock(&inp->dlth_lock);
		}
	}

	/* park in the continuation; first wakeup clears embryonic state */
	lck_mtx_lock(&ifp->if_start_lock);
	VERIFY(!ifp->if_start_embryonic && !ifp->if_start_active);
	(void) assert_wait(&ifp->if_start_thread, THREAD_UNINT);
	ifp->if_start_embryonic = 1;
	/* wake up once to get out of embryonic state */
	ifp->if_start_req++;
	(void) wakeup_one((caddr_t)&ifp->if_start_thread);
	lck_mtx_unlock(&ifp->if_start_lock);
	(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
4215
/*
 * Continuation body of the starter thread.
 *
 * Runs each time the thread wakes: services start requests by calling
 * the driver's if_start routine until no new request arrives (or the
 * interface is flow-controlled/terminating), then re-arms the wait —
 * with a deadline when rate-limited by TBR or delaying starts — or
 * terminates the thread when the interface is going away.
 */
__attribute__((noreturn))
static void
ifnet_start_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *ifp = v;
	struct ifclassq *ifq = ifp->if_snd;

	lck_mtx_lock_spin(&ifp->if_start_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_start_flags & IFSF_TERMINATING) != 0)) {
		goto terminate;
	}

	/* first wakeup after thread creation: leave embryonic state */
	if (__improbable(ifp->if_start_embryonic)) {
		ifp->if_start_embryonic = 0;
		lck_mtx_unlock(&ifp->if_start_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_start_lock);
		goto skip;
	}

	ifp->if_start_active = 1;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		/* snapshot of the request counter at the top of each pass */
		u_int32_t req = ifp->if_start_req;
		if (!IFCQ_IS_EMPTY(ifq) &&
		    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
		    ifp->if_start_delayed == 0 &&
		    IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
		    (ifp->if_eflags & IFEF_DELAY_START)) {
			/* defer the start to let more packets accumulate */
			ifp->if_start_delayed = 1;
			ifnet_start_delayed++;
			break;
		}
		ifp->if_start_delayed = 0;
		lck_mtx_unlock(&ifp->if_start_lock);

		/*
		 * If no longer attached, don't call start because ifp
		 * is being destroyed; else hold an IO refcnt to
		 * prevent the interface from being detached (will be
		 * released below.)
		 */
		if (!ifnet_datamov_begin(ifp)) {
			lck_mtx_lock_spin(&ifp->if_start_lock);
			break;
		}

		/* invoke the driver's start routine */
		((*ifp->if_start)(ifp));

		/*
		 * Release the io ref count taken above.
		 */
		ifnet_datamov_end(ifp);

		lck_mtx_lock_spin(&ifp->if_start_lock);

		/*
		 * If there's no pending request or if the
		 * interface has been disabled, we're done.
		 */
#define _IFSF_DISABLED  (IFSF_FLOW_CONTROLLED | IFSF_TERMINATING)
		if (req == ifp->if_start_req ||
		    (ifp->if_start_flags & _IFSF_DISABLED) != 0) {
			break;
		}
	}
skip:
	ifp->if_start_req = 0;
	ifp->if_start_active = 0;

#if SKYWALK
	/*
	 * Wakeup any waiters, e.g. any threads waiting to
	 * detach the interface from the flowswitch, etc.
	 */
	if (ifp->if_start_waiters != 0) {
		ifp->if_start_waiters = 0;
		wakeup(&ifp->if_start_waiters);
	}
#endif /* SKYWALK */
	if (__probable((ifp->if_start_flags & IFSF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec delay_start_ts;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now if rate-controlled by TBR, and if
		 * there are still packets in the send queue which haven't
		 * been dequeued so far; else sleep indefinitely (ts = NULL)
		 * until ifnet_start() is called again.
		 */
		ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
		    &ifp->if_start_cycle : NULL);

		/* when delaying starts, wake up after the delay timeout */
		if (ts == NULL && ifp->if_start_delayed == 1) {
			delay_start_ts.tv_sec = 0;
			delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
			ts = &delay_start_ts;
		}

		/* a zero timespec means "no deadline" */
		if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (__improbable(ts != NULL)) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_start_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_start_lock);
		(void) thread_block_parameter(ifnet_start_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached? */
		ifnet_set_start_cycle(ifp, NULL);

		/* clear if_start_thread to allow termination to continue */
		ASSERT(ifp->if_start_thread != THREAD_NULL);
		ifp->if_start_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: starter thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
4363
4364 void
ifnet_set_start_cycle(struct ifnet * ifp,struct timespec * ts)4365 ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
4366 {
4367 if (ts == NULL) {
4368 bzero(&ifp->if_start_cycle, sizeof(ifp->if_start_cycle));
4369 } else {
4370 *(&ifp->if_start_cycle) = *ts;
4371 }
4372
4373 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
4374 DLIL_PRINTF("%s: restart interval set to %lu nsec\n",
4375 if_name(ifp), ts->tv_nsec);
4376 }
4377 }
4378
4379 static inline void
ifnet_poll_wakeup(struct ifnet * ifp)4380 ifnet_poll_wakeup(struct ifnet *ifp)
4381 {
4382 LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
4383
4384 ifp->if_poll_req++;
4385 if (!(ifp->if_poll_flags & IF_POLLF_RUNNING) &&
4386 ifp->if_poll_thread != THREAD_NULL) {
4387 wakeup_one((caddr_t)&ifp->if_poll_thread);
4388 }
4389 }
4390
4391 void
ifnet_poll(struct ifnet * ifp)4392 ifnet_poll(struct ifnet *ifp)
4393 {
4394 /*
4395 * If the poller thread is inactive, signal it to do work.
4396 */
4397 lck_mtx_lock_spin(&ifp->if_poll_lock);
4398 ifnet_poll_wakeup(ifp);
4399 lck_mtx_unlock(&ifp->if_poll_lock);
4400 }
4401
/*
 * Entry point of the per-interface RX poller thread.  Names the
 * thread, marks itself embryonic, issues a self-wakeup to leave the
 * embryonic state, and parks in ifnet_poll_thread_cont via a
 * continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct ifnet *ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "ifnet_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, thread_name);

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_EMBRYONIC | IF_POLLF_RUNNING)));
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	ifp->if_poll_flags |= IF_POLLF_EMBRYONIC;
	/* wake up once to get out of embryonic state */
	ifnet_poll_wakeup(ifp);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}
4430
/*
 * Continuation body of the RX poller thread.
 *
 * Runs each time the thread wakes: repeatedly invokes the driver's
 * if_input_poll routine for up to a computed packet limit and feeds
 * the harvested chain into the input path, until no new poll request
 * arrives or the thread is terminating.  Then re-arms the wait (with
 * a deadline when a poll cycle interval is configured) or terminates.
 */
__attribute__((noreturn))
static void
ifnet_poll_thread_cont(void *v, wait_result_t wres)
{
	struct dlil_threading_info *inp;
	struct ifnet *ifp = v;
	struct ifnet_stat_increment_param s;
	struct timespec start_time;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);

	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	inp = ifp->if_inp;
	VERIFY(inp != NULL);

	/* first wakeup after thread creation: leave embryonic state */
	if (__improbable(ifp->if_poll_flags & IF_POLLF_EMBRYONIC)) {
		ifp->if_poll_flags &= ~IF_POLLF_EMBRYONIC;
		lck_mtx_unlock(&ifp->if_poll_lock);
		ifnet_decr_pending_thread_count(ifp);
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		goto skip;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;

	/*
	 * Keep on servicing until no more request.
	 */
	for (;;) {
		struct mbuf *m_head, *m_tail;
		u_int32_t m_lim, m_cnt, m_totlen;
		/* snapshot of the request counter at the top of each pass */
		u_int16_t req = ifp->if_poll_req;

		/* per-poll packet limit: explicit plim, or derived */
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((qlimit(&inp->dlth_pkts)), (ifp->if_rxpoll_phiwat << 2));
		lck_mtx_unlock(&ifp->if_poll_lock);

		/*
		 * If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (dlil_verbose > 1) {
			DLIL_PRINTF("%s: polling up to %d pkts, "
			    "pkts avg %d max %d, wreq avg %d, "
			    "bytes avg %d\n",
			    if_name(ifp), m_lim,
			    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
			    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
		}

		/* invoke the driver's input poll routine */
		((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
		    &m_cnt, &m_totlen));

		if (m_head != NULL) {
			VERIFY(m_tail != NULL && m_cnt > 0);

			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: polled %d pkts, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), m_cnt,
				    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_wavg, ifp->if_rxpoll_bavg);
			}

			/* stats are required for extended variant */
			s.packets_in = m_cnt;
			s.bytes_in = m_totlen;

			(void) ifnet_input_common(ifp, m_head, m_tail,
			    &s, TRUE, TRUE);
		} else {
			if (dlil_verbose > 1) {
				DLIL_PRINTF("%s: no packets, "
				    "pkts avg %d max %d, wreq avg %d, "
				    "bytes avg %d\n",
				    if_name(ifp), ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
				    ifp->if_rxpoll_bavg);
			}

			/* empty poll: still ring the input-path doorbell */
			(void) ifnet_input_common(ifp, NULL, NULL,
			    NULL, FALSE, TRUE);
		}

		/* Release the io ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if there's no pending request, we're done */
		if (req == ifp->if_poll_req ||
		    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0) {
			break;
		}
	}
skip:
	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;
		struct timespec *ts;

		/*
		 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
		 * until ifnet_poll() is called again.
		 */
		ts = &ifp->if_poll_cycle;
		if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
			ts = NULL;
		}

		if (ts != NULL) {
			clock_interval_to_deadline((uint32_t)(ts->tv_nsec +
			    (ts->tv_sec * NSEC_PER_SEC)), 1, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(ifnet_poll_thread_cont, ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		if (dlil_verbose) {
			DLIL_PRINTF("%s: poller thread terminated\n",
			    if_name(ifp));
		}

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
4597
4598 void
ifnet_set_poll_cycle(struct ifnet * ifp,struct timespec * ts)4599 ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
4600 {
4601 if (ts == NULL) {
4602 bzero(&ifp->if_poll_cycle, sizeof(ifp->if_poll_cycle));
4603 } else {
4604 *(&ifp->if_poll_cycle) = *ts;
4605 }
4606
4607 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) {
4608 DLIL_PRINTF("%s: poll interval set to %lu nsec\n",
4609 if_name(ifp), ts->tv_nsec);
4610 }
4611 }
4612
4613 void
ifnet_purge(struct ifnet * ifp)4614 ifnet_purge(struct ifnet *ifp)
4615 {
4616 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) {
4617 if_qflush_snd(ifp, false);
4618 }
4619 }
4620
4621 void
ifnet_update_sndq(struct ifclassq * ifq,cqev_t ev)4622 ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
4623 {
4624 IFCQ_LOCK_ASSERT_HELD(ifq);
4625
4626 if (!(IFCQ_IS_READY(ifq))) {
4627 return;
4628 }
4629
4630 if (IFCQ_TBR_IS_ENABLED(ifq)) {
4631 struct tb_profile tb = {
4632 .rate = ifq->ifcq_tbr.tbr_rate_raw,
4633 .percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
4634 };
4635 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
4636 }
4637
4638 ifclassq_update(ifq, ev);
4639 }
4640
4641 void
ifnet_update_rcv(struct ifnet * ifp,cqev_t ev)4642 ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
4643 {
4644 switch (ev) {
4645 case CLASSQ_EV_LINK_BANDWIDTH:
4646 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
4647 ifp->if_poll_update++;
4648 }
4649 break;
4650
4651 default:
4652 break;
4653 }
4654 }
4655
4656 errno_t
ifnet_set_output_sched_model(struct ifnet * ifp,u_int32_t model)4657 ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
4658 {
4659 struct ifclassq *ifq;
4660 u_int32_t omodel;
4661 errno_t err;
4662
4663 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX) {
4664 return EINVAL;
4665 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4666 return ENXIO;
4667 }
4668
4669 ifq = ifp->if_snd;
4670 IFCQ_LOCK(ifq);
4671 omodel = ifp->if_output_sched_model;
4672 ifp->if_output_sched_model = model;
4673 if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
4674 ifp->if_output_sched_model = omodel;
4675 }
4676 IFCQ_UNLOCK(ifq);
4677
4678 return err;
4679 }
4680
4681 errno_t
ifnet_set_sndq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)4682 ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
4683 {
4684 if (ifp == NULL) {
4685 return EINVAL;
4686 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4687 return ENXIO;
4688 }
4689
4690 ifclassq_set_maxlen(ifp->if_snd, maxqlen);
4691
4692 return 0;
4693 }
4694
4695 errno_t
ifnet_get_sndq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)4696 ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
4697 {
4698 if (ifp == NULL || maxqlen == NULL) {
4699 return EINVAL;
4700 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4701 return ENXIO;
4702 }
4703
4704 *maxqlen = ifclassq_get_maxlen(ifp->if_snd);
4705
4706 return 0;
4707 }
4708
4709 errno_t
ifnet_get_sndq_len(struct ifnet * ifp,u_int32_t * pkts)4710 ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
4711 {
4712 errno_t err;
4713
4714 if (ifp == NULL || pkts == NULL) {
4715 err = EINVAL;
4716 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4717 err = ENXIO;
4718 } else {
4719 err = ifclassq_get_len(ifp->if_snd, MBUF_SC_UNSPEC,
4720 pkts, NULL);
4721 }
4722
4723 return err;
4724 }
4725
4726 errno_t
ifnet_get_service_class_sndq_len(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t * pkts,u_int32_t * bytes)4727 ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
4728 u_int32_t *pkts, u_int32_t *bytes)
4729 {
4730 errno_t err;
4731
4732 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
4733 (pkts == NULL && bytes == NULL)) {
4734 err = EINVAL;
4735 } else if (!(ifp->if_eflags & IFEF_TXSTART)) {
4736 err = ENXIO;
4737 } else {
4738 err = ifclassq_get_len(ifp->if_snd, sc, pkts, bytes);
4739 }
4740
4741 return err;
4742 }
4743
4744 errno_t
ifnet_set_rcvq_maxlen(struct ifnet * ifp,u_int32_t maxqlen)4745 ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
4746 {
4747 struct dlil_threading_info *inp;
4748
4749 if (ifp == NULL) {
4750 return EINVAL;
4751 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
4752 return ENXIO;
4753 }
4754
4755 if (maxqlen == 0) {
4756 maxqlen = if_rcvq_maxlen;
4757 } else if (maxqlen < IF_RCVQ_MINLEN) {
4758 maxqlen = IF_RCVQ_MINLEN;
4759 }
4760
4761 inp = ifp->if_inp;
4762 lck_mtx_lock(&inp->dlth_lock);
4763 qlimit(&inp->dlth_pkts) = maxqlen;
4764 lck_mtx_unlock(&inp->dlth_lock);
4765
4766 return 0;
4767 }
4768
4769 errno_t
ifnet_get_rcvq_maxlen(struct ifnet * ifp,u_int32_t * maxqlen)4770 ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
4771 {
4772 struct dlil_threading_info *inp;
4773
4774 if (ifp == NULL || maxqlen == NULL) {
4775 return EINVAL;
4776 } else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) {
4777 return ENXIO;
4778 }
4779
4780 inp = ifp->if_inp;
4781 lck_mtx_lock(&inp->dlth_lock);
4782 *maxqlen = qlimit(&inp->dlth_pkts);
4783 lck_mtx_unlock(&inp->dlth_lock);
4784 return 0;
4785 }
4786
4787 void
ifnet_enqueue_multi_setup(struct ifnet * ifp,uint16_t delay_qlen,uint16_t delay_timeout)4788 ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
4789 uint16_t delay_timeout)
4790 {
4791 if (delay_qlen > 0 && delay_timeout > 0) {
4792 if_set_eflags(ifp, IFEF_ENQUEUE_MULTI);
4793 ifp->if_start_delay_qlen = MIN(100, delay_qlen);
4794 ifp->if_start_delay_timeout = min(20000, delay_timeout);
4795 /* convert timeout to nanoseconds */
4796 ifp->if_start_delay_timeout *= 1000;
4797 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
4798 ifp->if_xname, (uint32_t)delay_qlen,
4799 (uint32_t)delay_timeout);
4800 } else {
4801 if_clear_eflags(ifp, IFEF_ENQUEUE_MULTI);
4802 }
4803 }
4804
/*
 * This function clears the DSCP bits in the IPV4/V6 header pointed to by buf.
 * While it's ok for buf to be not 32 bit aligned, the caller must ensure that
 * buf holds the full header.
 */
static __attribute__((noinline)) void
ifnet_mcast_clear_dscp(uint8_t *buf, uint8_t ip_ver)
{
	struct ip *ip;
	struct ip6_hdr *ip6;
	/* aligned bounce buffer, used only when buf is misaligned */
	uint8_t lbuf[64] __attribute__((aligned(8)));
	uint8_t *p = buf;

	if (ip_ver == IPVERSION) {
		uint8_t old_tos;
		uint32_t sum;

		/* operate on an aligned copy if the header is misaligned */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v4, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip));
			p = lbuf;
		}
		ip = (struct ip *)(void *)p;
		/* fast path: no DSCP bits (TOS bits above ECN) are set */
		if (__probable((ip->ip_tos & ~IPTOS_ECN_MASK) == 0)) {
			return;
		}

		DTRACE_IP1(clear__v4, struct ip *, ip);
		old_tos = ip->ip_tos;
		ip->ip_tos &= IPTOS_ECN_MASK;
		/*
		 * Incrementally patch the IPv4 header checksum for the
		 * TOS change instead of recomputing it (RFC 1624 style:
		 * add old value, subtract new, then fold the carry).
		 */
		sum = ip->ip_sum + htons(old_tos) - htons(ip->ip_tos);
		sum = (sum >> 16) + (sum & 0xffff);
		ip->ip_sum = (uint16_t)(sum & 0xffff);

		/* write the modified header back if we bounced */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip));
		}
	} else {
		uint32_t flow;
		ASSERT(ip_ver == IPV6_VERSION);

		/* operate on an aligned copy if the header is misaligned */
		if (__improbable(!IP_HDR_ALIGNED_P(p))) {
			DTRACE_IP1(not__aligned__v6, uint8_t *, buf);
			bcopy(buf, lbuf, sizeof(struct ip6_hdr));
			p = lbuf;
		}
		ip6 = (struct ip6_hdr *)(void *)p;
		flow = ntohl(ip6->ip6_flow);
		/* fast path: DSCP field already zero */
		if (__probable((flow & IP6FLOW_DSCP_MASK) == 0)) {
			return;
		}

		/* no checksum fixup needed: IPv6 has no header checksum */
		DTRACE_IP1(clear__v6, struct ip6_hdr *, ip6);
		ip6->ip6_flow = htonl(flow & ~IP6FLOW_DSCP_MASK);

		/* write the modified header back if we bounced */
		if (__improbable(p == lbuf)) {
			bcopy(lbuf, buf, sizeof(struct ip6_hdr));
		}
	}
}
4865
/*
 * Enqueue one packet (mbuf or native Skywalk packet) on an interface
 * transmit classq: stamp it with an enqueue timestamp, record
 * foreground/realtime activity, scrub DSCP bits from multicast IP
 * packets on Wi-Fi infrastructure links, run the delayed-start
 * coalescing heuristic, and finally hand the packet to the classq and
 * (possibly) kick the driver's start thread.
 *
 * The caller relinquishes ownership of *p; *pdrop reports whether the
 * classq dropped it.  If ifcq is NULL, ifp->if_snd is used.
 */
static inline errno_t
ifnet_enqueue_ifclassq(struct ifnet *ifp, struct ifclassq *ifcq,
    classq_pkt_t *p, boolean_t flush, boolean_t *pdrop)
{
#if SKYWALK
	volatile struct sk_nexusadv *nxadv = NULL;
#endif /* SKYWALK */
	/* foreground / realtime send timestamps in the nexus advisory */
	volatile uint64_t *fg_ts = NULL;
	volatile uint64_t *rt_ts = NULL;
	struct timespec now;
	u_int64_t now_nsec = 0;
	int error = 0;
	/* when non-NULL, points at an IP header whose DSCP must be cleared */
	uint8_t *mcast_buf = NULL;
	uint8_t ip_ver;
	uint32_t pktlen;

	ASSERT(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/*
	 * If attached to flowswitch, grab pointers to the
	 * timestamp variables in the nexus advisory region.
	 */
	if ((ifp->if_capabilities & IFCAP_SKYWALK) && ifp->if_na != NULL &&
	    (nxadv = ifp->if_na->nifna_netif->nif_fsw_nxadv) != NULL) {
		fg_ts = &nxadv->nxadv_fg_sendts;
		rt_ts = &nxadv->nxadv_rt_sendts;
	}
#endif /* SKYWALK */

	/*
	 * If packet already carries a timestamp, either from dlil_output()
	 * or from flowswitch, use it here. Otherwise, record timestamp.
	 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
	 * the timestamp value is used internally there.
	 */
	switch (p->cp_ptype) {
	case QP_MBUF:
#if SKYWALK
		/*
		 * Valid only for non-native (compat) Skywalk interface.
		 * If the data source uses packet, caller must convert
		 * it to mbuf first prior to calling this routine.
		 */
		ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		ASSERT(p->cp_mbuf->m_flags & M_PKTHDR);
		ASSERT(p->cp_mbuf->m_nextpkt == NULL);

		if (!(p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
		    p->cp_mbuf->m_pkthdr.pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_mbuf->m_pkthdr.pkt_timestamp = now_nsec;
		}
		p->cp_mbuf->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamp to indicate recent activity
		 * on a foreground socket.
		 */
		if ((p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
		    p->cp_mbuf->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
			if (!(p->cp_mbuf->m_pkthdr.pkt_flags &
			    PKTF_SO_BACKGROUND)) {
				ifp->if_fg_sendts = (uint32_t)_net_uptime;
				if (fg_ts != NULL) {
					*fg_ts = (uint32_t)_net_uptime;
				}
			}
			if (p->cp_mbuf->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
				ifp->if_rt_sendts = (uint32_t)_net_uptime;
				if (rt_ts != NULL) {
					*rt_ts = (uint32_t)_net_uptime;
				}
			}
		}
		pktlen = m_pktlen(p->cp_mbuf);

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_mbuf->m_flags & M_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			size_t len = mbuf_len(p->cp_mbuf), hlen;
			struct ether_header *eh;
			boolean_t pullup = FALSE;
			uint16_t etype;

			/* make sure at least the Ethernet header is contiguous */
			if (__improbable(len < sizeof(struct ether_header))) {
				DTRACE_IP1(small__ether, size_t, len);
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf,
				    sizeof(struct ether_header))) == NULL) {
					return ENOMEM;
				}
			}
			eh = (struct ether_header *)mbuf_data(p->cp_mbuf);
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip);
				if (len < hlen) {
					DTRACE_IP1(small__v4, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				hlen = sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				if (len < hlen) {
					DTRACE_IP1(small__v6, size_t, len);
					pullup = TRUE;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* not IP: leave the packet untouched */
				DTRACE_IP1(invalid__etype, uint16_t, etype);
				break;
			}
			if (pullup) {
				if ((p->cp_mbuf = m_pullup(p->cp_mbuf, (int)hlen)) ==
				    NULL) {
					return ENOMEM;
				}

				/* m_pullup() may relocate the data; refetch */
				eh = (struct ether_header *)mbuf_data(
					p->cp_mbuf);
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * Note that the pullups above ensure that mcast_buf
			 * points to a full IP header.
			 */
		}
		break;

#if SKYWALK
	case QP_PACKET:
		/*
		 * Valid only for native Skywalk interface. If the data
		 * source uses mbuf, caller must convert it to packet first
		 * prior to calling this routine.
		 */
		ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_TS_VALID) ||
		    p->cp_kpkt->pkt_timestamp == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
			p->cp_kpkt->pkt_timestamp = now_nsec;
		}
		p->cp_kpkt->pkt_pflags &= ~PKT_F_TS_VALID;
		/*
		 * If the packet service class is not background,
		 * update the timestamps on the interface, as well as
		 * the ones in nexus-wide advisory to indicate recent
		 * activity on a foreground flow.
		 */
		if (!(p->cp_kpkt->pkt_pflags & PKT_F_BACKGROUND)) {
			ifp->if_fg_sendts = (uint32_t)_net_uptime;
			if (fg_ts != NULL) {
				*fg_ts = (uint32_t)_net_uptime;
			}
		}
		if (p->cp_kpkt->pkt_pflags & PKT_F_REALTIME) {
			ifp->if_rt_sendts = (uint32_t)_net_uptime;
			if (rt_ts != NULL) {
				*rt_ts = (uint32_t)_net_uptime;
			}
		}
		pktlen = p->cp_kpkt->pkt_length;

		/*
		 * Some Wi-Fi AP implementations do not correctly handle
		 * multicast IP packets with DSCP bits set (radr://9331522).
		 * As a workaround we clear the DSCP bits but keep service
		 * class (rdar://51507725).
		 */
		if ((p->cp_kpkt->pkt_link_flags & PKT_LINKF_MCAST) != 0 &&
		    IFNET_IS_WIFI_INFRA(ifp)) {
			uint8_t *baddr;
			struct ether_header *eh;
			uint16_t etype;

			MD_BUFLET_ADDR_ABS(p->cp_kpkt, baddr);
			baddr += p->cp_kpkt->pkt_headroom;
			/* runt frames: skip DSCP scrubbing, enqueue as-is */
			if (__improbable(pktlen < sizeof(struct ether_header))) {
				DTRACE_IP1(pkt__small__ether, __kern_packet *,
				    p->cp_kpkt);
				break;
			}
			eh = (struct ether_header *)(void *)baddr;
			etype = ntohs(eh->ether_type);
			if (etype == ETHERTYPE_IP) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip)) {
					DTRACE_IP1(pkt__small__v4, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPVERSION;
			} else if (etype == ETHERTYPE_IPV6) {
				if (pktlen < sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr)) {
					DTRACE_IP1(pkt__small__v6, uint32_t,
					    pktlen);
					break;
				}
				ip_ver = IPV6_VERSION;
			} else {
				/* not IP: leave the packet untouched */
				DTRACE_IP1(pkt__invalid__etype, uint16_t,
				    etype);
				break;
			}
			mcast_buf = (uint8_t *)(eh + 1);
			/*
			 * ifnet_mcast_clear_dscp() will finish the work below.
			 * The checks above verify that the IP header is in the
			 * first buflet.
			 */
		}
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (mcast_buf != NULL) {
		ifnet_mcast_clear_dscp(mcast_buf, ip_ver);
	}

	if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
		if (now_nsec == 0) {
			nanouptime(&now);
			net_timernsec(&now, &now_nsec);
		}
		/*
		 * If the driver chose to delay start callback for
		 * coalescing multiple packets, Then use the following
		 * heuristics to make sure that start callback will
		 * be delayed only when bulk data transfer is detected.
		 * 1. number of packets enqueued in (delay_win * 2) is
		 * greater than or equal to the delay qlen.
		 * 2. If delay_start is enabled it will stay enabled for
		 * another 10 idle windows. This is to take into account
		 * variable RTT and burst traffic.
		 * 3. If the time elapsed since last enqueue is more
		 * than 200ms we disable delaying start callback. This is
		 * is to take idle time into account.
		 */
		u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
		if (ifp->if_start_delay_swin > 0) {
			if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
				/* still inside the sampling window: count it */
				ifp->if_start_delay_cnt++;
			} else if ((now_nsec - ifp->if_start_delay_swin)
			    >= (200 * 1000 * 1000)) {
				/* idle for >= 200ms: reset and stop delaying */
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
				ifp->if_start_delay_idle = 0;
				if (ifp->if_eflags & IFEF_DELAY_START) {
					if_clear_eflags(ifp, IFEF_DELAY_START);
					ifnet_delay_start_disabled_increment();
				}
			} else {
				/* window expired: decide based on the count */
				if (ifp->if_start_delay_cnt >=
				    ifp->if_start_delay_qlen) {
					if_set_eflags(ifp, IFEF_DELAY_START);
					ifp->if_start_delay_idle = 0;
				} else {
					if (ifp->if_start_delay_idle >= 10) {
						if_clear_eflags(ifp,
						    IFEF_DELAY_START);
						ifnet_delay_start_disabled_increment();
					} else {
						ifp->if_start_delay_idle++;
					}
				}
				ifp->if_start_delay_swin = now_nsec;
				ifp->if_start_delay_cnt = 1;
			}
		} else {
			/* first packet: open a new sampling window */
			ifp->if_start_delay_swin = now_nsec;
			ifp->if_start_delay_cnt = 1;
			ifp->if_start_delay_idle = 0;
			if_clear_eflags(ifp, IFEF_DELAY_START);
		}
	} else {
		if_clear_eflags(ifp, IFEF_DELAY_START);
	}

	/* enqueue the packet (caller consumes object) */
	error = ifclassq_enqueue(((ifcq != NULL) ? ifcq : ifp->if_snd), p, p,
	    1, pktlen, pdrop);

	/*
	 * Tell the driver to start dequeueing; do this even when the queue
	 * for the packet is suspended (EQSUSPENDED), as the driver could still
	 * be dequeueing from other unsuspended queues.
	 */
	if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED)) {
		ifnet_start(ifp);
	}

	return error;
}
5176
5177 static inline errno_t
ifnet_enqueue_ifclassq_chain(struct ifnet * ifp,classq_pkt_t * head,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)5178 ifnet_enqueue_ifclassq_chain(struct ifnet *ifp, classq_pkt_t *head,
5179 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
5180 boolean_t *pdrop)
5181 {
5182 int error;
5183
5184 /* enqueue the packet (caller consumes object) */
5185 error = ifclassq_enqueue(ifp->if_snd, head, tail, cnt, bytes, pdrop);
5186
5187 /*
5188 * Tell the driver to start dequeueing; do this even when the queue
5189 * for the packet is suspended (EQSUSPENDED), as the driver could still
5190 * be dequeueing from other unsuspended queues.
5191 */
5192 if ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED) {
5193 ifnet_start(ifp);
5194 }
5195 return error;
5196 }
5197
5198 int
ifnet_enqueue_netem(void * handle,pktsched_pkt_t * pkts,uint32_t n_pkts)5199 ifnet_enqueue_netem(void *handle, pktsched_pkt_t *pkts, uint32_t n_pkts)
5200 {
5201 struct ifnet *ifp = handle;
5202 boolean_t pdrop; /* dummy */
5203 uint32_t i;
5204
5205 ASSERT(n_pkts >= 1);
5206 for (i = 0; i < n_pkts - 1; i++) {
5207 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
5208 FALSE, &pdrop);
5209 }
5210 /* flush with the last packet */
5211 (void) ifnet_enqueue_ifclassq(ifp, NULL, &pkts[i].pktsched_pkt,
5212 TRUE, &pdrop);
5213
5214 return 0;
5215 }
5216
5217 static inline errno_t
ifnet_enqueue_common(struct ifnet * ifp,struct ifclassq * ifcq,classq_pkt_t * pkt,boolean_t flush,boolean_t * pdrop)5218 ifnet_enqueue_common(struct ifnet *ifp, struct ifclassq *ifcq,
5219 classq_pkt_t *pkt, boolean_t flush, boolean_t *pdrop)
5220 {
5221 if (ifp->if_output_netem != NULL) {
5222 return netem_enqueue(ifp->if_output_netem, pkt, pdrop);
5223 } else {
5224 return ifnet_enqueue_ifclassq(ifp, ifcq, pkt, flush, pdrop);
5225 }
5226 }
5227
5228 errno_t
ifnet_enqueue(struct ifnet * ifp,struct mbuf * m)5229 ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
5230 {
5231 boolean_t pdrop;
5232 return ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
5233 }
5234
5235 errno_t
ifnet_enqueue_mbuf(struct ifnet * ifp,struct mbuf * m,boolean_t flush,boolean_t * pdrop)5236 ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
5237 boolean_t *pdrop)
5238 {
5239 classq_pkt_t pkt;
5240
5241 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
5242 m->m_nextpkt != NULL) {
5243 if (m != NULL) {
5244 m_freem_list(m);
5245 *pdrop = TRUE;
5246 }
5247 return EINVAL;
5248 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5249 !IF_FULLY_ATTACHED(ifp)) {
5250 /* flag tested without lock for performance */
5251 m_freem(m);
5252 *pdrop = TRUE;
5253 return ENXIO;
5254 } else if (!(ifp->if_flags & IFF_UP)) {
5255 m_freem(m);
5256 *pdrop = TRUE;
5257 return ENETDOWN;
5258 }
5259
5260 CLASSQ_PKT_INIT_MBUF(&pkt, m);
5261 return ifnet_enqueue_common(ifp, NULL, &pkt, flush, pdrop);
5262 }
5263
5264 errno_t
ifnet_enqueue_mbuf_chain(struct ifnet * ifp,struct mbuf * m_head,struct mbuf * m_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)5265 ifnet_enqueue_mbuf_chain(struct ifnet *ifp, struct mbuf *m_head,
5266 struct mbuf *m_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
5267 boolean_t *pdrop)
5268 {
5269 classq_pkt_t head, tail;
5270
5271 ASSERT(m_head != NULL);
5272 ASSERT((m_head->m_flags & M_PKTHDR) != 0);
5273 ASSERT(m_tail != NULL);
5274 ASSERT((m_tail->m_flags & M_PKTHDR) != 0);
5275 ASSERT(ifp != NULL);
5276 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
5277
5278 if (!IF_FULLY_ATTACHED(ifp)) {
5279 /* flag tested without lock for performance */
5280 m_freem_list(m_head);
5281 *pdrop = TRUE;
5282 return ENXIO;
5283 } else if (!(ifp->if_flags & IFF_UP)) {
5284 m_freem_list(m_head);
5285 *pdrop = TRUE;
5286 return ENETDOWN;
5287 }
5288
5289 CLASSQ_PKT_INIT_MBUF(&head, m_head);
5290 CLASSQ_PKT_INIT_MBUF(&tail, m_tail);
5291 return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
5292 flush, pdrop);
5293 }
5294
5295 #if SKYWALK
5296 static errno_t
ifnet_enqueue_pkt_common(struct ifnet * ifp,struct ifclassq * ifcq,struct __kern_packet * kpkt,boolean_t flush,boolean_t * pdrop)5297 ifnet_enqueue_pkt_common(struct ifnet *ifp, struct ifclassq *ifcq,
5298 struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
5299 {
5300 classq_pkt_t pkt;
5301
5302 ASSERT(kpkt == NULL || kpkt->pkt_nextpkt == NULL);
5303
5304 if (__improbable(ifp == NULL || kpkt == NULL)) {
5305 if (kpkt != NULL) {
5306 pp_free_packet(__DECONST(struct kern_pbufpool *,
5307 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
5308 *pdrop = TRUE;
5309 }
5310 return EINVAL;
5311 } else if (__improbable(!(ifp->if_eflags & IFEF_TXSTART) ||
5312 !IF_FULLY_ATTACHED(ifp))) {
5313 /* flag tested without lock for performance */
5314 pp_free_packet(__DECONST(struct kern_pbufpool *,
5315 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
5316 *pdrop = TRUE;
5317 return ENXIO;
5318 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
5319 pp_free_packet(__DECONST(struct kern_pbufpool *,
5320 kpkt->pkt_qum.qum_pp), SK_PTR_ADDR(kpkt));
5321 *pdrop = TRUE;
5322 return ENETDOWN;
5323 }
5324
5325 CLASSQ_PKT_INIT_PACKET(&pkt, kpkt);
5326 return ifnet_enqueue_common(ifp, ifcq, &pkt, flush, pdrop);
5327 }
5328
/* Enqueue a Skywalk packet on ifp's default transmit classq. */
errno_t
ifnet_enqueue_pkt(struct ifnet *ifp, struct __kern_packet *kpkt,
    boolean_t flush, boolean_t *pdrop)
{
	return ifnet_enqueue_pkt_common(ifp, NULL, kpkt, flush, pdrop);
}
5335
/* Enqueue a Skywalk packet on an explicitly supplied classq of ifp. */
errno_t
ifnet_enqueue_ifcq_pkt(struct ifnet *ifp, struct ifclassq *ifcq,
    struct __kern_packet *kpkt, boolean_t flush, boolean_t *pdrop)
{
	return ifnet_enqueue_pkt_common(ifp, ifcq, kpkt, flush, pdrop);
}
5342
5343 errno_t
ifnet_enqueue_pkt_chain(struct ifnet * ifp,struct __kern_packet * k_head,struct __kern_packet * k_tail,uint32_t cnt,uint32_t bytes,boolean_t flush,boolean_t * pdrop)5344 ifnet_enqueue_pkt_chain(struct ifnet *ifp, struct __kern_packet *k_head,
5345 struct __kern_packet *k_tail, uint32_t cnt, uint32_t bytes, boolean_t flush,
5346 boolean_t *pdrop)
5347 {
5348 classq_pkt_t head, tail;
5349
5350 ASSERT(k_head != NULL);
5351 ASSERT(k_tail != NULL);
5352 ASSERT(ifp != NULL);
5353 ASSERT((ifp->if_eflags & IFEF_TXSTART) != 0);
5354
5355 if (!IF_FULLY_ATTACHED(ifp)) {
5356 /* flag tested without lock for performance */
5357 pp_free_packet_chain(k_head, NULL);
5358 *pdrop = TRUE;
5359 return ENXIO;
5360 } else if (__improbable(!(ifp->if_flags & IFF_UP))) {
5361 pp_free_packet_chain(k_head, NULL);
5362 *pdrop = TRUE;
5363 return ENETDOWN;
5364 }
5365
5366 CLASSQ_PKT_INIT_PACKET(&head, k_head);
5367 CLASSQ_PKT_INIT_PACKET(&tail, k_tail);
5368 return ifnet_enqueue_ifclassq_chain(ifp, &head, &tail, cnt, bytes,
5369 flush, pdrop);
5370 }
5371 #endif /* SKYWALK */
5372
5373 errno_t
ifnet_dequeue(struct ifnet * ifp,struct mbuf ** mp)5374 ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
5375 {
5376 errno_t rc;
5377 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
5378
5379 if (ifp == NULL || mp == NULL) {
5380 return EINVAL;
5381 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5382 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5383 return ENXIO;
5384 }
5385 if (!ifnet_is_attached(ifp, 1)) {
5386 return ENXIO;
5387 }
5388
5389 #if SKYWALK
5390 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5391 #endif /* SKYWALK */
5392 rc = ifclassq_dequeue(ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
5393 &pkt, NULL, NULL, NULL);
5394 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
5395 ifnet_decr_iorefcnt(ifp);
5396 *mp = pkt.cp_mbuf;
5397 return rc;
5398 }
5399
5400 errno_t
ifnet_dequeue_service_class(struct ifnet * ifp,mbuf_svc_class_t sc,struct mbuf ** mp)5401 ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
5402 struct mbuf **mp)
5403 {
5404 errno_t rc;
5405 classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt);
5406
5407 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) {
5408 return EINVAL;
5409 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5410 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5411 return ENXIO;
5412 }
5413 if (!ifnet_is_attached(ifp, 1)) {
5414 return ENXIO;
5415 }
5416
5417 #if SKYWALK
5418 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5419 #endif /* SKYWALK */
5420 rc = ifclassq_dequeue_sc(ifp->if_snd, sc, 1,
5421 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt, NULL, NULL, NULL);
5422 VERIFY((pkt.cp_ptype == QP_MBUF) || (pkt.cp_mbuf == NULL));
5423 ifnet_decr_iorefcnt(ifp);
5424 *mp = pkt.cp_mbuf;
5425 return rc;
5426 }
5427
5428 errno_t
ifnet_dequeue_multi(struct ifnet * ifp,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)5429 ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
5430 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
5431 {
5432 errno_t rc;
5433 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
5434 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
5435
5436 if (ifp == NULL || head == NULL || pkt_limit < 1) {
5437 return EINVAL;
5438 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5439 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5440 return ENXIO;
5441 }
5442 if (!ifnet_is_attached(ifp, 1)) {
5443 return ENXIO;
5444 }
5445
5446 #if SKYWALK
5447 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5448 #endif /* SKYWALK */
5449 rc = ifclassq_dequeue(ifp->if_snd, pkt_limit,
5450 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail, cnt, len);
5451 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
5452 ifnet_decr_iorefcnt(ifp);
5453 *head = pkt_head.cp_mbuf;
5454 if (tail != NULL) {
5455 *tail = pkt_tail.cp_mbuf;
5456 }
5457 return rc;
5458 }
5459
5460 errno_t
ifnet_dequeue_multi_bytes(struct ifnet * ifp,u_int32_t byte_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)5461 ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
5462 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
5463 {
5464 errno_t rc;
5465 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
5466 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
5467
5468 if (ifp == NULL || head == NULL || byte_limit < 1) {
5469 return EINVAL;
5470 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5471 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5472 return ENXIO;
5473 }
5474 if (!ifnet_is_attached(ifp, 1)) {
5475 return ENXIO;
5476 }
5477
5478 #if SKYWALK
5479 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5480 #endif /* SKYWALK */
5481 rc = ifclassq_dequeue(ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
5482 byte_limit, &pkt_head, &pkt_tail, cnt, len);
5483 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
5484 ifnet_decr_iorefcnt(ifp);
5485 *head = pkt_head.cp_mbuf;
5486 if (tail != NULL) {
5487 *tail = pkt_tail.cp_mbuf;
5488 }
5489 return rc;
5490 }
5491
5492 errno_t
ifnet_dequeue_service_class_multi(struct ifnet * ifp,mbuf_svc_class_t sc,u_int32_t pkt_limit,struct mbuf ** head,struct mbuf ** tail,u_int32_t * cnt,u_int32_t * len)5493 ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
5494 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
5495 u_int32_t *len)
5496 {
5497 errno_t rc;
5498 classq_pkt_t pkt_head = CLASSQ_PKT_INITIALIZER(pkt_head);
5499 classq_pkt_t pkt_tail = CLASSQ_PKT_INITIALIZER(pkt_tail);
5500
5501 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
5502 !MBUF_VALID_SC(sc)) {
5503 return EINVAL;
5504 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
5505 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX) {
5506 return ENXIO;
5507 }
5508 if (!ifnet_is_attached(ifp, 1)) {
5509 return ENXIO;
5510 }
5511
5512 #if SKYWALK
5513 ASSERT(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
5514 #endif /* SKYWALK */
5515 rc = ifclassq_dequeue_sc(ifp->if_snd, sc, pkt_limit,
5516 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, &pkt_head, &pkt_tail,
5517 cnt, len);
5518 VERIFY((pkt_head.cp_ptype == QP_MBUF) || (pkt_head.cp_mbuf == NULL));
5519 ifnet_decr_iorefcnt(ifp);
5520 *head = pkt_head.cp_mbuf;
5521 if (tail != NULL) {
5522 *tail = pkt_tail.cp_mbuf;
5523 }
5524 return rc;
5525 }
5526
5527 #if XNU_TARGET_OS_OSX
/*
 * Adapter for interfaces registered with the legacy framing callback:
 * report zero prepended/appended framing bytes via the optional pre
 * and post outputs, then delegate the actual framing to the legacy
 * callback.
 */
errno_t
ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *dest, const char *dest_linkaddr,
    const char *frame_type, u_int32_t *pre, u_int32_t *post)
{
	if (pre != NULL) {
		*pre = 0;
	}
	if (post != NULL) {
		*post = 0;
	}

	return ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type);
}
5542 #endif /* XNU_TARGET_OS_OSX */
5543
5544 static boolean_t
packet_has_vlan_tag(struct mbuf * m)5545 packet_has_vlan_tag(struct mbuf * m)
5546 {
5547 u_int tag = 0;
5548
5549 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
5550 tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
5551 if (tag == 0) {
5552 /* the packet is just priority-tagged, clear the bit */
5553 m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
5554 }
5555 }
5556 return tag != 0;
5557 }
5558
/*
 * Run an inbound packet through the interface's filter chain.
 *
 * Returns 0 when the packet survived every filter; a nonzero result
 * from a filter stops iteration and is propagated to the caller.
 * NOTE(review): on a nonzero result the packet appears to be owned by
 * the filter (it is not freed here) -- confirm against the DLIL
 * filter KPI contract.
 */
static int
dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
    char **frame_header_p, protocol_family_t protocol_family)
{
	boolean_t is_vlan_packet = FALSE;
	struct ifnet_filter *filter;
	struct mbuf *m = *m_p;

	is_vlan_packet = packet_has_vlan_tag(m);

	/* fast path: no filters attached to this interface */
	if (TAILQ_EMPTY(&ifp->if_flt_head)) {
		return 0;
	}

	/*
	 * Pass the inbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		/* exclude VLAN packets from external filters PR-3586856 */
		if (is_vlan_packet &&
		    (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
			continue;
		}

		if (!filter->filt_skip && filter->filt_input != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			/*
			 * Drop the lock across the callback; the busy
			 * state taken above keeps the list stable.
			 */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = (*filter->filt_input)(filter->filt_cookie,
			    ifp, protocol_family, m_p, frame_header_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return result;
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/*
	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
	 */
	if (*m_p != NULL) {
		(*m_p)->m_flags &= ~M_PROTO1;
	}

	return 0;
}
5619
/*
 * Run an outbound packet through the interface's filter chain.
 *
 * Mirror image of dlil_interface_filters_input(): returns 0 when the
 * packet survived every filter, otherwise the first nonzero filter
 * result.  NOTE(review): on a nonzero result the packet is not freed
 * here -- presumably the filter took ownership; confirm against the
 * DLIL filter KPI contract.
 */
__attribute__((noinline))
static int
dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
    protocol_family_t protocol_family)
{
	boolean_t is_vlan_packet;
	struct ifnet_filter *filter;
	struct mbuf *m = *m_p;

	is_vlan_packet = packet_has_vlan_tag(m);

	/*
	 * Pass the outbound packet to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		int result;

		/* exclude VLAN packets from external filters PR-3586856 */
		if (is_vlan_packet &&
		    (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
			continue;
		}

		if (!filter->filt_skip && filter->filt_output != NULL &&
		    (filter->filt_protocol == 0 ||
		    filter->filt_protocol == protocol_family)) {
			/*
			 * Drop the lock across the callback; the busy
			 * state taken above keeps the list stable.
			 */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_output(filter->filt_cookie, ifp,
			    protocol_family, m_p);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
			if (result != 0) {
				/* we're done with the filter list */
				if_flt_monitor_unbusy(ifp);
				lck_mtx_unlock(&ifp->if_flt_lock);
				return result;
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	return 0;
}
5669
/*
 * Deliver an inbound packet chain to a protocol attachment.
 *
 * v1 handlers take one packet at a time together with its frame
 * header, so the chain is unlinked here; v2 handlers accept the whole
 * m_nextpkt chain in a single call.  A handler return other than 0 or
 * EJUSTRETURN means the packet(s) were not consumed and are freed here.
 *
 * NOTE(review): if proto_kpi is neither v1 nor v2 the chain is neither
 * delivered nor freed -- presumably unreachable; confirm.
 */
static void
dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
{
	int error;

	if (ifproto->proto_kpi == kProtoKPI_v1) {
		/* Version 1 protocols get one packet at a time */
		while (m != NULL) {
			char * frame_header;
			mbuf_t next_packet;

			/* detach this packet from the chain */
			next_packet = m->m_nextpkt;
			m->m_nextpkt = NULL;
			frame_header = m->m_pkthdr.pkt_hdr;
			m->m_pkthdr.pkt_hdr = NULL;
			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
			    ifproto->protocol_family, m, frame_header);
			if (error != 0 && error != EJUSTRETURN) {
				m_freem(m);
			}
			m = next_packet;
		}
	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
		/* Version 2 protocols support packet lists */
		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
		    ifproto->protocol_family, m);
		if (error != 0 && error != EJUSTRETURN) {
			m_freem_list(m);
		}
	}
}
5701
5702 static void
dlil_input_stats_add(const struct ifnet_stat_increment_param * s,struct dlil_threading_info * inp,struct ifnet * ifp,boolean_t poll)5703 dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
5704 struct dlil_threading_info *inp, struct ifnet *ifp, boolean_t poll)
5705 {
5706 struct ifnet_stat_increment_param *d = &inp->dlth_stats;
5707
5708 if (s->packets_in != 0) {
5709 d->packets_in += s->packets_in;
5710 }
5711 if (s->bytes_in != 0) {
5712 d->bytes_in += s->bytes_in;
5713 }
5714 if (s->errors_in != 0) {
5715 d->errors_in += s->errors_in;
5716 }
5717
5718 if (s->packets_out != 0) {
5719 d->packets_out += s->packets_out;
5720 }
5721 if (s->bytes_out != 0) {
5722 d->bytes_out += s->bytes_out;
5723 }
5724 if (s->errors_out != 0) {
5725 d->errors_out += s->errors_out;
5726 }
5727
5728 if (s->collisions != 0) {
5729 d->collisions += s->collisions;
5730 }
5731 if (s->dropped != 0) {
5732 d->dropped += s->dropped;
5733 }
5734
5735 if (poll) {
5736 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
5737 }
5738 }
5739
5740 static boolean_t
dlil_input_stats_sync(struct ifnet * ifp,struct dlil_threading_info * inp)5741 dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
5742 {
5743 struct ifnet_stat_increment_param *s = &inp->dlth_stats;
5744
5745 /*
5746 * Use of atomic operations is unavoidable here because
5747 * these stats may also be incremented elsewhere via KPIs.
5748 */
5749 if (s->packets_in != 0) {
5750 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
5751 s->packets_in = 0;
5752 }
5753 if (s->bytes_in != 0) {
5754 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
5755 s->bytes_in = 0;
5756 }
5757 if (s->errors_in != 0) {
5758 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
5759 s->errors_in = 0;
5760 }
5761
5762 if (s->packets_out != 0) {
5763 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
5764 s->packets_out = 0;
5765 }
5766 if (s->bytes_out != 0) {
5767 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
5768 s->bytes_out = 0;
5769 }
5770 if (s->errors_out != 0) {
5771 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
5772 s->errors_out = 0;
5773 }
5774
5775 if (s->collisions != 0) {
5776 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
5777 s->collisions = 0;
5778 }
5779 if (s->dropped != 0) {
5780 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
5781 s->dropped = 0;
5782 }
5783
5784 /*
5785 * No need for atomic operations as they are modified here
5786 * only from within the DLIL input thread context.
5787 */
5788 if (ifp->if_poll_tstats.packets != 0) {
5789 ifp->if_poll_pstats.ifi_poll_packets += ifp->if_poll_tstats.packets;
5790 ifp->if_poll_tstats.packets = 0;
5791 }
5792 if (ifp->if_poll_tstats.bytes != 0) {
5793 ifp->if_poll_pstats.ifi_poll_bytes += ifp->if_poll_tstats.bytes;
5794 ifp->if_poll_tstats.bytes = 0;
5795 }
5796
5797 return ifp->if_data_threshold != 0;
5798 }
5799
5800 __private_extern__ void
dlil_input_packet_list(struct ifnet * ifp,struct mbuf * m)5801 dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
5802 {
5803 return dlil_input_packet_list_common(ifp, m, 0,
5804 IFNET_MODEL_INPUT_POLL_OFF, FALSE);
5805 }
5806
5807 __private_extern__ void
dlil_input_packet_list_extended(struct ifnet * ifp,struct mbuf * m,u_int32_t cnt,ifnet_model_t mode)5808 dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
5809 u_int32_t cnt, ifnet_model_t mode)
5810 {
5811 return dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE);
5812 }
5813
/*
 * dlil_input_packet_list_common
 *
 * Core DLIL receive demultiplexer.  Walks the m_nextpkt-linked chain in
 * 'm': for each packet it takes an I/O (data-mov) reference on the
 * receiving interface, classifies the packet via the interface's demux
 * routine, optionally performs CLAT46 v6->v4 translation, runs the
 * interface filters, and batches consecutive packets destined to the
 * same protocol attachment before passing them up via
 * dlil_ifproto_input().
 *
 * ifp_param: receiving interface, or NULL to use each mbuf's rcvif.
 * m:         chain of received packets linked through m_nextpkt.
 * cnt:       packet count hint (only meaningful when ext is TRUE).
 * mode:      driver-reported input model (polling on/off).
 * ext:       TRUE when called via the extended (RX-poll-aware) entry.
 */
static void
dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
    u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
{
	int error = 0;
	protocol_family_t protocol_family;
	mbuf_t next_packet;
	ifnet_t ifp = ifp_param;
	char *frame_header = NULL;
	struct if_proto *last_ifproto = NULL;	/* protocol owning the pending batch */
	mbuf_t pkt_first = NULL;		/* head of the pending batch */
	mbuf_t *pkt_next = NULL;		/* tail pointer of the pending batch */
	u_int32_t poll_thresh = 0, poll_ival = 0;
	int iorefcnt = 0;			/* 1 while holding a datamov ref on ifp */

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/* Arm opportunistic RX polling when a sizable burst was delivered */
	if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0) {
		poll_thresh = cnt;
	}

	while (m != NULL) {
		struct if_proto *ifproto = NULL;
		uint32_t pktf_mask;     /* pkt flags to preserve */

		m_add_crumb(m, PKT_CRUMB_DLIL_INPUT);

		/* no interface given: each packet names its own receiver */
		if (ifp_param == NULL) {
			ifp = m->m_pkthdr.rcvif;
		}

		/* legacy RX-poll drivers: poll every poll_ival packets */
		if ((ifp->if_eflags & IFEF_RXPOLL) &&
		    (ifp->if_xflags & IFXF_LEGACY) && poll_thresh != 0 &&
		    poll_ival > 0 && (--poll_thresh % poll_ival) == 0) {
			ifnet_poll(ifp);
		}

		/* Check if this mbuf looks valid */
		MBUF_INPUT_CHECK(m, ifp);

		/* detach this packet from the chain; stash its frame header */
		next_packet = m->m_nextpkt;
		m->m_nextpkt = NULL;
		frame_header = m->m_pkthdr.pkt_hdr;
		m->m_pkthdr.pkt_hdr = NULL;

		/*
		 * Get an IO reference count if the interface is not
		 * loopback (lo0) and it is attached; lo0 never goes
		 * away, so optimize for that.
		 */
		if (ifp != lo_ifp) {
			/* iorefcnt is 0 if it hasn't been taken yet */
			if (iorefcnt == 0) {
				if (!ifnet_datamov_begin(ifp)) {
					m_freem(m);
					goto next;
				}
			}
			iorefcnt = 1;
			/*
			 * Preserve the time stamp and skip pktap flags.
			 */
			pktf_mask = PKTF_TS_VALID | PKTF_SKIP_PKTAP;
		} else {
			/*
			 * If this arrived on lo0, preserve interface addr
			 * info to allow for connectivity between loopback
			 * and local interface addresses.
			 */
			pktf_mask = (PKTF_LOOP | PKTF_IFAINFO);
		}
		pktf_mask |= PKTF_WAKE_PKT;

		/* make sure packet comes in clean */
		m_classifier_init(m, pktf_mask);

		ifp_inc_traffic_class_in(ifp, m);

		/* find which protocol family this packet is for */
		ifnet_lock_shared(ifp);
		error = (*ifp->if_demux)(ifp, m, frame_header,
		    &protocol_family);
		ifnet_lock_done(ifp);
		if (error != 0) {
			if (error == EJUSTRETURN) {
				goto next;
			}
			/* unknown family: still run filters, then discard below */
			protocol_family = 0;
		}

#if (DEVELOPMENT || DEBUG)
		/*
		 * For testing we do not care about broadcast and multicast packets as
		 * they are not as controllable as unicast traffic
		 */
		if (__improbable(ifp->if_xflags & IFXF_MARK_WAKE_PKT)) {
			if ((protocol_family == PF_INET || protocol_family == PF_INET6) &&
			    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
				/*
				 * This is a one-shot command
				 */
				ifp->if_xflags &= ~IFXF_MARK_WAKE_PKT;
				m->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
			}
		}
#endif /* (DEVELOPMENT || DEBUG) */
		/* when enabled, log a hexdump of packets marked as wake packets */
		if (__improbable(net_wake_pkt_debug > 0 && (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT))) {
			char buffer[64];
			size_t buflen = MIN(mbuf_pkthdr_len(m), sizeof(buffer));

			os_log(OS_LOG_DEFAULT, "wake packet from %s len %d",
			    ifp->if_xname, m_pktlen(m));
			if (mbuf_copydata(m, 0, buflen, buffer) == 0) {
				log_hexdump(buffer, buflen);
			}
		}

		pktap_input(ifp, protocol_family, m, frame_header);

		/* Drop v4 packets received on CLAT46 enabled cell interface */
		if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp) &&
		    ifp->if_type == IFT_CELLULAR) {
			m_freem(m);
			ip6stat.ip6s_clat464_in_v4_drop++;
			goto next;
		}

		/* Translate the packet if it is received on CLAT interface */
		if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
		    && dlil_is_clat_needed(protocol_family, m)) {
			char *data = NULL;
			struct ether_header eh;
			struct ether_header *ehp = NULL;

			if (ifp->if_type == IFT_ETHER) {
				ehp = (struct ether_header *)(void *)frame_header;
				/* Skip RX Ethernet packets if they are not IPV6 */
				if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6) {
					goto skip_clat;
				}

				/* Keep a copy of frame_header for Ethernet packets */
				bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
			}
			/* translation may replace the mbuf and the proto family */
			error = dlil_clat64(ifp, &protocol_family, &m);
			data = (char *) mbuf_data(m);
			if (error != 0) {
				m_freem(m);
				ip6stat.ip6s_clat464_in_drop++;
				goto next;
			}
			/* Native v6 should be No-op */
			if (protocol_family != PF_INET) {
				goto skip_clat;
			}

			/* Do this only for translated v4 packets. */
			switch (ifp->if_type) {
			case IFT_CELLULAR:
				frame_header = data;
				break;
			case IFT_ETHER:
				/*
				 * Drop if the mbuf doesn't have enough
				 * space for Ethernet header
				 */
				if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
					m_free(m);
					ip6stat.ip6s_clat464_in_drop++;
					goto next;
				}
				/*
				 * Set the frame_header ETHER_HDR_LEN bytes
				 * preceeding the data pointer. Change
				 * the ether_type too.
				 */
				frame_header = data - ETHER_HDR_LEN;
				eh.ether_type = htons(ETHERTYPE_IP);
				bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
				break;
			}
		}
skip_clat:
		/*
		 * Match the wake packet against the list of ports that has been
		 * been queried by the driver before the device went to sleep
		 */
		if (__improbable(m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT)) {
			if (protocol_family != PF_INET && protocol_family != PF_INET6) {
				if_ports_used_match_mbuf(ifp, protocol_family, m);
			}
		}
		if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
		    !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
			dlil_input_cksum_dbg(ifp, m, frame_header,
			    protocol_family);
		}
		/*
		 * For partial checksum offload, we expect the driver to
		 * set the start offset indicating the start of the span
		 * that is covered by the hardware-computed checksum;
		 * adjust this start offset accordingly because the data
		 * pointer has been advanced beyond the link-layer header.
		 *
		 * Virtual lan types (bridge, vlan, bond) can call
		 * dlil_input_packet_list() with the same packet with the
		 * checksum flags set. Set a flag indicating that the
		 * adjustment has already been done.
		 */
		if ((m->m_pkthdr.csum_flags & CSUM_ADJUST_DONE) != 0) {
			/* adjustment has already been done */
		} else if ((m->m_pkthdr.csum_flags &
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
			int adj;
			/* a frame header outside the mbuf invalidates the sum */
			if (frame_header == NULL ||
			    frame_header < (char *)mbuf_datastart(m) ||
			    frame_header > (char *)m->m_data ||
			    (adj = (int)(m->m_data - frame_header)) >
			    m->m_pkthdr.csum_rx_start) {
				m->m_pkthdr.csum_data = 0;
				m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
				hwcksum_in_invalidated++;
			} else {
				m->m_pkthdr.csum_rx_start -= adj;
			}
			/* make sure we don't adjust more than once */
			m->m_pkthdr.csum_flags |= CSUM_ADJUST_DONE;
		}
		if (clat_debug) {
			pktap_input(ifp, protocol_family, m, frame_header);
		}

		if (m->m_flags & (M_BCAST | M_MCAST)) {
			atomic_add_64(&ifp->if_imcasts, 1);
		}

		/* run interface filters */
		error = dlil_interface_filters_input(ifp, &m,
		    &frame_header, protocol_family);
		if (error != 0) {
			if (error != EJUSTRETURN) {
				m_freem(m);
			}
			goto next;
		}
		/*
		 * A VLAN interface receives VLAN-tagged packets by attaching
		 * its PF_VLAN protocol to a parent interface. When a VLAN
		 * interface is a member of a bridge, the parent interface
		 * receives VLAN-tagged M_PROMISC packets. A VLAN-tagged
		 * M_PROMISC packet must be processed by the VLAN protocol
		 * so that it can be sent up the stack via
		 * dlil_input_packet_list(). That allows the bridge interface's
		 * input filter, attached to the VLAN interface, to process
		 * the packet.
		 */
		if (protocol_family != PF_VLAN &&
		    (m->m_flags & M_PROMISC) != 0) {
			m_freem(m);
			goto next;
		}

		/* Lookup the protocol attachment to this interface */
		if (protocol_family == 0) {
			ifproto = NULL;
		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
		    (last_ifproto->protocol_family == protocol_family)) {
			/* same protocol as the previous packet: reuse its ref */
			VERIFY(ifproto == NULL);
			ifproto = last_ifproto;
			if_proto_ref(last_ifproto);
		} else {
			VERIFY(ifproto == NULL);
			ifnet_lock_shared(ifp);
			/* callee holds a proto refcnt upon success */
			ifproto = find_attached_proto(ifp, protocol_family);
			ifnet_lock_done(ifp);
		}
		if (ifproto == NULL) {
			/* no protocol for this packet, discard */
			m_freem(m);
			goto next;
		}
		if (ifproto != last_ifproto) {
			if (last_ifproto != NULL) {
				/* pass up the list for the previous protocol */
				dlil_ifproto_input(last_ifproto, pkt_first);
				pkt_first = NULL;
				if_proto_free(last_ifproto);
			}
			last_ifproto = ifproto;
			if_proto_ref(ifproto);
		}
		/* extend the list */
		m->m_pkthdr.pkt_hdr = frame_header;
		if (pkt_first == NULL) {
			pkt_first = m;
		} else {
			*pkt_next = m;
		}
		pkt_next = &m->m_nextpkt;

next:
		if (next_packet == NULL && last_ifproto != NULL) {
			/* pass up the last list of packets */
			dlil_ifproto_input(last_ifproto, pkt_first);
			if_proto_free(last_ifproto);
			last_ifproto = NULL;
		}
		if (ifproto != NULL) {
			if_proto_free(ifproto);
			ifproto = NULL;
		}

		m = next_packet;

		/* update the driver's multicast filter, if needed */
		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
			ifp->if_updatemcasts = 0;
		}
		if (iorefcnt == 1) {
			/* If the next mbuf is on a different interface, unlock data-mov */
			if (!m || (ifp != ifp_param && ifp != m->m_pkthdr.rcvif)) {
				ifnet_datamov_end(ifp);
				iorefcnt = 0;
			}
		}
	}

	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
6146
6147 errno_t
if_mcasts_update(struct ifnet * ifp)6148 if_mcasts_update(struct ifnet *ifp)
6149 {
6150 errno_t err;
6151
6152 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
6153 if (err == EAFNOSUPPORT) {
6154 err = 0;
6155 }
6156 DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
6157 "(err=%d)\n", if_name(ifp),
6158 (err == 0 ? "successfully restored" : "failed to restore"),
6159 ifp->if_updatemcasts, err);
6160
6161 /* just return success */
6162 return 0;
6163 }
6164
/* If ifp is set, we will increment the generation for the interface */
int
dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
{
	/* bump the generation so observers know to re-query ifp's state */
	if (ifp != NULL) {
		ifnet_increment_generation(ifp);
	}

#if NECP
	/* let NECP clients re-evaluate now that interface state changed */
	necp_update_all_clients();
#endif /* NECP */

	/* broadcast the kernel event; returns kev_post_msg's status */
	return kev_post_msg(event);
}
6179
6180 __private_extern__ void
dlil_post_sifflags_msg(struct ifnet * ifp)6181 dlil_post_sifflags_msg(struct ifnet * ifp)
6182 {
6183 struct kev_msg ev_msg;
6184 struct net_event_data ev_data;
6185
6186 bzero(&ev_data, sizeof(ev_data));
6187 bzero(&ev_msg, sizeof(ev_msg));
6188 ev_msg.vendor_code = KEV_VENDOR_APPLE;
6189 ev_msg.kev_class = KEV_NETWORK_CLASS;
6190 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
6191 ev_msg.event_code = KEV_DL_SIFFLAGS;
6192 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
6193 ev_data.if_family = ifp->if_family;
6194 ev_data.if_unit = (u_int32_t) ifp->if_unit;
6195 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
6196 ev_msg.dv[0].data_ptr = &ev_data;
6197 ev_msg.dv[1].data_length = 0;
6198 dlil_post_complete_msg(ifp, &ev_msg);
6199 }
6200
#define TMP_IF_PROTO_ARR_SIZE 10
/*
 * dlil_event_internal
 *
 * Deliver a kernel event to everything attached to 'ifp': the interface
 * filters first, then every attached protocol's event handler, then the
 * interface's own if_event callback; finally post the message
 * system-wide via dlil_post_complete_msg().  When 'update_generation'
 * is true, the interface generation count is bumped as part of posting.
 */
static int
dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
{
	struct ifnet_filter *filter = NULL;
	struct if_proto *proto = NULL;
	int if_proto_count = 0;
	/* small on-stack array avoids allocation in the common case */
	struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
	struct if_proto **tmp_ifproto_arr = tmp_ifproto_stack_arr;
	int tmp_ifproto_arr_idx = 0;

	/*
	 * Pass the event to the interface filters
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_event != NULL) {
			/* drop the lock: the callback may block or re-enter DLIL */
			lck_mtx_unlock(&ifp->if_flt_lock);

			filter->filt_event(filter->filt_cookie, ifp,
			    filter->filt_protocol, event);

			lck_mtx_lock_spin(&ifp->if_flt_lock);
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		goto done;
	}

	/*
	 * An embedded tmp_list_entry in if_proto may still get
	 * over-written by another thread after giving up ifnet lock,
	 * therefore we are avoiding embedded pointers here.
	 */
	ifnet_lock_shared(ifp);
	if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
	if (if_proto_count) {
		int i;
		VERIFY(ifp->if_proto_hash != NULL);
		if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
			tmp_ifproto_arr = tmp_ifproto_stack_arr;
		} else {
			tmp_ifproto_arr = kalloc_type(struct if_proto *,
			    if_proto_count, Z_WAITOK | Z_ZERO);
			if (tmp_ifproto_arr == NULL) {
				ifnet_lock_done(ifp);
				goto cleanup;
			}
		}

		/* snapshot every attached protocol, taking a ref on each */
		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
			    next_hash) {
				if_proto_ref(proto);
				tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
				tmp_ifproto_arr_idx++;
			}
		}
		VERIFY(if_proto_count == tmp_ifproto_arr_idx);
	}
	ifnet_lock_done(ifp);

	/* deliver the event to each protocol without holding the ifnet lock */
	for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
	    tmp_ifproto_arr_idx++) {
		proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
		VERIFY(proto != NULL);
		proto_media_event eventp =
		    (proto->proto_kpi == kProtoKPI_v1 ?
		    proto->kpi.v1.event :
		    proto->kpi.v2.event);

		if (eventp != NULL) {
			eventp(ifp, proto->protocol_family,
			    event);
		}
		/* release the ref taken during the snapshot above */
		if_proto_free(proto);
	}

cleanup:
	if (tmp_ifproto_arr != tmp_ifproto_stack_arr) {
		kfree_type(struct if_proto *, if_proto_count, tmp_ifproto_arr);
	}

	/* Pass the event to the interface */
	if (ifp->if_event != NULL) {
		ifp->if_event(ifp, event);
	}

	/* Release the io ref count */
	ifnet_decr_iorefcnt(ifp);
done:
	return dlil_post_complete_msg(update_generation ? ifp : NULL, event);
}
6301
6302 errno_t
ifnet_event(ifnet_t ifp,struct kern_event_msg * event)6303 ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
6304 {
6305 struct kev_msg kev_msg;
6306 int result = 0;
6307
6308 if (ifp == NULL || event == NULL) {
6309 return EINVAL;
6310 }
6311
6312 bzero(&kev_msg, sizeof(kev_msg));
6313 kev_msg.vendor_code = event->vendor_code;
6314 kev_msg.kev_class = event->kev_class;
6315 kev_msg.kev_subclass = event->kev_subclass;
6316 kev_msg.event_code = event->event_code;
6317 kev_msg.dv[0].data_ptr = &event->event_data[0];
6318 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
6319 kev_msg.dv[1].data_length = 0;
6320
6321 result = dlil_event_internal(ifp, &kev_msg, TRUE);
6322
6323 return result;
6324 }
6325
6326 static void
dlil_count_chain_len(mbuf_t m,struct chain_len_stats * cls)6327 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
6328 {
6329 mbuf_t n = m;
6330 int chainlen = 0;
6331
6332 while (n != NULL) {
6333 chainlen++;
6334 n = n->m_next;
6335 }
6336 switch (chainlen) {
6337 case 0:
6338 break;
6339 case 1:
6340 atomic_add_64(&cls->cls_one, 1);
6341 break;
6342 case 2:
6343 atomic_add_64(&cls->cls_two, 1);
6344 break;
6345 case 3:
6346 atomic_add_64(&cls->cls_three, 1);
6347 break;
6348 case 4:
6349 atomic_add_64(&cls->cls_four, 1);
6350 break;
6351 case 5:
6352 default:
6353 atomic_add_64(&cls->cls_five_or_more, 1);
6354 break;
6355 }
6356 }
6357
#if CONFIG_DTRACE
__attribute__((noinline))
static void
dlil_output_dtrace(ifnet_t ifp, protocol_family_t proto_family, mbuf_t m)
{
	/* Fire the DTrace ip:::send probe for IPv4/IPv6 payloads */
	switch (proto_family) {
	case PF_INET: {
		struct ip *iph = mtod(m, struct ip *);

		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
		    struct ip *, iph, struct ifnet *, ifp,
		    struct ip *, iph, struct ip6_hdr *, NULL);
		break;
	}
	case PF_INET6: {
		struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);

		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
		    struct ip6_hdr *, ip6h, struct ifnet *, ifp,
		    struct ip *, NULL, struct ip6_hdr *, ip6h);
		break;
	}
	default:
		break;
	}
}
#endif /* CONFIG_DTRACE */
6376
6377 /*
6378 * dlil_output
6379 *
6380 * Caller should have a lock on the protocol domain if the protocol
6381 * doesn't support finer grained locking. In most cases, the lock
6382 * will be held from the socket layer and won't be released until
6383 * we return back to the socket layer.
6384 *
6385 * This does mean that we must take a protocol lock before we take
6386 * an interface lock if we're going to take both. This makes sense
6387 * because a protocol is likely to interact with an ifp while it
6388 * is under the protocol lock.
6389 *
6390 * An advisory code will be returned if adv is not null. This
6391 * can be used to provide feedback about interface queues to the
6392 * application.
6393 */
6394 errno_t
dlil_output(ifnet_t ifp,protocol_family_t proto_family,mbuf_t packetlist,void * route,const struct sockaddr * dest,int raw,struct flowadv * adv)6395 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
6396 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
6397 {
6398 char *frame_type = NULL;
6399 char *dst_linkaddr = NULL;
6400 int retval = 0;
6401 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
6402 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
6403 struct if_proto *proto = NULL;
6404 mbuf_t m = NULL;
6405 mbuf_t send_head = NULL;
6406 mbuf_t *send_tail = &send_head;
6407 int iorefcnt = 0;
6408 u_int32_t pre = 0, post = 0;
6409 u_int32_t fpkts = 0, fbytes = 0;
6410 int32_t flen = 0;
6411 struct timespec now;
6412 u_int64_t now_nsec;
6413 boolean_t did_clat46 = FALSE;
6414 protocol_family_t old_proto_family = proto_family;
6415 struct sockaddr_in6 dest6;
6416 struct rtentry *rt = NULL;
6417 u_int32_t m_loop_set = 0;
6418
6419 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
6420
6421 /*
6422 * Get an io refcnt if the interface is attached to prevent ifnet_detach
6423 * from happening while this operation is in progress
6424 */
6425 if (!ifnet_datamov_begin(ifp)) {
6426 retval = ENXIO;
6427 goto cleanup;
6428 }
6429 iorefcnt = 1;
6430
6431 VERIFY(ifp->if_output_dlil != NULL);
6432
6433 /* update the driver's multicast filter, if needed */
6434 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) {
6435 ifp->if_updatemcasts = 0;
6436 }
6437
6438 frame_type = frame_type_buffer;
6439 dst_linkaddr = dst_linkaddr_buffer;
6440
6441 if (raw == 0) {
6442 ifnet_lock_shared(ifp);
6443 /* callee holds a proto refcnt upon success */
6444 proto = find_attached_proto(ifp, proto_family);
6445 if (proto == NULL) {
6446 ifnet_lock_done(ifp);
6447 retval = ENXIO;
6448 goto cleanup;
6449 }
6450 ifnet_lock_done(ifp);
6451 }
6452
6453 preout_again:
6454 if (packetlist == NULL) {
6455 goto cleanup;
6456 }
6457
6458 m = packetlist;
6459 packetlist = packetlist->m_nextpkt;
6460 m->m_nextpkt = NULL;
6461
6462 m_add_crumb(m, PKT_CRUMB_DLIL_OUTPUT);
6463
6464 /*
6465 * Perform address family translation for the first
6466 * packet outside the loop in order to perform address
6467 * lookup for the translated proto family.
6468 */
6469 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
6470 (ifp->if_type == IFT_CELLULAR ||
6471 dlil_is_clat_needed(proto_family, m))) {
6472 retval = dlil_clat46(ifp, &proto_family, &m);
6473 /*
6474 * Go to the next packet if translation fails
6475 */
6476 if (retval != 0) {
6477 m_freem(m);
6478 m = NULL;
6479 ip6stat.ip6s_clat464_out_drop++;
6480 /* Make sure that the proto family is PF_INET */
6481 ASSERT(proto_family == PF_INET);
6482 goto preout_again;
6483 }
6484 /*
6485 * Free the old one and make it point to the IPv6 proto structure.
6486 *
6487 * Change proto for the first time we have successfully
6488 * performed address family translation.
6489 */
6490 if (!did_clat46 && proto_family == PF_INET6) {
6491 did_clat46 = TRUE;
6492
6493 if (proto != NULL) {
6494 if_proto_free(proto);
6495 }
6496 ifnet_lock_shared(ifp);
6497 /* callee holds a proto refcnt upon success */
6498 proto = find_attached_proto(ifp, proto_family);
6499 if (proto == NULL) {
6500 ifnet_lock_done(ifp);
6501 retval = ENXIO;
6502 m_freem(m);
6503 m = NULL;
6504 goto cleanup;
6505 }
6506 ifnet_lock_done(ifp);
6507 if (ifp->if_type == IFT_ETHER) {
6508 /* Update the dest to translated v6 address */
6509 dest6.sin6_len = sizeof(struct sockaddr_in6);
6510 dest6.sin6_family = AF_INET6;
6511 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
6512 dest = (const struct sockaddr *)&dest6;
6513
6514 /*
6515 * Lookup route to the translated destination
6516 * Free this route ref during cleanup
6517 */
6518 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
6519 0, 0, ifp->if_index);
6520
6521 route = rt;
6522 }
6523 }
6524 }
6525
6526 /*
6527 * This path gets packet chain going to the same destination.
6528 * The pre output routine is used to either trigger resolution of
6529 * the next hop or retreive the next hop's link layer addressing.
6530 * For ex: ether_inet(6)_pre_output routine.
6531 *
6532 * If the routine returns EJUSTRETURN, it implies that packet has
6533 * been queued, and therefore we have to call preout_again for the
6534 * following packet in the chain.
6535 *
6536 * For errors other than EJUSTRETURN, the current packet is freed
6537 * and the rest of the chain (pointed by packetlist is freed as
6538 * part of clean up.
6539 *
6540 * Else if there is no error the retrieved information is used for
6541 * all the packets in the chain.
6542 */
6543 if (raw == 0) {
6544 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
6545 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
6546 retval = 0;
6547 if (preoutp != NULL) {
6548 retval = preoutp(ifp, proto_family, &m, dest, route,
6549 frame_type, dst_linkaddr);
6550
6551 if (retval != 0) {
6552 if (retval == EJUSTRETURN) {
6553 goto preout_again;
6554 }
6555 m_freem(m);
6556 m = NULL;
6557 goto cleanup;
6558 }
6559 }
6560 }
6561
6562 do {
6563 /*
6564 * pkt_hdr is set here to point to m_data prior to
6565 * calling into the framer. This value of pkt_hdr is
6566 * used by the netif gso logic to retrieve the ip header
6567 * for the TCP packets, offloaded for TSO processing.
6568 */
6569 if ((raw != 0) && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
6570 uint8_t vlan_encap_len = 0;
6571
6572 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_ENCAP_PRESENT) != 0) {
6573 vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
6574 }
6575 m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
6576 } else {
6577 m->m_pkthdr.pkt_hdr = mtod(m, void *);
6578 }
6579
6580 /*
6581 * Perform address family translation if needed.
6582 * For now we only support stateless 4 to 6 translation
6583 * on the out path.
6584 *
6585 * The routine below translates IP header, updates protocol
6586 * checksum and also translates ICMP.
6587 *
6588 * We skip the first packet as it is already translated and
6589 * the proto family is set to PF_INET6.
6590 */
6591 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
6592 (ifp->if_type == IFT_CELLULAR ||
6593 dlil_is_clat_needed(proto_family, m))) {
6594 retval = dlil_clat46(ifp, &proto_family, &m);
6595 /* Goto the next packet if the translation fails */
6596 if (retval != 0) {
6597 m_freem(m);
6598 m = NULL;
6599 ip6stat.ip6s_clat464_out_drop++;
6600 goto next;
6601 }
6602 }
6603
6604 #if CONFIG_DTRACE
6605 if (!raw) {
6606 dlil_output_dtrace(ifp, proto_family, m);
6607 }
6608 #endif /* CONFIG_DTRACE */
6609
6610 if (raw == 0 && ifp->if_framer != NULL) {
6611 int rcvif_set = 0;
6612
6613 /*
6614 * If this is a broadcast packet that needs to be
6615 * looped back into the system, set the inbound ifp
6616 * to that of the outbound ifp. This will allow
6617 * us to determine that it is a legitimate packet
6618 * for the system. Only set the ifp if it's not
6619 * already set, just to be safe.
6620 */
6621 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
6622 m->m_pkthdr.rcvif == NULL) {
6623 m->m_pkthdr.rcvif = ifp;
6624 rcvif_set = 1;
6625 }
6626 m_loop_set = m->m_flags & M_LOOP;
6627 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
6628 frame_type, &pre, &post);
6629 if (retval != 0) {
6630 if (retval != EJUSTRETURN) {
6631 m_freem(m);
6632 }
6633 goto next;
6634 }
6635
6636 /*
6637 * For partial checksum offload, adjust the start
6638 * and stuff offsets based on the prepended header.
6639 */
6640 if ((m->m_pkthdr.csum_flags &
6641 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
6642 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
6643 m->m_pkthdr.csum_tx_stuff += pre;
6644 m->m_pkthdr.csum_tx_start += pre;
6645 }
6646
6647 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
6648 dlil_output_cksum_dbg(ifp, m, pre,
6649 proto_family);
6650 }
6651
6652 /*
6653 * Clear the ifp if it was set above, and to be
6654 * safe, only if it is still the same as the
6655 * outbound ifp we have in context. If it was
6656 * looped back, then a copy of it was sent to the
6657 * loopback interface with the rcvif set, and we
6658 * are clearing the one that will go down to the
6659 * layer below.
6660 */
6661 if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
6662 m->m_pkthdr.rcvif = NULL;
6663 }
6664 }
6665
6666 /*
6667 * Let interface filters (if any) do their thing ...
6668 */
6669 retval = dlil_interface_filters_output(ifp, &m, proto_family);
6670 if (retval != 0) {
6671 if (retval != EJUSTRETURN) {
6672 m_freem(m);
6673 }
6674 goto next;
6675 }
6676 /*
6677 * Strip away M_PROTO1 bit prior to sending packet
6678 * to the driver as this field may be used by the driver
6679 */
6680 m->m_flags &= ~M_PROTO1;
6681
6682 /*
6683 * If the underlying interface is not capable of handling a
6684 * packet whose data portion spans across physically disjoint
6685 * pages, we need to "normalize" the packet so that we pass
6686 * down a chain of mbufs where each mbuf points to a span that
6687 * resides in the system page boundary. If the packet does
6688 * not cross page(s), the following is a no-op.
6689 */
6690 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
6691 if ((m = m_normalize(m)) == NULL) {
6692 goto next;
6693 }
6694 }
6695
6696 /*
6697 * If this is a TSO packet, make sure the interface still
6698 * advertise TSO capability.
6699 */
6700 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
6701 retval = EMSGSIZE;
6702 m_freem(m);
6703 goto cleanup;
6704 }
6705
6706 ifp_inc_traffic_class_out(ifp, m);
6707
6708 #if SKYWALK
6709 /*
6710 * For native skywalk devices, packets will be passed to pktap
6711 * after GSO or after the mbuf to packet conversion.
6712 * This is done for IPv4/IPv6 packets only because there is no
6713 * space in the mbuf to pass down the proto family.
6714 */
6715 if (dlil_is_native_netif_nexus(ifp)) {
6716 if (raw || m->m_pkthdr.pkt_proto == 0) {
6717 pktap_output(ifp, proto_family, m, pre, post);
6718 m->m_pkthdr.pkt_flags |= PKTF_SKIP_PKTAP;
6719 }
6720 } else {
6721 pktap_output(ifp, proto_family, m, pre, post);
6722 }
6723 #else /* SKYWALK */
6724 pktap_output(ifp, proto_family, m, pre, post);
6725 #endif /* SKYWALK */
6726
6727 /*
6728 * Count the number of elements in the mbuf chain
6729 */
6730 if (tx_chain_len_count) {
6731 dlil_count_chain_len(m, &tx_chain_len_stats);
6732 }
6733
6734 /*
6735 * Record timestamp; ifnet_enqueue() will use this info
6736 * rather than redoing the work. An optimization could
6737 * involve doing this just once at the top, if there are
6738 * no interface filters attached, but that's probably
6739 * not a big deal.
6740 */
6741 nanouptime(&now);
6742 net_timernsec(&now, &now_nsec);
6743 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
6744
6745 /*
6746 * Discard partial sum information if this packet originated
6747 * from another interface; the packet would already have the
6748 * final checksum and we shouldn't recompute it.
6749 */
6750 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
6751 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
6752 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
6753 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
6754 m->m_pkthdr.csum_data = 0;
6755 }
6756
6757 /*
6758 * Finally, call the driver.
6759 */
6760 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
6761 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
6762 flen += (m_pktlen(m) - (pre + post));
6763 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
6764 }
6765 *send_tail = m;
6766 send_tail = &m->m_nextpkt;
6767 } else {
6768 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
6769 flen = (m_pktlen(m) - (pre + post));
6770 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
6771 } else {
6772 flen = 0;
6773 }
6774 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
6775 0, 0, 0, 0, 0);
6776 retval = (*ifp->if_output_dlil)(ifp, m);
6777 if (retval == EQFULL || retval == EQSUSPENDED) {
6778 if (adv != NULL && adv->code == FADV_SUCCESS) {
6779 adv->code = (retval == EQFULL ?
6780 FADV_FLOW_CONTROLLED :
6781 FADV_SUSPENDED);
6782 }
6783 retval = 0;
6784 }
6785 if (retval == 0 && flen > 0) {
6786 fbytes += flen;
6787 fpkts++;
6788 }
6789 if (retval != 0 && dlil_verbose) {
6790 DLIL_PRINTF("%s: output error on %s retval = %d\n",
6791 __func__, if_name(ifp),
6792 retval);
6793 }
6794 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
6795 0, 0, 0, 0, 0);
6796 }
6797 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
6798
6799 next:
6800 m = packetlist;
6801 if (m != NULL) {
6802 m->m_flags |= m_loop_set;
6803 packetlist = packetlist->m_nextpkt;
6804 m->m_nextpkt = NULL;
6805 }
6806 /* Reset the proto family to old proto family for CLAT */
6807 if (did_clat46) {
6808 proto_family = old_proto_family;
6809 }
6810 } while (m != NULL);
6811
6812 if (send_head != NULL) {
6813 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
6814 0, 0, 0, 0, 0);
6815 if (ifp->if_eflags & IFEF_SENDLIST) {
6816 retval = (*ifp->if_output_dlil)(ifp, send_head);
6817 if (retval == EQFULL || retval == EQSUSPENDED) {
6818 if (adv != NULL) {
6819 adv->code = (retval == EQFULL ?
6820 FADV_FLOW_CONTROLLED :
6821 FADV_SUSPENDED);
6822 }
6823 retval = 0;
6824 }
6825 if (retval == 0 && flen > 0) {
6826 fbytes += flen;
6827 fpkts++;
6828 }
6829 if (retval != 0 && dlil_verbose) {
6830 DLIL_PRINTF("%s: output error on %s retval = %d\n",
6831 __func__, if_name(ifp), retval);
6832 }
6833 } else {
6834 struct mbuf *send_m;
6835 int enq_cnt = 0;
6836 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
6837 while (send_head != NULL) {
6838 send_m = send_head;
6839 send_head = send_m->m_nextpkt;
6840 send_m->m_nextpkt = NULL;
6841 retval = (*ifp->if_output_dlil)(ifp, send_m);
6842 if (retval == EQFULL || retval == EQSUSPENDED) {
6843 if (adv != NULL) {
6844 adv->code = (retval == EQFULL ?
6845 FADV_FLOW_CONTROLLED :
6846 FADV_SUSPENDED);
6847 }
6848 retval = 0;
6849 }
6850 if (retval == 0) {
6851 enq_cnt++;
6852 if (flen > 0) {
6853 fpkts++;
6854 }
6855 }
6856 if (retval != 0 && dlil_verbose) {
6857 DLIL_PRINTF("%s: output error on %s "
6858 "retval = %d\n",
6859 __func__, if_name(ifp), retval);
6860 }
6861 }
6862 if (enq_cnt > 0) {
6863 fbytes += flen;
6864 ifnet_start(ifp);
6865 }
6866 }
6867 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
6868 }
6869
6870 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
6871
6872 cleanup:
6873 if (fbytes > 0) {
6874 ifp->if_fbytes += fbytes;
6875 }
6876 if (fpkts > 0) {
6877 ifp->if_fpackets += fpkts;
6878 }
6879 if (proto != NULL) {
6880 if_proto_free(proto);
6881 }
6882 if (packetlist) { /* if any packets are left, clean up */
6883 mbuf_freem_list(packetlist);
6884 }
6885 if (retval == EJUSTRETURN) {
6886 retval = 0;
6887 }
6888 if (iorefcnt == 1) {
6889 ifnet_datamov_end(ifp);
6890 }
6891 if (rt != NULL) {
6892 rtfree(rt);
6893 rt = NULL;
6894 }
6895
6896 return retval;
6897 }
6898
6899 /*
6900 * This routine checks if the destination address is not a loopback, link-local,
6901 * multicast or broadcast address.
6902 */
6903 static int
dlil_is_clat_needed(protocol_family_t proto_family,mbuf_t m)6904 dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
6905 {
6906 int ret = 0;
6907 switch (proto_family) {
6908 case PF_INET: {
6909 struct ip *iph = mtod(m, struct ip *);
6910 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
6911 ret = 1;
6912 }
6913 break;
6914 }
6915 case PF_INET6: {
6916 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
6917 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
6918 CLAT64_NEEDED(&ip6h->ip6_dst)) {
6919 ret = 1;
6920 }
6921 break;
6922 }
6923 }
6924
6925 return ret;
6926 }
6927 /*
6928 * @brief This routine translates IPv4 packet to IPv6 packet,
6929 * updates protocol checksum and also translates ICMP for code
6930 * along with inner header translation.
6931 *
6932 * @param ifp Pointer to the interface
6933 * @param proto_family pointer to protocol family. It is updated if function
6934 * performs the translation successfully.
6935 * @param m Pointer to the pointer pointing to the packet. Needed because this
6936 * routine can end up changing the mbuf to a different one.
6937 *
6938 * @return 0 on success or else a negative value.
6939 */
static errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET);
	VERIFY(IS_INTF_CLAT46(ifp));

	pbuf_t pbuf_store, *pbuf = NULL;
	struct ip *iph = NULL;
	struct in_addr osrc, odst;	/* original IPv4 source/destination */
	uint8_t proto = 0;		/* IPv4 protocol field */
	struct in6_ifaddr *ia6_clat_src = NULL;
	struct in6_addr *src = NULL;	/* synthesized IPv6 source */
	struct in6_addr dst;		/* synthesized IPv6 destination */
	int error = 0;
	uint16_t off = 0;		/* IPv4 header length, in bytes */
	uint16_t tot_len = 0;
	uint16_t ip_id_val = 0;
	uint16_t ip_frag_off = 0;

	boolean_t is_frag = FALSE;
	boolean_t is_first_frag = TRUE;
	boolean_t is_last_frag = TRUE;

	/* Wrap the mbuf in a pbuf so the nat464 helpers can operate on it */
	pbuf_init_mbuf(&pbuf_store, *m, ifp);
	pbuf = &pbuf_store;
	iph = pbuf->pb_data;

	/* Snapshot IPv4 header fields needed after the header is rewritten */
	osrc = iph->ip_src;
	odst = iph->ip_dst;
	proto = iph->ip_p;
	off = (uint16_t)(iph->ip_hl << 2);
	ip_id_val = iph->ip_id;
	ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;

	tot_len = ntohs(iph->ip_len);

	/*
	 * For packets that are not first frags
	 * we only need to adjust CSUM.
	 * For 4 to 6, Fragmentation header gets appended
	 * after proto translation.
	 */
	if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
		is_frag = TRUE;

		/* If the offset is not zero, it is not first frag */
		if (ip_frag_off != 0) {
			is_first_frag = FALSE;
		}

		/* If IP_MF is set, then it is not last frag */
		if (ntohs(iph->ip_off) & IP_MF) {
			is_last_frag = FALSE;
		}
	}

	/*
	 * Retrieve the local IPv6 CLAT46 address reserved for stateless
	 * translation.
	 */
	ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_src == NULL) {
		ip6stat.ip6s_clat464_out_nov6addr_drop++;
		error = -1;
		goto cleanup;
	}

	src = &ia6_clat_src->ia_addr.sin6_addr;

	/*
	 * Translate IPv4 destination to IPv6 destination by using the
	 * prefixes learned through prior PLAT discovery.
	 */
	if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
		ip6stat.ip6s_clat464_out_v6synthfail_drop++;
		goto cleanup;
	}

	/* Translate the IP header part first */
	error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
	    iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;

	iph = NULL; /* Invalidate iph as pbuf has been modified */

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46transfail_drop++;
		goto cleanup;
	}

	/*
	 * Translate protocol header, update checksum, checksum flags
	 * and related fields.
	 */
	error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
	    proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
		goto cleanup;
	}

	/* Now insert the IPv6 fragment header */
	if (is_frag) {
		error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);

		if (error != 0) {
			ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
			goto cleanup;
		}
	}

cleanup:
	if (ia6_clat_src != NULL) {
		IFA_REMREF(&ia6_clat_src->ia_ifa);
	}

	/* Hand the (possibly replaced) mbuf back to the caller */
	if (pbuf_is_valid(pbuf)) {
		*m = pbuf->pb_mbuf;
		pbuf->pb_mbuf = NULL;
		pbuf_destroy(pbuf);
	} else {
		error = -1;
		ip6stat.ip6s_clat464_out_invalpbuf_drop++;
	}

	/* Only advertise the new family once translation fully succeeded */
	if (error == 0) {
		*proto_family = PF_INET6;
		ip6stat.ip6s_clat464_out_success++;
	}

	return error;
}
7072
7073 /*
7074 * @brief This routine translates incoming IPv6 to IPv4 packet,
7075 * updates protocol checksum and also translates ICMPv6 outer
7076 * and inner headers
7077 *
7078 * @return 0 on success or else a negative value.
7079 */
static errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET6);
	VERIFY(IS_INTF_CLAT46(ifp));

	struct ip6_hdr *ip6h = NULL;
	struct in6_addr osrc, odst;	/* original IPv6 source/destination */
	uint8_t proto = 0;
	struct in6_ifaddr *ia6_clat_dst = NULL;
	struct in_ifaddr *ia4_clat_dst = NULL;
	struct in_addr *dst = NULL;	/* translated IPv4 destination */
	struct in_addr src;		/* translated IPv4 source */
	int error = 0;
	uint32_t off = 0;
	u_int64_t tot_len = 0;
	uint8_t tos = 0;
	boolean_t is_first_frag = TRUE;

	/* Incoming mbuf does not contain valid IP6 header */
	if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
	    ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
	    (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	ip6h = mtod(*m, struct ip6_hdr *);
	/* Validate that mbuf contains IP payload equal to ip6_plen */
	if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	osrc = ip6h->ip6_src;
	odst = ip6h->ip6_dst;

	/*
	 * Retrieve the local CLAT46 reserved IPv6 address.
	 * Let the packet pass if we don't find one, as the flag
	 * may get set before IPv6 configuration has taken place.
	 */
	ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_dst == NULL) {
		goto done;
	}

	/*
	 * Check if the original dest in the packet is same as the reserved
	 * CLAT46 IPv6 address
	 */
	if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
		pbuf_t pbuf_store, *pbuf = NULL;
		pbuf_init_mbuf(&pbuf_store, *m, ifp);
		pbuf = &pbuf_store;

		/*
		 * Retrieve the local CLAT46 IPv4 address reserved for stateless
		 * translation.
		 */
		ia4_clat_dst = inifa_ifpclatv4(ifp);
		if (ia4_clat_dst == NULL) {
			IFA_REMREF(&ia6_clat_dst->ia_ifa);
			ip6stat.ip6s_clat464_in_nov4addr_drop++;
			error = -1;
			goto cleanup;
		}
		/* Done with the IPv6 address; drop the reference taken above */
		IFA_REMREF(&ia6_clat_dst->ia_ifa);

		/* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
		dst = &ia4_clat_dst->ia_addr.sin_addr;
		if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
			ip6stat.ip6s_clat464_in_v4synthfail_drop++;
			error = -1;
			goto cleanup;
		}

		ip6h = pbuf->pb_data;
		off = sizeof(struct ip6_hdr);
		proto = ip6h->ip6_nxt;
		/* Traffic class occupies bits 20-27 of the ip6_flow word */
		tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
		tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);

		/*
		 * Translate the IP header and update the fragmentation
		 * header if needed
		 */
		error = (nat464_translate_64(pbuf, off, tos, &proto,
		    ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
		    0 : -1;

		ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64transfail_drop++;
			goto cleanup;
		}

		/*
		 * Translate protocol header, update checksum, checksum flags
		 * and related fields.
		 */
		error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
		    (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
		    NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
			goto cleanup;
		}

cleanup:
		if (ia4_clat_dst != NULL) {
			IFA_REMREF(&ia4_clat_dst->ia_ifa);
		}

		/* Hand the (possibly replaced) mbuf back to the caller */
		if (pbuf_is_valid(pbuf)) {
			*m = pbuf->pb_mbuf;
			pbuf->pb_mbuf = NULL;
			pbuf_destroy(pbuf);
		} else {
			error = -1;
			ip6stat.ip6s_clat464_in_invalpbuf_drop++;
		}

		/* Only advertise the new family once translation succeeded */
		if (error == 0) {
			*proto_family = PF_INET;
			ip6stat.ip6s_clat464_in_success++;
		}
	} /* CLAT traffic */

done:
	return error;
}
7214
/* The following is used to enqueue work items for ifnet ioctl events */
static void ifnet_ioctl_event_callback(struct nwk_wq_entry *);

/* Deferred ioctl request: the interface plus the ioctl to issue against it */
struct ifnet_ioctl_event {
	struct ifnet *ifp;	/* holds an io refcount until the callback runs */
	u_long ioctl_code;	/* ioctl to pass to ifnet_ioctl() */
};

/* Work-queue entry embedding the generic linkage and the ioctl arguments */
struct ifnet_ioctl_event_nwk_wq_entry {
	struct nwk_wq_entry nwk_wqe;
	struct ifnet_ioctl_event ifnet_ioctl_ev_arg;
};
7227
7228 void
ifnet_ioctl_async(struct ifnet * ifp,u_long ioctl_code)7229 ifnet_ioctl_async(struct ifnet *ifp, u_long ioctl_code)
7230 {
7231 struct ifnet_ioctl_event_nwk_wq_entry *p_ifnet_ioctl_ev = NULL;
7232
7233 /*
7234 * Get an io ref count if the interface is attached.
7235 * At this point it most likely is. We are taking a reference for
7236 * deferred processing.
7237 */
7238 if (!ifnet_is_attached(ifp, 1)) {
7239 os_log(OS_LOG_DEFAULT, "%s:%d %s Failed for ioctl %lu as interface "
7240 "is not attached",
7241 __func__, __LINE__, if_name(ifp), ioctl_code);
7242 return;
7243 }
7244
7245 p_ifnet_ioctl_ev = kalloc_type(struct ifnet_ioctl_event_nwk_wq_entry,
7246 Z_WAITOK | Z_ZERO | Z_NOFAIL);
7247
7248 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ifp = ifp;
7249 p_ifnet_ioctl_ev->ifnet_ioctl_ev_arg.ioctl_code = ioctl_code;
7250 p_ifnet_ioctl_ev->nwk_wqe.func = ifnet_ioctl_event_callback;
7251 nwk_wq_enqueue(&p_ifnet_ioctl_ev->nwk_wqe);
7252 }
7253
7254 static void
ifnet_ioctl_event_callback(struct nwk_wq_entry * nwk_item)7255 ifnet_ioctl_event_callback(struct nwk_wq_entry *nwk_item)
7256 {
7257 struct ifnet_ioctl_event_nwk_wq_entry *p_ev = __container_of(nwk_item,
7258 struct ifnet_ioctl_event_nwk_wq_entry, nwk_wqe);
7259
7260 struct ifnet *ifp = p_ev->ifnet_ioctl_ev_arg.ifp;
7261 u_long ioctl_code = p_ev->ifnet_ioctl_ev_arg.ioctl_code;
7262 int ret = 0;
7263
7264 if ((ret = ifnet_ioctl(ifp, 0, ioctl_code, NULL)) != 0) {
7265 os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned %d for ioctl %lu",
7266 __func__, __LINE__, if_name(ifp), ret, ioctl_code);
7267 } else if (dlil_verbose) {
7268 os_log(OS_LOG_DEFAULT, "%s:%d %s ifnet_ioctl returned successfully "
7269 "for ioctl %lu",
7270 __func__, __LINE__, if_name(ifp), ioctl_code);
7271 }
7272 ifnet_decr_iorefcnt(ifp);
7273 kfree_type(struct ifnet_ioctl_event_nwk_wq_entry, p_ev);
7274 return;
7275 }
7276
/*
 * ifnet_ioctl - dispatch an ioctl first to the interface filters, then to
 * the attached protocol (when proto_fam is non-zero), and finally to the
 * interface's own if_ioctl handler.
 *
 * A responder "handles" the ioctl by returning anything other than
 * EOPNOTSUPP; EJUSTRETURN stops the chain and is reported to the caller
 * as success (0).  ENOTSUP is normalized to EOPNOTSUPP throughout.
 */
errno_t
ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
    void *ioctl_arg)
{
	struct ifnet_filter *filter;
	int retval = EOPNOTSUPP;
	int result = 0;

	if (ifp == NULL || ioctl_code == 0) {
		return EINVAL;
	}

	/* Get an io ref count if the interface is attached */
	if (!ifnet_is_attached(ifp, 1)) {
		return EOPNOTSUPP;
	}

	/*
	 * Run the interface filters first.
	 * We want to run all filters before calling the protocol,
	 * interface family, or interface.
	 */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	/* prevent filter list from changing in case we drop the lock */
	if_flt_monitor_busy(ifp);
	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
		    filter->filt_protocol == proto_fam)) {
			/* drop the lock while calling out to the filter */
			lck_mtx_unlock(&ifp->if_flt_lock);

			result = filter->filt_ioctl(filter->filt_cookie, ifp,
			    proto_fam, ioctl_code, ioctl_arg);

			lck_mtx_lock_spin(&ifp->if_flt_lock);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval != 0 && retval != EOPNOTSUPP) {
					/* we're done with the filter list */
					if_flt_monitor_unbusy(ifp);
					lck_mtx_unlock(&ifp->if_flt_lock);
					goto cleanup;
				}
			}
		}
	}
	/* we're done with the filter list */
	if_flt_monitor_unbusy(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Allow the protocol to handle the ioctl */
	if (proto_fam != 0) {
		struct if_proto *proto;

		/* callee holds a proto refcnt upon success */
		ifnet_lock_shared(ifp);
		proto = find_attached_proto(ifp, proto_fam);
		ifnet_lock_done(ifp);
		if (proto != NULL) {
			proto_media_ioctl ioctlp =
			    (proto->proto_kpi == kProtoKPI_v1 ?
			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
			result = EOPNOTSUPP;
			if (ioctlp != NULL) {
				result = ioctlp(ifp, proto_fam, ioctl_code,
				    ioctl_arg);
			}
			if_proto_free(proto);

			/* Only update retval if no one has handled the ioctl */
			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
				if (result == ENOTSUP) {
					result = EOPNOTSUPP;
				}
				retval = result;
				if (retval && retval != EOPNOTSUPP) {
					goto cleanup;
				}
			}
		}
	}

	/* retval is either 0 or EOPNOTSUPP */

	/*
	 * Let the interface handle this ioctl.
	 * If it returns EOPNOTSUPP, ignore that, we may have
	 * already handled this in the protocol or family.
	 */
	if (ifp->if_ioctl) {
		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
	}

	/* Only update retval if no one has handled the ioctl */
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP) {
			result = EOPNOTSUPP;
		}
		retval = result;
		if (retval && retval != EOPNOTSUPP) {
			goto cleanup;
		}
	}

cleanup:
	/* EJUSTRETURN means "handled, stop processing" — report success */
	if (retval == EJUSTRETURN) {
		retval = 0;
	}

	/* release the io reference taken at entry */
	ifnet_decr_iorefcnt(ifp);

	return retval;
}
7394
7395 __private_extern__ errno_t
dlil_set_bpf_tap(ifnet_t ifp,bpf_tap_mode mode,bpf_packet_func callback)7396 dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
7397 {
7398 errno_t error = 0;
7399
7400
7401 if (ifp->if_set_bpf_tap) {
7402 /* Get an io reference on the interface if it is attached */
7403 if (!ifnet_is_attached(ifp, 1)) {
7404 return ENXIO;
7405 }
7406 error = ifp->if_set_bpf_tap(ifp, mode, callback);
7407 ifnet_decr_iorefcnt(ifp);
7408 }
7409 return error;
7410 }
7411
7412 errno_t
dlil_resolve_multi(struct ifnet * ifp,const struct sockaddr * proto_addr,struct sockaddr * ll_addr,size_t ll_len)7413 dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
7414 struct sockaddr *ll_addr, size_t ll_len)
7415 {
7416 errno_t result = EOPNOTSUPP;
7417 struct if_proto *proto;
7418 const struct sockaddr *verify;
7419 proto_media_resolve_multi resolvep;
7420
7421 if (!ifnet_is_attached(ifp, 1)) {
7422 return result;
7423 }
7424
7425 bzero(ll_addr, ll_len);
7426
7427 /* Call the protocol first; callee holds a proto refcnt upon success */
7428 ifnet_lock_shared(ifp);
7429 proto = find_attached_proto(ifp, proto_addr->sa_family);
7430 ifnet_lock_done(ifp);
7431 if (proto != NULL) {
7432 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
7433 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
7434 if (resolvep != NULL) {
7435 result = resolvep(ifp, proto_addr,
7436 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
7437 }
7438 if_proto_free(proto);
7439 }
7440
7441 /* Let the interface verify the multicast address */
7442 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
7443 if (result == 0) {
7444 verify = ll_addr;
7445 } else {
7446 verify = proto_addr;
7447 }
7448 result = ifp->if_check_multi(ifp, verify);
7449 }
7450
7451 ifnet_decr_iorefcnt(ifp);
7452 return result;
7453 }
7454
7455 __private_extern__ errno_t
dlil_send_arp_internal(ifnet_t ifp,u_short arpop,const struct sockaddr_dl * sender_hw,const struct sockaddr * sender_proto,const struct sockaddr_dl * target_hw,const struct sockaddr * target_proto)7456 dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
7457 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
7458 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
7459 {
7460 struct if_proto *proto;
7461 errno_t result = 0;
7462
7463 /* callee holds a proto refcnt upon success */
7464 ifnet_lock_shared(ifp);
7465 proto = find_attached_proto(ifp, target_proto->sa_family);
7466 ifnet_lock_done(ifp);
7467 if (proto == NULL) {
7468 result = ENOTSUP;
7469 } else {
7470 proto_media_send_arp arpp;
7471 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
7472 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
7473 if (arpp == NULL) {
7474 result = ENOTSUP;
7475 } else {
7476 switch (arpop) {
7477 case ARPOP_REQUEST:
7478 arpstat.txrequests++;
7479 if (target_hw != NULL) {
7480 arpstat.txurequests++;
7481 }
7482 break;
7483 case ARPOP_REPLY:
7484 arpstat.txreplies++;
7485 break;
7486 }
7487 result = arpp(ifp, arpop, sender_hw, sender_proto,
7488 target_hw, target_proto);
7489 }
7490 if_proto_free(proto);
7491 }
7492
7493 return result;
7494 }
7495
/*
 * Thread-mark bookkeeping: tokens are encoded as byte offsets from a
 * zero-sized base object, so no per-token storage is ever allocated.
 */
struct net_thread_marks { };
static const struct net_thread_marks net_thread_marks_base = { };

/* Token meaning "nothing to pop" (offset 0 from the base) */
__private_extern__ const net_thread_marks_t net_thread_marks_none =
    &net_thread_marks_base;
7501
/*
 * Set the requested mark bits on the current thread and return an opaque
 * token recording which bits were newly set, so that the matching
 * net_thread_marks_pop() restores the prior state exactly.
 */
__private_extern__ net_thread_marks_t
net_thread_marks_push(u_int32_t push)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t pop = 0;

	if (push != 0) {
		struct uthread *uth = current_uthread();

		/* Only bits not already set become ours to pop later */
		pop = push & ~uth->uu_network_marks;
		if (pop != 0) {
			uth->uu_network_marks |= pop;
		}
	}

	/*
	 * Encode the to-be-popped bitmask as a pointer offset from base;
	 * net_thread_marks_pop() recovers it by pointer subtraction.
	 */
	return (net_thread_marks_t)&base[pop];
}
7519
/*
 * Clear the requested mark bits on the current thread and return a token
 * recording which bits were actually cleared, so that the matching
 * net_thread_unmarks_pop() can re-set them.
 */
__private_extern__ net_thread_marks_t
net_thread_unmarks_push(u_int32_t unpush)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	u_int32_t unpop = 0;

	if (unpush != 0) {
		struct uthread *uth = current_uthread();

		/* Only bits that are currently set can be cleared by us */
		unpop = unpush & uth->uu_network_marks;
		if (unpop != 0) {
			uth->uu_network_marks &= ~unpop;
		}
	}

	/* Encode the cleared-bits mask as a pointer offset from base */
	return (net_thread_marks_t)&base[unpop];
}
7537
/*
 * Undo a net_thread_marks_push(): clear exactly the bits the matching
 * push set, leaving bits that were already set beforehand untouched.
 */
__private_extern__ void
net_thread_marks_pop(net_thread_marks_t popx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	/* Recover the bitmask encoded as an offset from the base object */
	const ptrdiff_t pop = (const char *)popx - (const char *)base;

	if (pop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = current_uthread();

		/* Token must be a 32-bit mask of bits that are currently set */
		VERIFY((pop & ones) == pop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
		uth->uu_network_marks &= ~pop;
	}
}
7553
/*
 * Undo a net_thread_unmarks_push(): re-set exactly the bits the matching
 * unmarks-push cleared.
 */
__private_extern__ void
net_thread_unmarks_pop(net_thread_marks_t unpopx)
{
	static const char *const base = (const void*)&net_thread_marks_base;
	/* Recover the bitmask encoded as an offset from the base object */
	ptrdiff_t unpop = (const char *)unpopx - (const char *)base;

	if (unpop != 0) {
		static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
		struct uthread *uth = current_uthread();

		/* Token must be a 32-bit mask of bits that are currently clear */
		VERIFY((unpop & ones) == unpop);
		VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
		uth->uu_network_marks |= unpop;
	}
}
7569
7570 __private_extern__ u_int32_t
net_thread_is_marked(u_int32_t check)7571 net_thread_is_marked(u_int32_t check)
7572 {
7573 if (check != 0) {
7574 struct uthread *uth = current_uthread();
7575 return uth->uu_network_marks & check;
7576 } else {
7577 return 0;
7578 }
7579 }
7580
7581 __private_extern__ u_int32_t
net_thread_is_unmarked(u_int32_t check)7582 net_thread_is_unmarked(u_int32_t check)
7583 {
7584 if (check != 0) {
7585 struct uthread *uth = current_uthread();
7586 return ~uth->uu_network_marks & check;
7587 } else {
7588 return 0;
7589 }
7590 }
7591
/*
 * A gratuitous ARP announcement carries identical sender and target IPv4
 * addresses; either pointer being NULL means "not an announcement".
 */
static __inline__ int
_is_announcement(const struct sockaddr_in * sender_sin,
    const struct sockaddr_in * target_sin)
{
	return sender_sin != NULL && target_sin != NULL &&
	    sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr;
}
7602
/*
 * dlil_send_arp - send an ARP on ifp; for IPv4 link-local targets (and
 * when ipv4_ll_arp_aware is enabled), non-announcement requests are
 * instead broadcast on every interface flagged IFEF_ARPLL.
 */
__private_extern__ errno_t
dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
    const struct sockaddr *target_proto0, u_int32_t rtflags)
{
	errno_t result = 0;
	const struct sockaddr_in * sender_sin;
	const struct sockaddr_in * target_sin;
	struct sockaddr_inarp target_proto_sinarp;
	struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;

	if (target_proto == NULL || sender_proto == NULL) {
		return EINVAL;
	}

	if (sender_proto->sa_family != target_proto->sa_family) {
		return EINVAL;
	}

	/*
	 * If the target is a (default) router, provide that
	 * information to the send_arp callback routine.
	 */
	if (rtflags & RTF_ROUTER) {
		bcopy(target_proto, &target_proto_sinarp,
		    sizeof(struct sockaddr_in));
		target_proto_sinarp.sin_other |= SIN_ROUTER;
		target_proto = (struct sockaddr *)&target_proto_sinarp;
	}

	/*
	 * If this is an ARP request and the target IP is IPv4LL,
	 * send the request on all interfaces. The exception is
	 * an announcement, which must only appear on the specific
	 * interface.
	 */
	sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
	target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
	if (target_proto->sa_family == AF_INET &&
	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
		ifnet_t *ifp_list;
		u_int32_t count;
		u_int32_t ifp_on;

		result = ENOTSUP;

		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
			for (ifp_on = 0; ifp_on < count; ifp_on++) {
				errno_t new_result;
				ifaddr_t source_hw = NULL;
				ifaddr_t source_ip = NULL;
				struct sockaddr_in source_ip_copy;
				struct ifnet *cur_ifp = ifp_list[ifp_on];

				/*
				 * Only arp on interfaces marked for IPv4LL
				 * ARPing. This may mean that we don't ARP on
				 * the interface the subnet route points to.
				 */
				if (!(cur_ifp->if_eflags & IFEF_ARPLL)) {
					continue;
				}

				/* Find the source IP address */
				ifnet_lock_shared(cur_ifp);
				source_hw = cur_ifp->if_lladdr;
				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
				    ifa_link) {
					IFA_LOCK(source_ip);
					if (source_ip->ifa_addr != NULL &&
					    source_ip->ifa_addr->sa_family ==
					    AF_INET) {
						/* Copy the source IP address */
						source_ip_copy =
						    *(struct sockaddr_in *)
						    (void *)source_ip->ifa_addr;
						IFA_UNLOCK(source_ip);
						break;
					}
					IFA_UNLOCK(source_ip);
				}

				/* No IP Source, don't arp */
				if (source_ip == NULL) {
					ifnet_lock_done(cur_ifp);
					continue;
				}

				/* Keep the lladdr ifaddr alive across the unlock */
				IFA_ADDREF(source_hw);
				ifnet_lock_done(cur_ifp);

				/* Send the ARP */
				new_result = dlil_send_arp_internal(cur_ifp,
				    arpop, (struct sockaddr_dl *)(void *)
				    source_hw->ifa_addr,
				    (struct sockaddr *)&source_ip_copy, NULL,
				    target_proto);

				IFA_REMREF(source_hw);
				/* Record the first result other than ENOTSUP */
				if (result == ENOTSUP) {
					result = new_result;
				}
			}
			ifnet_list_free(ifp_list);
		}
	} else {
		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
		    sender_proto, target_hw, target_proto);
	}

	return result;
}
7717
7718 /*
7719 * Caller must hold ifnet head lock.
7720 */
7721 static int
ifnet_lookup(struct ifnet * ifp)7722 ifnet_lookup(struct ifnet *ifp)
7723 {
7724 struct ifnet *_ifp;
7725
7726 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
7727 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
7728 if (_ifp == ifp) {
7729 break;
7730 }
7731 }
7732 return _ifp != NULL;
7733 }
7734
7735 /*
7736 * Caller has to pass a non-zero refio argument to get a
7737 * IO reference count. This will prevent ifnet_detach from
7738 * being called when there are outstanding io reference counts.
7739 */
7740 int
ifnet_is_attached(struct ifnet * ifp,int refio)7741 ifnet_is_attached(struct ifnet *ifp, int refio)
7742 {
7743 int ret;
7744
7745 lck_mtx_lock_spin(&ifp->if_ref_lock);
7746 if ((ret = IF_FULLY_ATTACHED(ifp))) {
7747 if (refio > 0) {
7748 ifp->if_refio++;
7749 }
7750 }
7751 lck_mtx_unlock(&ifp->if_ref_lock);
7752
7753 return ret;
7754 }
7755
/*
 * Note that a thread servicing this interface is starting up; balanced
 * by ifnet_decr_pending_thread_count().
 */
void
ifnet_incr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifp->if_threads_pending++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
7763
/*
 * Note that a pending interface thread has finished starting; wakes any
 * thread sleeping on if_threads_pending once the count reaches zero.
 */
void
ifnet_decr_pending_thread_count(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_threads_pending > 0);
	ifp->if_threads_pending--;
	/* Last pending thread: wake anyone waiting for startup to complete */
	if (ifp->if_threads_pending == 0) {
		wakeup(&ifp->if_threads_pending);
	}
	lck_mtx_unlock(&ifp->if_ref_lock);
}
7775
7776 /*
7777 * Caller must ensure the interface is attached; the assumption is that
7778 * there is at least an outstanding IO reference count held already.
7779 * Most callers would call ifnet_is_{attached,data_ready}() instead.
7780 */
/*
 * Take an additional io reference; the caller guarantees at least one
 * outstanding io reference already exists (both VERIFYs enforce this).
 */
void
ifnet_incr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(IF_FULLY_ATTACHED(ifp));
	VERIFY(ifp->if_refio > 0);
	ifp->if_refio++;
	lck_mtx_unlock(&ifp->if_ref_lock);
}
7790
/*
 * Drop one io reference with if_ref_lock already held; wakes the
 * detaching thread when the last reference goes away mid-detach.
 */
__attribute__((always_inline))
static void
ifnet_decr_iorefcnt_locked(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_refio > 0);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));

	ifp->if_refio--;
	/* The last io reference cannot go away while data movers remain */
	VERIFY(ifp->if_refio != 0 || ifp->if_datamov == 0);

	/*
	 * if there are no more outstanding io references, wakeup the
	 * ifnet_detach thread if detaching flag is set.
	 */
	if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING)) {
		wakeup(&(ifp->if_refio));
	}
}
7811
/* Drop one io reference; see ifnet_decr_iorefcnt_locked() for the wakeup */
void
ifnet_decr_iorefcnt(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	ifnet_decr_iorefcnt_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
7819
/*
 * Begin a data-movement section: take an io reference and bump the
 * data-mover count, but only while the interface is fully attached and
 * ready.  Returns whether the references were taken; balanced by
 * ifnet_datamov_end().
 */
boolean_t
ifnet_datamov_begin(struct ifnet *ifp)
{
	boolean_t ret;

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if ((ret = IF_FULLY_ATTACHED_AND_READY(ifp))) {
		ifp->if_refio++;
		ifp->if_datamov++;
	}
	lck_mtx_unlock(&ifp->if_ref_lock);

	return ret;
}
7834
7835 void
ifnet_datamov_end(struct ifnet * ifp)7836 ifnet_datamov_end(struct ifnet *ifp)
7837 {
7838 lck_mtx_lock_spin(&ifp->if_ref_lock);
7839 VERIFY(ifp->if_datamov > 0);
7840 /*
7841 * if there's no more thread moving data, wakeup any
7842 * drainers that's blocked waiting for this.
7843 */
7844 if (--ifp->if_datamov == 0 && ifp->if_drainers > 0) {
7845 DLIL_PRINTF("Waking up drainers on %s\n", if_name(ifp));
7846 DTRACE_IP1(datamov__drain__wake, struct ifnet *, ifp);
7847 wakeup(&(ifp->if_datamov));
7848 }
7849 ifnet_decr_iorefcnt_locked(ifp);
7850 lck_mtx_unlock(&ifp->if_ref_lock);
7851 }
7852
7853 static void
ifnet_datamov_suspend_locked(struct ifnet * ifp)7854 ifnet_datamov_suspend_locked(struct ifnet *ifp)
7855 {
7856 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_MTX_ASSERT_OWNED);
7857 ifp->if_refio++;
7858 if (ifp->if_suspend++ == 0) {
7859 VERIFY(ifp->if_refflags & IFRF_READY);
7860 ifp->if_refflags &= ~IFRF_READY;
7861 }
7862 }
7863
/*
 * Suspend data movement, taking if_ref_lock; each call must be
 * balanced by a later ifnet_datamov_resume().
 */
void
ifnet_datamov_suspend(struct ifnet *ifp)
{
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	ifnet_datamov_suspend_locked(ifp);
	lck_mtx_unlock(&ifp->if_ref_lock);
}
7872
7873 boolean_t
ifnet_datamov_suspend_if_needed(struct ifnet * ifp)7874 ifnet_datamov_suspend_if_needed(struct ifnet *ifp)
7875 {
7876 lck_mtx_lock_spin(&ifp->if_ref_lock);
7877 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
7878 if (ifp->if_suspend > 0) {
7879 lck_mtx_unlock(&ifp->if_ref_lock);
7880 return FALSE;
7881 }
7882 ifnet_datamov_suspend_locked(ifp);
7883 lck_mtx_unlock(&ifp->if_ref_lock);
7884 return TRUE;
7885 }
7886
/*
 * Wait for all in-flight data movement to quiesce, then purge the
 * send queue(s).  The data path must already have been suspended
 * (via ifnet_datamov_suspend*); sleeps on &ifp->if_datamov until
 * the last mover calls ifnet_datamov_end().
 */
void
ifnet_datamov_drain(struct ifnet *ifp)
{
	lck_mtx_lock(&ifp->if_ref_lock);
	VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
	/* data movement must already be suspended */
	VERIFY(ifp->if_suspend > 0);
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	ifp->if_drainers++;
	while (ifp->if_datamov != 0) {
		DLIL_PRINTF("Waiting for data path(s) to quiesce on %s\n",
		    if_name(ifp));
		DTRACE_IP1(datamov__wait, struct ifnet *, ifp);
		(void) msleep(&(ifp->if_datamov), &ifp->if_ref_lock,
		    (PZERO - 1), __func__, NULL);
		DTRACE_IP1(datamov__wake, struct ifnet *, ifp);
	}
	/* still suspended: nobody may have set IFRF_READY while we slept */
	VERIFY(!(ifp->if_refflags & IFRF_READY));
	VERIFY(ifp->if_drainers > 0);
	ifp->if_drainers--;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* purge the interface queues */
	if ((ifp->if_eflags & IFEF_TXSTART) != 0) {
		if_qflush_snd(ifp, false);
	}
}
7914
/*
 * Convenience wrapper: suspend the data path, then wait for all
 * in-flight data movement to quiesce and purge the send queue(s).
 */
void
ifnet_datamov_suspend_and_drain(struct ifnet *ifp)
{
	ifnet_datamov_suspend(ifp);
	ifnet_datamov_drain(ifp);
}
7921
7922 void
ifnet_datamov_resume(struct ifnet * ifp)7923 ifnet_datamov_resume(struct ifnet *ifp)
7924 {
7925 lck_mtx_lock(&ifp->if_ref_lock);
7926 /* data movement must already be suspended */
7927 VERIFY(ifp->if_suspend > 0);
7928 if (--ifp->if_suspend == 0) {
7929 VERIFY(!(ifp->if_refflags & IFRF_READY));
7930 ifp->if_refflags |= IFRF_READY;
7931 }
7932 ifnet_decr_iorefcnt_locked(ifp);
7933 lck_mtx_unlock(&ifp->if_ref_lock);
7934 }
7935
7936 static void
dlil_if_trace(struct dlil_ifnet * dl_if,int refhold)7937 dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
7938 {
7939 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
7940 ctrace_t *tr;
7941 u_int32_t idx;
7942 u_int16_t *cnt;
7943
7944 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
7945 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
7946 /* NOTREACHED */
7947 }
7948
7949 if (refhold) {
7950 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
7951 tr = dl_if_dbg->dldbg_if_refhold;
7952 } else {
7953 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
7954 tr = dl_if_dbg->dldbg_if_refrele;
7955 }
7956
7957 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
7958 ctrace_record(&tr[idx]);
7959 }
7960
7961 errno_t
dlil_if_ref(struct ifnet * ifp)7962 dlil_if_ref(struct ifnet *ifp)
7963 {
7964 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7965
7966 if (dl_if == NULL) {
7967 return EINVAL;
7968 }
7969
7970 lck_mtx_lock_spin(&dl_if->dl_if_lock);
7971 ++dl_if->dl_if_refcnt;
7972 if (dl_if->dl_if_refcnt == 0) {
7973 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
7974 /* NOTREACHED */
7975 }
7976 if (dl_if->dl_if_trace != NULL) {
7977 (*dl_if->dl_if_trace)(dl_if, TRUE);
7978 }
7979 lck_mtx_unlock(&dl_if->dl_if_lock);
7980
7981 return 0;
7982 }
7983
7984 errno_t
dlil_if_free(struct ifnet * ifp)7985 dlil_if_free(struct ifnet *ifp)
7986 {
7987 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
7988 bool need_release = FALSE;
7989
7990 if (dl_if == NULL) {
7991 return EINVAL;
7992 }
7993
7994 lck_mtx_lock_spin(&dl_if->dl_if_lock);
7995 switch (dl_if->dl_if_refcnt) {
7996 case 0:
7997 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
7998 /* NOTREACHED */
7999 break;
8000 case 1:
8001 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
8002 need_release = TRUE;
8003 }
8004 break;
8005 default:
8006 break;
8007 }
8008 --dl_if->dl_if_refcnt;
8009 if (dl_if->dl_if_trace != NULL) {
8010 (*dl_if->dl_if_trace)(dl_if, FALSE);
8011 }
8012 lck_mtx_unlock(&dl_if->dl_if_lock);
8013 if (need_release) {
8014 _dlil_if_release(ifp, true);
8015 }
8016 return 0;
8017 }
8018
/*
 * Second stage of protocol attach, shared by the v1 and v2 KPIs:
 * lets the family module refine the demux descriptors, links the
 * proto into the interface's protocol hash, and posts a
 * KEV_DL_PROTO_ATTACHED event.  Returns EEXIST if the family is
 * already attached, ENXIO if the interface is no longer attached,
 * EINVAL for disallowed vmnet attachments, or the family module's
 * add_proto error.  On success *proto_count (if non-NULL) receives
 * the number of protocols now attached.
 */
static errno_t
dlil_attach_protocol(struct if_proto *proto,
    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
    uint32_t * proto_count)
{
	struct kev_dl_proto_data ev_pr_data;
	struct ifnet *ifp = proto->ifp;
	errno_t retval = 0;
	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
	struct if_proto *prev_proto;
	struct if_proto *_proto;

	/* don't allow attaching anything but PF_BRIDGE to vmnet interfaces */
	if (IFNET_IS_VMNET(ifp) && proto->protocol_family != PF_BRIDGE) {
		return EINVAL;
	}

	/* on success this holds an IO refcnt, released at ioref_done below */
	if (!ifnet_is_attached(ifp, 1)) {
		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached",
		    __func__, if_name(ifp));
		return ENXIO;
	}
	/* callee holds a proto refcnt upon success */
	ifnet_lock_exclusive(ifp);
	_proto = find_attached_proto(ifp, proto->protocol_family);
	if (_proto != NULL) {
		/* family already attached; drop the lookup ref and bail */
		ifnet_lock_done(ifp);
		if_proto_free(_proto);
		retval = EEXIST;
		goto ioref_done;
	}

	/*
	 * Call family module add_proto routine so it can refine the
	 * demux descriptors as it wishes.
	 */
	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
	    demux_count);
	if (retval) {
		ifnet_lock_done(ifp);
		goto ioref_done;
	}

	/*
	 * Insert the protocol in the hash (appended at the tail of the
	 * bucket's list)
	 */
	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) {
		prev_proto = SLIST_NEXT(prev_proto, next_hash);
	}
	if (prev_proto) {
		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
	} else {
		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
		    proto, next_hash);
	}

	/* hold a proto refcnt for attach */
	if_proto_ref(proto);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto->protocol_family;
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);
	if (proto_count != NULL) {
		*proto_count = ev_pr_data.proto_remaining_count;
	}
ioref_done:
	/* release the IO refcnt taken by ifnet_is_attached() above */
	ifnet_decr_iorefcnt(ifp);
	return retval;
}
8098
8099 static void
dlil_handle_proto_attach(ifnet_t ifp,protocol_family_t protocol)8100 dlil_handle_proto_attach(ifnet_t ifp, protocol_family_t protocol)
8101 {
8102 /*
8103 * A protocol has been attached, mark the interface up.
8104 * This used to be done by configd.KernelEventMonitor, but that
8105 * is inherently prone to races (rdar://problem/30810208).
8106 */
8107 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
8108 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
8109 dlil_post_sifflags_msg(ifp);
8110 #if SKYWALK
8111 switch (protocol) {
8112 case AF_INET:
8113 case AF_INET6:
8114 /* don't attach the flowswitch unless attaching IP */
8115 dlil_attach_flowswitch_nexus(ifp);
8116 break;
8117 default:
8118 break;
8119 }
8120 #endif /* SKYWALK */
8121 }
8122
/*
 * Attach a v1 protocol KPI to an interface.  Validates arguments,
 * confirms the interface is on the global list, populates an if_proto
 * from proto_details and hands it off to dlil_attach_protocol().
 * On success the interface is marked up via dlil_handle_proto_attach();
 * on failure the if_proto is freed here.  Returns EINVAL for bad
 * arguments, ENXIO if the interface is not in the global list, or
 * whatever dlil_attach_protocol() returns (e.g. EEXIST).
 */
errno_t
ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	/* head lock held across attach so the ifnet cannot be unlinked */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v1;
	ifproto->kpi.v1.input = proto_details->input;
	ifproto->kpi.v1.pre_output = proto_details->pre_output;
	ifproto->kpi.v1.event = proto_details->event;
	ifproto->kpi.v1.ioctl = proto_details->ioctl;
	ifproto->kpi.v1.detached = proto_details->detached;
	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
	ifproto->kpi.v1.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v1 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* attach failed; reclaim the if_proto allocated above */
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
8184
/*
 * Attach a v2 protocol KPI to an interface.  Identical flow to
 * ifnet_attach_protocol(), but populates the v2 callback union
 * (the v2 input callback takes a packet chain without a separate
 * frame header pointer).  Returns EINVAL for bad arguments, ENXIO
 * if the interface is not in the global list, or whatever
 * dlil_attach_protocol() returns (e.g. EEXIST).
 */
errno_t
ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
    const struct ifnet_attach_proto_param_v2 *proto_details)
{
	int retval = 0;
	struct if_proto *ifproto = NULL;
	uint32_t proto_count = 0;

	/* head lock held across attach so the ifnet cannot be unlinked */
	ifnet_head_lock_shared();
	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
		retval = EINVAL;
		goto end;
	}
	/* Check that the interface is in the global list */
	if (!ifnet_lookup(ifp)) {
		retval = ENXIO;
		goto end;
	}

	ifproto = zalloc_flags(dlif_proto_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/* refcnt held above during lookup */
	ifproto->ifp = ifp;
	ifproto->protocol_family = protocol;
	ifproto->proto_kpi = kProtoKPI_v2;
	ifproto->kpi.v2.input = proto_details->input;
	ifproto->kpi.v2.pre_output = proto_details->pre_output;
	ifproto->kpi.v2.event = proto_details->event;
	ifproto->kpi.v2.ioctl = proto_details->ioctl;
	ifproto->kpi.v2.detached = proto_details->detached;
	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
	ifproto->kpi.v2.send_arp = proto_details->send_arp;

	retval = dlil_attach_protocol(ifproto,
	    proto_details->demux_list, proto_details->demux_count,
	    &proto_count);

end:
	if (retval == EEXIST) {
		/* already attached */
		if (dlil_verbose) {
			DLIL_PRINTF("%s: protocol %d already attached\n",
			    ifp != NULL ? if_name(ifp) : "N/A",
			    protocol);
		}
	} else if (retval != 0) {
		DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A", protocol, retval);
	} else if (dlil_verbose) {
		DLIL_PRINTF("%s: attached v2 protocol %d (count = %d)\n",
		    ifp != NULL ? if_name(ifp) : "N/A",
		    protocol, proto_count);
	}
	ifnet_head_done();
	if (retval == 0) {
		dlil_handle_proto_attach(ifp, protocol);
	} else if (ifproto != NULL) {
		/* attach failed; reclaim the if_proto allocated above */
		zfree(dlif_proto_zone, ifproto);
	}
	return retval;
}
8246
/*
 * Detach a protocol family from an interface: notifies the family
 * module, unlinks the proto from the hash, neutralizes its callbacks
 * with inert ifproto_media_* handlers (which return ENXIO), and drops
 * both the attach-time and lookup proto references.  Returns EINVAL
 * for bad arguments or ENXIO if the family is not attached.
 */
errno_t
ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
{
	struct if_proto *proto = NULL;
	int retval = 0;

	if (ifp == NULL || proto_family == 0) {
		retval = EINVAL;
		goto end;
	}

	ifnet_lock_exclusive(ifp);
	/* callee holds a proto refcnt upon success */
	proto = find_attached_proto(ifp, proto_family);
	if (proto == NULL) {
		retval = ENXIO;
		ifnet_lock_done(ifp);
		goto end;
	}

	/* call family module del_proto */
	if (ifp->if_del_proto) {
		ifp->if_del_proto(ifp, proto->protocol_family);
	}

	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
	    proto, if_proto, next_hash);

	/*
	 * Replace the callbacks with stubs so that any caller still
	 * holding the proto gets ENXIO rather than a stale pointer.
	 */
	if (proto->proto_kpi == kProtoKPI_v1) {
		proto->kpi.v1.input = ifproto_media_input_v1;
		proto->kpi.v1.pre_output = ifproto_media_preout;
		proto->kpi.v1.event = ifproto_media_event;
		proto->kpi.v1.ioctl = ifproto_media_ioctl;
		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v1.send_arp = ifproto_media_send_arp;
	} else {
		proto->kpi.v2.input = ifproto_media_input_v2;
		proto->kpi.v2.pre_output = ifproto_media_preout;
		proto->kpi.v2.event = ifproto_media_event;
		proto->kpi.v2.ioctl = ifproto_media_ioctl;
		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
		proto->kpi.v2.send_arp = ifproto_media_send_arp;
	}
	proto->detached = 1;
	ifnet_lock_done(ifp);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached %s protocol %d\n", if_name(ifp),
		    (proto->proto_kpi == kProtoKPI_v1) ?
		    "v1" : "v2", proto_family);
	}

	/* release proto refcnt held during protocol attach */
	if_proto_free(proto);

	/*
	 * Release proto refcnt held during lookup; the rest of
	 * protocol detach steps will happen when the last proto
	 * reference is released.
	 */
	if_proto_free(proto);

end:
	return retval;
}
8312
8313
/*
 * Inert v1 input handler installed by ifnet_detach_protocol();
 * rejects any straggler packets with ENXIO.
 */
static errno_t
ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet, char *header)
{
#pragma unused(ifp, protocol, packet, header)
	return ENXIO;
}
8321
/*
 * Inert v2 input handler installed by ifnet_detach_protocol();
 * rejects any straggler packets with ENXIO.
 */
static errno_t
ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
    struct mbuf *packet)
{
#pragma unused(ifp, protocol, packet)
	return ENXIO;
}
8329
/*
 * Inert pre_output handler installed by ifnet_detach_protocol();
 * fails outbound framing requests with ENXIO.
 */
static errno_t
ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
    char *link_layer_dest)
{
#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return ENXIO;
}
8338
/*
 * Inert event handler installed by ifnet_detach_protocol();
 * silently discards interface events.
 */
static void
ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
    const struct kev_msg *event)
{
#pragma unused(ifp, protocol, event)
}
8345
/*
 * Inert ioctl handler installed by ifnet_detach_protocol();
 * fails all requests with ENXIO.
 */
static errno_t
ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
    unsigned long command, void *argument)
{
#pragma unused(ifp, protocol, command, argument)
	return ENXIO;
}
8353
/*
 * Inert multicast-resolve handler installed by ifnet_detach_protocol();
 * fails resolution with ENXIO.
 */
static errno_t
ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
    struct sockaddr_dl *out_ll, size_t ll_len)
{
#pragma unused(ifp, proto_addr, out_ll, ll_len)
	return ENXIO;
}
8361
/*
 * Inert ARP-send handler installed by ifnet_detach_protocol();
 * fails the request with ENXIO.
 */
static errno_t
ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
{
#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
	return ENXIO;
}
8370
8371 extern int if_next_index(void);
8372 extern int tcp_ecn_outbound;
8373
8374 void
dlil_ifclassq_setup(struct ifnet * ifp,struct ifclassq * ifcq)8375 dlil_ifclassq_setup(struct ifnet *ifp, struct ifclassq *ifcq)
8376 {
8377 uint32_t sflags = 0;
8378 int err;
8379
8380 if (if_flowadv) {
8381 sflags |= PKTSCHEDF_QALG_FLOWCTL;
8382 }
8383
8384 if (if_delaybased_queue) {
8385 sflags |= PKTSCHEDF_QALG_DELAYBASED;
8386 }
8387
8388 if (ifp->if_output_sched_model ==
8389 IFNET_SCHED_MODEL_DRIVER_MANAGED) {
8390 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
8391 }
8392 /* Inherit drop limit from the default queue */
8393 if (ifp->if_snd != ifcq) {
8394 IFCQ_PKT_DROP_LIMIT(ifcq) = IFCQ_PKT_DROP_LIMIT(ifp->if_snd);
8395 }
8396 /* Initialize transmit queue(s) */
8397 err = ifclassq_setup(ifcq, ifp, sflags);
8398 if (err != 0) {
8399 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
8400 "err=%d", __func__, ifp, err);
8401 /* NOTREACHED */
8402 }
8403 }
8404
8405 errno_t
ifnet_attach(ifnet_t ifp,const struct sockaddr_dl * ll_addr)8406 ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
8407 {
8408 #if SKYWALK
8409 boolean_t netif_compat;
8410 if_nexus_netif nexus_netif;
8411 #endif /* SKYWALK */
8412 struct ifnet *tmp_if;
8413 struct ifaddr *ifa;
8414 struct if_data_internal if_data_saved;
8415 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
8416 struct dlil_threading_info *dl_inp;
8417 thread_continue_t thfunc = NULL;
8418 int err;
8419
8420 if (ifp == NULL) {
8421 return EINVAL;
8422 }
8423
8424 /*
8425 * Serialize ifnet attach using dlil_ifnet_lock, in order to
8426 * prevent the interface from being configured while it is
8427 * embryonic, as ifnet_head_lock is dropped and reacquired
8428 * below prior to marking the ifnet with IFRF_ATTACHED.
8429 */
8430 dlil_if_lock();
8431 ifnet_head_lock_exclusive();
8432 /* Verify we aren't already on the list */
8433 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
8434 if (tmp_if == ifp) {
8435 ifnet_head_done();
8436 dlil_if_unlock();
8437 return EEXIST;
8438 }
8439 }
8440
8441 lck_mtx_lock_spin(&ifp->if_ref_lock);
8442 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
8443 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
8444 __func__, ifp);
8445 /* NOTREACHED */
8446 }
8447 lck_mtx_unlock(&ifp->if_ref_lock);
8448
8449 ifnet_lock_exclusive(ifp);
8450
8451 /* Sanity check */
8452 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
8453 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
8454 VERIFY(ifp->if_threads_pending == 0);
8455
8456 if (ll_addr != NULL) {
8457 if (ifp->if_addrlen == 0) {
8458 ifp->if_addrlen = ll_addr->sdl_alen;
8459 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
8460 ifnet_lock_done(ifp);
8461 ifnet_head_done();
8462 dlil_if_unlock();
8463 return EINVAL;
8464 }
8465 }
8466
8467 /*
8468 * Allow interfaces without protocol families to attach
8469 * only if they have the necessary fields filled out.
8470 */
8471 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
8472 DLIL_PRINTF("%s: Attempt to attach interface without "
8473 "family module - %d\n", __func__, ifp->if_family);
8474 ifnet_lock_done(ifp);
8475 ifnet_head_done();
8476 dlil_if_unlock();
8477 return ENODEV;
8478 }
8479
8480 /* Allocate protocol hash table */
8481 VERIFY(ifp->if_proto_hash == NULL);
8482 ifp->if_proto_hash = zalloc_flags(dlif_phash_zone,
8483 Z_WAITOK | Z_ZERO | Z_NOFAIL);
8484
8485 lck_mtx_lock_spin(&ifp->if_flt_lock);
8486 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
8487 TAILQ_INIT(&ifp->if_flt_head);
8488 VERIFY(ifp->if_flt_busy == 0);
8489 VERIFY(ifp->if_flt_waiters == 0);
8490 VERIFY(ifp->if_flt_non_os_count == 0);
8491 VERIFY(ifp->if_flt_no_tso_count == 0);
8492 lck_mtx_unlock(&ifp->if_flt_lock);
8493
8494 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
8495 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
8496 LIST_INIT(&ifp->if_multiaddrs);
8497 }
8498
8499 VERIFY(ifp->if_allhostsinm == NULL);
8500 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
8501 TAILQ_INIT(&ifp->if_addrhead);
8502
8503 if (ifp->if_index == 0) {
8504 int idx = if_next_index();
8505
8506 if (idx == -1) {
8507 ifp->if_index = 0;
8508 ifnet_lock_done(ifp);
8509 ifnet_head_done();
8510 dlil_if_unlock();
8511 return ENOBUFS;
8512 }
8513 ifp->if_index = (uint16_t)idx;
8514
8515 /* the lladdr passed at attach time is the permanent address */
8516 if (ll_addr != NULL && ifp->if_type == IFT_ETHER &&
8517 ll_addr->sdl_alen == ETHER_ADDR_LEN) {
8518 bcopy(CONST_LLADDR(ll_addr),
8519 dl_if->dl_if_permanent_ether,
8520 ETHER_ADDR_LEN);
8521 dl_if->dl_if_permanent_ether_is_set = 1;
8522 }
8523 }
8524 /* There should not be anything occupying this slot */
8525 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
8526
8527 /* allocate (if needed) and initialize a link address */
8528 ifa = dlil_alloc_lladdr(ifp, ll_addr);
8529 if (ifa == NULL) {
8530 ifnet_lock_done(ifp);
8531 ifnet_head_done();
8532 dlil_if_unlock();
8533 return ENOBUFS;
8534 }
8535
8536 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
8537 ifnet_addrs[ifp->if_index - 1] = ifa;
8538
8539 /* make this address the first on the list */
8540 IFA_LOCK(ifa);
8541 /* hold a reference for ifnet_addrs[] */
8542 IFA_ADDREF_LOCKED(ifa);
8543 /* if_attach_link_ifa() holds a reference for ifa_link */
8544 if_attach_link_ifa(ifp, ifa);
8545 IFA_UNLOCK(ifa);
8546
8547 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
8548 ifindex2ifnet[ifp->if_index] = ifp;
8549
8550 /* Hold a reference to the underlying dlil_ifnet */
8551 ifnet_reference(ifp);
8552
8553 /* Clear stats (save and restore other fields that we care) */
8554 if_data_saved = ifp->if_data;
8555 bzero(&ifp->if_data, sizeof(ifp->if_data));
8556 ifp->if_data.ifi_type = if_data_saved.ifi_type;
8557 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
8558 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
8559 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
8560 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
8561 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
8562 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
8563 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
8564 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
8565 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
8566 ifnet_touch_lastchange(ifp);
8567
8568 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
8569 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
8570 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
8571
8572 dlil_ifclassq_setup(ifp, ifp->if_snd);
8573
8574 /* Sanity checks on the input thread storage */
8575 dl_inp = &dl_if->dl_if_inpstorage;
8576 bzero(&dl_inp->dlth_stats, sizeof(dl_inp->dlth_stats));
8577 VERIFY(dl_inp->dlth_flags == 0);
8578 VERIFY(dl_inp->dlth_wtot == 0);
8579 VERIFY(dl_inp->dlth_ifp == NULL);
8580 VERIFY(qhead(&dl_inp->dlth_pkts) == NULL && qempty(&dl_inp->dlth_pkts));
8581 VERIFY(qlimit(&dl_inp->dlth_pkts) == 0);
8582 VERIFY(!dl_inp->dlth_affinity);
8583 VERIFY(ifp->if_inp == NULL);
8584 VERIFY(dl_inp->dlth_thread == THREAD_NULL);
8585 VERIFY(dl_inp->dlth_strategy == NULL);
8586 VERIFY(dl_inp->dlth_driver_thread == THREAD_NULL);
8587 VERIFY(dl_inp->dlth_poller_thread == THREAD_NULL);
8588 VERIFY(dl_inp->dlth_affinity_tag == 0);
8589
8590 #if IFNET_INPUT_SANITY_CHK
8591 VERIFY(dl_inp->dlth_pkts_cnt == 0);
8592 #endif /* IFNET_INPUT_SANITY_CHK */
8593
8594 VERIFY(ifp->if_poll_thread == THREAD_NULL);
8595 dlil_reset_rxpoll_params(ifp);
8596 /*
8597 * A specific DLIL input thread is created per non-loopback interface.
8598 */
8599 if (ifp->if_family != IFNET_FAMILY_LOOPBACK) {
8600 ifp->if_inp = dl_inp;
8601 ifnet_incr_pending_thread_count(ifp);
8602 err = dlil_create_input_thread(ifp, ifp->if_inp, &thfunc);
8603 if (err == ENODEV) {
8604 VERIFY(thfunc == NULL);
8605 ifnet_decr_pending_thread_count(ifp);
8606 } else if (err != 0) {
8607 panic_plain("%s: ifp=%p couldn't get an input thread; "
8608 "err=%d", __func__, ifp, err);
8609 /* NOTREACHED */
8610 }
8611 }
8612 /*
8613 * If the driver supports the new transmit model, calculate flow hash
8614 * and create a workloop starter thread to invoke the if_start callback
8615 * where the packets may be dequeued and transmitted.
8616 */
8617 if (ifp->if_eflags & IFEF_TXSTART) {
8618 thread_precedence_policy_data_t info;
8619 __unused kern_return_t kret;
8620
8621 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
8622 VERIFY(ifp->if_flowhash != 0);
8623 VERIFY(ifp->if_start_thread == THREAD_NULL);
8624
8625 ifnet_set_start_cycle(ifp, NULL);
8626 ifp->if_start_active = 0;
8627 ifp->if_start_req = 0;
8628 ifp->if_start_flags = 0;
8629 VERIFY(ifp->if_start != NULL);
8630 ifnet_incr_pending_thread_count(ifp);
8631 if ((err = kernel_thread_start(ifnet_start_thread_func,
8632 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
8633 panic_plain("%s: "
8634 "ifp=%p couldn't get a start thread; "
8635 "err=%d", __func__, ifp, err);
8636 /* NOTREACHED */
8637 }
8638 bzero(&info, sizeof(info));
8639 info.importance = 1;
8640 kret = thread_policy_set(ifp->if_start_thread,
8641 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
8642 THREAD_PRECEDENCE_POLICY_COUNT);
8643 ASSERT(kret == KERN_SUCCESS);
8644 } else {
8645 ifp->if_flowhash = 0;
8646 }
8647
8648 /* Reset polling parameters */
8649 ifnet_set_poll_cycle(ifp, NULL);
8650 ifp->if_poll_update = 0;
8651 ifp->if_poll_flags = 0;
8652 ifp->if_poll_req = 0;
8653 VERIFY(ifp->if_poll_thread == THREAD_NULL);
8654
8655 /*
8656 * If the driver supports the new receive model, create a poller
8657 * thread to invoke if_input_poll callback where the packets may
8658 * be dequeued from the driver and processed for reception.
8659 * if the interface is netif compat then the poller thread is
8660 * managed by netif.
8661 */
8662 if (thfunc == dlil_rxpoll_input_thread_func) {
8663 thread_precedence_policy_data_t info;
8664 __unused kern_return_t kret;
8665 #if SKYWALK
8666 VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
8667 #endif /* SKYWALK */
8668 VERIFY(ifp->if_input_poll != NULL);
8669 VERIFY(ifp->if_input_ctl != NULL);
8670 ifnet_incr_pending_thread_count(ifp);
8671 if ((err = kernel_thread_start(ifnet_poll_thread_func, ifp,
8672 &ifp->if_poll_thread)) != KERN_SUCCESS) {
8673 panic_plain("%s: ifp=%p couldn't get a poll thread; "
8674 "err=%d", __func__, ifp, err);
8675 /* NOTREACHED */
8676 }
8677 bzero(&info, sizeof(info));
8678 info.importance = 1;
8679 kret = thread_policy_set(ifp->if_poll_thread,
8680 THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
8681 THREAD_PRECEDENCE_POLICY_COUNT);
8682 ASSERT(kret == KERN_SUCCESS);
8683 }
8684
8685 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
8686 VERIFY(ifp->if_desc.ifd_len == 0);
8687 VERIFY(ifp->if_desc.ifd_desc != NULL);
8688
8689 /* Record attach PC stacktrace */
8690 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
8691
8692 ifp->if_updatemcasts = 0;
8693 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
8694 struct ifmultiaddr *ifma;
8695 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
8696 IFMA_LOCK(ifma);
8697 if (ifma->ifma_addr->sa_family == AF_LINK ||
8698 ifma->ifma_addr->sa_family == AF_UNSPEC) {
8699 ifp->if_updatemcasts++;
8700 }
8701 IFMA_UNLOCK(ifma);
8702 }
8703
8704 DLIL_PRINTF("%s: attached with %d suspended link-layer multicast "
8705 "membership(s)\n", if_name(ifp),
8706 ifp->if_updatemcasts);
8707 }
8708
8709 /* Clear logging parameters */
8710 bzero(&ifp->if_log, sizeof(ifp->if_log));
8711
8712 /* Clear foreground/realtime activity timestamps */
8713 ifp->if_fg_sendts = 0;
8714 ifp->if_rt_sendts = 0;
8715
8716 /* Clear throughput estimates and radio type */
8717 ifp->if_estimated_up_bucket = 0;
8718 ifp->if_estimated_down_bucket = 0;
8719 ifp->if_radio_type = 0;
8720 ifp->if_radio_channel = 0;
8721
8722 VERIFY(ifp->if_delegated.ifp == NULL);
8723 VERIFY(ifp->if_delegated.type == 0);
8724 VERIFY(ifp->if_delegated.family == 0);
8725 VERIFY(ifp->if_delegated.subfamily == 0);
8726 VERIFY(ifp->if_delegated.expensive == 0);
8727 VERIFY(ifp->if_delegated.constrained == 0);
8728
8729 VERIFY(ifp->if_agentids == NULL);
8730 VERIFY(ifp->if_agentcount == 0);
8731
8732 /* Reset interface state */
8733 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
8734 ifp->if_interface_state.valid_bitmask |=
8735 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
8736 ifp->if_interface_state.interface_availability =
8737 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
8738
8739 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
8740 if (ifp == lo_ifp) {
8741 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
8742 ifp->if_interface_state.valid_bitmask |=
8743 IF_INTERFACE_STATE_LQM_STATE_VALID;
8744 } else {
8745 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
8746 }
8747
8748 /*
8749 * Enable ECN capability on this interface depending on the
8750 * value of ECN global setting
8751 */
8752 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
8753 if_set_eflags(ifp, IFEF_ECN_ENABLE);
8754 if_clear_eflags(ifp, IFEF_ECN_DISABLE);
8755 }
8756
8757 /*
8758 * Built-in Cyclops always on policy for WiFi infra
8759 */
8760 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
8761 errno_t error;
8762
8763 error = if_set_qosmarking_mode(ifp,
8764 IFRTYPE_QOSMARKING_FASTLANE);
8765 if (error != 0) {
8766 DLIL_PRINTF("%s if_set_qosmarking_mode(%s) error %d\n",
8767 __func__, ifp->if_xname, error);
8768 } else {
8769 if_set_eflags(ifp, IFEF_QOSMARKING_ENABLED);
8770 #if (DEVELOPMENT || DEBUG)
8771 DLIL_PRINTF("%s fastlane enabled on %s\n",
8772 __func__, ifp->if_xname);
8773 #endif /* (DEVELOPMENT || DEBUG) */
8774 }
8775 }
8776
8777 ifnet_lock_done(ifp);
8778 ifnet_head_done();
8779
8780 #if SKYWALK
8781 netif_compat = dlil_attach_netif_compat_nexus(ifp, &nexus_netif);
8782 #endif /* SKYWALK */
8783
8784 lck_mtx_lock(&ifp->if_cached_route_lock);
8785 /* Enable forwarding cached route */
8786 ifp->if_fwd_cacheok = 1;
8787 /* Clean up any existing cached routes */
8788 ROUTE_RELEASE(&ifp->if_fwd_route);
8789 bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
8790 ROUTE_RELEASE(&ifp->if_src_route);
8791 bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
8792 ROUTE_RELEASE(&ifp->if_src_route6);
8793 bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
8794 lck_mtx_unlock(&ifp->if_cached_route_lock);
8795
8796 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
8797
8798 /*
8799 * Allocate and attach IGMPv3/MLDv2 interface specific variables
8800 * and trees; do this before the ifnet is marked as attached.
8801 * The ifnet keeps the reference to the info structures even after
8802 * the ifnet is detached, since the network-layer records still
8803 * refer to the info structures even after that. This also
8804 * makes it possible for them to still function after the ifnet
8805 * is recycled or reattached.
8806 */
8807 #if INET
8808 if (IGMP_IFINFO(ifp) == NULL) {
8809 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, Z_WAITOK);
8810 VERIFY(IGMP_IFINFO(ifp) != NULL);
8811 } else {
8812 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
8813 igmp_domifreattach(IGMP_IFINFO(ifp));
8814 }
8815 #endif /* INET */
8816 if (MLD_IFINFO(ifp) == NULL) {
8817 MLD_IFINFO(ifp) = mld_domifattach(ifp, Z_WAITOK);
8818 VERIFY(MLD_IFINFO(ifp) != NULL);
8819 } else {
8820 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
8821 mld_domifreattach(MLD_IFINFO(ifp));
8822 }
8823
8824 VERIFY(ifp->if_data_threshold == 0);
8825 VERIFY(ifp->if_dt_tcall != NULL);
8826
8827 /*
8828 * Wait for the created kernel threads for I/O to get
8829 * scheduled and run at least once before we proceed
8830 * to mark interface as attached.
8831 */
8832 lck_mtx_lock(&ifp->if_ref_lock);
8833 while (ifp->if_threads_pending != 0) {
8834 DLIL_PRINTF("%s: Waiting for all kernel threads created for "
8835 "interface %s to get scheduled at least once.\n",
8836 __func__, ifp->if_xname);
8837 (void) msleep(&ifp->if_threads_pending, &ifp->if_ref_lock, (PZERO - 1),
8838 __func__, NULL);
8839 LCK_MTX_ASSERT(&ifp->if_ref_lock, LCK_ASSERT_OWNED);
8840 }
8841 lck_mtx_unlock(&ifp->if_ref_lock);
8842 DLIL_PRINTF("%s: All kernel threads created for interface %s have been scheduled "
8843 "at least once. Proceeding.\n", __func__, ifp->if_xname);
8844
8845 /* Final mark this ifnet as attached. */
8846 ifnet_lock_exclusive(ifp);
8847 lck_mtx_lock_spin(&ifp->if_ref_lock);
8848 ifp->if_refflags = (IFRF_ATTACHED | IFRF_READY); /* clears embryonic */
8849 lck_mtx_unlock(&ifp->if_ref_lock);
8850 if (net_rtref) {
8851 /* boot-args override; enable idle notification */
8852 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
8853 IFRF_IDLE_NOTIFY);
8854 } else {
8855 /* apply previous request(s) to set the idle flags, if any */
8856 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
8857 ifp->if_idle_new_flags_mask);
8858 }
8859 #if SKYWALK
8860 /* the interface is fully attached; let the nexus adapter know */
8861 if (netif_compat || dlil_is_native_netif_nexus(ifp)) {
8862 if (netif_compat) {
8863 if (sk_netif_compat_txmodel ==
8864 NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI) {
8865 ifnet_enqueue_multi_setup(ifp,
8866 sk_tx_delay_qlen, sk_tx_delay_timeout);
8867 }
8868 ifp->if_nx_netif = nexus_netif;
8869 }
8870 ifp->if_na_ops->ni_finalize(ifp->if_na, ifp);
8871 }
8872 #endif /* SKYWALK */
8873 ifnet_lock_done(ifp);
8874 dlil_if_unlock();
8875
8876 #if PF
8877 /*
8878 * Attach packet filter to this interface, if enabled.
8879 */
8880 pf_ifnet_hook(ifp, 1);
8881 #endif /* PF */
8882
8883 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0, FALSE);
8884
8885 if (dlil_verbose) {
8886 DLIL_PRINTF("%s: attached%s\n", if_name(ifp),
8887 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
8888 }
8889
8890 return 0;
8891 }
8892
8893 /*
8894 * Prepare the storage for the first/permanent link address, which must
8895 * must have the same lifetime as the ifnet itself. Although the link
8896 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
8897 * its location in memory must never change as it may still be referred
8898 * to by some parts of the system afterwards (unfortunate implementation
8899 * artifacts inherited from BSD.)
8900 *
8901 * Caller must hold ifnet lock as writer.
8902 */
static struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr_dl *asdl, *msdl;
	char workbuf[IFNAMSIZ * 2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	/*
	 * Size the address sockaddr_dl: fixed header + interface name
	 * + link-layer address bytes.  The mask needs only header + name
	 * (masklen), and the total is rounded up to a 4-byte multiple.
	 */
	namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
		socksize = sizeof(struct sockaddr_dl);
	}
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			int ifasize = sizeof(*ifa) + 2 * SOCK_MAXADDRLEN;

			ifa = zalloc_permanent(ifasize, ZALIGN(struct ifaddr));
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(ifa + 1);
		bzero(asdl, SOCK_MAXADDRLEN);
		msdl = (struct sockaddr_dl *)(void *)
		    ((char *)asdl + SOCK_MAXADDRLEN);
		bzero(msdl, SOCK_MAXADDRLEN);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure. This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
		bzero(asdl, sizeof(dl_if->dl_if_lladdr.asdl));
		msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
		bzero(msdl, sizeof(dl_if->dl_if_lladdr.msdl));
	}

	/* hold a permanent reference for the ifnet itself */
	IFA_ADDREF_LOCKED(ifa);
	oifa = ifp->if_lladdr;
	ifp->if_lladdr = ifa;

	/* Fill in the AF_LINK address: name, index, type, LL bytes */
	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)asdl;
	asdl->sdl_len = (u_char)socksize;
	asdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, asdl->sdl_data, min(namelen,
		    sizeof(asdl->sdl_data)));
		asdl->sdl_nlen = (u_char)namelen;
	} else {
		asdl->sdl_nlen = 0;
	}
	asdl->sdl_index = ifp->if_index;
	asdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		asdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
	} else {
		asdl->sdl_alen = 0;
	}
	/* The netmask covers the name portion with all-ones bytes */
	ifa->ifa_netmask = (struct sockaddr *)msdl;
	msdl->sdl_len = (u_char)masklen;
	while (namelen > 0) {
		msdl->sdl_data[--namelen] = 0xff;
	}
	IFA_UNLOCK(ifa);

	/* drop the reference held on the address this one replaces */
	if (oifa != NULL) {
		IFA_REMREF(oifa);
	}

	return ifa;
}
9011
/*
 * Purge the interface's network-layer addresses: IPv4 (when INET is
 * configured) and IPv6.
 */
static void
if_purgeaddrs(struct ifnet *ifp)
{
#if INET
	in_purgeaddrs(ifp);
#endif /* INET */
	in6_purgeaddrs(ifp);
}
9020
/*
 * First phase of interface detach: mark the ifnet as detaching, unlink
 * it from the global lists (ifnet_head, ifindex2ifnet[], ordered list),
 * reset per-interface state that must not survive the detach, and then
 * enqueue the ifnet for the detacher worker thread, which completes the
 * teardown asynchronously in ifnet_detach_final().
 *
 * Returns EINVAL if ifp is NULL or not attached, ENXIO if a detach is
 * already in progress, 0 on success.
 */
errno_t
ifnet_detach(ifnet_t ifp)
{
	struct ifnet *delegated_ifp;
	struct nd_ifinfo *ndi = NULL;

	if (ifp == NULL) {
		return EINVAL;
	}

	/* Clear the CGA-initialized flag in the IPv6 ND info, if any */
	ndi = ND_IFINFO(ifp);
	if (NULL != ndi) {
		ndi->cga_initialized = FALSE;
	}

	/* Mark the interface down */
	if_down(ifp);

	/*
	 * IMPORTANT NOTE
	 *
	 * Any field in the ifnet that relies on IF_FULLY_ATTACHED()
	 * or equivalently, ifnet_is_attached(ifp, 1), can't be modified
	 * until after we've waited for all I/O references to drain
	 * in ifnet_detach_final().
	 */

	ifnet_head_lock_exclusive();
	ifnet_lock_exclusive(ifp);

	/* Tear down any network emulation (netem) state on output */
	if (ifp->if_output_netem != NULL) {
		netem_destroy(ifp->if_output_netem);
		ifp->if_output_netem = NULL;
	}

	/*
	 * Check to see if this interface has previously triggered
	 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if
	 * there are no more of such an interface around.
	 */
	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);

	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return EINVAL;
	} else if (ifp->if_refflags & IFRF_DETACHING) {
		/* Interface has already been detached */
		lck_mtx_unlock(&ifp->if_ref_lock);
		ifnet_lock_done(ifp);
		ifnet_head_done();
		return ENXIO;
	}
	VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
	/* Indicate this interface is being detached */
	ifp->if_refflags &= ~IFRF_ATTACHED;
	ifp->if_refflags |= IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);

	if (dlil_verbose) {
		DLIL_PRINTF("%s: detaching\n", if_name(ifp));
	}

	/* clean up flow control entry object if there's any */
	if (ifp->if_eflags & IFEF_TXSTART) {
		ifnet_flowadv(ifp->if_flowhash);
	}

	/* Reset ECN enable/disable flags */
	/* Reset CLAT46 flag */
	if_clear_eflags(ifp, IFEF_ECN_ENABLE | IFEF_ECN_DISABLE | IFEF_CLAT46);

	/*
	 * We do not reset the TCP keep alive counters in case
	 * a TCP connection stays connected after the interface
	 * went down
	 */
	if (ifp->if_tcp_kao_cnt > 0) {
		os_log(OS_LOG_DEFAULT, "%s %s tcp_kao_cnt %u not zero",
		    __func__, if_name(ifp), ifp->if_tcp_kao_cnt);
	}
	ifp->if_tcp_kao_max = 0;

	/*
	 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
	 * no longer be visible during lookups from this point.
	 */
	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
	ifp->if_link.tqe_next = NULL;
	ifp->if_link.tqe_prev = NULL;
	if (ifp->if_ordered_link.tqe_next != NULL ||
	    ifp->if_ordered_link.tqe_prev != NULL) {
		ifnet_remove_from_ordered_list(ifp);
	}
	ifindex2ifnet[ifp->if_index] = NULL;

	/* 18717626 - reset router mode */
	if_clear_eflags(ifp, IFEF_IPV4_ROUTER);
	ifp->if_ipv6_router_mode = IPV6_ROUTER_MODE_DISABLED;

	/* Record detach PC stacktrace */
	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);

	/* Clear logging parameters */
	bzero(&ifp->if_log, sizeof(ifp->if_log));

	/* Clear delegated interface info (reference released below) */
	delegated_ifp = ifp->if_delegated.ifp;
	bzero(&ifp->if_delegated, sizeof(ifp->if_delegated));

	/* Reset interface state */
	bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));

	ifnet_lock_done(ifp);
	ifnet_head_done();

	/* Release reference held on the delegated interface */
	if (delegated_ifp != NULL) {
		ifnet_release(delegated_ifp);
	}

	/* Reset Link Quality Metric (unless loopback [lo0]) */
	if (ifp != lo_ifp) {
		if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
	}

	/* Reset TCP local statistics */
	if (ifp->if_tcp_stat != NULL) {
		bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
	}

	/* Reset UDP local statistics */
	if (ifp->if_udp_stat != NULL) {
		bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
	}

	/* Reset ifnet IPv4 stats */
	if (ifp->if_ipv4_stat != NULL) {
		bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
	}

	/* Reset ifnet IPv6 stats */
	if (ifp->if_ipv6_stat != NULL) {
		bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
	}

	/* Release memory held for interface link status report */
	if (ifp->if_link_status != NULL) {
		kfree_type(struct if_link_status, ifp->if_link_status);
		ifp->if_link_status = NULL;
	}

	/* Let BPF know we're detaching */
	bpfdetach(ifp);

	/* Disable forwarding cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	ifp->if_fwd_cacheok = 0;
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	/* Disable data threshold and wait for any pending event posting */
	ifp->if_data_threshold = 0;
	VERIFY(ifp->if_dt_tcall != NULL);
	(void) thread_call_cancel_wait(ifp->if_dt_tcall);

	/*
	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
	 * references to the info structures and leave them attached to
	 * this ifnet.
	 */
#if INET
	igmp_domifdetach(ifp);
#endif /* INET */
	mld_domifdetach(ifp);

#if SKYWALK
	/* Clean up any netns tokens still pointing to this ifnet */
	netns_ifnet_detach(ifp);
#endif /* SKYWALK */
	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0, FALSE);

	/* Let worker thread take care of the rest, to avoid reentrancy */
	dlil_if_lock();
	ifnet_detaching_enqueue(ifp);
	dlil_if_unlock();

	return 0;
}
9213
/*
 * Queue an ifnet for deferred teardown and wake the detacher thread.
 * Caller must hold the dlil interface lock.
 */
static void
ifnet_detaching_enqueue(struct ifnet *ifp)
{
	dlil_if_lock_assert();

	++ifnet_detaching_cnt;
	VERIFY(ifnet_detaching_cnt != 0);
	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
	/* wake up ifnet_detacher_thread_cont() */
	wakeup((caddr_t)&ifnet_delayed_run);
}
9224
9225 static struct ifnet *
ifnet_detaching_dequeue(void)9226 ifnet_detaching_dequeue(void)
9227 {
9228 struct ifnet *ifp;
9229
9230 dlil_if_lock_assert();
9231
9232 ifp = TAILQ_FIRST(&ifnet_detaching_head);
9233 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
9234 if (ifp != NULL) {
9235 VERIFY(ifnet_detaching_cnt != 0);
9236 --ifnet_detaching_cnt;
9237 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
9238 ifp->if_detaching_link.tqe_next = NULL;
9239 ifp->if_detaching_link.tqe_prev = NULL;
9240 }
9241 return ifp;
9242 }
9243
/*
 * Continuation body of the detacher thread: drain the detaching queue,
 * calling ifnet_detach_final() on each interface (with the dlil lock
 * dropped around the call), then block on ifnet_delayed_run with this
 * function as the continuation.  Never returns.
 */
__attribute__((noreturn))
static void
ifnet_detacher_thread_cont(void *v, wait_result_t wres)
{
#pragma unused(v, wres)
	struct ifnet *ifp;

	dlil_if_lock();
	if (__improbable(ifnet_detaching_embryonic)) {
		ifnet_detaching_embryonic = FALSE;
		/* there's no lock ordering constraint so OK to do this here */
		dlil_decr_pending_thread_count();
	}

	for (;;) {
		dlil_if_lock_assert();

		if (ifnet_detaching_cnt == 0) {
			break;
		}

		net_update_uptime();

		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);

		/* Take care of detaching ifnet */
		ifp = ifnet_detaching_dequeue();
		if (ifp != NULL) {
			/* drop the dlil lock across the (blocking) teardown */
			dlil_if_unlock();
			ifnet_detach_final(ifp);
			dlil_if_lock();
		}
	}

	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);

	VERIFY(0);      /* we should never get here */
	/* NOTREACHED */
	__builtin_unreachable();
}
9286
/*
 * Entry point of the detacher thread: arm the wait on ifnet_delayed_run,
 * mark the thread embryonic, self-wake once so the continuation runs and
 * clears the embryonic state, then block into the continuation loop.
 * Never returns.
 */
__dead2
static void
ifnet_detacher_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)
	dlil_if_lock();
	(void) assert_wait(&ifnet_delayed_run, THREAD_UNINT);
	ifnet_detaching_embryonic = TRUE;
	/* wake up once to get out of embryonic state */
	wakeup((caddr_t)&ifnet_delayed_run);
	dlil_if_unlock();
	(void) thread_block(ifnet_detacher_thread_cont);
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}
9303
/*
 * Second (final) phase of interface detach, run from the detacher
 * thread.  Waits for all outstanding I/O references to drain, detaches
 * filters and protocols, removes the permanent link address, terminates
 * the starter/poller/input threads, replaces the driver callbacks with
 * the ifp_if_* stubs (the driver may unload after this), and finally
 * clears IFRF_DETACHING, calls the driver's if_free callback, and drops
 * the reference held since attach.
 */
static void
ifnet_detach_final(struct ifnet *ifp)
{
	struct ifnet_filter *filter, *filter_next;
	struct dlil_ifnet *dlifp;
	struct ifnet_filter_head fhead;
	struct dlil_threading_info *inp;
	struct ifaddr *ifa;
	ifnet_detached_func if_free;
	int i;

#if SKYWALK
	/*
	 * Wait for the datapath to quiesce before tearing down
	 * netif/flowswitch nexuses.
	 */
	dlil_quiesce_and_detach_nexuses(ifp);
#endif /* SKYWALK */

	lck_mtx_lock(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}

	/*
	 * Wait until the existing IO references get released
	 * before we proceed with ifnet_detach. This is not a
	 * common case, so block without using a continuation.
	 */
	while (ifp->if_refio > 0) {
		DLIL_PRINTF("%s: Waiting for IO references on %s interface "
		    "to be released\n", __func__, if_name(ifp));
		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
		    (PZERO - 1), "ifnet_ioref_wait", NULL);
	}

	VERIFY(ifp->if_datamov == 0);
	VERIFY(ifp->if_drainers == 0);
	VERIFY(ifp->if_suspend == 0);
	/* interface is no longer ready for use */
	ifp->if_refflags &= ~IFRF_READY;
	lck_mtx_unlock(&ifp->if_ref_lock);

	/* Clear agent IDs */
	if (ifp->if_agentids != NULL) {
		kfree_data(ifp->if_agentids,
		    sizeof(uuid_t) * ifp->if_agentcount);
		ifp->if_agentids = NULL;
	}
	ifp->if_agentcount = 0;

#if SKYWALK
	VERIFY(SLIST_EMPTY(&ifp->if_netns_tokens));
#endif /* SKYWALK */
	/* Drain and destroy send queue */
	ifclassq_teardown(ifp->if_snd);

	/* Detach interface filters */
	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);

	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
	/* steal the filter list, then detach each entry unlocked */
	fhead = ifp->if_flt_head;
	TAILQ_INIT(&ifp->if_flt_head);

	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
		filter_next = TAILQ_NEXT(filter, filt_next);
		lck_mtx_unlock(&ifp->if_flt_lock);

		dlil_detach_filter_internal(filter, 1);
		lck_mtx_lock(&ifp->if_flt_lock);
	}
	if_flt_monitor_leave(ifp);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Tell upper layers to drop their network addresses */
	if_purgeaddrs(ifp);

	ifnet_lock_exclusive(ifp);

	/* Unplumb all protocols */
	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
		struct if_proto *proto;

		/*
		 * proto_unplumb() needs the ifnet lock dropped; re-read
		 * the slot head each iteration since the list changes.
		 */
		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		while (proto != NULL) {
			protocol_family_t family = proto->protocol_family;
			ifnet_lock_done(ifp);
			proto_unplumb(family, ifp);
			ifnet_lock_exclusive(ifp);
			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
		}
		/* There should not be any protocols left */
		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
	}
	zfree(dlif_phash_zone, ifp->if_proto_hash);
	ifp->if_proto_hash = NULL;

	/* Detach (permanent) link address from if_addrhead */
	ifa = TAILQ_FIRST(&ifp->if_addrhead);
	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
	IFA_LOCK(ifa);
	if_detach_link_ifa(ifp, ifa);
	IFA_UNLOCK(ifa);

	/* Remove (permanent) link address from ifnet_addrs[] */
	IFA_REMREF(ifa);
	ifnet_addrs[ifp->if_index - 1] = NULL;

	/* This interface should not be on {ifnet_head,detaching} */
	VERIFY(ifp->if_link.tqe_next == NULL);
	VERIFY(ifp->if_link.tqe_prev == NULL);
	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
	VERIFY(ifp->if_ordered_link.tqe_next == NULL);
	VERIFY(ifp->if_ordered_link.tqe_prev == NULL);

	/* The slot should have been emptied */
	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);

	/* There should not be any addresses left */
	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));

	/*
	 * Signal the starter thread to terminate itself, and wait until
	 * it has exited.
	 */
	if (ifp->if_start_thread != THREAD_NULL) {
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_TERMINATING;
		wakeup_one((caddr_t)&ifp->if_start_thread);
		lck_mtx_unlock(&ifp->if_start_lock);

		/* wait for starter thread to terminate */
		lck_mtx_lock(&ifp->if_start_lock);
		while (ifp->if_start_thread != THREAD_NULL) {
			if (dlil_verbose) {
				DLIL_PRINTF("%s: waiting for %s starter thread to terminate\n",
				    __func__,
				    if_name(ifp));
			}
			(void) msleep(&ifp->if_start_thread,
			    &ifp->if_start_lock, (PZERO - 1),
			    "ifnet_start_thread_exit", NULL);
		}
		lck_mtx_unlock(&ifp->if_start_lock);
		if (dlil_verbose) {
			/*
			 * NOTE(review): message lacks a trailing '\n',
			 * unlike the poller equivalent below — confirm
			 * whether this is intentional.
			 */
			DLIL_PRINTF("%s: %s starter thread termination complete",
			    __func__, if_name(ifp));
		}
	}

	/*
	 * Signal the poller thread to terminate itself, and wait until
	 * it has exited.
	 */
	if (ifp->if_poll_thread != THREAD_NULL) {
#if SKYWALK
		VERIFY(!(ifp->if_eflags & IFEF_SKYWALK_NATIVE));
#endif /* SKYWALK */
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_flags |= IF_POLLF_TERMINATING;
		wakeup_one((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);

		/* wait for poller thread to terminate */
		lck_mtx_lock(&ifp->if_poll_lock);
		while (ifp->if_poll_thread != THREAD_NULL) {
			if (dlil_verbose) {
				DLIL_PRINTF("%s: waiting for %s poller thread to terminate\n",
				    __func__,
				    if_name(ifp));
			}
			(void) msleep(&ifp->if_poll_thread,
			    &ifp->if_poll_lock, (PZERO - 1),
			    "ifnet_poll_thread_exit", NULL);
		}
		lck_mtx_unlock(&ifp->if_poll_lock);
		if (dlil_verbose) {
			DLIL_PRINTF("%s: %s poller thread termination complete\n",
			    __func__, if_name(ifp));
		}
	}

	/*
	 * If thread affinity was set for the workloop thread, we will need
	 * to tear down the affinity and release the extra reference count
	 * taken at attach time. Does not apply to lo0 or other interfaces
	 * without dedicated input threads.
	 */
	if ((inp = ifp->if_inp) != NULL) {
		VERIFY(inp != dlil_main_input_thread);

		if (inp->dlth_affinity) {
			struct thread *tp, *wtp, *ptp;

			/* snapshot and clear the affinity state under lock */
			lck_mtx_lock_spin(&inp->dlth_lock);
			wtp = inp->dlth_driver_thread;
			inp->dlth_driver_thread = THREAD_NULL;
			ptp = inp->dlth_poller_thread;
			inp->dlth_poller_thread = THREAD_NULL;
			ASSERT(inp->dlth_thread != THREAD_NULL);
			tp = inp->dlth_thread;          /* don't nullify now */
			inp->dlth_affinity_tag = 0;
			inp->dlth_affinity = FALSE;
			lck_mtx_unlock(&inp->dlth_lock);

			/* Tear down poll thread affinity */
			if (ptp != NULL) {
				VERIFY(ifp->if_eflags & IFEF_RXPOLL);
				VERIFY(ifp->if_xflags & IFXF_LEGACY);
				(void) dlil_affinity_set(ptp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(ptp);
			}

			/* Tear down workloop thread affinity */
			if (wtp != NULL) {
				(void) dlil_affinity_set(wtp,
				    THREAD_AFFINITY_TAG_NULL);
				thread_deallocate(wtp);
			}

			/* Tear down DLIL input thread affinity */
			(void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
			thread_deallocate(tp);
		}

		/* disassociate ifp DLIL input thread */
		ifp->if_inp = NULL;

		/* if the worker thread was created, tell it to terminate */
		if (inp->dlth_thread != THREAD_NULL) {
			lck_mtx_lock_spin(&inp->dlth_lock);
			inp->dlth_flags |= DLIL_INPUT_TERMINATE;
			if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) {
				wakeup_one((caddr_t)&inp->dlth_flags);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			ifnet_lock_done(ifp);

			/* wait for the input thread to terminate */
			lck_mtx_lock_spin(&inp->dlth_lock);
			while ((inp->dlth_flags & DLIL_INPUT_TERMINATE_COMPLETE)
			    == 0) {
				(void) msleep(&inp->dlth_flags, &inp->dlth_lock,
				    (PZERO - 1) | PSPIN, inp->dlth_name, NULL);
			}
			lck_mtx_unlock(&inp->dlth_lock);
			ifnet_lock_exclusive(ifp);
		}

		/* clean-up input thread state */
		dlil_clean_threading_info(inp);
		/* clean-up poll parameters */
		VERIFY(ifp->if_poll_thread == THREAD_NULL);
		dlil_reset_rxpoll_params(ifp);
	}

	/* The driver might unload, so point these to ourselves */
	if_free = ifp->if_free;
	ifp->if_output_dlil = ifp_if_output;
	ifp->if_output = ifp_if_output;
	ifp->if_pre_enqueue = ifp_if_output;
	ifp->if_start = ifp_if_start;
	ifp->if_output_ctl = ifp_if_ctl;
	ifp->if_input_dlil = ifp_if_input;
	ifp->if_input_poll = ifp_if_input_poll;
	ifp->if_input_ctl = ifp_if_ctl;
	ifp->if_ioctl = ifp_if_ioctl;
	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
	ifp->if_free = ifp_if_free;
	ifp->if_demux = ifp_if_demux;
	ifp->if_event = ifp_if_event;
	ifp->if_framer_legacy = ifp_if_framer;
	ifp->if_framer = ifp_if_framer_extended;
	ifp->if_add_proto = ifp_if_add_proto;
	ifp->if_del_proto = ifp_if_del_proto;
	ifp->if_check_multi = ifp_if_check_multi;

	/* wipe out interface description */
	VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
	ifp->if_desc.ifd_len = 0;
	VERIFY(ifp->if_desc.ifd_desc != NULL);
	bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);

	/* there shouldn't be any delegation by now */
	VERIFY(ifp->if_delegated.ifp == NULL);
	VERIFY(ifp->if_delegated.type == 0);
	VERIFY(ifp->if_delegated.family == 0);
	VERIFY(ifp->if_delegated.subfamily == 0);
	VERIFY(ifp->if_delegated.expensive == 0);
	VERIFY(ifp->if_delegated.constrained == 0);

	/* QoS marking get cleared */
	if_clear_eflags(ifp, IFEF_QOSMARKING_ENABLED);
	if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);

#if SKYWALK
	/* the nexus destructor is responsible for clearing these */
	VERIFY(ifp->if_na_ops == NULL);
	VERIFY(ifp->if_na == NULL);
#endif /* SKYWALK */

	/* promiscuous count needs to start at zero again */
	ifp->if_pcount = 0;
	ifp->if_flags &= ~IFF_PROMISC;

	ifnet_lock_done(ifp);

#if PF
	/*
	 * Detach this interface from packet filter, if enabled.
	 */
	pf_ifnet_hook(ifp, 0);
#endif /* PF */

	/* Filter list should be empty */
	lck_mtx_lock_spin(&ifp->if_flt_lock);
	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
	VERIFY(ifp->if_flt_busy == 0);
	VERIFY(ifp->if_flt_waiters == 0);
	VERIFY(ifp->if_flt_non_os_count == 0);
	VERIFY(ifp->if_flt_no_tso_count == 0);
	lck_mtx_unlock(&ifp->if_flt_lock);

	/* Last chance to drain send queue */
	if_qflush_snd(ifp, 0);

	/* Last chance to cleanup any cached route */
	lck_mtx_lock(&ifp->if_cached_route_lock);
	VERIFY(!ifp->if_fwd_cacheok);
	ROUTE_RELEASE(&ifp->if_fwd_route);
	bzero(&ifp->if_fwd_route, sizeof(ifp->if_fwd_route));
	ROUTE_RELEASE(&ifp->if_src_route);
	bzero(&ifp->if_src_route, sizeof(ifp->if_src_route));
	ROUTE_RELEASE(&ifp->if_src_route6);
	bzero(&ifp->if_src_route6, sizeof(ifp->if_src_route6));
	lck_mtx_unlock(&ifp->if_cached_route_lock);

	VERIFY(ifp->if_data_threshold == 0);
	VERIFY(ifp->if_dt_tcall != NULL);
	VERIFY(!thread_call_isactive(ifp->if_dt_tcall));

	ifnet_llreach_ifdetach(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0, FALSE);

	/*
	 * Finally, mark this ifnet as detached.
	 */
	if (dlil_verbose) {
		DLIL_PRINTF("%s: detached\n", if_name(ifp));
	}
	lck_mtx_lock_spin(&ifp->if_ref_lock);
	if (!(ifp->if_refflags & IFRF_DETACHING)) {
		panic("%s: flags mismatch (detaching not set) ifp=%p",
		    __func__, ifp);
		/* NOTREACHED */
	}
	ifp->if_refflags &= ~IFRF_DETACHING;
	lck_mtx_unlock(&ifp->if_ref_lock);
	/* notify the driver only after IFRF_DETACHING has been cleared */
	if (if_free != NULL) {
		if_free(ifp);
	}

	ifclassq_release(&ifp->if_snd);

	/* we're fully detached, clear the "in use" bit */
	dlifp = (struct dlil_ifnet *)ifp;
	lck_mtx_lock(&dlifp->dl_if_lock);
	ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
	dlifp->dl_if_flags &= ~DLIF_INUSE;
	lck_mtx_unlock(&dlifp->dl_if_lock);

	/* Release reference held during ifnet attach */
	ifnet_release(ifp);
}
9683
/*
 * Output stub installed on detached interfaces: drop the entire
 * packet chain and report success to the caller.
 */
errno_t
ifp_if_output(struct ifnet *ifp, struct mbuf *m)
{
#pragma unused(ifp)
	m_freem_list(m);
	return 0;
}
9691
/*
 * Start stub installed on detached interfaces: purge whatever is
 * still queued for the interface.
 */
void
ifp_if_start(struct ifnet *ifp)
{
	ifnet_purge(ifp);
}
9697
/*
 * Input stub installed on detached interfaces: free the packet chain
 * and report ENXIO (device not configured).
 */
static errno_t
ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(ifp, m_tail, s, poll, tp)
	m_freem_list(m_head);
	return ENXIO;
}
9707
/*
 * Poll stub installed on detached interfaces: report an empty result
 * (no packets, zero counts) through whichever output parameters the
 * caller supplied.
 */
static void
ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
    struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
{
#pragma unused(ifp, flags, max_cnt)
	if (m_head != NULL) {
		*m_head = NULL;
	}
	if (m_tail != NULL) {
		*m_tail = NULL;
	}
	if (cnt != NULL) {
		*cnt = 0;
	}
	if (len != NULL) {
		*len = 0;
	}
}
9726
/*
 * Control stub installed on detached interfaces: reject all driver
 * control commands.
 */
static errno_t
ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
#pragma unused(ifp, cmd, arglen, arg)
	return EOPNOTSUPP;
}
9733
/*
 * Demux stub installed on detached interfaces: free the packet and
 * return EJUSTRETURN so the caller does not process it further.
 */
static errno_t
ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
{
#pragma unused(ifp, fh, pf)
	m_freem(m);
	return EJUSTRETURN;
}
9741
/*
 * Protocol-attach stub installed on detached interfaces: refuse to
 * plumb any protocol.
 */
static errno_t
ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
    const struct ifnet_demux_desc *da, u_int32_t dc)
{
#pragma unused(ifp, pf, da, dc)
	return EINVAL;
}
9749
/*
 * Protocol-detach stub installed on detached interfaces: nothing is
 * plumbed, so refuse.
 */
static errno_t
ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
{
#pragma unused(ifp, pf)
	return EINVAL;
}
9756
/*
 * Multicast-address check stub installed on detached interfaces:
 * no multicast support.
 */
static errno_t
ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
{
#pragma unused(ifp, sa)
	return EOPNOTSUPP;
}
9763
#if !XNU_TARGET_OS_OSX
/*
 * Legacy framer stub installed on detached interfaces: forwards to
 * ifp_if_framer_extended().  The signature differs per platform; on
 * macOS the pre/post out-parameters are not part of the legacy API
 * and NULL is passed instead.
 */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t,
    u_int32_t *pre, u_int32_t *post)
#else /* XNU_TARGET_OS_OSX */
static errno_t
ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
    const struct sockaddr *sa, const char *ll, const char *t)
#endif /* XNU_TARGET_OS_OSX */
{
#pragma unused(ifp, m, sa, ll, t)
#if !XNU_TARGET_OS_OSX
	return ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post);
#else /* XNU_TARGET_OS_OSX */
	return ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL);
#endif /* XNU_TARGET_OS_OSX */
}
9782
9783 static errno_t
ifp_if_framer_extended(struct ifnet * ifp,struct mbuf ** m,const struct sockaddr * sa,const char * ll,const char * t,u_int32_t * pre,u_int32_t * post)9784 ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
9785 const struct sockaddr *sa, const char *ll, const char *t,
9786 u_int32_t *pre, u_int32_t *post)
9787 {
9788 #pragma unused(ifp, sa, ll, t)
9789 m_freem(*m);
9790 *m = NULL;
9791
9792 if (pre != NULL) {
9793 *pre = 0;
9794 }
9795 if (post != NULL) {
9796 *post = 0;
9797 }
9798
9799 return EJUSTRETURN;
9800 }
9801
9802 errno_t
ifp_if_ioctl(struct ifnet * ifp,unsigned long cmd,void * arg)9803 ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
9804 {
9805 #pragma unused(ifp, cmd, arg)
9806 return EOPNOTSUPP;
9807 }
9808
9809 static errno_t
ifp_if_set_bpf_tap(struct ifnet * ifp,bpf_tap_mode tm,bpf_packet_func f)9810 ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
9811 {
9812 #pragma unused(ifp, tm, f)
9813 /* XXX not sure what to do here */
9814 return 0;
9815 }
9816
/*
 * Default interface free callback; nothing extra to release.
 */
static void
ifp_if_free(struct ifnet *ifp)
{
	(void)ifp;
}
9822
/*
 * Default interface event callback; events are ignored.
 */
static void
ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
{
	(void)ifp;
	(void)e;
}
9828
/*
 * Find or allocate a dlil_ifnet for the given family / unique id /
 * extended name.  With the dlil_if lock held, the whole list is
 * scanned first:
 *
 *  - an in-use entry with the same extended name or the same unique id
 *    yields EBUSY;
 *  - otherwise the first not-in-use entry matching the unique id is
 *    remembered and later recycled (marked DLIF_INUSE | DLIF_REUSE);
 *  - if nothing matches, a fresh entry is carved out of dlif_zone.
 *
 * On success, *ifp points at the referenced ifnet and 0 is returned.
 */
int
dlil_if_acquire(u_int32_t family, const void *uniqueid,
    size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
{
	struct ifnet *ifp1 = NULL;
	struct dlil_ifnet *dlifp1 = NULL;
	struct dlil_ifnet *dlifp1_saved = NULL;
	void *buf, *base, **pbuf;
	int ret = 0;

	VERIFY(*ifp == NULL);
	dlil_if_lock();
	/*
	 * We absolutely can't have an interface with the same name
	 * in in-use state.
	 * To make sure of that list has to be traversed completely
	 */
	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
		ifp1 = (struct ifnet *)dlifp1;

		if (ifp1->if_family != family) {
			continue;
		}

		/*
		 * If interface is in use, return EBUSY if either unique id
		 * or interface extended names are the same
		 */
		lck_mtx_lock(&dlifp1->dl_if_lock);
		if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0 &&
		    (dlifp1->dl_if_flags & DLIF_INUSE) != 0) {
			lck_mtx_unlock(&dlifp1->dl_if_lock);
			ret = EBUSY;
			goto end;
		}

		if (uniqueid_len != 0 &&
		    uniqueid_len == dlifp1->dl_if_uniqueid_len &&
		    bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
			if ((dlifp1->dl_if_flags & DLIF_INUSE) != 0) {
				lck_mtx_unlock(&dlifp1->dl_if_lock);
				ret = EBUSY;
				goto end;
			}
			if (dlifp1_saved == NULL) {
				/* cache the first match */
				dlifp1_saved = dlifp1;
			}
			/*
			 * Do not break or jump to end as we have to traverse
			 * the whole list to ensure there are no name collisions
			 */
		}
		lck_mtx_unlock(&dlifp1->dl_if_lock);
	}

	/* If there's an interface that can be recycled, use that */
	if (dlifp1_saved != NULL) {
		/* Re-check DLIF_INUSE: it may have been taken since the scan */
		lck_mtx_lock(&dlifp1_saved->dl_if_lock);
		if ((dlifp1_saved->dl_if_flags & DLIF_INUSE) != 0) {
			/* some other thread got in ahead of us */
			lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
			ret = EBUSY;
			goto end;
		}
		dlifp1_saved->dl_if_flags |= (DLIF_INUSE | DLIF_REUSE);
		lck_mtx_unlock(&dlifp1_saved->dl_if_lock);
		*ifp = (struct ifnet *)dlifp1_saved;
		dlil_if_ref(*ifp);
		goto end;
	}

	/* no interface found, allocate a new one */
	buf = zalloc_flags(dlif_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/* Get the 64-bit aligned base address for this object */
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t),
	    sizeof(u_int64_t));
	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));

	/*
	 * Wind back a pointer size from the aligned base and
	 * save the original address so we can free it later.
	 */
	pbuf = (void **)((intptr_t)base - sizeof(void *));
	*pbuf = buf;
	dlifp1 = base;

	if (uniqueid_len) {
		/* keep a private copy of the caller's unique id */
		dlifp1->dl_if_uniqueid = kalloc_data(uniqueid_len,
		    Z_WAITOK);
		if (dlifp1->dl_if_uniqueid == NULL) {
			/* free the zone element (buf), not the aligned base */
			zfree(dlif_zone, buf);
			ret = ENOMEM;
			goto end;
		}
		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
		dlifp1->dl_if_uniqueid_len = uniqueid_len;
	}

	ifp1 = (struct ifnet *)dlifp1;
	dlifp1->dl_if_flags = DLIF_INUSE;
	if (ifnet_debug) {
		dlifp1->dl_if_flags |= DLIF_DEBUG;
		dlifp1->dl_if_trace = dlil_if_trace;
	}
	/* point name/xname at the in-structure storage */
	ifp1->if_name = dlifp1->dl_if_namestorage;
	ifp1->if_xname = dlifp1->dl_if_xnamestorage;

	/* initialize interface description */
	ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
	ifp1->if_desc.ifd_len = 0;
	ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;

#if SKYWALK
	SLIST_INIT(&ifp1->if_netns_tokens);
#endif /* SKYWALK */

	if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
		DLIL_PRINTF("%s: failed to allocate if local stats, "
		    "error: %d\n", __func__, ret);
		/* This probably shouldn't be fatal */
		ret = 0;
	}

	/* initialize all per-ifnet locks */
	lck_mtx_init(&dlifp1->dl_if_lock, &ifnet_lock_group, &ifnet_lock_attr);
	lck_rw_init(&ifp1->if_lock, &ifnet_lock_group, &ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_ref_lock, &ifnet_lock_group, &ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_flt_lock, &ifnet_lock_group, &ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_addrconfig_lock, &ifnet_lock_group,
	    &ifnet_lock_attr);
	lck_rw_init(&ifp1->if_llreach_lock, &ifnet_lock_group, &ifnet_lock_attr);
#if INET
	lck_rw_init(&ifp1->if_inetdata_lock, &ifnet_lock_group,
	    &ifnet_lock_attr);
	ifp1->if_inetdata = NULL;
#endif
	lck_mtx_init(&ifp1->if_inet6_ioctl_lock, &ifnet_lock_group, &ifnet_lock_attr);
	ifp1->if_inet6_ioctl_busy = FALSE;
	lck_rw_init(&ifp1->if_inet6data_lock, &ifnet_lock_group,
	    &ifnet_lock_attr);
	ifp1->if_inet6data = NULL;
	lck_rw_init(&ifp1->if_link_status_lock, &ifnet_lock_group,
	    &ifnet_lock_attr);
	ifp1->if_link_status = NULL;

	/* for send data paths */
	lck_mtx_init(&ifp1->if_start_lock, &ifnet_snd_lock_group,
	    &ifnet_lock_attr);
	lck_mtx_init(&ifp1->if_cached_route_lock, &ifnet_snd_lock_group,
	    &ifnet_lock_attr);

	/* for receive data paths */
	lck_mtx_init(&ifp1->if_poll_lock, &ifnet_rcv_lock_group,
	    &ifnet_lock_attr);

	/* thread call allocation is done with sleeping zalloc */
	ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
	    ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
	if (ifp1->if_dt_tcall == NULL) {
		panic_plain("%s: couldn't create if_dt_tcall", __func__);
		/* NOTREACHED */
	}

	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);

	*ifp = ifp1;
	dlil_if_ref(*ifp);

end:
	dlil_if_unlock();

	/* sanity: the object and its if_data must be 64-bit aligned */
	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof(u_int64_t)) &&
	    IS_P2ALIGNED(&ifp1->if_data, sizeof(u_int64_t))));

	return ret;
}
10006
/*
 * Common teardown for an ifnet obtained via dlil_if_acquire(): drop
 * the allocation counters, free any out-of-line broadcast address
 * storage, point the name/xname back at the in-structure storage,
 * and — when clear_in_use is set — clear DLIF_INUSE so the entry can
 * be recycled by a later dlil_if_acquire().
 */
static void
_dlil_if_release(ifnet_t ifp, bool clear_in_use)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	/* broadcast address was heap-allocated only if it didn't fit inline */
	if (ifp->if_broadcast.length > sizeof(ifp->if_broadcast.u.buffer)) {
		kfree_data(ifp->if_broadcast.u.ptr, ifp->if_broadcast.length);
		ifp->if_broadcast.length = 0;
		ifp->if_broadcast.u.ptr = NULL;
	}
	lck_mtx_lock(&dlifp->dl_if_lock);
	strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
	ifp->if_name = dlifp->dl_if_namestorage;
	/* Reset external name (name + unit) */
	ifp->if_xname = dlifp->dl_if_xnamestorage;
	snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
	    "%s?", ifp->if_name);
	if (clear_in_use) {
		ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
		dlifp->dl_if_flags &= ~DLIF_INUSE;
	}
	lck_mtx_unlock(&dlifp->dl_if_lock);
	ifnet_lock_done(ifp);
}
10037
/*
 * Release an ifnet without clearing DLIF_INUSE (the entry is not yet
 * eligible for recycling); see _dlil_if_release().
 */
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	_dlil_if_release(ifp, false);
}
10043
/* Acquire the global dlil_ifnet list mutex. */
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}
10049
/* Release the global dlil_ifnet list mutex. */
__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}
10055
/* Assert that the current thread owns the dlil_ifnet list mutex. */
__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
10061
/*
 * Detach the INET and INET6 protocol attachments from an interface
 * that is going away.
 */
__private_extern__ void
dlil_proto_unplumb_all(struct ifnet *ifp)
{
	/*
	 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
	 * each bucket contains exactly one entry; PF_VLAN does not need an
	 * explicit unplumb.
	 *
	 * if_proto_hash[3] is for other protocols; we expect anything
	 * in this bucket to respond to the DETACHING event (which would
	 * have happened by now) and do the unplumb then.
	 */
	(void) proto_unplumb(PF_INET, ifp);
	(void) proto_unplumb(PF_INET6, ifp);
}
10077
/*
 * Copy the interface's cached IPv4 source route into *dst, under
 * if_cached_route_lock.
 */
static void
ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
{
	/* take as spin lock, then convert to a full mutex for the copy */
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout(dst, &ifp->if_src_route, sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
10088
/*
 * Install *src as the interface's cached IPv4 source route when route
 * caching is enabled (if_fwd_cacheok); otherwise release it.
 */
static void
ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
{
	/* take as spin lock, then convert to a full mutex for the copy */
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin(src, &ifp->if_src_route, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
10102
/*
 * Copy the interface's cached IPv6 source route into *dst, under
 * if_cached_route_lock.
 */
static void
ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
{
	/* take as spin lock, then convert to a full mutex for the copy */
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
	    sizeof(*dst));

	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
10114
/*
 * Install *src as the interface's cached IPv6 source route when route
 * caching is enabled (if_fwd_cacheok); otherwise release it.
 */
static void
ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
{
	/* take as spin lock, then convert to a full mutex for the copy */
	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
	lck_mtx_convert_spin(&ifp->if_cached_route_lock);

	if (ifp->if_fwd_cacheok) {
		route_copyin((struct route *)src,
		    (struct route *)&ifp->if_src_route6, sizeof(*src));
	} else {
		ROUTE_RELEASE(src);
	}
	lck_mtx_unlock(&ifp->if_cached_route_lock);
}
10129
/*
 * Look up an IPv4 route for src_ip, preferring the interface's cached
 * source route.  On a cache miss (or stale/mismatched cache entry) a
 * fresh scoped lookup is performed and the result is installed in the
 * cache.  Returns the route (with a reference the caller owns) or NULL
 * if no route was found.
 */
struct rtentry *
ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
{
	struct route src_rt;
	struct sockaddr_in *dst;

	dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);

	ifp_src_route_copyout(ifp, &src_rt);

	/* cache miss: route unusable, or destination no longer matches */
	if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the sockaddr if the cache never held AF_INET */
		if (dst->sin_family != AF_INET) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			dst->sin_len = sizeof(src_rt.ro_dst);
			dst->sin_family = AF_INET;
		}
		dst->sin_addr = src_ip;

		VERIFY(src_rt.ro_rt == NULL);
		src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
		    0, 0, ifp->if_index);

		if (src_rt.ro_rt != NULL) {
			/* retain a ref, copyin consumes one */
			struct rtentry *rte = src_rt.ro_rt;
			RT_ADDREF(rte);
			ifp_src_route_copyin(ifp, &src_rt);
			src_rt.ro_rt = rte;
		}
	}

	return src_rt.ro_rt;
}
10164
/*
 * IPv6 counterpart of ifnet_cached_rtlookup_inet(): look up a route
 * for *src_ip6 via the interface's cached IPv6 source route, falling
 * back to a fresh scoped lookup (installed in the cache) on a miss.
 * Returns the route (with a reference the caller owns) or NULL.
 */
struct rtentry *
ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
{
	struct route_in6 src_rt;

	ifp_src_route6_copyout(ifp, &src_rt);

	/* cache miss: route unusable, or destination no longer matches */
	if (ROUTE_UNUSABLE(&src_rt) ||
	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
		ROUTE_RELEASE(&src_rt);
		/* (re)initialize the sockaddr if the cache never held AF_INET6 */
		if (src_rt.ro_dst.sin6_family != AF_INET6) {
			bzero(&src_rt.ro_dst, sizeof(src_rt.ro_dst));
			src_rt.ro_dst.sin6_len = sizeof(src_rt.ro_dst);
			src_rt.ro_dst.sin6_family = AF_INET6;
		}
		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
		bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
		    sizeof(src_rt.ro_dst.sin6_addr));

		if (src_rt.ro_rt == NULL) {
			src_rt.ro_rt = rtalloc1_scoped(
				(struct sockaddr *)&src_rt.ro_dst, 0, 0,
				ifp->if_index);

			if (src_rt.ro_rt != NULL) {
				/* retain a ref, copyin consumes one */
				struct rtentry *rte = src_rt.ro_rt;
				RT_ADDREF(rte);
				ifp_src_route6_copyin(ifp, &src_rt);
				src_rt.ro_rt = rte;
			}
		}
	}

	return src_rt.ro_rt;
}
10201
/*
 * Update the interface's link quality metric (LQM) state and post a
 * KEV_DL_LINK_QUALITY_METRIC_CHANGED event when it changes.
 *
 * Locking: if 'locked' is non-zero the caller already holds the ifnet
 * lock exclusively; it is dropped around the event post and reacquired
 * before returning.  Otherwise the lock is taken and released here.
 */
void
if_lqm_update(struct ifnet *ifp, int lqm, int locked)
{
	struct kev_dl_link_quality_metric_data ev_lqm_data;

	VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);

	/* Normalize to edge */
	if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
		lqm = IFNET_LQM_THRESH_ABORT;
		/* kick the fast timer so aborted connections are handled soon */
		atomic_bitset_32(&tcbinfo.ipi_flags,
		    INPCBINFO_HANDLE_LQM_ABORT);
		inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
	} else if (lqm > IFNET_LQM_THRESH_ABORT &&
	    lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
		lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
	} else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
	    lqm <= IFNET_LQM_THRESH_POOR) {
		lqm = IFNET_LQM_THRESH_POOR;
	} else if (lqm > IFNET_LQM_THRESH_POOR &&
	    lqm <= IFNET_LQM_THRESH_GOOD) {
		lqm = IFNET_LQM_THRESH_GOOD;
	}

	/*
	 * Take the lock if needed
	 */
	if (!locked) {
		ifnet_lock_exclusive(ifp);
	}

	if (lqm == ifp->if_interface_state.lqm_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID)) {
		/*
		 * Release the lock if was not held by the caller
		 */
		if (!locked) {
			ifnet_lock_done(ifp);
		}
		return; /* nothing to update */
	}
	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_LQM_STATE_VALID;
	ifp->if_interface_state.lqm_state = (int8_t)lqm;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&ev_lqm_data, sizeof(ev_lqm_data));
	ev_lqm_data.link_quality_metric = lqm;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
	    (struct net_event_data *)&ev_lqm_data, sizeof(ev_lqm_data), FALSE);

	/*
	 * Reacquire the lock for the caller
	 */
	if (locked) {
		ifnet_lock_exclusive(ifp);
	}
}
10266
/*
 * Update the cellular RRC state and post KEV_DL_RRC_STATE_CHANGED when
 * it changes.
 *
 * Locking: the caller must hold the ifnet lock exclusively; it is
 * dropped around the event post and reacquired before returning.
 */
static void
if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
{
	struct kev_dl_rrc_state kev;

	/* no-op when the state is already valid and unchanged */
	if (rrc_state == ifp->if_interface_state.rrc_state &&
	    (ifp->if_interface_state.valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		return;
	}

	ifp->if_interface_state.valid_bitmask |=
	    IF_INTERFACE_STATE_RRC_STATE_VALID;

	ifp->if_interface_state.rrc_state = (uint8_t)rrc_state;

	/*
	 * Don't want to hold the lock when issuing kernel events
	 */
	ifnet_lock_done(ifp);

	bzero(&kev, sizeof(struct kev_dl_rrc_state));
	kev.rrc_state = rrc_state;

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
	    (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state), FALSE);

	ifnet_lock_exclusive(ifp);
}
10296
/*
 * Apply an externally supplied interface state (LQM, cellular RRC
 * state, availability) to the interface, validating each field that is
 * marked valid in valid_bitmask.  When the interface transitions to
 * available, TCP connections over it are prodded to send probes
 * immediately instead of waiting for their timers.
 *
 * Returns 0 on success, ENOTSUP when RRC state is set on a
 * non-cellular interface, or EINVAL for out-of-range values.
 */
errno_t
if_state_update(struct ifnet *ifp,
    struct if_interface_state *if_interface_state)
{
	u_short if_index_available = 0;

	ifnet_lock_exclusive(ifp);

	/* RRC state only makes sense on cellular interfaces */
	if ((ifp->if_type != IFT_CELLULAR) &&
	    (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID)) {
		ifnet_lock_done(ifp);
		return ENOTSUP;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) &&
	    (if_interface_state->lqm_state < IFNET_LQM_MIN ||
	    if_interface_state->lqm_state > IFNET_LQM_MAX)) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}
	if ((if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_IDLE &&
	    if_interface_state->rrc_state !=
	    IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
		ifnet_lock_done(ifp);
		return EINVAL;
	}

	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_LQM_STATE_VALID) {
		/* lock is held (locked == 1); if_lqm_update keeps it held */
		if_lqm_update(ifp, if_interface_state->lqm_state, 1);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_RRC_STATE_VALID) {
		if_rrc_state_update(ifp, if_interface_state->rrc_state);
	}
	if (if_interface_state->valid_bitmask &
	    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
		ifp->if_interface_state.valid_bitmask |=
		    IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
		ifp->if_interface_state.interface_availability =
		    if_interface_state->interface_availability;

		if (ifp->if_interface_state.interface_availability ==
		    IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) available\n",
			    __func__, if_name(ifp), ifp->if_index);
			if_index_available = ifp->if_index;
		} else {
			os_log(OS_LOG_DEFAULT, "%s: interface %s (%u) unavailable)\n",
			    __func__, if_name(ifp), ifp->if_index);
		}
	}
	ifnet_lock_done(ifp);

	/*
	 * Check if the TCP connections going on this interface should be
	 * forced to send probe packets instead of waiting for TCP timers
	 * to fire. This is done on an explicit notification such as
	 * SIOCSIFINTERFACESTATE which marks the interface as available.
	 */
	if (if_index_available > 0) {
		tcp_interface_send_probe(if_index_available);
	}

	return 0;
}
10367
10368 void
if_get_state(struct ifnet * ifp,struct if_interface_state * if_interface_state)10369 if_get_state(struct ifnet *ifp,
10370 struct if_interface_state *if_interface_state)
10371 {
10372 ifnet_lock_shared(ifp);
10373
10374 if_interface_state->valid_bitmask = 0;
10375
10376 if (ifp->if_interface_state.valid_bitmask &
10377 IF_INTERFACE_STATE_RRC_STATE_VALID) {
10378 if_interface_state->valid_bitmask |=
10379 IF_INTERFACE_STATE_RRC_STATE_VALID;
10380 if_interface_state->rrc_state =
10381 ifp->if_interface_state.rrc_state;
10382 }
10383 if (ifp->if_interface_state.valid_bitmask &
10384 IF_INTERFACE_STATE_LQM_STATE_VALID) {
10385 if_interface_state->valid_bitmask |=
10386 IF_INTERFACE_STATE_LQM_STATE_VALID;
10387 if_interface_state->lqm_state =
10388 ifp->if_interface_state.lqm_state;
10389 }
10390 if (ifp->if_interface_state.valid_bitmask &
10391 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
10392 if_interface_state->valid_bitmask |=
10393 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
10394 if_interface_state->interface_availability =
10395 ifp->if_interface_state.interface_availability;
10396 }
10397
10398 ifnet_lock_done(ifp);
10399 }
10400
10401 errno_t
if_probe_connectivity(struct ifnet * ifp,u_int32_t conn_probe)10402 if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
10403 {
10404 if (conn_probe > 1) {
10405 return EINVAL;
10406 }
10407 if (conn_probe == 0) {
10408 if_clear_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
10409 } else {
10410 if_set_eflags(ifp, IFEF_PROBE_CONNECTIVITY);
10411 }
10412
10413 #if NECP
10414 necp_update_all_clients();
10415 #endif /* NECP */
10416
10417 tcp_probe_connectivity(ifp, conn_probe);
10418 return 0;
10419 }
10420
10421 /* for uuid.c */
10422 static int
get_ether_index(int * ret_other_index)10423 get_ether_index(int * ret_other_index)
10424 {
10425 struct ifnet *ifp;
10426 int en0_index = 0;
10427 int other_en_index = 0;
10428 int any_ether_index = 0;
10429 short best_unit = 0;
10430
10431 *ret_other_index = 0;
10432 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
10433 /*
10434 * find en0, or if not en0, the lowest unit en*, and if not
10435 * that, any ethernet
10436 */
10437 ifnet_lock_shared(ifp);
10438 if (strcmp(ifp->if_name, "en") == 0) {
10439 if (ifp->if_unit == 0) {
10440 /* found en0, we're done */
10441 en0_index = ifp->if_index;
10442 ifnet_lock_done(ifp);
10443 break;
10444 }
10445 if (other_en_index == 0 || ifp->if_unit < best_unit) {
10446 other_en_index = ifp->if_index;
10447 best_unit = ifp->if_unit;
10448 }
10449 } else if (ifp->if_type == IFT_ETHER && any_ether_index == 0) {
10450 any_ether_index = ifp->if_index;
10451 }
10452 ifnet_lock_done(ifp);
10453 }
10454 if (en0_index == 0) {
10455 if (other_en_index != 0) {
10456 *ret_other_index = other_en_index;
10457 } else if (any_ether_index != 0) {
10458 *ret_other_index = any_ether_index;
10459 }
10460 }
10461 return en0_index;
10462 }
10463
/*
 * Fill node[] (ETHER_ADDR_LEN bytes) with an ethernet address suitable
 * as a UUID node identifier, preferring en0, then another en*, then
 * any ethernet interface.  The permanent hardware address is used when
 * set, since it never changes.  Returns 0 on success, -1 when no
 * suitable interface exists.
 */
int
uuid_get_ethernet(u_int8_t *node)
{
	/* cached en0 index; revalidated against ifindex2ifnet each call */
	static int en0_index;
	struct ifnet *ifp;
	int other_index = 0;
	int the_index = 0;
	int ret;

	ifnet_head_lock_shared();
	if (en0_index == 0 || ifindex2ifnet[en0_index] == NULL) {
		en0_index = get_ether_index(&other_index);
	}
	if (en0_index != 0) {
		the_index = en0_index;
	} else if (other_index != 0) {
		the_index = other_index;
	}
	if (the_index != 0) {
		struct dlil_ifnet *dl_if;

		ifp = ifindex2ifnet[the_index];
		VERIFY(ifp != NULL);
		dl_if = (struct dlil_ifnet *)ifp;
		if (dl_if->dl_if_permanent_ether_is_set != 0) {
			/*
			 * Use the permanent ethernet address if it is
			 * available because it will never change.
			 */
			memcpy(node, dl_if->dl_if_permanent_ether,
			    ETHER_ADDR_LEN);
		} else {
			memcpy(node, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		}
		ret = 0;
	} else {
		ret = -1;
	}
	ifnet_head_done();
	return ret;
}
10505
/*
 * sysctl handler for if_rxpoll: writes are rejected with ENXIO when
 * polling (net_rxpoll) is disabled.
 */
static int
sysctl_rxpoll SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_rxpoll;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (net_rxpoll == 0) {
		return ENXIO;
	}

	if_rxpoll = i;
	return err;
}
10527
/*
 * sysctl handler for if_rxpoll_mode_holdtime; written values are
 * clamped up to IF_RXPOLL_MODE_HOLDTIME_MIN.
 */
static int
sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) {
		q = IF_RXPOLL_MODE_HOLDTIME_MIN;
	}

	if_rxpoll_mode_holdtime = q;

	return err;
}
10550
/*
 * sysctl handler for if_rxpoll_sample_holdtime; written values are
 * clamped up to IF_RXPOLL_SAMPLETIME_MIN.
 */
static int
sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_sample_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (q < IF_RXPOLL_SAMPLETIME_MIN) {
		q = IF_RXPOLL_SAMPLETIME_MIN;
	}

	if_rxpoll_sample_holdtime = q;

	return err;
}
10573
/*
 * sysctl handler for if_rxpoll_interval_time; written values are
 * clamped up to IF_RXPOLL_INTERVALTIME_MIN.
 */
static int
sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = if_rxpoll_interval_time;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (q < IF_RXPOLL_INTERVALTIME_MIN) {
		q = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if_rxpoll_interval_time = q;

	return err;
}
10596
/*
 * sysctl handler for the receive-poll low watermark; the new value
 * must be non-zero and strictly below the high watermark.
 */
static int
sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_wlowat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (i == 0 || i >= if_sysctl_rxpoll_whiwat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_wlowat = i;
	return err;
}
10618
/*
 * sysctl handler for the receive-poll high watermark; the new value
 * must be strictly above the low watermark.
 */
static int
sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint32_t i;
	int err;

	i = if_sysctl_rxpoll_whiwat;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (i <= if_sysctl_rxpoll_wlowat) {
		return EINVAL;
	}

	if_sysctl_rxpoll_whiwat = i;
	return err;
}
10640
/*
 * sysctl handler for if_sndq_maxlen; written values are clamped up to
 * IF_SNDQ_MINLEN.
 */
static int
sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_sndq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (i < IF_SNDQ_MINLEN) {
		i = IF_SNDQ_MINLEN;
	}

	if_sndq_maxlen = i;
	return err;
}
10661
/*
 * sysctl handler for if_rcvq_maxlen; written values are clamped up to
 * IF_RCVQ_MINLEN.
 */
static int
sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = if_rcvq_maxlen;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		/* error, or read-only request: nothing more to do */
		return err;
	}

	if (i < IF_RCVQ_MINLEN) {
		i = IF_RCVQ_MINLEN;
	}

	if_rcvq_maxlen = i;
	return err;
}
10682
10683 int
dlil_node_present(struct ifnet * ifp,struct sockaddr * sa,int32_t rssi,int lqm,int npm,u_int8_t srvinfo[48])10684 dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
10685 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
10686 {
10687 struct kev_dl_node_presence kev;
10688 struct sockaddr_dl *sdl;
10689 struct sockaddr_in6 *sin6;
10690 int ret = 0;
10691
10692 VERIFY(ifp);
10693 VERIFY(sa);
10694 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
10695
10696 bzero(&kev, sizeof(kev));
10697 sin6 = &kev.sin6_node_address;
10698 sdl = &kev.sdl_node_address;
10699 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
10700 kev.rssi = rssi;
10701 kev.link_quality_metric = lqm;
10702 kev.node_proximity_metric = npm;
10703 bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));
10704
10705 ret = nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
10706 if (ret == 0 || ret == EEXIST) {
10707 int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
10708 &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
10709 if (err != 0) {
10710 log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with"
10711 "error %d\n", __func__, err);
10712 }
10713 }
10714
10715 if (ret == EEXIST) {
10716 ret = 0;
10717 }
10718 return ret;
10719 }
10720
/*
 * Register a neighbor node as absent: removes it from the ND6 neighbor
 * state and, on success, posts a KEV_DL_NODE_ABSENCE kernel event.
 * The node may be identified by its IPv6 address (the link-layer
 * address is recovered from the neighbor cache) or by its AF_LINK
 * address (the IPv6 address is derived from it).
 */
void
dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
{
	struct kev_dl_node_absence kev = {};
	struct sockaddr_in6 *kev_sin6 = NULL;
	struct sockaddr_dl *kev_sdl = NULL;
	int error = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL);
	VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	if (sa->sa_family == AF_INET6) {
		/*
		 * If IPv6 address is given, get the link layer
		 * address from what was cached in the neighbor cache
		 */
		VERIFY(sa->sa_len <= sizeof(*kev_sin6));
		bcopy(sa, kev_sin6, sa->sa_len);
		error = nd6_alt_node_absent(ifp, kev_sin6, kev_sdl);
	} else {
		/*
		 * If passed address is AF_LINK type, derive the address
		 * based on the link address.
		 */
		nd6_alt_node_addr_decompose(ifp, sa, kev_sdl, kev_sin6);
		error = nd6_alt_node_absent(ifp, kev_sin6, NULL);
	}

	if (error == 0) {
		/* stamp the event with this interface's type and index */
		kev_sdl->sdl_type = ifp->if_type;
		kev_sdl->sdl_index = ifp->if_index;

		dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
		    &kev.link_data, sizeof(kev), FALSE);
	}
}
10761
/*
 * Variant of dlil_node_present() where the caller supplies the IPv6
 * address (sa) and the link-layer address (sdl) separately instead of
 * a single composite sockaddr.  Updates the ND6 neighbor state and
 * posts KEV_DL_NODE_PRESENCE; EEXIST is treated as success (the event
 * is posted marked as an update).
 */
int
dlil_node_present_v2(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdl,
    int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
{
	struct kev_dl_node_presence kev = {};
	struct sockaddr_dl *kev_sdl = NULL;
	struct sockaddr_in6 *kev_sin6 = NULL;
	int ret = 0;

	VERIFY(ifp != NULL);
	VERIFY(sa != NULL && sdl != NULL);
	VERIFY(sa->sa_family == AF_INET6 && sdl->sdl_family == AF_LINK);

	kev_sin6 = &kev.sin6_node_address;
	kev_sdl = &kev.sdl_node_address;

	VERIFY(sdl->sdl_len <= sizeof(*kev_sdl));
	bcopy(sdl, kev_sdl, sdl->sdl_len);
	/* stamp the event with this interface's type and index */
	kev_sdl->sdl_type = ifp->if_type;
	kev_sdl->sdl_index = ifp->if_index;

	VERIFY(sa->sa_len <= sizeof(*kev_sin6));
	bcopy(sa, kev_sin6, sa->sa_len);

	kev.rssi = rssi;
	kev.link_quality_metric = lqm;
	kev.node_proximity_metric = npm;
	bcopy(srvinfo, kev.node_service_info, sizeof(kev.node_service_info));

	ret = nd6_alt_node_present(ifp, SIN6(sa), sdl, rssi, lqm, npm);
	if (ret == 0 || ret == EEXIST) {
		int err = dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
		    &kev.link_data, sizeof(kev), (ret == EEXIST) ? TRUE : FALSE);
		if (err != 0) {
			log(LOG_ERR, "%s: Post DL_NODE_PRESENCE failed with error %d\n", __func__, err);
		}
	}

	/* EEXIST means the node was already known; report success */
	if (ret == EEXIST) {
		ret = 0;
	}
	return ret;
}
10805
10806 const void *
dlil_ifaddr_bytes(const struct sockaddr_dl * sdl,size_t * sizep,kauth_cred_t * credp)10807 dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
10808 kauth_cred_t *credp)
10809 {
10810 const u_int8_t *bytes;
10811 size_t size;
10812
10813 bytes = CONST_LLADDR(sdl);
10814 size = sdl->sdl_alen;
10815
10816 #if CONFIG_MACF
10817 if (dlil_lladdr_ckreq) {
10818 switch (sdl->sdl_type) {
10819 case IFT_ETHER:
10820 case IFT_IEEE1394:
10821 break;
10822 default:
10823 credp = NULL;
10824 break;
10825 }
10826 ;
10827
10828 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
10829 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
10830 [0] = 2
10831 };
10832
10833 bytes = unspec;
10834 }
10835 }
10836 #else
10837 #pragma unused(credp)
10838 #endif
10839
10840 if (sizep != NULL) {
10841 *sizep = size;
10842 }
10843 return bytes;
10844 }
10845
10846 void
dlil_report_issues(struct ifnet * ifp,u_int8_t modid[DLIL_MODIDLEN],u_int8_t info[DLIL_MODARGLEN])10847 dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
10848 u_int8_t info[DLIL_MODARGLEN])
10849 {
10850 struct kev_dl_issues kev;
10851 struct timeval tv;
10852
10853 VERIFY(ifp != NULL);
10854 VERIFY(modid != NULL);
10855 _CASSERT(sizeof(kev.modid) == DLIL_MODIDLEN);
10856 _CASSERT(sizeof(kev.info) == DLIL_MODARGLEN);
10857
10858 bzero(&kev, sizeof(kev));
10859
10860 microtime(&tv);
10861 kev.timestamp = tv.tv_sec;
10862 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
10863 if (info != NULL) {
10864 bcopy(info, &kev.info, DLIL_MODARGLEN);
10865 }
10866
10867 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
10868 &kev.link_data, sizeof(kev), FALSE);
10869 }
10870
/*
 * Handle the SIOCSIFOPPORTUNISTIC / SIOCGIFOPPORTUNISTIC ioctls.
 *
 * Set: requires superuser; maps ifo_flags to a throttling level and
 * applies it via ifnet_set_throttle().  Get: reads the current level
 * and reflects it back into ifo_flags.  In both cases, on success,
 * ifo_inuse is filled with the count of opportunistic TCP and UDP
 * connections currently using the interface.
 */
errno_t
ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
	u_int32_t level = IFNET_THROTTLE_OFF;
	errno_t result = 0;

	VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);

	if (cmd == SIOCSIFOPPORTUNISTIC) {
		/*
		 * XXX: Use priv_check_cred() instead of root check?
		 */
		if ((result = proc_suser(p)) != 0) {
			return result;
		}

		/* Only all-or-nothing throttling is expressible via ifo_flags */
		if (ifr->ifr_opportunistic.ifo_flags ==
		    IFRIFOF_BLOCK_OPPORTUNISTIC) {
			level = IFNET_THROTTLE_OPPORTUNISTIC;
		} else if (ifr->ifr_opportunistic.ifo_flags == 0) {
			level = IFNET_THROTTLE_OFF;
		} else {
			result = EINVAL;
		}

		if (result == 0) {
			result = ifnet_set_throttle(ifp, level);
		}
	} else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
		ifr->ifr_opportunistic.ifo_flags = 0;
		if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
			ifr->ifr_opportunistic.ifo_flags |=
			    IFRIFOF_BLOCK_OPPORTUNISTIC;
		}
	}

	/*
	 * Return the count of current opportunistic connections
	 * over the interface.
	 */
	if (result == 0) {
		uint32_t flags = 0;
		flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_SETCMD : 0;
		flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
		    INPCB_OPPORTUNISTIC_THROTTLEON : 0;
		ifr->ifr_opportunistic.ifo_inuse =
		    udp_count_opportunistic(ifp->if_index, flags) +
		    tcp_count_opportunistic(ifp->if_index, flags);
	}

	/* Requested level already in effect: not an error to the caller */
	if (result == EALREADY) {
		result = 0;
	}

	return result;
}
10929
10930 int
ifnet_get_throttle(struct ifnet * ifp,u_int32_t * level)10931 ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
10932 {
10933 struct ifclassq *ifq;
10934 int err = 0;
10935
10936 if (!(ifp->if_eflags & IFEF_TXSTART)) {
10937 return ENXIO;
10938 }
10939
10940 *level = IFNET_THROTTLE_OFF;
10941
10942 ifq = ifp->if_snd;
10943 IFCQ_LOCK(ifq);
10944 /* Throttling works only for IFCQ, not ALTQ instances */
10945 if (IFCQ_IS_ENABLED(ifq)) {
10946 cqrq_throttle_t req = { 0, IFNET_THROTTLE_OFF };
10947
10948 err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
10949 *level = req.level;
10950 }
10951 IFCQ_UNLOCK(ifq);
10952
10953 return err;
10954 }
10955
/*
 * Set the transmit throttling level of `ifp' to `level' (either
 * IFNET_THROTTLE_OFF or IFNET_THROTTLE_OPPORTUNISTIC).  Returns ENXIO
 * for interfaces without a starter thread, EINVAL for an unknown
 * level.  On success, NECP clients are notified and -- when throttling
 * was just turned off -- the starter thread is kicked so queued
 * packets drain.
 */
int
ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
{
	struct ifclassq *ifq;
	int err = 0;

	if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	ifq = ifp->if_snd;

	/* Validate the requested level before touching the queue */
	switch (level) {
	case IFNET_THROTTLE_OFF:
	case IFNET_THROTTLE_OPPORTUNISTIC:
		break;
	default:
		return EINVAL;
	}

	IFCQ_LOCK(ifq);
	if (IFCQ_IS_ENABLED(ifq)) {
		cqrq_throttle_t req = { 1, level };

		err = fq_if_request_classq(ifq, CLASSQRQ_THROTTLE, &req);
	}
	IFCQ_UNLOCK(ifq);

	if (err == 0) {
		DLIL_PRINTF("%s: throttling level set to %d\n", if_name(ifp),
		    level);
#if NECP
		necp_update_all_clients();
#endif /* NECP */
		/* Resume output now that throttling is off */
		if (level == IFNET_THROTTLE_OFF) {
			ifnet_start(ifp);
		}
	}

	return err;
}
10997
/*
 * Handle the SIOCSIFLOG / SIOCGIFLOG ioctls.
 *
 * Set: requires PRIV_NET_INTERFACE_CONTROL; validates the level and
 * flags from the request and applies them via ifnet_set_log().
 * Get: copies the interface's current logging parameters back into
 * the request.
 */
errno_t
ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
    struct proc *p)
{
#pragma unused(p)
	errno_t result = 0;
	uint32_t flags;
	int level, category, subcategory;

	VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);

	if (cmd == SIOCSIFLOG) {
		if ((result = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_INTERFACE_CONTROL, 0)) != 0) {
			return result;
		}

		level = ifr->ifr_log.ifl_level;
		if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) {
			result = EINVAL;
		}

		/*
		 * Note the side effect: flags is masked to the supported
		 * set here, and must be non-zero after masking.
		 */
		flags = ifr->ifr_log.ifl_flags;
		if ((flags &= IFNET_LOGF_MASK) == 0) {
			result = EINVAL;
		}

		category = ifr->ifr_log.ifl_category;
		subcategory = ifr->ifr_log.ifl_subcategory;

		if (result == 0) {
			result = ifnet_set_log(ifp, level, flags,
			    category, subcategory);
		}
	} else {
		result = ifnet_get_log(ifp, &level, &flags, &category,
		    &subcategory);
		if (result == 0) {
			ifr->ifr_log.ifl_level = level;
			ifr->ifr_log.ifl_flags = flags;
			ifr->ifr_log.ifl_category = category;
			ifr->ifr_log.ifl_subcategory = subcategory;
		}
	}

	return result;
}
11045
/*
 * Apply logging parameters to `ifp'.  The level applies to all
 * facilities; `flags' selects the facilities being targeted and is
 * accumulated into the interface's existing facility set.  Facilities
 * other than DLIL are forwarded to the driver via if_output_ctl when
 * one is registered; otherwise they are silently dropped.
 *
 * Callers must pass a level within [IFNET_LOG_MIN, IFNET_LOG_MAX] and
 * at least one valid facility flag (enforced by VERIFY).
 */
int
ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
    int32_t category, int32_t subcategory)
{
	int err = 0;

	VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
	VERIFY(flags & IFNET_LOGF_MASK);

	/*
	 * The logging level applies to all facilities; make sure to
	 * update them all with the most current level.
	 */
	flags |= ifp->if_log.flags;

	if (ifp->if_output_ctl != NULL) {
		struct ifnet_log_params l;

		bzero(&l, sizeof(l));
		l.level = level;
		l.flags = flags;
		/* DLIL facility is ours; don't forward it downstream */
		l.flags &= ~IFNET_LOGF_DLIL;
		l.category = category;
		l.subcategory = subcategory;

		/* Send this request to lower layers */
		if (l.flags != 0) {
			err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
			    sizeof(l), &l);
		}
	} else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
		/*
		 * If targeted to the lower layers without an output
		 * control callback registered on the interface, just
		 * silently ignore facilities other than ours.
		 */
		flags &= IFNET_LOGF_DLIL;
		/* Nothing left to log for at all: clear the level too */
		if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL))) {
			level = 0;
		}
	}

	if (err == 0) {
		/* Resetting to the default level clears all facilities */
		if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) {
			ifp->if_log.flags = 0;
		} else {
			ifp->if_log.flags |= flags;
		}

		log(LOG_INFO, "%s: logging level set to %d flags=%b "
		    "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
		    ifp->if_log.level, ifp->if_log.flags,
		    IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
		    category, subcategory);
	}

	return err;
}
11104
11105 int
ifnet_get_log(struct ifnet * ifp,int32_t * level,uint32_t * flags,int32_t * category,int32_t * subcategory)11106 ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
11107 int32_t *category, int32_t *subcategory)
11108 {
11109 if (level != NULL) {
11110 *level = ifp->if_log.level;
11111 }
11112 if (flags != NULL) {
11113 *flags = ifp->if_log.flags;
11114 }
11115 if (category != NULL) {
11116 *category = ifp->if_log.category;
11117 }
11118 if (subcategory != NULL) {
11119 *subcategory = ifp->if_log.subcategory;
11120 }
11121
11122 return 0;
11123 }
11124
11125 int
ifnet_notify_address(struct ifnet * ifp,int af)11126 ifnet_notify_address(struct ifnet *ifp, int af)
11127 {
11128 struct ifnet_notify_address_params na;
11129
11130 #if PF
11131 (void) pf_ifaddr_hook(ifp);
11132 #endif /* PF */
11133
11134 if (ifp->if_output_ctl == NULL) {
11135 return EOPNOTSUPP;
11136 }
11137
11138 bzero(&na, sizeof(na));
11139 na.address_family = (sa_family_t)af;
11140
11141 return ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
11142 sizeof(na), &na);
11143 }
11144
11145 errno_t
ifnet_flowid(struct ifnet * ifp,uint32_t * flowid)11146 ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
11147 {
11148 if (ifp == NULL || flowid == NULL) {
11149 return EINVAL;
11150 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
11151 !IF_FULLY_ATTACHED(ifp)) {
11152 return ENXIO;
11153 }
11154
11155 *flowid = ifp->if_flowhash;
11156
11157 return 0;
11158 }
11159
/*
 * Flow-control the interface's output: register a flow-control entry
 * keyed by the interface's flow hash and mark the starter as
 * flow-controlled so it stops dequeueing.  Output resumes when
 * ifnet_flowadv() is later called with the matching flow hash.
 */
errno_t
ifnet_disable_output(struct ifnet *ifp)
{
	int err;

	if (ifp == NULL) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART) ||
	    !IF_FULLY_ATTACHED(ifp)) {
		return ENXIO;
	}

	if ((err = ifnet_fc_add(ifp)) == 0) {
		/* Spin-held only for the flag update; no blocking inside */
		lck_mtx_lock_spin(&ifp->if_start_lock);
		ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
		lck_mtx_unlock(&ifp->if_start_lock);
	}
	return err;
}
11179
11180 errno_t
ifnet_enable_output(struct ifnet * ifp)11181 ifnet_enable_output(struct ifnet *ifp)
11182 {
11183 if (ifp == NULL) {
11184 return EINVAL;
11185 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
11186 !IF_FULLY_ATTACHED(ifp)) {
11187 return ENXIO;
11188 }
11189
11190 ifnet_start_common(ifp, TRUE);
11191 return 0;
11192 }
11193
/*
 * Flow advisory: the driver indicates the flow identified by
 * `flowhash' may transmit again.  Looks up (and removes) the matching
 * flow-control entry and, if the interface is still attached and its
 * flow hash still matches, re-enables its output.
 */
void
ifnet_flowadv(uint32_t flowhash)
{
	struct ifnet_fc_entry *ifce;
	struct ifnet *ifp;

	ifce = ifnet_fc_get(flowhash);
	if (ifce == NULL) {
		return;
	}

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* flow hash gets recalculated per attach, so check */
	if (ifnet_is_attached(ifp, 1)) {
		if (ifp->if_flowhash == flowhash) {
			(void) ifnet_enable_output(ifp);
		}
		/* Drop the I/O reference taken by ifnet_is_attached() */
		ifnet_decr_iorefcnt(ifp);
	}
	ifnet_fc_entry_free(ifce);
}
11217
11218 /*
11219 * Function to compare ifnet_fc_entries in ifnet flow control tree
11220 */
11221 static inline int
ifce_cmp(const struct ifnet_fc_entry * fc1,const struct ifnet_fc_entry * fc2)11222 ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
11223 {
11224 return fc1->ifce_flowhash - fc2->ifce_flowhash;
11225 }
11226
/*
 * Insert a flow-control entry for `ifp' (keyed by its flow hash) into
 * the global ifnet_fc_tree.  Returns 0 if inserted or already present
 * for this interface, EAGAIN on a hash collision with a different
 * interface.
 */
static int
ifnet_fc_add(struct ifnet *ifp)
{
	struct ifnet_fc_entry keyfc, *ifce;
	uint32_t flowhash;

	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
	VERIFY(ifp->if_flowhash != 0);
	flowhash = ifp->if_flowhash;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce != NULL && ifce->ifce_ifp == ifp) {
		/* Entry is already in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return 0;
	}

	if (ifce != NULL) {
		/*
		 * There is a different fc entry with the same flow hash
		 * but different ifp pointer. There can be a collision
		 * on flow hash but the probability is low. Let's just
		 * avoid adding a second one when there is a collision.
		 */
		lck_mtx_unlock(&ifnet_fc_lock);
		return EAGAIN;
	}

	/* become regular mutex; zalloc below may block */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	ifce = zalloc_flags(ifnet_fc_zone, Z_WAITOK | Z_ZERO);
	ifce->ifce_flowhash = flowhash;
	ifce->ifce_ifp = ifp;

	RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
	lck_mtx_unlock(&ifnet_fc_lock);
	return 0;
}
11270
/*
 * Look up and remove the flow-control entry keyed by `flowhash' from
 * the global ifnet_fc_tree.  Returns the removed entry (caller owns it
 * and must free via ifnet_fc_entry_free()), or NULL when there is no
 * entry or its interface is no longer attached.
 */
static struct ifnet_fc_entry *
ifnet_fc_get(uint32_t flowhash)
{
	struct ifnet_fc_entry keyfc, *ifce;
	struct ifnet *ifp;

	bzero(&keyfc, sizeof(keyfc));
	keyfc.ifce_flowhash = flowhash;

	lck_mtx_lock_spin(&ifnet_fc_lock);
	ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
	if (ifce == NULL) {
		/* Entry is not present in ifnet_fc_tree, return */
		lck_mtx_unlock(&ifnet_fc_lock);
		return NULL;
	}

	/* Entry is handed to the caller regardless of attach state below */
	RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);

	VERIFY(ifce->ifce_ifp != NULL);
	ifp = ifce->ifce_ifp;

	/* become regular mutex */
	lck_mtx_convert_spin(&ifnet_fc_lock);

	if (!ifnet_is_attached(ifp, 0)) {
		/*
		 * This ifp is not attached or in the process of being
		 * detached; just don't process it.
		 */
		ifnet_fc_entry_free(ifce);
		ifce = NULL;
	}
	lck_mtx_unlock(&ifnet_fc_lock);

	return ifce;
}
11308
/*
 * Release a flow-control entry back to its zone.
 */
static void
ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
{
	zfree(ifnet_fc_zone, ifce);
}
11314
11315 static uint32_t
ifnet_calc_flowhash(struct ifnet * ifp)11316 ifnet_calc_flowhash(struct ifnet *ifp)
11317 {
11318 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
11319 uint32_t flowhash = 0;
11320
11321 if (ifnet_flowhash_seed == 0) {
11322 ifnet_flowhash_seed = RandomULong();
11323 }
11324
11325 bzero(&fh, sizeof(fh));
11326
11327 (void) snprintf(fh.ifk_name, sizeof(fh.ifk_name), "%s", ifp->if_name);
11328 fh.ifk_unit = ifp->if_unit;
11329 fh.ifk_flags = ifp->if_flags;
11330 fh.ifk_eflags = ifp->if_eflags;
11331 fh.ifk_capabilities = ifp->if_capabilities;
11332 fh.ifk_capenable = ifp->if_capenable;
11333 fh.ifk_output_sched_model = ifp->if_output_sched_model;
11334 fh.ifk_rand1 = RandomULong();
11335 fh.ifk_rand2 = RandomULong();
11336
11337 try_again:
11338 flowhash = net_flowhash(&fh, sizeof(fh), ifnet_flowhash_seed);
11339 if (flowhash == 0) {
11340 /* try to get a non-zero flowhash */
11341 ifnet_flowhash_seed = RandomULong();
11342 goto try_again;
11343 }
11344
11345 return flowhash;
11346 }
11347
/*
 * Store a network signature of `len' bytes from `data' for the given
 * address family on `ifp'.  A zero length clears the stored signature.
 * Returns EINVAL for an unsupported family or oversized signature,
 * ENOMEM when the per-family extra data is not allocated.
 *
 * NOTE(review): unlike ifnet_get_netsignature(), no NULL checks on
 * ifp/data are performed here -- presumably callers guarantee them;
 * confirm.  `flags' is currently unused.
 */
int
ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
    uint16_t flags, uint8_t *data)
{
#pragma unused(flags)
	int error = 0;

	switch (family) {
	case AF_INET:
		if_inetdata_lock_exclusive(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN_IFEXTRA(ifp)->netsig,
				    sizeof(IN_IFEXTRA(ifp)->netsig));
				if_inetdata_lock_done(ifp);
				break;
			} else if (len > sizeof(IN_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			IN_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_exclusive(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			if (len == 0) {
				/* Allow clearing the signature */
				IN6_IFEXTRA(ifp)->netsig_len = 0;
				bzero(IN6_IFEXTRA(ifp)->netsig,
				    sizeof(IN6_IFEXTRA(ifp)->netsig));
				if_inet6data_lock_done(ifp);
				break;
			} else if (len > sizeof(IN6_IFEXTRA(ifp)->netsig)) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			IN6_IFEXTRA(ifp)->netsig_len = len;
			bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
11409
/*
 * Copy out the stored network signature for the given address family
 * on `ifp'.  On input *len is the caller's buffer size; on success it
 * is updated to the actual signature length.  Returns EINVAL for bad
 * arguments or a too-small buffer, ENOENT when no signature is stored,
 * ENOMEM when the per-family extra data is not allocated.  On success
 * *flags (if provided) is cleared.
 */
int
ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
    uint16_t *flags, uint8_t *data)
{
	int error = 0;

	if (ifp == NULL || len == NULL || data == NULL) {
		return EINVAL;
	}

	switch (family) {
	case AF_INET:
		if_inetdata_lock_shared(ifp);
		if (IN_IFEXTRA(ifp) != NULL) {
			/* Caller's buffer must hold the whole signature */
			if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inetdata_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inetdata_lock_done(ifp);
		break;

	case AF_INET6:
		if_inet6data_lock_shared(ifp);
		if (IN6_IFEXTRA(ifp) != NULL) {
			/* Caller's buffer must hold the whole signature */
			if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
				error = EINVAL;
				if_inet6data_lock_done(ifp);
				break;
			}
			if ((*len = (uint8_t)IN6_IFEXTRA(ifp)->netsig_len) > 0) {
				bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
			} else {
				error = ENOENT;
			}
		} else {
			error = ENOMEM;
		}
		if_inet6data_lock_done(ifp);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error == 0 && flags != NULL) {
		*flags = 0;
	}

	return error;
}
11470
/*
 * Install up to NAT64_MAX_NUM_PREFIXES NAT64 prefixes on `ifp' from
 * the `prefixes' array.  A zero prefix_len clears the corresponding
 * slot.  Valid prefix lengths are the RFC 6052 set (32/40/48/56/64/96
 * bits); scoped (interface/link-local) prefixes are rejected.  When at
 * least one prefix was installed, NECP clients are notified.
 *
 * Returns 0 on success, EINVAL on a bad prefix, ENOMEM when the
 * per-interface IPv6 extra data is not allocated.
 */
int
ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
{
	int i, error = 0, one_set = 0;

	if_inet6data_lock_exclusive(ifp);

	if (IN6_IFEXTRA(ifp) == NULL) {
		error = ENOMEM;
		goto out;
	}

	for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
		uint32_t prefix_len =
		    prefixes[i].prefix_len;
		struct in6_addr *prefix =
		    &prefixes[i].ipv6_prefix;

		if (prefix_len == 0) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixes purged from Interface %s\n",
			    if_name(ifp)));
			/* Allow clearing the signature */
			IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
			bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
			    sizeof(struct in6_addr));

			continue;
		} else if (prefix_len != NAT64_PREFIX_LEN_32 &&
		    prefix_len != NAT64_PREFIX_LEN_40 &&
		    prefix_len != NAT64_PREFIX_LEN_48 &&
		    prefix_len != NAT64_PREFIX_LEN_56 &&
		    prefix_len != NAT64_PREFIX_LEN_64 &&
		    prefix_len != NAT64_PREFIX_LEN_96) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefixlen is incorrect %d\n", prefix_len));
			error = EINVAL;
			goto out;
		}

		if (IN6_IS_SCOPE_EMBED(prefix)) {
			clat_log0((LOG_DEBUG,
			    "NAT64 prefix has interface/link local scope.\n"));
			error = EINVAL;
			goto out;
		}

		IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
		bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
		    sizeof(struct in6_addr));
		clat_log0((LOG_DEBUG,
		    "NAT64 prefix set to %s with prefixlen: %d\n",
		    ip6_sprintf(prefix), prefix_len));
		one_set = 1;
	}

out:
	if_inet6data_lock_done(ifp);

	/* Notify clients outside the lock */
	if (error == 0 && one_set != 0) {
		necp_update_all_clients();
	}

	return error;
}
11536
11537 int
ifnet_get_nat64prefix(struct ifnet * ifp,struct ipv6_prefix * prefixes)11538 ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
11539 {
11540 int i, found_one = 0, error = 0;
11541
11542 if (ifp == NULL) {
11543 return EINVAL;
11544 }
11545
11546 if_inet6data_lock_shared(ifp);
11547
11548 if (IN6_IFEXTRA(ifp) == NULL) {
11549 error = ENOMEM;
11550 goto out;
11551 }
11552
11553 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
11554 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0) {
11555 found_one = 1;
11556 }
11557 }
11558
11559 if (found_one == 0) {
11560 error = ENOENT;
11561 goto out;
11562 }
11563
11564 if (prefixes) {
11565 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
11566 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
11567 }
11568
11569 out:
11570 if_inet6data_lock_done(ifp);
11571
11572 return error;
11573 }
11574
/*
 * Checksum-offload debugging on the output path: when forced
 * finalization is enabled (HWCKSUM_DBG_FINALIZE_FORCED), compute the
 * deferred IP/transport checksums in software before the packet
 * reaches the driver, and count what was finalized.  TSO packets are
 * skipped (their checksums are produced by the hardware per segment).
 */
__attribute__((noinline))
static void
dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
    protocol_family_t pf)
{
#pragma unused(ifp)
	uint32_t did_sw;

	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
		return;
	}

	switch (pf) {
	case PF_INET:
		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IP) {
			hwcksum_dbg_finalized_hdr++;
		}
		if (did_sw & CSUM_DELAY_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	case PF_INET6:
		/*
		 * Checksum offload should not have been enabled when
		 * extension headers exist; that also means that we
		 * cannot force-finalize packets with extension headers.
		 * Indicate to the callee should it skip such case by
		 * setting optlen to -1.
		 */
		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
		    m->m_pkthdr.csum_flags);
		if (did_sw & CSUM_DELAY_IPV6_DATA) {
			hwcksum_dbg_finalized_data++;
		}
		break;
	default:
		return;
	}
}
11616
/*
 * Checksum-offload debugging on the input path.  Two modes:
 *
 *  1. HWCKSUM_DBG_PARTIAL_FORCED: pretend the hardware produced a
 *     partial checksum from a configured offset, computing it in
 *     software (simulates hardware without partial-checksum support).
 *
 *  2. Verification of a driver-supplied partial checksum: recompute
 *     the 16-bit 1's complement sum and compare; optionally re-adjust
 *     it to a different start offset (HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)
 *     to emulate hardware that sums from various offsets.
 *
 * `frame_header' must point within the mbuf, at or before m_data.
 */
static void
dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
    protocol_family_t pf)
{
	uint16_t sum = 0;
	uint32_t hlen;

	/* Sanity-check the frame header pointer before doing arithmetic on it */
	if (frame_header == NULL ||
	    frame_header < (char *)mbuf_datastart(m) ||
	    frame_header > (char *)m->m_data) {
		DLIL_PRINTF("%s: frame header pointer 0x%llx out of range "
		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
		    (uint64_t)VM_KERNEL_ADDRPERM(m));
		return;
	}
	/* Link-layer frame header length */
	hlen = (uint32_t)(m->m_data - frame_header);

	switch (pf) {
	case PF_INET:
	case PF_INET6:
		break;
	default:
		return;
	}

	/*
	 * Force partial checksum offload; useful to simulate cases
	 * where the hardware does not support partial checksum offload,
	 * in order to validate correctness throughout the layers above.
	 */
	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;

		if (foff > (uint32_t)m->m_pkthdr.len) {
			return;
		}

		/* Discard whatever RX checksum state the driver supplied */
		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;

		/* Compute 16-bit 1's complement sum from forced offset */
		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));

		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
		m->m_pkthdr.csum_rx_val = sum;
		/* csum_rx_start is relative to the frame header */
		m->m_pkthdr.csum_rx_start = (uint16_t)(foff + hlen);

		hwcksum_dbg_partial_forced++;
		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
	}

	/*
	 * Partial checksum offload verification (and adjustment);
	 * useful to validate and test cases where the hardware
	 * supports partial checksum offload.
	 */
	if ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t rxoff;

		/* Start offset must begin after frame header */
		rxoff = m->m_pkthdr.csum_rx_start;
		if (hlen > rxoff) {
			hwcksum_dbg_bad_rxoff++;
			if (dlil_verbose) {
				DLIL_PRINTF("%s: partial cksum start offset %d "
				    "is less than frame header length %d for "
				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
				    (uint64_t)VM_KERNEL_ADDRPERM(m));
			}
			return;
		}
		/* Rebase the offset to the start of protocol data */
		rxoff -= hlen;

		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
			/*
			 * Compute the expected 16-bit 1's complement sum;
			 * skip this if we've already computed it above
			 * when partial checksum offload is forced.
			 */
			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));

			/* Hardware or driver is buggy */
			if (sum != m->m_pkthdr.csum_rx_val) {
				hwcksum_dbg_bad_cksum++;
				if (dlil_verbose) {
					DLIL_PRINTF("%s: bad partial cksum value "
					    "0x%x (expected 0x%x) for mbuf "
					    "0x%llx [rx_start %d]\n",
					    if_name(ifp),
					    m->m_pkthdr.csum_rx_val, sum,
					    (uint64_t)VM_KERNEL_ADDRPERM(m),
					    m->m_pkthdr.csum_rx_start);
				}
				return;
			}
		}
		hwcksum_dbg_verified++;

		/*
		 * This code allows us to emulate various hardwares that
		 * perform 16-bit 1's complement sum beginning at various
		 * start offset values.
		 */
		if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
			uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;

			if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) {
				return;
			}

			sum = m_adj_sum16(m, rxoff, aoff,
			    m_pktlen(m) - aoff, sum);

			m->m_pkthdr.csum_rx_val = sum;
			m->m_pkthdr.csum_rx_start = (uint16_t)(aoff + hlen);

			hwcksum_dbg_adjusted++;
		}
	}
}
11741
11742 static int
11743 sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
11744 {
11745 #pragma unused(arg1, arg2)
11746 u_int32_t i;
11747 int err;
11748
11749 i = hwcksum_dbg_mode;
11750
11751 err = sysctl_handle_int(oidp, &i, 0, req);
11752 if (err != 0 || req->newptr == USER_ADDR_NULL) {
11753 return err;
11754 }
11755
11756 if (hwcksum_dbg == 0) {
11757 return ENODEV;
11758 }
11759
11760 if ((i & ~HWCKSUM_DBG_MASK) != 0) {
11761 return EINVAL;
11762 }
11763
11764 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
11765
11766 return err;
11767 }
11768
11769 static int
11770 sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
11771 {
11772 #pragma unused(arg1, arg2)
11773 u_int32_t i;
11774 int err;
11775
11776 i = hwcksum_dbg_partial_rxoff_forced;
11777
11778 err = sysctl_handle_int(oidp, &i, 0, req);
11779 if (err != 0 || req->newptr == USER_ADDR_NULL) {
11780 return err;
11781 }
11782
11783 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
11784 return ENODEV;
11785 }
11786
11787 hwcksum_dbg_partial_rxoff_forced = i;
11788
11789 return err;
11790 }
11791
11792 static int
11793 sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
11794 {
11795 #pragma unused(arg1, arg2)
11796 u_int32_t i;
11797 int err;
11798
11799 i = hwcksum_dbg_partial_rxoff_adj;
11800
11801 err = sysctl_handle_int(oidp, &i, 0, req);
11802 if (err != 0 || req->newptr == USER_ADDR_NULL) {
11803 return err;
11804 }
11805
11806 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) {
11807 return ENODEV;
11808 }
11809
11810 hwcksum_dbg_partial_rxoff_adj = i;
11811
11812 return err;
11813 }
11814
11815 static int
11816 sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
11817 {
11818 #pragma unused(oidp, arg1, arg2)
11819 int err;
11820
11821 if (req->oldptr == USER_ADDR_NULL) {
11822 }
11823 if (req->newptr != USER_ADDR_NULL) {
11824 return EPERM;
11825 }
11826 err = SYSCTL_OUT(req, &tx_chain_len_stats,
11827 sizeof(struct chain_len_stats));
11828
11829 return err;
11830 }
11831
11832
11833 #if DEBUG || DEVELOPMENT
/*
 * Blob for sum16 verification.  The content itself is irrelevant --
 * only its byte values matter (it appears to be gzip-compressed data;
 * note the 0x1f 0x8b magic at the start).
 */
static uint8_t sumdata[] = {
	0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
	0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
	0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
	0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
	0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
	0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
	0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
	0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
	0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
	0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
	0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
	0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
	0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
	0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
	0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
	0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
	0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
	0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
	0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
	0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
	0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
	0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
	0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
	0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
	0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
	0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
	0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
	0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
	0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
	0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
	0xc8, 0x28, 0x02, 0x00, 0x00
};

/* Precomputed 16-bit 1's complement sums for various spans of the above data */
static struct {
	boolean_t init;      /* sumr computed at runtime yet? */
	uint16_t len;        /* span length in bytes, from offset 0 */
	uint16_t sumr;       /* reference */
	uint16_t sumrp;      /* reference, precomputed */
} sumtbl[] = {
	{ FALSE, 0, 0, 0x0000 },
	{ FALSE, 1, 0, 0x001f },
	{ FALSE, 2, 0, 0x8b1f },
	{ FALSE, 3, 0, 0x8b27 },
	{ FALSE, 7, 0, 0x790e },
	{ FALSE, 11, 0, 0xcb6d },
	{ FALSE, 20, 0, 0x20dd },
	{ FALSE, 27, 0, 0xbabd },
	{ FALSE, 32, 0, 0xf3e8 },
	{ FALSE, 37, 0, 0x197d },
	{ FALSE, 43, 0, 0x9eae },
	{ FALSE, 64, 0, 0x4678 },
	{ FALSE, 127, 0, 0x9399 },
	{ FALSE, 256, 0, 0xd147 },
	{ FALSE, 325, 0, 0x0358 },
};
#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
11895
/*
 * Boot-time self-test for the 16-bit 1's complement sum routines.
 * For each precomputed span length in sumtbl, and for every byte
 * alignment within an 8-byte window, verifies that:
 *   - in_cksum_mbuf_ref() matches the precomputed reference;
 *   - m_sum16() matches the reference, both when aligned via the
 *     mbuf data pointer and via a non-zero offset argument;
 *   - (INET only) the contiguous-buffer b_sum16() matches as well.
 * Any mismatch panics the system.
 */
static void
dlil_verify_sum16(void)
{
	struct mbuf *m;
	uint8_t *buf;
	int n;

	/* Make sure test data plus extra room for alignment fits in cluster */
	_CASSERT((sizeof(sumdata) + (sizeof(uint64_t) * 2)) <= MCLBYTES);

	kprintf("DLIL: running SUM16 self-tests ... ");

	/*
	 * NOTE(review): m_getcl() result is not checked for NULL before
	 * use -- presumably M_WAITOK guarantees success here; confirm.
	 */
	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
	m_align(m, sizeof(sumdata) + (sizeof(uint64_t) * 2));

	buf = mtod(m, uint8_t *); /* base address */

	for (n = 0; n < SUMTBL_MAX; n++) {
		uint16_t len = sumtbl[n].len;
		int i;

		/* Verify for all possible alignments */
		for (i = 0; i < (int)sizeof(uint64_t); i++) {
			uint16_t sum, sumr;
			uint8_t *c;

			/* Copy over test data to mbuf */
			VERIFY(len <= sizeof(sumdata));
			c = buf + i;
			bcopy(sumdata, c, len);

			/* Zero-offset test (align by data pointer) */
			m->m_data = (caddr_t)c;
			m->m_len = len;
			sum = m_sum16(m, 0, len);

			/* Compute the runtime reference once per length */
			if (!sumtbl[n].init) {
				sumr = (uint16_t)in_cksum_mbuf_ref(m, len, 0, 0);
				sumtbl[n].sumr = sumr;
				sumtbl[n].init = TRUE;
			} else {
				sumr = sumtbl[n].sumr;
			}

			/*
			 * Something is horribly broken; stop now.
			 * NOTE(review): this branch compares sumr against
			 * the precomputed sumrp, yet the message prints
			 * sum/sumr -- looks like the arguments should be
			 * sumr and sumtbl[n].sumrp; confirm before fixing.
			 */
			if (sumr != sumtbl[n].sumrp) {
				panic_plain("\n%s: broken in_cksum_mbuf_ref() "
				    "for len=%d align=%d sum=0x%04x "
				    "[expected=0x%04x]\n", __func__,
				    len, i, sum, sumr);
				/* NOTREACHED */
			} else if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}

			/* Alignment test by offset (fixed data pointer) */
			m->m_data = (caddr_t)buf;
			m->m_len = i + len;
			sum = m_sum16(m, i, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken m_sum16() for len=%d "
				    "offset=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#if INET
			/* Simple sum16 contiguous buffer test by aligment */
			sum = b_sum16(c, len);

			/* Something is horribly broken; stop now */
			if (sum != sumr) {
				panic_plain("\n%s: broken b_sum16() for len=%d "
				    "align=%d sum=0x%04x [expected=0x%04x]\n",
				    __func__, len, i, sum, sumr);
				/* NOTREACHED */
			}
#endif /* INET */
		}
	}
	m_freem(m);

	kprintf("PASSED\n");
}
11984 #endif /* DEBUG || DEVELOPMENT */
11985
/* Expand to a switch case that returns the stringified constant name */
#define CASE_STRINGIFY(x) case x: return #x
11987
11988 __private_extern__ const char *
dlil_kev_dl_code_str(u_int32_t event_code)11989 dlil_kev_dl_code_str(u_int32_t event_code)
11990 {
11991 switch (event_code) {
11992 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
11993 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
11994 CASE_STRINGIFY(KEV_DL_SIFMTU);
11995 CASE_STRINGIFY(KEV_DL_SIFPHYS);
11996 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
11997 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
11998 CASE_STRINGIFY(KEV_DL_ADDMULTI);
11999 CASE_STRINGIFY(KEV_DL_DELMULTI);
12000 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
12001 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
12002 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
12003 CASE_STRINGIFY(KEV_DL_LINK_OFF);
12004 CASE_STRINGIFY(KEV_DL_LINK_ON);
12005 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
12006 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
12007 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
12008 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
12009 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
12010 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
12011 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
12012 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
12013 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
12014 CASE_STRINGIFY(KEV_DL_PRIMARY_ELECTED);
12015 CASE_STRINGIFY(KEV_DL_ISSUES);
12016 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
12017 default:
12018 break;
12019 }
12020 return "";
12021 }
12022
12023 static void
dlil_dt_tcall_fn(thread_call_param_t arg0,thread_call_param_t arg1)12024 dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
12025 {
12026 #pragma unused(arg1)
12027 struct ifnet *ifp = arg0;
12028
12029 if (ifnet_is_attached(ifp, 1)) {
12030 nstat_ifnet_threshold_reached(ifp->if_index);
12031 ifnet_decr_iorefcnt(ifp);
12032 }
12033 }
12034
12035 void
ifnet_notify_data_threshold(struct ifnet * ifp)12036 ifnet_notify_data_threshold(struct ifnet *ifp)
12037 {
12038 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
12039 uint64_t oldbytes = ifp->if_dt_bytes;
12040
12041 ASSERT(ifp->if_dt_tcall != NULL);
12042
12043 /*
12044 * If we went over the threshold, notify NetworkStatistics.
12045 * We rate-limit it based on the threshold interval value.
12046 */
12047 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
12048 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
12049 !thread_call_isactive(ifp->if_dt_tcall)) {
12050 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
12051 uint64_t now = mach_absolute_time(), deadline = now;
12052 uint64_t ival;
12053
12054 if (tival != 0) {
12055 nanoseconds_to_absolutetime(tival, &ival);
12056 clock_deadline_for_periodic_event(ival, now, &deadline);
12057 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
12058 deadline);
12059 } else {
12060 (void) thread_call_enter(ifp->if_dt_tcall);
12061 }
12062 }
12063 }
12064
12065 #if (DEVELOPMENT || DEBUG)
12066 /*
12067 * The sysctl variable name contains the input parameters of
12068 * ifnet_get_keepalive_offload_frames()
12069 * ifp (interface index): name[0]
12070 * frames_array_count: name[1]
12071 * frame_data_offset: name[2]
12072 * The return length gives used_frames_count
12073 */
static int
sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name = (int *)arg1;        /* MIB name: { ifindex, count, offset } */
	u_int namelen = arg2;
	int idx;                        /* interface index from name[0] */
	ifnet_t ifp = NULL;
	u_int32_t frames_array_count;   /* caller-supplied array capacity */
	size_t frame_data_offset;       /* offset of frame data, from name[2] */
	u_int32_t used_frames_count;    /* filled in by the driver layer */
	struct ifnet_keepalive_offload_frame *frames_array = NULL;
	int error = 0;
	u_int32_t i;

	/*
	 * Only root can get look at other people TCP frames
	 */
	error = proc_suser(current_proc());
	if (error != 0) {
		goto done;
	}
	/*
	 * Validate the input parameters
	 */
	/* Read-only sysctl: reject any attempt to write */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}
	if (namelen != 3) {
		error = EINVAL;
		goto done;
	}
	/* Caller must supply an output buffer with a non-zero length */
	if (req->oldptr == USER_ADDR_NULL) {
		error = EINVAL;
		goto done;
	}
	if (req->oldlen == 0) {
		error = EINVAL;
		goto done;
	}
	idx = name[0];
	frames_array_count = name[1];
	frame_data_offset = name[2];

	/* Make sure the passed buffer is large enough */
	if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
	    req->oldlen) {
		error = ENOMEM;
		goto done;
	}

	ifnet_head_lock_shared();
	if (!IF_INDEX_IN_RANGE(idx)) {
		ifnet_head_done();
		error = ENOENT;
		goto done;
	}
	/*
	 * NOTE(review): ifp is dereferenced below after ifnet_head_done()
	 * without taking an I/O reference; presumably acceptable for this
	 * DEVELOPMENT/DEBUG-only handler, but confirm against the detach
	 * path.
	 */
	ifp = ifindex2ifnet[idx];
	ifnet_head_done();

	frames_array = (struct ifnet_keepalive_offload_frame *)kalloc_data(
		frames_array_count * sizeof(struct ifnet_keepalive_offload_frame),
		Z_WAITOK);
	if (frames_array == NULL) {
		error = ENOMEM;
		goto done;
	}

	error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
	    frames_array_count, frame_data_offset, &used_frames_count);
	if (error != 0) {
		DLIL_PRINTF("%s: ifnet_get_keepalive_offload_frames error %d\n",
		    __func__, error);
		goto done;
	}

	/* Copy only the frames actually filled in back to user space */
	for (i = 0; i < used_frames_count; i++) {
		error = SYSCTL_OUT(req, frames_array + i,
		    sizeof(struct ifnet_keepalive_offload_frame));
		if (error != 0) {
			goto done;
		}
	}
done:
	if (frames_array != NULL) {
		kfree_data(frames_array, frames_array_count *
		    sizeof(struct ifnet_keepalive_offload_frame));
	}
	return error;
}
12165 #endif /* DEVELOPMENT || DEBUG */
12166
/* Forward per-flow interface statistics to the TCP layer */
void
ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
    struct ifnet *ifp)
{
	tcp_update_stats_per_flow(ifs, ifp);
}
12173
12174 static inline u_int32_t
_set_flags(u_int32_t * flags_p,u_int32_t set_flags)12175 _set_flags(u_int32_t *flags_p, u_int32_t set_flags)
12176 {
12177 return (u_int32_t)OSBitOrAtomic(set_flags, flags_p);
12178 }
12179
12180 static inline void
_clear_flags(u_int32_t * flags_p,u_int32_t clear_flags)12181 _clear_flags(u_int32_t *flags_p, u_int32_t clear_flags)
12182 {
12183 OSBitAndAtomic(~clear_flags, flags_p);
12184 }
12185
/* Atomically set bits in if_eflags; returns the previous flag value */
__private_extern__ u_int32_t
if_set_eflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_eflags, set_flags);
}
12191
/* Atomically clear bits in if_eflags */
__private_extern__ void
if_clear_eflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_eflags, clear_flags);
}
12197
/* Atomically set bits in if_xflags; returns the previous flag value */
__private_extern__ u_int32_t
if_set_xflags(ifnet_t interface, u_int32_t set_flags)
{
	return _set_flags(&interface->if_xflags, set_flags);
}
12203
/* Atomically clear bits in if_xflags */
__private_extern__ void
if_clear_xflags(ifnet_t interface, u_int32_t clear_flags)
{
	_clear_flags(&interface->if_xflags, clear_flags);
}
12209
12210 static void
log_hexdump(void * data,size_t len)12211 log_hexdump(void *data, size_t len)
12212 {
12213 size_t i, j, k;
12214 unsigned char *ptr = (unsigned char *)data;
12215 #define MAX_DUMP_BUF 32
12216 unsigned char buf[3 * MAX_DUMP_BUF + 1];
12217
12218 for (i = 0; i < len; i += MAX_DUMP_BUF) {
12219 for (j = i, k = 0; j < i + MAX_DUMP_BUF && j < len; j++) {
12220 unsigned char msnbl = ptr[j] >> 4;
12221 unsigned char lsnbl = ptr[j] & 0x0f;
12222
12223 buf[k++] = msnbl < 10 ? msnbl + '0' : msnbl + 'a' - 10;
12224 buf[k++] = lsnbl < 10 ? lsnbl + '0' : lsnbl + 'a' - 10;
12225
12226 if ((j % 2) == 1) {
12227 buf[k++] = ' ';
12228 }
12229 if ((j % MAX_DUMP_BUF) == MAX_DUMP_BUF - 1) {
12230 buf[k++] = ' ';
12231 }
12232 }
12233 buf[k] = 0;
12234 os_log(OS_LOG_DEFAULT, "%3lu: %s", i, buf);
12235 }
12236 }
12237
12238 #if defined(SKYWALK) && defined(XNU_TARGET_OS_OSX)
12239 static bool
net_check_compatible_if_filter(struct ifnet * ifp)12240 net_check_compatible_if_filter(struct ifnet *ifp)
12241 {
12242 if (ifp == NULL) {
12243 if (net_api_stats.nas_iflt_attach_count > net_api_stats.nas_iflt_attach_os_count) {
12244 return false;
12245 }
12246 } else {
12247 if (ifp->if_flt_non_os_count > 0) {
12248 return false;
12249 }
12250 }
12251 return true;
12252 }
12253 #endif /* SKYWALK && XNU_TARGET_OS_OSX */
12254
/*
 * Advance the output cursor `c` by the `k` bytes just written and jump
 * to the caller's `done` label once fewer than one byte of the buffer
 * remains.  Requires locals `c`, `k`, `clen` and a `done` label.
 */
#define DUMP_BUF_CHK() {        \
	clen -= k;              \
	if (clen < 1)           \
	        goto done;      \
	c += k;                 \
}
12261
12262 int dlil_dump_top_if_qlen(char *, int);
12263 int
dlil_dump_top_if_qlen(char * str,int str_len)12264 dlil_dump_top_if_qlen(char *str, int str_len)
12265 {
12266 char *c = str;
12267 int k, clen = str_len;
12268 struct ifnet *top_ifcq_ifp = NULL;
12269 uint32_t top_ifcq_len = 0;
12270 struct ifnet *top_inq_ifp = NULL;
12271 uint32_t top_inq_len = 0;
12272
12273 for (int ifidx = 1; ifidx < if_index; ifidx++) {
12274 struct ifnet *ifp = ifindex2ifnet[ifidx];
12275 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
12276
12277 if (ifp == NULL) {
12278 continue;
12279 }
12280 if (ifp->if_snd != NULL && ifp->if_snd->ifcq_len > top_ifcq_len) {
12281 top_ifcq_len = ifp->if_snd->ifcq_len;
12282 top_ifcq_ifp = ifp;
12283 }
12284 if (dl_if->dl_if_inpstorage.dlth_pkts.qlen > top_inq_len) {
12285 top_inq_len = dl_if->dl_if_inpstorage.dlth_pkts.qlen;
12286 top_inq_ifp = ifp;
12287 }
12288 }
12289
12290 if (top_ifcq_ifp != NULL) {
12291 k = scnprintf(c, clen, "\ntop ifcq_len %u packets by %s\n",
12292 top_ifcq_len, top_ifcq_ifp->if_xname);
12293 DUMP_BUF_CHK();
12294 }
12295 if (top_inq_ifp != NULL) {
12296 k = scnprintf(c, clen, "\ntop inq_len %u packets by %s\n",
12297 top_inq_len, top_inq_ifp->if_xname);
12298 DUMP_BUF_CHK();
12299 }
12300 done:
12301 return str_len - clen;
12302 }
12303