xref: /xnu-12377.61.12/bsd/net/if_utun.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2008-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 
31 /* ----------------------------------------------------------------------------------
32  *   Application of kernel control for interface creation
33  *
34  *   Theory of operation:
35  *   utun (user tunnel) acts as glue between kernel control sockets and network interfaces.
36  *   This kernel control will register an interface for every client that connects.
37  *   ---------------------------------------------------------------------------------- */
38 
39 #include <sys/systm.h>
40 #include <sys/kern_control.h>
41 #include <net/kpi_protocol.h>
42 #include <net/kpi_interface.h>
43 #include <sys/socket.h>
44 #include <net/if.h>
45 #include <net/if_types.h>
46 #include <net/bpf.h>
47 #include <net/if_utun.h>
48 #include <sys/mbuf.h>
49 #include <sys/sockio.h>
50 #include <netinet/in.h>
51 #include <netinet/ip.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/in6_var.h>
54 #include <sys/kauth.h>
55 #include <net/necp.h>
56 #include <kern/zalloc.h>
57 
58 #include <net/sockaddr_utils.h>
59 
60 #include <os/log.h>
61 
62 #if SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
63 #include <skywalk/os_skywalk_private.h>
64 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
65 #include <skywalk/nexus/netif/nx_netif.h>
66 #define UTUN_NEXUS 1
67 #else // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
68 #define UTUN_NEXUS 0
69 #endif // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
70 
71 /*
 * Kernel Control functions.
 * Forward declarations of the static kernel-control entry points
 * (setup, bind, connect, disconnect, send, getopt, setopt, rcvd).
 * Each one receives the per-connection state through `unitinfo`.
 */
72 static errno_t  utun_ctl_setup(u_int32_t *unit, void **unitinfo);
73 static errno_t  utun_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
74     void **unitinfo);
75 static errno_t  utun_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
76     void **unitinfo);
77 static errno_t  utun_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
78     void *unitinfo);
79 static errno_t  utun_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
80     void *unitinfo, mbuf_t m, int flags);
81 static errno_t  utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
82     int opt, void *__sized_by(*len) data, size_t *len);
83 static errno_t  utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
84     int opt, void *__sized_by(len) data, size_t len);
85 static void             utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
86     int flags);
87 
88 /*
 * Network Interface functions.
 * Forward declarations of the static ifnet-side routines; all of them take
 * the ifnet_t they operate on as their first argument.
 */
89 static void     utun_start(ifnet_t interface);
90 static errno_t  utun_framer(ifnet_t interface, mbuf_t *packet,
91     const struct sockaddr *dest,
92     IFNET_LLADDR_T dest_lladdr,
93     IFNET_FRAME_TYPE_T frame_type,
94     u_int32_t *prepend_len, u_int32_t *postpend_len);
95 static errno_t  utun_output(ifnet_t interface, mbuf_t data);
96 static errno_t  utun_demux(ifnet_t interface, mbuf_t data, char *frame_header,
97     protocol_family_t *protocol);
98 static errno_t  utun_add_proto(ifnet_t interface, protocol_family_t protocol,
99     const struct ifnet_demux_desc *demux_array,
100     u_int32_t demux_count);
101 static errno_t  utun_del_proto(ifnet_t interface, protocol_family_t protocol);
102 static errno_t  utun_ioctl(ifnet_t interface, u_long cmd, void *data);
103 static void             utun_detached(ifnet_t interface);
104 
105 /*
 * Protocol handlers.
 * Forward declarations of the per-protocol attach, input and pre-output
 * routines.
 */
106 static errno_t  utun_attach_proto(ifnet_t interface, protocol_family_t proto);
107 static errno_t  utun_proto_input(ifnet_t interface, protocol_family_t protocol,
108     mbuf_t m, char *frame_header);
109 static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
110     mbuf_t *packet, const struct sockaddr *dest, void *route,
111     char *frame_type, char *link_layer_dest);
112 
113 #if UTUN_NEXUS
/* File-scope nexus state; only compiled when UTUN_NEXUS is 1. */
114 static nexus_controller_t utun_ncd;
115 static int utun_ncd_refcount;
116 static uuid_t utun_kpipe_uuid;
117 static uuid_t utun_nx_dom_prov;
118 
/*
 * Set of UUIDs kept per PCB (embedded as utun_pcb.utun_nx).
 * NOTE(review): judging by the field names these identify the netif (if_*)
 * and flowswitch (fsw_*) nexus provider/instance objects -- confirm against
 * the code that fills them in.
 */
119 typedef struct utun_nx {
120 	uuid_t if_provider;
121 	uuid_t if_instance;
122 	uuid_t fsw_provider;
123 	uuid_t fsw_instance;
124 	uuid_t fsw_device;
125 	uuid_t fsw_agent;
126 } *utun_nx_t;
127 
128 /* Data path states: bits stored in utun_pcb.utun_pcb_data_path_state */
129 #define UTUN_PCB_DATA_PATH_READY    0x1
130 
131 /*
 * Macros to set/clear/test data path states.
 * Each takes a pointer to a struct utun_pcb and touches only the
 * UTUN_PCB_DATA_PATH_READY bit; none of them performs any locking itself.
 */
132 #define UTUN_SET_DATA_PATH_READY(_pcb) \
133     ((_pcb)->utun_pcb_data_path_state |= UTUN_PCB_DATA_PATH_READY)
134 #define UTUN_CLR_DATA_PATH_READY(_pcb) \
135     ((_pcb)->utun_pcb_data_path_state &= ~UTUN_PCB_DATA_PATH_READY)
136 #define UTUN_IS_DATA_PATH_READY(_pcb) \
137     (((_pcb)->utun_pcb_data_path_state & UTUN_PCB_DATA_PATH_READY) != 0)
138 
/* Default slot/ring sizing; the ring defaults seed the if_utun_*ring_size tunables. */
139 #define UTUN_IF_DEFAULT_SLOT_SIZE 2048
140 #define UTUN_IF_DEFAULT_RING_SIZE 64
141 #define UTUN_IF_DEFAULT_TX_FSW_RING_SIZE 64
142 #define UTUN_IF_DEFAULT_RX_FSW_RING_SIZE 128
143 #define UTUN_IF_DEFAULT_BUF_SEG_SIZE    skmem_usr_buf_seg_size
144 #define UTUN_IF_HEADROOM_SIZE 32
145 
/*
 * Ring counts. A PCB whose utun_kpipe_count equals UTUN_IF_WMM_RING_COUNT is
 * treated as being in WMM mode (see utun_in_wmm_mode()). The *_MAX_* values
 * size the per-PCB ring pointer arrays.
 */
146 #define UTUN_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
147 #define UTUN_IF_MAX_RING_COUNT UTUN_IF_WMM_RING_COUNT
148 #define UTUN_NETIF_WMM_TX_RING_COUNT UTUN_IF_WMM_RING_COUNT
149 #define UTUN_NETIF_WMM_RX_RING_COUNT 1
150 #define UTUN_NETIF_MAX_TX_RING_COUNT UTUN_NETIF_WMM_TX_RING_COUNT
151 #define UTUN_NETIF_MAX_RX_RING_COUNT UTUN_NETIF_WMM_RX_RING_COUNT
152 
/* Inclusive bounds enforced by the net.utun.*ring_size sysctl handlers. */
153 #define UTUN_IF_MIN_RING_SIZE 8
154 #define UTUN_IF_MAX_RING_SIZE 1024
155 
/* Slot size bounds (their users are not visible in this part of the file). */
156 #define UTUN_IF_MIN_SLOT_SIZE 1024
157 #define UTUN_IF_MAX_SLOT_SIZE (32 * 1024)
158 
/* Initial value of the net.utun.max_pending_input tunable. */
159 #define UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT 512
160 
/* Backing variable for net.utun.max_pending_input (plain read/write int). */
161 static int if_utun_max_pending_input = UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT;
162 
/* Handlers that range-check writes to the three ring-size tunables. */
163 static int sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS;
164 static int sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
165 static int sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
166 
/* Backing variables for the ring-size tunables; only updated by the handlers above. */
167 static int if_utun_ring_size = UTUN_IF_DEFAULT_RING_SIZE;
168 static int if_utun_tx_fsw_ring_size = UTUN_IF_DEFAULT_TX_FSW_RING_SIZE;
169 static int if_utun_rx_fsw_ring_size = UTUN_IF_DEFAULT_RX_FSW_RING_SIZE;
170 
/* The "utun" node is created under "net"; the OIDs below hang off it. */
171 SYSCTL_DECL(_net_utun);
172 SYSCTL_NODE(_net, OID_AUTO, utun, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "UTun");
173 
/*
 * max_pending_input is exposed directly with SYSCTL_INT (no validation);
 * the ring sizes go through SYSCTL_PROC so writes can be rejected with EINVAL
 * when outside [UTUN_IF_MIN_RING_SIZE, UTUN_IF_MAX_RING_SIZE].
 */
174 SYSCTL_INT(_net_utun, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_utun_max_pending_input, 0, "");
175 SYSCTL_PROC(_net_utun, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
176     &if_utun_ring_size, UTUN_IF_DEFAULT_RING_SIZE, &sysctl_if_utun_ring_size, "I", "");
177 SYSCTL_PROC(_net_utun, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
178     &if_utun_tx_fsw_ring_size, UTUN_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_utun_tx_fsw_ring_size, "I", "");
179 SYSCTL_PROC(_net_utun, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
180     &if_utun_rx_fsw_ring_size, UTUN_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_utun_rx_fsw_ring_size, "I", "");
181 
/*
 * Forward declarations of the nexus-related routines (UTUN_NEXUS builds only).
 * The utun_nexus_* and utun_kpipe_* prototypes share the
 * (nxprov, nexus, channel/ring, ...) parameter shape.
 */
182 static errno_t
183 utun_register_nexus(void);
184 
185 static errno_t
186 utun_netif_prepare(__unused kern_nexus_t nexus, ifnet_t ifp);
187 static errno_t
188 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
189     proc_t p, kern_nexus_t nexus,
190     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx);
191 static errno_t
192 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
193     kern_channel_t channel);
194 static void
195 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
196     kern_channel_t channel);
197 static void
198 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
199     kern_channel_t channel);
200 static void
201 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
202     kern_channel_t channel);
/* Kernel-pipe ring lifecycle and sync routines. */
203 static errno_t
204 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
205     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
206     void **ring_ctx);
207 static void
208 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
209     kern_channel_ring_t ring);
210 static errno_t
211 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
212     kern_channel_ring_t ring, uint32_t flags);
213 static errno_t
214 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
215     kern_channel_ring_t ring, uint32_t flags);
216 #endif // UTUN_NEXUS
217 
218 /*
 * Control block allocated for each kernel control connection.
 * It is the context object the netif nexus callbacks obtain through
 * kern_nexus_get_context().
 */
219 struct utun_pcb {
220 	TAILQ_ENTRY(utun_pcb)   utun_chain;
221 	kern_ctl_ref    utun_ctlref;
222 	ifnet_t                 utun_ifp;
223 	u_int32_t               utun_unit;
224 	u_int32_t               utun_unique_id;
	// Externally visible flags; UTUN_FLAGS_ENABLE_PROC_UUID here enlarges
	// the per-packet header (see UTUN_HEADER_SIZE)
225 	u_int32_t               utun_external_flags;
226 	// These internal flags are only used within this driver
	// (e.g. UTUN_FLAGS_KPIPE_ALLOCATED, via utun_flag_set/clr/isset)
227 	u_int32_t               utun_internal_flags;
228 	int                     utun_ext_ifdata_stats;
229 	u_int32_t               utun_max_pending_packets;
230 	char                    utun_if_xname[IFXNAMSIZ];
231 	char                    utun_unique_name[IFXNAMSIZ];
232 	// PCB lock protects state fields and rings
233 	decl_lck_rw_data(, utun_pcb_lock);
	// Pending input packets, linked through m_nextpkt; head, tail and count
234 	struct mbuf *   utun_input_chain;
235 	struct mbuf *   utun_input_chain_last;
236 	u_int32_t               utun_input_chain_count;
237 	// Input chain lock protects the list of input mbufs
238 	// The input chain lock must be taken AFTER the PCB lock if both are held
239 	lck_mtx_t               utun_input_chain_lock;
240 
241 #if UTUN_NEXUS
242 	// lock to protect utun_pcb_data_move & utun_pcb_drainers
243 	decl_lck_mtx_data(, utun_pcb_data_move_lock);
244 	u_int32_t               utun_pcb_data_move; /* number of data moving contexts */
245 	u_int32_t               utun_pcb_drainers; /* number of threads waiting to drain */
246 	u_int32_t               utun_pcb_data_path_state; /* internal state of interface data path */
247 
248 	struct utun_nx  utun_nx;
	// Number of kernel pipes; equal to UTUN_IF_WMM_RING_COUNT means WMM mode
249 	u_int32_t               utun_kpipe_count;
250 	pid_t                   utun_kpipe_pid;
251 	uuid_t                  utun_kpipe_uuid[UTUN_IF_MAX_RING_COUNT];
	// Kernel pipe rings, indexed by ring index
252 	void *                  utun_kpipe_rxring[UTUN_IF_MAX_RING_COUNT];
253 	void *                  utun_kpipe_txring[UTUN_IF_MAX_RING_COUNT];
254 	kern_pbufpool_t         utun_kpipe_pp;
255 	u_int32_t               utun_kpipe_tx_ring_size;
256 	u_int32_t               utun_kpipe_rx_ring_size;
257 	uuid_t                  utun_kpipe_proc_uuid;
258 
259 	kern_nexus_t            utun_netif_nexus;
260 	kern_pbufpool_t         utun_netif_pp;
	// Netif rings; filled in by utun_netif_ring_init() and reset to NULL
	// by utun_netif_ring_fini()
261 	void *                  utun_netif_rxring[UTUN_NETIF_MAX_RX_RING_COUNT];
262 	void *                  utun_netif_txring[UTUN_NETIF_MAX_TX_RING_COUNT];
	// Compared with the available slot count in the TX doorbell to decide
	// when to disable interface output
263 	uint64_t                utun_netif_txring_size;
264 
	// Upper bound on the bytes copied per packet in utun_netif_sync_tx()
265 	u_int32_t               utun_slot_size;
266 	u_int32_t               utun_netif_ring_size;
267 	u_int32_t               utun_tx_fsw_ring_size;
268 	u_int32_t               utun_rx_fsw_ring_size;
269 	// Auto attach flowswitch when netif is enabled. When set to false,
270 	// it allows userspace nexus controller to attach and own flowswitch.
271 	bool                    utun_attach_fsw;
272 	bool                    utun_netif_connected;
273 	bool                    utun_use_netif;
274 	bool                    utun_needs_netagent;
275 #endif // UTUN_NEXUS
276 };
277 
278 /* These are internal flags not exposed outside this file (stored in utun_internal_flags) */
279 #define UTUN_FLAGS_KPIPE_ALLOCATED 1
280 
281 static errno_t utun_pkt_input(struct utun_pcb *pcb, mbuf_t m);
282 
283 /*
 * data movement refcounting functions.
 * In this file every successful utun_data_move_begin() is paired with a
 * utun_data_move_end() before the caller returns.
 */
284 #if UTUN_NEXUS
285 static boolean_t utun_data_move_begin(struct utun_pcb *pcb);
286 static void utun_data_move_end(struct utun_pcb *pcb);
287 static void utun_wait_data_move_drain(struct utun_pcb *pcb);
288 #endif // UTUN_NEXUS
289 
290 #define UTUN_DEFAULT_MTU 1500
/*
 * Size of the header that precedes each packet: a 32-bit value, plus a
 * uuid_t when UTUN_FLAGS_ENABLE_PROC_UUID is set in utun_external_flags.
 */
291 #define UTUN_HEADER_SIZE(_pcb) (sizeof(u_int32_t) + (((_pcb)->utun_external_flags & UTUN_FLAGS_ENABLE_PROC_UUID) ? sizeof(uuid_t) : 0))
292 
/* File-scope kernel control reference and lock attribute/group/mutex. */
293 static kern_ctl_ref     utun_kctlref;
294 static LCK_ATTR_DECLARE(utun_lck_attr, 0, 0);
295 static LCK_GRP_DECLARE(utun_lck_grp, "utun");
296 static LCK_MTX_DECLARE_ATTR(utun_lock, &utun_lck_grp, &utun_lck_attr);
297 
/* List head for utun_pcb entries (linked through utun_pcb.utun_chain). */
298 TAILQ_HEAD(utun_list, utun_pcb) utun_head;
299 
/* Typed allocation zone for struct utun_pcb. */
300 static KALLOC_TYPE_DEFINE(utun_pcb_zone, struct utun_pcb, NET_KT_DEFAULT);
301 
302 #if UTUN_NEXUS
303 /* Inline helpers to set/clear/test the driver-internal PCB flags. */
304 static inline void
utun_flag_set(struct utun_pcb * pcb,uint32_t flag)305 utun_flag_set(struct utun_pcb *pcb, uint32_t flag)
306 {
307 	pcb->utun_internal_flags |= flag;
308 }
309 
310 static inline void
utun_flag_clr(struct utun_pcb * pcb,uint32_t flag)311 utun_flag_clr(struct utun_pcb *pcb, uint32_t flag)
312 {
313 	pcb->utun_internal_flags &= ~flag;
314 }
315 
316 static inline bool
utun_flag_isset(struct utun_pcb * pcb,uint32_t flag)317 utun_flag_isset(struct utun_pcb *pcb, uint32_t flag)
318 {
319 	return !!(pcb->utun_internal_flags & flag);
320 }
321 
322 static inline bool
utun_in_wmm_mode(struct utun_pcb * pcb)323 utun_in_wmm_mode(struct utun_pcb *pcb)
324 {
325 	return pcb->utun_kpipe_count == UTUN_IF_WMM_RING_COUNT;
326 }
327 
328 static uint8_t
utun_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)329 utun_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
330 {
331 	switch (svc_class) {
332 	case KPKT_SC_VO: {
333 		return 0;
334 	}
335 	case KPKT_SC_VI: {
336 		return 1;
337 	}
338 	case KPKT_SC_BE: {
339 		return 2;
340 	}
341 	case KPKT_SC_BK: {
342 		return 3;
343 	}
344 	default: {
345 		VERIFY(0);
346 		return 0;
347 	}
348 	}
349 }
350 
351 static int
352 sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS
353 {
354 #pragma unused(arg1, arg2)
355 	int value = if_utun_ring_size;
356 
357 	int error = sysctl_handle_int(oidp, &value, 0, req);
358 	if (error || !req->newptr) {
359 		return error;
360 	}
361 
362 	if (value < UTUN_IF_MIN_RING_SIZE ||
363 	    value > UTUN_IF_MAX_RING_SIZE) {
364 		return EINVAL;
365 	}
366 
367 	if_utun_ring_size = value;
368 
369 	return 0;
370 }
371 
372 static int
373 sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
374 {
375 #pragma unused(arg1, arg2)
376 	int value = if_utun_tx_fsw_ring_size;
377 
378 	int error = sysctl_handle_int(oidp, &value, 0, req);
379 	if (error || !req->newptr) {
380 		return error;
381 	}
382 
383 	if (value < UTUN_IF_MIN_RING_SIZE ||
384 	    value > UTUN_IF_MAX_RING_SIZE) {
385 		return EINVAL;
386 	}
387 
388 	if_utun_tx_fsw_ring_size = value;
389 
390 	return 0;
391 }
392 
393 static int
394 sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
395 {
396 #pragma unused(arg1, arg2)
397 	int value = if_utun_rx_fsw_ring_size;
398 
399 	int error = sysctl_handle_int(oidp, &value, 0, req);
400 	if (error || !req->newptr) {
401 		return error;
402 	}
403 
404 	if (value < UTUN_IF_MIN_RING_SIZE ||
405 	    value > UTUN_IF_MAX_RING_SIZE) {
406 		return EINVAL;
407 	}
408 
409 	if_utun_rx_fsw_ring_size = value;
410 
411 	return 0;
412 }
413 
414 static errno_t
utun_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)415 utun_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
416     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
417     void **ring_ctx)
418 {
419 #pragma unused(nxprov)
420 #pragma unused(channel)
421 #pragma unused(ring_ctx)
422 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
423 	if (!is_tx_ring) {
424 		VERIFY(pcb->utun_netif_rxring[0] == NULL);
425 		pcb->utun_netif_rxring[0] = ring;
426 	} else {
427 		uint8_t ring_idx = 0;
428 		if (utun_in_wmm_mode(pcb)) {
429 			int err;
430 			kern_packet_svc_class_t svc_class;
431 			err = kern_channel_get_service_class(ring, &svc_class);
432 			VERIFY(err == 0);
433 			ring_idx = utun_find_tx_ring_by_svc(svc_class);
434 			VERIFY(ring_idx < UTUN_IF_WMM_RING_COUNT);
435 		}
436 
437 		*ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);
438 
439 		VERIFY(pcb->utun_netif_txring[ring_idx] == NULL);
440 		pcb->utun_netif_txring[ring_idx] = ring;
441 	}
442 	return 0;
443 }
444 
445 static void
utun_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)446 utun_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
447     kern_channel_ring_t ring)
448 {
449 #pragma unused(nxprov)
450 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
451 	bool found = false;
452 
453 	for (int i = 0; i < UTUN_NETIF_MAX_RX_RING_COUNT; i++) {
454 		if (pcb->utun_netif_rxring[i] == ring) {
455 			pcb->utun_netif_rxring[i] = NULL;
456 			VERIFY(!found);
457 			found = true;
458 		}
459 	}
460 	for (int i = 0; i < UTUN_NETIF_MAX_TX_RING_COUNT; i++) {
461 		if (pcb->utun_netif_txring[i] == ring) {
462 			pcb->utun_netif_txring[i] = NULL;
463 			VERIFY(!found);
464 			found = true;
465 		}
466 	}
467 	VERIFY(found);
468 }
469 
470 static errno_t
utun_netif_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)471 utun_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
472     kern_channel_ring_t tx_ring, uint32_t flags)
473 {
474 #pragma unused(nxprov)
475 #pragma unused(flags)
476 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
477 
478 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
479 
480 	if (!utun_data_move_begin(pcb)) {
481 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
482 		    __func__, if_name(pcb->utun_ifp));
483 		return 0;
484 	}
485 
486 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
487 
488 	struct kern_channel_ring_stat_increment tx_ring_stats;
489 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
490 	kern_channel_slot_t tx_pslot = NULL;
491 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
492 	kern_packet_t tx_chain_ph = 0;
493 
494 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
495 
496 	if (tx_slot == NULL) {
497 		// Nothing to write, don't bother signalling
498 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
499 		utun_data_move_end(pcb);
500 		return 0;
501 	}
502 
503 	if (pcb->utun_kpipe_count > 0 &&
504 	    utun_flag_isset(pcb, UTUN_FLAGS_KPIPE_ALLOCATED)) {
505 		// Select the corresponding kpipe rx ring
506 		uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(tx_ring);
507 		VERIFY(ring_idx < UTUN_IF_MAX_RING_COUNT);
508 		kern_channel_ring_t __single rx_ring = pcb->utun_kpipe_rxring[ring_idx];
509 
510 		// Unlock while calling notify
511 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
512 
513 		// Signal the kernel pipe ring to read
514 		if (rx_ring != NULL) {
515 			kern_channel_notify(rx_ring, 0);
516 		}
517 		utun_data_move_end(pcb);
518 		return 0;
519 	}
520 
521 	// If we're here, we're injecting into the utun kernel control socket
522 	while (tx_slot != NULL) {
523 		size_t length = 0;
524 		mbuf_ref_t data = NULL;
525 
526 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
527 
528 		if (tx_ph == 0) {
529 			// Advance TX ring
530 			tx_pslot = tx_slot;
531 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
532 			continue;
533 		}
534 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
535 		if (tx_chain_ph != 0) {
536 			kern_packet_append(tx_ph, tx_chain_ph);
537 		}
538 		tx_chain_ph = tx_ph;
539 
540 		// Advance TX ring
541 		tx_pslot = tx_slot;
542 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
543 
544 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
545 		VERIFY(tx_buf != NULL);
546 
547 		bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
548 
549 		uint32_t tx_offset = kern_buflet_get_data_offset(tx_buf);
550 		uint32_t tx_length = kern_buflet_get_data_length(tx_buf);
551 		/* tx_baddr is the absolute buffer address */
552 		uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
553 		    kern_buflet_get_data_address(tx_buf),
554 		    kern_buflet_get_data_limit(tx_buf));
555 		VERIFY(tx_baddr != 0);
556 
557 		// The offset must be large enough for the headers
558 		VERIFY(tx_offset >= UTUN_HEADER_SIZE(pcb));
559 
560 		// Find family
561 		uint32_t af = 0;
562 		uint8_t vhl = *(uint8_t *)(tx_baddr + tx_offset);
563 		u_int ip_version = (vhl >> 4);
564 		switch (ip_version) {
565 		case 4: {
566 			af = AF_INET;
567 			break;
568 		}
569 		case 6: {
570 			af = AF_INET6;
571 			break;
572 		}
573 		default: {
574 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s: unknown ip version %u vhl %u tx_offset %u len %u header_size %zu\n",
575 			    pcb->utun_ifp->if_xname, ip_version, vhl, tx_offset, tx_length,
576 			    UTUN_HEADER_SIZE(pcb));
577 			break;
578 		}
579 		}
580 
581 		tx_offset -= UTUN_HEADER_SIZE(pcb);
582 		tx_length += UTUN_HEADER_SIZE(pcb);
583 		tx_baddr += tx_offset;
584 
585 		length = MIN(tx_length, pcb->utun_slot_size);
586 
587 		// Copy in family
588 		memcpy(tx_baddr, &af, sizeof(af));
589 		if (pcb->utun_external_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
590 			kern_packet_get_euuid(tx_ph, (void *)(tx_baddr + sizeof(af)));
591 		}
592 
593 		if (length > 0) {
594 			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
595 			if (error == 0) {
596 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
597 				if (error == 0) {
598 					error = utun_output(pcb->utun_ifp, data);
599 					if (error != 0) {
600 						os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - utun_output error %d\n", pcb->utun_ifp->if_xname, error);
601 					}
602 				} else {
603 					os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->utun_ifp->if_xname, length, error);
604 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
605 					STATS_INC(nifs, NETIF_STATS_DROP);
606 					mbuf_freem(data);
607 					data = NULL;
608 				}
609 			} else {
610 				os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->utun_ifp->if_xname, error);
611 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
612 				STATS_INC(nifs, NETIF_STATS_DROP);
613 			}
614 		} else {
615 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - 0 length packet\n", pcb->utun_ifp->if_xname);
616 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
617 			STATS_INC(nifs, NETIF_STATS_DROP);
618 		}
619 
620 		if (data == NULL) {
621 			continue;
622 		}
623 
624 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
625 		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
626 
627 		tx_ring_stats.kcrsi_slots_transferred++;
628 		tx_ring_stats.kcrsi_bytes_transferred += length;
629 	}
630 	if (tx_chain_ph != 0) {
631 		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
632 	}
633 	if (tx_pslot) {
634 		kern_channel_advance_slot(tx_ring, tx_pslot);
635 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
636 		(void)kern_channel_reclaim(tx_ring);
637 	}
638 
639 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
640 	utun_data_move_end(pcb);
641 	return 0;
642 }
643 
644 static errno_t
utun_netif_tx_doorbell_one(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,uint32_t flags,uint8_t ring_idx)645 utun_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
646     kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
647 {
648 #pragma unused(nxprov)
649 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
650 	boolean_t more = false;
651 	errno_t rc = 0;
652 
653 	VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);
654 
655 	if (!utun_data_move_begin(pcb)) {
656 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
657 		    __func__, if_name(pcb->utun_ifp));
658 		return 0;
659 	}
660 
661 	/*
662 	 * Refill and sync the ring; we may be racing against another thread doing
663 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
664 	 * variant here.
665 	 */
666 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
667 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
668 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
669 		    if_name(pcb->utun_ifp), ring->ckr_name, rc);
670 	}
671 
672 	(void) kr_enter(ring, TRUE);
673 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
674 	if (ring != pcb->utun_netif_txring[ring_idx]) {
675 		// ring no longer valid
676 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
677 		kr_exit(ring);
678 		os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
679 		    if_name(pcb->utun_ifp), ring->ckr_name, ring_idx);
680 		return ENXIO;
681 	}
682 
683 	if (pcb->utun_kpipe_count > 0) {
684 		uint32_t tx_available = kern_channel_available_slot_count(ring);
685 		if (pcb->utun_netif_txring_size > 0 &&
686 		    tx_available >= pcb->utun_netif_txring_size - 1) {
687 			// No room left in tx ring, disable output for now
688 			errno_t error = ifnet_disable_output(pcb->utun_ifp);
689 			if (error != 0) {
690 				os_log_error(OS_LOG_DEFAULT, "utun_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
691 			}
692 		}
693 	}
694 
695 	if (pcb->utun_kpipe_count > 0) {
696 		kern_channel_ring_t __single rx_ring = pcb->utun_kpipe_rxring[ring_idx];
697 
698 		// Unlock while calling notify
699 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
700 		// Signal the kernel pipe ring to read
701 		if (rx_ring != NULL) {
702 			kern_channel_notify(rx_ring, 0);
703 		}
704 	} else {
705 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
706 	}
707 
708 	kr_exit(ring);
709 	utun_data_move_end(pcb);
710 	return 0;
711 }
712 
713 static errno_t
utun_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)714 utun_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
715     kern_channel_ring_t ring, __unused uint32_t flags)
716 {
717 	errno_t ret = 0;
718 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
719 
720 	if (!utun_data_move_begin(pcb)) {
721 		os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->utun_ifp));
722 		return 0;
723 	}
724 
725 	if (utun_in_wmm_mode(pcb)) {
726 		for (uint8_t i = 0; i < UTUN_IF_WMM_RING_COUNT; i++) {
727 			kern_channel_ring_t __single nring = pcb->utun_netif_txring[i];
728 			ret = utun_netif_tx_doorbell_one(nxprov, nexus, nring, flags, i);
729 			if (ret) {
730 				break;
731 			}
732 		}
733 	} else {
734 		ret = utun_netif_tx_doorbell_one(nxprov, nexus, ring, flags, 0);
735 	}
736 
737 	utun_data_move_end(pcb);
738 	return ret;
739 }
740 static errno_t
utun_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)741 utun_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
742     kern_channel_ring_t rx_ring, uint32_t flags)
743 {
744 #pragma unused(nxprov)
745 #pragma unused(flags)
746 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
747 	struct kern_channel_ring_stat_increment rx_ring_stats;
748 
749 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
750 
751 	if (!utun_data_move_begin(pcb)) {
752 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
753 		    __func__, if_name(pcb->utun_ifp));
754 		return 0;
755 	}
756 
757 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
758 
759 	// Reclaim user-released slots
760 	(void) kern_channel_reclaim(rx_ring);
761 
762 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
763 
764 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
765 	if (avail == 0) {
766 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
767 		utun_data_move_end(pcb);
768 		return 0;
769 	}
770 
771 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
772 	VERIFY(rx_pp != NULL);
773 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
774 	kern_channel_slot_t rx_pslot = NULL;
775 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
776 
777 	while (rx_slot != NULL) {
778 		// Check for a waiting packet
779 		lck_mtx_lock(&pcb->utun_input_chain_lock);
780 		mbuf_t data = pcb->utun_input_chain;
781 		if (data == NULL) {
782 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
783 			break;
784 		}
785 
786 		// Allocate rx packet
787 		kern_packet_t rx_ph = 0;
788 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
789 		if (__improbable(error != 0)) {
790 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
791 			STATS_INC(nifs, NETIF_STATS_DROP);
792 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
793 			break;
794 		}
795 
796 		// Advance waiting packets
797 		if (pcb->utun_input_chain_count > 0) {
798 			pcb->utun_input_chain_count--;
799 		}
800 		pcb->utun_input_chain = data->m_nextpkt;
801 		data->m_nextpkt = NULL;
802 		if (pcb->utun_input_chain == NULL) {
803 			pcb->utun_input_chain_last = NULL;
804 		}
805 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
806 
807 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
808 		size_t length = mbuf_pkthdr_len(data);
809 
810 		if (length < header_offset) {
811 			// mbuf is too small
812 			mbuf_freem(data);
813 			kern_pbufpool_free(rx_pp, rx_ph);
814 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
815 			STATS_INC(nifs, NETIF_STATS_DROP);
816 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length too short for header %zu < %zu\n",
817 			    pcb->utun_ifp->if_xname, length, header_offset);
818 			continue;
819 		}
820 
821 		length -= header_offset;
822 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
823 			// Flush data
824 			mbuf_freem(data);
825 			kern_pbufpool_free(rx_pp, rx_ph);
826 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
827 			STATS_INC(nifs, NETIF_STATS_DROP);
828 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length %zu > %u\n",
829 			    pcb->utun_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
830 			continue;
831 		}
832 
833 		mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
834 
835 		// Fillout rx packet
836 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
837 		VERIFY(rx_buf != NULL);
838 		void *rx_baddr = __unsafe_forge_bidi_indexable(void *,
839 		    kern_buflet_get_data_address(rx_buf),
840 		    kern_buflet_get_data_limit(rx_buf));
841 		VERIFY(rx_baddr != NULL);
842 
843 		// Copy-in data from mbuf to buflet
844 		mbuf_copydata(data, header_offset, length, rx_baddr);
845 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
846 
847 		// Finalize and attach the packet
848 		error = kern_buflet_set_data_offset(rx_buf, 0);
849 		VERIFY(error == 0);
850 		error = kern_buflet_set_data_length(rx_buf, length);
851 		VERIFY(error == 0);
852 		error = kern_packet_set_headroom(rx_ph, 0);
853 		VERIFY(error == 0);
854 		error = kern_packet_finalize(rx_ph);
855 		VERIFY(error == 0);
856 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
857 		VERIFY(error == 0);
858 
859 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
860 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
861 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
862 
863 		rx_ring_stats.kcrsi_slots_transferred++;
864 		rx_ring_stats.kcrsi_bytes_transferred += length;
865 
866 		mbuf_freem(data);
867 
868 		// Advance ring
869 		rx_pslot = rx_slot;
870 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
871 	}
872 
873 	for (uint8_t ring_idx = 0; ring_idx < pcb->utun_kpipe_count; ring_idx++) {
874 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
875 		kern_channel_slot_t tx_pslot = NULL;
876 		kern_channel_slot_t tx_slot = NULL;
877 
878 		kern_channel_ring_t __single tx_ring = pcb->utun_kpipe_txring[ring_idx];
879 		if (tx_ring == NULL) {
880 			// Net-If TX ring not set up yet, nothing to read
881 			goto done;
882 		}
883 		// Unlock utun before entering ring
884 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
885 
886 		(void)kr_enter(tx_ring, TRUE);
887 
888 		// Lock again after entering and validate
889 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
890 		if (tx_ring != pcb->utun_kpipe_txring[ring_idx]) {
891 			goto done;
892 		}
893 
894 		tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
895 		if (tx_slot == NULL) {
896 			// Nothing to read, don't bother signalling
897 			goto done;
898 		}
899 
900 		while (rx_slot != NULL && tx_slot != NULL) {
901 			// Allocate rx packet
902 			kern_packet_t rx_ph = 0;
903 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
904 
905 			// Advance TX ring
906 			tx_pslot = tx_slot;
907 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
908 
909 			/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
910 			if (tx_ph == 0) {
911 				continue;
912 			}
913 
914 			/* XXX We could try this alloc before advancing the slot to avoid
915 			 * dropping the packet on failure to allocate.
916 			 */
917 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
918 			if (__improbable(error != 0)) {
919 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
920 				STATS_INC(nifs, NETIF_STATS_DROP);
921 				break;
922 			}
923 
924 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
925 			VERIFY(tx_buf != NULL);
926 			uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
927 			    kern_buflet_get_data_address(tx_buf),
928 			    kern_buflet_get_data_limit(tx_buf));
929 			VERIFY(tx_baddr != 0);
930 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
931 
932 			// Check packet length
933 			size_t header_offset = UTUN_HEADER_SIZE(pcb);
934 			uint32_t tx_length = kern_packet_get_data_length(tx_ph);
935 			if (tx_length < header_offset) {
936 				// Packet is too small
937 				kern_pbufpool_free(rx_pp, rx_ph);
938 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
939 				STATS_INC(nifs, NETIF_STATS_DROP);
940 				os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: packet length too short for header %u < %zu\n",
941 				    pcb->utun_ifp->if_xname, tx_length, header_offset);
942 				continue;
943 			}
944 
945 			size_t length = MIN(tx_length - header_offset,
946 			    pcb->utun_slot_size);
947 
948 			tx_ring_stats.kcrsi_slots_transferred++;
949 			tx_ring_stats.kcrsi_bytes_transferred += length;
950 
951 			// Fillout rx packet
952 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
953 			VERIFY(rx_buf != NULL);
954 			void *rx_baddr = __unsafe_forge_bidi_indexable(void *,
955 			    kern_buflet_get_data_address(rx_buf),
956 			    kern_buflet_get_data_limit(rx_buf));
957 			VERIFY(rx_baddr != NULL);
958 
959 			// Copy-in data from tx to rx
960 			memcpy((void *)rx_baddr, (void *)(tx_baddr + header_offset), length);
961 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
962 
963 			// Finalize and attach the packet
964 			error = kern_buflet_set_data_offset(rx_buf, 0);
965 			VERIFY(error == 0);
966 			error = kern_buflet_set_data_length(rx_buf, length);
967 			VERIFY(error == 0);
968 			error = kern_packet_set_headroom(rx_ph, 0);
969 			VERIFY(error == 0);
970 
971 			if (__packet_get_wake_flag(tx_ph)) {
972 				__packet_set_wake_flag(rx_ph);
973 			}
974 
975 			error = kern_packet_finalize(rx_ph);
976 			VERIFY(error == 0);
977 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
978 			VERIFY(error == 0);
979 
980 			STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
981 			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
982 			bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
983 
984 			rx_ring_stats.kcrsi_slots_transferred++;
985 			rx_ring_stats.kcrsi_bytes_transferred += length;
986 
987 			rx_pslot = rx_slot;
988 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
989 		}
990 
991 done:
992 		if (tx_pslot) {
993 			kern_channel_advance_slot(tx_ring, tx_pslot);
994 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
995 			(void)kern_channel_reclaim(tx_ring);
996 		}
997 
998 		// Unlock first, then exit ring
999 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
1000 		if (tx_ring != NULL) {
1001 			if (tx_pslot != NULL) {
1002 				kern_channel_notify(tx_ring, 0);
1003 			}
1004 			kr_exit(tx_ring);
1005 		}
1006 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
1007 	}
1008 
1009 	if (rx_pslot) {
1010 		kern_channel_advance_slot(rx_ring, rx_pslot);
1011 		kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
1012 	}
1013 
1014 
1015 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
1016 
1017 	utun_data_move_end(pcb);
1018 	return 0;
1019 }
1020 
1021 static errno_t
utun_nexus_ifattach(struct utun_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)1022 utun_nexus_ifattach(struct utun_pcb *pcb,
1023     struct ifnet_init_eparams *init_params,
1024     struct ifnet **ifp)
1025 {
1026 	errno_t err;
1027 	nexus_controller_t controller = kern_nexus_shared_controller();
1028 	struct kern_nexus_net_init net_init;
1029 	struct kern_pbufpool_init pp_init;
1030 
1031 	nexus_name_t provider_name;
1032 	snprintf((char *)provider_name, sizeof(provider_name),
1033 	    "com.apple.netif.%s", pcb->utun_if_xname);
1034 
1035 	struct kern_nexus_provider_init prov_init = {
1036 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1037 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1038 		.nxpi_pre_connect = utun_nexus_pre_connect,
1039 		.nxpi_connected = utun_nexus_connected,
1040 		.nxpi_pre_disconnect = utun_netif_pre_disconnect,
1041 		.nxpi_disconnected = utun_nexus_disconnected,
1042 		.nxpi_ring_init = utun_netif_ring_init,
1043 		.nxpi_ring_fini = utun_netif_ring_fini,
1044 		.nxpi_slot_init = NULL,
1045 		.nxpi_slot_fini = NULL,
1046 		.nxpi_sync_tx = utun_netif_sync_tx,
1047 		.nxpi_sync_rx = utun_netif_sync_rx,
1048 		.nxpi_tx_doorbell = utun_netif_tx_doorbell,
1049 	};
1050 
1051 	nexus_attr_t __single nxa = NULL;
1052 	err = kern_nexus_attr_create(&nxa);
1053 	if (err != 0) {
1054 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1055 		    __func__, err);
1056 		goto failed;
1057 	}
1058 
1059 	uint64_t slot_buffer_size = pcb->utun_slot_size;
1060 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1061 	VERIFY(err == 0);
1062 
1063 	// Reset ring size for netif nexus to limit memory usage
1064 	uint64_t ring_size = pcb->utun_netif_ring_size;
1065 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1066 	VERIFY(err == 0);
1067 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1068 	VERIFY(err == 0);
1069 
1070 	if (utun_in_wmm_mode(pcb)) {
1071 		os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
1072 		    __func__, pcb->utun_if_xname);
1073 
1074 		init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
1075 
1076 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_RINGS,
1077 		    UTUN_NETIF_WMM_TX_RING_COUNT);
1078 		VERIFY(err == 0);
1079 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_RINGS,
1080 		    UTUN_NETIF_WMM_RX_RING_COUNT);
1081 		VERIFY(err == 0);
1082 
1083 		err = kern_nexus_attr_set(nxa, NEXUS_ATTR_QMAP, NEXUS_QMAP_TYPE_WMM);
1084 		VERIFY(err == 0);
1085 	}
1086 
1087 	pcb->utun_netif_txring_size = ring_size;
1088 
1089 	bzero(&pp_init, sizeof(pp_init));
1090 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1091 	pp_init.kbi_flags |= (KBIF_VIRTUAL_DEVICE | KBIF_USER_ACCESS);
1092 	// Note: we need more packets than can be held in the tx and rx rings because
1093 	// packets can also be in the AQM queue(s)
1094 	pp_init.kbi_packets = pcb->utun_netif_ring_size * (2 * pcb->utun_kpipe_count + 1);
1095 	pp_init.kbi_bufsize = pcb->utun_slot_size;
1096 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
1097 	pp_init.kbi_max_frags = 1;
1098 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1099 	    "%s", provider_name);
1100 	pp_init.kbi_ctx = NULL;
1101 	pp_init.kbi_ctx_retain = NULL;
1102 	pp_init.kbi_ctx_release = NULL;
1103 
1104 	err = kern_pbufpool_create(&pp_init, &pcb->utun_netif_pp, NULL);
1105 	if (err != 0) {
1106 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
1107 		goto failed;
1108 	}
1109 
1110 	err = kern_nexus_controller_register_provider(controller,
1111 	    utun_nx_dom_prov,
1112 	    provider_name,
1113 	    &prov_init,
1114 	    sizeof(prov_init),
1115 	    nxa,
1116 	    &pcb->utun_nx.if_provider);
1117 	if (err != 0) {
1118 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
1119 		    __func__, err);
1120 		goto failed;
1121 	}
1122 
1123 	bzero(&net_init, sizeof(net_init));
1124 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
1125 	net_init.nxneti_flags = 0;
1126 	net_init.nxneti_eparams = init_params;
1127 	net_init.nxneti_lladdr = NULL;
1128 	net_init.nxneti_prepare = utun_netif_prepare;
1129 	net_init.nxneti_rx_pbufpool = pcb->utun_netif_pp;
1130 	net_init.nxneti_tx_pbufpool = pcb->utun_netif_pp;
1131 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
1132 	    pcb->utun_nx.if_provider,
1133 	    pcb,
1134 	    NULL,
1135 	    &pcb->utun_nx.if_instance,
1136 	    &net_init,
1137 	    ifp);
1138 	if (err != 0) {
1139 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
1140 		    __func__, err);
1141 		kern_nexus_controller_deregister_provider(controller,
1142 		    pcb->utun_nx.if_provider);
1143 		uuid_clear(pcb->utun_nx.if_provider);
1144 		goto failed;
1145 	}
1146 
1147 failed:
1148 	if (nxa) {
1149 		kern_nexus_attr_destroy(nxa);
1150 	}
1151 	if (err && pcb->utun_netif_pp != NULL) {
1152 		kern_pbufpool_destroy(pcb->utun_netif_pp);
1153 		pcb->utun_netif_pp = NULL;
1154 	}
1155 	return err;
1156 }
1157 
1158 static void
utun_detach_provider_and_instance(uuid_t provider,uuid_t instance)1159 utun_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1160 {
1161 	nexus_controller_t controller = kern_nexus_shared_controller();
1162 	errno_t err;
1163 
1164 	if (!uuid_is_null(instance)) {
1165 		err = kern_nexus_controller_free_provider_instance(controller,
1166 		    instance);
1167 		if (err != 0) {
1168 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1169 			    __func__, err);
1170 		}
1171 		uuid_clear(instance);
1172 	}
1173 	if (!uuid_is_null(provider)) {
1174 		err = kern_nexus_controller_deregister_provider(controller,
1175 		    provider);
1176 		if (err != 0) {
1177 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1178 		}
1179 		uuid_clear(provider);
1180 	}
1181 	return;
1182 }
1183 
1184 static void
utun_nexus_detach(struct utun_pcb * pcb)1185 utun_nexus_detach(struct utun_pcb *pcb)
1186 {
1187 	utun_nx_t nx = &pcb->utun_nx;
1188 	nexus_controller_t controller = kern_nexus_shared_controller();
1189 	errno_t err;
1190 
1191 	if (!uuid_is_null(nx->fsw_device)) {
1192 		err = kern_nexus_ifdetach(controller,
1193 		    nx->fsw_instance,
1194 		    nx->fsw_device);
1195 		if (err != 0) {
1196 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
1197 			    __func__, err);
1198 		}
1199 	}
1200 
1201 	utun_detach_provider_and_instance(nx->fsw_provider,
1202 	    nx->fsw_instance);
1203 	utun_detach_provider_and_instance(nx->if_provider,
1204 	    nx->if_instance);
1205 
1206 	if (pcb->utun_netif_pp != NULL) {
1207 		kern_pbufpool_destroy(pcb->utun_netif_pp);
1208 		pcb->utun_netif_pp = NULL;
1209 	}
1210 	memset(nx, 0, sizeof(*nx));
1211 }
1212 
1213 static errno_t
utun_create_fs_provider_and_instance(struct utun_pcb * pcb,const char * type_name,const char * ifname,uuid_t * provider,uuid_t * instance)1214 utun_create_fs_provider_and_instance(struct utun_pcb *pcb,
1215     const char *type_name,
1216     const char *ifname,
1217     uuid_t *provider, uuid_t *instance)
1218 {
1219 	nexus_attr_t __single attr = NULL;
1220 	nexus_controller_t controller = kern_nexus_shared_controller();
1221 	uuid_t dom_prov;
1222 	errno_t err;
1223 	struct kern_nexus_init init;
1224 	nexus_name_t    provider_name;
1225 
1226 	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1227 	    &dom_prov);
1228 	if (err != 0) {
1229 		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
1230 		    __func__, type_name, err);
1231 		goto failed;
1232 	}
1233 
1234 	err = kern_nexus_attr_create(&attr);
1235 	if (err != 0) {
1236 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1237 		    __func__, err);
1238 		goto failed;
1239 	}
1240 
1241 	uint64_t slot_buffer_size = pcb->utun_slot_size;
1242 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1243 	VERIFY(err == 0);
1244 
1245 	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1246 	uint64_t tx_ring_size = pcb->utun_tx_fsw_ring_size;
1247 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1248 	VERIFY(err == 0);
1249 	uint64_t rx_ring_size = pcb->utun_rx_fsw_ring_size;
1250 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1251 	VERIFY(err == 0);
1252 	/*
1253 	 * Configure flowswitch to use super-packet (multi-buflet).
1254 	 * This allows flowswitch to perform intra-stack packet aggregation.
1255 	 */
1256 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
1257 	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
1258 	VERIFY(err == 0);
1259 
1260 	snprintf((char *)provider_name, sizeof(provider_name),
1261 	    "com.apple.%s.%s", type_name, ifname);
1262 	err = kern_nexus_controller_register_provider(controller,
1263 	    dom_prov,
1264 	    provider_name,
1265 	    NULL,
1266 	    0,
1267 	    attr,
1268 	    provider);
1269 	kern_nexus_attr_destroy(attr);
1270 	attr = NULL;
1271 	if (err != 0) {
1272 		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
1273 		    __func__, type_name, err);
1274 		goto failed;
1275 	}
1276 	bzero(&init, sizeof(init));
1277 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1278 	err = kern_nexus_controller_alloc_provider_instance(controller,
1279 	    *provider,
1280 	    NULL, NULL,
1281 	    instance, &init);
1282 	if (err != 0) {
1283 		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
1284 		    __func__, type_name, err);
1285 		kern_nexus_controller_deregister_provider(controller,
1286 		    *provider);
1287 		uuid_clear(*provider);
1288 	}
1289 failed:
1290 	return err;
1291 }
1292 
1293 static errno_t
utun_flowswitch_attach(struct utun_pcb * pcb)1294 utun_flowswitch_attach(struct utun_pcb *pcb)
1295 {
1296 	nexus_controller_t controller = kern_nexus_shared_controller();
1297 	errno_t err = 0;
1298 	utun_nx_t nx = &pcb->utun_nx;
1299 
1300 	// Allocate flowswitch
1301 	err = utun_create_fs_provider_and_instance(pcb,
1302 	    "flowswitch",
1303 	    pcb->utun_ifp->if_xname,
1304 	    &nx->fsw_provider,
1305 	    &nx->fsw_instance);
1306 	if (err != 0) {
1307 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
1308 		    __func__);
1309 		goto failed;
1310 	}
1311 
1312 	// Attach flowswitch to device port
1313 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
1314 	    NULL, nx->if_instance,
1315 	    FALSE, &nx->fsw_device);
1316 	if (err != 0) {
1317 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
1318 		goto failed;
1319 	}
1320 
1321 	// Extract the agent UUID and save for later
1322 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
1323 	if (flowswitch_nx != NULL) {
1324 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
1325 		if (flowswitch != NULL) {
1326 			FSW_RLOCK(flowswitch);
1327 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
1328 			FSW_UNLOCK(flowswitch);
1329 		} else {
1330 			os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - flowswitch is NULL\n");
1331 		}
1332 		nx_release(flowswitch_nx);
1333 	} else {
1334 		os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - unable to find flowswitch nexus\n");
1335 	}
1336 
1337 	return 0;
1338 
1339 failed:
1340 	utun_nexus_detach(pcb);
1341 
1342 	errno_t detach_error = 0;
1343 	if ((detach_error = ifnet_detach(pcb->utun_ifp)) != 0) {
1344 		panic("utun_flowswitch_attach - ifnet_detach failed: %d", detach_error);
1345 		/* NOT REACHED */
1346 	}
1347 
1348 	return err;
1349 }
1350 
1351 static errno_t
utun_register_kernel_pipe_nexus(struct utun_pcb * pcb)1352 utun_register_kernel_pipe_nexus(struct utun_pcb *pcb)
1353 {
1354 	nexus_attr_t __single nxa = NULL;
1355 	errno_t result;
1356 
1357 	lck_mtx_lock(&utun_lock);
1358 	if (utun_ncd_refcount++) {
1359 		lck_mtx_unlock(&utun_lock);
1360 		return 0;
1361 	}
1362 
1363 	result = kern_nexus_controller_create(&utun_ncd);
1364 	if (result) {
1365 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
1366 		    __FUNCTION__, result);
1367 		goto done;
1368 	}
1369 
1370 	uuid_t dom_prov;
1371 	result = kern_nexus_get_default_domain_provider(
1372 		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1373 	if (result) {
1374 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
1375 		    __FUNCTION__, result);
1376 		goto done;
1377 	}
1378 
1379 	struct kern_nexus_provider_init prov_init = {
1380 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1381 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1382 		.nxpi_pre_connect = utun_nexus_pre_connect,
1383 		.nxpi_connected = utun_nexus_connected,
1384 		.nxpi_pre_disconnect = utun_nexus_pre_disconnect,
1385 		.nxpi_disconnected = utun_nexus_disconnected,
1386 		.nxpi_ring_init = utun_kpipe_ring_init,
1387 		.nxpi_ring_fini = utun_kpipe_ring_fini,
1388 		.nxpi_slot_init = NULL,
1389 		.nxpi_slot_fini = NULL,
1390 		.nxpi_sync_tx = utun_kpipe_sync_tx,
1391 		.nxpi_sync_rx = utun_kpipe_sync_rx,
1392 		.nxpi_tx_doorbell = NULL,
1393 	};
1394 
1395 	result = kern_nexus_attr_create(&nxa);
1396 	if (result) {
1397 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1398 		    __FUNCTION__, result);
1399 		goto done;
1400 	}
1401 
1402 	uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1403 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1404 	VERIFY(result == 0);
1405 
1406 	// Reset ring size for kernel pipe nexus to limit memory usage
1407 	uint64_t ring_size =
1408 	    pcb->utun_kpipe_tx_ring_size != 0 ? pcb->utun_kpipe_tx_ring_size :
1409 	    if_utun_ring_size;
1410 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1411 	VERIFY(result == 0);
1412 
1413 	ring_size =
1414 	    pcb->utun_kpipe_rx_ring_size != 0 ? pcb->utun_kpipe_rx_ring_size :
1415 	    if_utun_ring_size;
1416 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1417 	VERIFY(result == 0);
1418 
1419 	nexus_domain_provider_name_t domain_provider_name = "com.apple.nexus.utun.kpipe";
1420 
1421 	result = kern_nexus_controller_register_provider(utun_ncd,
1422 	    dom_prov,
1423 	    domain_provider_name,
1424 	    &prov_init,
1425 	    sizeof(prov_init),
1426 	    nxa,
1427 	    &utun_kpipe_uuid);
1428 	if (result) {
1429 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
1430 		    __FUNCTION__, result);
1431 		goto done;
1432 	}
1433 
1434 done:
1435 	if (nxa) {
1436 		kern_nexus_attr_destroy(nxa);
1437 	}
1438 
1439 	if (result) {
1440 		if (utun_ncd) {
1441 			kern_nexus_controller_destroy(utun_ncd);
1442 			utun_ncd = NULL;
1443 		}
1444 		utun_ncd_refcount = 0;
1445 	}
1446 
1447 	lck_mtx_unlock(&utun_lock);
1448 
1449 	return result;
1450 }
1451 
1452 static void
utun_unregister_kernel_pipe_nexus(void)1453 utun_unregister_kernel_pipe_nexus(void)
1454 {
1455 	lck_mtx_lock(&utun_lock);
1456 
1457 	VERIFY(utun_ncd_refcount > 0);
1458 
1459 	if (--utun_ncd_refcount == 0) {
1460 		kern_nexus_controller_destroy(utun_ncd);
1461 		utun_ncd = NULL;
1462 	}
1463 
1464 	lck_mtx_unlock(&utun_lock);
1465 }
1466 
1467 /*
 * Holding record for kpipe channels that have been cleared from the pcb
 * under lock but still need to be freed later.
 *
 * utun_detach_channels() fills it in while the pcb lock is held
 * exclusively; utun_free_channels() later releases what it describes and
 * zeroes the record.
 */
1470 struct utun_detached_channels {
1471 	int count;                              /* number of valid entries in uuids[]; 0 means nothing to free */
1472 	kern_pbufpool_t pp;                     /* kpipe packet pool taken over from pcb->utun_kpipe_pp */
1473 	uuid_t uuids[UTUN_IF_MAX_RING_COUNT];   /* kpipe instance UUIDs copied out of the pcb */
1474 };
1475 
1476 static void
utun_detach_channels(struct utun_pcb * pcb,struct utun_detached_channels * dc)1477 utun_detach_channels(struct utun_pcb *pcb, struct utun_detached_channels *dc)
1478 {
1479 	LCK_RW_ASSERT(&pcb->utun_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);
1480 
1481 	if (!utun_flag_isset(pcb, UTUN_FLAGS_KPIPE_ALLOCATED)) {
1482 		for (int i = 0; i < UTUN_IF_MAX_RING_COUNT; i++) {
1483 			VERIFY(uuid_is_null(pcb->utun_kpipe_uuid[i]));
1484 		}
1485 		dc->count = 0;
1486 		return;
1487 	}
1488 
1489 	dc->count = pcb->utun_kpipe_count;
1490 
1491 	VERIFY(dc->count >= 0);
1492 	VERIFY(dc->count <= UTUN_IF_MAX_RING_COUNT);
1493 
1494 	for (int i = 0; i < dc->count; i++) {
1495 		VERIFY(!uuid_is_null(pcb->utun_kpipe_uuid[i]));
1496 		uuid_copy(dc->uuids[i], pcb->utun_kpipe_uuid[i]);
1497 		uuid_clear(pcb->utun_kpipe_uuid[i]);
1498 	}
1499 	for (int i = dc->count; i < UTUN_IF_MAX_RING_COUNT; i++) {
1500 		VERIFY(uuid_is_null(pcb->utun_kpipe_uuid[i]));
1501 	}
1502 
1503 	if (dc->count) {
1504 		VERIFY(pcb->utun_kpipe_pp);
1505 	} else {
1506 		VERIFY(!pcb->utun_kpipe_pp);
1507 	}
1508 
1509 	dc->pp = pcb->utun_kpipe_pp;
1510 
1511 	pcb->utun_kpipe_pp = NULL;
1512 
1513 	utun_flag_clr(pcb, UTUN_FLAGS_KPIPE_ALLOCATED);
1514 }
1515 
1516 static void
utun_free_channels(struct utun_detached_channels * dc)1517 utun_free_channels(struct utun_detached_channels *dc)
1518 {
1519 	if (!dc->count) {
1520 		return;
1521 	}
1522 
1523 	for (int i = 0; i < dc->count; i++) {
1524 		errno_t result;
1525 		result = kern_nexus_controller_free_provider_instance(utun_ncd,
1526 		    dc->uuids[i]);
1527 		VERIFY(!result);
1528 	}
1529 
1530 	VERIFY(dc->pp);
1531 	kern_pbufpool_destroy(dc->pp);
1532 
1533 	utun_unregister_kernel_pipe_nexus();
1534 
1535 	memset(dc, 0, sizeof(*dc));
1536 }
1537 
1538 static errno_t
utun_enable_channel(struct utun_pcb * pcb,struct proc * proc)1539 utun_enable_channel(struct utun_pcb *pcb, struct proc *proc)
1540 {
1541 	struct kern_nexus_init init;
1542 	struct kern_pbufpool_init pp_init;
1543 	errno_t result;
1544 
1545 	kauth_cred_t cred = kauth_cred_get();
1546 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
1547 	if (result) {
1548 		return result;
1549 	}
1550 
1551 	VERIFY(pcb->utun_kpipe_count);
1552 	VERIFY(!utun_flag_isset(pcb, UTUN_FLAGS_KPIPE_ALLOCATED));
1553 
1554 	result = utun_register_kernel_pipe_nexus(pcb);
1555 	if (result) {
1556 		return result;
1557 	}
1558 
1559 	VERIFY(utun_ncd);
1560 
1561 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1562 
1563 	/*
1564 	 * Make sure we can fit packets in the channel buffers and
1565 	 * Allow an extra 4 bytes for the protocol number header in the channel
1566 	 */
1567 	if (pcb->utun_ifp->if_mtu + UTUN_HEADER_SIZE(pcb) > pcb->utun_slot_size) {
1568 		result = EOPNOTSUPP;
1569 		goto done;
1570 	}
1571 
1572 	bzero(&pp_init, sizeof(pp_init));
1573 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1574 	pp_init.kbi_flags |= (KBIF_VIRTUAL_DEVICE | KBIF_USER_ACCESS);
1575 	// Note: We only needs are many packets as can be held in the tx and rx rings
1576 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2 * pcb->utun_kpipe_count;
1577 	pp_init.kbi_bufsize = pcb->utun_slot_size;
1578 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
1579 	pp_init.kbi_max_frags = 1;
1580 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1581 	    "com.apple.kpipe.%s", pcb->utun_if_xname);
1582 	pp_init.kbi_ctx = NULL;
1583 	pp_init.kbi_ctx_retain = NULL;
1584 	pp_init.kbi_ctx_release = NULL;
1585 
1586 	result = kern_pbufpool_create(&pp_init, &pcb->utun_kpipe_pp,
1587 	    NULL);
1588 	if (result != 0) {
1589 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, result);
1590 		goto done;
1591 	}
1592 
1593 	bzero(&init, sizeof(init));
1594 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1595 	init.nxi_tx_pbufpool = pcb->utun_kpipe_pp;
1596 
1597 	for (unsigned int i = 0; i < pcb->utun_kpipe_count; i++) {
1598 		VERIFY(uuid_is_null(pcb->utun_kpipe_uuid[i]));
1599 		result = kern_nexus_controller_alloc_provider_instance(utun_ncd,
1600 		    utun_kpipe_uuid, pcb, NULL, &pcb->utun_kpipe_uuid[i], &init);
1601 
1602 		if (result == 0) {
1603 			nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
1604 			uuid_t uuid_null = {};
1605 			const bool has_proc_uuid = !uuid_is_null(pcb->utun_kpipe_proc_uuid);
1606 			pid_t pid = pcb->utun_kpipe_pid;
1607 			if (!pid && !has_proc_uuid) {
1608 				pid = proc_pid(proc);
1609 			}
1610 			result = kern_nexus_controller_bind_provider_instance(utun_ncd,
1611 			    pcb->utun_kpipe_uuid[i], &port,
1612 			    pid, has_proc_uuid ? pcb->utun_kpipe_proc_uuid : uuid_null, NULL,
1613 			    0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID : NEXUS_BIND_PID);
1614 		}
1615 
1616 		if (result != 0) {
1617 			/* Unwind all of them on error */
1618 			for (int j = 0; j < UTUN_IF_MAX_RING_COUNT; j++) {
1619 				if (!uuid_is_null(pcb->utun_kpipe_uuid[j])) {
1620 					kern_nexus_controller_free_provider_instance(utun_ncd,
1621 					    pcb->utun_kpipe_uuid[j]);
1622 					uuid_clear(pcb->utun_kpipe_uuid[j]);
1623 				}
1624 			}
1625 			goto done;
1626 		}
1627 	}
1628 
1629 done:
1630 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1631 
1632 	if (result) {
1633 		if (pcb->utun_kpipe_pp != NULL) {
1634 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1635 			pcb->utun_kpipe_pp = NULL;
1636 		}
1637 		utun_unregister_kernel_pipe_nexus();
1638 	} else {
1639 		utun_flag_set(pcb, UTUN_FLAGS_KPIPE_ALLOCATED);
1640 	}
1641 
1642 	return result;
1643 }
1644 
1645 #endif // UTUN_NEXUS
1646 
1647 errno_t
utun_register_control(void)1648 utun_register_control(void)
1649 {
1650 	struct kern_ctl_reg kern_ctl;
1651 	errno_t result = 0;
1652 
1653 #if UTUN_NEXUS
1654 	utun_register_nexus();
1655 #endif // UTUN_NEXUS
1656 
1657 	TAILQ_INIT(&utun_head);
1658 
1659 	bzero(&kern_ctl, sizeof(kern_ctl));
1660 	strlcpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
1661 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
1662 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP | CTL_FLAG_REG_EXTENDED; /* Require root */
1663 	kern_ctl.ctl_sendsize = 512 * 1024;
1664 	kern_ctl.ctl_recvsize = 512 * 1024;
1665 	kern_ctl.ctl_setup = utun_ctl_setup;
1666 	kern_ctl.ctl_bind = utun_ctl_bind;
1667 	kern_ctl.ctl_connect = utun_ctl_connect;
1668 	kern_ctl.ctl_disconnect = utun_ctl_disconnect;
1669 	kern_ctl.ctl_send = utun_ctl_send;
1670 	kern_ctl.ctl_setopt = utun_ctl_setopt;
1671 	kern_ctl.ctl_getopt = utun_ctl_getopt;
1672 	kern_ctl.ctl_rcvd = utun_ctl_rcvd;
1673 
1674 	result = ctl_register(&kern_ctl, &utun_kctlref);
1675 	if (result != 0) {
1676 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - ctl_register failed: %d\n", result);
1677 		return result;
1678 	}
1679 
1680 	/* Register the protocol plumbers */
1681 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN,
1682 	    utun_attach_proto, NULL)) != 0) {
1683 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN) failed: %d\n",
1684 		    result);
1685 		ctl_deregister(utun_kctlref);
1686 		return result;
1687 	}
1688 
1689 	/* Register the protocol plumbers */
1690 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN,
1691 	    utun_attach_proto, NULL)) != 0) {
1692 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_UTUN);
1693 		ctl_deregister(utun_kctlref);
1694 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN) failed: %d\n",
1695 		    result);
1696 		return result;
1697 	}
1698 
1699 	return 0;
1700 }
1701 
1702 /* Kernel control functions */
1703 
1704 static inline int
utun_find_by_unit(u_int32_t unit)1705 utun_find_by_unit(u_int32_t unit)
1706 {
1707 	struct utun_pcb *next_pcb = NULL;
1708 	int found = 0;
1709 
1710 	TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1711 		if (next_pcb->utun_unit == unit) {
1712 			found = 1;
1713 			break;
1714 		}
1715 	}
1716 
1717 	return found;
1718 }
1719 
1720 static inline void
utun_free_pcb(struct utun_pcb * pcb,bool locked)1721 utun_free_pcb(struct utun_pcb *pcb, bool locked)
1722 {
1723 #if UTUN_NEXUS
1724 	mbuf_freem_list(pcb->utun_input_chain);
1725 	pcb->utun_input_chain_count = 0;
1726 	lck_mtx_destroy(&pcb->utun_input_chain_lock, &utun_lck_grp);
1727 	lck_mtx_destroy(&pcb->utun_pcb_data_move_lock, &utun_lck_grp);
1728 #endif // UTUN_NEXUS
1729 	lck_rw_destroy(&pcb->utun_pcb_lock, &utun_lck_grp);
1730 	if (!locked) {
1731 		lck_mtx_lock(&utun_lock);
1732 	}
1733 	TAILQ_REMOVE(&utun_head, pcb, utun_chain);
1734 	if (!locked) {
1735 		lck_mtx_unlock(&utun_lock);
1736 	}
1737 	zfree(utun_pcb_zone, pcb);
1738 }
1739 
1740 static errno_t
utun_ctl_setup(u_int32_t * unit,void ** unitinfo)1741 utun_ctl_setup(u_int32_t *unit, void **unitinfo)
1742 {
1743 	if (unit == NULL || unitinfo == NULL) {
1744 		return EINVAL;
1745 	}
1746 
1747 	lck_mtx_lock(&utun_lock);
1748 
1749 	/* Find next available unit */
1750 	if (*unit == 0) {
1751 		*unit = 1;
1752 		while (*unit != ctl_maxunit) {
1753 			if (utun_find_by_unit(*unit)) {
1754 				(*unit)++;
1755 			} else {
1756 				break;
1757 			}
1758 		}
1759 		if (*unit == ctl_maxunit) {
1760 			lck_mtx_unlock(&utun_lock);
1761 			return EBUSY;
1762 		}
1763 	} else if (utun_find_by_unit(*unit)) {
1764 		lck_mtx_unlock(&utun_lock);
1765 		return EBUSY;
1766 	}
1767 
1768 	/* Find some open interface id */
1769 	u_int32_t chosen_unique_id = 1;
1770 	struct utun_pcb *next_pcb = TAILQ_LAST(&utun_head, utun_list);
1771 	if (next_pcb != NULL) {
1772 		/* List was not empty, add one to the last item */
1773 		chosen_unique_id = next_pcb->utun_unique_id + 1;
1774 		next_pcb = NULL;
1775 
1776 		/*
1777 		 * If this wrapped the id number, start looking at
1778 		 * the front of the list for an unused id.
1779 		 */
1780 		if (chosen_unique_id == 0) {
1781 			/* Find the next unused ID */
1782 			chosen_unique_id = 1;
1783 			TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1784 				if (next_pcb->utun_unique_id > chosen_unique_id) {
1785 					/* We found a gap */
1786 					break;
1787 				}
1788 
1789 				chosen_unique_id = next_pcb->utun_unique_id + 1;
1790 			}
1791 		}
1792 	}
1793 
1794 	struct utun_pcb *__single pcb = zalloc_flags(utun_pcb_zone, Z_WAITOK | Z_ZERO);
1795 
1796 	*unitinfo = pcb;
1797 	pcb->utun_unit = *unit;
1798 	pcb->utun_unique_id = chosen_unique_id;
1799 
1800 	if (next_pcb != NULL) {
1801 		TAILQ_INSERT_BEFORE(next_pcb, pcb, utun_chain);
1802 	} else {
1803 		TAILQ_INSERT_TAIL(&utun_head, pcb, utun_chain);
1804 	}
1805 
1806 	lck_mtx_unlock(&utun_lock);
1807 
1808 	return 0;
1809 }
1810 
1811 static errno_t
utun_ctl_bind(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1812 utun_ctl_bind(kern_ctl_ref kctlref,
1813     struct sockaddr_ctl *sac,
1814     void **unitinfo)
1815 {
1816 	if (*unitinfo == NULL) {
1817 		u_int32_t unit = 0;
1818 		(void)utun_ctl_setup(&unit, unitinfo);
1819 	}
1820 
1821 	struct utun_pcb *__single pcb = (struct utun_pcb *)*unitinfo;
1822 	if (pcb == NULL) {
1823 		return EINVAL;
1824 	}
1825 
1826 	if (pcb->utun_ctlref != NULL) {
1827 		// Return if bind was already called
1828 		return EINVAL;
1829 	}
1830 
1831 	pcb->utun_ctlref = kctlref;
1832 	pcb->utun_unit = sac->sc_unit;
1833 	pcb->utun_max_pending_packets = 1;
1834 
1835 #if UTUN_NEXUS
1836 	pcb->utun_use_netif = false;
1837 	pcb->utun_attach_fsw = true;
1838 	pcb->utun_netif_connected = false;
1839 	pcb->utun_slot_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1840 	pcb->utun_netif_ring_size = if_utun_ring_size;
1841 	pcb->utun_tx_fsw_ring_size = if_utun_tx_fsw_ring_size;
1842 	pcb->utun_rx_fsw_ring_size = if_utun_rx_fsw_ring_size;
1843 	pcb->utun_input_chain_count = 0;
1844 	lck_mtx_init(&pcb->utun_input_chain_lock, &utun_lck_grp, &utun_lck_attr);
1845 	lck_mtx_init(&pcb->utun_pcb_data_move_lock,
1846 	    &utun_lck_grp, &utun_lck_attr);
1847 #endif // UTUN_NEXUS
1848 
1849 	lck_rw_init(&pcb->utun_pcb_lock, &utun_lck_grp, &utun_lck_attr);
1850 
1851 	return 0;
1852 }
1853 
1854 static errno_t
utun_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1855 utun_ctl_connect(kern_ctl_ref kctlref,
1856     struct sockaddr_ctl *sac,
1857     void **unitinfo)
1858 {
1859 	struct ifnet_init_eparams utun_init = {};
1860 	errno_t result = 0;
1861 
1862 	if (*unitinfo == NULL) {
1863 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1864 	}
1865 
1866 	struct utun_pcb *__single pcb = *unitinfo;
1867 	if (pcb == NULL) {
1868 		return EINVAL;
1869 	}
1870 
1871 	/* Handle case where utun_ctl_setup() was called, but utun_ctl_bind() was not */
1872 	if (pcb->utun_ctlref == NULL) {
1873 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1874 	}
1875 
1876 	snprintf(pcb->utun_if_xname, sizeof(pcb->utun_if_xname), "utun%d", pcb->utun_unit - 1);
1877 	snprintf(pcb->utun_unique_name, sizeof(pcb->utun_unique_name), "utunid%d", pcb->utun_unique_id - 1);
1878 
1879 	/* Create the interface */
1880 	bzero(&utun_init, sizeof(utun_init));
1881 	utun_init.ver = IFNET_INIT_CURRENT_VERSION;
1882 	utun_init.len = sizeof(utun_init);
1883 
1884 #if UTUN_NEXUS
1885 	if (pcb->utun_use_netif) {
1886 		utun_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
1887 		utun_init.tx_headroom = UTUN_IF_HEADROOM_SIZE;
1888 	} else
1889 #endif // UTUN_NEXUS
1890 	{
1891 		utun_init.flags = IFNET_INIT_NX_NOAUTO;
1892 		utun_init.start = utun_start;
1893 		utun_init.framer_extended = utun_framer;
1894 	}
1895 	utun_init.name = "utun";
1896 	utun_init.unit = pcb->utun_unit - 1;
1897 	utun_init.uniqueid_len = strbuflen(pcb->utun_unique_name);
1898 	utun_init.uniqueid = pcb->utun_unique_name;
1899 	utun_init.family = IFNET_FAMILY_UTUN;
1900 	utun_init.type = IFT_OTHER;
1901 	utun_init.demux = utun_demux;
1902 	utun_init.add_proto = utun_add_proto;
1903 	utun_init.del_proto = utun_del_proto;
1904 	utun_init.softc = pcb;
1905 	utun_init.ioctl = utun_ioctl;
1906 	utun_init.free = utun_detached;
1907 
1908 #if UTUN_NEXUS
1909 	/* We don't support kpipes without a netif */
1910 	if (pcb->utun_kpipe_count > 0 && !pcb->utun_use_netif) {
1911 		result = ENOTSUP;
1912 		os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - kpipe requires netif: failed %d\n", result);
1913 		utun_free_pcb(pcb, false);
1914 		*unitinfo = NULL;
1915 		return result;
1916 	}
1917 
1918 	if (pcb->utun_use_netif) {
1919 		result = utun_nexus_ifattach(pcb, &utun_init, &pcb->utun_ifp);
1920 		if (result != 0) {
1921 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_nexus_ifattach failed: %d\n", result);
1922 			utun_free_pcb(pcb, false);
1923 			*unitinfo = NULL;
1924 			return result;
1925 		}
1926 
1927 		if (pcb->utun_kpipe_count) {
1928 			result = utun_enable_channel(pcb, current_proc());
1929 			if (result) {
1930 				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
1931 				    __func__, pcb->utun_if_xname);
1932 				utun_free_pcb(pcb, false);
1933 				*unitinfo = NULL;
1934 				return result;
1935 			}
1936 		}
1937 
1938 		if (pcb->utun_attach_fsw) {
1939 			result = utun_flowswitch_attach(pcb);
1940 			if (result != 0) {
1941 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_flowswitch_attach failed: %d\n", result);
1942 				// Do not call utun_free_pcb(). We will be attached already, and will be freed later
1943 				// in utun_detached().
1944 				*unitinfo = NULL;
1945 				return result;
1946 			}
1947 		}
1948 
1949 		/* Attach to bpf */
1950 		bpfattach(pcb->utun_ifp, DLT_RAW, 0);
1951 	} else
1952 #endif // UTUN_NEXUS
1953 	{
1954 		/*
1955 		 * Upon success, this holds an ifnet reference which we will
1956 		 * release via ifnet_release() at final detach time.
1957 		 */
1958 		result = ifnet_allocate_extended(&utun_init, &pcb->utun_ifp);
1959 		if (result != 0) {
1960 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_allocate failed: %d\n", result);
1961 			utun_free_pcb(pcb, false);
1962 			*unitinfo = NULL;
1963 			return result;
1964 		}
1965 
1966 		/* Set flags and additional information. */
1967 		ifnet_set_mtu(pcb->utun_ifp, UTUN_DEFAULT_MTU);
1968 		ifnet_set_flags(pcb->utun_ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
1969 
1970 		/* The interface must generate its own IPv6 LinkLocal address,
1971 		 * if possible following the recommendation of RFC2472 to the 64bit interface ID
1972 		 */
1973 		ifnet_set_eflags(pcb->utun_ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
1974 
1975 		/* Reset the stats in case as the interface may have been recycled */
1976 		struct ifnet_stats_param stats;
1977 		bzero(&stats, sizeof(struct ifnet_stats_param));
1978 		ifnet_set_stat(pcb->utun_ifp, &stats);
1979 
1980 		/* Attach the interface */
1981 		result = ifnet_attach(pcb->utun_ifp, NULL);
1982 		if (result != 0) {
1983 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_attach failed: %d\n", result);
1984 			/* Release reference now since attach failed */
1985 			ifnet_release(pcb->utun_ifp);
1986 			utun_free_pcb(pcb, false);
1987 			*unitinfo = NULL;
1988 			return result;
1989 		}
1990 
1991 		/* Attach to bpf */
1992 		bpfattach(pcb->utun_ifp, DLT_NULL, UTUN_HEADER_SIZE(pcb));
1993 
1994 #if UTUN_NEXUS
1995 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
1996 		UTUN_SET_DATA_PATH_READY(pcb);
1997 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
1998 #endif // UTUN_NEXUS
1999 	}
2000 
2001 	/* The interfaces resoures allocated, mark it as running */
2002 	ifnet_set_flags(pcb->utun_ifp, IFF_RUNNING, IFF_RUNNING);
2003 
2004 	return result;
2005 }
2006 
2007 static errno_t
utun_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)2008 utun_detach_ip(ifnet_t interface,
2009     protocol_family_t protocol,
2010     socket_t pf_socket)
2011 {
2012 	errno_t result = EPROTONOSUPPORT;
2013 
2014 	/* Attempt a detach */
2015 	if (protocol == PF_INET) {
2016 		struct ifreq    ifr;
2017 
2018 		bzero(&ifr, sizeof(ifr));
2019 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2020 		    ifnet_name(interface), ifnet_unit(interface));
2021 
2022 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
2023 	} else if (protocol == PF_INET6) {
2024 		struct in6_ifreq        ifr6;
2025 
2026 		bzero(&ifr6, sizeof(ifr6));
2027 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2028 		    ifnet_name(interface), ifnet_unit(interface));
2029 
2030 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
2031 	}
2032 
2033 	return result;
2034 }
2035 
2036 static void
utun_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)2037 utun_remove_address(ifnet_t interface,
2038     protocol_family_t protocol,
2039     ifaddr_t address,
2040     socket_t pf_socket)
2041 {
2042 	errno_t result = 0;
2043 
2044 	/* Attempt a detach */
2045 	if (protocol == PF_INET) {
2046 		struct ifreq ifr;
2047 
2048 		bzero(&ifr, sizeof(ifr));
2049 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
2050 		    ifnet_name(interface), ifnet_unit(interface));
2051 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
2052 		if (result != 0) {
2053 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed: %d", result);
2054 		} else {
2055 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
2056 			if (result != 0) {
2057 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR failed: %d", result);
2058 			}
2059 		}
2060 	} else if (protocol == PF_INET6) {
2061 		struct in6_ifreq ifr6;
2062 
2063 		bzero(&ifr6, sizeof(ifr6));
2064 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
2065 		    ifnet_name(interface), ifnet_unit(interface));
2066 		result = ifaddr_address(address, SA(&ifr6.ifr_addr),
2067 		    sizeof(ifr6.ifr_addr));
2068 		if (result != 0) {
2069 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed (v6): %d",
2070 			    result);
2071 		} else {
2072 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
2073 			if (result != 0) {
2074 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR_IN6 failed: %d",
2075 				    result);
2076 			}
2077 		}
2078 	}
2079 }
2080 
2081 static void
utun_cleanup_family(ifnet_t interface,protocol_family_t protocol)2082 utun_cleanup_family(ifnet_t interface,
2083     protocol_family_t protocol)
2084 {
2085 	errno_t result = 0;
2086 	socket_ref_t pf_socket = NULL;
2087 	ifaddr_t *__null_terminated addresses = NULL;
2088 
2089 	if (protocol != PF_INET && protocol != PF_INET6) {
2090 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - invalid protocol family %d\n", protocol);
2091 		return;
2092 	}
2093 
2094 	/* Create a socket for removing addresses and detaching the protocol */
2095 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
2096 	if (result != 0) {
2097 		if (result != EAFNOSUPPORT) {
2098 			os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - failed to create %s socket: %d\n",
2099 			    protocol == PF_INET ? "IP" : "IPv6", result);
2100 		}
2101 		goto cleanup;
2102 	}
2103 
2104 	/* always set SS_PRIV, we want to close and detach regardless */
2105 	sock_setpriv(pf_socket, 1);
2106 
2107 	result = utun_detach_ip(interface, protocol, pf_socket);
2108 	if (result == 0 || result == ENXIO) {
2109 		/* We are done! We either detached or weren't attached. */
2110 		goto cleanup;
2111 	} else if (result != EBUSY) {
2112 		/* Uh, not really sure what happened here... */
2113 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
2114 		goto cleanup;
2115 	}
2116 
2117 	/*
2118 	 * At this point, we received an EBUSY error. This means there are
2119 	 * addresses attached. We should detach them and then try again.
2120 	 */
2121 	result = ifnet_get_address_list_family(interface, &addresses, protocol);
2122 	if (result != 0) {
2123 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
2124 		    ifnet_name(interface), ifnet_unit(interface),
2125 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
2126 		goto cleanup;
2127 	}
2128 
2129 	for (ifaddr_t *__null_terminated addr = addresses; *addr != NULL; addr++) {
2130 		utun_remove_address(interface, protocol, *addr, pf_socket);
2131 	}
2132 	ifnet_free_address_list(addresses);
2133 	addresses = NULL;
2134 
2135 	/*
2136 	 * The addresses should be gone, we should try the remove again.
2137 	 */
2138 	result = utun_detach_ip(interface, protocol, pf_socket);
2139 	if (result != 0 && result != ENXIO) {
2140 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
2141 	}
2142 
2143 cleanup:
2144 	if (pf_socket != NULL) {
2145 		sock_close(pf_socket);
2146 	}
2147 
2148 	if (addresses != NULL) {
2149 		ifnet_free_address_list(addresses);
2150 	}
2151 }
2152 
2153 static errno_t
utun_ctl_disconnect(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo)2154 utun_ctl_disconnect(__unused kern_ctl_ref kctlref,
2155     __unused u_int32_t unit,
2156     void *unitinfo)
2157 {
2158 	struct utun_pcb *__single pcb = unitinfo;
2159 	ifnet_t ifp = NULL;
2160 	errno_t result = 0;
2161 
2162 	if (pcb == NULL) {
2163 		return EINVAL;
2164 	}
2165 
2166 #if UTUN_NEXUS
2167 	/* Wait until all threads in the data paths are done. */
2168 	utun_wait_data_move_drain(pcb);
2169 	// Tell the nexus to stop all rings
2170 	if (pcb->utun_netif_nexus != NULL && pcb->utun_netif_connected) {
2171 		kern_nexus_stop(pcb->utun_netif_nexus);
2172 	}
2173 #endif // UTUN_NEXUS
2174 
2175 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2176 
2177 #if UTUN_NEXUS
2178 	struct utun_detached_channels dc = {};
2179 	utun_detach_channels(pcb, &dc);
2180 #endif // UTUN_NEXUS
2181 
2182 	pcb->utun_ctlref = NULL;
2183 
2184 	ifp = pcb->utun_ifp;
2185 	if (ifp != NULL) {
2186 #if UTUN_NEXUS
2187 		// Tell the nexus to stop all rings
2188 		if (pcb->utun_netif_nexus != NULL) {
2189 			/*
2190 			 * Quiesce the interface and flush any pending outbound packets.
2191 			 */
2192 			if_down(ifp);
2193 
2194 			/*
2195 			 * Suspend data movement and wait for IO threads to exit.
2196 			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
2197 			 * do this because utun nexuses are attached/detached separately.
2198 			 */
2199 			ifnet_datamov_suspend_and_drain(ifp);
2200 			if ((result = ifnet_detach(ifp)) != 0) {
2201 				panic("utun_ctl_disconnect - ifnet_detach failed: %d", result);
2202 			}
2203 
2204 			/*
2205 			 * We want to do everything in our power to ensure that the interface
2206 			 * really goes away when the socket is closed. We must remove IP/IPv6
2207 			 * addresses and detach the protocols. Finally, we can remove and
2208 			 * release the interface.
2209 			 */
2210 			utun_cleanup_family(ifp, AF_INET);
2211 			utun_cleanup_family(ifp, AF_INET6);
2212 
2213 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2214 
2215 			utun_free_channels(&dc);
2216 			utun_nexus_detach(pcb);
2217 
2218 			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
2219 			ifnet_datamov_resume(ifp);
2220 		} else
2221 #endif // UTUN_NEXUS
2222 		{
2223 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2224 
2225 #if UTUN_NEXUS
2226 			utun_free_channels(&dc);
2227 #endif // UTUN_NEXUS
2228 
2229 			/*
2230 			 * We want to do everything in our power to ensure that the interface
2231 			 * really goes away when the socket is closed. We must remove IP/IPv6
2232 			 * addresses and detach the protocols. Finally, we can remove and
2233 			 * release the interface.
2234 			 */
2235 			utun_cleanup_family(ifp, AF_INET);
2236 			utun_cleanup_family(ifp, AF_INET6);
2237 
2238 			/*
2239 			 * Detach now; utun_detach() will be called asynchronously once
2240 			 * the I/O reference count drops to 0.  There we will invoke
2241 			 * ifnet_release().
2242 			 */
2243 			if ((result = ifnet_detach(ifp)) != 0) {
2244 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_disconnect - ifnet_detach failed: %d\n", result);
2245 			}
2246 		}
2247 	} else {
2248 		// Bound, but not connected
2249 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2250 		utun_free_pcb(pcb, false);
2251 	}
2252 
2253 	return 0;
2254 }
2255 
2256 static errno_t
utun_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,mbuf_t m,__unused int flags)2257 utun_ctl_send(__unused kern_ctl_ref kctlref,
2258     __unused u_int32_t unit,
2259     void *unitinfo,
2260     mbuf_t m,
2261     __unused int flags)
2262 {
2263 	/*
2264 	 * The userland ABI requires the first four bytes have the protocol family
2265 	 * in network byte order: swap them
2266 	 */
2267 	if (m_pktlen(m) >= (int32_t)UTUN_HEADER_SIZE((struct utun_pcb *)unitinfo)) {
2268 		*mtod(m, protocol_family_t *) = ntohl(*mtod(m, protocol_family_t *));
2269 	} else {
2270 		os_log_error(OS_LOG_DEFAULT, "%s - unexpected short mbuf pkt len %d\n", __func__, m_pktlen(m));
2271 	}
2272 
2273 	return utun_pkt_input((struct utun_pcb *)unitinfo, m);
2274 }
2275 
2276 static errno_t
utun_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (len)data,size_t len)2277 utun_ctl_setopt(__unused kern_ctl_ref kctlref,
2278     __unused u_int32_t unit,
2279     void *unitinfo,
2280     int opt,
2281     void *__sized_by(len) data,
2282     size_t len)
2283 {
2284 	struct utun_pcb *__single pcb = unitinfo;
2285 	errno_t result = 0;
2286 	/* check for privileges for privileged options */
2287 	switch (opt) {
2288 	case UTUN_OPT_FLAGS:
2289 	case UTUN_OPT_EXT_IFDATA_STATS:
2290 	case UTUN_OPT_SET_DELEGATE_INTERFACE:
2291 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2292 			return EPERM;
2293 		}
2294 		break;
2295 	}
2296 
2297 	switch (opt) {
2298 	case UTUN_OPT_FLAGS:
2299 		if (len != sizeof(u_int32_t)) {
2300 			result = EMSGSIZE;
2301 			break;
2302 		}
2303 		if (pcb->utun_ifp != NULL) {
2304 			// Only can set before connecting
2305 			result = EINVAL;
2306 			break;
2307 		}
2308 		pcb->utun_external_flags = *(u_int32_t *)data;
2309 		break;
2310 
2311 	case UTUN_OPT_EXT_IFDATA_STATS:
2312 		if (len != sizeof(int)) {
2313 			result = EMSGSIZE;
2314 			break;
2315 		}
2316 		if (pcb->utun_ifp == NULL) {
2317 			// Only can set after connecting
2318 			result = EINVAL;
2319 			break;
2320 		}
2321 		pcb->utun_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2322 		break;
2323 
2324 	case UTUN_OPT_INC_IFDATA_STATS_IN:
2325 	case UTUN_OPT_INC_IFDATA_STATS_OUT: {
2326 		struct utun_stats_param *utsp = (struct utun_stats_param *)data;
2327 
2328 		if (utsp == NULL || len < sizeof(struct utun_stats_param)) {
2329 			result = EINVAL;
2330 			break;
2331 		}
2332 		if (pcb->utun_ifp == NULL) {
2333 			// Only can set after connecting
2334 			result = EINVAL;
2335 			break;
2336 		}
2337 		if (!pcb->utun_ext_ifdata_stats) {
2338 			result = EINVAL;
2339 			break;
2340 		}
2341 		if (opt == UTUN_OPT_INC_IFDATA_STATS_IN) {
2342 			ifnet_stat_increment_in(pcb->utun_ifp, utsp->utsp_packets,
2343 			    utsp->utsp_bytes, utsp->utsp_errors);
2344 		} else {
2345 			ifnet_stat_increment_out(pcb->utun_ifp, utsp->utsp_packets,
2346 			    utsp->utsp_bytes, utsp->utsp_errors);
2347 		}
2348 		break;
2349 	}
2350 	case UTUN_OPT_SET_DELEGATE_INTERFACE: {
2351 		ifnet_ref_t     del_ifp = NULL;
2352 		char            name[IFNAMSIZ];
2353 
2354 		if (len > IFNAMSIZ - 1) {
2355 			result = EMSGSIZE;
2356 			break;
2357 		}
2358 		if (pcb->utun_ifp == NULL) {
2359 			// Only can set after connecting
2360 			result = EINVAL;
2361 			break;
2362 		}
2363 		if (len != 0) {            /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2364 			bcopy(data, name, len);
2365 			name[len] = 0;
2366 			result = ifnet_find_by_name(__unsafe_null_terminated_from_indexable(name), &del_ifp);
2367 		}
2368 		if (result == 0) {
2369 			result = ifnet_set_delegate(pcb->utun_ifp, del_ifp);
2370 			if (del_ifp) {
2371 				ifnet_release(del_ifp);
2372 			}
2373 		}
2374 		break;
2375 	}
2376 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2377 		u_int32_t max_pending_packets = 0;
2378 		if (len != sizeof(u_int32_t)) {
2379 			result = EMSGSIZE;
2380 			break;
2381 		}
2382 		max_pending_packets = *(u_int32_t *)data;
2383 		if (max_pending_packets == 0) {
2384 			result = EINVAL;
2385 			break;
2386 		}
2387 		pcb->utun_max_pending_packets = max_pending_packets;
2388 		break;
2389 	}
2390 #if UTUN_NEXUS
2391 	case UTUN_OPT_ENABLE_CHANNEL: {
2392 		if (len != sizeof(int)) {
2393 			result = EMSGSIZE;
2394 			break;
2395 		}
2396 		if (pcb->utun_ifp != NULL) {
2397 			// Only can set before connecting
2398 			result = EINVAL;
2399 			break;
2400 		}
2401 		int *intp = __unsafe_forge_single(int *, data);
2402 		if (*intp != 0 &&
2403 		    *intp != 1 &&
2404 		    *intp != UTUN_IF_WMM_RING_COUNT) {
2405 			result = EINVAL;
2406 			break;
2407 		}
2408 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2409 		pcb->utun_kpipe_count = *(int *)data;
2410 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2411 		break;
2412 	}
2413 	case UTUN_OPT_CHANNEL_BIND_PID: {
2414 		if (len != sizeof(pid_t)) {
2415 			result = EMSGSIZE;
2416 			break;
2417 		}
2418 		if (pcb->utun_ifp != NULL) {
2419 			// Only can set before connecting
2420 			result = EINVAL;
2421 			break;
2422 		}
2423 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2424 		pcb->utun_kpipe_pid = *(pid_t *)data;
2425 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2426 		break;
2427 	}
2428 
2429 	case UTUN_OPT_CHANNEL_BIND_UUID: {
2430 		if (len != sizeof(uuid_t)) {
2431 			result = EMSGSIZE;
2432 			break;
2433 		}
2434 		if (pcb->utun_ifp != NULL) {
2435 			// Only can set before connecting
2436 			result = EINVAL;
2437 			break;
2438 		}
2439 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2440 		uuid_copy(pcb->utun_kpipe_proc_uuid, *((uuid_t *)data));
2441 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2442 		break;
2443 	}
2444 
2445 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2446 		if (len != sizeof(int)) {
2447 			result = EMSGSIZE;
2448 			break;
2449 		}
2450 		if (pcb->utun_ifp == NULL) {
2451 			// Only can set after connecting
2452 			result = EINVAL;
2453 			break;
2454 		}
2455 		if (!if_is_fsw_transport_netagent_enabled()) {
2456 			result = ENOTSUP;
2457 			break;
2458 		}
2459 		if (uuid_is_null(pcb->utun_nx.fsw_agent)) {
2460 			result = ENOENT;
2461 			break;
2462 		}
2463 
2464 		uint32_t flags = netagent_get_flags(pcb->utun_nx.fsw_agent);
2465 
2466 		if (*(int *)data) {
2467 			pcb->utun_needs_netagent = true;
2468 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
2469 			    NETAGENT_FLAG_NEXUS_LISTENER);
2470 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2471 		} else {
2472 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
2473 			    NETAGENT_FLAG_NEXUS_LISTENER);
2474 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2475 			pcb->utun_needs_netagent = false;
2476 		}
2477 		break;
2478 	}
2479 	case UTUN_OPT_ATTACH_FLOWSWITCH: {
2480 		if (len != sizeof(int)) {
2481 			result = EMSGSIZE;
2482 			break;
2483 		}
2484 		if (pcb->utun_ifp != NULL) {
2485 			// Only can set before connecting
2486 			result = EINVAL;
2487 			break;
2488 		}
2489 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2490 		pcb->utun_attach_fsw = !!(*(int *)data);
2491 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2492 		break;
2493 	}
2494 	case UTUN_OPT_ENABLE_NETIF: {
2495 		if (len != sizeof(int)) {
2496 			result = EMSGSIZE;
2497 			break;
2498 		}
2499 		if (pcb->utun_ifp != NULL) {
2500 			// Only can set before connecting
2501 			result = EINVAL;
2502 			break;
2503 		}
2504 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2505 		pcb->utun_use_netif = !!(*(int *)data);
2506 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2507 		break;
2508 	}
2509 	case UTUN_OPT_SLOT_SIZE: {
2510 		if (len != sizeof(u_int32_t)) {
2511 			result = EMSGSIZE;
2512 			break;
2513 		}
2514 		if (pcb->utun_ifp != NULL) {
2515 			// Only can set before connecting
2516 			result = EINVAL;
2517 			break;
2518 		}
2519 		u_int32_t slot_size = *(u_int32_t *)data;
2520 		if (slot_size < UTUN_IF_MIN_SLOT_SIZE ||
2521 		    slot_size > UTUN_IF_MAX_SLOT_SIZE) {
2522 			return EINVAL;
2523 		}
2524 		pcb->utun_slot_size = slot_size;
2525 		break;
2526 	}
2527 	case UTUN_OPT_NETIF_RING_SIZE: {
2528 		if (len != sizeof(u_int32_t)) {
2529 			result = EMSGSIZE;
2530 			break;
2531 		}
2532 		if (pcb->utun_ifp != NULL) {
2533 			// Only can set before connecting
2534 			result = EINVAL;
2535 			break;
2536 		}
2537 		u_int32_t ring_size = *(u_int32_t *)data;
2538 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2539 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2540 			return EINVAL;
2541 		}
2542 		pcb->utun_netif_ring_size = ring_size;
2543 		break;
2544 	}
2545 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2546 		if (len != sizeof(u_int32_t)) {
2547 			result = EMSGSIZE;
2548 			break;
2549 		}
2550 		if (pcb->utun_ifp != NULL) {
2551 			// Only can set before connecting
2552 			result = EINVAL;
2553 			break;
2554 		}
2555 		u_int32_t ring_size = *(u_int32_t *)data;
2556 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2557 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2558 			return EINVAL;
2559 		}
2560 		pcb->utun_tx_fsw_ring_size = ring_size;
2561 		break;
2562 	}
2563 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2564 		if (len != sizeof(u_int32_t)) {
2565 			result = EMSGSIZE;
2566 			break;
2567 		}
2568 		if (pcb->utun_ifp != NULL) {
2569 			// Only can set before connecting
2570 			result = EINVAL;
2571 			break;
2572 		}
2573 		u_int32_t ring_size = *(u_int32_t *)data;
2574 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2575 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2576 			return EINVAL;
2577 		}
2578 		pcb->utun_rx_fsw_ring_size = ring_size;
2579 		break;
2580 	}
2581 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2582 		if (len != sizeof(u_int32_t)) {
2583 			result = EMSGSIZE;
2584 			break;
2585 		}
2586 		if (pcb->utun_ifp != NULL) {
2587 			// Only can set before connecting
2588 			result = EINVAL;
2589 			break;
2590 		}
2591 		u_int32_t ring_size = *(u_int32_t *)data;
2592 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2593 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2594 			return EINVAL;
2595 		}
2596 		pcb->utun_kpipe_tx_ring_size = ring_size;
2597 		break;
2598 	}
2599 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2600 		if (len != sizeof(u_int32_t)) {
2601 			result = EMSGSIZE;
2602 			break;
2603 		}
2604 		if (pcb->utun_ifp != NULL) {
2605 			// Only can set before connecting
2606 			result = EINVAL;
2607 			break;
2608 		}
2609 		u_int32_t ring_size = *(u_int32_t *)data;
2610 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2611 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2612 			return EINVAL;
2613 		}
2614 		pcb->utun_kpipe_rx_ring_size = ring_size;
2615 		break;
2616 	}
2617 #endif // UTUN_NEXUS
2618 	default: {
2619 		result = ENOPROTOOPT;
2620 		break;
2621 	}
2622 	}
2623 
2624 	return result;
2625 }
2626 
2627 static errno_t
utun_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (* len)data,size_t * len)2628 utun_ctl_getopt(__unused kern_ctl_ref kctlref,
2629     __unused u_int32_t unit,
2630     void *unitinfo,
2631     int opt,
2632     void *__sized_by(*len) data,
2633     size_t *len)
2634 {
2635 	struct utun_pcb *__single pcb = unitinfo;
2636 	errno_t result = 0;
2637 
2638 	switch (opt) {
2639 	case UTUN_OPT_FLAGS:
2640 		if (*len != sizeof(u_int32_t)) {
2641 			result = EMSGSIZE;
2642 		} else {
2643 			*(u_int32_t *)data = pcb->utun_external_flags;
2644 		}
2645 		break;
2646 
2647 	case UTUN_OPT_EXT_IFDATA_STATS:
2648 		if (*len != sizeof(int)) {
2649 			result = EMSGSIZE;
2650 		} else {
2651 			*(int *)data = (pcb->utun_ext_ifdata_stats) ? 1 : 0;
2652 		}
2653 		break;
2654 
2655 	case UTUN_OPT_IFNAME:
2656 		if (*len < MIN(strbuflen(pcb->utun_if_xname) + 1, sizeof(pcb->utun_if_xname))) {
2657 			result = EMSGSIZE;
2658 		} else {
2659 			if (pcb->utun_ifp == NULL) {
2660 				// Only can get after connecting
2661 				result = EINVAL;
2662 				break;
2663 			}
2664 			*len = scnprintf(data, *len, "%s", pcb->utun_if_xname) + 1;
2665 		}
2666 		break;
2667 
2668 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2669 		if (*len != sizeof(u_int32_t)) {
2670 			result = EMSGSIZE;
2671 		} else {
2672 			*((u_int32_t *)data) = pcb->utun_max_pending_packets;
2673 		}
2674 		break;
2675 	}
2676 
2677 #if UTUN_NEXUS
2678 	case UTUN_OPT_ENABLE_CHANNEL: {
2679 		if (*len != sizeof(int)) {
2680 			result = EMSGSIZE;
2681 		} else {
2682 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2683 			*(int *)data = pcb->utun_kpipe_count;
2684 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2685 		}
2686 		break;
2687 	}
2688 
2689 	case UTUN_OPT_CHANNEL_BIND_PID: {
2690 		if (*len != sizeof(pid_t)) {
2691 			result = EMSGSIZE;
2692 		} else {
2693 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2694 			*(pid_t *)data = pcb->utun_kpipe_pid;
2695 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2696 		}
2697 		break;
2698 	}
2699 
2700 	case UTUN_OPT_CHANNEL_BIND_UUID: {
2701 		if (*len != sizeof(uuid_t)) {
2702 			result = EMSGSIZE;
2703 		} else {
2704 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2705 			uuid_copy(*((uuid_t *)data), pcb->utun_kpipe_proc_uuid);
2706 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2707 		}
2708 		break;
2709 	}
2710 
2711 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2712 		if (*len != sizeof(int)) {
2713 			result = EMSGSIZE;
2714 		} else {
2715 			*(int *)data = if_check_netagent(pcb->utun_ifp, pcb->utun_nx.fsw_agent);
2716 		}
2717 		break;
2718 	}
2719 
2720 	case UTUN_OPT_ENABLE_NETIF: {
2721 		if (*len != sizeof(int)) {
2722 			result = EMSGSIZE;
2723 		} else {
2724 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2725 			*(int *)data = !!pcb->utun_use_netif;
2726 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2727 		}
2728 		break;
2729 	}
2730 
2731 	case UTUN_OPT_GET_CHANNEL_UUID: {
2732 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2733 		if (!utun_flag_isset(pcb, UTUN_FLAGS_KPIPE_ALLOCATED)) {
2734 			result = ENXIO;
2735 		} else if (*len != sizeof(uuid_t) * pcb->utun_kpipe_count) {
2736 			result = EMSGSIZE;
2737 		} else {
2738 			for (unsigned i = 0; i < pcb->utun_kpipe_count; i++) {
2739 				uuid_copy(((uuid_t *)data)[i], pcb->utun_kpipe_uuid[i]);
2740 			}
2741 		}
2742 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2743 		break;
2744 	}
2745 	case UTUN_OPT_SLOT_SIZE: {
2746 		if (*len != sizeof(u_int32_t)) {
2747 			result = EMSGSIZE;
2748 		} else {
2749 			*(u_int32_t *)data = pcb->utun_slot_size;
2750 		}
2751 		break;
2752 	}
2753 	case UTUN_OPT_NETIF_RING_SIZE: {
2754 		if (*len != sizeof(u_int32_t)) {
2755 			result = EMSGSIZE;
2756 		} else {
2757 			*(u_int32_t *)data = pcb->utun_netif_ring_size;
2758 		}
2759 		break;
2760 	}
2761 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2762 		if (*len != sizeof(u_int32_t)) {
2763 			result = EMSGSIZE;
2764 		} else {
2765 			*(u_int32_t *)data = pcb->utun_tx_fsw_ring_size;
2766 		}
2767 		break;
2768 	}
2769 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2770 		if (*len != sizeof(u_int32_t)) {
2771 			result = EMSGSIZE;
2772 		} else {
2773 			*(u_int32_t *)data = pcb->utun_rx_fsw_ring_size;
2774 		}
2775 		break;
2776 	}
2777 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2778 		if (*len != sizeof(u_int32_t)) {
2779 			result = EMSGSIZE;
2780 		} else {
2781 			*(u_int32_t *)data = pcb->utun_kpipe_tx_ring_size;
2782 		}
2783 		break;
2784 	}
2785 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2786 		if (*len != sizeof(u_int32_t)) {
2787 			result = EMSGSIZE;
2788 		} else {
2789 			*(u_int32_t *)data = pcb->utun_kpipe_rx_ring_size;
2790 		}
2791 		break;
2792 	}
2793 #endif // UTUN_NEXUS
2794 
2795 	default:
2796 		result = ENOPROTOOPT;
2797 		break;
2798 	}
2799 
2800 	return result;
2801 }
2802 
2803 static void
utun_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t unit,void * unitinfo,int flags)2804 utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
2805 {
2806 #pragma unused(flags)
2807 	bool reenable_output = false;
2808 	struct utun_pcb *__single pcb = unitinfo;
2809 	if (pcb == NULL) {
2810 		return;
2811 	}
2812 	ifnet_lock_exclusive(pcb->utun_ifp);
2813 
2814 	u_int32_t utun_packet_cnt;
2815 	errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, &utun_packet_cnt);
2816 	if (error_pc != 0) {
2817 		os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2818 		utun_packet_cnt = 0;
2819 	}
2820 
2821 	if (utun_packet_cnt < pcb->utun_max_pending_packets) {
2822 		reenable_output = true;
2823 	}
2824 
2825 	if (reenable_output) {
2826 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
2827 		if (error != 0) {
2828 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ifnet_enable_output returned error %d\n", error);
2829 		}
2830 	}
2831 	ifnet_lock_done(pcb->utun_ifp);
2832 }
2833 
2834 /* Network Interface functions */
2835 static void
utun_start(ifnet_t interface)2836 utun_start(ifnet_t interface)
2837 {
2838 	mbuf_ref_t data;
2839 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2840 
2841 	VERIFY(pcb != NULL);
2842 
2843 #if UTUN_NEXUS
2844 	VERIFY(pcb->utun_kpipe_count == 0); // kpipe > 0 enforces use_netif
2845 #endif // UTUN_NEXUS
2846 
2847 	for (;;) {
2848 		bool can_accept_packets = true;
2849 		ifnet_lock_shared(pcb->utun_ifp);
2850 
2851 		u_int32_t utun_packet_cnt;
2852 		errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt);
2853 		if (error_pc != 0) {
2854 			os_log_error(OS_LOG_DEFAULT, "utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2855 			utun_packet_cnt = 0;
2856 		}
2857 
2858 		can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets);
2859 		if (!can_accept_packets && pcb->utun_ctlref) {
2860 			u_int32_t difference = 0;
2861 			if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) {
2862 				if (difference > 0) {
2863 					// If the low-water mark has not yet been reached, we still need to enqueue data
2864 					// into the buffer
2865 					can_accept_packets = true;
2866 				}
2867 			}
2868 		}
2869 		if (!can_accept_packets) {
2870 			errno_t error = ifnet_disable_output(interface);
2871 			if (error != 0) {
2872 				os_log_error(OS_LOG_DEFAULT, "utun_start: ifnet_disable_output returned error %d\n", error);
2873 			}
2874 			ifnet_lock_done(pcb->utun_ifp);
2875 			break;
2876 		}
2877 		ifnet_lock_done(pcb->utun_ifp);
2878 		if (ifnet_dequeue(interface, &data) != 0) {
2879 			break;
2880 		}
2881 		if (utun_output(interface, data) != 0) {
2882 			break;
2883 		}
2884 	}
2885 }
2886 
2887 static errno_t
utun_output(ifnet_t interface,mbuf_t data)2888 utun_output(ifnet_t     interface,
2889     mbuf_t data)
2890 {
2891 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2892 	errno_t result;
2893 
2894 	VERIFY(interface == pcb->utun_ifp);
2895 
2896 #if UTUN_NEXUS
2897 	if (!pcb->utun_use_netif)
2898 #endif // UTUN_NEXUS
2899 	{
2900 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2901 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
2902 		}
2903 	}
2904 
2905 	if (pcb->utun_external_flags & UTUN_FLAGS_NO_OUTPUT) {
2906 		/* flush data */
2907 		mbuf_freem(data);
2908 		return 0;
2909 	}
2910 
2911 	// otherwise, fall thru to ctl_enqueumbuf
2912 	if (pcb->utun_ctlref) {
2913 		int     length;
2914 
2915 		/*
2916 		 * The ABI requires the protocol in network byte order
2917 		 */
2918 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2919 			*mtod(data, uint32_t *) = htonl(*mtod(data, uint32_t *));
2920 		}
2921 
2922 		length = mbuf_pkthdr_len(data);
2923 		result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
2924 		if (result != 0) {
2925 			mbuf_freem(data);
2926 			os_log_error(OS_LOG_DEFAULT, "utun_output - ctl_enqueuembuf failed: %d\n", result);
2927 #if UTUN_NEXUS
2928 			if (!pcb->utun_use_netif)
2929 #endif // UTUN_NEXUS
2930 			{
2931 				ifnet_stat_increment_out(interface, 0, 0, 1);
2932 			}
2933 		} else {
2934 #if UTUN_NEXUS
2935 			if (!pcb->utun_use_netif)
2936 #endif // UTUN_NEXUS
2937 			{
2938 				if (!pcb->utun_ext_ifdata_stats) {
2939 					ifnet_stat_increment_out(interface, 1, length, 0);
2940 				}
2941 			}
2942 		}
2943 	} else {
2944 		mbuf_freem(data);
2945 	}
2946 
2947 	return 0;
2948 }
2949 
2950 static errno_t
utun_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)2951 utun_demux(__unused ifnet_t interface,
2952     mbuf_t data,
2953     __unused char *frame_header,
2954     protocol_family_t *protocol)
2955 {
2956 #if UTUN_NEXUS
2957 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2958 	struct ip *ip;
2959 	u_int ip_version;
2960 #endif
2961 
2962 	while (data != NULL && mbuf_len(data) < 1) {
2963 		data = mbuf_next(data);
2964 	}
2965 
2966 	if (data == NULL) {
2967 		return ENOENT;
2968 	}
2969 
2970 #if UTUN_NEXUS
2971 	if (pcb->utun_use_netif) {
2972 		ip = mtod(data, struct ip *);
2973 		ip_version = ip->ip_v;
2974 
2975 		switch (ip_version) {
2976 		case 4:
2977 			*protocol = PF_INET;
2978 			return 0;
2979 		case 6:
2980 			*protocol = PF_INET6;
2981 			return 0;
2982 		default:
2983 			*protocol = 0;
2984 			break;
2985 		}
2986 	} else
2987 #endif // UTUN_NEXUS
2988 	{
2989 		*protocol = *mtod(data, uint32_t *);
2990 	}
2991 
2992 	return 0;
2993 }
2994 
2995 static errno_t
utun_framer(ifnet_t interface,mbuf_t * packet,__unused const struct sockaddr * dest,__unused IFNET_LLADDR_T dest_lladdr,IFNET_FRAME_TYPE_T frame_type,u_int32_t * prepend_len,u_int32_t * postpend_len)2996 utun_framer(ifnet_t interface,
2997     mbuf_t *packet,
2998     __unused const struct sockaddr *dest,
2999     __unused IFNET_LLADDR_T dest_lladdr,
3000     IFNET_FRAME_TYPE_T frame_type,
3001     u_int32_t *prepend_len,
3002     u_int32_t *postpend_len)
3003 {
3004 	struct utun_pcb *__single pcb = ifnet_softc(interface);
3005 	VERIFY(interface == pcb->utun_ifp);
3006 
3007 	u_int32_t header_length = UTUN_HEADER_SIZE(pcb);
3008 	if (mbuf_prepend(packet, header_length, MBUF_DONTWAIT) != 0) {
3009 		os_log_error(OS_LOG_DEFAULT, "utun_framer - ifnet_output prepend failed\n");
3010 
3011 		ifnet_stat_increment_out(interface, 0, 0, 1);
3012 
3013 		// just	return, because the buffer was freed in mbuf_prepend
3014 		return EJUSTRETURN;
3015 	}
3016 	if (prepend_len != NULL) {
3017 		*prepend_len = header_length;
3018 	}
3019 	if (postpend_len != NULL) {
3020 		*postpend_len = 0;
3021 	}
3022 
3023 	// place protocol number at the beginning of the mbuf
3024 	*mtod(*packet, protocol_family_t *) = *(protocol_family_t *)(uintptr_t)(size_t)frame_type;
3025 
3026 #if NECP
3027 	// Add process uuid if applicable
3028 	if (pcb->utun_external_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
3029 		if (m_pktlen(*packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
3030 			u_int8_t *header = mtod(*packet, uint8_t*);
3031 			int uuid_err = necp_get_app_uuid_from_packet(*packet, (void *)(header + sizeof(u_int32_t)));
3032 			if (uuid_err != 0) {
3033 				os_log_error(OS_LOG_DEFAULT, "Received app uuid error %d for %s%d\n", uuid_err, ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp));
3034 			}
3035 		} else {
3036 			os_log_error(OS_LOG_DEFAULT, "Cannot set proc uuid for %s%d, size %d < %zu\n", ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp),
3037 			    m_pktlen(*packet), UTUN_HEADER_SIZE(pcb));
3038 		}
3039 	}
3040 #endif // NECP
3041 
3042 	return 0;
3043 }
3044 
3045 static errno_t
utun_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)3046 utun_add_proto(__unused ifnet_t interface,
3047     protocol_family_t protocol,
3048     __unused const struct ifnet_demux_desc *demux_array,
3049     __unused u_int32_t demux_count)
3050 {
3051 	switch (protocol) {
3052 	case PF_INET:
3053 		return 0;
3054 	case PF_INET6:
3055 		return 0;
3056 	default:
3057 		break;
3058 	}
3059 
3060 	return ENOPROTOOPT;
3061 }
3062 
3063 static errno_t
utun_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)3064 utun_del_proto(__unused ifnet_t interface,
3065     __unused protocol_family_t protocol)
3066 {
3067 	return 0;
3068 }
3069 
3070 static errno_t
utun_ioctl(ifnet_t interface,u_long command,void * data)3071 utun_ioctl(ifnet_t interface,
3072     u_long command,
3073     void *data)
3074 {
3075 #if UTUN_NEXUS
3076 	struct utun_pcb *__single pcb = ifnet_softc(interface);
3077 #endif
3078 	errno_t result = 0;
3079 
3080 	switch (command) {
3081 	case SIOCSIFMTU: {
3082 #if UTUN_NEXUS
3083 		if (pcb->utun_use_netif) {
3084 			// Make sure we can fit packets in the channel buffers
3085 			// Allow for the headroom in the slot
3086 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) + UTUN_IF_HEADROOM_SIZE > pcb->utun_slot_size) {
3087 				result = EINVAL;
3088 			} else {
3089 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
3090 			}
3091 		} else
3092 #endif // UTUN_NEXUS
3093 		{
3094 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
3095 		}
3096 		break;
3097 	}
3098 
3099 	case SIOCSIFSUBFAMILY: {
3100 		uint32_t subfamily;
3101 
3102 		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
3103 		switch (subfamily) {
3104 		case IFRTYPE_SUBFAMILY_BLUETOOTH:
3105 			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
3106 			break;
3107 		case IFRTYPE_SUBFAMILY_WIFI:
3108 			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
3109 			break;
3110 		case IFRTYPE_SUBFAMILY_QUICKRELAY:
3111 			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
3112 			break;
3113 		case IFRTYPE_SUBFAMILY_DEFAULT:
3114 			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
3115 			break;
3116 		default:
3117 			result = EINVAL;
3118 			break;
3119 		}
3120 		break;
3121 	}
3122 
3123 	case SIOCSIFPEEREGRESSFUNCTIONALTYPE: {
3124 		uint32_t peeregressinterfacetype;
3125 		peeregressinterfacetype = ((struct ifreq*)data)->ifr_ifru.ifru_peer_egress_functional_type;
3126 		switch (peeregressinterfacetype) {
3127 		case IFRTYPE_FUNCTIONAL_WIFI_INFRA:
3128 		case IFRTYPE_FUNCTIONAL_CELLULAR:
3129 		case IFRTYPE_FUNCTIONAL_WIRED:
3130 		case IFRTYPE_FUNCTIONAL_UNKNOWN:
3131 			interface->peer_egress_functional_type = peeregressinterfacetype;
3132 			break;
3133 		default:
3134 			result = EINVAL;
3135 			break;
3136 		}
3137 		break;
3138 	}
3139 
3140 	case SIOCSIFFLAGS:
3141 		/* ifioctl() takes care of it */
3142 		break;
3143 
3144 	default:
3145 		result = EOPNOTSUPP;
3146 	}
3147 
3148 	return result;
3149 }
3150 
3151 static void
utun_detached(ifnet_t interface)3152 utun_detached(ifnet_t interface)
3153 {
3154 	struct utun_pcb *__single pcb = ifnet_softc(interface);
3155 	(void)ifnet_release(interface);
3156 	lck_mtx_lock(&utun_lock);
3157 	utun_free_pcb(pcb, true);
3158 	(void)ifnet_dispose(interface);
3159 	lck_mtx_unlock(&utun_lock);
3160 }
3161 
3162 /* Protocol Handlers */
3163 
3164 static errno_t
utun_proto_input(__unused ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)3165 utun_proto_input(__unused ifnet_t interface,
3166     protocol_family_t protocol,
3167     mbuf_t m,
3168     __unused char *frame_header)
3169 {
3170 	struct utun_pcb *__single pcb = ifnet_softc(interface);
3171 #if UTUN_NEXUS
3172 	if (!pcb->utun_use_netif)
3173 #endif // UTUN_NEXUS
3174 	{
3175 		mbuf_adj(m, UTUN_HEADER_SIZE(pcb));
3176 	}
3177 	int32_t pktlen = m->m_pkthdr.len;
3178 	if (proto_input(protocol, m) != 0) {
3179 		m_freem(m);
3180 #if UTUN_NEXUS
3181 		if (!pcb->utun_use_netif)
3182 #endif // UTUN_NEXUS
3183 		{
3184 			ifnet_stat_increment_in(interface, 0, 0, 1);
3185 		}
3186 	} else {
3187 #if UTUN_NEXUS
3188 		if (!pcb->utun_use_netif)
3189 #endif // UTUN_NEXUS
3190 		{
3191 			ifnet_stat_increment_in(interface, 1, pktlen, 0);
3192 		}
3193 	}
3194 
3195 	return 0;
3196 }
3197 
3198 static errno_t
utun_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,char * frame_type,__unused char * link_layer_dest)3199 utun_proto_pre_output(__unused ifnet_t interface,
3200     protocol_family_t protocol,
3201     __unused mbuf_t *packet,
3202     __unused const struct sockaddr *dest,
3203     __unused void *route,
3204     char *frame_type,
3205     __unused char *link_layer_dest)
3206 {
3207 	*(protocol_family_t *)(void *)frame_type = protocol;
3208 	return 0;
3209 }
3210 
3211 static errno_t
utun_attach_proto(ifnet_t interface,protocol_family_t protocol)3212 utun_attach_proto(ifnet_t interface,
3213     protocol_family_t protocol)
3214 {
3215 	struct ifnet_attach_proto_param proto;
3216 
3217 	bzero(&proto, sizeof(proto));
3218 	proto.input = utun_proto_input;
3219 	proto.pre_output = utun_proto_pre_output;
3220 
3221 	errno_t result = ifnet_attach_protocol(interface, protocol, &proto);
3222 	if (result != 0 && result != EEXIST) {
3223 		os_log_error(OS_LOG_DEFAULT, "utun_attach_inet - ifnet_attach_protocol %d failed: %d\n",
3224 		    protocol, result);
3225 	}
3226 
3227 	return result;
3228 }
3229 
3230 static errno_t
utun_pkt_input(struct utun_pcb * pcb,mbuf_t packet)3231 utun_pkt_input(struct utun_pcb *pcb, mbuf_t packet)
3232 {
3233 #if UTUN_NEXUS
3234 	if (pcb->utun_use_netif) {
3235 		if (!utun_data_move_begin(pcb)) {
3236 			os_log_info(OS_LOG_DEFAULT,
3237 			    "%s: data path stopped for %s\n",
3238 			    __func__, if_name(pcb->utun_ifp));
3239 			return ENXIO;
3240 		}
3241 
3242 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
3243 
3244 		lck_mtx_lock(&pcb->utun_input_chain_lock);
3245 
3246 		if (pcb->utun_input_chain_count > (u_int32_t)if_utun_max_pending_input) {
3247 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
3248 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3249 			utun_data_move_end(pcb);
3250 			return ENOSPC;
3251 		}
3252 
3253 		if (pcb->utun_input_chain != NULL) {
3254 			pcb->utun_input_chain_last->m_nextpkt = packet;
3255 		} else {
3256 			pcb->utun_input_chain = packet;
3257 		}
3258 		pcb->utun_input_chain_count++;
3259 		while (packet->m_nextpkt) {
3260 			VERIFY(packet != packet->m_nextpkt);
3261 			packet = packet->m_nextpkt;
3262 			pcb->utun_input_chain_count++;
3263 		}
3264 		pcb->utun_input_chain_last = packet;
3265 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
3266 
3267 		kern_channel_ring_t __single rx_ring = pcb->utun_netif_rxring[0];
3268 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3269 
3270 		if (rx_ring != NULL) {
3271 			kern_channel_notify(rx_ring, 0);
3272 		}
3273 
3274 		utun_data_move_end(pcb);
3275 		return 0;
3276 	} else
3277 #endif // UTUN_NEXUS
3278 	{
3279 		mbuf_pkthdr_setrcvif(packet, pcb->utun_ifp);
3280 
3281 		if (m_pktlen(packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
3282 			bpf_tap_in(pcb->utun_ifp, DLT_NULL, packet, 0, 0);
3283 		}
3284 		if (pcb->utun_external_flags & UTUN_FLAGS_NO_INPUT) {
3285 			/* flush data */
3286 			mbuf_freem(packet);
3287 			return 0;
3288 		}
3289 
3290 		errno_t result = 0;
3291 		if (!pcb->utun_ext_ifdata_stats) {
3292 			struct ifnet_stat_increment_param incs = {};
3293 			incs.packets_in = 1;
3294 			incs.bytes_in = mbuf_pkthdr_len(packet);
3295 			result = ifnet_input(pcb->utun_ifp, packet, &incs);
3296 		} else {
3297 			result = ifnet_input(pcb->utun_ifp, packet, NULL);
3298 		}
3299 		if (result != 0) {
3300 			ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
3301 
3302 			os_log_error(OS_LOG_DEFAULT, "%s - ifnet_input failed: %d\n", __FUNCTION__, result);
3303 		}
3304 
3305 		return 0;
3306 	}
3307 }
3308 
3309 #if UTUN_NEXUS
3310 
3311 static errno_t
utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)3312 utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
3313 {
3314 	return 0;
3315 }
3316 
3317 static void
utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)3318 utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
3319 {
3320 	// Ignore
3321 }
3322 
3323 static errno_t
utun_register_nexus(void)3324 utun_register_nexus(void)
3325 {
3326 	const struct kern_nexus_domain_provider_init dp_init = {
3327 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3328 		.nxdpi_flags = 0,
3329 		.nxdpi_init = utun_nxdp_init,
3330 		.nxdpi_fini = utun_nxdp_fini
3331 	};
3332 	errno_t err = 0;
3333 	nexus_domain_provider_name_t domain_provider_name = "com.apple.utun";
3334 
3335 	/* utun_nxdp_init() is called before this function returns */
3336 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
3337 	    domain_provider_name,
3338 	    &dp_init, sizeof(dp_init),
3339 	    &utun_nx_dom_prov);
3340 	if (err != 0) {
3341 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
3342 		return err;
3343 	}
3344 	return 0;
3345 }
3346 boolean_t
utun_interface_needs_netagent(ifnet_t interface)3347 utun_interface_needs_netagent(ifnet_t interface)
3348 {
3349 	struct utun_pcb *__single pcb = NULL;
3350 
3351 	if (interface == NULL) {
3352 		return FALSE;
3353 	}
3354 
3355 	pcb = ifnet_softc(interface);
3356 
3357 	if (pcb == NULL) {
3358 		return FALSE;
3359 	}
3360 
3361 	return pcb->utun_needs_netagent == true;
3362 }
3363 
3364 static errno_t
utun_ifnet_set_attrs(ifnet_t ifp)3365 utun_ifnet_set_attrs(ifnet_t ifp)
3366 {
3367 	/* Set flags and additional information. */
3368 	ifnet_set_mtu(ifp, 1500);
3369 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
3370 
3371 	/* The interface must generate its own IPv6 LinkLocal address,
3372 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
3373 	 */
3374 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
3375 
3376 	return 0;
3377 }
3378 
3379 static errno_t
utun_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)3380 utun_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
3381 {
3382 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3383 	pcb->utun_netif_nexus = nexus;
3384 	return utun_ifnet_set_attrs(ifp);
3385 }
3386 
3387 static errno_t
utun_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)3388 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
3389     proc_t p, kern_nexus_t nexus,
3390     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
3391 {
3392 #pragma unused(nxprov, p)
3393 #pragma unused(nexus, nexus_port, channel, ch_ctx)
3394 	return 0;
3395 }
3396 
3397 static errno_t
utun_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3398 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3399     kern_channel_t channel)
3400 {
3401 #pragma unused(nxprov, channel)
3402 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3403 	boolean_t ok = ifnet_get_ioref(pcb->utun_ifp);
3404 	if (pcb->utun_netif_nexus == nexus) {
3405 		pcb->utun_netif_connected = true;
3406 	}
3407 	if (ok) {
3408 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3409 		UTUN_SET_DATA_PATH_READY(pcb);
3410 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3411 	}
3412 	return ok ? 0 : ENXIO;
3413 }
3414 
3415 static void
utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3416 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3417     kern_channel_t channel)
3418 {
3419 #pragma unused(nxprov, channel)
3420 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3421 	/* Wait until all threads in the data paths are done. */
3422 	utun_wait_data_move_drain(pcb);
3423 }
3424 
3425 static void
utun_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3426 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3427     kern_channel_t channel)
3428 {
3429 #pragma unused(nxprov, channel)
3430 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3431 	/* Wait until all threads in the data paths are done. */
3432 	utun_wait_data_move_drain(pcb);
3433 }
3434 
3435 static void
utun_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3436 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3437     kern_channel_t channel)
3438 {
3439 #pragma unused(nxprov, channel)
3440 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3441 	if (pcb->utun_netif_nexus == nexus) {
3442 		pcb->utun_netif_connected = false;
3443 		if (pcb->utun_attach_fsw) {
3444 			// disconnected by flowswitch that was attached by us
3445 			pcb->utun_netif_nexus = NULL;
3446 		}
3447 	}
3448 	ifnet_decr_iorefcnt(pcb->utun_ifp);
3449 }
3450 
3451 static errno_t
utun_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)3452 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3453     kern_channel_t channel, kern_channel_ring_t ring,
3454     boolean_t is_tx_ring, void **ring_ctx)
3455 {
3456 #pragma unused(nxprov)
3457 #pragma unused(channel)
3458 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3459 	uint8_t ring_idx;
3460 
3461 	for (ring_idx = 0; ring_idx < pcb->utun_kpipe_count; ring_idx++) {
3462 		if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->utun_kpipe_uuid[ring_idx])) {
3463 			break;
3464 		}
3465 	}
3466 
3467 	if (ring_idx == pcb->utun_kpipe_count) {
3468 		uuid_string_t uuidstr;
3469 		uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
3470 		os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->utun_if_xname, uuidstr);
3471 		return ENOENT;
3472 	}
3473 
3474 	*ring_ctx = __unsafe_forge_single(void *, (uintptr_t)ring_idx);
3475 
3476 	if (!is_tx_ring) {
3477 		VERIFY(pcb->utun_kpipe_rxring[ring_idx] == NULL);
3478 		pcb->utun_kpipe_rxring[ring_idx] = ring;
3479 	} else {
3480 		VERIFY(pcb->utun_kpipe_txring[ring_idx] == NULL);
3481 		pcb->utun_kpipe_txring[ring_idx] = ring;
3482 	}
3483 	return 0;
3484 }
3485 
3486 static void
utun_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)3487 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3488     kern_channel_ring_t ring)
3489 {
3490 #pragma unused(nxprov)
3491 	bool found = false;
3492 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3493 
3494 	for (unsigned int i = 0; i < pcb->utun_kpipe_count; i++) {
3495 		if (pcb->utun_kpipe_rxring[i] == ring) {
3496 			pcb->utun_kpipe_rxring[i] = NULL;
3497 			found = true;
3498 		} else if (pcb->utun_kpipe_txring[i] == ring) {
3499 			pcb->utun_kpipe_txring[i] = NULL;
3500 			found = true;
3501 		}
3502 	}
3503 	VERIFY(found);
3504 }
3505 
3506 static errno_t
utun_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)3507 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3508     kern_channel_ring_t tx_ring, uint32_t flags)
3509 {
3510 #pragma unused(nxprov)
3511 #pragma unused(flags)
3512 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3513 
3514 	if (!utun_data_move_begin(pcb)) {
3515 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3516 		    __func__, if_name(pcb->utun_ifp));
3517 		return 0;
3518 	}
3519 
3520 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3521 	if (!utun_flag_isset(pcb, UTUN_FLAGS_KPIPE_ALLOCATED)) {
3522 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3523 		utun_data_move_end(pcb);
3524 		return 0;
3525 	}
3526 
3527 	if (pcb->utun_use_netif) {
3528 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3529 		if (tx_slot == NULL) {
3530 			// Nothing to write, bail
3531 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3532 			utun_data_move_end(pcb);
3533 			return 0;
3534 		}
3535 
3536 		// Signal the netif ring to read
3537 		kern_channel_ring_t __single rx_ring = pcb->utun_netif_rxring[0];
3538 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3539 		if (rx_ring != NULL) {
3540 			kern_channel_notify(rx_ring, 0);
3541 		}
3542 	} else {
3543 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3544 
3545 		struct ifnet_stat_increment_param incs = {};
3546 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
3547 		MBUFQ_HEAD(mbufq) mbq;
3548 		MBUFQ_INIT(&mbq);
3549 		kern_channel_slot_t tx_pslot = NULL;
3550 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3551 		while (tx_slot != NULL) {
3552 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3553 
3554 			// Advance TX ring
3555 			tx_pslot = tx_slot;
3556 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3557 
3558 			if (tx_ph == 0) {
3559 				continue;
3560 			}
3561 
3562 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3563 			VERIFY(tx_buf != NULL);
3564 			uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3565 			    kern_buflet_get_data_address(tx_buf),
3566 			    kern_buflet_get_data_limit(tx_buf));
3567 			VERIFY(tx_baddr != 0);
3568 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3569 
3570 			size_t length = MIN(kern_packet_get_data_length(tx_ph),
3571 			    pcb->utun_slot_size);
3572 
3573 			mbuf_ref_t data = NULL;
3574 			if (length >= UTUN_HEADER_SIZE(pcb) &&
3575 			    !(pcb->utun_external_flags & UTUN_FLAGS_NO_INPUT)) {
3576 				errno_t error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_HEADER, &data);
3577 				VERIFY(0 == error);
3578 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_WAITOK);
3579 				VERIFY(0 == error);
3580 				/*
3581 				 * The userland ABI requires the first four bytes have
3582 				 * the protocol family in network byte order: swap them
3583 				 */
3584 				*mtod(data, uint32_t*) = ntohl(*mtod(data, uint32_t *));
3585 				mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
3586 				bpf_tap_in(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3587 				incs.packets_in++;
3588 				incs.bytes_in += length;
3589 				MBUFQ_ENQUEUE(&mbq, data);
3590 			}
3591 		}
3592 		if (tx_pslot) {
3593 			kern_channel_advance_slot(tx_ring, tx_pslot);
3594 			tx_ring_stats.kcrsi_slots_transferred = incs.packets_in;
3595 			tx_ring_stats.kcrsi_bytes_transferred = incs.bytes_in;
3596 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3597 			(void) kern_channel_reclaim(tx_ring);
3598 		}
3599 		if (!MBUFQ_EMPTY(&mbq)) {
3600 			(void) ifnet_input_extended(pcb->utun_ifp, MBUFQ_FIRST(&mbq),
3601 			    MBUFQ_LAST(&mbq), &incs);
3602 			MBUFQ_INIT(&mbq);
3603 		}
3604 	}
3605 
3606 	utun_data_move_end(pcb);
3607 	return 0;
3608 }
3609 
3610 static errno_t
utun_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)3611 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3612     kern_channel_ring_t rx_ring, uint32_t flags)
3613 {
3614 #pragma unused(nxprov)
3615 #pragma unused(flags)
3616 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3617 	struct kern_channel_ring_stat_increment rx_ring_stats = {};
3618 	uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);
3619 
3620 	if (!utun_data_move_begin(pcb)) {
3621 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3622 		    __func__, if_name(pcb->utun_ifp));
3623 		return 0;
3624 	}
3625 
3626 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3627 
3628 	if (!utun_flag_isset(pcb, UTUN_FLAGS_KPIPE_ALLOCATED)) {
3629 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3630 		utun_data_move_end(pcb);
3631 		return 0;
3632 	}
3633 
3634 	VERIFY(pcb->utun_kpipe_count > 0);
3635 	VERIFY(ring_idx <= pcb->utun_kpipe_count);
3636 
3637 	/* reclaim user-released slots */
3638 	(void) kern_channel_reclaim(rx_ring);
3639 
3640 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
3641 	if (avail == 0) {
3642 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3643 		utun_data_move_end(pcb);
3644 		return 0;
3645 	}
3646 
3647 	kern_channel_ring_t __single tx_ring = pcb->utun_netif_txring[ring_idx];
3648 	if (tx_ring == NULL ||
3649 	    pcb->utun_netif_nexus == NULL) {
3650 		// Net-If TX ring not set up yet, nothing to read
3651 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3652 		utun_data_move_end(pcb);
3653 		return 0;
3654 	}
3655 
3656 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->utun_netif_nexus)->nif_stats;
3657 
3658 	// Unlock utun before entering ring
3659 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3660 
3661 	(void)kr_enter(tx_ring, TRUE);
3662 
3663 	// Lock again after entering and validate
3664 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3665 	if (tx_ring != pcb->utun_netif_txring[ring_idx]) {
3666 		// Ring no longer valid
3667 		// Unlock first, then exit ring
3668 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3669 		kr_exit(tx_ring);
3670 		utun_data_move_end(pcb);
3671 		return 0;
3672 	}
3673 
3674 	struct kern_channel_ring_stat_increment tx_ring_stats;
3675 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
3676 	kern_channel_slot_t tx_pslot = NULL;
3677 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3678 	if (tx_slot == NULL) {
3679 		// Nothing to read, don't bother signalling
3680 		// Unlock first, then exit ring
3681 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3682 		kr_exit(tx_ring);
3683 		utun_data_move_end(pcb);
3684 		return 0;
3685 	}
3686 
3687 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3688 	VERIFY(rx_pp != NULL);
3689 	struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
3690 	VERIFY(tx_pp != NULL);
3691 	kern_channel_slot_t rx_pslot = NULL;
3692 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3693 	kern_packet_t tx_chain_ph = 0;
3694 
3695 	while (rx_slot != NULL && tx_slot != NULL) {
3696 		size_t length;
3697 		kern_buflet_t rx_buf;
3698 		uint8_t *rx_baddr;
3699 
3700 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3701 
3702 		/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
3703 		if (tx_ph == 0) {
3704 			// Advance TX ring
3705 			tx_pslot = tx_slot;
3706 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3707 			continue;
3708 		}
3709 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
3710 		if (tx_chain_ph != 0) {
3711 			kern_packet_append(tx_ph, tx_chain_ph);
3712 		}
3713 		tx_chain_ph = tx_ph;
3714 
3715 		// Advance TX ring
3716 		tx_pslot = tx_slot;
3717 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3718 
3719 		// Allocate rx packet
3720 		kern_packet_t rx_ph = 0;
3721 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3722 		if (__improbable(error != 0)) {
3723 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3724 			    pcb->utun_ifp->if_xname);
3725 			break;
3726 		}
3727 
3728 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3729 		VERIFY(tx_buf != NULL);
3730 		uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3731 		    kern_buflet_get_data_address(tx_buf),
3732 		    kern_buflet_get_data_limit(tx_buf));
3733 		VERIFY(tx_baddr != NULL);
3734 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
3735 
3736 		bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
3737 
3738 		length = MIN(kern_packet_get_data_length(tx_ph) + UTUN_HEADER_SIZE(pcb),
3739 		    pcb->utun_slot_size);
3740 
3741 		tx_ring_stats.kcrsi_slots_transferred++;
3742 		tx_ring_stats.kcrsi_bytes_transferred += length;
3743 
3744 		if (length < UTUN_HEADER_SIZE(pcb) ||
3745 		    length > pcb->utun_slot_size ||
3746 		    length > PP_BUF_SIZE_DEF(rx_pp) ||
3747 		    (pcb->utun_external_flags & UTUN_FLAGS_NO_OUTPUT)) {
3748 			/* flush data */
3749 			kern_pbufpool_free(rx_pp, rx_ph);
3750 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: invalid length %zu header_size %zu\n",
3751 			    pcb->utun_ifp->if_xname, length, UTUN_HEADER_SIZE(pcb));
3752 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
3753 			STATS_INC(nifs, NETIF_STATS_DROP);
3754 			continue;
3755 		}
3756 
3757 		/* fillout packet */
3758 		rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3759 		VERIFY(rx_buf != NULL);
3760 		rx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3761 		    kern_buflet_get_data_address(rx_buf),
3762 		    kern_buflet_get_data_limit(rx_buf));
3763 		VERIFY(rx_baddr != NULL);
3764 
3765 		// Find family
3766 		uint32_t af = 0;
3767 		uint8_t vhl = *(uint8_t *)(tx_baddr);
3768 		u_int ip_version = (vhl >> 4);
3769 		switch (ip_version) {
3770 		case 4: {
3771 			af = AF_INET;
3772 			break;
3773 		}
3774 		case 6: {
3775 			af = AF_INET6;
3776 			break;
3777 		}
3778 		default: {
3779 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: unknown ip version %u vhl %u header_size %zu\n",
3780 			    pcb->utun_ifp->if_xname, ip_version, vhl, UTUN_HEADER_SIZE(pcb));
3781 			break;
3782 		}
3783 		}
3784 
3785 		// Copy header
3786 		af = htonl(af);
3787 		memcpy(rx_baddr, &af, sizeof(af));
3788 		if (pcb->utun_external_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
3789 			uuid_t uuid;
3790 			kern_packet_get_euuid(tx_ph, uuid);
3791 			memcpy(rx_baddr + sizeof(af), uuid, sizeof(uuid));
3792 		}
3793 
3794 		// Copy data from tx to rx
3795 		memcpy(rx_baddr + UTUN_HEADER_SIZE(pcb), tx_baddr, length - UTUN_HEADER_SIZE(pcb));
3796 		kern_packet_clear_flow_uuid(rx_ph); // zero flow id
3797 
3798 		/* finalize and attach the packet */
3799 		error = kern_buflet_set_data_offset(rx_buf, 0);
3800 		VERIFY(error == 0);
3801 		error = kern_buflet_set_data_length(rx_buf, length);
3802 		VERIFY(error == 0);
3803 		error = kern_packet_finalize(rx_ph);
3804 		VERIFY(error == 0);
3805 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3806 		VERIFY(error == 0);
3807 
3808 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
3809 		STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
3810 
3811 		rx_ring_stats.kcrsi_slots_transferred++;
3812 		rx_ring_stats.kcrsi_bytes_transferred += length;
3813 
3814 		rx_pslot = rx_slot;
3815 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3816 	}
3817 
3818 	if (rx_pslot) {
3819 		kern_channel_advance_slot(rx_ring, rx_pslot);
3820 		kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
3821 	}
3822 
3823 	if (tx_chain_ph != 0) {
3824 		kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
3825 	}
3826 
3827 	if (tx_pslot) {
3828 		kern_channel_advance_slot(tx_ring, tx_pslot);
3829 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3830 		(void)kern_channel_reclaim(tx_ring);
3831 	}
3832 
3833 	/* just like utun_ctl_rcvd(), always reenable output */
3834 	errno_t error = ifnet_enable_output(pcb->utun_ifp);
3835 	if (error != 0) {
3836 		os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
3837 	}
3838 
3839 	// Unlock first, then exit ring
3840 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3841 
3842 	if (tx_pslot != NULL) {
3843 		kern_channel_notify(tx_ring, 0);
3844 	}
3845 	kr_exit(tx_ring);
3846 
3847 	utun_data_move_end(pcb);
3848 	return 0;
3849 }
3850 
3851 #endif // UTUN_NEXUS
3852 
3853 
3854 /*
3855  * These are place holders until coreTLS kext stops calling them
3856  */
3857 errno_t utun_ctl_register_dtls(void *reg);
3858 int utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
3859 void utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb);
3860 
3861 errno_t
utun_ctl_register_dtls(void * reg)3862 utun_ctl_register_dtls(void *reg)
3863 {
3864 #pragma unused(reg)
3865 	return 0;
3866 }
3867 
3868 int
utun_pkt_dtls_input(struct utun_pcb * pcb,mbuf_t * pkt,protocol_family_t family)3869 utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family)
3870 {
3871 #pragma unused(pcb)
3872 #pragma unused(pkt)
3873 #pragma unused(family)
3874 	return 0;
3875 }
3876 
/*
 * utun_ctl_disable_crypto_dtls
 *
 * Placeholder retained until the coreTLS kext stops calling it.
 * Intentionally does nothing; the pcb is not examined.
 */
void
utun_ctl_disable_crypto_dtls(struct utun_pcb *pcb)
{
#pragma unused(pcb)
}
3882 
3883 #if UTUN_NEXUS
3884 static boolean_t
utun_data_move_begin(struct utun_pcb * pcb)3885 utun_data_move_begin(struct utun_pcb *pcb)
3886 {
3887 	bool data_path_ready = false;
3888 
3889 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3890 	if ((data_path_ready = UTUN_IS_DATA_PATH_READY(pcb))) {
3891 		pcb->utun_pcb_data_move++;
3892 	}
3893 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3894 
3895 	return data_path_ready;
3896 }
3897 
3898 static void
utun_data_move_end(struct utun_pcb * pcb)3899 utun_data_move_end(struct utun_pcb *pcb)
3900 {
3901 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3902 	VERIFY(pcb->utun_pcb_data_move > 0);
3903 	/*
3904 	 * if there's no more thread moving data, wakeup any
3905 	 * drainers that are blocked waiting for this.
3906 	 */
3907 	if (--pcb->utun_pcb_data_move == 0 && pcb->utun_pcb_drainers > 0) {
3908 		wakeup(&(pcb->utun_pcb_data_move));
3909 	}
3910 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3911 }
3912 
3913 static void
utun_data_move_drain(struct utun_pcb * pcb)3914 utun_data_move_drain(struct utun_pcb *pcb)
3915 {
3916 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3917 	/* data path must already be marked as not ready */
3918 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3919 	pcb->utun_pcb_drainers++;
3920 	while (pcb->utun_pcb_data_move != 0) {
3921 		(void) msleep(&(pcb->utun_pcb_data_move),
3922 		    &pcb->utun_pcb_data_move_lock, (PZERO - 1), __func__, NULL);
3923 	}
3924 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3925 	VERIFY(pcb->utun_pcb_drainers > 0);
3926 	pcb->utun_pcb_drainers--;
3927 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3928 }
3929 
3930 static void
utun_wait_data_move_drain(struct utun_pcb * pcb)3931 utun_wait_data_move_drain(struct utun_pcb *pcb)
3932 {
3933 	/*
3934 	 * Mark the data path as not usable.
3935 	 */
3936 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3937 	UTUN_CLR_DATA_PATH_READY(pcb);
3938 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3939 
3940 	/* Wait until all threads in the data path are done. */
3941 	utun_data_move_drain(pcb);
3942 }
3943 #endif // UTUN_NEXUS
3944