xref: /xnu-11417.140.69/bsd/net/if_utun.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2008-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 
31 /* ----------------------------------------------------------------------------------
32  *   Application of kernel control for interface creation
33  *
34  *   Theory of operation:
35  *   utun (user tunnel) acts as glue between kernel control sockets and network interfaces.
36  *   This kernel control will register an interface for every client that connects.
37  *   ---------------------------------------------------------------------------------- */
38 
39 #include <sys/systm.h>
40 #include <sys/kern_control.h>
41 #include <net/kpi_protocol.h>
42 #include <net/kpi_interface.h>
43 #include <sys/socket.h>
44 #include <net/if.h>
45 #include <net/if_types.h>
46 #include <net/bpf.h>
47 #include <net/if_utun.h>
48 #include <sys/mbuf.h>
49 #include <sys/sockio.h>
50 #include <netinet/in.h>
51 #include <netinet/ip.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/in6_var.h>
54 #include <sys/kauth.h>
55 #include <net/necp.h>
56 #include <kern/zalloc.h>
57 
58 #include <net/sockaddr_utils.h>
59 
60 #include <os/log.h>
61 
62 #if SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
63 #include <skywalk/os_skywalk_private.h>
64 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
65 #include <skywalk/nexus/netif/nx_netif.h>
66 #define UTUN_NEXUS 1
67 #else // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
68 #define UTUN_NEXUS 0
69 #endif // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
70 
71 #if UTUN_NEXUS
72 static nexus_controller_t utun_ncd;
73 static int utun_ncd_refcount;
74 static uuid_t utun_kpipe_uuid;
75 static uuid_t utun_nx_dom_prov;
76 
/*
 * Set of UUIDs kept per utun interface; embedded by value in struct utun_pcb
 * as the utun_nx member.
 * NOTE(review): the field names suggest netif (if_*) and flowswitch (fsw_*)
 * provider/instance/attachment identifiers -- confirm against the code that
 * fills them in.
 * Note that utun_nx_t is a POINTER to this struct, not the struct itself.
 */
77 typedef struct utun_nx {
78 	uuid_t if_provider;
79 	uuid_t if_instance;
80 	uuid_t fsw_provider;
81 	uuid_t fsw_instance;
82 	uuid_t fsw_device;
83 	uuid_t fsw_agent;
84 } *utun_nx_t;
85 
86 #endif // UTUN_NEXUS
87 
88 /*
 * Control block allocated for each kernel control connection.
 *
 * Locking, as stated by the field comments below:
 *   - utun_pcb_lock (rw) protects the state fields and the ring pointers.
 *   - utun_input_chain_lock (mutex) protects the pending input mbuf list and
 *     must be taken AFTER utun_pcb_lock when both are held.
 *   - utun_pcb_data_move_lock (UTUN_NEXUS only) protects utun_pcb_data_move
 *     and utun_pcb_drainers.
 *
 * Everything between "#if UTUN_NEXUS" and "#endif" exists only when the file
 * is built with Skywalk kernel-pipe support.
 */
89 struct utun_pcb {
90 	TAILQ_ENTRY(utun_pcb)   utun_chain;
91 	kern_ctl_ref    utun_ctlref;
92 	ifnet_t                 utun_ifp;
93 	u_int32_t               utun_unit;
94 	u_int32_t               utun_unique_id;
	// Tested against UTUN_FLAGS_ENABLE_PROC_UUID by UTUN_HEADER_SIZE()
95 	u_int32_t               utun_flags;
96 	int                     utun_ext_ifdata_stats;
97 	u_int32_t               utun_max_pending_packets;
98 	char                    utun_if_xname[IFXNAMSIZ];
99 	char                    utun_unique_name[IFXNAMSIZ];
100 	// PCB lock protects state fields and rings
101 	decl_lck_rw_data(, utun_pcb_lock);
	// Pending input packets, linked through m_nextpkt; consumed from the
	// head by utun_netif_sync_rx, which also resets utun_input_chain_last
	// to NULL when the list becomes empty.
102 	struct mbuf *   utun_input_chain;
103 	struct mbuf *   utun_input_chain_last;
104 	u_int32_t               utun_input_chain_count;
105 	// Input chain lock protects the list of input mbufs
106 	// The input chain lock must be taken AFTER the PCB lock if both are held
107 	lck_mtx_t               utun_input_chain_lock;
108 
109 #if UTUN_NEXUS
110 	// lock to protect utun_pcb_data_move & utun_pcb_drainers
111 	decl_lck_mtx_data(, utun_pcb_data_move_lock);
112 	u_int32_t               utun_pcb_data_move; /* number of data moving contexts */
113 	u_int32_t               utun_pcb_drainers; /* number of threads waiting to drain */
114 	u_int32_t               utun_pcb_data_path_state; /* internal state of interface data path */
115 
116 	struct utun_nx  utun_nx;
117 	int                     utun_kpipe_enabled;
118 	uuid_t                  utun_kpipe_uuid;
	// Kernel pipe rings; the netif TX paths notify utun_kpipe_rxring and
	// utun_netif_sync_rx reads packets out of utun_kpipe_txring.
119 	void *                  utun_kpipe_rxring;
120 	void *                  utun_kpipe_txring;
121 	kern_pbufpool_t         utun_kpipe_pp;
122 	u_int32_t               utun_kpipe_tx_ring_size;
123 	u_int32_t               utun_kpipe_rx_ring_size;
124 
125 	kern_nexus_t    utun_netif_nexus;
126 	kern_pbufpool_t         utun_netif_pp;
	// Set by utun_netif_ring_init and cleared by utun_netif_ring_fini
127 	void *                  utun_netif_rxring;
128 	void *                  utun_netif_txring;
129 	uint64_t                utun_netif_txring_size;
130 
131 	u_int32_t               utun_slot_size;
132 	u_int32_t               utun_netif_ring_size;
133 	u_int32_t               utun_tx_fsw_ring_size;
134 	u_int32_t               utun_rx_fsw_ring_size;
135 	// Auto attach flowswitch when netif is enabled. When set to false,
136 	// it allows userspace nexus controller to attach and own flowswitch.
137 	bool                    utun_attach_fsw;
138 	bool                    utun_netif_connected;
139 	bool                    utun_use_netif;
140 	bool                    utun_needs_netagent;
141 #endif // UTUN_NEXUS
142 };
143 
144 /* Kernel Control functions */
145 static errno_t  utun_ctl_setup(u_int32_t *unit, void **unitinfo);
146 static errno_t  utun_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
147     void **unitinfo);
148 static errno_t  utun_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
149     void **unitinfo);
150 static errno_t  utun_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
151     void *unitinfo);
152 static errno_t  utun_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
153     void *unitinfo, mbuf_t m, int flags);
154 static errno_t  utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
155     int opt, void *__sized_by(*len) data, size_t *len);
156 static errno_t  utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
157     int opt, void *__sized_by(len) data, size_t len);
158 static void             utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
159     int flags);
160 
161 /* Network Interface functions */
162 static void     utun_start(ifnet_t interface);
163 static errno_t  utun_framer(ifnet_t interface, mbuf_t *packet,
164     const struct sockaddr *dest,
165     IFNET_LLADDR_T dest_lladdr,
166     IFNET_FRAME_TYPE_T frame_type,
167     u_int32_t *prepend_len, u_int32_t *postpend_len);
168 static errno_t  utun_output(ifnet_t interface, mbuf_t data);
169 static errno_t  utun_demux(ifnet_t interface, mbuf_t data, char *frame_header,
170     protocol_family_t *protocol);
171 static errno_t  utun_add_proto(ifnet_t interface, protocol_family_t protocol,
172     const struct ifnet_demux_desc *demux_array,
173     u_int32_t demux_count);
174 static errno_t  utun_del_proto(ifnet_t interface, protocol_family_t protocol);
175 static errno_t  utun_ioctl(ifnet_t interface, u_long cmd, void *data);
176 static void             utun_detached(ifnet_t interface);
177 
178 /* Protocol handlers */
179 static errno_t  utun_attach_proto(ifnet_t interface, protocol_family_t proto);
180 static errno_t  utun_proto_input(ifnet_t interface, protocol_family_t protocol,
181     mbuf_t m, char *frame_header);
182 static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
183     mbuf_t *packet, const struct sockaddr *dest, void *route,
184     char *frame_type, char *link_layer_dest);
185 static errno_t utun_pkt_input(struct utun_pcb *pcb, mbuf_t m);
186 
187 /* data movement refcounting functions */
188 #if UTUN_NEXUS
189 static boolean_t utun_data_move_begin(struct utun_pcb *pcb);
190 static void utun_data_move_end(struct utun_pcb *pcb);
191 static void utun_wait_data_move_drain(struct utun_pcb *pcb);
192 
193 /* Data path state bits, kept in the utun_pcb_data_path_state field of struct utun_pcb */
194 #define UTUN_PCB_DATA_PATH_READY    0x1
195 
196 /*
 * Macros to set/clear/test data path states.
 * Each one is a plain read-modify-write (or read) of
 * (_pcb)->utun_pcb_data_path_state; the macros take no lock themselves.
 */
197 #define UTUN_SET_DATA_PATH_READY(_pcb) \
198     ((_pcb)->utun_pcb_data_path_state |= UTUN_PCB_DATA_PATH_READY)
199 #define UTUN_CLR_DATA_PATH_READY(_pcb) \
200     ((_pcb)->utun_pcb_data_path_state &= ~UTUN_PCB_DATA_PATH_READY)
201 #define UTUN_IS_DATA_PATH_READY(_pcb) \
202     (((_pcb)->utun_pcb_data_path_state & UTUN_PCB_DATA_PATH_READY) != 0)
203 
204 #define UTUN_IF_DEFAULT_SLOT_SIZE 2048
205 #define UTUN_IF_DEFAULT_RING_SIZE 64
206 #define UTUN_IF_DEFAULT_TX_FSW_RING_SIZE 64
207 #define UTUN_IF_DEFAULT_RX_FSW_RING_SIZE 128
208 #define UTUN_IF_DEFAULT_BUF_SEG_SIZE    skmem_usr_buf_seg_size
209 #define UTUN_IF_HEADROOM_SIZE 32
210 
211 #define UTUN_IF_MIN_RING_SIZE 8
212 #define UTUN_IF_MAX_RING_SIZE 1024
213 
214 #define UTUN_IF_MIN_SLOT_SIZE 1024
215 #define UTUN_IF_MAX_SLOT_SIZE (32 * 1024)
216 
217 #define UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT 512
218 
219 static int if_utun_max_pending_input = UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT;
220 
221 static int sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS;
222 static int sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
223 static int sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
224 
225 static int if_utun_ring_size = UTUN_IF_DEFAULT_RING_SIZE;
226 static int if_utun_tx_fsw_ring_size = UTUN_IF_DEFAULT_TX_FSW_RING_SIZE;
227 static int if_utun_rx_fsw_ring_size = UTUN_IF_DEFAULT_RX_FSW_RING_SIZE;
228 
229 SYSCTL_DECL(_net_utun);
230 SYSCTL_NODE(_net, OID_AUTO, utun, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "UTun");
231 
232 SYSCTL_INT(_net_utun, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_utun_max_pending_input, 0, "");
233 SYSCTL_PROC(_net_utun, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
234     &if_utun_ring_size, UTUN_IF_DEFAULT_RING_SIZE, &sysctl_if_utun_ring_size, "I", "");
235 SYSCTL_PROC(_net_utun, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
236     &if_utun_tx_fsw_ring_size, UTUN_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_utun_tx_fsw_ring_size, "I", "");
237 SYSCTL_PROC(_net_utun, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
238     &if_utun_rx_fsw_ring_size, UTUN_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_utun_rx_fsw_ring_size, "I", "");
239 
240 static errno_t
241 utun_register_nexus(void);
242 
243 static errno_t
244 utun_netif_prepare(__unused kern_nexus_t nexus, ifnet_t ifp);
245 static errno_t
246 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
247     proc_t p, kern_nexus_t nexus,
248     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx);
249 static errno_t
250 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
251     kern_channel_t channel);
252 static void
253 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
254     kern_channel_t channel);
255 static void
256 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
257     kern_channel_t channel);
258 static void
259 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
260     kern_channel_t channel);
261 static errno_t
262 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
263     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
264     void **ring_ctx);
265 static void
266 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
267     kern_channel_ring_t ring);
268 static errno_t
269 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
270     kern_channel_ring_t ring, uint32_t flags);
271 static errno_t
272 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
273     kern_channel_ring_t ring, uint32_t flags);
274 #endif // UTUN_NEXUS
275 
/* Default MTU value for utun interfaces (presumably in bytes -- confirm at the use site) */
276 #define UTUN_DEFAULT_MTU 1500
/*
 * Size in bytes of the utun header that precedes each packet's payload:
 * a 32-bit word (utun_netif_sync_tx stores the address family there),
 * followed by a uuid_t when UTUN_FLAGS_ENABLE_PROC_UUID is set in
 * (_pcb)->utun_flags.  Evaluates to a size_t.
 */
277 #define UTUN_HEADER_SIZE(_pcb) (sizeof(u_int32_t) + (((_pcb)->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) ? sizeof(uuid_t) : 0))
278 
279 static kern_ctl_ref     utun_kctlref;
280 static LCK_ATTR_DECLARE(utun_lck_attr, 0, 0);
281 static LCK_GRP_DECLARE(utun_lck_grp, "utun");
282 static LCK_MTX_DECLARE_ATTR(utun_lock, &utun_lck_grp, &utun_lck_attr);
283 
284 TAILQ_HEAD(utun_list, utun_pcb) utun_head;
285 
286 static KALLOC_TYPE_DEFINE(utun_pcb_zone, struct utun_pcb, NET_KT_DEFAULT);
287 
288 #if UTUN_NEXUS
289 
290 static int
291 sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS
292 {
293 #pragma unused(arg1, arg2)
294 	int value = if_utun_ring_size;
295 
296 	int error = sysctl_handle_int(oidp, &value, 0, req);
297 	if (error || !req->newptr) {
298 		return error;
299 	}
300 
301 	if (value < UTUN_IF_MIN_RING_SIZE ||
302 	    value > UTUN_IF_MAX_RING_SIZE) {
303 		return EINVAL;
304 	}
305 
306 	if_utun_ring_size = value;
307 
308 	return 0;
309 }
310 
311 static int
312 sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
313 {
314 #pragma unused(arg1, arg2)
315 	int value = if_utun_tx_fsw_ring_size;
316 
317 	int error = sysctl_handle_int(oidp, &value, 0, req);
318 	if (error || !req->newptr) {
319 		return error;
320 	}
321 
322 	if (value < UTUN_IF_MIN_RING_SIZE ||
323 	    value > UTUN_IF_MAX_RING_SIZE) {
324 		return EINVAL;
325 	}
326 
327 	if_utun_tx_fsw_ring_size = value;
328 
329 	return 0;
330 }
331 
332 static int
333 sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
334 {
335 #pragma unused(arg1, arg2)
336 	int value = if_utun_rx_fsw_ring_size;
337 
338 	int error = sysctl_handle_int(oidp, &value, 0, req);
339 	if (error || !req->newptr) {
340 		return error;
341 	}
342 
343 	if (value < UTUN_IF_MIN_RING_SIZE ||
344 	    value > UTUN_IF_MAX_RING_SIZE) {
345 		return EINVAL;
346 	}
347 
348 	if_utun_rx_fsw_ring_size = value;
349 
350 	return 0;
351 }
352 
353 static errno_t
utun_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)354 utun_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
355     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
356     void **ring_ctx)
357 {
358 #pragma unused(nxprov)
359 #pragma unused(channel)
360 #pragma unused(ring_ctx)
361 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
362 	if (!is_tx_ring) {
363 		VERIFY(pcb->utun_netif_rxring == NULL);
364 		pcb->utun_netif_rxring = ring;
365 	} else {
366 		VERIFY(pcb->utun_netif_txring == NULL);
367 		pcb->utun_netif_txring = ring;
368 	}
369 	return 0;
370 }
371 
372 static void
utun_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)373 utun_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
374     kern_channel_ring_t ring)
375 {
376 #pragma unused(nxprov)
377 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
378 	if (pcb->utun_netif_rxring == ring) {
379 		pcb->utun_netif_rxring = NULL;
380 	} else if (pcb->utun_netif_txring == ring) {
381 		pcb->utun_netif_txring = NULL;
382 	}
383 }
384 
/*
 * Netif TX sync callback (installed as nxpi_sync_tx).
 *
 * The whole body is bracketed by utun_data_move_begin()/utun_data_move_end();
 * if the data path is stopped the function logs and returns 0 immediately.
 * utun_pcb_lock is held shared while the ring is examined.
 *
 * Two modes:
 *   - Kernel pipe enabled: nothing is copied here.  The kernel pipe RX ring
 *     (if set) is notified after the pcb lock is dropped, and the function
 *     returns.
 *   - Otherwise: every packet in the TX ring is detached, tapped to bpf,
 *     prefixed in place with the utun header (address family word plus an
 *     optional UUID), copied header-and-payload into a freshly allocated
 *     mbuf, and handed to utun_output().  Detached packets are collected on
 *     a chain and freed back to the ring's packet pool after the loop.
 *
 * Always returns 0; per-packet failures are logged and counted in the netif
 * statistics.
 */
385 static errno_t
utun_netif_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)386 utun_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
387     kern_channel_ring_t tx_ring, uint32_t flags)
388 {
389 #pragma unused(nxprov)
390 #pragma unused(flags)
391 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
392 
393 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
394 
395 	if (!utun_data_move_begin(pcb)) {
396 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
397 		    __func__, if_name(pcb->utun_ifp));
398 		return 0;
399 	}
400 
401 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
402 
403 	struct kern_channel_ring_stat_increment tx_ring_stats;
404 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
	// tx_pslot tracks the last slot visited so the ring can be advanced
	// past it once the loop is done.
405 	kern_channel_slot_t tx_pslot = NULL;
406 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
	// Head of the chain of packets detached from the ring (0 == empty)
407 	kern_packet_t tx_chain_ph = 0;
408 
409 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
410 
411 	if (tx_slot == NULL) {
412 		// Nothing to write, don't bother signalling
413 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
414 		utun_data_move_end(pcb);
415 		return 0;
416 	}
417 
418 	if (pcb->utun_kpipe_enabled) {
		// Snapshot the ring pointer under the lock; notify after unlocking
419 		kern_channel_ring_t __single rx_ring = pcb->utun_kpipe_rxring;
420 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
421 
422 		// Signal the kernel pipe ring to read
423 		if (rx_ring != NULL) {
424 			kern_channel_notify(rx_ring, 0);
425 		}
426 		utun_data_move_end(pcb);
427 		return 0;
428 	}
429 
430 	// If we're here, we're injecting into the utun kernel control socket
431 	while (tx_slot != NULL) {
432 		size_t length = 0;
433 		mbuf_ref_t data = NULL;
434 
435 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
436 
437 		if (tx_ph == 0) {
438 			// Advance TX ring
439 			tx_pslot = tx_slot;
440 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
441 			continue;
442 		}
443 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
		// Link the packets detached so far to this one; tx_ph becomes the
		// new chain head, and the whole chain is freed after the loop.
444 		if (tx_chain_ph != 0) {
445 			kern_packet_append(tx_ph, tx_chain_ph);
446 		}
447 		tx_chain_ph = tx_ph;
448 
449 		// Advance TX ring
450 		tx_pslot = tx_slot;
451 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
452 
		// Only the first buflet of the packet is examined and copied
453 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
454 		VERIFY(tx_buf != NULL);
455 
456 		bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
457 
458 		uint32_t tx_offset = kern_buflet_get_data_offset(tx_buf);
459 		uint32_t tx_length = kern_buflet_get_data_length(tx_buf);
460 		/* tx_baddr is the absolute buffer address */
461 		uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
462 		    kern_buflet_get_data_address(tx_buf),
463 		    kern_buflet_get_data_limit(tx_buf));
464 		VERIFY(tx_baddr != 0);
465 
466 		// The offset must be large enough for the headers
467 		VERIFY(tx_offset >= UTUN_HEADER_SIZE(pcb));
468 
469 		// Find family
		// The IP version is taken from the high nibble of the first
		// payload byte.  An unknown version is only logged: af stays 0
		// and the packet is still forwarded.
470 		uint32_t af = 0;
471 		uint8_t vhl = *(uint8_t *)(tx_baddr + tx_offset);
472 		u_int ip_version = (vhl >> 4);
473 		switch (ip_version) {
474 		case 4: {
475 			af = AF_INET;
476 			break;
477 		}
478 		case 6: {
479 			af = AF_INET6;
480 			break;
481 		}
482 		default: {
483 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s: unknown ip version %u vhl %u tx_offset %u len %u header_size %zu\n",
484 			    pcb->utun_ifp->if_xname, ip_version, vhl, tx_offset, tx_length,
485 			    UTUN_HEADER_SIZE(pcb));
486 			break;
487 		}
488 		}
489 
		// Step back into the space in front of the payload so that the
		// utun header and the payload form one contiguous region starting
		// at tx_baddr, tx_length bytes long.
490 		tx_offset -= UTUN_HEADER_SIZE(pcb);
491 		tx_length += UTUN_HEADER_SIZE(pcb);
492 		tx_baddr += tx_offset;
493 
		// Header + payload, capped at the configured slot size
494 		length = MIN(tx_length, pcb->utun_slot_size);
495 
496 		// Copy in family
497 		memcpy(tx_baddr, &af, sizeof(af));
498 		if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
499 			kern_packet_get_euuid(tx_ph, (void *)(tx_baddr + sizeof(af)));
500 		}
501 
502 		if (length > 0) {
503 			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
504 			if (error == 0) {
505 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
506 				if (error == 0) {
					// NOTE(review): data is not freed here when
					// utun_output fails -- presumably utun_output
					// takes ownership of the mbuf; confirm.
507 					error = utun_output(pcb->utun_ifp, data);
508 					if (error != 0) {
509 						os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - utun_output error %d\n", pcb->utun_ifp->if_xname, error);
510 					}
511 				} else {
512 					os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->utun_ifp->if_xname, length, error);
513 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
514 					STATS_INC(nifs, NETIF_STATS_DROP);
515 					mbuf_freem(data);
516 					data = NULL;
517 				}
518 			} else {
519 				os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->utun_ifp->if_xname, error);
520 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
521 				STATS_INC(nifs, NETIF_STATS_DROP);
522 			}
523 		} else {
			// NOTE(review): length already includes the utun header, so
			// this branch looks reachable only when utun_slot_size is 0.
			// It also counts NETIF_STATS_DROP_NOMEM_MBUF although no
			// allocation failed -- confirm whether a bad-length counter
			// was intended.
524 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - 0 length packet\n", pcb->utun_ifp->if_xname);
525 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
526 			STATS_INC(nifs, NETIF_STATS_DROP);
527 		}
528 
		// data is NULL for every drop path above; dropped packets are
		// not counted as transferred.
529 		if (data == NULL) {
530 			continue;
531 		}
532 
533 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
534 		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
535 
536 		tx_ring_stats.kcrsi_slots_transferred++;
537 		tx_ring_stats.kcrsi_bytes_transferred += length;
538 	}
	// Return every detached packet to the ring's packet pool
539 	if (tx_chain_ph != 0) {
540 		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
541 	}
542 	if (tx_pslot) {
543 		kern_channel_advance_slot(tx_ring, tx_pslot);
544 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
545 		(void)kern_channel_reclaim(tx_ring);
546 	}
547 
548 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
549 	utun_data_move_end(pcb);
550 	return 0;
551 }
552 
553 static errno_t
utun_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)554 utun_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
555     kern_channel_ring_t ring, __unused uint32_t flags)
556 {
557 #pragma unused(nxprov)
558 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
559 	boolean_t more = false;
560 	errno_t rc = 0;
561 
562 	if (!utun_data_move_begin(pcb)) {
563 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
564 		    __func__, if_name(pcb->utun_ifp));
565 		return 0;
566 	}
567 
568 	/*
569 	 * Refill and sync the ring; we may be racing against another thread doing
570 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
571 	 * variant here.
572 	 */
573 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
574 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
575 		os_log_error(OS_LOG_DEFAULT, "%s, tx refill failed %d\n", __func__, rc);
576 	}
577 
578 	(void) kr_enter(ring, TRUE);
579 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
580 
581 	if (pcb->utun_kpipe_enabled) {
582 		uint32_t tx_available = kern_channel_available_slot_count(ring);
583 		if (pcb->utun_netif_txring_size > 0 &&
584 		    tx_available >= pcb->utun_netif_txring_size - 1) {
585 			// No room left in tx ring, disable output for now
586 			errno_t error = ifnet_disable_output(pcb->utun_ifp);
587 			if (error != 0) {
588 				os_log_error(OS_LOG_DEFAULT, "utun_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
589 			}
590 		}
591 	}
592 
593 	if (pcb->utun_kpipe_enabled) {
594 		kern_channel_ring_t __single rx_ring = pcb->utun_kpipe_rxring;
595 
596 		// Unlock while calling notify
597 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
598 		// Signal the kernel pipe ring to read
599 		if (rx_ring != NULL) {
600 			kern_channel_notify(rx_ring, 0);
601 		}
602 	} else {
603 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
604 	}
605 
606 	kr_exit(ring);
607 	utun_data_move_end(pcb);
608 	return 0;
609 }
610 
/*
 * Netif RX sync callback (installed as nxpi_sync_rx).
 *
 * Fills free slots of the netif RX ring from two sources, in this order:
 *   1. The pcb's pending input mbuf chain (utun_input_chain).  Each mbuf is
 *      unlinked under utun_input_chain_lock, and its payload (everything
 *      after the utun header) is copied into a packet newly allocated from
 *      the ring's packet pool.  Mbufs shorter than the utun header, or whose
 *      payload exceeds PP_BUF_SIZE_DEF() of the pool, are dropped.
 *   2. The kernel pipe TX ring (utun_kpipe_txring), if one is set.  The pcb
 *      lock is dropped around kr_enter() on that ring and the ring pointer
 *      is re-validated afterwards.  Packets are copied slot-to-slot with the
 *      utun header stripped.
 *
 * The whole body is bracketed by utun_data_move_begin()/utun_data_move_end();
 * utun_pcb_lock is held shared except around kr_enter().  Always returns 0;
 * drops are recorded in the netif statistics.
 */
611 static errno_t
utun_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)612 utun_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
613     kern_channel_ring_t rx_ring, uint32_t flags)
614 {
615 #pragma unused(nxprov)
616 #pragma unused(flags)
617 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
618 	struct kern_channel_ring_stat_increment rx_ring_stats;
619 
620 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
621 
622 	if (!utun_data_move_begin(pcb)) {
623 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
624 		    __func__, if_name(pcb->utun_ifp));
625 		return 0;
626 	}
627 
628 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
629 
630 	// Reclaim user-released slots
631 	(void) kern_channel_reclaim(rx_ring);
632 
633 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
634 
635 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
636 	if (avail == 0) {
637 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
638 		utun_data_move_end(pcb);
639 		return 0;
640 	}
641 
642 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
643 	VERIFY(rx_pp != NULL);
644 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
	// rx_pslot is the last RX slot that had a packet attached; the ring
	// is advanced past it at "done".
645 	kern_channel_slot_t rx_pslot = NULL;
646 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
647 
	// Phase 1: move pending input mbufs into RX slots
648 	while (rx_slot != NULL) {
649 		// Check for a waiting packet
650 		lck_mtx_lock(&pcb->utun_input_chain_lock);
651 		mbuf_t data = pcb->utun_input_chain;
652 		if (data == NULL) {
653 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
654 			break;
655 		}
656 
657 		// Allocate rx packet
		// Done before unlinking the mbuf, so an allocation failure leaves
		// the mbuf on the chain.
658 		kern_packet_t rx_ph = 0;
659 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
660 		if (__improbable(error != 0)) {
661 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
662 			STATS_INC(nifs, NETIF_STATS_DROP);
663 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
664 			break;
665 		}
666 
667 		// Advance waiting packets
668 		if (pcb->utun_input_chain_count > 0) {
669 			pcb->utun_input_chain_count--;
670 		}
671 		pcb->utun_input_chain = data->m_nextpkt;
672 		data->m_nextpkt = NULL;
673 		if (pcb->utun_input_chain == NULL) {
674 			pcb->utun_input_chain_last = NULL;
675 		}
676 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
677 
678 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
679 		size_t length = mbuf_pkthdr_len(data);
680 
		// On either drop below, rx_slot is NOT advanced: the same slot is
		// retried with the next mbuf.
681 		if (length < header_offset) {
682 			// mbuf is too small
683 			mbuf_freem(data);
684 			kern_pbufpool_free(rx_pp, rx_ph);
685 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
686 			STATS_INC(nifs, NETIF_STATS_DROP);
687 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length too short for header %zu < %zu\n",
688 			    pcb->utun_ifp->if_xname, length, header_offset);
689 			continue;
690 		}
691 
		// From here on, length is the payload size without the utun header
692 		length -= header_offset;
693 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
694 			// Flush data
695 			mbuf_freem(data);
696 			kern_pbufpool_free(rx_pp, rx_ph);
697 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
698 			STATS_INC(nifs, NETIF_STATS_DROP);
699 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length %zu > %u\n",
700 			    pcb->utun_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
701 			continue;
702 		}
703 
704 		mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
705 
706 		// Fillout rx packet
707 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
708 		VERIFY(rx_buf != NULL);
709 		void *rx_baddr = __unsafe_forge_bidi_indexable(void *,
710 		    kern_buflet_get_data_address(rx_buf),
711 		    kern_buflet_get_data_limit(rx_buf));
712 		VERIFY(rx_baddr != NULL);
713 
714 		// Copy-in data from mbuf to buflet
715 		mbuf_copydata(data, header_offset, length, rx_baddr);
716 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
717 
718 		// Finalize and attach the packet
719 		error = kern_buflet_set_data_offset(rx_buf, 0);
720 		VERIFY(error == 0);
721 		error = kern_buflet_set_data_length(rx_buf, length);
722 		VERIFY(error == 0);
723 		error = kern_packet_set_headroom(rx_ph, 0);
724 		VERIFY(error == 0);
725 		error = kern_packet_finalize(rx_ph);
726 		VERIFY(error == 0);
727 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
728 		VERIFY(error == 0);
729 
730 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
731 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
732 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
733 
734 		rx_ring_stats.kcrsi_slots_transferred++;
735 		rx_ring_stats.kcrsi_bytes_transferred += length;
736 
737 		mbuf_freem(data);
738 
739 		// Advance ring
740 		rx_pslot = rx_slot;
741 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
742 	}
743 
	// Phase 2: move packets from the kernel pipe TX ring into the
	// remaining RX slots
744 	struct kern_channel_ring_stat_increment tx_ring_stats;
745 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
746 	kern_channel_ring_t __single tx_ring = pcb->utun_kpipe_txring;
747 	kern_channel_slot_t tx_pslot = NULL;
748 	kern_channel_slot_t tx_slot = NULL;
749 	if (tx_ring == NULL) {
750 		// Kernel pipe TX ring not set up yet, nothing to read
751 		goto done;
752 	}
753 	// Unlock utun before entering ring
754 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
755 
756 	(void)kr_enter(tx_ring, TRUE);
757 
758 	// Lock again after entering and validate
	// (the ring pointer may have changed while the lock was dropped; the
	// "done" path still calls kr_exit() on the ring entered above)
759 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
760 	if (tx_ring != pcb->utun_kpipe_txring) {
761 		goto done;
762 	}
763 
764 	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
765 	if (tx_slot == NULL) {
766 		// Nothing to read, don't bother signalling
767 		goto done;
768 	}
769 
770 	while (rx_slot != NULL && tx_slot != NULL) {
771 		// Allocate rx packet
772 		kern_packet_t rx_ph = 0;
773 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
774 
775 		// Advance TX ring
776 		tx_pslot = tx_slot;
777 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
778 
779 		/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
780 		if (tx_ph == 0) {
781 			continue;
782 		}
783 
784 		/* XXX We could try this alloc before advancing the slot to avoid
785 		 * dropping the packet on failure to allocate.
786 		 */
787 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
788 		if (__improbable(error != 0)) {
789 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
790 			STATS_INC(nifs, NETIF_STATS_DROP);
791 			break;
792 		}
793 
794 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
795 		VERIFY(tx_buf != NULL);
796 		uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
797 		    kern_buflet_get_data_address(tx_buf),
798 		    kern_buflet_get_data_limit(tx_buf));
799 		VERIFY(tx_baddr != 0);
		// tx_baddr now points at the start of the packet data, i.e. at
		// the utun header
800 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
801 
802 		// Check packet length
803 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
804 		uint32_t tx_length = kern_packet_get_data_length(tx_ph);
805 		if (tx_length < header_offset) {
806 			// Packet is too small
807 			kern_pbufpool_free(rx_pp, rx_ph);
808 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
809 			STATS_INC(nifs, NETIF_STATS_DROP);
810 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: packet length too short for header %u < %zu\n",
811 			    pcb->utun_ifp->if_xname, tx_length, header_offset);
812 			continue;
813 		}
814 
		// Payload length without the utun header, capped at the slot size
815 		size_t length = MIN(tx_length - header_offset,
816 		    pcb->utun_slot_size);
817 
818 		tx_ring_stats.kcrsi_slots_transferred++;
819 		tx_ring_stats.kcrsi_bytes_transferred += length;
820 
821 		// Fillout rx packet
822 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
823 		VERIFY(rx_buf != NULL);
824 		void *rx_baddr = __unsafe_forge_bidi_indexable(void *,
825 		    kern_buflet_get_data_address(rx_buf),
826 		    kern_buflet_get_data_limit(rx_buf));
827 		VERIFY(rx_baddr != NULL);
828 
829 		// Copy-in data from tx to rx
830 		memcpy((void *)rx_baddr, (void *)(tx_baddr + header_offset), length);
831 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
832 
833 		// Finalize and attach the packet
834 		error = kern_buflet_set_data_offset(rx_buf, 0);
835 		VERIFY(error == 0);
836 		error = kern_buflet_set_data_length(rx_buf, length);
837 		VERIFY(error == 0);
838 		error = kern_packet_set_headroom(rx_ph, 0);
839 		VERIFY(error == 0);
840 		error = kern_packet_finalize(rx_ph);
841 		VERIFY(error == 0);
842 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
843 		VERIFY(error == 0);
844 
845 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
846 		STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
847 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
848 
849 		rx_ring_stats.kcrsi_slots_transferred++;
850 		rx_ring_stats.kcrsi_bytes_transferred += length;
851 
852 		rx_pslot = rx_slot;
853 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
854 	}
855 
	// Common exit: reached with utun_pcb_lock held shared on every path
856 done:
857 	if (rx_pslot) {
858 		kern_channel_advance_slot(rx_ring, rx_pslot);
859 		kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
860 	}
861 
862 	if (tx_pslot) {
863 		kern_channel_advance_slot(tx_ring, tx_pslot);
864 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
865 		(void)kern_channel_reclaim(tx_ring);
866 	}
867 
868 	// Unlock first, then exit ring
869 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
870 	if (tx_ring != NULL) {
		// Only notify when at least one kernel pipe TX slot was consumed
871 		if (tx_pslot != NULL) {
872 			kern_channel_notify(tx_ring, 0);
873 		}
874 		kr_exit(tx_ring);
875 	}
876 
877 	utun_data_move_end(pcb);
878 	return 0;
879 }
880 
881 static errno_t
utun_nexus_ifattach(struct utun_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)882 utun_nexus_ifattach(struct utun_pcb *pcb,
883     struct ifnet_init_eparams *init_params,
884     struct ifnet **ifp)
885 {
886 	errno_t err;
887 	nexus_controller_t controller = kern_nexus_shared_controller();
888 	struct kern_nexus_net_init net_init;
889 	struct kern_pbufpool_init pp_init;
890 
891 	nexus_name_t provider_name;
892 	snprintf((char *)provider_name, sizeof(provider_name),
893 	    "com.apple.netif.%s", pcb->utun_if_xname);
894 
895 	struct kern_nexus_provider_init prov_init = {
896 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
897 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
898 		.nxpi_pre_connect = utun_nexus_pre_connect,
899 		.nxpi_connected = utun_nexus_connected,
900 		.nxpi_pre_disconnect = utun_netif_pre_disconnect,
901 		.nxpi_disconnected = utun_nexus_disconnected,
902 		.nxpi_ring_init = utun_netif_ring_init,
903 		.nxpi_ring_fini = utun_netif_ring_fini,
904 		.nxpi_slot_init = NULL,
905 		.nxpi_slot_fini = NULL,
906 		.nxpi_sync_tx = utun_netif_sync_tx,
907 		.nxpi_sync_rx = utun_netif_sync_rx,
908 		.nxpi_tx_doorbell = utun_netif_tx_doorbell,
909 	};
910 
911 	nexus_attr_t __single nxa = NULL;
912 	err = kern_nexus_attr_create(&nxa);
913 	if (err != 0) {
914 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
915 		    __func__, err);
916 		goto failed;
917 	}
918 
919 	uint64_t slot_buffer_size = pcb->utun_slot_size;
920 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
921 	VERIFY(err == 0);
922 
923 	// Reset ring size for netif nexus to limit memory usage
924 	uint64_t ring_size = pcb->utun_netif_ring_size;
925 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
926 	VERIFY(err == 0);
927 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
928 	VERIFY(err == 0);
929 
930 	pcb->utun_netif_txring_size = ring_size;
931 
932 	bzero(&pp_init, sizeof(pp_init));
933 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
934 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
935 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
936 	pp_init.kbi_bufsize = pcb->utun_slot_size;
937 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
938 	pp_init.kbi_max_frags = 1;
939 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
940 	    "%s", provider_name);
941 	pp_init.kbi_ctx = NULL;
942 	pp_init.kbi_ctx_retain = NULL;
943 	pp_init.kbi_ctx_release = NULL;
944 
945 	err = kern_pbufpool_create(&pp_init, &pcb->utun_netif_pp, NULL);
946 	if (err != 0) {
947 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
948 		goto failed;
949 	}
950 
951 	err = kern_nexus_controller_register_provider(controller,
952 	    utun_nx_dom_prov,
953 	    provider_name,
954 	    &prov_init,
955 	    sizeof(prov_init),
956 	    nxa,
957 	    &pcb->utun_nx.if_provider);
958 	if (err != 0) {
959 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
960 		    __func__, err);
961 		goto failed;
962 	}
963 
964 	bzero(&net_init, sizeof(net_init));
965 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
966 	net_init.nxneti_flags = 0;
967 	net_init.nxneti_eparams = init_params;
968 	net_init.nxneti_lladdr = NULL;
969 	net_init.nxneti_prepare = utun_netif_prepare;
970 	net_init.nxneti_rx_pbufpool = pcb->utun_netif_pp;
971 	net_init.nxneti_tx_pbufpool = pcb->utun_netif_pp;
972 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
973 	    pcb->utun_nx.if_provider,
974 	    pcb,
975 	    NULL,
976 	    &pcb->utun_nx.if_instance,
977 	    &net_init,
978 	    ifp);
979 	if (err != 0) {
980 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
981 		    __func__, err);
982 		kern_nexus_controller_deregister_provider(controller,
983 		    pcb->utun_nx.if_provider);
984 		uuid_clear(pcb->utun_nx.if_provider);
985 		goto failed;
986 	}
987 
988 failed:
989 	if (nxa) {
990 		kern_nexus_attr_destroy(nxa);
991 	}
992 	if (err && pcb->utun_netif_pp != NULL) {
993 		kern_pbufpool_destroy(pcb->utun_netif_pp);
994 		pcb->utun_netif_pp = NULL;
995 	}
996 	return err;
997 }
998 
999 static void
utun_detach_provider_and_instance(uuid_t provider,uuid_t instance)1000 utun_detach_provider_and_instance(uuid_t provider, uuid_t instance)
1001 {
1002 	nexus_controller_t controller = kern_nexus_shared_controller();
1003 	errno_t err;
1004 
1005 	if (!uuid_is_null(instance)) {
1006 		err = kern_nexus_controller_free_provider_instance(controller,
1007 		    instance);
1008 		if (err != 0) {
1009 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1010 			    __func__, err);
1011 		}
1012 		uuid_clear(instance);
1013 	}
1014 	if (!uuid_is_null(provider)) {
1015 		err = kern_nexus_controller_deregister_provider(controller,
1016 		    provider);
1017 		if (err != 0) {
1018 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1019 		}
1020 		uuid_clear(provider);
1021 	}
1022 	return;
1023 }
1024 
1025 static void
utun_nexus_detach(struct utun_pcb * pcb)1026 utun_nexus_detach(struct utun_pcb *pcb)
1027 {
1028 	utun_nx_t nx = &pcb->utun_nx;
1029 	nexus_controller_t controller = kern_nexus_shared_controller();
1030 	errno_t err;
1031 
1032 	if (!uuid_is_null(nx->fsw_device)) {
1033 		err = kern_nexus_ifdetach(controller,
1034 		    nx->fsw_instance,
1035 		    nx->fsw_device);
1036 		if (err != 0) {
1037 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
1038 			    __func__, err);
1039 		}
1040 	}
1041 
1042 	utun_detach_provider_and_instance(nx->fsw_provider,
1043 	    nx->fsw_instance);
1044 	utun_detach_provider_and_instance(nx->if_provider,
1045 	    nx->if_instance);
1046 
1047 	if (pcb->utun_netif_pp != NULL) {
1048 		kern_pbufpool_destroy(pcb->utun_netif_pp);
1049 		pcb->utun_netif_pp = NULL;
1050 	}
1051 	memset(nx, 0, sizeof(*nx));
1052 }
1053 
1054 static errno_t
utun_create_fs_provider_and_instance(struct utun_pcb * pcb,const char * type_name,const char * ifname,uuid_t * provider,uuid_t * instance)1055 utun_create_fs_provider_and_instance(struct utun_pcb *pcb,
1056     const char *type_name,
1057     const char *ifname,
1058     uuid_t *provider, uuid_t *instance)
1059 {
1060 	nexus_attr_t __single attr = NULL;
1061 	nexus_controller_t controller = kern_nexus_shared_controller();
1062 	uuid_t dom_prov;
1063 	errno_t err;
1064 	struct kern_nexus_init init;
1065 	nexus_name_t    provider_name;
1066 
1067 	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1068 	    &dom_prov);
1069 	if (err != 0) {
1070 		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
1071 		    __func__, type_name, err);
1072 		goto failed;
1073 	}
1074 
1075 	err = kern_nexus_attr_create(&attr);
1076 	if (err != 0) {
1077 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1078 		    __func__, err);
1079 		goto failed;
1080 	}
1081 
1082 	uint64_t slot_buffer_size = pcb->utun_slot_size;
1083 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1084 	VERIFY(err == 0);
1085 
1086 	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1087 	uint64_t tx_ring_size = pcb->utun_tx_fsw_ring_size;
1088 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1089 	VERIFY(err == 0);
1090 	uint64_t rx_ring_size = pcb->utun_rx_fsw_ring_size;
1091 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1092 	VERIFY(err == 0);
1093 	/*
1094 	 * Configure flowswitch to use super-packet (multi-buflet).
1095 	 * This allows flowswitch to perform intra-stack packet aggregation.
1096 	 */
1097 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
1098 	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
1099 	VERIFY(err == 0);
1100 
1101 	snprintf((char *)provider_name, sizeof(provider_name),
1102 	    "com.apple.%s.%s", type_name, ifname);
1103 	err = kern_nexus_controller_register_provider(controller,
1104 	    dom_prov,
1105 	    provider_name,
1106 	    NULL,
1107 	    0,
1108 	    attr,
1109 	    provider);
1110 	kern_nexus_attr_destroy(attr);
1111 	attr = NULL;
1112 	if (err != 0) {
1113 		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
1114 		    __func__, type_name, err);
1115 		goto failed;
1116 	}
1117 	bzero(&init, sizeof(init));
1118 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1119 	err = kern_nexus_controller_alloc_provider_instance(controller,
1120 	    *provider,
1121 	    NULL, NULL,
1122 	    instance, &init);
1123 	if (err != 0) {
1124 		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
1125 		    __func__, type_name, err);
1126 		kern_nexus_controller_deregister_provider(controller,
1127 		    *provider);
1128 		uuid_clear(*provider);
1129 	}
1130 failed:
1131 	return err;
1132 }
1133 
1134 static errno_t
utun_flowswitch_attach(struct utun_pcb * pcb)1135 utun_flowswitch_attach(struct utun_pcb *pcb)
1136 {
1137 	nexus_controller_t controller = kern_nexus_shared_controller();
1138 	errno_t err = 0;
1139 	utun_nx_t nx = &pcb->utun_nx;
1140 
1141 	// Allocate flowswitch
1142 	err = utun_create_fs_provider_and_instance(pcb,
1143 	    "flowswitch",
1144 	    pcb->utun_ifp->if_xname,
1145 	    &nx->fsw_provider,
1146 	    &nx->fsw_instance);
1147 	if (err != 0) {
1148 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
1149 		    __func__);
1150 		goto failed;
1151 	}
1152 
1153 	// Attach flowswitch to device port
1154 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
1155 	    NULL, nx->if_instance,
1156 	    FALSE, &nx->fsw_device);
1157 	if (err != 0) {
1158 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
1159 		goto failed;
1160 	}
1161 
1162 	// Extract the agent UUID and save for later
1163 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
1164 	if (flowswitch_nx != NULL) {
1165 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
1166 		if (flowswitch != NULL) {
1167 			FSW_RLOCK(flowswitch);
1168 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
1169 			FSW_UNLOCK(flowswitch);
1170 		} else {
1171 			os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - flowswitch is NULL\n");
1172 		}
1173 		nx_release(flowswitch_nx);
1174 	} else {
1175 		os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - unable to find flowswitch nexus\n");
1176 	}
1177 
1178 	return 0;
1179 
1180 failed:
1181 	utun_nexus_detach(pcb);
1182 
1183 	errno_t detach_error = 0;
1184 	if ((detach_error = ifnet_detach(pcb->utun_ifp)) != 0) {
1185 		panic("utun_flowswitch_attach - ifnet_detach failed: %d", detach_error);
1186 		/* NOT REACHED */
1187 	}
1188 
1189 	return err;
1190 }
1191 
1192 static errno_t
utun_register_kernel_pipe_nexus(struct utun_pcb * pcb)1193 utun_register_kernel_pipe_nexus(struct utun_pcb *pcb)
1194 {
1195 	nexus_attr_t __single nxa = NULL;
1196 	errno_t result;
1197 
1198 	lck_mtx_lock(&utun_lock);
1199 	if (utun_ncd_refcount++) {
1200 		lck_mtx_unlock(&utun_lock);
1201 		return 0;
1202 	}
1203 
1204 	result = kern_nexus_controller_create(&utun_ncd);
1205 	if (result) {
1206 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
1207 		    __FUNCTION__, result);
1208 		goto done;
1209 	}
1210 
1211 	uuid_t dom_prov;
1212 	result = kern_nexus_get_default_domain_provider(
1213 		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1214 	if (result) {
1215 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
1216 		    __FUNCTION__, result);
1217 		goto done;
1218 	}
1219 
1220 	struct kern_nexus_provider_init prov_init = {
1221 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1222 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1223 		.nxpi_pre_connect = utun_nexus_pre_connect,
1224 		.nxpi_connected = utun_nexus_connected,
1225 		.nxpi_pre_disconnect = utun_nexus_pre_disconnect,
1226 		.nxpi_disconnected = utun_nexus_disconnected,
1227 		.nxpi_ring_init = utun_kpipe_ring_init,
1228 		.nxpi_ring_fini = utun_kpipe_ring_fini,
1229 		.nxpi_slot_init = NULL,
1230 		.nxpi_slot_fini = NULL,
1231 		.nxpi_sync_tx = utun_kpipe_sync_tx,
1232 		.nxpi_sync_rx = utun_kpipe_sync_rx,
1233 		.nxpi_tx_doorbell = NULL,
1234 	};
1235 
1236 	result = kern_nexus_attr_create(&nxa);
1237 	if (result) {
1238 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1239 		    __FUNCTION__, result);
1240 		goto done;
1241 	}
1242 
1243 	uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1244 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1245 	VERIFY(result == 0);
1246 
1247 	// Reset ring size for kernel pipe nexus to limit memory usage
1248 	uint64_t ring_size =
1249 	    pcb->utun_kpipe_tx_ring_size != 0 ? pcb->utun_kpipe_tx_ring_size :
1250 	    if_utun_ring_size;
1251 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1252 	VERIFY(result == 0);
1253 
1254 	ring_size =
1255 	    pcb->utun_kpipe_rx_ring_size != 0 ? pcb->utun_kpipe_rx_ring_size :
1256 	    if_utun_ring_size;
1257 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1258 	VERIFY(result == 0);
1259 
1260 	nexus_domain_provider_name_t domain_provider_name = "com.apple.nexus.utun.kpipe";
1261 
1262 	result = kern_nexus_controller_register_provider(utun_ncd,
1263 	    dom_prov,
1264 	    domain_provider_name,
1265 	    &prov_init,
1266 	    sizeof(prov_init),
1267 	    nxa,
1268 	    &utun_kpipe_uuid);
1269 	if (result) {
1270 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
1271 		    __FUNCTION__, result);
1272 		goto done;
1273 	}
1274 
1275 done:
1276 	if (nxa) {
1277 		kern_nexus_attr_destroy(nxa);
1278 	}
1279 
1280 	if (result) {
1281 		if (utun_ncd) {
1282 			kern_nexus_controller_destroy(utun_ncd);
1283 			utun_ncd = NULL;
1284 		}
1285 		utun_ncd_refcount = 0;
1286 	}
1287 
1288 	lck_mtx_unlock(&utun_lock);
1289 
1290 	return result;
1291 }
1292 
1293 static void
utun_unregister_kernel_pipe_nexus(void)1294 utun_unregister_kernel_pipe_nexus(void)
1295 {
1296 	lck_mtx_lock(&utun_lock);
1297 
1298 	VERIFY(utun_ncd_refcount > 0);
1299 
1300 	if (--utun_ncd_refcount == 0) {
1301 		kern_nexus_controller_destroy(utun_ncd);
1302 		utun_ncd = NULL;
1303 	}
1304 
1305 	lck_mtx_unlock(&utun_lock);
1306 }
1307 
1308 // For use by socket option, not internally
1309 static errno_t
utun_disable_channel(struct utun_pcb * pcb)1310 utun_disable_channel(struct utun_pcb *pcb)
1311 {
1312 	errno_t result;
1313 	int enabled;
1314 	uuid_t uuid;
1315 
1316 	/* Wait until all threads in the data paths are done. */
1317 	utun_wait_data_move_drain(pcb);
1318 
1319 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1320 
1321 	enabled = pcb->utun_kpipe_enabled;
1322 	uuid_copy(uuid, pcb->utun_kpipe_uuid);
1323 
1324 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid) == !enabled);
1325 
1326 	pcb->utun_kpipe_enabled = 0;
1327 	uuid_clear(pcb->utun_kpipe_uuid);
1328 
1329 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1330 
1331 	if (enabled) {
1332 		result = kern_nexus_controller_free_provider_instance(utun_ncd, uuid);
1333 	} else {
1334 		result = ENXIO;
1335 	}
1336 
1337 	if (!result) {
1338 		if (pcb->utun_kpipe_pp != NULL) {
1339 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1340 			pcb->utun_kpipe_pp = NULL;
1341 		}
1342 		utun_unregister_kernel_pipe_nexus();
1343 	}
1344 
1345 	return result;
1346 }
1347 
1348 static errno_t
utun_enable_channel(struct utun_pcb * pcb,struct proc * proc)1349 utun_enable_channel(struct utun_pcb *pcb, struct proc *proc)
1350 {
1351 	struct kern_nexus_init init;
1352 	struct kern_pbufpool_init pp_init;
1353 	errno_t result;
1354 
1355 	kauth_cred_t cred = kauth_cred_get();
1356 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
1357 	if (result) {
1358 		return result;
1359 	}
1360 
1361 	result = utun_register_kernel_pipe_nexus(pcb);
1362 	if (result) {
1363 		return result;
1364 	}
1365 
1366 	VERIFY(utun_ncd);
1367 
1368 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1369 
1370 	if (pcb->utun_kpipe_enabled) {
1371 		result = EEXIST; // return success instead?
1372 		goto done;
1373 	}
1374 
1375 	/*
1376 	 * Make sure we can fit packets in the channel buffers and
1377 	 * Allow an extra 4 bytes for the protocol number header in the channel
1378 	 */
1379 	if (pcb->utun_ifp->if_mtu + UTUN_HEADER_SIZE(pcb) > pcb->utun_slot_size) {
1380 		result = EOPNOTSUPP;
1381 		goto done;
1382 	}
1383 
1384 	bzero(&pp_init, sizeof(pp_init));
1385 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1386 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1387 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
1388 	pp_init.kbi_bufsize = pcb->utun_slot_size;
1389 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
1390 	pp_init.kbi_max_frags = 1;
1391 	pp_init.kbi_flags |= KBIF_QUANTUM;
1392 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1393 	    "com.apple.kpipe.%s", pcb->utun_if_xname);
1394 	pp_init.kbi_ctx = NULL;
1395 	pp_init.kbi_ctx_retain = NULL;
1396 	pp_init.kbi_ctx_release = NULL;
1397 
1398 	result = kern_pbufpool_create(&pp_init, &pcb->utun_kpipe_pp,
1399 	    NULL);
1400 	if (result != 0) {
1401 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, result);
1402 		goto done;
1403 	}
1404 
1405 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid));
1406 	bzero(&init, sizeof(init));
1407 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1408 	init.nxi_tx_pbufpool = pcb->utun_kpipe_pp;
1409 	result = kern_nexus_controller_alloc_provider_instance(utun_ncd,
1410 	    utun_kpipe_uuid, pcb, NULL, &pcb->utun_kpipe_uuid, &init);
1411 	if (result) {
1412 		goto done;
1413 	}
1414 
1415 	nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
1416 	uuid_t uuid_null = {};
1417 	result = kern_nexus_controller_bind_provider_instance(utun_ncd,
1418 	    pcb->utun_kpipe_uuid, &port,
1419 	    proc_pid(proc), uuid_null, NULL, 0, NEXUS_BIND_PID);
1420 	if (result) {
1421 		kern_nexus_controller_free_provider_instance(utun_ncd,
1422 		    pcb->utun_kpipe_uuid);
1423 		uuid_clear(pcb->utun_kpipe_uuid);
1424 		goto done;
1425 	}
1426 
1427 	pcb->utun_kpipe_enabled = 1;
1428 
1429 done:
1430 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1431 
1432 	if (result) {
1433 		if (pcb->utun_kpipe_pp != NULL) {
1434 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1435 			pcb->utun_kpipe_pp = NULL;
1436 		}
1437 		utun_unregister_kernel_pipe_nexus();
1438 	}
1439 
1440 	return result;
1441 }
1442 
1443 #endif // UTUN_NEXUS
1444 
1445 errno_t
utun_register_control(void)1446 utun_register_control(void)
1447 {
1448 	struct kern_ctl_reg kern_ctl;
1449 	errno_t result = 0;
1450 
1451 #if UTUN_NEXUS
1452 	utun_register_nexus();
1453 #endif // UTUN_NEXUS
1454 
1455 	TAILQ_INIT(&utun_head);
1456 
1457 	bzero(&kern_ctl, sizeof(kern_ctl));
1458 	strlcpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
1459 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
1460 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP | CTL_FLAG_REG_EXTENDED; /* Require root */
1461 	kern_ctl.ctl_sendsize = 512 * 1024;
1462 	kern_ctl.ctl_recvsize = 512 * 1024;
1463 	kern_ctl.ctl_setup = utun_ctl_setup;
1464 	kern_ctl.ctl_bind = utun_ctl_bind;
1465 	kern_ctl.ctl_connect = utun_ctl_connect;
1466 	kern_ctl.ctl_disconnect = utun_ctl_disconnect;
1467 	kern_ctl.ctl_send = utun_ctl_send;
1468 	kern_ctl.ctl_setopt = utun_ctl_setopt;
1469 	kern_ctl.ctl_getopt = utun_ctl_getopt;
1470 	kern_ctl.ctl_rcvd = utun_ctl_rcvd;
1471 
1472 	result = ctl_register(&kern_ctl, &utun_kctlref);
1473 	if (result != 0) {
1474 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - ctl_register failed: %d\n", result);
1475 		return result;
1476 	}
1477 
1478 	/* Register the protocol plumbers */
1479 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN,
1480 	    utun_attach_proto, NULL)) != 0) {
1481 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN) failed: %d\n",
1482 		    result);
1483 		ctl_deregister(utun_kctlref);
1484 		return result;
1485 	}
1486 
1487 	/* Register the protocol plumbers */
1488 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN,
1489 	    utun_attach_proto, NULL)) != 0) {
1490 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_UTUN);
1491 		ctl_deregister(utun_kctlref);
1492 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN) failed: %d\n",
1493 		    result);
1494 		return result;
1495 	}
1496 
1497 	return 0;
1498 }
1499 
1500 /* Kernel control functions */
1501 
1502 static inline int
utun_find_by_unit(u_int32_t unit)1503 utun_find_by_unit(u_int32_t unit)
1504 {
1505 	struct utun_pcb *next_pcb = NULL;
1506 	int found = 0;
1507 
1508 	TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1509 		if (next_pcb->utun_unit == unit) {
1510 			found = 1;
1511 			break;
1512 		}
1513 	}
1514 
1515 	return found;
1516 }
1517 
1518 static inline void
utun_free_pcb(struct utun_pcb * pcb,bool locked)1519 utun_free_pcb(struct utun_pcb *pcb, bool locked)
1520 {
1521 #if UTUN_NEXUS
1522 	mbuf_freem_list(pcb->utun_input_chain);
1523 	pcb->utun_input_chain_count = 0;
1524 	lck_mtx_destroy(&pcb->utun_input_chain_lock, &utun_lck_grp);
1525 	lck_mtx_destroy(&pcb->utun_pcb_data_move_lock, &utun_lck_grp);
1526 #endif // UTUN_NEXUS
1527 	lck_rw_destroy(&pcb->utun_pcb_lock, &utun_lck_grp);
1528 	if (!locked) {
1529 		lck_mtx_lock(&utun_lock);
1530 	}
1531 	TAILQ_REMOVE(&utun_head, pcb, utun_chain);
1532 	if (!locked) {
1533 		lck_mtx_unlock(&utun_lock);
1534 	}
1535 	zfree(utun_pcb_zone, pcb);
1536 }
1537 
1538 static errno_t
utun_ctl_setup(u_int32_t * unit,void ** unitinfo)1539 utun_ctl_setup(u_int32_t *unit, void **unitinfo)
1540 {
1541 	if (unit == NULL || unitinfo == NULL) {
1542 		return EINVAL;
1543 	}
1544 
1545 	lck_mtx_lock(&utun_lock);
1546 
1547 	/* Find next available unit */
1548 	if (*unit == 0) {
1549 		*unit = 1;
1550 		while (*unit != ctl_maxunit) {
1551 			if (utun_find_by_unit(*unit)) {
1552 				(*unit)++;
1553 			} else {
1554 				break;
1555 			}
1556 		}
1557 		if (*unit == ctl_maxunit) {
1558 			lck_mtx_unlock(&utun_lock);
1559 			return EBUSY;
1560 		}
1561 	} else if (utun_find_by_unit(*unit)) {
1562 		lck_mtx_unlock(&utun_lock);
1563 		return EBUSY;
1564 	}
1565 
1566 	/* Find some open interface id */
1567 	u_int32_t chosen_unique_id = 1;
1568 	struct utun_pcb *next_pcb = TAILQ_LAST(&utun_head, utun_list);
1569 	if (next_pcb != NULL) {
1570 		/* List was not empty, add one to the last item */
1571 		chosen_unique_id = next_pcb->utun_unique_id + 1;
1572 		next_pcb = NULL;
1573 
1574 		/*
1575 		 * If this wrapped the id number, start looking at
1576 		 * the front of the list for an unused id.
1577 		 */
1578 		if (chosen_unique_id == 0) {
1579 			/* Find the next unused ID */
1580 			chosen_unique_id = 1;
1581 			TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1582 				if (next_pcb->utun_unique_id > chosen_unique_id) {
1583 					/* We found a gap */
1584 					break;
1585 				}
1586 
1587 				chosen_unique_id = next_pcb->utun_unique_id + 1;
1588 			}
1589 		}
1590 	}
1591 
1592 	struct utun_pcb *__single pcb = zalloc_flags(utun_pcb_zone, Z_WAITOK | Z_ZERO);
1593 
1594 	*unitinfo = pcb;
1595 	pcb->utun_unit = *unit;
1596 	pcb->utun_unique_id = chosen_unique_id;
1597 
1598 	if (next_pcb != NULL) {
1599 		TAILQ_INSERT_BEFORE(next_pcb, pcb, utun_chain);
1600 	} else {
1601 		TAILQ_INSERT_TAIL(&utun_head, pcb, utun_chain);
1602 	}
1603 
1604 	lck_mtx_unlock(&utun_lock);
1605 
1606 	return 0;
1607 }
1608 
1609 static errno_t
utun_ctl_bind(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1610 utun_ctl_bind(kern_ctl_ref kctlref,
1611     struct sockaddr_ctl *sac,
1612     void **unitinfo)
1613 {
1614 	if (*unitinfo == NULL) {
1615 		u_int32_t unit = 0;
1616 		(void)utun_ctl_setup(&unit, unitinfo);
1617 	}
1618 
1619 	struct utun_pcb *__single pcb = (struct utun_pcb *)*unitinfo;
1620 	if (pcb == NULL) {
1621 		return EINVAL;
1622 	}
1623 
1624 	if (pcb->utun_ctlref != NULL) {
1625 		// Return if bind was already called
1626 		return EINVAL;
1627 	}
1628 
1629 	pcb->utun_ctlref = kctlref;
1630 	pcb->utun_unit = sac->sc_unit;
1631 	pcb->utun_max_pending_packets = 1;
1632 
1633 #if UTUN_NEXUS
1634 	pcb->utun_use_netif = false;
1635 	pcb->utun_attach_fsw = true;
1636 	pcb->utun_netif_connected = false;
1637 	pcb->utun_slot_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1638 	pcb->utun_netif_ring_size = if_utun_ring_size;
1639 	pcb->utun_tx_fsw_ring_size = if_utun_tx_fsw_ring_size;
1640 	pcb->utun_rx_fsw_ring_size = if_utun_rx_fsw_ring_size;
1641 	pcb->utun_input_chain_count = 0;
1642 	lck_mtx_init(&pcb->utun_input_chain_lock, &utun_lck_grp, &utun_lck_attr);
1643 	lck_mtx_init(&pcb->utun_pcb_data_move_lock,
1644 	    &utun_lck_grp, &utun_lck_attr);
1645 #endif // UTUN_NEXUS
1646 
1647 	lck_rw_init(&pcb->utun_pcb_lock, &utun_lck_grp, &utun_lck_attr);
1648 
1649 	return 0;
1650 }
1651 
1652 static errno_t
utun_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1653 utun_ctl_connect(kern_ctl_ref kctlref,
1654     struct sockaddr_ctl *sac,
1655     void **unitinfo)
1656 {
1657 	struct ifnet_init_eparams utun_init = {};
1658 	errno_t result = 0;
1659 
1660 	if (*unitinfo == NULL) {
1661 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1662 	}
1663 
1664 	struct utun_pcb *__single pcb = *unitinfo;
1665 	if (pcb == NULL) {
1666 		return EINVAL;
1667 	}
1668 
1669 	/* Handle case where utun_ctl_setup() was called, but ipsec_ctl_bind() was not */
1670 	if (pcb->utun_ctlref == NULL) {
1671 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1672 	}
1673 
1674 	snprintf(pcb->utun_if_xname, sizeof(pcb->utun_if_xname), "utun%d", pcb->utun_unit - 1);
1675 	snprintf(pcb->utun_unique_name, sizeof(pcb->utun_unique_name), "utunid%d", pcb->utun_unique_id - 1);
1676 
1677 	/* Create the interface */
1678 	bzero(&utun_init, sizeof(utun_init));
1679 	utun_init.ver = IFNET_INIT_CURRENT_VERSION;
1680 	utun_init.len = sizeof(utun_init);
1681 
1682 #if UTUN_NEXUS
1683 	if (pcb->utun_use_netif) {
1684 		utun_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
1685 		utun_init.tx_headroom = UTUN_IF_HEADROOM_SIZE;
1686 	} else
1687 #endif // UTUN_NEXUS
1688 	{
1689 		utun_init.flags = IFNET_INIT_NX_NOAUTO;
1690 		utun_init.start = utun_start;
1691 		utun_init.framer_extended = utun_framer;
1692 	}
1693 	utun_init.name = "utun";
1694 	utun_init.unit = pcb->utun_unit - 1;
1695 	utun_init.uniqueid_len = strbuflen(pcb->utun_unique_name);
1696 	utun_init.uniqueid = pcb->utun_unique_name;
1697 	utun_init.family = IFNET_FAMILY_UTUN;
1698 	utun_init.type = IFT_OTHER;
1699 	utun_init.demux = utun_demux;
1700 	utun_init.add_proto = utun_add_proto;
1701 	utun_init.del_proto = utun_del_proto;
1702 	utun_init.softc = pcb;
1703 	utun_init.ioctl = utun_ioctl;
1704 	utun_init.free = utun_detached;
1705 
1706 #if UTUN_NEXUS
1707 	if (pcb->utun_use_netif) {
1708 		result = utun_nexus_ifattach(pcb, &utun_init, &pcb->utun_ifp);
1709 		if (result != 0) {
1710 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_nexus_ifattach failed: %d\n", result);
1711 			utun_free_pcb(pcb, false);
1712 			*unitinfo = NULL;
1713 			return result;
1714 		}
1715 
1716 		if (pcb->utun_attach_fsw) {
1717 			result = utun_flowswitch_attach(pcb);
1718 			if (result != 0) {
1719 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_flowswitch_attach failed: %d\n", result);
1720 				// Do not call utun_free_pcb(). We will be attached already, and will be freed later
1721 				// in utun_detached().
1722 				*unitinfo = NULL;
1723 				return result;
1724 			}
1725 		}
1726 
1727 		/* Attach to bpf */
1728 		bpfattach(pcb->utun_ifp, DLT_RAW, 0);
1729 	} else
1730 #endif // UTUN_NEXUS
1731 	{
1732 		/*
1733 		 * Upon success, this holds an ifnet reference which we will
1734 		 * release via ifnet_release() at final detach time.
1735 		 */
1736 		result = ifnet_allocate_extended(&utun_init, &pcb->utun_ifp);
1737 		if (result != 0) {
1738 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_allocate failed: %d\n", result);
1739 			utun_free_pcb(pcb, false);
1740 			*unitinfo = NULL;
1741 			return result;
1742 		}
1743 
1744 		/* Set flags and additional information. */
1745 		ifnet_set_mtu(pcb->utun_ifp, UTUN_DEFAULT_MTU);
1746 		ifnet_set_flags(pcb->utun_ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
1747 
1748 		/* The interface must generate its own IPv6 LinkLocal address,
1749 		 * if possible following the recommendation of RFC2472 to the 64bit interface ID
1750 		 */
1751 		ifnet_set_eflags(pcb->utun_ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
1752 
1753 		/* Reset the stats in case as the interface may have been recycled */
1754 		struct ifnet_stats_param stats;
1755 		bzero(&stats, sizeof(struct ifnet_stats_param));
1756 		ifnet_set_stat(pcb->utun_ifp, &stats);
1757 
1758 		/* Attach the interface */
1759 		result = ifnet_attach(pcb->utun_ifp, NULL);
1760 		if (result != 0) {
1761 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_attach failed: %d\n", result);
1762 			/* Release reference now since attach failed */
1763 			ifnet_release(pcb->utun_ifp);
1764 			utun_free_pcb(pcb, false);
1765 			*unitinfo = NULL;
1766 			return result;
1767 		}
1768 
1769 		/* Attach to bpf */
1770 		bpfattach(pcb->utun_ifp, DLT_NULL, UTUN_HEADER_SIZE(pcb));
1771 
1772 #if UTUN_NEXUS
1773 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
1774 		UTUN_SET_DATA_PATH_READY(pcb);
1775 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
1776 #endif // UTUN_NEXUS
1777 	}
1778 
1779 	/* The interfaces resoures allocated, mark it as running */
1780 	ifnet_set_flags(pcb->utun_ifp, IFF_RUNNING, IFF_RUNNING);
1781 
1782 	return result;
1783 }
1784 
1785 static errno_t
utun_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)1786 utun_detach_ip(ifnet_t interface,
1787     protocol_family_t protocol,
1788     socket_t pf_socket)
1789 {
1790 	errno_t result = EPROTONOSUPPORT;
1791 
1792 	/* Attempt a detach */
1793 	if (protocol == PF_INET) {
1794 		struct ifreq    ifr;
1795 
1796 		bzero(&ifr, sizeof(ifr));
1797 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1798 		    ifnet_name(interface), ifnet_unit(interface));
1799 
1800 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
1801 	} else if (protocol == PF_INET6) {
1802 		struct in6_ifreq        ifr6;
1803 
1804 		bzero(&ifr6, sizeof(ifr6));
1805 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1806 		    ifnet_name(interface), ifnet_unit(interface));
1807 
1808 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
1809 	}
1810 
1811 	return result;
1812 }
1813 
1814 static void
utun_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)1815 utun_remove_address(ifnet_t interface,
1816     protocol_family_t protocol,
1817     ifaddr_t address,
1818     socket_t pf_socket)
1819 {
1820 	errno_t result = 0;
1821 
1822 	/* Attempt a detach */
1823 	if (protocol == PF_INET) {
1824 		struct ifreq ifr;
1825 
1826 		bzero(&ifr, sizeof(ifr));
1827 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1828 		    ifnet_name(interface), ifnet_unit(interface));
1829 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
1830 		if (result != 0) {
1831 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed: %d", result);
1832 		} else {
1833 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
1834 			if (result != 0) {
1835 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR failed: %d", result);
1836 			}
1837 		}
1838 	} else if (protocol == PF_INET6) {
1839 		struct in6_ifreq ifr6;
1840 
1841 		bzero(&ifr6, sizeof(ifr6));
1842 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1843 		    ifnet_name(interface), ifnet_unit(interface));
1844 		result = ifaddr_address(address, SA(&ifr6.ifr_addr),
1845 		    sizeof(ifr6.ifr_addr));
1846 		if (result != 0) {
1847 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed (v6): %d",
1848 			    result);
1849 		} else {
1850 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
1851 			if (result != 0) {
1852 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR_IN6 failed: %d",
1853 				    result);
1854 			}
1855 		}
1856 	}
1857 }
1858 
1859 static void
utun_cleanup_family(ifnet_t interface,protocol_family_t protocol)1860 utun_cleanup_family(ifnet_t interface,
1861     protocol_family_t protocol)
1862 {
1863 	errno_t result = 0;
1864 	socket_ref_t pf_socket = NULL;
1865 	ifaddr_t *__null_terminated addresses = NULL;
1866 
1867 	if (protocol != PF_INET && protocol != PF_INET6) {
1868 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - invalid protocol family %d\n", protocol);
1869 		return;
1870 	}
1871 
1872 	/* Create a socket for removing addresses and detaching the protocol */
1873 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
1874 	if (result != 0) {
1875 		if (result != EAFNOSUPPORT) {
1876 			os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - failed to create %s socket: %d\n",
1877 			    protocol == PF_INET ? "IP" : "IPv6", result);
1878 		}
1879 		goto cleanup;
1880 	}
1881 
1882 	/* always set SS_PRIV, we want to close and detach regardless */
1883 	sock_setpriv(pf_socket, 1);
1884 
1885 	result = utun_detach_ip(interface, protocol, pf_socket);
1886 	if (result == 0 || result == ENXIO) {
1887 		/* We are done! We either detached or weren't attached. */
1888 		goto cleanup;
1889 	} else if (result != EBUSY) {
1890 		/* Uh, not really sure what happened here... */
1891 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1892 		goto cleanup;
1893 	}
1894 
1895 	/*
1896 	 * At this point, we received an EBUSY error. This means there are
1897 	 * addresses attached. We should detach them and then try again.
1898 	 */
1899 	result = ifnet_get_address_list_family(interface, &addresses, protocol);
1900 	if (result != 0) {
1901 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
1902 		    ifnet_name(interface), ifnet_unit(interface),
1903 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
1904 		goto cleanup;
1905 	}
1906 
1907 	for (ifaddr_t *__null_terminated addr = addresses; *addr != NULL; addr++) {
1908 		utun_remove_address(interface, protocol, *addr, pf_socket);
1909 	}
1910 	ifnet_free_address_list(addresses);
1911 	addresses = NULL;
1912 
1913 	/*
1914 	 * The addresses should be gone, we should try the remove again.
1915 	 */
1916 	result = utun_detach_ip(interface, protocol, pf_socket);
1917 	if (result != 0 && result != ENXIO) {
1918 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1919 	}
1920 
1921 cleanup:
1922 	if (pf_socket != NULL) {
1923 		sock_close(pf_socket);
1924 	}
1925 
1926 	if (addresses != NULL) {
1927 		ifnet_free_address_list(addresses);
1928 	}
1929 }
1930 
1931 static errno_t
utun_ctl_disconnect(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo)1932 utun_ctl_disconnect(__unused kern_ctl_ref kctlref,
1933     __unused u_int32_t unit,
1934     void *unitinfo)
1935 {
1936 	struct utun_pcb *__single pcb = unitinfo;
1937 	ifnet_t ifp = NULL;
1938 	errno_t result = 0;
1939 
1940 	if (pcb == NULL) {
1941 		return EINVAL;
1942 	}
1943 
1944 #if UTUN_NEXUS
1945 	/* Wait until all threads in the data paths are done. */
1946 	utun_wait_data_move_drain(pcb);
1947 	// Tell the nexus to stop all rings
1948 	if (pcb->utun_netif_nexus != NULL && pcb->utun_netif_connected) {
1949 		kern_nexus_stop(pcb->utun_netif_nexus);
1950 	}
1951 #endif // UTUN_NEXUS
1952 
1953 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1954 
1955 #if UTUN_NEXUS
1956 	uuid_t kpipe_uuid;
1957 	uuid_copy(kpipe_uuid, pcb->utun_kpipe_uuid);
1958 	uuid_clear(pcb->utun_kpipe_uuid);
1959 	pcb->utun_kpipe_enabled = FALSE;
1960 #endif // UTUN_NEXUS
1961 
1962 	pcb->utun_ctlref = NULL;
1963 
1964 	ifp = pcb->utun_ifp;
1965 	if (ifp != NULL) {
1966 #if UTUN_NEXUS
1967 		// Tell the nexus to stop all rings
1968 		if (pcb->utun_netif_nexus != NULL) {
1969 			/*
1970 			 * Quiesce the interface and flush any pending outbound packets.
1971 			 */
1972 			if_down(ifp);
1973 
1974 			/*
1975 			 * Suspend data movement and wait for IO threads to exit.
1976 			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
1977 			 * do this because utun nexuses are attached/detached separately.
1978 			 */
1979 			ifnet_datamov_suspend_and_drain(ifp);
1980 			if ((result = ifnet_detach(ifp)) != 0) {
1981 				panic("utun_ctl_disconnect - ifnet_detach failed: %d", result);
1982 			}
1983 
1984 			/*
1985 			 * We want to do everything in our power to ensure that the interface
1986 			 * really goes away when the socket is closed. We must remove IP/IPv6
1987 			 * addresses and detach the protocols. Finally, we can remove and
1988 			 * release the interface.
1989 			 */
1990 			utun_cleanup_family(ifp, AF_INET);
1991 			utun_cleanup_family(ifp, AF_INET6);
1992 
1993 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1994 
1995 			if (!uuid_is_null(kpipe_uuid)) {
1996 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
1997 					if (pcb->utun_kpipe_pp != NULL) {
1998 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1999 						pcb->utun_kpipe_pp = NULL;
2000 					}
2001 					utun_unregister_kernel_pipe_nexus();
2002 				}
2003 			}
2004 			utun_nexus_detach(pcb);
2005 
2006 			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
2007 			ifnet_datamov_resume(ifp);
2008 		} else
2009 #endif // UTUN_NEXUS
2010 		{
2011 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2012 
2013 #if UTUN_NEXUS
2014 			if (!uuid_is_null(kpipe_uuid)) {
2015 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
2016 					if (pcb->utun_kpipe_pp != NULL) {
2017 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
2018 						pcb->utun_kpipe_pp = NULL;
2019 					}
2020 					utun_unregister_kernel_pipe_nexus();
2021 				}
2022 			}
2023 #endif // UTUN_NEXUS
2024 
2025 			/*
2026 			 * We want to do everything in our power to ensure that the interface
2027 			 * really goes away when the socket is closed. We must remove IP/IPv6
2028 			 * addresses and detach the protocols. Finally, we can remove and
2029 			 * release the interface.
2030 			 */
2031 			utun_cleanup_family(ifp, AF_INET);
2032 			utun_cleanup_family(ifp, AF_INET6);
2033 
2034 			/*
2035 			 * Detach now; utun_detach() will be called asynchronously once
2036 			 * the I/O reference count drops to 0.  There we will invoke
2037 			 * ifnet_release().
2038 			 */
2039 			if ((result = ifnet_detach(ifp)) != 0) {
2040 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_disconnect - ifnet_detach failed: %d\n", result);
2041 			}
2042 		}
2043 	} else {
2044 		// Bound, but not connected
2045 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2046 		utun_free_pcb(pcb, false);
2047 	}
2048 
2049 	return 0;
2050 }
2051 
2052 static errno_t
utun_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,mbuf_t m,__unused int flags)2053 utun_ctl_send(__unused kern_ctl_ref kctlref,
2054     __unused u_int32_t unit,
2055     void *unitinfo,
2056     mbuf_t m,
2057     __unused int flags)
2058 {
2059 	/*
2060 	 * The userland ABI requires the first four bytes have the protocol family
2061 	 * in network byte order: swap them
2062 	 */
2063 	if (m_pktlen(m) >= (int32_t)UTUN_HEADER_SIZE((struct utun_pcb *)unitinfo)) {
2064 		*mtod(m, protocol_family_t *) = ntohl(*mtod(m, protocol_family_t *));
2065 	} else {
2066 		os_log_error(OS_LOG_DEFAULT, "%s - unexpected short mbuf pkt len %d\n", __func__, m_pktlen(m));
2067 	}
2068 
2069 	return utun_pkt_input((struct utun_pcb *)unitinfo, m);
2070 }
2071 
2072 static errno_t
utun_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (len)data,size_t len)2073 utun_ctl_setopt(__unused kern_ctl_ref kctlref,
2074     __unused u_int32_t unit,
2075     void *unitinfo,
2076     int opt,
2077     void *__sized_by(len) data,
2078     size_t len)
2079 {
2080 	struct utun_pcb *__single pcb = unitinfo;
2081 	errno_t result = 0;
2082 	/* check for privileges for privileged options */
2083 	switch (opt) {
2084 	case UTUN_OPT_FLAGS:
2085 	case UTUN_OPT_EXT_IFDATA_STATS:
2086 	case UTUN_OPT_SET_DELEGATE_INTERFACE:
2087 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2088 			return EPERM;
2089 		}
2090 		break;
2091 	}
2092 
2093 	switch (opt) {
2094 	case UTUN_OPT_FLAGS:
2095 		if (len != sizeof(u_int32_t)) {
2096 			result = EMSGSIZE;
2097 			break;
2098 		}
2099 		if (pcb->utun_ifp != NULL) {
2100 			// Only can set before connecting
2101 			result = EINVAL;
2102 			break;
2103 		}
2104 		pcb->utun_flags = *(u_int32_t *)data;
2105 		break;
2106 
2107 	case UTUN_OPT_EXT_IFDATA_STATS:
2108 		if (len != sizeof(int)) {
2109 			result = EMSGSIZE;
2110 			break;
2111 		}
2112 		if (pcb->utun_ifp == NULL) {
2113 			// Only can set after connecting
2114 			result = EINVAL;
2115 			break;
2116 		}
2117 		pcb->utun_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2118 		break;
2119 
2120 	case UTUN_OPT_INC_IFDATA_STATS_IN:
2121 	case UTUN_OPT_INC_IFDATA_STATS_OUT: {
2122 		struct utun_stats_param *utsp = (struct utun_stats_param *)data;
2123 
2124 		if (utsp == NULL || len < sizeof(struct utun_stats_param)) {
2125 			result = EINVAL;
2126 			break;
2127 		}
2128 		if (pcb->utun_ifp == NULL) {
2129 			// Only can set after connecting
2130 			result = EINVAL;
2131 			break;
2132 		}
2133 		if (!pcb->utun_ext_ifdata_stats) {
2134 			result = EINVAL;
2135 			break;
2136 		}
2137 		if (opt == UTUN_OPT_INC_IFDATA_STATS_IN) {
2138 			ifnet_stat_increment_in(pcb->utun_ifp, utsp->utsp_packets,
2139 			    utsp->utsp_bytes, utsp->utsp_errors);
2140 		} else {
2141 			ifnet_stat_increment_out(pcb->utun_ifp, utsp->utsp_packets,
2142 			    utsp->utsp_bytes, utsp->utsp_errors);
2143 		}
2144 		break;
2145 	}
2146 	case UTUN_OPT_SET_DELEGATE_INTERFACE: {
2147 		ifnet_ref_t     del_ifp = NULL;
2148 		char            name[IFNAMSIZ];
2149 
2150 		if (len > IFNAMSIZ - 1) {
2151 			result = EMSGSIZE;
2152 			break;
2153 		}
2154 		if (pcb->utun_ifp == NULL) {
2155 			// Only can set after connecting
2156 			result = EINVAL;
2157 			break;
2158 		}
2159 		if (len != 0) {            /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2160 			bcopy(data, name, len);
2161 			name[len] = 0;
2162 			result = ifnet_find_by_name(__unsafe_null_terminated_from_indexable(name), &del_ifp);
2163 		}
2164 		if (result == 0) {
2165 			result = ifnet_set_delegate(pcb->utun_ifp, del_ifp);
2166 			if (del_ifp) {
2167 				ifnet_release(del_ifp);
2168 			}
2169 		}
2170 		break;
2171 	}
2172 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2173 		u_int32_t max_pending_packets = 0;
2174 		if (len != sizeof(u_int32_t)) {
2175 			result = EMSGSIZE;
2176 			break;
2177 		}
2178 		max_pending_packets = *(u_int32_t *)data;
2179 		if (max_pending_packets == 0) {
2180 			result = EINVAL;
2181 			break;
2182 		}
2183 		pcb->utun_max_pending_packets = max_pending_packets;
2184 		break;
2185 	}
2186 #if UTUN_NEXUS
2187 	case UTUN_OPT_ENABLE_CHANNEL: {
2188 		if (len != sizeof(int)) {
2189 			result = EMSGSIZE;
2190 			break;
2191 		}
2192 		if (pcb->utun_ifp == NULL) {
2193 			// Only can set after connecting
2194 			result = EINVAL;
2195 			break;
2196 		}
2197 		if (*(int *)data) {
2198 			result = utun_enable_channel(pcb, current_proc());
2199 		} else {
2200 			result = utun_disable_channel(pcb);
2201 		}
2202 		break;
2203 	}
2204 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2205 		if (len != sizeof(int)) {
2206 			result = EMSGSIZE;
2207 			break;
2208 		}
2209 		if (pcb->utun_ifp == NULL) {
2210 			// Only can set after connecting
2211 			result = EINVAL;
2212 			break;
2213 		}
2214 		if (!if_is_fsw_transport_netagent_enabled()) {
2215 			result = ENOTSUP;
2216 			break;
2217 		}
2218 		if (uuid_is_null(pcb->utun_nx.fsw_agent)) {
2219 			result = ENOENT;
2220 			break;
2221 		}
2222 
2223 		uint32_t flags = netagent_get_flags(pcb->utun_nx.fsw_agent);
2224 
2225 		if (*(int *)data) {
2226 			pcb->utun_needs_netagent = true;
2227 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
2228 			    NETAGENT_FLAG_NEXUS_LISTENER);
2229 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2230 		} else {
2231 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
2232 			    NETAGENT_FLAG_NEXUS_LISTENER);
2233 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2234 			pcb->utun_needs_netagent = false;
2235 		}
2236 		break;
2237 	}
2238 	case UTUN_OPT_ATTACH_FLOWSWITCH: {
2239 		if (len != sizeof(int)) {
2240 			result = EMSGSIZE;
2241 			break;
2242 		}
2243 		if (pcb->utun_ifp != NULL) {
2244 			// Only can set before connecting
2245 			result = EINVAL;
2246 			break;
2247 		}
2248 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2249 		pcb->utun_attach_fsw = !!(*(int *)data);
2250 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2251 		break;
2252 	}
2253 	case UTUN_OPT_ENABLE_NETIF: {
2254 		if (len != sizeof(int)) {
2255 			result = EMSGSIZE;
2256 			break;
2257 		}
2258 		if (pcb->utun_ifp != NULL) {
2259 			// Only can set before connecting
2260 			result = EINVAL;
2261 			break;
2262 		}
2263 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2264 		pcb->utun_use_netif = !!(*(int *)data);
2265 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2266 		break;
2267 	}
2268 	case UTUN_OPT_SLOT_SIZE: {
2269 		if (len != sizeof(u_int32_t)) {
2270 			result = EMSGSIZE;
2271 			break;
2272 		}
2273 		if (pcb->utun_ifp != NULL) {
2274 			// Only can set before connecting
2275 			result = EINVAL;
2276 			break;
2277 		}
2278 		u_int32_t slot_size = *(u_int32_t *)data;
2279 		if (slot_size < UTUN_IF_MIN_SLOT_SIZE ||
2280 		    slot_size > UTUN_IF_MAX_SLOT_SIZE) {
2281 			return EINVAL;
2282 		}
2283 		pcb->utun_slot_size = slot_size;
2284 		break;
2285 	}
2286 	case UTUN_OPT_NETIF_RING_SIZE: {
2287 		if (len != sizeof(u_int32_t)) {
2288 			result = EMSGSIZE;
2289 			break;
2290 		}
2291 		if (pcb->utun_ifp != NULL) {
2292 			// Only can set before connecting
2293 			result = EINVAL;
2294 			break;
2295 		}
2296 		u_int32_t ring_size = *(u_int32_t *)data;
2297 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2298 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2299 			return EINVAL;
2300 		}
2301 		pcb->utun_netif_ring_size = ring_size;
2302 		break;
2303 	}
2304 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2305 		if (len != sizeof(u_int32_t)) {
2306 			result = EMSGSIZE;
2307 			break;
2308 		}
2309 		if (pcb->utun_ifp != NULL) {
2310 			// Only can set before connecting
2311 			result = EINVAL;
2312 			break;
2313 		}
2314 		u_int32_t ring_size = *(u_int32_t *)data;
2315 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2316 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2317 			return EINVAL;
2318 		}
2319 		pcb->utun_tx_fsw_ring_size = ring_size;
2320 		break;
2321 	}
2322 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2323 		if (len != sizeof(u_int32_t)) {
2324 			result = EMSGSIZE;
2325 			break;
2326 		}
2327 		if (pcb->utun_ifp != NULL) {
2328 			// Only can set before connecting
2329 			result = EINVAL;
2330 			break;
2331 		}
2332 		u_int32_t ring_size = *(u_int32_t *)data;
2333 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2334 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2335 			return EINVAL;
2336 		}
2337 		pcb->utun_rx_fsw_ring_size = ring_size;
2338 		break;
2339 	}
2340 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2341 		if (len != sizeof(u_int32_t)) {
2342 			result = EMSGSIZE;
2343 			break;
2344 		}
2345 		if (pcb->utun_ifp != NULL) {
2346 			// Only can set before connecting
2347 			result = EINVAL;
2348 			break;
2349 		}
2350 		u_int32_t ring_size = *(u_int32_t *)data;
2351 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2352 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2353 			return EINVAL;
2354 		}
2355 		pcb->utun_kpipe_tx_ring_size = ring_size;
2356 		break;
2357 	}
2358 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2359 		if (len != sizeof(u_int32_t)) {
2360 			result = EMSGSIZE;
2361 			break;
2362 		}
2363 		if (pcb->utun_ifp != NULL) {
2364 			// Only can set before connecting
2365 			result = EINVAL;
2366 			break;
2367 		}
2368 		u_int32_t ring_size = *(u_int32_t *)data;
2369 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2370 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2371 			return EINVAL;
2372 		}
2373 		pcb->utun_kpipe_rx_ring_size = ring_size;
2374 		break;
2375 	}
2376 #endif // UTUN_NEXUS
2377 	default: {
2378 		result = ENOPROTOOPT;
2379 		break;
2380 	}
2381 	}
2382 
2383 	return result;
2384 }
2385 
2386 static errno_t
utun_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (* len)data,size_t * len)2387 utun_ctl_getopt(__unused kern_ctl_ref kctlref,
2388     __unused u_int32_t unit,
2389     void *unitinfo,
2390     int opt,
2391     void *__sized_by(*len) data,
2392     size_t *len)
2393 {
2394 	struct utun_pcb *__single pcb = unitinfo;
2395 	errno_t result = 0;
2396 
2397 	switch (opt) {
2398 	case UTUN_OPT_FLAGS:
2399 		if (*len != sizeof(u_int32_t)) {
2400 			result = EMSGSIZE;
2401 		} else {
2402 			*(u_int32_t *)data = pcb->utun_flags;
2403 		}
2404 		break;
2405 
2406 	case UTUN_OPT_EXT_IFDATA_STATS:
2407 		if (*len != sizeof(int)) {
2408 			result = EMSGSIZE;
2409 		} else {
2410 			*(int *)data = (pcb->utun_ext_ifdata_stats) ? 1 : 0;
2411 		}
2412 		break;
2413 
2414 	case UTUN_OPT_IFNAME:
2415 		if (*len < MIN(strbuflen(pcb->utun_if_xname) + 1, sizeof(pcb->utun_if_xname))) {
2416 			result = EMSGSIZE;
2417 		} else {
2418 			if (pcb->utun_ifp == NULL) {
2419 				// Only can get after connecting
2420 				result = EINVAL;
2421 				break;
2422 			}
2423 			*len = scnprintf(data, *len, "%s", pcb->utun_if_xname) + 1;
2424 		}
2425 		break;
2426 
2427 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2428 		if (*len != sizeof(u_int32_t)) {
2429 			result = EMSGSIZE;
2430 		} else {
2431 			*((u_int32_t *)data) = pcb->utun_max_pending_packets;
2432 		}
2433 		break;
2434 	}
2435 
2436 #if UTUN_NEXUS
2437 	case UTUN_OPT_ENABLE_CHANNEL: {
2438 		if (*len != sizeof(int)) {
2439 			result = EMSGSIZE;
2440 		} else {
2441 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2442 			*(int *)data = pcb->utun_kpipe_enabled;
2443 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2444 		}
2445 		break;
2446 	}
2447 
2448 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2449 		if (*len != sizeof(int)) {
2450 			result = EMSGSIZE;
2451 		} else {
2452 			*(int *)data = if_check_netagent(pcb->utun_ifp, pcb->utun_nx.fsw_agent);
2453 		}
2454 		break;
2455 	}
2456 
2457 	case UTUN_OPT_ENABLE_NETIF: {
2458 		if (*len != sizeof(int)) {
2459 			result = EMSGSIZE;
2460 		} else {
2461 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2462 			*(int *)data = !!pcb->utun_use_netif;
2463 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2464 		}
2465 		break;
2466 	}
2467 
2468 	case UTUN_OPT_GET_CHANNEL_UUID: {
2469 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2470 		if (uuid_is_null(pcb->utun_kpipe_uuid)) {
2471 			result = ENXIO;
2472 		} else if (*len != sizeof(uuid_t)) {
2473 			result = EMSGSIZE;
2474 		} else {
2475 			uuid_copy(data, pcb->utun_kpipe_uuid);
2476 		}
2477 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2478 		break;
2479 	}
2480 	case UTUN_OPT_SLOT_SIZE: {
2481 		if (*len != sizeof(u_int32_t)) {
2482 			result = EMSGSIZE;
2483 		} else {
2484 			*(u_int32_t *)data = pcb->utun_slot_size;
2485 		}
2486 		break;
2487 	}
2488 	case UTUN_OPT_NETIF_RING_SIZE: {
2489 		if (*len != sizeof(u_int32_t)) {
2490 			result = EMSGSIZE;
2491 		} else {
2492 			*(u_int32_t *)data = pcb->utun_netif_ring_size;
2493 		}
2494 		break;
2495 	}
2496 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2497 		if (*len != sizeof(u_int32_t)) {
2498 			result = EMSGSIZE;
2499 		} else {
2500 			*(u_int32_t *)data = pcb->utun_tx_fsw_ring_size;
2501 		}
2502 		break;
2503 	}
2504 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2505 		if (*len != sizeof(u_int32_t)) {
2506 			result = EMSGSIZE;
2507 		} else {
2508 			*(u_int32_t *)data = pcb->utun_rx_fsw_ring_size;
2509 		}
2510 		break;
2511 	}
2512 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2513 		if (*len != sizeof(u_int32_t)) {
2514 			result = EMSGSIZE;
2515 		} else {
2516 			*(u_int32_t *)data = pcb->utun_kpipe_tx_ring_size;
2517 		}
2518 		break;
2519 	}
2520 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2521 		if (*len != sizeof(u_int32_t)) {
2522 			result = EMSGSIZE;
2523 		} else {
2524 			*(u_int32_t *)data = pcb->utun_kpipe_rx_ring_size;
2525 		}
2526 		break;
2527 	}
2528 #endif // UTUN_NEXUS
2529 
2530 	default:
2531 		result = ENOPROTOOPT;
2532 		break;
2533 	}
2534 
2535 	return result;
2536 }
2537 
2538 static void
utun_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t unit,void * unitinfo,int flags)2539 utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
2540 {
2541 #pragma unused(flags)
2542 	bool reenable_output = false;
2543 	struct utun_pcb *__single pcb = unitinfo;
2544 	if (pcb == NULL) {
2545 		return;
2546 	}
2547 	ifnet_lock_exclusive(pcb->utun_ifp);
2548 
2549 	u_int32_t utun_packet_cnt;
2550 	errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, &utun_packet_cnt);
2551 	if (error_pc != 0) {
2552 		os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2553 		utun_packet_cnt = 0;
2554 	}
2555 
2556 	if (utun_packet_cnt < pcb->utun_max_pending_packets) {
2557 		reenable_output = true;
2558 	}
2559 
2560 	if (reenable_output) {
2561 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
2562 		if (error != 0) {
2563 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ifnet_enable_output returned error %d\n", error);
2564 		}
2565 	}
2566 	ifnet_lock_done(pcb->utun_ifp);
2567 }
2568 
2569 /* Network Interface functions */
2570 static void
utun_start(ifnet_t interface)2571 utun_start(ifnet_t interface)
2572 {
2573 	mbuf_ref_t data;
2574 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2575 
2576 	VERIFY(pcb != NULL);
2577 
2578 #if UTUN_NEXUS
2579 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
2580 	if (pcb->utun_kpipe_enabled) {
2581 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2582 		if (!utun_data_move_begin(pcb)) {
2583 			os_log_info(OS_LOG_DEFAULT,
2584 			    "%s: data path stopped for %s\n",
2585 			    __func__, if_name(pcb->utun_ifp));
2586 			return;
2587 		}
2588 		/* It's possible to have channels enabled, but not yet have the channel opened,
2589 		 * in which case the rxring will not be set
2590 		 */
2591 		if (pcb->utun_kpipe_rxring != NULL) {
2592 			kern_channel_notify(pcb->utun_kpipe_rxring, 0);
2593 		}
2594 		utun_data_move_end(pcb);
2595 		return;
2596 	}
2597 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2598 #endif // UTUN_NEXUS
2599 
2600 	for (;;) {
2601 		bool can_accept_packets = true;
2602 		ifnet_lock_shared(pcb->utun_ifp);
2603 
2604 		u_int32_t utun_packet_cnt;
2605 		errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt);
2606 		if (error_pc != 0) {
2607 			os_log_error(OS_LOG_DEFAULT, "utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2608 			utun_packet_cnt = 0;
2609 		}
2610 
2611 		can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets);
2612 		if (!can_accept_packets && pcb->utun_ctlref) {
2613 			u_int32_t difference = 0;
2614 			if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) {
2615 				if (difference > 0) {
2616 					// If the low-water mark has not yet been reached, we still need to enqueue data
2617 					// into the buffer
2618 					can_accept_packets = true;
2619 				}
2620 			}
2621 		}
2622 		if (!can_accept_packets) {
2623 			errno_t error = ifnet_disable_output(interface);
2624 			if (error != 0) {
2625 				os_log_error(OS_LOG_DEFAULT, "utun_start: ifnet_disable_output returned error %d\n", error);
2626 			}
2627 			ifnet_lock_done(pcb->utun_ifp);
2628 			break;
2629 		}
2630 		ifnet_lock_done(pcb->utun_ifp);
2631 		if (ifnet_dequeue(interface, &data) != 0) {
2632 			break;
2633 		}
2634 		if (utun_output(interface, data) != 0) {
2635 			break;
2636 		}
2637 	}
2638 }
2639 
2640 static errno_t
utun_output(ifnet_t interface,mbuf_t data)2641 utun_output(ifnet_t     interface,
2642     mbuf_t data)
2643 {
2644 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2645 	errno_t result;
2646 
2647 	VERIFY(interface == pcb->utun_ifp);
2648 
2649 #if UTUN_NEXUS
2650 	if (!pcb->utun_use_netif)
2651 #endif // UTUN_NEXUS
2652 	{
2653 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2654 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
2655 		}
2656 	}
2657 
2658 	if (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT) {
2659 		/* flush data */
2660 		mbuf_freem(data);
2661 		return 0;
2662 	}
2663 
2664 	// otherwise, fall thru to ctl_enqueumbuf
2665 	if (pcb->utun_ctlref) {
2666 		int     length;
2667 
2668 		/*
2669 		 * The ABI requires the protocol in network byte order
2670 		 */
2671 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2672 			*mtod(data, uint32_t *) = htonl(*mtod(data, uint32_t *));
2673 		}
2674 
2675 		length = mbuf_pkthdr_len(data);
2676 		result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
2677 		if (result != 0) {
2678 			mbuf_freem(data);
2679 			os_log_error(OS_LOG_DEFAULT, "utun_output - ctl_enqueuembuf failed: %d\n", result);
2680 #if UTUN_NEXUS
2681 			if (!pcb->utun_use_netif)
2682 #endif // UTUN_NEXUS
2683 			{
2684 				ifnet_stat_increment_out(interface, 0, 0, 1);
2685 			}
2686 		} else {
2687 #if UTUN_NEXUS
2688 			if (!pcb->utun_use_netif)
2689 #endif // UTUN_NEXUS
2690 			{
2691 				if (!pcb->utun_ext_ifdata_stats) {
2692 					ifnet_stat_increment_out(interface, 1, length, 0);
2693 				}
2694 			}
2695 		}
2696 	} else {
2697 		mbuf_freem(data);
2698 	}
2699 
2700 	return 0;
2701 }
2702 
2703 static errno_t
utun_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)2704 utun_demux(__unused ifnet_t interface,
2705     mbuf_t data,
2706     __unused char *frame_header,
2707     protocol_family_t *protocol)
2708 {
2709 #if UTUN_NEXUS
2710 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2711 	struct ip *ip;
2712 	u_int ip_version;
2713 #endif
2714 
2715 	while (data != NULL && mbuf_len(data) < 1) {
2716 		data = mbuf_next(data);
2717 	}
2718 
2719 	if (data == NULL) {
2720 		return ENOENT;
2721 	}
2722 
2723 #if UTUN_NEXUS
2724 	if (pcb->utun_use_netif) {
2725 		ip = mtod(data, struct ip *);
2726 		ip_version = ip->ip_v;
2727 
2728 		switch (ip_version) {
2729 		case 4:
2730 			*protocol = PF_INET;
2731 			return 0;
2732 		case 6:
2733 			*protocol = PF_INET6;
2734 			return 0;
2735 		default:
2736 			*protocol = 0;
2737 			break;
2738 		}
2739 	} else
2740 #endif // UTUN_NEXUS
2741 	{
2742 		*protocol = *mtod(data, uint32_t *);
2743 	}
2744 
2745 	return 0;
2746 }
2747 
2748 static errno_t
utun_framer(ifnet_t interface,mbuf_t * packet,__unused const struct sockaddr * dest,__unused IFNET_LLADDR_T dest_lladdr,IFNET_FRAME_TYPE_T frame_type,u_int32_t * prepend_len,u_int32_t * postpend_len)2749 utun_framer(ifnet_t interface,
2750     mbuf_t *packet,
2751     __unused const struct sockaddr *dest,
2752     __unused IFNET_LLADDR_T dest_lladdr,
2753     IFNET_FRAME_TYPE_T frame_type,
2754     u_int32_t *prepend_len,
2755     u_int32_t *postpend_len)
2756 {
2757 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2758 	VERIFY(interface == pcb->utun_ifp);
2759 
2760 	u_int32_t header_length = UTUN_HEADER_SIZE(pcb);
2761 	if (mbuf_prepend(packet, header_length, MBUF_DONTWAIT) != 0) {
2762 		os_log_error(OS_LOG_DEFAULT, "utun_framer - ifnet_output prepend failed\n");
2763 
2764 		ifnet_stat_increment_out(interface, 0, 0, 1);
2765 
2766 		// just	return, because the buffer was freed in mbuf_prepend
2767 		return EJUSTRETURN;
2768 	}
2769 	if (prepend_len != NULL) {
2770 		*prepend_len = header_length;
2771 	}
2772 	if (postpend_len != NULL) {
2773 		*postpend_len = 0;
2774 	}
2775 
2776 	// place protocol number at the beginning of the mbuf
2777 	*mtod(*packet, protocol_family_t *) = *(protocol_family_t *)(uintptr_t)(size_t)frame_type;
2778 
2779 #if NECP
2780 	// Add process uuid if applicable
2781 	if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
2782 		if (m_pktlen(*packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2783 			u_int8_t *header = mtod(*packet, uint8_t*);
2784 			int uuid_err = necp_get_app_uuid_from_packet(*packet, (void *)(header + sizeof(u_int32_t)));
2785 			if (uuid_err != 0) {
2786 				os_log_error(OS_LOG_DEFAULT, "Received app uuid error %d for %s%d\n", uuid_err, ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp));
2787 			}
2788 		} else {
2789 			os_log_error(OS_LOG_DEFAULT, "Cannot set proc uuid for %s%d, size %d < %zu\n", ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp),
2790 			    m_pktlen(*packet), UTUN_HEADER_SIZE(pcb));
2791 		}
2792 	}
2793 #endif // NECP
2794 
2795 	return 0;
2796 }
2797 
2798 static errno_t
utun_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)2799 utun_add_proto(__unused ifnet_t interface,
2800     protocol_family_t protocol,
2801     __unused const struct ifnet_demux_desc *demux_array,
2802     __unused u_int32_t demux_count)
2803 {
2804 	switch (protocol) {
2805 	case PF_INET:
2806 		return 0;
2807 	case PF_INET6:
2808 		return 0;
2809 	default:
2810 		break;
2811 	}
2812 
2813 	return ENOPROTOOPT;
2814 }
2815 
2816 static errno_t
utun_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)2817 utun_del_proto(__unused ifnet_t interface,
2818     __unused protocol_family_t protocol)
2819 {
2820 	return 0;
2821 }
2822 
2823 static errno_t
utun_ioctl(ifnet_t interface,u_long command,void * data)2824 utun_ioctl(ifnet_t interface,
2825     u_long command,
2826     void *data)
2827 {
2828 #if UTUN_NEXUS
2829 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2830 #endif
2831 	errno_t result = 0;
2832 
2833 	switch (command) {
2834 	case SIOCSIFMTU: {
2835 #if UTUN_NEXUS
2836 		if (pcb->utun_use_netif) {
2837 			// Make sure we can fit packets in the channel buffers
2838 			// Allow for the headroom in the slot
2839 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) + UTUN_IF_HEADROOM_SIZE > pcb->utun_slot_size) {
2840 				result = EINVAL;
2841 			} else {
2842 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
2843 			}
2844 		} else
2845 #endif // UTUN_NEXUS
2846 		{
2847 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
2848 		}
2849 		break;
2850 	}
2851 
2852 	case SIOCSIFFLAGS:
2853 		/* ifioctl() takes care of it */
2854 		break;
2855 
2856 	default:
2857 		result = EOPNOTSUPP;
2858 	}
2859 
2860 	return result;
2861 }
2862 
2863 static void
utun_detached(ifnet_t interface)2864 utun_detached(ifnet_t interface)
2865 {
2866 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2867 	(void)ifnet_release(interface);
2868 	lck_mtx_lock(&utun_lock);
2869 	utun_free_pcb(pcb, true);
2870 	(void)ifnet_dispose(interface);
2871 	lck_mtx_unlock(&utun_lock);
2872 }
2873 
2874 /* Protocol Handlers */
2875 
2876 static errno_t
utun_proto_input(__unused ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)2877 utun_proto_input(__unused ifnet_t interface,
2878     protocol_family_t protocol,
2879     mbuf_t m,
2880     __unused char *frame_header)
2881 {
2882 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2883 #if UTUN_NEXUS
2884 	if (!pcb->utun_use_netif)
2885 #endif // UTUN_NEXUS
2886 	{
2887 		mbuf_adj(m, UTUN_HEADER_SIZE(pcb));
2888 	}
2889 	int32_t pktlen = m->m_pkthdr.len;
2890 	if (proto_input(protocol, m) != 0) {
2891 		m_freem(m);
2892 #if UTUN_NEXUS
2893 		if (!pcb->utun_use_netif)
2894 #endif // UTUN_NEXUS
2895 		{
2896 			ifnet_stat_increment_in(interface, 0, 0, 1);
2897 		}
2898 	} else {
2899 #if UTUN_NEXUS
2900 		if (!pcb->utun_use_netif)
2901 #endif // UTUN_NEXUS
2902 		{
2903 			ifnet_stat_increment_in(interface, 1, pktlen, 0);
2904 		}
2905 	}
2906 
2907 	return 0;
2908 }
2909 
2910 static errno_t
utun_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,char * frame_type,__unused char * link_layer_dest)2911 utun_proto_pre_output(__unused ifnet_t interface,
2912     protocol_family_t protocol,
2913     __unused mbuf_t *packet,
2914     __unused const struct sockaddr *dest,
2915     __unused void *route,
2916     char *frame_type,
2917     __unused char *link_layer_dest)
2918 {
2919 	*(protocol_family_t *)(void *)frame_type = protocol;
2920 	return 0;
2921 }
2922 
2923 static errno_t
utun_attach_proto(ifnet_t interface,protocol_family_t protocol)2924 utun_attach_proto(ifnet_t interface,
2925     protocol_family_t protocol)
2926 {
2927 	struct ifnet_attach_proto_param proto;
2928 
2929 	bzero(&proto, sizeof(proto));
2930 	proto.input = utun_proto_input;
2931 	proto.pre_output = utun_proto_pre_output;
2932 
2933 	errno_t result = ifnet_attach_protocol(interface, protocol, &proto);
2934 	if (result != 0 && result != EEXIST) {
2935 		os_log_error(OS_LOG_DEFAULT, "utun_attach_inet - ifnet_attach_protocol %d failed: %d\n",
2936 		    protocol, result);
2937 	}
2938 
2939 	return result;
2940 }
2941 
2942 static errno_t
utun_pkt_input(struct utun_pcb * pcb,mbuf_t packet)2943 utun_pkt_input(struct utun_pcb *pcb, mbuf_t packet)
2944 {
2945 #if UTUN_NEXUS
2946 	if (pcb->utun_use_netif) {
2947 		if (!utun_data_move_begin(pcb)) {
2948 			os_log_info(OS_LOG_DEFAULT,
2949 			    "%s: data path stopped for %s\n",
2950 			    __func__, if_name(pcb->utun_ifp));
2951 			return ENXIO;
2952 		}
2953 
2954 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2955 
2956 		lck_mtx_lock(&pcb->utun_input_chain_lock);
2957 
2958 		if (pcb->utun_input_chain_count > (u_int32_t)if_utun_max_pending_input) {
2959 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
2960 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2961 			utun_data_move_end(pcb);
2962 			return ENOSPC;
2963 		}
2964 
2965 		if (pcb->utun_input_chain != NULL) {
2966 			pcb->utun_input_chain_last->m_nextpkt = packet;
2967 		} else {
2968 			pcb->utun_input_chain = packet;
2969 		}
2970 		pcb->utun_input_chain_count++;
2971 		while (packet->m_nextpkt) {
2972 			VERIFY(packet != packet->m_nextpkt);
2973 			packet = packet->m_nextpkt;
2974 			pcb->utun_input_chain_count++;
2975 		}
2976 		pcb->utun_input_chain_last = packet;
2977 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
2978 
2979 		kern_channel_ring_t __single rx_ring = pcb->utun_netif_rxring;
2980 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2981 
2982 		if (rx_ring != NULL) {
2983 			kern_channel_notify(rx_ring, 0);
2984 		}
2985 
2986 		utun_data_move_end(pcb);
2987 		return 0;
2988 	} else
2989 #endif // UTUN_NEXUS
2990 	{
2991 		mbuf_pkthdr_setrcvif(packet, pcb->utun_ifp);
2992 
2993 		if (m_pktlen(packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2994 			bpf_tap_in(pcb->utun_ifp, DLT_NULL, packet, 0, 0);
2995 		}
2996 		if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) {
2997 			/* flush data */
2998 			mbuf_freem(packet);
2999 			return 0;
3000 		}
3001 
3002 		errno_t result = 0;
3003 		if (!pcb->utun_ext_ifdata_stats) {
3004 			struct ifnet_stat_increment_param incs = {};
3005 			incs.packets_in = 1;
3006 			incs.bytes_in = mbuf_pkthdr_len(packet);
3007 			result = ifnet_input(pcb->utun_ifp, packet, &incs);
3008 		} else {
3009 			result = ifnet_input(pcb->utun_ifp, packet, NULL);
3010 		}
3011 		if (result != 0) {
3012 			ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
3013 
3014 			os_log_error(OS_LOG_DEFAULT, "%s - ifnet_input failed: %d\n", __FUNCTION__, result);
3015 		}
3016 
3017 		return 0;
3018 	}
3019 }
3020 
3021 #if UTUN_NEXUS
3022 
3023 static errno_t
utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)3024 utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
3025 {
3026 	return 0;
3027 }
3028 
3029 static void
utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)3030 utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
3031 {
3032 	// Ignore
3033 }
3034 
3035 static errno_t
utun_register_nexus(void)3036 utun_register_nexus(void)
3037 {
3038 	const struct kern_nexus_domain_provider_init dp_init = {
3039 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3040 		.nxdpi_flags = 0,
3041 		.nxdpi_init = utun_nxdp_init,
3042 		.nxdpi_fini = utun_nxdp_fini
3043 	};
3044 	errno_t err = 0;
3045 	nexus_domain_provider_name_t domain_provider_name = "com.apple.utun";
3046 
3047 	/* utun_nxdp_init() is called before this function returns */
3048 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
3049 	    domain_provider_name,
3050 	    &dp_init, sizeof(dp_init),
3051 	    &utun_nx_dom_prov);
3052 	if (err != 0) {
3053 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
3054 		return err;
3055 	}
3056 	return 0;
3057 }
3058 boolean_t
utun_interface_needs_netagent(ifnet_t interface)3059 utun_interface_needs_netagent(ifnet_t interface)
3060 {
3061 	struct utun_pcb *__single pcb = NULL;
3062 
3063 	if (interface == NULL) {
3064 		return FALSE;
3065 	}
3066 
3067 	pcb = ifnet_softc(interface);
3068 
3069 	if (pcb == NULL) {
3070 		return FALSE;
3071 	}
3072 
3073 	return pcb->utun_needs_netagent == true;
3074 }
3075 
3076 static errno_t
utun_ifnet_set_attrs(ifnet_t ifp)3077 utun_ifnet_set_attrs(ifnet_t ifp)
3078 {
3079 	/* Set flags and additional information. */
3080 	ifnet_set_mtu(ifp, 1500);
3081 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
3082 
3083 	/* The interface must generate its own IPv6 LinkLocal address,
3084 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
3085 	 */
3086 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
3087 
3088 	return 0;
3089 }
3090 
3091 static errno_t
utun_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)3092 utun_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
3093 {
3094 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3095 	pcb->utun_netif_nexus = nexus;
3096 	return utun_ifnet_set_attrs(ifp);
3097 }
3098 
3099 static errno_t
utun_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)3100 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
3101     proc_t p, kern_nexus_t nexus,
3102     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
3103 {
3104 #pragma unused(nxprov, p)
3105 #pragma unused(nexus, nexus_port, channel, ch_ctx)
3106 	return 0;
3107 }
3108 
3109 static errno_t
utun_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3110 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3111     kern_channel_t channel)
3112 {
3113 #pragma unused(nxprov, channel)
3114 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3115 	boolean_t ok = ifnet_is_attached(pcb->utun_ifp, 1);
3116 	if (pcb->utun_netif_nexus == nexus) {
3117 		pcb->utun_netif_connected = true;
3118 	}
3119 	if (ok) {
3120 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3121 		UTUN_SET_DATA_PATH_READY(pcb);
3122 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3123 	}
3124 	return ok ? 0 : ENXIO;
3125 }
3126 
3127 static void
utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3128 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3129     kern_channel_t channel)
3130 {
3131 #pragma unused(nxprov, channel)
3132 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3133 	/* Wait until all threads in the data paths are done. */
3134 	utun_wait_data_move_drain(pcb);
3135 }
3136 
3137 static void
utun_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3138 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3139     kern_channel_t channel)
3140 {
3141 #pragma unused(nxprov, channel)
3142 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3143 	/* Wait until all threads in the data paths are done. */
3144 	utun_wait_data_move_drain(pcb);
3145 }
3146 
3147 static void
utun_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3148 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3149     kern_channel_t channel)
3150 {
3151 #pragma unused(nxprov, channel)
3152 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3153 	if (pcb->utun_netif_nexus == nexus) {
3154 		pcb->utun_netif_connected = false;
3155 		if (pcb->utun_attach_fsw) {
3156 			// disconnected by flowswitch that was attached by us
3157 			pcb->utun_netif_nexus = NULL;
3158 		}
3159 	}
3160 	ifnet_decr_iorefcnt(pcb->utun_ifp);
3161 }
3162 
3163 static errno_t
utun_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)3164 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3165     kern_channel_t channel, kern_channel_ring_t ring,
3166     boolean_t is_tx_ring, void **ring_ctx)
3167 {
3168 #pragma unused(nxprov)
3169 #pragma unused(channel)
3170 #pragma unused(ring_ctx)
3171 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3172 	if (!is_tx_ring) {
3173 		VERIFY(pcb->utun_kpipe_rxring == NULL);
3174 		pcb->utun_kpipe_rxring = ring;
3175 	} else {
3176 		VERIFY(pcb->utun_kpipe_txring == NULL);
3177 		pcb->utun_kpipe_txring = ring;
3178 	}
3179 	return 0;
3180 }
3181 
3182 static void
utun_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)3183 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3184     kern_channel_ring_t ring)
3185 {
3186 #pragma unused(nxprov)
3187 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3188 	if (pcb->utun_kpipe_rxring == ring) {
3189 		pcb->utun_kpipe_rxring = NULL;
3190 	} else if (pcb->utun_kpipe_txring == ring) {
3191 		pcb->utun_kpipe_txring = NULL;
3192 	}
3193 }
3194 
3195 static errno_t
utun_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)3196 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3197     kern_channel_ring_t tx_ring, uint32_t flags)
3198 {
3199 #pragma unused(nxprov)
3200 #pragma unused(flags)
3201 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3202 
3203 	if (!utun_data_move_begin(pcb)) {
3204 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3205 		    __func__, if_name(pcb->utun_ifp));
3206 		return 0;
3207 	}
3208 
3209 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3210 	int channel_enabled = pcb->utun_kpipe_enabled;
3211 	if (!channel_enabled) {
3212 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3213 		utun_data_move_end(pcb);
3214 		return 0;
3215 	}
3216 
3217 	if (pcb->utun_use_netif) {
3218 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3219 		if (tx_slot == NULL) {
3220 			// Nothing to write, bail
3221 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3222 			utun_data_move_end(pcb);
3223 			return 0;
3224 		}
3225 
3226 		// Signal the netif ring to read
3227 		kern_channel_ring_t __single rx_ring = pcb->utun_netif_rxring;
3228 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3229 		if (rx_ring != NULL) {
3230 			kern_channel_notify(rx_ring, 0);
3231 		}
3232 	} else {
3233 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3234 
3235 		struct ifnet_stat_increment_param incs = {};
3236 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
3237 		MBUFQ_HEAD(mbufq) mbq;
3238 		MBUFQ_INIT(&mbq);
3239 		kern_channel_slot_t tx_pslot = NULL;
3240 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3241 		while (tx_slot != NULL) {
3242 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3243 
3244 			// Advance TX ring
3245 			tx_pslot = tx_slot;
3246 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3247 
3248 			if (tx_ph == 0) {
3249 				continue;
3250 			}
3251 
3252 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3253 			VERIFY(tx_buf != NULL);
3254 			uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3255 			    kern_buflet_get_data_address(tx_buf),
3256 			    kern_buflet_get_data_limit(tx_buf));
3257 			VERIFY(tx_baddr != 0);
3258 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3259 
3260 			size_t length = MIN(kern_packet_get_data_length(tx_ph),
3261 			    pcb->utun_slot_size);
3262 
3263 			mbuf_ref_t data = NULL;
3264 			if (length >= UTUN_HEADER_SIZE(pcb) &&
3265 			    !(pcb->utun_flags & UTUN_FLAGS_NO_INPUT)) {
3266 				errno_t error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_HEADER, &data);
3267 				VERIFY(0 == error);
3268 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_WAITOK);
3269 				VERIFY(0 == error);
3270 				/*
3271 				 * The userland ABI requires the first four bytes have
3272 				 * the protocol family in network byte order: swap them
3273 				 */
3274 				*mtod(data, uint32_t*) = ntohl(*mtod(data, uint32_t *));
3275 				mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
3276 				bpf_tap_in(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3277 				incs.packets_in++;
3278 				incs.bytes_in += length;
3279 				MBUFQ_ENQUEUE(&mbq, data);
3280 			}
3281 		}
3282 		if (tx_pslot) {
3283 			kern_channel_advance_slot(tx_ring, tx_pslot);
3284 			tx_ring_stats.kcrsi_slots_transferred = incs.packets_in;
3285 			tx_ring_stats.kcrsi_bytes_transferred = incs.bytes_in;
3286 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3287 			(void) kern_channel_reclaim(tx_ring);
3288 		}
3289 		if (!MBUFQ_EMPTY(&mbq)) {
3290 			(void) ifnet_input_extended(pcb->utun_ifp, MBUFQ_FIRST(&mbq),
3291 			    MBUFQ_LAST(&mbq), &incs);
3292 			MBUFQ_INIT(&mbq);
3293 		}
3294 	}
3295 
3296 	utun_data_move_end(pcb);
3297 	return 0;
3298 }
3299 
3300 static errno_t
utun_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)3301 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3302     kern_channel_ring_t rx_ring, uint32_t flags)
3303 {
3304 #pragma unused(nxprov)
3305 #pragma unused(flags)
3306 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3307 	struct kern_channel_ring_stat_increment rx_ring_stats = {};
3308 
3309 	if (!utun_data_move_begin(pcb)) {
3310 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3311 		    __func__, if_name(pcb->utun_ifp));
3312 		return 0;
3313 	}
3314 
3315 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3316 
3317 	int channel_enabled = pcb->utun_kpipe_enabled;
3318 	if (!channel_enabled) {
3319 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3320 		utun_data_move_end(pcb);
3321 		return 0;
3322 	}
3323 
3324 	/* reclaim user-released slots */
3325 	(void) kern_channel_reclaim(rx_ring);
3326 
3327 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
3328 	if (avail == 0) {
3329 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3330 		utun_data_move_end(pcb);
3331 		return 0;
3332 	}
3333 
3334 	if (pcb->utun_use_netif) {
3335 		kern_channel_ring_t __single tx_ring = pcb->utun_netif_txring;
3336 		if (tx_ring == NULL ||
3337 		    pcb->utun_netif_nexus == NULL) {
3338 			// Net-If TX ring not set up yet, nothing to read
3339 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3340 			utun_data_move_end(pcb);
3341 			return 0;
3342 		}
3343 
3344 		struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->utun_netif_nexus)->nif_stats;
3345 
3346 		// Unlock utun before entering ring
3347 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3348 
3349 		(void)kr_enter(tx_ring, TRUE);
3350 
3351 		// Lock again after entering and validate
3352 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
3353 		if (tx_ring != pcb->utun_netif_txring) {
3354 			// Ring no longer valid
3355 			// Unlock first, then exit ring
3356 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3357 			kr_exit(tx_ring);
3358 			utun_data_move_end(pcb);
3359 			return 0;
3360 		}
3361 
3362 		struct kern_channel_ring_stat_increment tx_ring_stats;
3363 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
3364 		kern_channel_slot_t tx_pslot = NULL;
3365 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3366 		if (tx_slot == NULL) {
3367 			// Nothing to read, don't bother signalling
3368 			// Unlock first, then exit ring
3369 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3370 			kr_exit(tx_ring);
3371 			utun_data_move_end(pcb);
3372 			return 0;
3373 		}
3374 
3375 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3376 		VERIFY(rx_pp != NULL);
3377 		struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
3378 		VERIFY(tx_pp != NULL);
3379 		kern_channel_slot_t rx_pslot = NULL;
3380 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3381 		kern_packet_t tx_chain_ph = 0;
3382 
3383 		while (rx_slot != NULL && tx_slot != NULL) {
3384 			size_t length;
3385 			kern_buflet_t rx_buf;
3386 			uint8_t *rx_baddr;
3387 
3388 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3389 
3390 			/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
3391 			if (tx_ph == 0) {
3392 				// Advance TX ring
3393 				tx_pslot = tx_slot;
3394 				tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3395 				continue;
3396 			}
3397 			(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
3398 			if (tx_chain_ph != 0) {
3399 				kern_packet_append(tx_ph, tx_chain_ph);
3400 			}
3401 			tx_chain_ph = tx_ph;
3402 
3403 			// Advance TX ring
3404 			tx_pslot = tx_slot;
3405 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3406 
3407 			// Allocate rx packet
3408 			kern_packet_t rx_ph = 0;
3409 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3410 			if (__improbable(error != 0)) {
3411 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3412 				    pcb->utun_ifp->if_xname);
3413 				break;
3414 			}
3415 
3416 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3417 			VERIFY(tx_buf != NULL);
3418 			uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3419 			    kern_buflet_get_data_address(tx_buf),
3420 			    kern_buflet_get_data_limit(tx_buf));
3421 			VERIFY(tx_baddr != NULL);
3422 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3423 
3424 			bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
3425 
3426 			length = MIN(kern_packet_get_data_length(tx_ph) + UTUN_HEADER_SIZE(pcb),
3427 			    pcb->utun_slot_size);
3428 
3429 			tx_ring_stats.kcrsi_slots_transferred++;
3430 			tx_ring_stats.kcrsi_bytes_transferred += length;
3431 
3432 			if (length < UTUN_HEADER_SIZE(pcb) ||
3433 			    length > pcb->utun_slot_size ||
3434 			    length > PP_BUF_SIZE_DEF(rx_pp) ||
3435 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3436 				/* flush data */
3437 				kern_pbufpool_free(rx_pp, rx_ph);
3438 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: invalid length %zu header_size %zu\n",
3439 				    pcb->utun_ifp->if_xname, length, UTUN_HEADER_SIZE(pcb));
3440 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
3441 				STATS_INC(nifs, NETIF_STATS_DROP);
3442 				continue;
3443 			}
3444 
3445 			/* fillout packet */
3446 			rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3447 			VERIFY(rx_buf != NULL);
3448 			rx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3449 			    kern_buflet_get_data_address(rx_buf),
3450 			    kern_buflet_get_data_limit(rx_buf));
3451 			VERIFY(rx_baddr != NULL);
3452 
3453 			// Find family
3454 			uint32_t af = 0;
3455 			uint8_t vhl = *(uint8_t *)(tx_baddr);
3456 			u_int ip_version = (vhl >> 4);
3457 			switch (ip_version) {
3458 			case 4: {
3459 				af = AF_INET;
3460 				break;
3461 			}
3462 			case 6: {
3463 				af = AF_INET6;
3464 				break;
3465 			}
3466 			default: {
3467 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: unknown ip version %u vhl %u header_size %zu\n",
3468 				    pcb->utun_ifp->if_xname, ip_version, vhl, UTUN_HEADER_SIZE(pcb));
3469 				break;
3470 			}
3471 			}
3472 
3473 			// Copy header
3474 			af = htonl(af);
3475 			memcpy(rx_baddr, &af, sizeof(af));
3476 			if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
3477 				uuid_t uuid;
3478 				kern_packet_get_euuid(tx_ph, uuid);
3479 				memcpy(rx_baddr + sizeof(af), uuid, sizeof(uuid));
3480 			}
3481 
3482 			// Copy data from tx to rx
3483 			memcpy(rx_baddr + UTUN_HEADER_SIZE(pcb), tx_baddr, length - UTUN_HEADER_SIZE(pcb));
3484 			kern_packet_clear_flow_uuid(rx_ph); // zero flow id
3485 
3486 			/* finalize and attach the packet */
3487 			error = kern_buflet_set_data_offset(rx_buf, 0);
3488 			VERIFY(error == 0);
3489 			error = kern_buflet_set_data_length(rx_buf, length);
3490 			VERIFY(error == 0);
3491 			error = kern_packet_finalize(rx_ph);
3492 			VERIFY(error == 0);
3493 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3494 			VERIFY(error == 0);
3495 
3496 			STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
3497 			STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
3498 
3499 			rx_ring_stats.kcrsi_slots_transferred++;
3500 			rx_ring_stats.kcrsi_bytes_transferred += length;
3501 
3502 			rx_pslot = rx_slot;
3503 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3504 		}
3505 
3506 		if (rx_pslot) {
3507 			kern_channel_advance_slot(rx_ring, rx_pslot);
3508 			kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
3509 		}
3510 
3511 		if (tx_chain_ph != 0) {
3512 			kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
3513 		}
3514 
3515 		if (tx_pslot) {
3516 			kern_channel_advance_slot(tx_ring, tx_pslot);
3517 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3518 			(void)kern_channel_reclaim(tx_ring);
3519 		}
3520 
3521 		/* just like utun_ctl_rcvd(), always reenable output */
3522 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
3523 		if (error != 0) {
3524 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
3525 		}
3526 
3527 		// Unlock first, then exit ring
3528 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3529 
3530 		if (tx_pslot != NULL) {
3531 			kern_channel_notify(tx_ring, 0);
3532 		}
3533 		kr_exit(tx_ring);
3534 	} else {
3535 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3536 
3537 		uint32_t mb_cnt = 0;
3538 		uint32_t mb_len = 0;
3539 		mbuf_ref_t mb_head = NULL;
3540 		mbuf_ref_t mb_tail = NULL;
3541 
3542 		if (ifnet_dequeue_multi(pcb->utun_ifp, avail, &mb_head,
3543 		    &mb_tail, &mb_cnt, &mb_len) != 0) {
3544 			utun_data_move_end(pcb);
3545 			return 0;
3546 		}
3547 		VERIFY(mb_cnt <= avail);
3548 
3549 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3550 		VERIFY(rx_pp != NULL);
3551 		kern_channel_slot_t rx_pslot = NULL;
3552 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3553 		while (rx_slot) {
3554 			size_t length = 0;
3555 			mbuf_t data = NULL;
3556 			if ((data = mb_head) == NULL) {
3557 				VERIFY(mb_cnt == 0);
3558 				break;
3559 			}
3560 			mb_head = mbuf_nextpkt(mb_head);
3561 			mbuf_setnextpkt(data, NULL);
3562 			VERIFY(mb_cnt != 0);
3563 			--mb_cnt;
3564 			length = mbuf_pkthdr_len(data);
3565 			if (length < UTUN_HEADER_SIZE(pcb) ||
3566 			    length > pcb->utun_slot_size ||
3567 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3568 				/* flush data */
3569 				mbuf_freem(data);
3570 				continue;
3571 			}
3572 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3573 
3574 			// Allocate rx packet
3575 			kern_packet_t rx_ph = 0;
3576 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3577 			if (__improbable(error != 0)) {
3578 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3579 				    pcb->utun_ifp->if_xname);
3580 				break;
3581 			}
3582 
3583 			/*
3584 			 * The ABI requires the protocol in network byte order
3585 			 */
3586 			*mtod(data, uint32_t*) = htonl(*mtod(data, uint32_t *));
3587 
3588 			// Fillout rx packet
3589 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3590 			VERIFY(rx_buf != NULL);
3591 			void *rx_baddr = __unsafe_forge_bidi_indexable(void *,
3592 			    kern_buflet_get_data_address(rx_buf),
3593 			    kern_buflet_get_data_limit(rx_buf));
3594 			VERIFY(rx_baddr != NULL);
3595 
3596 			// Copy-in data from mbuf to buflet
3597 			mbuf_copydata(data, 0, length, rx_baddr);
3598 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
3599 
3600 			// Finalize and attach the packet
3601 			error = kern_buflet_set_data_offset(rx_buf, 0);
3602 			VERIFY(error == 0);
3603 			error = kern_buflet_set_data_length(rx_buf, length);
3604 			VERIFY(error == 0);
3605 			error = kern_packet_finalize(rx_ph);
3606 			VERIFY(error == 0);
3607 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3608 			VERIFY(error == 0);
3609 
3610 			rx_ring_stats.kcrsi_slots_transferred++;
3611 			rx_ring_stats.kcrsi_bytes_transferred += length;
3612 
3613 			if (!pcb->utun_ext_ifdata_stats) {
3614 				ifnet_stat_increment_out(pcb->utun_ifp, 1, length, 0);
3615 			}
3616 
3617 			mbuf_freem(data);
3618 
3619 			rx_pslot = rx_slot;
3620 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3621 		}
3622 		if (rx_pslot) {
3623 			kern_channel_advance_slot(rx_ring, rx_pslot);
3624 			kern_channel_increment_ring_stats(rx_ring, &rx_ring_stats);
3625 		}
3626 		if (mb_head != NULL) {
3627 			VERIFY(mb_cnt != 0);
3628 			mbuf_freem_list(mb_head);
3629 		}
3630 	}
3631 
3632 	utun_data_move_end(pcb);
3633 	return 0;
3634 }
3635 
3636 #endif // UTUN_NEXUS
3637 
3638 
3639 /*
3640  * These are place holders until coreTLS kext stops calling them
3641  */
3642 errno_t utun_ctl_register_dtls(void *reg);
3643 int utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
3644 void utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb);
3645 
3646 errno_t
utun_ctl_register_dtls(void * reg)3647 utun_ctl_register_dtls(void *reg)
3648 {
3649 #pragma unused(reg)
3650 	return 0;
3651 }
3652 
3653 int
utun_pkt_dtls_input(struct utun_pcb * pcb,mbuf_t * pkt,protocol_family_t family)3654 utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family)
3655 {
3656 #pragma unused(pcb)
3657 #pragma unused(pkt)
3658 #pragma unused(family)
3659 	return 0;
3660 }
3661 
/*
 * Placeholder: ignores `pcb` and does nothing.
 */
void
utun_ctl_disable_crypto_dtls(struct utun_pcb *pcb)
{
#pragma unused(pcb)
}
3667 
3668 #if UTUN_NEXUS
3669 static boolean_t
utun_data_move_begin(struct utun_pcb * pcb)3670 utun_data_move_begin(struct utun_pcb *pcb)
3671 {
3672 	bool data_path_ready = false;
3673 
3674 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3675 	if ((data_path_ready = UTUN_IS_DATA_PATH_READY(pcb))) {
3676 		pcb->utun_pcb_data_move++;
3677 	}
3678 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3679 
3680 	return data_path_ready;
3681 }
3682 
3683 static void
utun_data_move_end(struct utun_pcb * pcb)3684 utun_data_move_end(struct utun_pcb *pcb)
3685 {
3686 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3687 	VERIFY(pcb->utun_pcb_data_move > 0);
3688 	/*
3689 	 * if there's no more thread moving data, wakeup any
3690 	 * drainers that are blocked waiting for this.
3691 	 */
3692 	if (--pcb->utun_pcb_data_move == 0 && pcb->utun_pcb_drainers > 0) {
3693 		wakeup(&(pcb->utun_pcb_data_move));
3694 	}
3695 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3696 }
3697 
3698 static void
utun_data_move_drain(struct utun_pcb * pcb)3699 utun_data_move_drain(struct utun_pcb *pcb)
3700 {
3701 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3702 	/* data path must already be marked as not ready */
3703 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3704 	pcb->utun_pcb_drainers++;
3705 	while (pcb->utun_pcb_data_move != 0) {
3706 		(void) msleep(&(pcb->utun_pcb_data_move),
3707 		    &pcb->utun_pcb_data_move_lock, (PZERO - 1), __func__, NULL);
3708 	}
3709 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3710 	VERIFY(pcb->utun_pcb_drainers > 0);
3711 	pcb->utun_pcb_drainers--;
3712 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3713 }
3714 
3715 static void
utun_wait_data_move_drain(struct utun_pcb * pcb)3716 utun_wait_data_move_drain(struct utun_pcb *pcb)
3717 {
3718 	/*
3719 	 * Mark the data path as not usable.
3720 	 */
3721 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3722 	UTUN_CLR_DATA_PATH_READY(pcb);
3723 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3724 
3725 	/* Wait until all threads in the data path are done. */
3726 	utun_data_move_drain(pcb);
3727 }
3728 #endif // UTUN_NEXUS
3729