xref: /xnu-11215.41.3/bsd/net/if_utun.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2008-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 
/* ----------------------------------------------------------------------------------
 *   Application of kernel control for interface creation
 *
 *   Theory of operation:
 *   utun (user tunnel) acts as glue between kernel control sockets and network interfaces.
 *   This kernel control will register an interface for every client that connects.
 *   ---------------------------------------------------------------------------------- */
38 
39 #include <sys/systm.h>
40 #include <sys/kern_control.h>
41 #include <net/kpi_protocol.h>
42 #include <net/kpi_interface.h>
43 #include <sys/socket.h>
44 #include <net/if.h>
45 #include <net/if_types.h>
46 #include <net/bpf.h>
47 #include <net/if_utun.h>
48 #include <sys/mbuf.h>
49 #include <sys/sockio.h>
50 #include <netinet/in.h>
51 #include <netinet/ip.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/in6_var.h>
54 #include <sys/kauth.h>
55 #include <net/necp.h>
56 #include <kern/zalloc.h>
57 
58 #include <net/sockaddr_utils.h>
59 
60 #include <os/log.h>
61 
/*
 * UTUN_NEXUS gates all of the nexus (netif / flowswitch / kernel pipe) code
 * in this file.  It is 1 only when both SKYWALK and CONFIG_NEXUS_KERNEL_PIPE
 * are enabled, in which case the Skywalk private headers are pulled in.
 */
#if SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/netif/nx_netif.h>
#define UTUN_NEXUS 1
#else // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
#define UTUN_NEXUS 0
#endif // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
70 
#if UTUN_NEXUS
/*
 * File-scope nexus state.
 * NOTE(review): none of these four variables is used in the part of the file
 * reviewed here; presumably they hold the kernel pipe nexus controller, its
 * reference count and the registered provider UUIDs -- confirm against
 * utun_register_nexus() and its callers.
 */
static nexus_controller_t utun_ncd;
static int utun_ncd_refcount;
static uuid_t utun_kpipe_uuid;
static uuid_t utun_nx_dom_prov;

/*
 * UUIDs describing the netif ("if_*") and flowswitch ("fsw_*") nexus objects
 * associated with one utun instance; embedded in struct utun_pcb as utun_nx.
 */
typedef struct utun_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t fsw_provider;
	uuid_t fsw_instance;
	uuid_t fsw_device;
	uuid_t fsw_agent;
} *utun_nx_t;

#endif // UTUN_NEXUS
87 
88 /* Control block allocated for each kernel control connection */
89 struct utun_pcb {
90 	TAILQ_ENTRY(utun_pcb)   utun_chain;
91 	kern_ctl_ref    utun_ctlref;
92 	ifnet_t                 utun_ifp;
93 	u_int32_t               utun_unit;
94 	u_int32_t               utun_unique_id;
95 	u_int32_t               utun_flags;
96 	int                     utun_ext_ifdata_stats;
97 	u_int32_t               utun_max_pending_packets;
98 	char                    utun_if_xname[IFXNAMSIZ];
99 	char                    utun_unique_name[IFXNAMSIZ];
100 	// PCB lock protects state fields and rings
101 	decl_lck_rw_data(, utun_pcb_lock);
102 	struct mbuf *   utun_input_chain;
103 	struct mbuf *   utun_input_chain_last;
104 	u_int32_t               utun_input_chain_count;
105 	// Input chain lock protects the list of input mbufs
106 	// The input chain lock must be taken AFTER the PCB lock if both are held
107 	lck_mtx_t               utun_input_chain_lock;
108 
109 #if UTUN_NEXUS
110 	// lock to protect utun_pcb_data_move & utun_pcb_drainers
111 	decl_lck_mtx_data(, utun_pcb_data_move_lock);
112 	u_int32_t               utun_pcb_data_move; /* number of data moving contexts */
113 	u_int32_t               utun_pcb_drainers; /* number of threads waiting to drain */
114 	u_int32_t               utun_pcb_data_path_state; /* internal state of interface data path */
115 
116 	struct utun_nx  utun_nx;
117 	int                     utun_kpipe_enabled;
118 	uuid_t                  utun_kpipe_uuid;
119 	void *                  utun_kpipe_rxring;
120 	void *                  utun_kpipe_txring;
121 	kern_pbufpool_t         utun_kpipe_pp;
122 	u_int32_t               utun_kpipe_tx_ring_size;
123 	u_int32_t               utun_kpipe_rx_ring_size;
124 
125 	kern_nexus_t    utun_netif_nexus;
126 	kern_pbufpool_t         utun_netif_pp;
127 	void *                  utun_netif_rxring;
128 	void *                  utun_netif_txring;
129 	uint64_t                utun_netif_txring_size;
130 
131 	u_int32_t               utun_slot_size;
132 	u_int32_t               utun_netif_ring_size;
133 	u_int32_t               utun_tx_fsw_ring_size;
134 	u_int32_t               utun_rx_fsw_ring_size;
135 	// Auto attach flowswitch when netif is enabled. When set to false,
136 	// it allows userspace nexus controller to attach and own flowswitch.
137 	bool                    utun_attach_fsw;
138 	bool                    utun_netif_connected;
139 	bool                    utun_use_netif;
140 	bool                    utun_needs_netagent;
141 #endif // UTUN_NEXUS
142 };
143 
144 /* Kernel Control functions */
145 static errno_t  utun_ctl_setup(u_int32_t *unit, void **unitinfo);
146 static errno_t  utun_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
147     void **unitinfo);
148 static errno_t  utun_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
149     void **unitinfo);
150 static errno_t  utun_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
151     void *unitinfo);
152 static errno_t  utun_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
153     void *unitinfo, mbuf_t m, int flags);
154 static errno_t  utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
155     int opt, void *__sized_by(*len) data, size_t *len);
156 static errno_t  utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
157     int opt, void *__sized_by(len) data, size_t len);
158 static void             utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
159     int flags);
160 
161 /* Network Interface functions */
162 static void     utun_start(ifnet_t interface);
163 static errno_t  utun_framer(ifnet_t interface, mbuf_t *packet,
164     const struct sockaddr *dest, const char *desk_linkaddr,
165     const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len);
166 static errno_t  utun_output(ifnet_t interface, mbuf_t data);
167 static errno_t  utun_demux(ifnet_t interface, mbuf_t data, char *frame_header,
168     protocol_family_t *protocol);
169 static errno_t  utun_add_proto(ifnet_t interface, protocol_family_t protocol,
170     const struct ifnet_demux_desc *demux_array,
171     u_int32_t demux_count);
172 static errno_t  utun_del_proto(ifnet_t interface, protocol_family_t protocol);
173 static errno_t  utun_ioctl(ifnet_t interface, u_long cmd, void *data);
174 static void             utun_detached(ifnet_t interface);
175 
176 /* Protocol handlers */
177 static errno_t  utun_attach_proto(ifnet_t interface, protocol_family_t proto);
178 static errno_t  utun_proto_input(ifnet_t interface, protocol_family_t protocol,
179     mbuf_t m, char *frame_header);
180 static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
181     mbuf_t *packet, const struct sockaddr *dest, void *route,
182     char *frame_type, char *link_layer_dest);
183 static errno_t utun_pkt_input(struct utun_pcb *pcb, mbuf_t m);
184 
185 /* data movement refcounting functions */
186 #if UTUN_NEXUS
187 static boolean_t utun_data_move_begin(struct utun_pcb *pcb);
188 static void utun_data_move_end(struct utun_pcb *pcb);
189 static void utun_wait_data_move_drain(struct utun_pcb *pcb);
190 
191 /* Data path states */
192 #define UTUN_PCB_DATA_PATH_READY    0x1
193 
194 /* Macros to set/clear/test data path states */
195 #define UTUN_SET_DATA_PATH_READY(_pcb) \
196     ((_pcb)->utun_pcb_data_path_state |= UTUN_PCB_DATA_PATH_READY)
197 #define UTUN_CLR_DATA_PATH_READY(_pcb) \
198     ((_pcb)->utun_pcb_data_path_state &= ~UTUN_PCB_DATA_PATH_READY)
199 #define UTUN_IS_DATA_PATH_READY(_pcb) \
200     (((_pcb)->utun_pcb_data_path_state & UTUN_PCB_DATA_PATH_READY) != 0)
201 
202 #define UTUN_IF_DEFAULT_SLOT_SIZE 2048
203 #define UTUN_IF_DEFAULT_RING_SIZE 64
204 #define UTUN_IF_DEFAULT_TX_FSW_RING_SIZE 64
205 #define UTUN_IF_DEFAULT_RX_FSW_RING_SIZE 128
206 #define UTUN_IF_DEFAULT_BUF_SEG_SIZE    skmem_usr_buf_seg_size
207 #define UTUN_IF_HEADROOM_SIZE 32
208 
209 #define UTUN_IF_MIN_RING_SIZE 8
210 #define UTUN_IF_MAX_RING_SIZE 1024
211 
212 #define UTUN_IF_MIN_SLOT_SIZE 1024
213 #define UTUN_IF_MAX_SLOT_SIZE (32 * 1024)
214 
215 #define UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT 512
216 
217 static int if_utun_max_pending_input = UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT;
218 
219 static int sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS;
220 static int sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
221 static int sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
222 
223 static int if_utun_ring_size = UTUN_IF_DEFAULT_RING_SIZE;
224 static int if_utun_tx_fsw_ring_size = UTUN_IF_DEFAULT_TX_FSW_RING_SIZE;
225 static int if_utun_rx_fsw_ring_size = UTUN_IF_DEFAULT_RX_FSW_RING_SIZE;
226 
227 SYSCTL_DECL(_net_utun);
228 SYSCTL_NODE(_net, OID_AUTO, utun, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "UTun");
229 
230 SYSCTL_INT(_net_utun, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_utun_max_pending_input, 0, "");
231 SYSCTL_PROC(_net_utun, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
232     &if_utun_ring_size, UTUN_IF_DEFAULT_RING_SIZE, &sysctl_if_utun_ring_size, "I", "");
233 SYSCTL_PROC(_net_utun, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
234     &if_utun_tx_fsw_ring_size, UTUN_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_utun_tx_fsw_ring_size, "I", "");
235 SYSCTL_PROC(_net_utun, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
236     &if_utun_rx_fsw_ring_size, UTUN_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_utun_rx_fsw_ring_size, "I", "");
237 
238 static errno_t
239 utun_register_nexus(void);
240 
241 static errno_t
242 utun_netif_prepare(__unused kern_nexus_t nexus, ifnet_t ifp);
243 static errno_t
244 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
245     proc_t p, kern_nexus_t nexus,
246     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx);
247 static errno_t
248 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
249     kern_channel_t channel);
250 static void
251 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
252     kern_channel_t channel);
253 static void
254 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
255     kern_channel_t channel);
256 static void
257 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
258     kern_channel_t channel);
259 static errno_t
260 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
261     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
262     void **ring_ctx);
263 static void
264 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
265     kern_channel_ring_t ring);
266 static errno_t
267 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
268     kern_channel_ring_t ring, uint32_t flags);
269 static errno_t
270 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
271     kern_channel_ring_t ring, uint32_t flags);
272 #endif // UTUN_NEXUS
273 
274 #define UTUN_DEFAULT_MTU 1500
275 #define UTUN_HEADER_SIZE(_pcb) (sizeof(u_int32_t) + (((_pcb)->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) ? sizeof(uuid_t) : 0))
276 
277 static kern_ctl_ref     utun_kctlref;
278 static LCK_ATTR_DECLARE(utun_lck_attr, 0, 0);
279 static LCK_GRP_DECLARE(utun_lck_grp, "utun");
280 static LCK_MTX_DECLARE_ATTR(utun_lock, &utun_lck_grp, &utun_lck_attr);
281 
282 TAILQ_HEAD(utun_list, utun_pcb) utun_head;
283 
284 static KALLOC_TYPE_DEFINE(utun_pcb_zone, struct utun_pcb, NET_KT_DEFAULT);
285 
286 #if UTUN_NEXUS
287 
288 static int
289 sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS
290 {
291 #pragma unused(arg1, arg2)
292 	int value = if_utun_ring_size;
293 
294 	int error = sysctl_handle_int(oidp, &value, 0, req);
295 	if (error || !req->newptr) {
296 		return error;
297 	}
298 
299 	if (value < UTUN_IF_MIN_RING_SIZE ||
300 	    value > UTUN_IF_MAX_RING_SIZE) {
301 		return EINVAL;
302 	}
303 
304 	if_utun_ring_size = value;
305 
306 	return 0;
307 }
308 
309 static int
310 sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
311 {
312 #pragma unused(arg1, arg2)
313 	int value = if_utun_tx_fsw_ring_size;
314 
315 	int error = sysctl_handle_int(oidp, &value, 0, req);
316 	if (error || !req->newptr) {
317 		return error;
318 	}
319 
320 	if (value < UTUN_IF_MIN_RING_SIZE ||
321 	    value > UTUN_IF_MAX_RING_SIZE) {
322 		return EINVAL;
323 	}
324 
325 	if_utun_tx_fsw_ring_size = value;
326 
327 	return 0;
328 }
329 
330 static int
331 sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
332 {
333 #pragma unused(arg1, arg2)
334 	int value = if_utun_rx_fsw_ring_size;
335 
336 	int error = sysctl_handle_int(oidp, &value, 0, req);
337 	if (error || !req->newptr) {
338 		return error;
339 	}
340 
341 	if (value < UTUN_IF_MIN_RING_SIZE ||
342 	    value > UTUN_IF_MAX_RING_SIZE) {
343 		return EINVAL;
344 	}
345 
346 	if_utun_rx_fsw_ring_size = value;
347 
348 	return 0;
349 }
350 
351 static errno_t
utun_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)352 utun_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
353     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
354     void **ring_ctx)
355 {
356 #pragma unused(nxprov)
357 #pragma unused(channel)
358 #pragma unused(ring_ctx)
359 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
360 	if (!is_tx_ring) {
361 		VERIFY(pcb->utun_netif_rxring == NULL);
362 		pcb->utun_netif_rxring = ring;
363 	} else {
364 		VERIFY(pcb->utun_netif_txring == NULL);
365 		pcb->utun_netif_txring = ring;
366 	}
367 	return 0;
368 }
369 
370 static void
utun_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)371 utun_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
372     kern_channel_ring_t ring)
373 {
374 #pragma unused(nxprov)
375 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
376 	if (pcb->utun_netif_rxring == ring) {
377 		pcb->utun_netif_rxring = NULL;
378 	} else if (pcb->utun_netif_txring == ring) {
379 		pcb->utun_netif_txring = NULL;
380 	}
381 }
382 
383 static errno_t
utun_netif_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)384 utun_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
385     kern_channel_ring_t tx_ring, uint32_t flags)
386 {
387 #pragma unused(nxprov)
388 #pragma unused(flags)
389 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
390 
391 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
392 
393 	if (!utun_data_move_begin(pcb)) {
394 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
395 		    __func__, if_name(pcb->utun_ifp));
396 		return 0;
397 	}
398 
399 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
400 
401 	struct kern_channel_ring_stat_increment tx_ring_stats;
402 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
403 	kern_channel_slot_t tx_pslot = NULL;
404 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
405 	kern_packet_t tx_chain_ph = 0;
406 
407 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
408 
409 	if (tx_slot == NULL) {
410 		// Nothing to write, don't bother signalling
411 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
412 		utun_data_move_end(pcb);
413 		return 0;
414 	}
415 
416 	if (pcb->utun_kpipe_enabled) {
417 		kern_channel_ring_t __single rx_ring = pcb->utun_kpipe_rxring;
418 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
419 
420 		// Signal the kernel pipe ring to read
421 		if (rx_ring != NULL) {
422 			kern_channel_notify(rx_ring, 0);
423 		}
424 		utun_data_move_end(pcb);
425 		return 0;
426 	}
427 
428 	// If we're here, we're injecting into the utun kernel control socket
429 	while (tx_slot != NULL) {
430 		size_t length = 0;
431 		mbuf_ref_t data = NULL;
432 
433 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
434 
435 		if (tx_ph == 0) {
436 			// Advance TX ring
437 			tx_pslot = tx_slot;
438 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
439 			continue;
440 		}
441 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
442 		if (tx_chain_ph != 0) {
443 			kern_packet_append(tx_ph, tx_chain_ph);
444 		}
445 		tx_chain_ph = tx_ph;
446 
447 		// Advance TX ring
448 		tx_pslot = tx_slot;
449 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
450 
451 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
452 		VERIFY(tx_buf != NULL);
453 
454 		bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
455 
456 		uint32_t tx_offset = kern_buflet_get_data_offset(tx_buf);
457 		uint32_t tx_length = kern_buflet_get_data_length(tx_buf);
458 		/* tx_baddr is the absolute buffer address */
459 		uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
460 		    kern_buflet_get_data_address(tx_buf),
461 		    kern_buflet_get_data_limit(tx_buf));
462 		VERIFY(tx_baddr != 0);
463 
464 		// The offset must be large enough for the headers
465 		VERIFY(tx_offset >= UTUN_HEADER_SIZE(pcb));
466 
467 		// Find family
468 		uint32_t af = 0;
469 		uint8_t vhl = *(uint8_t *)(tx_baddr + tx_offset);
470 		u_int ip_version = (vhl >> 4);
471 		switch (ip_version) {
472 		case 4: {
473 			af = AF_INET;
474 			break;
475 		}
476 		case 6: {
477 			af = AF_INET6;
478 			break;
479 		}
480 		default: {
481 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s: unknown ip version %u vhl %u tx_offset %u len %u header_size %zu\n",
482 			    pcb->utun_ifp->if_xname, ip_version, vhl, tx_offset, tx_length,
483 			    UTUN_HEADER_SIZE(pcb));
484 			break;
485 		}
486 		}
487 
488 		tx_offset -= UTUN_HEADER_SIZE(pcb);
489 		tx_length += UTUN_HEADER_SIZE(pcb);
490 		tx_baddr += tx_offset;
491 
492 		length = MIN(tx_length, pcb->utun_slot_size);
493 
494 		// Copy in family
495 		memcpy(tx_baddr, &af, sizeof(af));
496 		if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
497 			kern_packet_get_euuid(tx_ph, (void *)(tx_baddr + sizeof(af)));
498 		}
499 
500 		if (length > 0) {
501 			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
502 			if (error == 0) {
503 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
504 				if (error == 0) {
505 					error = utun_output(pcb->utun_ifp, data);
506 					if (error != 0) {
507 						os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - utun_output error %d\n", pcb->utun_ifp->if_xname, error);
508 					}
509 				} else {
510 					os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->utun_ifp->if_xname, length, error);
511 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
512 					STATS_INC(nifs, NETIF_STATS_DROP);
513 					mbuf_freem(data);
514 					data = NULL;
515 				}
516 			} else {
517 				os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->utun_ifp->if_xname, error);
518 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
519 				STATS_INC(nifs, NETIF_STATS_DROP);
520 			}
521 		} else {
522 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - 0 length packet\n", pcb->utun_ifp->if_xname);
523 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
524 			STATS_INC(nifs, NETIF_STATS_DROP);
525 		}
526 
527 		if (data == NULL) {
528 			continue;
529 		}
530 
531 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
532 		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
533 
534 		tx_ring_stats.kcrsi_slots_transferred++;
535 		tx_ring_stats.kcrsi_bytes_transferred += length;
536 	}
537 	if (tx_chain_ph != 0) {
538 		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
539 	}
540 	if (tx_pslot) {
541 		kern_channel_advance_slot(tx_ring, tx_pslot);
542 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
543 		(void)kern_channel_reclaim(tx_ring);
544 	}
545 
546 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
547 	utun_data_move_end(pcb);
548 	return 0;
549 }
550 
551 static errno_t
utun_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)552 utun_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
553     kern_channel_ring_t ring, __unused uint32_t flags)
554 {
555 #pragma unused(nxprov)
556 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
557 	boolean_t more = false;
558 	errno_t rc = 0;
559 
560 	if (!utun_data_move_begin(pcb)) {
561 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
562 		    __func__, if_name(pcb->utun_ifp));
563 		return 0;
564 	}
565 
566 	/*
567 	 * Refill and sync the ring; we may be racing against another thread doing
568 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
569 	 * variant here.
570 	 */
571 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
572 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
573 		os_log_error(OS_LOG_DEFAULT, "%s, tx refill failed %d\n", __func__, rc);
574 	}
575 
576 	(void) kr_enter(ring, TRUE);
577 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
578 
579 	if (pcb->utun_kpipe_enabled) {
580 		uint32_t tx_available = kern_channel_available_slot_count(ring);
581 		if (pcb->utun_netif_txring_size > 0 &&
582 		    tx_available >= pcb->utun_netif_txring_size - 1) {
583 			// No room left in tx ring, disable output for now
584 			errno_t error = ifnet_disable_output(pcb->utun_ifp);
585 			if (error != 0) {
586 				os_log_error(OS_LOG_DEFAULT, "utun_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
587 			}
588 		}
589 	}
590 
591 	if (pcb->utun_kpipe_enabled) {
592 		kern_channel_ring_t __single rx_ring = pcb->utun_kpipe_rxring;
593 
594 		// Unlock while calling notify
595 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
596 		// Signal the kernel pipe ring to read
597 		if (rx_ring != NULL) {
598 			kern_channel_notify(rx_ring, 0);
599 		}
600 	} else {
601 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
602 	}
603 
604 	kr_exit(ring);
605 	utun_data_move_end(pcb);
606 	return 0;
607 }
608 
609 static errno_t
utun_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)610 utun_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
611     kern_channel_ring_t rx_ring, uint32_t flags)
612 {
613 #pragma unused(nxprov)
614 #pragma unused(flags)
615 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
616 	struct kern_channel_ring_stat_increment rx_ring_stats;
617 
618 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
619 
620 	if (!utun_data_move_begin(pcb)) {
621 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
622 		    __func__, if_name(pcb->utun_ifp));
623 		return 0;
624 	}
625 
626 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
627 
628 	// Reclaim user-released slots
629 	(void) kern_channel_reclaim(rx_ring);
630 
631 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
632 
633 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
634 	if (avail == 0) {
635 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
636 		utun_data_move_end(pcb);
637 		return 0;
638 	}
639 
640 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
641 	VERIFY(rx_pp != NULL);
642 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
643 	kern_channel_slot_t rx_pslot = NULL;
644 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
645 
646 	while (rx_slot != NULL) {
647 		// Check for a waiting packet
648 		lck_mtx_lock(&pcb->utun_input_chain_lock);
649 		mbuf_t data = pcb->utun_input_chain;
650 		if (data == NULL) {
651 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
652 			break;
653 		}
654 
655 		// Allocate rx packet
656 		kern_packet_t rx_ph = 0;
657 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
658 		if (__improbable(error != 0)) {
659 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
660 			STATS_INC(nifs, NETIF_STATS_DROP);
661 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
662 			break;
663 		}
664 
665 		// Advance waiting packets
666 		if (pcb->utun_input_chain_count > 0) {
667 			pcb->utun_input_chain_count--;
668 		}
669 		pcb->utun_input_chain = data->m_nextpkt;
670 		data->m_nextpkt = NULL;
671 		if (pcb->utun_input_chain == NULL) {
672 			pcb->utun_input_chain_last = NULL;
673 		}
674 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
675 
676 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
677 		size_t length = mbuf_pkthdr_len(data);
678 
679 		if (length < header_offset) {
680 			// mbuf is too small
681 			mbuf_freem(data);
682 			kern_pbufpool_free(rx_pp, rx_ph);
683 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
684 			STATS_INC(nifs, NETIF_STATS_DROP);
685 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length too short for header %zu < %zu\n",
686 			    pcb->utun_ifp->if_xname, length, header_offset);
687 			continue;
688 		}
689 
690 		length -= header_offset;
691 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
692 			// Flush data
693 			mbuf_freem(data);
694 			kern_pbufpool_free(rx_pp, rx_ph);
695 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
696 			STATS_INC(nifs, NETIF_STATS_DROP);
697 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length %zu > %u\n",
698 			    pcb->utun_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
699 			continue;
700 		}
701 
702 		mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
703 
704 		// Fillout rx packet
705 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
706 		VERIFY(rx_buf != NULL);
707 		void *__single rx_baddr = kern_buflet_get_data_address(rx_buf);
708 		VERIFY(rx_baddr != NULL);
709 
710 		// Copy-in data from mbuf to buflet
711 		mbuf_copydata(data, header_offset, length, (void *)rx_baddr);
712 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
713 
714 		// Finalize and attach the packet
715 		error = kern_buflet_set_data_offset(rx_buf, 0);
716 		VERIFY(error == 0);
717 		error = kern_buflet_set_data_length(rx_buf, length);
718 		VERIFY(error == 0);
719 		error = kern_packet_set_headroom(rx_ph, 0);
720 		VERIFY(error == 0);
721 		error = kern_packet_finalize(rx_ph);
722 		VERIFY(error == 0);
723 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
724 		VERIFY(error == 0);
725 
726 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
727 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
728 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
729 
730 		rx_ring_stats.kcrsi_slots_transferred++;
731 		rx_ring_stats.kcrsi_bytes_transferred += length;
732 
733 		mbuf_freem(data);
734 
735 		// Advance ring
736 		rx_pslot = rx_slot;
737 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
738 	}
739 
740 	struct kern_channel_ring_stat_increment tx_ring_stats;
741 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
742 	kern_channel_ring_t __single tx_ring = pcb->utun_kpipe_txring;
743 	kern_channel_slot_t tx_pslot = NULL;
744 	kern_channel_slot_t tx_slot = NULL;
745 	if (tx_ring == NULL) {
746 		// Net-If TX ring not set up yet, nothing to read
747 		goto done;
748 	}
749 	// Unlock utun before entering ring
750 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
751 
752 	(void)kr_enter(tx_ring, TRUE);
753 
754 	// Lock again after entering and validate
755 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
756 	if (tx_ring != pcb->utun_kpipe_txring) {
757 		goto done;
758 	}
759 
760 	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
761 	if (tx_slot == NULL) {
762 		// Nothing to read, don't bother signalling
763 		goto done;
764 	}
765 
766 	while (rx_slot != NULL && tx_slot != NULL) {
767 		// Allocate rx packet
768 		kern_packet_t rx_ph = 0;
769 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
770 
771 		// Advance TX ring
772 		tx_pslot = tx_slot;
773 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
774 
775 		/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
776 		if (tx_ph == 0) {
777 			continue;
778 		}
779 
780 		/* XXX We could try this alloc before advancing the slot to avoid
781 		 * dropping the packet on failure to allocate.
782 		 */
783 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
784 		if (__improbable(error != 0)) {
785 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
786 			STATS_INC(nifs, NETIF_STATS_DROP);
787 			break;
788 		}
789 
790 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
791 		VERIFY(tx_buf != NULL);
792 		uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
793 		    kern_buflet_get_data_address(tx_buf),
794 		    kern_buflet_get_data_limit(tx_buf));
795 		VERIFY(tx_baddr != 0);
796 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
797 
798 		// Check packet length
799 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
800 		uint32_t tx_length = kern_packet_get_data_length(tx_ph);
801 		if (tx_length < header_offset) {
802 			// Packet is too small
803 			kern_pbufpool_free(rx_pp, rx_ph);
804 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
805 			STATS_INC(nifs, NETIF_STATS_DROP);
806 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: packet length too short for header %u < %zu\n",
807 			    pcb->utun_ifp->if_xname, tx_length, header_offset);
808 			continue;
809 		}
810 
811 		size_t length = MIN(tx_length - header_offset,
812 		    pcb->utun_slot_size);
813 
814 		tx_ring_stats.kcrsi_slots_transferred++;
815 		tx_ring_stats.kcrsi_bytes_transferred += length;
816 
817 		// Fillout rx packet
818 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
819 		VERIFY(rx_buf != NULL);
820 		void *rx_baddr = __unsafe_forge_bidi_indexable(void *,
821 		    kern_buflet_get_data_address(rx_buf),
822 		    kern_buflet_get_data_limit(rx_buf));
823 		VERIFY(rx_baddr != NULL);
824 
825 		// Copy-in data from tx to rx
826 		memcpy((void *)rx_baddr, (void *)(tx_baddr + header_offset), length);
827 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
828 
829 		// Finalize and attach the packet
830 		error = kern_buflet_set_data_offset(rx_buf, 0);
831 		VERIFY(error == 0);
832 		error = kern_buflet_set_data_length(rx_buf, length);
833 		VERIFY(error == 0);
834 		error = kern_packet_set_headroom(rx_ph, 0);
835 		VERIFY(error == 0);
836 		error = kern_packet_finalize(rx_ph);
837 		VERIFY(error == 0);
838 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
839 		VERIFY(error == 0);
840 
841 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
842 		STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
843 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
844 
845 		rx_ring_stats.kcrsi_slots_transferred++;
846 		rx_ring_stats.kcrsi_bytes_transferred += length;
847 
848 		rx_pslot = rx_slot;
849 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
850 	}
851 
852 done:
853 	if (rx_pslot) {
854 		kern_channel_advance_slot(rx_ring, rx_pslot);
855 		kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
856 	}
857 
858 	if (tx_pslot) {
859 		kern_channel_advance_slot(tx_ring, tx_pslot);
860 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
861 		(void)kern_channel_reclaim(tx_ring);
862 	}
863 
864 	// Unlock first, then exit ring
865 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
866 	if (tx_ring != NULL) {
867 		if (tx_pslot != NULL) {
868 			kern_channel_notify(tx_ring, 0);
869 		}
870 		kr_exit(tx_ring);
871 	}
872 
873 	utun_data_move_end(pcb);
874 	return 0;
875 }
876 
877 static errno_t
utun_nexus_ifattach(struct utun_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)878 utun_nexus_ifattach(struct utun_pcb *pcb,
879     struct ifnet_init_eparams *init_params,
880     struct ifnet **ifp)
881 {
882 	errno_t err;
883 	nexus_controller_t controller = kern_nexus_shared_controller();
884 	struct kern_nexus_net_init net_init;
885 	struct kern_pbufpool_init pp_init;
886 
887 	nexus_name_t provider_name;
888 	snprintf((char *)provider_name, sizeof(provider_name),
889 	    "com.apple.netif.%s", pcb->utun_if_xname);
890 
891 	struct kern_nexus_provider_init prov_init = {
892 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
893 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
894 		.nxpi_pre_connect = utun_nexus_pre_connect,
895 		.nxpi_connected = utun_nexus_connected,
896 		.nxpi_pre_disconnect = utun_netif_pre_disconnect,
897 		.nxpi_disconnected = utun_nexus_disconnected,
898 		.nxpi_ring_init = utun_netif_ring_init,
899 		.nxpi_ring_fini = utun_netif_ring_fini,
900 		.nxpi_slot_init = NULL,
901 		.nxpi_slot_fini = NULL,
902 		.nxpi_sync_tx = utun_netif_sync_tx,
903 		.nxpi_sync_rx = utun_netif_sync_rx,
904 		.nxpi_tx_doorbell = utun_netif_tx_doorbell,
905 	};
906 
907 	nexus_attr_t __single nxa = NULL;
908 	err = kern_nexus_attr_create(&nxa);
909 	if (err != 0) {
910 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
911 		    __func__, err);
912 		goto failed;
913 	}
914 
915 	uint64_t slot_buffer_size = pcb->utun_slot_size;
916 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
917 	VERIFY(err == 0);
918 
919 	// Reset ring size for netif nexus to limit memory usage
920 	uint64_t ring_size = pcb->utun_netif_ring_size;
921 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
922 	VERIFY(err == 0);
923 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
924 	VERIFY(err == 0);
925 
926 	pcb->utun_netif_txring_size = ring_size;
927 
928 	bzero(&pp_init, sizeof(pp_init));
929 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
930 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
931 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
932 	pp_init.kbi_bufsize = pcb->utun_slot_size;
933 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
934 	pp_init.kbi_max_frags = 1;
935 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
936 	    "%s", provider_name);
937 	pp_init.kbi_ctx = NULL;
938 	pp_init.kbi_ctx_retain = NULL;
939 	pp_init.kbi_ctx_release = NULL;
940 
941 	err = kern_pbufpool_create(&pp_init, &pcb->utun_netif_pp, NULL);
942 	if (err != 0) {
943 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
944 		goto failed;
945 	}
946 
947 	err = kern_nexus_controller_register_provider(controller,
948 	    utun_nx_dom_prov,
949 	    provider_name,
950 	    &prov_init,
951 	    sizeof(prov_init),
952 	    nxa,
953 	    &pcb->utun_nx.if_provider);
954 	if (err != 0) {
955 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
956 		    __func__, err);
957 		goto failed;
958 	}
959 
960 	bzero(&net_init, sizeof(net_init));
961 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
962 	net_init.nxneti_flags = 0;
963 	net_init.nxneti_eparams = init_params;
964 	net_init.nxneti_lladdr = NULL;
965 	net_init.nxneti_prepare = utun_netif_prepare;
966 	net_init.nxneti_rx_pbufpool = pcb->utun_netif_pp;
967 	net_init.nxneti_tx_pbufpool = pcb->utun_netif_pp;
968 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
969 	    pcb->utun_nx.if_provider,
970 	    pcb,
971 	    NULL,
972 	    &pcb->utun_nx.if_instance,
973 	    &net_init,
974 	    ifp);
975 	if (err != 0) {
976 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
977 		    __func__, err);
978 		kern_nexus_controller_deregister_provider(controller,
979 		    pcb->utun_nx.if_provider);
980 		uuid_clear(pcb->utun_nx.if_provider);
981 		goto failed;
982 	}
983 
984 failed:
985 	if (nxa) {
986 		kern_nexus_attr_destroy(nxa);
987 	}
988 	if (err && pcb->utun_netif_pp != NULL) {
989 		kern_pbufpool_destroy(pcb->utun_netif_pp);
990 		pcb->utun_netif_pp = NULL;
991 	}
992 	return err;
993 }
994 
995 static void
utun_detach_provider_and_instance(uuid_t provider,uuid_t instance)996 utun_detach_provider_and_instance(uuid_t provider, uuid_t instance)
997 {
998 	nexus_controller_t controller = kern_nexus_shared_controller();
999 	errno_t err;
1000 
1001 	if (!uuid_is_null(instance)) {
1002 		err = kern_nexus_controller_free_provider_instance(controller,
1003 		    instance);
1004 		if (err != 0) {
1005 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1006 			    __func__, err);
1007 		}
1008 		uuid_clear(instance);
1009 	}
1010 	if (!uuid_is_null(provider)) {
1011 		err = kern_nexus_controller_deregister_provider(controller,
1012 		    provider);
1013 		if (err != 0) {
1014 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1015 		}
1016 		uuid_clear(provider);
1017 	}
1018 	return;
1019 }
1020 
1021 static void
utun_nexus_detach(struct utun_pcb * pcb)1022 utun_nexus_detach(struct utun_pcb *pcb)
1023 {
1024 	utun_nx_t nx = &pcb->utun_nx;
1025 	nexus_controller_t controller = kern_nexus_shared_controller();
1026 	errno_t err;
1027 
1028 	if (!uuid_is_null(nx->fsw_device)) {
1029 		err = kern_nexus_ifdetach(controller,
1030 		    nx->fsw_instance,
1031 		    nx->fsw_device);
1032 		if (err != 0) {
1033 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
1034 			    __func__, err);
1035 		}
1036 	}
1037 
1038 	utun_detach_provider_and_instance(nx->fsw_provider,
1039 	    nx->fsw_instance);
1040 	utun_detach_provider_and_instance(nx->if_provider,
1041 	    nx->if_instance);
1042 
1043 	if (pcb->utun_netif_pp != NULL) {
1044 		kern_pbufpool_destroy(pcb->utun_netif_pp);
1045 		pcb->utun_netif_pp = NULL;
1046 	}
1047 	memset(nx, 0, sizeof(*nx));
1048 }
1049 
1050 static errno_t
utun_create_fs_provider_and_instance(struct utun_pcb * pcb,const char * type_name,const char * ifname,uuid_t * provider,uuid_t * instance)1051 utun_create_fs_provider_and_instance(struct utun_pcb *pcb,
1052     const char *type_name,
1053     const char *ifname,
1054     uuid_t *provider, uuid_t *instance)
1055 {
1056 	nexus_attr_t __single attr = NULL;
1057 	nexus_controller_t controller = kern_nexus_shared_controller();
1058 	uuid_t dom_prov;
1059 	errno_t err;
1060 	struct kern_nexus_init init;
1061 	nexus_name_t    provider_name;
1062 
1063 	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1064 	    &dom_prov);
1065 	if (err != 0) {
1066 		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
1067 		    __func__, type_name, err);
1068 		goto failed;
1069 	}
1070 
1071 	err = kern_nexus_attr_create(&attr);
1072 	if (err != 0) {
1073 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1074 		    __func__, err);
1075 		goto failed;
1076 	}
1077 
1078 	uint64_t slot_buffer_size = pcb->utun_slot_size;
1079 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1080 	VERIFY(err == 0);
1081 
1082 	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1083 	uint64_t tx_ring_size = pcb->utun_tx_fsw_ring_size;
1084 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1085 	VERIFY(err == 0);
1086 	uint64_t rx_ring_size = pcb->utun_rx_fsw_ring_size;
1087 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1088 	VERIFY(err == 0);
1089 	/*
1090 	 * Configure flowswitch to use super-packet (multi-buflet).
1091 	 * This allows flowswitch to perform intra-stack packet aggregation.
1092 	 */
1093 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
1094 	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
1095 	VERIFY(err == 0);
1096 
1097 	snprintf((char *)provider_name, sizeof(provider_name),
1098 	    "com.apple.%s.%s", type_name, ifname);
1099 	err = kern_nexus_controller_register_provider(controller,
1100 	    dom_prov,
1101 	    provider_name,
1102 	    NULL,
1103 	    0,
1104 	    attr,
1105 	    provider);
1106 	kern_nexus_attr_destroy(attr);
1107 	attr = NULL;
1108 	if (err != 0) {
1109 		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
1110 		    __func__, type_name, err);
1111 		goto failed;
1112 	}
1113 	bzero(&init, sizeof(init));
1114 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1115 	err = kern_nexus_controller_alloc_provider_instance(controller,
1116 	    *provider,
1117 	    NULL, NULL,
1118 	    instance, &init);
1119 	if (err != 0) {
1120 		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
1121 		    __func__, type_name, err);
1122 		kern_nexus_controller_deregister_provider(controller,
1123 		    *provider);
1124 		uuid_clear(*provider);
1125 	}
1126 failed:
1127 	return err;
1128 }
1129 
1130 static errno_t
utun_flowswitch_attach(struct utun_pcb * pcb)1131 utun_flowswitch_attach(struct utun_pcb *pcb)
1132 {
1133 	nexus_controller_t controller = kern_nexus_shared_controller();
1134 	errno_t err = 0;
1135 	utun_nx_t nx = &pcb->utun_nx;
1136 
1137 	// Allocate flowswitch
1138 	err = utun_create_fs_provider_and_instance(pcb,
1139 	    "flowswitch",
1140 	    pcb->utun_ifp->if_xname,
1141 	    &nx->fsw_provider,
1142 	    &nx->fsw_instance);
1143 	if (err != 0) {
1144 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
1145 		    __func__);
1146 		goto failed;
1147 	}
1148 
1149 	// Attach flowswitch to device port
1150 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
1151 	    NULL, nx->if_instance,
1152 	    FALSE, &nx->fsw_device);
1153 	if (err != 0) {
1154 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
1155 		goto failed;
1156 	}
1157 
1158 	// Extract the agent UUID and save for later
1159 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
1160 	if (flowswitch_nx != NULL) {
1161 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
1162 		if (flowswitch != NULL) {
1163 			FSW_RLOCK(flowswitch);
1164 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
1165 			FSW_UNLOCK(flowswitch);
1166 		} else {
1167 			os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - flowswitch is NULL\n");
1168 		}
1169 		nx_release(flowswitch_nx);
1170 	} else {
1171 		os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - unable to find flowswitch nexus\n");
1172 	}
1173 
1174 	return 0;
1175 
1176 failed:
1177 	utun_nexus_detach(pcb);
1178 
1179 	errno_t detach_error = 0;
1180 	if ((detach_error = ifnet_detach(pcb->utun_ifp)) != 0) {
1181 		panic("utun_flowswitch_attach - ifnet_detach failed: %d", detach_error);
1182 		/* NOT REACHED */
1183 	}
1184 
1185 	return err;
1186 }
1187 
1188 static errno_t
utun_register_kernel_pipe_nexus(struct utun_pcb * pcb)1189 utun_register_kernel_pipe_nexus(struct utun_pcb *pcb)
1190 {
1191 	nexus_attr_t __single nxa = NULL;
1192 	errno_t result;
1193 
1194 	lck_mtx_lock(&utun_lock);
1195 	if (utun_ncd_refcount++) {
1196 		lck_mtx_unlock(&utun_lock);
1197 		return 0;
1198 	}
1199 
1200 	result = kern_nexus_controller_create(&utun_ncd);
1201 	if (result) {
1202 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
1203 		    __FUNCTION__, result);
1204 		goto done;
1205 	}
1206 
1207 	uuid_t dom_prov;
1208 	result = kern_nexus_get_default_domain_provider(
1209 		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1210 	if (result) {
1211 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
1212 		    __FUNCTION__, result);
1213 		goto done;
1214 	}
1215 
1216 	struct kern_nexus_provider_init prov_init = {
1217 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1218 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1219 		.nxpi_pre_connect = utun_nexus_pre_connect,
1220 		.nxpi_connected = utun_nexus_connected,
1221 		.nxpi_pre_disconnect = utun_nexus_pre_disconnect,
1222 		.nxpi_disconnected = utun_nexus_disconnected,
1223 		.nxpi_ring_init = utun_kpipe_ring_init,
1224 		.nxpi_ring_fini = utun_kpipe_ring_fini,
1225 		.nxpi_slot_init = NULL,
1226 		.nxpi_slot_fini = NULL,
1227 		.nxpi_sync_tx = utun_kpipe_sync_tx,
1228 		.nxpi_sync_rx = utun_kpipe_sync_rx,
1229 		.nxpi_tx_doorbell = NULL,
1230 	};
1231 
1232 	result = kern_nexus_attr_create(&nxa);
1233 	if (result) {
1234 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1235 		    __FUNCTION__, result);
1236 		goto done;
1237 	}
1238 
1239 	uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1240 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1241 	VERIFY(result == 0);
1242 
1243 	// Reset ring size for kernel pipe nexus to limit memory usage
1244 	uint64_t ring_size =
1245 	    pcb->utun_kpipe_tx_ring_size != 0 ? pcb->utun_kpipe_tx_ring_size :
1246 	    if_utun_ring_size;
1247 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1248 	VERIFY(result == 0);
1249 
1250 	ring_size =
1251 	    pcb->utun_kpipe_rx_ring_size != 0 ? pcb->utun_kpipe_rx_ring_size :
1252 	    if_utun_ring_size;
1253 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1254 	VERIFY(result == 0);
1255 
1256 	nexus_domain_provider_name_t domain_provider_name = "com.apple.nexus.utun.kpipe";
1257 
1258 	result = kern_nexus_controller_register_provider(utun_ncd,
1259 	    dom_prov,
1260 	    domain_provider_name,
1261 	    &prov_init,
1262 	    sizeof(prov_init),
1263 	    nxa,
1264 	    &utun_kpipe_uuid);
1265 	if (result) {
1266 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
1267 		    __FUNCTION__, result);
1268 		goto done;
1269 	}
1270 
1271 done:
1272 	if (nxa) {
1273 		kern_nexus_attr_destroy(nxa);
1274 	}
1275 
1276 	if (result) {
1277 		if (utun_ncd) {
1278 			kern_nexus_controller_destroy(utun_ncd);
1279 			utun_ncd = NULL;
1280 		}
1281 		utun_ncd_refcount = 0;
1282 	}
1283 
1284 	lck_mtx_unlock(&utun_lock);
1285 
1286 	return result;
1287 }
1288 
1289 static void
utun_unregister_kernel_pipe_nexus(void)1290 utun_unregister_kernel_pipe_nexus(void)
1291 {
1292 	lck_mtx_lock(&utun_lock);
1293 
1294 	VERIFY(utun_ncd_refcount > 0);
1295 
1296 	if (--utun_ncd_refcount == 0) {
1297 		kern_nexus_controller_destroy(utun_ncd);
1298 		utun_ncd = NULL;
1299 	}
1300 
1301 	lck_mtx_unlock(&utun_lock);
1302 }
1303 
1304 // For use by socket option, not internally
1305 static errno_t
utun_disable_channel(struct utun_pcb * pcb)1306 utun_disable_channel(struct utun_pcb *pcb)
1307 {
1308 	errno_t result;
1309 	int enabled;
1310 	uuid_t uuid;
1311 
1312 	/* Wait until all threads in the data paths are done. */
1313 	utun_wait_data_move_drain(pcb);
1314 
1315 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1316 
1317 	enabled = pcb->utun_kpipe_enabled;
1318 	uuid_copy(uuid, pcb->utun_kpipe_uuid);
1319 
1320 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid) == !enabled);
1321 
1322 	pcb->utun_kpipe_enabled = 0;
1323 	uuid_clear(pcb->utun_kpipe_uuid);
1324 
1325 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1326 
1327 	if (enabled) {
1328 		result = kern_nexus_controller_free_provider_instance(utun_ncd, uuid);
1329 	} else {
1330 		result = ENXIO;
1331 	}
1332 
1333 	if (!result) {
1334 		if (pcb->utun_kpipe_pp != NULL) {
1335 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1336 			pcb->utun_kpipe_pp = NULL;
1337 		}
1338 		utun_unregister_kernel_pipe_nexus();
1339 	}
1340 
1341 	return result;
1342 }
1343 
1344 static errno_t
utun_enable_channel(struct utun_pcb * pcb,struct proc * proc)1345 utun_enable_channel(struct utun_pcb *pcb, struct proc *proc)
1346 {
1347 	struct kern_nexus_init init;
1348 	struct kern_pbufpool_init pp_init;
1349 	errno_t result;
1350 
1351 	kauth_cred_t cred = kauth_cred_get();
1352 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
1353 	if (result) {
1354 		return result;
1355 	}
1356 
1357 	result = utun_register_kernel_pipe_nexus(pcb);
1358 	if (result) {
1359 		return result;
1360 	}
1361 
1362 	VERIFY(utun_ncd);
1363 
1364 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1365 
1366 	if (pcb->utun_kpipe_enabled) {
1367 		result = EEXIST; // return success instead?
1368 		goto done;
1369 	}
1370 
1371 	/*
1372 	 * Make sure we can fit packets in the channel buffers and
1373 	 * Allow an extra 4 bytes for the protocol number header in the channel
1374 	 */
1375 	if (pcb->utun_ifp->if_mtu + UTUN_HEADER_SIZE(pcb) > pcb->utun_slot_size) {
1376 		result = EOPNOTSUPP;
1377 		goto done;
1378 	}
1379 
1380 	bzero(&pp_init, sizeof(pp_init));
1381 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1382 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1383 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
1384 	pp_init.kbi_bufsize = pcb->utun_slot_size;
1385 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
1386 	pp_init.kbi_max_frags = 1;
1387 	pp_init.kbi_flags |= KBIF_QUANTUM;
1388 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1389 	    "com.apple.kpipe.%s", pcb->utun_if_xname);
1390 	pp_init.kbi_ctx = NULL;
1391 	pp_init.kbi_ctx_retain = NULL;
1392 	pp_init.kbi_ctx_release = NULL;
1393 
1394 	result = kern_pbufpool_create(&pp_init, &pcb->utun_kpipe_pp,
1395 	    NULL);
1396 	if (result != 0) {
1397 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, result);
1398 		goto done;
1399 	}
1400 
1401 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid));
1402 	bzero(&init, sizeof(init));
1403 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1404 	init.nxi_tx_pbufpool = pcb->utun_kpipe_pp;
1405 	result = kern_nexus_controller_alloc_provider_instance(utun_ncd,
1406 	    utun_kpipe_uuid, pcb, NULL, &pcb->utun_kpipe_uuid, &init);
1407 	if (result) {
1408 		goto done;
1409 	}
1410 
1411 	nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
1412 	uuid_t uuid_null = {};
1413 	result = kern_nexus_controller_bind_provider_instance(utun_ncd,
1414 	    pcb->utun_kpipe_uuid, &port,
1415 	    proc_pid(proc), uuid_null, NULL, 0, NEXUS_BIND_PID);
1416 	if (result) {
1417 		kern_nexus_controller_free_provider_instance(utun_ncd,
1418 		    pcb->utun_kpipe_uuid);
1419 		uuid_clear(pcb->utun_kpipe_uuid);
1420 		goto done;
1421 	}
1422 
1423 	pcb->utun_kpipe_enabled = 1;
1424 
1425 done:
1426 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1427 
1428 	if (result) {
1429 		if (pcb->utun_kpipe_pp != NULL) {
1430 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1431 			pcb->utun_kpipe_pp = NULL;
1432 		}
1433 		utun_unregister_kernel_pipe_nexus();
1434 	}
1435 
1436 	return result;
1437 }
1438 
1439 #endif // UTUN_NEXUS
1440 
1441 errno_t
utun_register_control(void)1442 utun_register_control(void)
1443 {
1444 	struct kern_ctl_reg kern_ctl;
1445 	errno_t result = 0;
1446 
1447 #if UTUN_NEXUS
1448 	utun_register_nexus();
1449 #endif // UTUN_NEXUS
1450 
1451 	TAILQ_INIT(&utun_head);
1452 
1453 	bzero(&kern_ctl, sizeof(kern_ctl));
1454 	strlcpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
1455 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
1456 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP | CTL_FLAG_REG_EXTENDED; /* Require root */
1457 	kern_ctl.ctl_sendsize = 512 * 1024;
1458 	kern_ctl.ctl_recvsize = 512 * 1024;
1459 	kern_ctl.ctl_setup = utun_ctl_setup;
1460 	kern_ctl.ctl_bind = utun_ctl_bind;
1461 	kern_ctl.ctl_connect = utun_ctl_connect;
1462 	kern_ctl.ctl_disconnect = utun_ctl_disconnect;
1463 	kern_ctl.ctl_send = utun_ctl_send;
1464 	kern_ctl.ctl_setopt = utun_ctl_setopt;
1465 	kern_ctl.ctl_getopt = utun_ctl_getopt;
1466 	kern_ctl.ctl_rcvd = utun_ctl_rcvd;
1467 
1468 	result = ctl_register(&kern_ctl, &utun_kctlref);
1469 	if (result != 0) {
1470 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - ctl_register failed: %d\n", result);
1471 		return result;
1472 	}
1473 
1474 	/* Register the protocol plumbers */
1475 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN,
1476 	    utun_attach_proto, NULL)) != 0) {
1477 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN) failed: %d\n",
1478 		    result);
1479 		ctl_deregister(utun_kctlref);
1480 		return result;
1481 	}
1482 
1483 	/* Register the protocol plumbers */
1484 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN,
1485 	    utun_attach_proto, NULL)) != 0) {
1486 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_UTUN);
1487 		ctl_deregister(utun_kctlref);
1488 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN) failed: %d\n",
1489 		    result);
1490 		return result;
1491 	}
1492 
1493 	return 0;
1494 }
1495 
1496 /* Kernel control functions */
1497 
1498 static inline int
utun_find_by_unit(u_int32_t unit)1499 utun_find_by_unit(u_int32_t unit)
1500 {
1501 	struct utun_pcb *next_pcb = NULL;
1502 	int found = 0;
1503 
1504 	TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1505 		if (next_pcb->utun_unit == unit) {
1506 			found = 1;
1507 			break;
1508 		}
1509 	}
1510 
1511 	return found;
1512 }
1513 
1514 static inline void
utun_free_pcb(struct utun_pcb * pcb,bool locked)1515 utun_free_pcb(struct utun_pcb *pcb, bool locked)
1516 {
1517 #if UTUN_NEXUS
1518 	mbuf_freem_list(pcb->utun_input_chain);
1519 	pcb->utun_input_chain_count = 0;
1520 	lck_mtx_destroy(&pcb->utun_input_chain_lock, &utun_lck_grp);
1521 	lck_mtx_destroy(&pcb->utun_pcb_data_move_lock, &utun_lck_grp);
1522 #endif // UTUN_NEXUS
1523 	lck_rw_destroy(&pcb->utun_pcb_lock, &utun_lck_grp);
1524 	if (!locked) {
1525 		lck_mtx_lock(&utun_lock);
1526 	}
1527 	TAILQ_REMOVE(&utun_head, pcb, utun_chain);
1528 	if (!locked) {
1529 		lck_mtx_unlock(&utun_lock);
1530 	}
1531 	zfree(utun_pcb_zone, pcb);
1532 }
1533 
1534 static errno_t
utun_ctl_setup(u_int32_t * unit,void ** unitinfo)1535 utun_ctl_setup(u_int32_t *unit, void **unitinfo)
1536 {
1537 	if (unit == NULL || unitinfo == NULL) {
1538 		return EINVAL;
1539 	}
1540 
1541 	lck_mtx_lock(&utun_lock);
1542 
1543 	/* Find next available unit */
1544 	if (*unit == 0) {
1545 		*unit = 1;
1546 		while (*unit != ctl_maxunit) {
1547 			if (utun_find_by_unit(*unit)) {
1548 				(*unit)++;
1549 			} else {
1550 				break;
1551 			}
1552 		}
1553 		if (*unit == ctl_maxunit) {
1554 			lck_mtx_unlock(&utun_lock);
1555 			return EBUSY;
1556 		}
1557 	} else if (utun_find_by_unit(*unit)) {
1558 		lck_mtx_unlock(&utun_lock);
1559 		return EBUSY;
1560 	}
1561 
1562 	/* Find some open interface id */
1563 	u_int32_t chosen_unique_id = 1;
1564 	struct utun_pcb *next_pcb = TAILQ_LAST(&utun_head, utun_list);
1565 	if (next_pcb != NULL) {
1566 		/* List was not empty, add one to the last item */
1567 		chosen_unique_id = next_pcb->utun_unique_id + 1;
1568 		next_pcb = NULL;
1569 
1570 		/*
1571 		 * If this wrapped the id number, start looking at
1572 		 * the front of the list for an unused id.
1573 		 */
1574 		if (chosen_unique_id == 0) {
1575 			/* Find the next unused ID */
1576 			chosen_unique_id = 1;
1577 			TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1578 				if (next_pcb->utun_unique_id > chosen_unique_id) {
1579 					/* We found a gap */
1580 					break;
1581 				}
1582 
1583 				chosen_unique_id = next_pcb->utun_unique_id + 1;
1584 			}
1585 		}
1586 	}
1587 
1588 	struct utun_pcb *__single pcb = zalloc_flags(utun_pcb_zone, Z_WAITOK | Z_ZERO);
1589 
1590 	*unitinfo = pcb;
1591 	pcb->utun_unit = *unit;
1592 	pcb->utun_unique_id = chosen_unique_id;
1593 
1594 	if (next_pcb != NULL) {
1595 		TAILQ_INSERT_BEFORE(next_pcb, pcb, utun_chain);
1596 	} else {
1597 		TAILQ_INSERT_TAIL(&utun_head, pcb, utun_chain);
1598 	}
1599 
1600 	lck_mtx_unlock(&utun_lock);
1601 
1602 	return 0;
1603 }
1604 
1605 static errno_t
utun_ctl_bind(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1606 utun_ctl_bind(kern_ctl_ref kctlref,
1607     struct sockaddr_ctl *sac,
1608     void **unitinfo)
1609 {
1610 	if (*unitinfo == NULL) {
1611 		u_int32_t unit = 0;
1612 		(void)utun_ctl_setup(&unit, unitinfo);
1613 	}
1614 
1615 	struct utun_pcb *__single pcb = (struct utun_pcb *)*unitinfo;
1616 	if (pcb == NULL) {
1617 		return EINVAL;
1618 	}
1619 
1620 	if (pcb->utun_ctlref != NULL) {
1621 		// Return if bind was already called
1622 		return EINVAL;
1623 	}
1624 
1625 	pcb->utun_ctlref = kctlref;
1626 	pcb->utun_unit = sac->sc_unit;
1627 	pcb->utun_max_pending_packets = 1;
1628 
1629 #if UTUN_NEXUS
1630 	pcb->utun_use_netif = false;
1631 	pcb->utun_attach_fsw = true;
1632 	pcb->utun_netif_connected = false;
1633 	pcb->utun_slot_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1634 	pcb->utun_netif_ring_size = if_utun_ring_size;
1635 	pcb->utun_tx_fsw_ring_size = if_utun_tx_fsw_ring_size;
1636 	pcb->utun_rx_fsw_ring_size = if_utun_rx_fsw_ring_size;
1637 	pcb->utun_input_chain_count = 0;
1638 	lck_mtx_init(&pcb->utun_input_chain_lock, &utun_lck_grp, &utun_lck_attr);
1639 	lck_mtx_init(&pcb->utun_pcb_data_move_lock,
1640 	    &utun_lck_grp, &utun_lck_attr);
1641 #endif // UTUN_NEXUS
1642 
1643 	lck_rw_init(&pcb->utun_pcb_lock, &utun_lck_grp, &utun_lck_attr);
1644 
1645 	return 0;
1646 }
1647 
1648 static errno_t
utun_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1649 utun_ctl_connect(kern_ctl_ref kctlref,
1650     struct sockaddr_ctl *sac,
1651     void **unitinfo)
1652 {
1653 	struct ifnet_init_eparams utun_init = {};
1654 	errno_t result = 0;
1655 
1656 	if (*unitinfo == NULL) {
1657 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1658 	}
1659 
1660 	struct utun_pcb *__single pcb = *unitinfo;
1661 	if (pcb == NULL) {
1662 		return EINVAL;
1663 	}
1664 
1665 	/* Handle case where utun_ctl_setup() was called, but ipsec_ctl_bind() was not */
1666 	if (pcb->utun_ctlref == NULL) {
1667 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1668 	}
1669 
1670 	snprintf(pcb->utun_if_xname, sizeof(pcb->utun_if_xname), "utun%d", pcb->utun_unit - 1);
1671 	snprintf(pcb->utun_unique_name, sizeof(pcb->utun_unique_name), "utunid%d", pcb->utun_unique_id - 1);
1672 
1673 	/* Create the interface */
1674 	bzero(&utun_init, sizeof(utun_init));
1675 	utun_init.ver = IFNET_INIT_CURRENT_VERSION;
1676 	utun_init.len = sizeof(utun_init);
1677 
1678 #if UTUN_NEXUS
1679 	if (pcb->utun_use_netif) {
1680 		utun_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
1681 		utun_init.tx_headroom = UTUN_IF_HEADROOM_SIZE;
1682 	} else
1683 #endif // UTUN_NEXUS
1684 	{
1685 		utun_init.flags = IFNET_INIT_NX_NOAUTO;
1686 		utun_init.start = utun_start;
1687 		utun_init.framer_extended = utun_framer;
1688 	}
1689 	utun_init.name = "utun";
1690 	utun_init.unit = pcb->utun_unit - 1;
1691 	utun_init.uniqueid_len = strbuflen(pcb->utun_unique_name);
1692 	utun_init.uniqueid = pcb->utun_unique_name;
1693 	utun_init.family = IFNET_FAMILY_UTUN;
1694 	utun_init.type = IFT_OTHER;
1695 	utun_init.demux = utun_demux;
1696 	utun_init.add_proto = utun_add_proto;
1697 	utun_init.del_proto = utun_del_proto;
1698 	utun_init.softc = pcb;
1699 	utun_init.ioctl = utun_ioctl;
1700 	utun_init.free = utun_detached;
1701 
1702 #if UTUN_NEXUS
1703 	if (pcb->utun_use_netif) {
1704 		result = utun_nexus_ifattach(pcb, &utun_init, &pcb->utun_ifp);
1705 		if (result != 0) {
1706 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_nexus_ifattach failed: %d\n", result);
1707 			utun_free_pcb(pcb, false);
1708 			*unitinfo = NULL;
1709 			return result;
1710 		}
1711 
1712 		if (pcb->utun_attach_fsw) {
1713 			result = utun_flowswitch_attach(pcb);
1714 			if (result != 0) {
1715 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_flowswitch_attach failed: %d\n", result);
1716 				// Do not call utun_free_pcb(). We will be attached already, and will be freed later
1717 				// in utun_detached().
1718 				*unitinfo = NULL;
1719 				return result;
1720 			}
1721 		}
1722 
1723 		/* Attach to bpf */
1724 		bpfattach(pcb->utun_ifp, DLT_RAW, 0);
1725 	} else
1726 #endif // UTUN_NEXUS
1727 	{
1728 		/*
1729 		 * Upon success, this holds an ifnet reference which we will
1730 		 * release via ifnet_release() at final detach time.
1731 		 */
1732 		result = ifnet_allocate_extended(&utun_init, &pcb->utun_ifp);
1733 		if (result != 0) {
1734 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_allocate failed: %d\n", result);
1735 			utun_free_pcb(pcb, false);
1736 			*unitinfo = NULL;
1737 			return result;
1738 		}
1739 
1740 		/* Set flags and additional information. */
1741 		ifnet_set_mtu(pcb->utun_ifp, UTUN_DEFAULT_MTU);
1742 		ifnet_set_flags(pcb->utun_ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
1743 
1744 		/* The interface must generate its own IPv6 LinkLocal address,
1745 		 * if possible following the recommendation of RFC2472 to the 64bit interface ID
1746 		 */
1747 		ifnet_set_eflags(pcb->utun_ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
1748 
1749 		/* Reset the stats in case as the interface may have been recycled */
1750 		struct ifnet_stats_param stats;
1751 		bzero(&stats, sizeof(struct ifnet_stats_param));
1752 		ifnet_set_stat(pcb->utun_ifp, &stats);
1753 
1754 		/* Attach the interface */
1755 		result = ifnet_attach(pcb->utun_ifp, NULL);
1756 		if (result != 0) {
1757 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_attach failed: %d\n", result);
1758 			/* Release reference now since attach failed */
1759 			ifnet_release(pcb->utun_ifp);
1760 			utun_free_pcb(pcb, false);
1761 			*unitinfo = NULL;
1762 			return result;
1763 		}
1764 
1765 		/* Attach to bpf */
1766 		bpfattach(pcb->utun_ifp, DLT_NULL, UTUN_HEADER_SIZE(pcb));
1767 
1768 #if UTUN_NEXUS
1769 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
1770 		UTUN_SET_DATA_PATH_READY(pcb);
1771 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
1772 #endif // UTUN_NEXUS
1773 	}
1774 
1775 	/* The interfaces resoures allocated, mark it as running */
1776 	ifnet_set_flags(pcb->utun_ifp, IFF_RUNNING, IFF_RUNNING);
1777 
1778 	return result;
1779 }
1780 
1781 static errno_t
utun_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)1782 utun_detach_ip(ifnet_t interface,
1783     protocol_family_t protocol,
1784     socket_t pf_socket)
1785 {
1786 	errno_t result = EPROTONOSUPPORT;
1787 
1788 	/* Attempt a detach */
1789 	if (protocol == PF_INET) {
1790 		struct ifreq    ifr;
1791 
1792 		bzero(&ifr, sizeof(ifr));
1793 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1794 		    ifnet_name(interface), ifnet_unit(interface));
1795 
1796 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
1797 	} else if (protocol == PF_INET6) {
1798 		struct in6_ifreq        ifr6;
1799 
1800 		bzero(&ifr6, sizeof(ifr6));
1801 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1802 		    ifnet_name(interface), ifnet_unit(interface));
1803 
1804 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
1805 	}
1806 
1807 	return result;
1808 }
1809 
1810 static void
utun_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)1811 utun_remove_address(ifnet_t interface,
1812     protocol_family_t protocol,
1813     ifaddr_t address,
1814     socket_t pf_socket)
1815 {
1816 	errno_t result = 0;
1817 
1818 	/* Attempt a detach */
1819 	if (protocol == PF_INET) {
1820 		struct ifreq ifr;
1821 
1822 		bzero(&ifr, sizeof(ifr));
1823 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1824 		    ifnet_name(interface), ifnet_unit(interface));
1825 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
1826 		if (result != 0) {
1827 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed: %d", result);
1828 		} else {
1829 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
1830 			if (result != 0) {
1831 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR failed: %d", result);
1832 			}
1833 		}
1834 	} else if (protocol == PF_INET6) {
1835 		struct in6_ifreq ifr6;
1836 
1837 		bzero(&ifr6, sizeof(ifr6));
1838 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1839 		    ifnet_name(interface), ifnet_unit(interface));
1840 		result = ifaddr_address(address, SA(&ifr6.ifr_addr),
1841 		    sizeof(ifr6.ifr_addr));
1842 		if (result != 0) {
1843 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed (v6): %d",
1844 			    result);
1845 		} else {
1846 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
1847 			if (result != 0) {
1848 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR_IN6 failed: %d",
1849 				    result);
1850 			}
1851 		}
1852 	}
1853 }
1854 
1855 static void
utun_cleanup_family(ifnet_t interface,protocol_family_t protocol)1856 utun_cleanup_family(ifnet_t interface,
1857     protocol_family_t protocol)
1858 {
1859 	errno_t result = 0;
1860 	socket_ref_t pf_socket = NULL;
1861 	ifaddr_t *__null_terminated addresses = NULL;
1862 
1863 	if (protocol != PF_INET && protocol != PF_INET6) {
1864 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - invalid protocol family %d\n", protocol);
1865 		return;
1866 	}
1867 
1868 	/* Create a socket for removing addresses and detaching the protocol */
1869 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
1870 	if (result != 0) {
1871 		if (result != EAFNOSUPPORT) {
1872 			os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - failed to create %s socket: %d\n",
1873 			    protocol == PF_INET ? "IP" : "IPv6", result);
1874 		}
1875 		goto cleanup;
1876 	}
1877 
1878 	/* always set SS_PRIV, we want to close and detach regardless */
1879 	sock_setpriv(pf_socket, 1);
1880 
1881 	result = utun_detach_ip(interface, protocol, pf_socket);
1882 	if (result == 0 || result == ENXIO) {
1883 		/* We are done! We either detached or weren't attached. */
1884 		goto cleanup;
1885 	} else if (result != EBUSY) {
1886 		/* Uh, not really sure what happened here... */
1887 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1888 		goto cleanup;
1889 	}
1890 
1891 	/*
1892 	 * At this point, we received an EBUSY error. This means there are
1893 	 * addresses attached. We should detach them and then try again.
1894 	 */
1895 	result = ifnet_get_address_list_family(interface, &addresses, protocol);
1896 	if (result != 0) {
1897 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
1898 		    ifnet_name(interface), ifnet_unit(interface),
1899 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
1900 		goto cleanup;
1901 	}
1902 
1903 	for (ifaddr_t *__null_terminated addr = addresses; *addr != NULL; addr++) {
1904 		utun_remove_address(interface, protocol, *addr, pf_socket);
1905 	}
1906 	ifnet_free_address_list(addresses);
1907 	addresses = NULL;
1908 
1909 	/*
1910 	 * The addresses should be gone, we should try the remove again.
1911 	 */
1912 	result = utun_detach_ip(interface, protocol, pf_socket);
1913 	if (result != 0 && result != ENXIO) {
1914 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1915 	}
1916 
1917 cleanup:
1918 	if (pf_socket != NULL) {
1919 		sock_close(pf_socket);
1920 	}
1921 
1922 	if (addresses != NULL) {
1923 		ifnet_free_address_list(addresses);
1924 	}
1925 }
1926 
1927 static errno_t
utun_ctl_disconnect(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo)1928 utun_ctl_disconnect(__unused kern_ctl_ref kctlref,
1929     __unused u_int32_t unit,
1930     void *unitinfo)
1931 {
1932 	struct utun_pcb *__single pcb = unitinfo;
1933 	ifnet_t ifp = NULL;
1934 	errno_t result = 0;
1935 
1936 	if (pcb == NULL) {
1937 		return EINVAL;
1938 	}
1939 
1940 #if UTUN_NEXUS
1941 	/* Wait until all threads in the data paths are done. */
1942 	utun_wait_data_move_drain(pcb);
1943 	// Tell the nexus to stop all rings
1944 	if (pcb->utun_netif_nexus != NULL && pcb->utun_netif_connected) {
1945 		kern_nexus_stop(pcb->utun_netif_nexus);
1946 	}
1947 #endif // UTUN_NEXUS
1948 
1949 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1950 
1951 #if UTUN_NEXUS
1952 	uuid_t kpipe_uuid;
1953 	uuid_copy(kpipe_uuid, pcb->utun_kpipe_uuid);
1954 	uuid_clear(pcb->utun_kpipe_uuid);
1955 	pcb->utun_kpipe_enabled = FALSE;
1956 #endif // UTUN_NEXUS
1957 
1958 	pcb->utun_ctlref = NULL;
1959 
1960 	ifp = pcb->utun_ifp;
1961 	if (ifp != NULL) {
1962 #if UTUN_NEXUS
1963 		// Tell the nexus to stop all rings
1964 		if (pcb->utun_netif_nexus != NULL) {
1965 			/*
1966 			 * Quiesce the interface and flush any pending outbound packets.
1967 			 */
1968 			if_down(ifp);
1969 
1970 			/*
1971 			 * Suspend data movement and wait for IO threads to exit.
1972 			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
1973 			 * do this because utun nexuses are attached/detached separately.
1974 			 */
1975 			ifnet_datamov_suspend_and_drain(ifp);
1976 			if ((result = ifnet_detach(ifp)) != 0) {
1977 				panic("utun_ctl_disconnect - ifnet_detach failed: %d", result);
1978 			}
1979 
1980 			/*
1981 			 * We want to do everything in our power to ensure that the interface
1982 			 * really goes away when the socket is closed. We must remove IP/IPv6
1983 			 * addresses and detach the protocols. Finally, we can remove and
1984 			 * release the interface.
1985 			 */
1986 			utun_cleanup_family(ifp, AF_INET);
1987 			utun_cleanup_family(ifp, AF_INET6);
1988 
1989 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1990 
1991 			if (!uuid_is_null(kpipe_uuid)) {
1992 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
1993 					if (pcb->utun_kpipe_pp != NULL) {
1994 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1995 						pcb->utun_kpipe_pp = NULL;
1996 					}
1997 					utun_unregister_kernel_pipe_nexus();
1998 				}
1999 			}
2000 			utun_nexus_detach(pcb);
2001 
2002 			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
2003 			ifnet_datamov_resume(ifp);
2004 		} else
2005 #endif // UTUN_NEXUS
2006 		{
2007 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2008 
2009 #if UTUN_NEXUS
2010 			if (!uuid_is_null(kpipe_uuid)) {
2011 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
2012 					if (pcb->utun_kpipe_pp != NULL) {
2013 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
2014 						pcb->utun_kpipe_pp = NULL;
2015 					}
2016 					utun_unregister_kernel_pipe_nexus();
2017 				}
2018 			}
2019 #endif // UTUN_NEXUS
2020 
2021 			/*
2022 			 * We want to do everything in our power to ensure that the interface
2023 			 * really goes away when the socket is closed. We must remove IP/IPv6
2024 			 * addresses and detach the protocols. Finally, we can remove and
2025 			 * release the interface.
2026 			 */
2027 			utun_cleanup_family(ifp, AF_INET);
2028 			utun_cleanup_family(ifp, AF_INET6);
2029 
2030 			/*
2031 			 * Detach now; utun_detach() will be called asynchronously once
2032 			 * the I/O reference count drops to 0.  There we will invoke
2033 			 * ifnet_release().
2034 			 */
2035 			if ((result = ifnet_detach(ifp)) != 0) {
2036 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_disconnect - ifnet_detach failed: %d\n", result);
2037 			}
2038 		}
2039 	} else {
2040 		// Bound, but not connected
2041 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2042 		utun_free_pcb(pcb, false);
2043 	}
2044 
2045 	return 0;
2046 }
2047 
2048 static errno_t
utun_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,mbuf_t m,__unused int flags)2049 utun_ctl_send(__unused kern_ctl_ref kctlref,
2050     __unused u_int32_t unit,
2051     void *unitinfo,
2052     mbuf_t m,
2053     __unused int flags)
2054 {
2055 	/*
2056 	 * The userland ABI requires the first four bytes have the protocol family
2057 	 * in network byte order: swap them
2058 	 */
2059 	if (m_pktlen(m) >= (int32_t)UTUN_HEADER_SIZE((struct utun_pcb *)unitinfo)) {
2060 		*mtod(m, protocol_family_t *) = ntohl(*mtod(m, protocol_family_t *));
2061 	} else {
2062 		os_log_error(OS_LOG_DEFAULT, "%s - unexpected short mbuf pkt len %d\n", __func__, m_pktlen(m));
2063 	}
2064 
2065 	return utun_pkt_input((struct utun_pcb *)unitinfo, m);
2066 }
2067 
2068 static errno_t
utun_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (len)data,size_t len)2069 utun_ctl_setopt(__unused kern_ctl_ref kctlref,
2070     __unused u_int32_t unit,
2071     void *unitinfo,
2072     int opt,
2073     void *__sized_by(len) data,
2074     size_t len)
2075 {
2076 	struct utun_pcb *__single pcb = unitinfo;
2077 	errno_t result = 0;
2078 	/* check for privileges for privileged options */
2079 	switch (opt) {
2080 	case UTUN_OPT_FLAGS:
2081 	case UTUN_OPT_EXT_IFDATA_STATS:
2082 	case UTUN_OPT_SET_DELEGATE_INTERFACE:
2083 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2084 			return EPERM;
2085 		}
2086 		break;
2087 	}
2088 
2089 	switch (opt) {
2090 	case UTUN_OPT_FLAGS:
2091 		if (len != sizeof(u_int32_t)) {
2092 			result = EMSGSIZE;
2093 			break;
2094 		}
2095 		if (pcb->utun_ifp != NULL) {
2096 			// Only can set before connecting
2097 			result = EINVAL;
2098 			break;
2099 		}
2100 		pcb->utun_flags = *(u_int32_t *)data;
2101 		break;
2102 
2103 	case UTUN_OPT_EXT_IFDATA_STATS:
2104 		if (len != sizeof(int)) {
2105 			result = EMSGSIZE;
2106 			break;
2107 		}
2108 		if (pcb->utun_ifp == NULL) {
2109 			// Only can set after connecting
2110 			result = EINVAL;
2111 			break;
2112 		}
2113 		pcb->utun_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2114 		break;
2115 
2116 	case UTUN_OPT_INC_IFDATA_STATS_IN:
2117 	case UTUN_OPT_INC_IFDATA_STATS_OUT: {
2118 		struct utun_stats_param *utsp = (struct utun_stats_param *)data;
2119 
2120 		if (utsp == NULL || len < sizeof(struct utun_stats_param)) {
2121 			result = EINVAL;
2122 			break;
2123 		}
2124 		if (pcb->utun_ifp == NULL) {
2125 			// Only can set after connecting
2126 			result = EINVAL;
2127 			break;
2128 		}
2129 		if (!pcb->utun_ext_ifdata_stats) {
2130 			result = EINVAL;
2131 			break;
2132 		}
2133 		if (opt == UTUN_OPT_INC_IFDATA_STATS_IN) {
2134 			ifnet_stat_increment_in(pcb->utun_ifp, utsp->utsp_packets,
2135 			    utsp->utsp_bytes, utsp->utsp_errors);
2136 		} else {
2137 			ifnet_stat_increment_out(pcb->utun_ifp, utsp->utsp_packets,
2138 			    utsp->utsp_bytes, utsp->utsp_errors);
2139 		}
2140 		break;
2141 	}
2142 	case UTUN_OPT_SET_DELEGATE_INTERFACE: {
2143 		ifnet_ref_t     del_ifp = NULL;
2144 		char            name[IFNAMSIZ];
2145 
2146 		if (len > IFNAMSIZ - 1) {
2147 			result = EMSGSIZE;
2148 			break;
2149 		}
2150 		if (pcb->utun_ifp == NULL) {
2151 			// Only can set after connecting
2152 			result = EINVAL;
2153 			break;
2154 		}
2155 		if (len != 0) {            /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2156 			bcopy(data, name, len);
2157 			name[len] = 0;
2158 			result = ifnet_find_by_name(__unsafe_null_terminated_from_indexable(name), &del_ifp);
2159 		}
2160 		if (result == 0) {
2161 			result = ifnet_set_delegate(pcb->utun_ifp, del_ifp);
2162 			if (del_ifp) {
2163 				ifnet_release(del_ifp);
2164 			}
2165 		}
2166 		break;
2167 	}
2168 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2169 		u_int32_t max_pending_packets = 0;
2170 		if (len != sizeof(u_int32_t)) {
2171 			result = EMSGSIZE;
2172 			break;
2173 		}
2174 		max_pending_packets = *(u_int32_t *)data;
2175 		if (max_pending_packets == 0) {
2176 			result = EINVAL;
2177 			break;
2178 		}
2179 		pcb->utun_max_pending_packets = max_pending_packets;
2180 		break;
2181 	}
2182 #if UTUN_NEXUS
2183 	case UTUN_OPT_ENABLE_CHANNEL: {
2184 		if (len != sizeof(int)) {
2185 			result = EMSGSIZE;
2186 			break;
2187 		}
2188 		if (pcb->utun_ifp == NULL) {
2189 			// Only can set after connecting
2190 			result = EINVAL;
2191 			break;
2192 		}
2193 		if (*(int *)data) {
2194 			result = utun_enable_channel(pcb, current_proc());
2195 		} else {
2196 			result = utun_disable_channel(pcb);
2197 		}
2198 		break;
2199 	}
2200 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2201 		if (len != sizeof(int)) {
2202 			result = EMSGSIZE;
2203 			break;
2204 		}
2205 		if (pcb->utun_ifp == NULL) {
2206 			// Only can set after connecting
2207 			result = EINVAL;
2208 			break;
2209 		}
2210 		if (!if_is_fsw_transport_netagent_enabled()) {
2211 			result = ENOTSUP;
2212 			break;
2213 		}
2214 		if (uuid_is_null(pcb->utun_nx.fsw_agent)) {
2215 			result = ENOENT;
2216 			break;
2217 		}
2218 
2219 		uint32_t flags = netagent_get_flags(pcb->utun_nx.fsw_agent);
2220 
2221 		if (*(int *)data) {
2222 			pcb->utun_needs_netagent = true;
2223 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
2224 			    NETAGENT_FLAG_NEXUS_LISTENER);
2225 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2226 		} else {
2227 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
2228 			    NETAGENT_FLAG_NEXUS_LISTENER);
2229 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2230 			pcb->utun_needs_netagent = false;
2231 		}
2232 		break;
2233 	}
2234 	case UTUN_OPT_ATTACH_FLOWSWITCH: {
2235 		if (len != sizeof(int)) {
2236 			result = EMSGSIZE;
2237 			break;
2238 		}
2239 		if (pcb->utun_ifp != NULL) {
2240 			// Only can set before connecting
2241 			result = EINVAL;
2242 			break;
2243 		}
2244 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2245 		pcb->utun_attach_fsw = !!(*(int *)data);
2246 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2247 		break;
2248 	}
2249 	case UTUN_OPT_ENABLE_NETIF: {
2250 		if (len != sizeof(int)) {
2251 			result = EMSGSIZE;
2252 			break;
2253 		}
2254 		if (pcb->utun_ifp != NULL) {
2255 			// Only can set before connecting
2256 			result = EINVAL;
2257 			break;
2258 		}
2259 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2260 		pcb->utun_use_netif = !!(*(int *)data);
2261 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2262 		break;
2263 	}
2264 	case UTUN_OPT_SLOT_SIZE: {
2265 		if (len != sizeof(u_int32_t)) {
2266 			result = EMSGSIZE;
2267 			break;
2268 		}
2269 		if (pcb->utun_ifp != NULL) {
2270 			// Only can set before connecting
2271 			result = EINVAL;
2272 			break;
2273 		}
2274 		u_int32_t slot_size = *(u_int32_t *)data;
2275 		if (slot_size < UTUN_IF_MIN_SLOT_SIZE ||
2276 		    slot_size > UTUN_IF_MAX_SLOT_SIZE) {
2277 			return EINVAL;
2278 		}
2279 		pcb->utun_slot_size = slot_size;
2280 		break;
2281 	}
2282 	case UTUN_OPT_NETIF_RING_SIZE: {
2283 		if (len != sizeof(u_int32_t)) {
2284 			result = EMSGSIZE;
2285 			break;
2286 		}
2287 		if (pcb->utun_ifp != NULL) {
2288 			// Only can set before connecting
2289 			result = EINVAL;
2290 			break;
2291 		}
2292 		u_int32_t ring_size = *(u_int32_t *)data;
2293 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2294 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2295 			return EINVAL;
2296 		}
2297 		pcb->utun_netif_ring_size = ring_size;
2298 		break;
2299 	}
2300 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2301 		if (len != sizeof(u_int32_t)) {
2302 			result = EMSGSIZE;
2303 			break;
2304 		}
2305 		if (pcb->utun_ifp != NULL) {
2306 			// Only can set before connecting
2307 			result = EINVAL;
2308 			break;
2309 		}
2310 		u_int32_t ring_size = *(u_int32_t *)data;
2311 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2312 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2313 			return EINVAL;
2314 		}
2315 		pcb->utun_tx_fsw_ring_size = ring_size;
2316 		break;
2317 	}
2318 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2319 		if (len != sizeof(u_int32_t)) {
2320 			result = EMSGSIZE;
2321 			break;
2322 		}
2323 		if (pcb->utun_ifp != NULL) {
2324 			// Only can set before connecting
2325 			result = EINVAL;
2326 			break;
2327 		}
2328 		u_int32_t ring_size = *(u_int32_t *)data;
2329 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2330 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2331 			return EINVAL;
2332 		}
2333 		pcb->utun_rx_fsw_ring_size = ring_size;
2334 		break;
2335 	}
2336 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2337 		if (len != sizeof(u_int32_t)) {
2338 			result = EMSGSIZE;
2339 			break;
2340 		}
2341 		if (pcb->utun_ifp != NULL) {
2342 			// Only can set before connecting
2343 			result = EINVAL;
2344 			break;
2345 		}
2346 		u_int32_t ring_size = *(u_int32_t *)data;
2347 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2348 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2349 			return EINVAL;
2350 		}
2351 		pcb->utun_kpipe_tx_ring_size = ring_size;
2352 		break;
2353 	}
2354 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2355 		if (len != sizeof(u_int32_t)) {
2356 			result = EMSGSIZE;
2357 			break;
2358 		}
2359 		if (pcb->utun_ifp != NULL) {
2360 			// Only can set before connecting
2361 			result = EINVAL;
2362 			break;
2363 		}
2364 		u_int32_t ring_size = *(u_int32_t *)data;
2365 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2366 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2367 			return EINVAL;
2368 		}
2369 		pcb->utun_kpipe_rx_ring_size = ring_size;
2370 		break;
2371 	}
2372 #endif // UTUN_NEXUS
2373 	default: {
2374 		result = ENOPROTOOPT;
2375 		break;
2376 	}
2377 	}
2378 
2379 	return result;
2380 }
2381 
2382 static errno_t
utun_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * __sized_by (* len)data,size_t * len)2383 utun_ctl_getopt(__unused kern_ctl_ref kctlref,
2384     __unused u_int32_t unit,
2385     void *unitinfo,
2386     int opt,
2387     void *__sized_by(*len) data,
2388     size_t *len)
2389 {
2390 	struct utun_pcb *__single pcb = unitinfo;
2391 	errno_t result = 0;
2392 
2393 	switch (opt) {
2394 	case UTUN_OPT_FLAGS:
2395 		if (*len != sizeof(u_int32_t)) {
2396 			result = EMSGSIZE;
2397 		} else {
2398 			*(u_int32_t *)data = pcb->utun_flags;
2399 		}
2400 		break;
2401 
2402 	case UTUN_OPT_EXT_IFDATA_STATS:
2403 		if (*len != sizeof(int)) {
2404 			result = EMSGSIZE;
2405 		} else {
2406 			*(int *)data = (pcb->utun_ext_ifdata_stats) ? 1 : 0;
2407 		}
2408 		break;
2409 
2410 	case UTUN_OPT_IFNAME:
2411 		if (*len < MIN(strbuflen(pcb->utun_if_xname) + 1, sizeof(pcb->utun_if_xname))) {
2412 			result = EMSGSIZE;
2413 		} else {
2414 			if (pcb->utun_ifp == NULL) {
2415 				// Only can get after connecting
2416 				result = EINVAL;
2417 				break;
2418 			}
2419 			*len = scnprintf(data, *len, "%s", pcb->utun_if_xname) + 1;
2420 		}
2421 		break;
2422 
2423 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2424 		if (*len != sizeof(u_int32_t)) {
2425 			result = EMSGSIZE;
2426 		} else {
2427 			*((u_int32_t *)data) = pcb->utun_max_pending_packets;
2428 		}
2429 		break;
2430 	}
2431 
2432 #if UTUN_NEXUS
2433 	case UTUN_OPT_ENABLE_CHANNEL: {
2434 		if (*len != sizeof(int)) {
2435 			result = EMSGSIZE;
2436 		} else {
2437 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2438 			*(int *)data = pcb->utun_kpipe_enabled;
2439 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2440 		}
2441 		break;
2442 	}
2443 
2444 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2445 		if (*len != sizeof(int)) {
2446 			result = EMSGSIZE;
2447 		} else {
2448 			*(int *)data = if_check_netagent(pcb->utun_ifp, pcb->utun_nx.fsw_agent);
2449 		}
2450 		break;
2451 	}
2452 
2453 	case UTUN_OPT_ENABLE_NETIF: {
2454 		if (*len != sizeof(int)) {
2455 			result = EMSGSIZE;
2456 		} else {
2457 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2458 			*(int *)data = !!pcb->utun_use_netif;
2459 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2460 		}
2461 		break;
2462 	}
2463 
2464 	case UTUN_OPT_GET_CHANNEL_UUID: {
2465 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2466 		if (uuid_is_null(pcb->utun_kpipe_uuid)) {
2467 			result = ENXIO;
2468 		} else if (*len != sizeof(uuid_t)) {
2469 			result = EMSGSIZE;
2470 		} else {
2471 			uuid_copy(data, pcb->utun_kpipe_uuid);
2472 		}
2473 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2474 		break;
2475 	}
2476 	case UTUN_OPT_SLOT_SIZE: {
2477 		if (*len != sizeof(u_int32_t)) {
2478 			result = EMSGSIZE;
2479 		} else {
2480 			*(u_int32_t *)data = pcb->utun_slot_size;
2481 		}
2482 		break;
2483 	}
2484 	case UTUN_OPT_NETIF_RING_SIZE: {
2485 		if (*len != sizeof(u_int32_t)) {
2486 			result = EMSGSIZE;
2487 		} else {
2488 			*(u_int32_t *)data = pcb->utun_netif_ring_size;
2489 		}
2490 		break;
2491 	}
2492 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2493 		if (*len != sizeof(u_int32_t)) {
2494 			result = EMSGSIZE;
2495 		} else {
2496 			*(u_int32_t *)data = pcb->utun_tx_fsw_ring_size;
2497 		}
2498 		break;
2499 	}
2500 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2501 		if (*len != sizeof(u_int32_t)) {
2502 			result = EMSGSIZE;
2503 		} else {
2504 			*(u_int32_t *)data = pcb->utun_rx_fsw_ring_size;
2505 		}
2506 		break;
2507 	}
2508 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2509 		if (*len != sizeof(u_int32_t)) {
2510 			result = EMSGSIZE;
2511 		} else {
2512 			*(u_int32_t *)data = pcb->utun_kpipe_tx_ring_size;
2513 		}
2514 		break;
2515 	}
2516 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2517 		if (*len != sizeof(u_int32_t)) {
2518 			result = EMSGSIZE;
2519 		} else {
2520 			*(u_int32_t *)data = pcb->utun_kpipe_rx_ring_size;
2521 		}
2522 		break;
2523 	}
2524 #endif // UTUN_NEXUS
2525 
2526 	default:
2527 		result = ENOPROTOOPT;
2528 		break;
2529 	}
2530 
2531 	return result;
2532 }
2533 
2534 static void
utun_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t unit,void * unitinfo,int flags)2535 utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
2536 {
2537 #pragma unused(flags)
2538 	bool reenable_output = false;
2539 	struct utun_pcb *__single pcb = unitinfo;
2540 	if (pcb == NULL) {
2541 		return;
2542 	}
2543 	ifnet_lock_exclusive(pcb->utun_ifp);
2544 
2545 	u_int32_t utun_packet_cnt;
2546 	errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, &utun_packet_cnt);
2547 	if (error_pc != 0) {
2548 		os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2549 		utun_packet_cnt = 0;
2550 	}
2551 
2552 	if (utun_packet_cnt < pcb->utun_max_pending_packets) {
2553 		reenable_output = true;
2554 	}
2555 
2556 	if (reenable_output) {
2557 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
2558 		if (error != 0) {
2559 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ifnet_enable_output returned error %d\n", error);
2560 		}
2561 	}
2562 	ifnet_lock_done(pcb->utun_ifp);
2563 }
2564 
2565 /* Network Interface functions */
2566 static void
utun_start(ifnet_t interface)2567 utun_start(ifnet_t interface)
2568 {
2569 	mbuf_ref_t data;
2570 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2571 
2572 	VERIFY(pcb != NULL);
2573 
2574 #if UTUN_NEXUS
2575 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
2576 	if (pcb->utun_kpipe_enabled) {
2577 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2578 		if (!utun_data_move_begin(pcb)) {
2579 			os_log_info(OS_LOG_DEFAULT,
2580 			    "%s: data path stopped for %s\n",
2581 			    __func__, if_name(pcb->utun_ifp));
2582 			return;
2583 		}
2584 		/* It's possible to have channels enabled, but not yet have the channel opened,
2585 		 * in which case the rxring will not be set
2586 		 */
2587 		if (pcb->utun_kpipe_rxring != NULL) {
2588 			kern_channel_notify(pcb->utun_kpipe_rxring, 0);
2589 		}
2590 		utun_data_move_end(pcb);
2591 		return;
2592 	}
2593 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2594 #endif // UTUN_NEXUS
2595 
2596 	for (;;) {
2597 		bool can_accept_packets = true;
2598 		ifnet_lock_shared(pcb->utun_ifp);
2599 
2600 		u_int32_t utun_packet_cnt;
2601 		errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt);
2602 		if (error_pc != 0) {
2603 			os_log_error(OS_LOG_DEFAULT, "utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2604 			utun_packet_cnt = 0;
2605 		}
2606 
2607 		can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets);
2608 		if (!can_accept_packets && pcb->utun_ctlref) {
2609 			u_int32_t difference = 0;
2610 			if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) {
2611 				if (difference > 0) {
2612 					// If the low-water mark has not yet been reached, we still need to enqueue data
2613 					// into the buffer
2614 					can_accept_packets = true;
2615 				}
2616 			}
2617 		}
2618 		if (!can_accept_packets) {
2619 			errno_t error = ifnet_disable_output(interface);
2620 			if (error != 0) {
2621 				os_log_error(OS_LOG_DEFAULT, "utun_start: ifnet_disable_output returned error %d\n", error);
2622 			}
2623 			ifnet_lock_done(pcb->utun_ifp);
2624 			break;
2625 		}
2626 		ifnet_lock_done(pcb->utun_ifp);
2627 		if (ifnet_dequeue(interface, &data) != 0) {
2628 			break;
2629 		}
2630 		if (utun_output(interface, data) != 0) {
2631 			break;
2632 		}
2633 	}
2634 }
2635 
2636 static errno_t
utun_output(ifnet_t interface,mbuf_t data)2637 utun_output(ifnet_t     interface,
2638     mbuf_t data)
2639 {
2640 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2641 	errno_t result;
2642 
2643 	VERIFY(interface == pcb->utun_ifp);
2644 
2645 #if UTUN_NEXUS
2646 	if (!pcb->utun_use_netif)
2647 #endif // UTUN_NEXUS
2648 	{
2649 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2650 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
2651 		}
2652 	}
2653 
2654 	if (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT) {
2655 		/* flush data */
2656 		mbuf_freem(data);
2657 		return 0;
2658 	}
2659 
2660 	// otherwise, fall thru to ctl_enqueumbuf
2661 	if (pcb->utun_ctlref) {
2662 		int     length;
2663 
2664 		/*
2665 		 * The ABI requires the protocol in network byte order
2666 		 */
2667 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2668 			*mtod(data, uint32_t *) = htonl(*mtod(data, uint32_t *));
2669 		}
2670 
2671 		length = mbuf_pkthdr_len(data);
2672 		result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
2673 		if (result != 0) {
2674 			mbuf_freem(data);
2675 			os_log_error(OS_LOG_DEFAULT, "utun_output - ctl_enqueuembuf failed: %d\n", result);
2676 #if UTUN_NEXUS
2677 			if (!pcb->utun_use_netif)
2678 #endif // UTUN_NEXUS
2679 			{
2680 				ifnet_stat_increment_out(interface, 0, 0, 1);
2681 			}
2682 		} else {
2683 #if UTUN_NEXUS
2684 			if (!pcb->utun_use_netif)
2685 #endif // UTUN_NEXUS
2686 			{
2687 				if (!pcb->utun_ext_ifdata_stats) {
2688 					ifnet_stat_increment_out(interface, 1, length, 0);
2689 				}
2690 			}
2691 		}
2692 	} else {
2693 		mbuf_freem(data);
2694 	}
2695 
2696 	return 0;
2697 }
2698 
2699 static errno_t
utun_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)2700 utun_demux(__unused ifnet_t interface,
2701     mbuf_t data,
2702     __unused char *frame_header,
2703     protocol_family_t *protocol)
2704 {
2705 #if UTUN_NEXUS
2706 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2707 	struct ip *ip;
2708 	u_int ip_version;
2709 #endif
2710 
2711 	while (data != NULL && mbuf_len(data) < 1) {
2712 		data = mbuf_next(data);
2713 	}
2714 
2715 	if (data == NULL) {
2716 		return ENOENT;
2717 	}
2718 
2719 #if UTUN_NEXUS
2720 	if (pcb->utun_use_netif) {
2721 		ip = mtod(data, struct ip *);
2722 		ip_version = ip->ip_v;
2723 
2724 		switch (ip_version) {
2725 		case 4:
2726 			*protocol = PF_INET;
2727 			return 0;
2728 		case 6:
2729 			*protocol = PF_INET6;
2730 			return 0;
2731 		default:
2732 			*protocol = 0;
2733 			break;
2734 		}
2735 	} else
2736 #endif // UTUN_NEXUS
2737 	{
2738 		*protocol = *mtod(data, uint32_t *);
2739 	}
2740 
2741 	return 0;
2742 }
2743 
2744 static errno_t
utun_framer(ifnet_t interface,mbuf_t * packet,__unused const struct sockaddr * dest,__unused const char * desk_linkaddr,const char * frame_type,u_int32_t * prepend_len,u_int32_t * postpend_len)2745 utun_framer(ifnet_t interface,
2746     mbuf_t *packet,
2747     __unused const struct sockaddr *dest,
2748     __unused const char *desk_linkaddr,
2749     const char *frame_type,
2750     u_int32_t *prepend_len,
2751     u_int32_t *postpend_len)
2752 {
2753 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2754 	VERIFY(interface == pcb->utun_ifp);
2755 
2756 	u_int32_t header_length = UTUN_HEADER_SIZE(pcb);
2757 	if (mbuf_prepend(packet, header_length, MBUF_DONTWAIT) != 0) {
2758 		os_log_error(OS_LOG_DEFAULT, "utun_framer - ifnet_output prepend failed\n");
2759 
2760 		ifnet_stat_increment_out(interface, 0, 0, 1);
2761 
2762 		// just	return, because the buffer was freed in mbuf_prepend
2763 		return EJUSTRETURN;
2764 	}
2765 	if (prepend_len != NULL) {
2766 		*prepend_len = header_length;
2767 	}
2768 	if (postpend_len != NULL) {
2769 		*postpend_len = 0;
2770 	}
2771 
2772 	// place protocol number at the beginning of the mbuf
2773 	*mtod(*packet, protocol_family_t *) = *(protocol_family_t *)(uintptr_t)(size_t)frame_type;
2774 
2775 #if NECP
2776 	// Add process uuid if applicable
2777 	if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
2778 		if (m_pktlen(*packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2779 			u_int8_t *header = mtod(*packet, uint8_t*);
2780 			int uuid_err = necp_get_app_uuid_from_packet(*packet, (void *)(header + sizeof(u_int32_t)));
2781 			if (uuid_err != 0) {
2782 				os_log_error(OS_LOG_DEFAULT, "Received app uuid error %d for %s%d\n", uuid_err, ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp));
2783 			}
2784 		} else {
2785 			os_log_error(OS_LOG_DEFAULT, "Cannot set proc uuid for %s%d, size %d < %zu\n", ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp),
2786 			    m_pktlen(*packet), UTUN_HEADER_SIZE(pcb));
2787 		}
2788 	}
2789 #endif // NECP
2790 
2791 	return 0;
2792 }
2793 
2794 static errno_t
utun_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)2795 utun_add_proto(__unused ifnet_t interface,
2796     protocol_family_t protocol,
2797     __unused const struct ifnet_demux_desc *demux_array,
2798     __unused u_int32_t demux_count)
2799 {
2800 	switch (protocol) {
2801 	case PF_INET:
2802 		return 0;
2803 	case PF_INET6:
2804 		return 0;
2805 	default:
2806 		break;
2807 	}
2808 
2809 	return ENOPROTOOPT;
2810 }
2811 
2812 static errno_t
utun_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)2813 utun_del_proto(__unused ifnet_t interface,
2814     __unused protocol_family_t protocol)
2815 {
2816 	return 0;
2817 }
2818 
2819 static errno_t
utun_ioctl(ifnet_t interface,u_long command,void * data)2820 utun_ioctl(ifnet_t interface,
2821     u_long command,
2822     void *data)
2823 {
2824 #if UTUN_NEXUS
2825 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2826 #endif
2827 	errno_t result = 0;
2828 
2829 	switch (command) {
2830 	case SIOCSIFMTU: {
2831 #if UTUN_NEXUS
2832 		if (pcb->utun_use_netif) {
2833 			// Make sure we can fit packets in the channel buffers
2834 			// Allow for the headroom in the slot
2835 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) + UTUN_IF_HEADROOM_SIZE > pcb->utun_slot_size) {
2836 				result = EINVAL;
2837 			} else {
2838 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
2839 			}
2840 		} else
2841 #endif // UTUN_NEXUS
2842 		{
2843 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
2844 		}
2845 		break;
2846 	}
2847 
2848 	case SIOCSIFFLAGS:
2849 		/* ifioctl() takes care of it */
2850 		break;
2851 
2852 	default:
2853 		result = EOPNOTSUPP;
2854 	}
2855 
2856 	return result;
2857 }
2858 
2859 static void
utun_detached(ifnet_t interface)2860 utun_detached(ifnet_t interface)
2861 {
2862 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2863 	(void)ifnet_release(interface);
2864 	lck_mtx_lock(&utun_lock);
2865 	utun_free_pcb(pcb, true);
2866 	(void)ifnet_dispose(interface);
2867 	lck_mtx_unlock(&utun_lock);
2868 }
2869 
2870 /* Protocol Handlers */
2871 
2872 static errno_t
utun_proto_input(__unused ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)2873 utun_proto_input(__unused ifnet_t interface,
2874     protocol_family_t protocol,
2875     mbuf_t m,
2876     __unused char *frame_header)
2877 {
2878 	struct utun_pcb *__single pcb = ifnet_softc(interface);
2879 #if UTUN_NEXUS
2880 	if (!pcb->utun_use_netif)
2881 #endif // UTUN_NEXUS
2882 	{
2883 		mbuf_adj(m, UTUN_HEADER_SIZE(pcb));
2884 	}
2885 	int32_t pktlen = m->m_pkthdr.len;
2886 	if (proto_input(protocol, m) != 0) {
2887 		m_freem(m);
2888 #if UTUN_NEXUS
2889 		if (!pcb->utun_use_netif)
2890 #endif // UTUN_NEXUS
2891 		{
2892 			ifnet_stat_increment_in(interface, 0, 0, 1);
2893 		}
2894 	} else {
2895 #if UTUN_NEXUS
2896 		if (!pcb->utun_use_netif)
2897 #endif // UTUN_NEXUS
2898 		{
2899 			ifnet_stat_increment_in(interface, 1, pktlen, 0);
2900 		}
2901 	}
2902 
2903 	return 0;
2904 }
2905 
2906 static errno_t
utun_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,char * frame_type,__unused char * link_layer_dest)2907 utun_proto_pre_output(__unused ifnet_t interface,
2908     protocol_family_t protocol,
2909     __unused mbuf_t *packet,
2910     __unused const struct sockaddr *dest,
2911     __unused void *route,
2912     char *frame_type,
2913     __unused char *link_layer_dest)
2914 {
2915 	*(protocol_family_t *)(void *)frame_type = protocol;
2916 	return 0;
2917 }
2918 
2919 static errno_t
utun_attach_proto(ifnet_t interface,protocol_family_t protocol)2920 utun_attach_proto(ifnet_t interface,
2921     protocol_family_t protocol)
2922 {
2923 	struct ifnet_attach_proto_param proto;
2924 
2925 	bzero(&proto, sizeof(proto));
2926 	proto.input = utun_proto_input;
2927 	proto.pre_output = utun_proto_pre_output;
2928 
2929 	errno_t result = ifnet_attach_protocol(interface, protocol, &proto);
2930 	if (result != 0 && result != EEXIST) {
2931 		os_log_error(OS_LOG_DEFAULT, "utun_attach_inet - ifnet_attach_protocol %d failed: %d\n",
2932 		    protocol, result);
2933 	}
2934 
2935 	return result;
2936 }
2937 
2938 static errno_t
utun_pkt_input(struct utun_pcb * pcb,mbuf_t packet)2939 utun_pkt_input(struct utun_pcb *pcb, mbuf_t packet)
2940 {
2941 #if UTUN_NEXUS
2942 	if (pcb->utun_use_netif) {
2943 		if (!utun_data_move_begin(pcb)) {
2944 			os_log_info(OS_LOG_DEFAULT,
2945 			    "%s: data path stopped for %s\n",
2946 			    __func__, if_name(pcb->utun_ifp));
2947 			return ENXIO;
2948 		}
2949 
2950 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2951 
2952 		lck_mtx_lock(&pcb->utun_input_chain_lock);
2953 
2954 		if (pcb->utun_input_chain_count > (u_int32_t)if_utun_max_pending_input) {
2955 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
2956 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2957 			utun_data_move_end(pcb);
2958 			return ENOSPC;
2959 		}
2960 
2961 		if (pcb->utun_input_chain != NULL) {
2962 			pcb->utun_input_chain_last->m_nextpkt = packet;
2963 		} else {
2964 			pcb->utun_input_chain = packet;
2965 		}
2966 		pcb->utun_input_chain_count++;
2967 		while (packet->m_nextpkt) {
2968 			VERIFY(packet != packet->m_nextpkt);
2969 			packet = packet->m_nextpkt;
2970 			pcb->utun_input_chain_count++;
2971 		}
2972 		pcb->utun_input_chain_last = packet;
2973 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
2974 
2975 		kern_channel_ring_t __single rx_ring = pcb->utun_netif_rxring;
2976 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2977 
2978 		if (rx_ring != NULL) {
2979 			kern_channel_notify(rx_ring, 0);
2980 		}
2981 
2982 		utun_data_move_end(pcb);
2983 		return 0;
2984 	} else
2985 #endif // UTUN_NEXUS
2986 	{
2987 		mbuf_pkthdr_setrcvif(packet, pcb->utun_ifp);
2988 
2989 		if (m_pktlen(packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2990 			bpf_tap_in(pcb->utun_ifp, DLT_NULL, packet, 0, 0);
2991 		}
2992 		if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) {
2993 			/* flush data */
2994 			mbuf_freem(packet);
2995 			return 0;
2996 		}
2997 
2998 		errno_t result = 0;
2999 		if (!pcb->utun_ext_ifdata_stats) {
3000 			struct ifnet_stat_increment_param incs = {};
3001 			incs.packets_in = 1;
3002 			incs.bytes_in = mbuf_pkthdr_len(packet);
3003 			result = ifnet_input(pcb->utun_ifp, packet, &incs);
3004 		} else {
3005 			result = ifnet_input(pcb->utun_ifp, packet, NULL);
3006 		}
3007 		if (result != 0) {
3008 			ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
3009 
3010 			os_log_error(OS_LOG_DEFAULT, "%s - ifnet_input failed: %d\n", __FUNCTION__, result);
3011 		}
3012 
3013 		return 0;
3014 	}
3015 }
3016 
3017 #if UTUN_NEXUS
3018 
3019 static errno_t
utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)3020 utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
3021 {
3022 	return 0;
3023 }
3024 
3025 static void
utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)3026 utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
3027 {
3028 	// Ignore
3029 }
3030 
3031 static errno_t
utun_register_nexus(void)3032 utun_register_nexus(void)
3033 {
3034 	const struct kern_nexus_domain_provider_init dp_init = {
3035 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3036 		.nxdpi_flags = 0,
3037 		.nxdpi_init = utun_nxdp_init,
3038 		.nxdpi_fini = utun_nxdp_fini
3039 	};
3040 	errno_t err = 0;
3041 	nexus_domain_provider_name_t domain_provider_name = "com.apple.utun";
3042 
3043 	/* utun_nxdp_init() is called before this function returns */
3044 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
3045 	    domain_provider_name,
3046 	    &dp_init, sizeof(dp_init),
3047 	    &utun_nx_dom_prov);
3048 	if (err != 0) {
3049 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
3050 		return err;
3051 	}
3052 	return 0;
3053 }
3054 boolean_t
utun_interface_needs_netagent(ifnet_t interface)3055 utun_interface_needs_netagent(ifnet_t interface)
3056 {
3057 	struct utun_pcb *__single pcb = NULL;
3058 
3059 	if (interface == NULL) {
3060 		return FALSE;
3061 	}
3062 
3063 	pcb = ifnet_softc(interface);
3064 
3065 	if (pcb == NULL) {
3066 		return FALSE;
3067 	}
3068 
3069 	return pcb->utun_needs_netagent == true;
3070 }
3071 
3072 static errno_t
utun_ifnet_set_attrs(ifnet_t ifp)3073 utun_ifnet_set_attrs(ifnet_t ifp)
3074 {
3075 	/* Set flags and additional information. */
3076 	ifnet_set_mtu(ifp, 1500);
3077 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
3078 
3079 	/* The interface must generate its own IPv6 LinkLocal address,
3080 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
3081 	 */
3082 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
3083 
3084 	return 0;
3085 }
3086 
3087 static errno_t
utun_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)3088 utun_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
3089 {
3090 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3091 	pcb->utun_netif_nexus = nexus;
3092 	return utun_ifnet_set_attrs(ifp);
3093 }
3094 
3095 static errno_t
utun_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)3096 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
3097     proc_t p, kern_nexus_t nexus,
3098     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
3099 {
3100 #pragma unused(nxprov, p)
3101 #pragma unused(nexus, nexus_port, channel, ch_ctx)
3102 	return 0;
3103 }
3104 
3105 static errno_t
utun_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3106 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3107     kern_channel_t channel)
3108 {
3109 #pragma unused(nxprov, channel)
3110 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3111 	boolean_t ok = ifnet_is_attached(pcb->utun_ifp, 1);
3112 	if (pcb->utun_netif_nexus == nexus) {
3113 		pcb->utun_netif_connected = true;
3114 	}
3115 	if (ok) {
3116 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3117 		UTUN_SET_DATA_PATH_READY(pcb);
3118 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3119 	}
3120 	return ok ? 0 : ENXIO;
3121 }
3122 
3123 static void
utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3124 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3125     kern_channel_t channel)
3126 {
3127 #pragma unused(nxprov, channel)
3128 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3129 	/* Wait until all threads in the data paths are done. */
3130 	utun_wait_data_move_drain(pcb);
3131 }
3132 
3133 static void
utun_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3134 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3135     kern_channel_t channel)
3136 {
3137 #pragma unused(nxprov, channel)
3138 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3139 	/* Wait until all threads in the data paths are done. */
3140 	utun_wait_data_move_drain(pcb);
3141 }
3142 
3143 static void
utun_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3144 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3145     kern_channel_t channel)
3146 {
3147 #pragma unused(nxprov, channel)
3148 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3149 	if (pcb->utun_netif_nexus == nexus) {
3150 		pcb->utun_netif_connected = false;
3151 		if (pcb->utun_attach_fsw) {
3152 			// disconnected by flowswitch that was attached by us
3153 			pcb->utun_netif_nexus = NULL;
3154 		}
3155 	}
3156 	ifnet_decr_iorefcnt(pcb->utun_ifp);
3157 }
3158 
3159 static errno_t
utun_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)3160 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3161     kern_channel_t channel, kern_channel_ring_t ring,
3162     boolean_t is_tx_ring, void **ring_ctx)
3163 {
3164 #pragma unused(nxprov)
3165 #pragma unused(channel)
3166 #pragma unused(ring_ctx)
3167 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3168 	if (!is_tx_ring) {
3169 		VERIFY(pcb->utun_kpipe_rxring == NULL);
3170 		pcb->utun_kpipe_rxring = ring;
3171 	} else {
3172 		VERIFY(pcb->utun_kpipe_txring == NULL);
3173 		pcb->utun_kpipe_txring = ring;
3174 	}
3175 	return 0;
3176 }
3177 
3178 static void
utun_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)3179 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3180     kern_channel_ring_t ring)
3181 {
3182 #pragma unused(nxprov)
3183 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3184 	if (pcb->utun_kpipe_rxring == ring) {
3185 		pcb->utun_kpipe_rxring = NULL;
3186 	} else if (pcb->utun_kpipe_txring == ring) {
3187 		pcb->utun_kpipe_txring = NULL;
3188 	}
3189 }
3190 
3191 static errno_t
utun_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)3192 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3193     kern_channel_ring_t tx_ring, uint32_t flags)
3194 {
3195 #pragma unused(nxprov)
3196 #pragma unused(flags)
3197 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3198 
3199 	if (!utun_data_move_begin(pcb)) {
3200 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3201 		    __func__, if_name(pcb->utun_ifp));
3202 		return 0;
3203 	}
3204 
3205 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3206 	int channel_enabled = pcb->utun_kpipe_enabled;
3207 	if (!channel_enabled) {
3208 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3209 		utun_data_move_end(pcb);
3210 		return 0;
3211 	}
3212 
3213 	if (pcb->utun_use_netif) {
3214 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3215 		if (tx_slot == NULL) {
3216 			// Nothing to write, bail
3217 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3218 			utun_data_move_end(pcb);
3219 			return 0;
3220 		}
3221 
3222 		// Signal the netif ring to read
3223 		kern_channel_ring_t __single rx_ring = pcb->utun_netif_rxring;
3224 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3225 		if (rx_ring != NULL) {
3226 			kern_channel_notify(rx_ring, 0);
3227 		}
3228 	} else {
3229 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3230 
3231 		struct ifnet_stat_increment_param incs = {};
3232 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
3233 		MBUFQ_HEAD(mbufq) mbq;
3234 		MBUFQ_INIT(&mbq);
3235 		kern_channel_slot_t tx_pslot = NULL;
3236 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3237 		while (tx_slot != NULL) {
3238 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3239 
3240 			// Advance TX ring
3241 			tx_pslot = tx_slot;
3242 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3243 
3244 			if (tx_ph == 0) {
3245 				continue;
3246 			}
3247 
3248 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3249 			VERIFY(tx_buf != NULL);
3250 			uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3251 			    kern_buflet_get_data_address(tx_buf),
3252 			    kern_buflet_get_data_limit(tx_buf));
3253 			VERIFY(tx_baddr != 0);
3254 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3255 
3256 			size_t length = MIN(kern_packet_get_data_length(tx_ph),
3257 			    pcb->utun_slot_size);
3258 
3259 			mbuf_ref_t data = NULL;
3260 			if (length >= UTUN_HEADER_SIZE(pcb) &&
3261 			    !(pcb->utun_flags & UTUN_FLAGS_NO_INPUT)) {
3262 				errno_t error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_HEADER, &data);
3263 				VERIFY(0 == error);
3264 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_WAITOK);
3265 				VERIFY(0 == error);
3266 				/*
3267 				 * The userland ABI requires the first four bytes have
3268 				 * the protocol family in network byte order: swap them
3269 				 */
3270 				*mtod(data, uint32_t*) = ntohl(*mtod(data, uint32_t *));
3271 				mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
3272 				bpf_tap_in(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3273 				incs.packets_in++;
3274 				incs.bytes_in += length;
3275 				MBUFQ_ENQUEUE(&mbq, data);
3276 			}
3277 		}
3278 		if (tx_pslot) {
3279 			kern_channel_advance_slot(tx_ring, tx_pslot);
3280 			tx_ring_stats.kcrsi_slots_transferred = incs.packets_in;
3281 			tx_ring_stats.kcrsi_bytes_transferred = incs.bytes_in;
3282 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3283 			(void) kern_channel_reclaim(tx_ring);
3284 		}
3285 		if (!MBUFQ_EMPTY(&mbq)) {
3286 			(void) ifnet_input_extended(pcb->utun_ifp, MBUFQ_FIRST(&mbq),
3287 			    MBUFQ_LAST(&mbq), &incs);
3288 			MBUFQ_INIT(&mbq);
3289 		}
3290 	}
3291 
3292 	utun_data_move_end(pcb);
3293 	return 0;
3294 }
3295 
3296 static errno_t
utun_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)3297 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3298     kern_channel_ring_t rx_ring, uint32_t flags)
3299 {
3300 #pragma unused(nxprov)
3301 #pragma unused(flags)
3302 	struct utun_pcb *__single pcb = kern_nexus_get_context(nexus);
3303 	struct kern_channel_ring_stat_increment rx_ring_stats = {};
3304 
3305 	if (!utun_data_move_begin(pcb)) {
3306 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3307 		    __func__, if_name(pcb->utun_ifp));
3308 		return 0;
3309 	}
3310 
3311 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3312 
3313 	int channel_enabled = pcb->utun_kpipe_enabled;
3314 	if (!channel_enabled) {
3315 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3316 		utun_data_move_end(pcb);
3317 		return 0;
3318 	}
3319 
3320 	/* reclaim user-released slots */
3321 	(void) kern_channel_reclaim(rx_ring);
3322 
3323 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
3324 	if (avail == 0) {
3325 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3326 		utun_data_move_end(pcb);
3327 		return 0;
3328 	}
3329 
3330 	if (pcb->utun_use_netif) {
3331 		kern_channel_ring_t __single tx_ring = pcb->utun_netif_txring;
3332 		if (tx_ring == NULL ||
3333 		    pcb->utun_netif_nexus == NULL) {
3334 			// Net-If TX ring not set up yet, nothing to read
3335 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3336 			utun_data_move_end(pcb);
3337 			return 0;
3338 		}
3339 
3340 		struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->utun_netif_nexus)->nif_stats;
3341 
3342 		// Unlock utun before entering ring
3343 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3344 
3345 		(void)kr_enter(tx_ring, TRUE);
3346 
3347 		// Lock again after entering and validate
3348 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
3349 		if (tx_ring != pcb->utun_netif_txring) {
3350 			// Ring no longer valid
3351 			// Unlock first, then exit ring
3352 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3353 			kr_exit(tx_ring);
3354 			utun_data_move_end(pcb);
3355 			return 0;
3356 		}
3357 
3358 		struct kern_channel_ring_stat_increment tx_ring_stats;
3359 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
3360 		kern_channel_slot_t tx_pslot = NULL;
3361 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3362 		if (tx_slot == NULL) {
3363 			// Nothing to read, don't bother signalling
3364 			// Unlock first, then exit ring
3365 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3366 			kr_exit(tx_ring);
3367 			utun_data_move_end(pcb);
3368 			return 0;
3369 		}
3370 
3371 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3372 		VERIFY(rx_pp != NULL);
3373 		struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
3374 		VERIFY(tx_pp != NULL);
3375 		kern_channel_slot_t rx_pslot = NULL;
3376 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3377 		kern_packet_t tx_chain_ph = 0;
3378 
3379 		while (rx_slot != NULL && tx_slot != NULL) {
3380 			size_t length;
3381 			kern_buflet_t rx_buf;
3382 			uint8_t *rx_baddr;
3383 
3384 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3385 
3386 			/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
3387 			if (tx_ph == 0) {
3388 				// Advance TX ring
3389 				tx_pslot = tx_slot;
3390 				tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3391 				continue;
3392 			}
3393 			(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
3394 			if (tx_chain_ph != 0) {
3395 				kern_packet_append(tx_ph, tx_chain_ph);
3396 			}
3397 			tx_chain_ph = tx_ph;
3398 
3399 			// Advance TX ring
3400 			tx_pslot = tx_slot;
3401 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3402 
3403 			// Allocate rx packet
3404 			kern_packet_t rx_ph = 0;
3405 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3406 			if (__improbable(error != 0)) {
3407 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3408 				    pcb->utun_ifp->if_xname);
3409 				break;
3410 			}
3411 
3412 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3413 			VERIFY(tx_buf != NULL);
3414 			uint8_t *tx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3415 			    kern_buflet_get_data_address(tx_buf),
3416 			    kern_buflet_get_data_limit(tx_buf));
3417 			VERIFY(tx_baddr != NULL);
3418 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3419 
3420 			bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
3421 
3422 			length = MIN(kern_packet_get_data_length(tx_ph) + UTUN_HEADER_SIZE(pcb),
3423 			    pcb->utun_slot_size);
3424 
3425 			tx_ring_stats.kcrsi_slots_transferred++;
3426 			tx_ring_stats.kcrsi_bytes_transferred += length;
3427 
3428 			if (length < UTUN_HEADER_SIZE(pcb) ||
3429 			    length > pcb->utun_slot_size ||
3430 			    length > PP_BUF_SIZE_DEF(rx_pp) ||
3431 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3432 				/* flush data */
3433 				kern_pbufpool_free(rx_pp, rx_ph);
3434 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: invalid length %zu header_size %zu\n",
3435 				    pcb->utun_ifp->if_xname, length, UTUN_HEADER_SIZE(pcb));
3436 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
3437 				STATS_INC(nifs, NETIF_STATS_DROP);
3438 				continue;
3439 			}
3440 
3441 			/* fillout packet */
3442 			rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3443 			VERIFY(rx_buf != NULL);
3444 			rx_baddr = __unsafe_forge_bidi_indexable(uint8_t *,
3445 			    kern_buflet_get_data_address(rx_buf),
3446 			    kern_buflet_get_data_limit(rx_buf));
3447 			VERIFY(rx_baddr != NULL);
3448 
3449 			// Find family
3450 			uint32_t af = 0;
3451 			uint8_t vhl = *(uint8_t *)(tx_baddr);
3452 			u_int ip_version = (vhl >> 4);
3453 			switch (ip_version) {
3454 			case 4: {
3455 				af = AF_INET;
3456 				break;
3457 			}
3458 			case 6: {
3459 				af = AF_INET6;
3460 				break;
3461 			}
3462 			default: {
3463 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: unknown ip version %u vhl %u header_size %zu\n",
3464 				    pcb->utun_ifp->if_xname, ip_version, vhl, UTUN_HEADER_SIZE(pcb));
3465 				break;
3466 			}
3467 			}
3468 
3469 			// Copy header
3470 			af = htonl(af);
3471 			memcpy(rx_baddr, &af, sizeof(af));
3472 			if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
3473 				uuid_t uuid;
3474 				kern_packet_get_euuid(tx_ph, uuid);
3475 				memcpy(rx_baddr + sizeof(af), uuid, sizeof(uuid));
3476 			}
3477 
3478 			// Copy data from tx to rx
3479 			memcpy(rx_baddr + UTUN_HEADER_SIZE(pcb), tx_baddr, length - UTUN_HEADER_SIZE(pcb));
3480 			kern_packet_clear_flow_uuid(rx_ph); // zero flow id
3481 
3482 			/* finalize and attach the packet */
3483 			error = kern_buflet_set_data_offset(rx_buf, 0);
3484 			VERIFY(error == 0);
3485 			error = kern_buflet_set_data_length(rx_buf, length);
3486 			VERIFY(error == 0);
3487 			error = kern_packet_finalize(rx_ph);
3488 			VERIFY(error == 0);
3489 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3490 			VERIFY(error == 0);
3491 
3492 			STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
3493 			STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
3494 
3495 			rx_ring_stats.kcrsi_slots_transferred++;
3496 			rx_ring_stats.kcrsi_bytes_transferred += length;
3497 
3498 			rx_pslot = rx_slot;
3499 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3500 		}
3501 
3502 		if (rx_pslot) {
3503 			kern_channel_advance_slot(rx_ring, rx_pslot);
3504 			kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
3505 		}
3506 
3507 		if (tx_chain_ph != 0) {
3508 			kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
3509 		}
3510 
3511 		if (tx_pslot) {
3512 			kern_channel_advance_slot(tx_ring, tx_pslot);
3513 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3514 			(void)kern_channel_reclaim(tx_ring);
3515 		}
3516 
3517 		/* just like utun_ctl_rcvd(), always reenable output */
3518 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
3519 		if (error != 0) {
3520 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
3521 		}
3522 
3523 		// Unlock first, then exit ring
3524 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3525 
3526 		if (tx_pslot != NULL) {
3527 			kern_channel_notify(tx_ring, 0);
3528 		}
3529 		kr_exit(tx_ring);
3530 	} else {
3531 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3532 
3533 		uint32_t mb_cnt = 0;
3534 		uint32_t mb_len = 0;
3535 		mbuf_ref_t mb_head = NULL;
3536 		mbuf_ref_t mb_tail = NULL;
3537 
3538 		if (ifnet_dequeue_multi(pcb->utun_ifp, avail, &mb_head,
3539 		    &mb_tail, &mb_cnt, &mb_len) != 0) {
3540 			utun_data_move_end(pcb);
3541 			return 0;
3542 		}
3543 		VERIFY(mb_cnt <= avail);
3544 
3545 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3546 		VERIFY(rx_pp != NULL);
3547 		kern_channel_slot_t rx_pslot = NULL;
3548 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3549 		while (rx_slot) {
3550 			size_t length = 0;
3551 			mbuf_t data = NULL;
3552 			if ((data = mb_head) == NULL) {
3553 				VERIFY(mb_cnt == 0);
3554 				break;
3555 			}
3556 			mb_head = mbuf_nextpkt(mb_head);
3557 			mbuf_setnextpkt(data, NULL);
3558 			VERIFY(mb_cnt != 0);
3559 			--mb_cnt;
3560 			length = mbuf_pkthdr_len(data);
3561 			if (length < UTUN_HEADER_SIZE(pcb) ||
3562 			    length > pcb->utun_slot_size ||
3563 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3564 				/* flush data */
3565 				mbuf_freem(data);
3566 				continue;
3567 			}
3568 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3569 
3570 			// Allocate rx packet
3571 			kern_packet_t rx_ph = 0;
3572 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3573 			if (__improbable(error != 0)) {
3574 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3575 				    pcb->utun_ifp->if_xname);
3576 				break;
3577 			}
3578 
3579 			/*
3580 			 * The ABI requires the protocol in network byte order
3581 			 */
3582 			*mtod(data, uint32_t*) = htonl(*mtod(data, uint32_t *));
3583 
3584 			// Fillout rx packet
3585 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3586 			VERIFY(rx_buf != NULL);
3587 			void *__single rx_baddr = kern_buflet_get_data_address(rx_buf);
3588 			VERIFY(rx_baddr != NULL);
3589 
3590 			// Copy-in data from mbuf to buflet
3591 			mbuf_copydata(data, 0, length, (void *)rx_baddr);
3592 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
3593 
3594 			// Finalize and attach the packet
3595 			error = kern_buflet_set_data_offset(rx_buf, 0);
3596 			VERIFY(error == 0);
3597 			error = kern_buflet_set_data_length(rx_buf, length);
3598 			VERIFY(error == 0);
3599 			error = kern_packet_finalize(rx_ph);
3600 			VERIFY(error == 0);
3601 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3602 			VERIFY(error == 0);
3603 
3604 			rx_ring_stats.kcrsi_slots_transferred++;
3605 			rx_ring_stats.kcrsi_bytes_transferred += length;
3606 
3607 			if (!pcb->utun_ext_ifdata_stats) {
3608 				ifnet_stat_increment_out(pcb->utun_ifp, 1, length, 0);
3609 			}
3610 
3611 			mbuf_freem(data);
3612 
3613 			rx_pslot = rx_slot;
3614 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3615 		}
3616 		if (rx_pslot) {
3617 			kern_channel_advance_slot(rx_ring, rx_pslot);
3618 			kern_channel_increment_ring_stats(rx_ring, &rx_ring_stats);
3619 		}
3620 		if (mb_head != NULL) {
3621 			VERIFY(mb_cnt != 0);
3622 			mbuf_freem_list(mb_head);
3623 		}
3624 	}
3625 
3626 	utun_data_move_end(pcb);
3627 	return 0;
3628 }
3629 
3630 #endif // UTUN_NEXUS
3631 
3632 
3633 /*
3634  * These are place holders until coreTLS kext stops calling them
3635  */
3636 errno_t utun_ctl_register_dtls(void *reg);
3637 int utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
3638 void utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb);
3639 
3640 errno_t
utun_ctl_register_dtls(void * reg)3641 utun_ctl_register_dtls(void *reg)
3642 {
3643 #pragma unused(reg)
3644 	return 0;
3645 }
3646 
3647 int
utun_pkt_dtls_input(struct utun_pcb * pcb,mbuf_t * pkt,protocol_family_t family)3648 utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family)
3649 {
3650 #pragma unused(pcb)
3651 #pragma unused(pkt)
3652 #pragma unused(family)
3653 	return 0;
3654 }
3655 
/*
 * DTLS disable placeholder: ignores its argument and does nothing.
 */
void
utun_ctl_disable_crypto_dtls(struct utun_pcb *pcb)
{
	(void)pcb;
}
3661 
3662 #if UTUN_NEXUS
3663 static boolean_t
utun_data_move_begin(struct utun_pcb * pcb)3664 utun_data_move_begin(struct utun_pcb *pcb)
3665 {
3666 	bool data_path_ready = false;
3667 
3668 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3669 	if ((data_path_ready = UTUN_IS_DATA_PATH_READY(pcb))) {
3670 		pcb->utun_pcb_data_move++;
3671 	}
3672 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3673 
3674 	return data_path_ready;
3675 }
3676 
3677 static void
utun_data_move_end(struct utun_pcb * pcb)3678 utun_data_move_end(struct utun_pcb *pcb)
3679 {
3680 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3681 	VERIFY(pcb->utun_pcb_data_move > 0);
3682 	/*
3683 	 * if there's no more thread moving data, wakeup any
3684 	 * drainers that are blocked waiting for this.
3685 	 */
3686 	if (--pcb->utun_pcb_data_move == 0 && pcb->utun_pcb_drainers > 0) {
3687 		wakeup(&(pcb->utun_pcb_data_move));
3688 	}
3689 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3690 }
3691 
3692 static void
utun_data_move_drain(struct utun_pcb * pcb)3693 utun_data_move_drain(struct utun_pcb *pcb)
3694 {
3695 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3696 	/* data path must already be marked as not ready */
3697 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3698 	pcb->utun_pcb_drainers++;
3699 	while (pcb->utun_pcb_data_move != 0) {
3700 		(void) msleep(&(pcb->utun_pcb_data_move),
3701 		    &pcb->utun_pcb_data_move_lock, (PZERO - 1), __func__, NULL);
3702 	}
3703 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3704 	VERIFY(pcb->utun_pcb_drainers > 0);
3705 	pcb->utun_pcb_drainers--;
3706 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3707 }
3708 
3709 static void
utun_wait_data_move_drain(struct utun_pcb * pcb)3710 utun_wait_data_move_drain(struct utun_pcb *pcb)
3711 {
3712 	/*
3713 	 * Mark the data path as not usable.
3714 	 */
3715 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3716 	UTUN_CLR_DATA_PATH_READY(pcb);
3717 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3718 
3719 	/* Wait until all threads in the data path are done. */
3720 	utun_data_move_drain(pcb);
3721 }
3722 #endif // UTUN_NEXUS
3723