xref: /xnu-10063.121.3/bsd/net/if_utun.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2008-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 
31 /* ----------------------------------------------------------------------------------
32  *   Application of kernel control for interface creation
33  *
34  *   Theory of operation:
35  *   utun (user tunnel) acts as glue between kernel control sockets and network interfaces.
36  *   This kernel control will register an interface for every client that connects.
37  *   ---------------------------------------------------------------------------------- */
38 
39 #include <sys/systm.h>
40 #include <sys/kern_control.h>
41 #include <net/kpi_protocol.h>
42 #include <net/kpi_interface.h>
43 #include <sys/socket.h>
44 #include <net/if.h>
45 #include <net/if_types.h>
46 #include <net/bpf.h>
47 #include <net/if_utun.h>
48 #include <sys/mbuf.h>
49 #include <sys/sockio.h>
50 #include <netinet/in.h>
51 #include <netinet/ip.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/in6_var.h>
54 #include <sys/kauth.h>
55 #include <net/necp.h>
56 #include <kern/zalloc.h>
57 
58 #include <net/sockaddr_utils.h>
59 
60 #include <os/log.h>
61 
62 #if SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
63 #include <skywalk/os_skywalk_private.h>
64 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
65 #include <skywalk/nexus/netif/nx_netif.h>
66 #define UTUN_NEXUS 1
67 #else // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
68 #define UTUN_NEXUS 0
69 #endif // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
70 
71 #if UTUN_NEXUS
/*
 * File-scope nexus bookkeeping; only compiled when UTUN_NEXUS is 1.
 * NOTE(review): the names suggest a shared nexus controller with a use count,
 * plus kernel-pipe provider and domain-provider UUIDs -- confirm against the
 * code that registers them.
 */
72 static nexus_controller_t utun_ncd;
73 static int utun_ncd_refcount;
74 static uuid_t utun_kpipe_uuid;
75 static uuid_t utun_nx_dom_prov;
76 
/*
 * Group of UUIDs kept per interface; embedded in struct utun_pcb as utun_nx.
 * NOTE(review): field names suggest netif ("if_") and flowswitch ("fsw_")
 * provider/instance identifiers -- confirm against the code that fills them in.
 * utun_nx_t is a pointer typedef, so a utun_nx_t value refers to a struct utun_nx.
 */
77 typedef struct utun_nx {
78 	uuid_t if_provider;
79 	uuid_t if_instance;
80 	uuid_t fsw_provider;
81 	uuid_t fsw_instance;
82 	uuid_t fsw_device;
83 	uuid_t fsw_agent;
84 } *utun_nx_t;
85 
86 #endif // UTUN_NEXUS
87 
88 /* Control block allocated for each kernel control connection */
/*
 * Layout overview:
 *  - identity: list linkage, control reference, ifnet, unit/unique id, names
 *  - utun_flags: option bits; UTUN_FLAGS_ENABLE_PROC_UUID changes the size of
 *    the per-packet header (see UTUN_HEADER_SIZE)
 *  - utun_input_chain*: singly linked list of pending input mbufs (linked via
 *    m_nextpkt) with its own mutex
 *  - UTUN_NEXUS only: data-move accounting, kernel-pipe state, netif nexus
 *    state and ring/slot sizing
 */
89 struct utun_pcb {
90 	TAILQ_ENTRY(utun_pcb)   utun_chain;
91 	kern_ctl_ref    utun_ctlref;
92 	ifnet_t                 utun_ifp;
93 	u_int32_t               utun_unit;
94 	u_int32_t               utun_unique_id;
95 	u_int32_t               utun_flags;
96 	int                     utun_ext_ifdata_stats;
97 	u_int32_t               utun_max_pending_packets;
98 	char                    utun_if_xname[IFXNAMSIZ];
99 	char                    utun_unique_name[IFXNAMSIZ];
100 	// PCB lock protects state fields and rings
101 	decl_lck_rw_data(, utun_pcb_lock);
	// Head, tail and element count of the pending input mbuf list
102 	struct mbuf *   utun_input_chain;
103 	struct mbuf *   utun_input_chain_last;
104 	u_int32_t               utun_input_chain_count;
105 	// Input chain lock protects the list of input mbufs
106 	// The input chain lock must be taken AFTER the PCB lock if both are held
107 	lck_mtx_t               utun_input_chain_lock;
108 
109 #if UTUN_NEXUS
110 	// lock to protect utun_pcb_data_move & utun_pcb_drainers
111 	decl_lck_mtx_data(, utun_pcb_data_move_lock);
112 	u_int32_t               utun_pcb_data_move; /* number of data moving contexts */
113 	u_int32_t               utun_pcb_drainers; /* number of threads waiting to drain */
114 	u_int32_t               utun_pcb_data_path_state; /* internal state of interface data path */
115 
116 	struct utun_nx  utun_nx;
	// Kernel pipe state; the ring pointers are read under utun_pcb_lock by the sync routines
117 	int                     utun_kpipe_enabled;
118 	uuid_t                  utun_kpipe_uuid;
119 	void *                  utun_kpipe_rxring;
120 	void *                  utun_kpipe_txring;
121 	kern_pbufpool_t         utun_kpipe_pp;
122 	u_int32_t               utun_kpipe_tx_ring_size;
123 	u_int32_t               utun_kpipe_rx_ring_size;
124 
	// Netif nexus state; rx/tx ring pointers are set in ring_init and cleared in ring_fini
125 	kern_nexus_t    utun_netif_nexus;
126 	kern_pbufpool_t         utun_netif_pp;
127 	void *                  utun_netif_rxring;
128 	void *                  utun_netif_txring;
129 	uint64_t                utun_netif_txring_size;
130 
131 	u_int32_t               utun_slot_size;
132 	u_int32_t               utun_netif_ring_size;
133 	u_int32_t               utun_tx_fsw_ring_size;
134 	u_int32_t               utun_rx_fsw_ring_size;
135 	// Auto attach flowswitch when netif is enabled. When set to false,
136 	// it allows userspace nexus controller to attach and own flowswitch.
137 	bool                    utun_attach_fsw;
138 	bool                    utun_netif_connected;
139 	bool                    utun_use_netif;
140 	bool                    utun_needs_netagent;
141 #endif // UTUN_NEXUS
142 };
143 
/*
 * Forward declarations for the file-local callbacks, grouped by the subsystem
 * that invokes them. All of them are static to this file.
 */
144 /* Kernel Control functions */
145 static errno_t  utun_ctl_setup(u_int32_t *unit, void **unitinfo);
146 static errno_t  utun_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
147     void **unitinfo);
148 static errno_t  utun_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
149     void **unitinfo);
150 static errno_t  utun_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
151     void *unitinfo);
152 static errno_t  utun_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
153     void *unitinfo, mbuf_t m, int flags);
154 static errno_t  utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
155     int opt, void *data, size_t *len);
156 static errno_t  utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
157     int opt, void *data, size_t len);
158 static void             utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
159     int flags);
160 
161 /* Network Interface functions */
162 static void     utun_start(ifnet_t interface);
/* NOTE(review): "desk_linkaddr" looks like a typo for "dest_linkaddr"; prototype parameter names are not binding. */
163 static errno_t  utun_framer(ifnet_t interface, mbuf_t *packet,
164     const struct sockaddr *dest, const char *desk_linkaddr,
165     const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len);
166 static errno_t  utun_output(ifnet_t interface, mbuf_t data);
167 static errno_t  utun_demux(ifnet_t interface, mbuf_t data, char *frame_header,
168     protocol_family_t *protocol);
169 static errno_t  utun_add_proto(ifnet_t interface, protocol_family_t protocol,
170     const struct ifnet_demux_desc *demux_array,
171     u_int32_t demux_count);
172 static errno_t  utun_del_proto(ifnet_t interface, protocol_family_t protocol);
173 static errno_t  utun_ioctl(ifnet_t interface, u_long cmd, void *data);
174 static void             utun_detached(ifnet_t interface);
175 
176 /* Protocol handlers */
177 static errno_t  utun_attach_proto(ifnet_t interface, protocol_family_t proto);
178 static errno_t  utun_proto_input(ifnet_t interface, protocol_family_t protocol,
179     mbuf_t m, char *frame_header);
180 static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
181     mbuf_t *packet, const struct sockaddr *dest, void *route,
182     char *frame_type, char *link_layer_dest);
183 static errno_t utun_pkt_input(struct utun_pcb *pcb, mbuf_t m);
184 
185 /* data movement refcounting functions */
186 #if UTUN_NEXUS
/*
 * Data-movement accounting. The nexus sync/doorbell routines call
 * utun_data_move_begin() on entry, return early when it yields false, and call
 * utun_data_move_end() on every other exit path.
 */
187 static boolean_t utun_data_move_begin(struct utun_pcb *pcb);
188 static void utun_data_move_end(struct utun_pcb *pcb);
189 static void utun_wait_data_move_drain(struct utun_pcb *pcb);
190 
191 /* Data path states */
192 #define UTUN_PCB_DATA_PATH_READY    0x1
193 
194 /* Macros to set/clear/test data path states */
/* Each macro operates on the single READY bit of (_pcb)->utun_pcb_data_path_state. */
195 #define UTUN_SET_DATA_PATH_READY(_pcb) \
196     ((_pcb)->utun_pcb_data_path_state |= UTUN_PCB_DATA_PATH_READY)
197 #define UTUN_CLR_DATA_PATH_READY(_pcb) \
198     ((_pcb)->utun_pcb_data_path_state &= ~UTUN_PCB_DATA_PATH_READY)
199 #define UTUN_IS_DATA_PATH_READY(_pcb) \
200     (((_pcb)->utun_pcb_data_path_state & UTUN_PCB_DATA_PATH_READY) != 0)
201 
/* Default slot/ring sizing; the *_RING_SIZE defaults seed the net.utun sysctl variables. */
202 #define UTUN_IF_DEFAULT_SLOT_SIZE 2048
203 #define UTUN_IF_DEFAULT_RING_SIZE 64
204 #define UTUN_IF_DEFAULT_TX_FSW_RING_SIZE 64
205 #define UTUN_IF_DEFAULT_RX_FSW_RING_SIZE 128
206 #define UTUN_IF_DEFAULT_BUF_SEG_SIZE    skmem_usr_buf_seg_size
207 #define UTUN_IF_HEADROOM_SIZE 32
208 
/* Inclusive bounds enforced by the ring-size sysctl handlers. */
209 #define UTUN_IF_MIN_RING_SIZE 8
210 #define UTUN_IF_MAX_RING_SIZE 1024
211 
212 #define UTUN_IF_MIN_SLOT_SIZE 1024
213 #define UTUN_IF_MAX_SLOT_SIZE 4096
214 
215 #define UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT 512
216 
/* Exposed read/write as the net.utun.max_pending_input sysctl. */
217 static int if_utun_max_pending_input = UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT;
218 
/* Handlers that range-check writes to the three ring-size sysctls below. */
219 static int sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS;
220 static int sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
221 static int sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
222 
/* Backing storage for the ring-size sysctls, initialised to the compile-time defaults. */
223 static int if_utun_ring_size = UTUN_IF_DEFAULT_RING_SIZE;
224 static int if_utun_tx_fsw_ring_size = UTUN_IF_DEFAULT_TX_FSW_RING_SIZE;
225 static int if_utun_rx_fsw_ring_size = UTUN_IF_DEFAULT_RX_FSW_RING_SIZE;
226 
/* "utun" node under _net; all four OIDs below are registered read/write (CTLFLAG_RW). */
227 SYSCTL_DECL(_net_utun);
228 SYSCTL_NODE(_net, OID_AUTO, utun, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "UTun");
229 
230 SYSCTL_INT(_net_utun, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_utun_max_pending_input, 0, "");
231 SYSCTL_PROC(_net_utun, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
232     &if_utun_ring_size, UTUN_IF_DEFAULT_RING_SIZE, &sysctl_if_utun_ring_size, "I", "");
233 SYSCTL_PROC(_net_utun, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
234     &if_utun_tx_fsw_ring_size, UTUN_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_utun_tx_fsw_ring_size, "I", "");
235 SYSCTL_PROC(_net_utun, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
236     &if_utun_rx_fsw_ring_size, UTUN_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_utun_rx_fsw_ring_size, "I", "");
237 
/*
 * Forward declarations for the nexus provider callbacks.
 * utun_nexus_pre_connect, utun_nexus_connected, utun_netif_pre_disconnect and
 * utun_nexus_disconnected are installed in the netif provider init structure
 * built by utun_nexus_ifattach.
 * NOTE(review): the utun_kpipe_* callbacks are presumably installed by the
 * kernel pipe provider registration -- confirm in utun_register_nexus.
 */
238 static errno_t
239 utun_register_nexus(void);
240 
241 static errno_t
242 utun_netif_prepare(__unused kern_nexus_t nexus, ifnet_t ifp);
243 static errno_t
244 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
245     proc_t p, kern_nexus_t nexus,
246     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx);
247 static errno_t
248 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
249     kern_channel_t channel);
250 static void
251 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
252     kern_channel_t channel);
253 static void
254 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
255     kern_channel_t channel);
256 static void
257 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
258     kern_channel_t channel);
259 static errno_t
260 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
261     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
262     void **ring_ctx);
263 static void
264 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
265     kern_channel_ring_t ring);
266 static errno_t
267 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
268     kern_channel_ring_t ring, uint32_t flags);
269 static errno_t
270 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
271     kern_channel_ring_t ring, uint32_t flags);
272 #endif // UTUN_NEXUS
273 
274 #define UTUN_DEFAULT_MTU 1500
/*
 * Size in bytes of the header that precedes each packet exchanged with the
 * control socket: a u_int32_t (the sync routines store the address family
 * there), followed by a uuid_t when UTUN_FLAGS_ENABLE_PROC_UUID is set in the
 * PCB flags. Evaluates to a size_t.
 */
275 #define UTUN_HEADER_SIZE(_pcb) (sizeof(u_int32_t) + (((_pcb)->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) ? sizeof(uuid_t) : 0))
276 
/* Kernel control reference plus the lock attribute, lock group and mutex declared for this file. */
277 static kern_ctl_ref     utun_kctlref;
278 static LCK_ATTR_DECLARE(utun_lck_attr, 0, 0);
279 static LCK_GRP_DECLARE(utun_lck_grp, "utun");
280 static LCK_MTX_DECLARE_ATTR(utun_lock, &utun_lck_grp, &utun_lck_attr);
281 
/* List head for utun PCBs (linked through utun_pcb.utun_chain). NOTE(review): presumably guarded by utun_lock -- confirm. */
282 TAILQ_HEAD(utun_list, utun_pcb) utun_head;
283 
/* Typed allocation zone for struct utun_pcb. */
284 static KALLOC_TYPE_DEFINE(utun_pcb_zone, struct utun_pcb, NET_KT_DEFAULT);
285 
286 #if UTUN_NEXUS
287 
288 static int
289 sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS
290 {
291 #pragma unused(arg1, arg2)
292 	int value = if_utun_ring_size;
293 
294 	int error = sysctl_handle_int(oidp, &value, 0, req);
295 	if (error || !req->newptr) {
296 		return error;
297 	}
298 
299 	if (value < UTUN_IF_MIN_RING_SIZE ||
300 	    value > UTUN_IF_MAX_RING_SIZE) {
301 		return EINVAL;
302 	}
303 
304 	if_utun_ring_size = value;
305 
306 	return 0;
307 }
308 
309 static int
310 sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
311 {
312 #pragma unused(arg1, arg2)
313 	int value = if_utun_tx_fsw_ring_size;
314 
315 	int error = sysctl_handle_int(oidp, &value, 0, req);
316 	if (error || !req->newptr) {
317 		return error;
318 	}
319 
320 	if (value < UTUN_IF_MIN_RING_SIZE ||
321 	    value > UTUN_IF_MAX_RING_SIZE) {
322 		return EINVAL;
323 	}
324 
325 	if_utun_tx_fsw_ring_size = value;
326 
327 	return 0;
328 }
329 
330 static int
331 sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
332 {
333 #pragma unused(arg1, arg2)
334 	int value = if_utun_rx_fsw_ring_size;
335 
336 	int error = sysctl_handle_int(oidp, &value, 0, req);
337 	if (error || !req->newptr) {
338 		return error;
339 	}
340 
341 	if (value < UTUN_IF_MIN_RING_SIZE ||
342 	    value > UTUN_IF_MAX_RING_SIZE) {
343 		return EINVAL;
344 	}
345 
346 	if_utun_rx_fsw_ring_size = value;
347 
348 	return 0;
349 }
350 
351 static errno_t
utun_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)352 utun_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
353     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
354     void **ring_ctx)
355 {
356 #pragma unused(nxprov)
357 #pragma unused(channel)
358 #pragma unused(ring_ctx)
359 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
360 	if (!is_tx_ring) {
361 		VERIFY(pcb->utun_netif_rxring == NULL);
362 		pcb->utun_netif_rxring = ring;
363 	} else {
364 		VERIFY(pcb->utun_netif_txring == NULL);
365 		pcb->utun_netif_txring = ring;
366 	}
367 	return 0;
368 }
369 
370 static void
utun_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)371 utun_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
372     kern_channel_ring_t ring)
373 {
374 #pragma unused(nxprov)
375 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
376 	if (pcb->utun_netif_rxring == ring) {
377 		pcb->utun_netif_rxring = NULL;
378 	} else if (pcb->utun_netif_txring == ring) {
379 		pcb->utun_netif_txring = NULL;
380 	}
381 }
382 
383 static errno_t
utun_netif_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)384 utun_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
385     kern_channel_ring_t tx_ring, uint32_t flags)
386 {
387 #pragma unused(nxprov)
388 #pragma unused(flags)
389 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
390 
391 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
392 
393 	if (!utun_data_move_begin(pcb)) {
394 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
395 		    __func__, if_name(pcb->utun_ifp));
396 		return 0;
397 	}
398 
399 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
400 
401 	struct kern_channel_ring_stat_increment tx_ring_stats;
402 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
403 	kern_channel_slot_t tx_pslot = NULL;
404 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
405 	kern_packet_t tx_chain_ph = 0;
406 
407 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
408 
409 	if (tx_slot == NULL) {
410 		// Nothing to write, don't bother signalling
411 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
412 		utun_data_move_end(pcb);
413 		return 0;
414 	}
415 
416 	if (pcb->utun_kpipe_enabled) {
417 		kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
418 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
419 
420 		// Signal the kernel pipe ring to read
421 		if (rx_ring != NULL) {
422 			kern_channel_notify(rx_ring, 0);
423 		}
424 		utun_data_move_end(pcb);
425 		return 0;
426 	}
427 
428 	// If we're here, we're injecting into the utun kernel control socket
429 	while (tx_slot != NULL) {
430 		size_t length = 0;
431 		mbuf_t data = NULL;
432 
433 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
434 
435 		if (tx_ph == 0) {
436 			// Advance TX ring
437 			tx_pslot = tx_slot;
438 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
439 			continue;
440 		}
441 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
442 		if (tx_chain_ph != 0) {
443 			kern_packet_append(tx_ph, tx_chain_ph);
444 		}
445 		tx_chain_ph = tx_ph;
446 
447 		// Advance TX ring
448 		tx_pslot = tx_slot;
449 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
450 
451 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
452 		VERIFY(tx_buf != NULL);
453 
454 		/* tx_baddr is the absolute buffer address */
455 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
456 		VERIFY(tx_baddr != 0);
457 
458 		bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
459 
460 		uint32_t tx_offset = kern_buflet_get_data_offset(tx_buf);
461 		uint32_t tx_length = kern_buflet_get_data_length(tx_buf);
462 
463 		// The offset must be large enough for the headers
464 		VERIFY(tx_offset >= UTUN_HEADER_SIZE(pcb));
465 
466 		// Find family
467 		uint32_t af = 0;
468 		uint8_t vhl = *(uint8_t *)(tx_baddr + tx_offset);
469 		u_int ip_version = (vhl >> 4);
470 		switch (ip_version) {
471 		case 4: {
472 			af = AF_INET;
473 			break;
474 		}
475 		case 6: {
476 			af = AF_INET6;
477 			break;
478 		}
479 		default: {
480 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s: unknown ip version %u vhl %u tx_offset %u len %u header_size %zu\n",
481 			    pcb->utun_ifp->if_xname, ip_version, vhl, tx_offset, tx_length,
482 			    UTUN_HEADER_SIZE(pcb));
483 			break;
484 		}
485 		}
486 
487 		tx_offset -= UTUN_HEADER_SIZE(pcb);
488 		tx_length += UTUN_HEADER_SIZE(pcb);
489 		tx_baddr += tx_offset;
490 
491 		length = MIN(tx_length, pcb->utun_slot_size);
492 
493 		// Copy in family
494 		memcpy(tx_baddr, &af, sizeof(af));
495 		if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
496 			kern_packet_get_euuid(tx_ph, (void *)(tx_baddr + sizeof(af)));
497 		}
498 
499 		if (length > 0) {
500 			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
501 			if (error == 0) {
502 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
503 				if (error == 0) {
504 					error = utun_output(pcb->utun_ifp, data);
505 					if (error != 0) {
506 						os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - utun_output error %d\n", pcb->utun_ifp->if_xname, error);
507 					}
508 				} else {
509 					os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->utun_ifp->if_xname, length, error);
510 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
511 					STATS_INC(nifs, NETIF_STATS_DROP);
512 					mbuf_freem(data);
513 					data = NULL;
514 				}
515 			} else {
516 				os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->utun_ifp->if_xname, error);
517 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
518 				STATS_INC(nifs, NETIF_STATS_DROP);
519 			}
520 		} else {
521 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - 0 length packet\n", pcb->utun_ifp->if_xname);
522 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
523 			STATS_INC(nifs, NETIF_STATS_DROP);
524 		}
525 
526 		if (data == NULL) {
527 			continue;
528 		}
529 
530 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
531 		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
532 
533 		tx_ring_stats.kcrsi_slots_transferred++;
534 		tx_ring_stats.kcrsi_bytes_transferred += length;
535 	}
536 	if (tx_chain_ph != 0) {
537 		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
538 	}
539 	if (tx_pslot) {
540 		kern_channel_advance_slot(tx_ring, tx_pslot);
541 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
542 		(void)kern_channel_reclaim(tx_ring);
543 	}
544 
545 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
546 	utun_data_move_end(pcb);
547 	return 0;
548 }
549 
550 static errno_t
utun_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)551 utun_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
552     kern_channel_ring_t ring, __unused uint32_t flags)
553 {
554 #pragma unused(nxprov)
555 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
556 	boolean_t more = false;
557 	errno_t rc = 0;
558 
559 	if (!utun_data_move_begin(pcb)) {
560 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
561 		    __func__, if_name(pcb->utun_ifp));
562 		return 0;
563 	}
564 
565 	/*
566 	 * Refill and sync the ring; we may be racing against another thread doing
567 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
568 	 * variant here.
569 	 */
570 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
571 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
572 		os_log_error(OS_LOG_DEFAULT, "%s, tx refill failed %d\n", __func__, rc);
573 	}
574 
575 	(void) kr_enter(ring, TRUE);
576 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
577 
578 	if (pcb->utun_kpipe_enabled) {
579 		uint32_t tx_available = kern_channel_available_slot_count(ring);
580 		if (pcb->utun_netif_txring_size > 0 &&
581 		    tx_available >= pcb->utun_netif_txring_size - 1) {
582 			// No room left in tx ring, disable output for now
583 			errno_t error = ifnet_disable_output(pcb->utun_ifp);
584 			if (error != 0) {
585 				os_log_error(OS_LOG_DEFAULT, "utun_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
586 			}
587 		}
588 	}
589 
590 	if (pcb->utun_kpipe_enabled) {
591 		kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
592 
593 		// Unlock while calling notify
594 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
595 		// Signal the kernel pipe ring to read
596 		if (rx_ring != NULL) {
597 			kern_channel_notify(rx_ring, 0);
598 		}
599 	} else {
600 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
601 	}
602 
603 	kr_exit(ring);
604 	utun_data_move_end(pcb);
605 	return 0;
606 }
607 
/*
 * Netif nexus RX sync callback: fills free slots of rx_ring with inbound packets.
 *
 * Runs between utun_data_move_begin()/utun_data_move_end(); always returns 0.
 *
 * Two sources are drained, in this order:
 *  1. The PCB's pending input mbuf list (utun_input_chain). Each mbuf has its
 *     utun header (UTUN_HEADER_SIZE bytes) skipped and the remainder copied
 *     into a freshly allocated packet.
 *  2. The kernel pipe TX ring (pcb->utun_kpipe_txring), if set. Each packet's
 *     payload after the utun header is copied into a freshly allocated packet.
 *
 * Locking: utun_pcb_lock is held shared throughout, except that it is dropped
 * around kr_enter() on the kernel pipe TX ring and then re-taken, after which
 * the ring pointer is re-validated. utun_input_chain_lock is only held while
 * the mbuf list is inspected or unlinked.
 *
 * nxprov   unused.
 * nexus    nexus whose context is the struct utun_pcb.
 * rx_ring  ring to fill.
 * flags    unused.
 */
608 static errno_t
utun_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)609 utun_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
610     kern_channel_ring_t rx_ring, uint32_t flags)
611 {
612 #pragma unused(nxprov)
613 #pragma unused(flags)
614 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
615 	struct kern_channel_ring_stat_increment rx_ring_stats;
616 
617 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
618 
619 	if (!utun_data_move_begin(pcb)) {
620 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
621 		    __func__, if_name(pcb->utun_ifp));
622 		return 0;
623 	}
624 
625 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
626 
627 	// Reclaim user-released slots
628 	(void) kern_channel_reclaim(rx_ring);
629 
630 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
631 
632 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
633 	if (avail == 0) {
634 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
635 		utun_data_move_end(pcb);
636 		return 0;
637 	}
638 
639 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
640 	VERIFY(rx_pp != NULL);
641 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
642 	kern_channel_slot_t rx_pslot = NULL;
643 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
644 
	// Phase 1: move pending input mbufs into RX slots
645 	while (rx_slot != NULL) {
646 		// Check for a waiting packet
647 		lck_mtx_lock(&pcb->utun_input_chain_lock);
648 		mbuf_t data = pcb->utun_input_chain;
649 		if (data == NULL) {
650 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
651 			break;
652 		}
653 
654 		// Allocate rx packet
		// (done before unlinking, so on failure the mbuf stays queued)
655 		kern_packet_t rx_ph = 0;
656 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
657 		if (__improbable(error != 0)) {
658 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
659 			STATS_INC(nifs, NETIF_STATS_DROP);
660 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
661 			break;
662 		}
663 
664 		// Advance waiting packets
665 		if (pcb->utun_input_chain_count > 0) {
666 			pcb->utun_input_chain_count--;
667 		}
668 		pcb->utun_input_chain = data->m_nextpkt;
669 		data->m_nextpkt = NULL;
670 		if (pcb->utun_input_chain == NULL) {
671 			pcb->utun_input_chain_last = NULL;
672 		}
673 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
674 
675 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
676 		size_t length = mbuf_pkthdr_len(data);
677 
678 		if (length < header_offset) {
679 			// mbuf is too small
			// (rx_slot is not advanced; the next mbuf reuses this slot)
680 			mbuf_freem(data);
681 			kern_pbufpool_free(rx_pp, rx_ph);
682 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
683 			STATS_INC(nifs, NETIF_STATS_DROP);
684 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length too short for header %zu < %zu\n",
685 			    pcb->utun_ifp->if_xname, length, header_offset);
686 			continue;
687 		}
688 
689 		length -= header_offset;
690 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
691 			// Flush data
692 			mbuf_freem(data);
693 			kern_pbufpool_free(rx_pp, rx_ph);
694 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
695 			STATS_INC(nifs, NETIF_STATS_DROP);
696 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length %zu > %u\n",
697 			    pcb->utun_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
698 			continue;
699 		}
700 
701 		mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
702 
703 		// Fillout rx packet
704 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
705 		VERIFY(rx_buf != NULL);
706 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
707 		VERIFY(rx_baddr != NULL);
708 
709 		// Copy-in data from mbuf to buflet
		// NOTE(review): the return value of mbuf_copydata is not checked
710 		mbuf_copydata(data, header_offset, length, (void *)rx_baddr);
711 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
712 
713 		// Finalize and attach the packet
714 		error = kern_buflet_set_data_offset(rx_buf, 0);
715 		VERIFY(error == 0);
716 		error = kern_buflet_set_data_length(rx_buf, length);
717 		VERIFY(error == 0);
718 		error = kern_packet_set_headroom(rx_ph, 0);
719 		VERIFY(error == 0);
720 		error = kern_packet_finalize(rx_ph);
721 		VERIFY(error == 0);
722 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
723 		VERIFY(error == 0);
724 
725 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
726 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
727 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
728 
729 		rx_ring_stats.kcrsi_slots_transferred++;
730 		rx_ring_stats.kcrsi_bytes_transferred += length;
731 
732 		mbuf_freem(data);
733 
734 		// Advance ring
735 		rx_pslot = rx_slot;
736 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
737 	}
738 
	// Phase 2: move packets from the kernel pipe TX ring into remaining RX slots
739 	struct kern_channel_ring_stat_increment tx_ring_stats;
740 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
741 	kern_channel_ring_t tx_ring = pcb->utun_kpipe_txring;
742 	kern_channel_slot_t tx_pslot = NULL;
743 	kern_channel_slot_t tx_slot = NULL;
744 	if (tx_ring == NULL) {
745 		// Kernel pipe TX ring not set up yet, nothing to read
746 		goto done;
747 	}
748 	// Unlock utun before entering ring
749 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
750 
751 	(void)kr_enter(tx_ring, TRUE);
752 
753 	// Lock again after entering and validate
754 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
755 	if (tx_ring != pcb->utun_kpipe_txring) {
		// Ring changed while the lock was dropped; "done" still calls kr_exit on the ring we entered
756 		goto done;
757 	}
758 
759 	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
760 	if (tx_slot == NULL) {
761 		// Nothing to read, don't bother signalling
762 		goto done;
763 	}
764 
765 	while (rx_slot != NULL && tx_slot != NULL) {
766 		// rx_ph is allocated further down, once the TX slot is known to hold a packet
767 		kern_packet_t rx_ph = 0;
768 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
769 
770 		// Advance TX ring
771 		tx_pslot = tx_slot;
772 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
773 
774 		/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
775 		if (tx_ph == 0) {
776 			continue;
777 		}
778 
779 		/* XXX We could try this alloc before advancing the slot to avoid
780 		 * dropping the packet on failure to allocate.
781 		 */
782 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
783 		if (__improbable(error != 0)) {
784 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
785 			STATS_INC(nifs, NETIF_STATS_DROP);
786 			break;
787 		}
788 
789 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
790 		VERIFY(tx_buf != NULL);
791 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
792 		VERIFY(tx_baddr != 0);
793 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
794 
795 		// Check packet length
796 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
797 		uint32_t tx_length = kern_packet_get_data_length(tx_ph);
798 		if (tx_length < header_offset) {
799 			// Packet is too small
800 			kern_pbufpool_free(rx_pp, rx_ph);
801 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
802 			STATS_INC(nifs, NETIF_STATS_DROP);
803 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: packet length too short for header %u < %zu\n",
804 			    pcb->utun_ifp->if_xname, tx_length, header_offset);
805 			continue;
806 		}
807 
		// Payload length without the utun header, capped at the configured slot size
808 		size_t length = MIN(tx_length - header_offset,
809 		    pcb->utun_slot_size);
810 
811 		tx_ring_stats.kcrsi_slots_transferred++;
812 		tx_ring_stats.kcrsi_bytes_transferred += length;
813 
814 		// Fillout rx packet
815 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
816 		VERIFY(rx_buf != NULL);
817 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
818 		VERIFY(rx_baddr != NULL);
819 
820 		// Copy-in data from tx to rx
821 		memcpy((void *)rx_baddr, (void *)(tx_baddr + header_offset), length);
822 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
823 
824 		// Finalize and attach the packet
825 		error = kern_buflet_set_data_offset(rx_buf, 0);
826 		VERIFY(error == 0);
827 		error = kern_buflet_set_data_length(rx_buf, length);
828 		VERIFY(error == 0);
829 		error = kern_packet_set_headroom(rx_ph, 0);
830 		VERIFY(error == 0);
831 		error = kern_packet_finalize(rx_ph);
832 		VERIFY(error == 0);
833 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
834 		VERIFY(error == 0);
835 
836 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
837 		STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
838 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
839 
840 		rx_ring_stats.kcrsi_slots_transferred++;
841 		rx_ring_stats.kcrsi_bytes_transferred += length;
842 
843 		rx_pslot = rx_slot;
844 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
845 	}
846 
847 done:
	// Publish progress on both rings; utun_pcb_lock is held shared on every path reaching here
848 	if (rx_pslot) {
849 		kern_channel_advance_slot(rx_ring, rx_pslot);
850 		kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
851 	}
852 
853 	if (tx_pslot) {
854 		kern_channel_advance_slot(tx_ring, tx_pslot);
855 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
856 		(void)kern_channel_reclaim(tx_ring);
857 	}
858 
859 	// Unlock first, then exit ring
860 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
861 	if (tx_ring != NULL) {
862 		if (tx_pslot != NULL) {
863 			kern_channel_notify(tx_ring, 0);
864 		}
865 		kr_exit(tx_ring);
866 	}
867 
868 	utun_data_move_end(pcb);
869 	return 0;
870 }
871 
872 static errno_t
utun_nexus_ifattach(struct utun_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)873 utun_nexus_ifattach(struct utun_pcb *pcb,
874     struct ifnet_init_eparams *init_params,
875     struct ifnet **ifp)
876 {
877 	errno_t err;
878 	nexus_controller_t controller = kern_nexus_shared_controller();
879 	struct kern_nexus_net_init net_init;
880 	struct kern_pbufpool_init pp_init;
881 
882 	nexus_name_t provider_name;
883 	snprintf((char *)provider_name, sizeof(provider_name),
884 	    "com.apple.netif.%s", pcb->utun_if_xname);
885 
886 	struct kern_nexus_provider_init prov_init = {
887 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
888 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
889 		.nxpi_pre_connect = utun_nexus_pre_connect,
890 		.nxpi_connected = utun_nexus_connected,
891 		.nxpi_pre_disconnect = utun_netif_pre_disconnect,
892 		.nxpi_disconnected = utun_nexus_disconnected,
893 		.nxpi_ring_init = utun_netif_ring_init,
894 		.nxpi_ring_fini = utun_netif_ring_fini,
895 		.nxpi_slot_init = NULL,
896 		.nxpi_slot_fini = NULL,
897 		.nxpi_sync_tx = utun_netif_sync_tx,
898 		.nxpi_sync_rx = utun_netif_sync_rx,
899 		.nxpi_tx_doorbell = utun_netif_tx_doorbell,
900 	};
901 
902 	nexus_attr_t nxa = NULL;
903 	err = kern_nexus_attr_create(&nxa);
904 	if (err != 0) {
905 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
906 		    __func__, err);
907 		goto failed;
908 	}
909 
910 	uint64_t slot_buffer_size = pcb->utun_slot_size;
911 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
912 	VERIFY(err == 0);
913 
914 	// Reset ring size for netif nexus to limit memory usage
915 	uint64_t ring_size = pcb->utun_netif_ring_size;
916 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
917 	VERIFY(err == 0);
918 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
919 	VERIFY(err == 0);
920 
921 	pcb->utun_netif_txring_size = ring_size;
922 
923 	bzero(&pp_init, sizeof(pp_init));
924 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
925 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
926 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
927 	pp_init.kbi_bufsize = pcb->utun_slot_size;
928 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
929 	pp_init.kbi_max_frags = 1;
930 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
931 	    "%s", provider_name);
932 	pp_init.kbi_ctx = NULL;
933 	pp_init.kbi_ctx_retain = NULL;
934 	pp_init.kbi_ctx_release = NULL;
935 
936 	err = kern_pbufpool_create(&pp_init, &pcb->utun_netif_pp, NULL);
937 	if (err != 0) {
938 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
939 		goto failed;
940 	}
941 
942 	err = kern_nexus_controller_register_provider(controller,
943 	    utun_nx_dom_prov,
944 	    provider_name,
945 	    &prov_init,
946 	    sizeof(prov_init),
947 	    nxa,
948 	    &pcb->utun_nx.if_provider);
949 	if (err != 0) {
950 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
951 		    __func__, err);
952 		goto failed;
953 	}
954 
955 	bzero(&net_init, sizeof(net_init));
956 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
957 	net_init.nxneti_flags = 0;
958 	net_init.nxneti_eparams = init_params;
959 	net_init.nxneti_lladdr = NULL;
960 	net_init.nxneti_prepare = utun_netif_prepare;
961 	net_init.nxneti_rx_pbufpool = pcb->utun_netif_pp;
962 	net_init.nxneti_tx_pbufpool = pcb->utun_netif_pp;
963 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
964 	    pcb->utun_nx.if_provider,
965 	    pcb,
966 	    NULL,
967 	    &pcb->utun_nx.if_instance,
968 	    &net_init,
969 	    ifp);
970 	if (err != 0) {
971 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
972 		    __func__, err);
973 		kern_nexus_controller_deregister_provider(controller,
974 		    pcb->utun_nx.if_provider);
975 		uuid_clear(pcb->utun_nx.if_provider);
976 		goto failed;
977 	}
978 
979 failed:
980 	if (nxa) {
981 		kern_nexus_attr_destroy(nxa);
982 	}
983 	if (err && pcb->utun_netif_pp != NULL) {
984 		kern_pbufpool_destroy(pcb->utun_netif_pp);
985 		pcb->utun_netif_pp = NULL;
986 	}
987 	return err;
988 }
989 
990 static void
utun_detach_provider_and_instance(uuid_t provider,uuid_t instance)991 utun_detach_provider_and_instance(uuid_t provider, uuid_t instance)
992 {
993 	nexus_controller_t controller = kern_nexus_shared_controller();
994 	errno_t err;
995 
996 	if (!uuid_is_null(instance)) {
997 		err = kern_nexus_controller_free_provider_instance(controller,
998 		    instance);
999 		if (err != 0) {
1000 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
1001 			    __func__, err);
1002 		}
1003 		uuid_clear(instance);
1004 	}
1005 	if (!uuid_is_null(provider)) {
1006 		err = kern_nexus_controller_deregister_provider(controller,
1007 		    provider);
1008 		if (err != 0) {
1009 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1010 		}
1011 		uuid_clear(provider);
1012 	}
1013 	return;
1014 }
1015 
1016 static void
utun_nexus_detach(struct utun_pcb * pcb)1017 utun_nexus_detach(struct utun_pcb *pcb)
1018 {
1019 	utun_nx_t nx = &pcb->utun_nx;
1020 	nexus_controller_t controller = kern_nexus_shared_controller();
1021 	errno_t err;
1022 
1023 	if (!uuid_is_null(nx->fsw_device)) {
1024 		err = kern_nexus_ifdetach(controller,
1025 		    nx->fsw_instance,
1026 		    nx->fsw_device);
1027 		if (err != 0) {
1028 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
1029 			    __func__, err);
1030 		}
1031 	}
1032 
1033 	utun_detach_provider_and_instance(nx->fsw_provider,
1034 	    nx->fsw_instance);
1035 	utun_detach_provider_and_instance(nx->if_provider,
1036 	    nx->if_instance);
1037 
1038 	if (pcb->utun_netif_pp != NULL) {
1039 		kern_pbufpool_destroy(pcb->utun_netif_pp);
1040 		pcb->utun_netif_pp = NULL;
1041 	}
1042 	memset(nx, 0, sizeof(*nx));
1043 }
1044 
1045 static errno_t
utun_create_fs_provider_and_instance(struct utun_pcb * pcb,const char * type_name,const char * ifname,uuid_t * provider,uuid_t * instance)1046 utun_create_fs_provider_and_instance(struct utun_pcb *pcb,
1047     const char *type_name,
1048     const char *ifname,
1049     uuid_t *provider, uuid_t *instance)
1050 {
1051 	nexus_attr_t attr = NULL;
1052 	nexus_controller_t controller = kern_nexus_shared_controller();
1053 	uuid_t dom_prov;
1054 	errno_t err;
1055 	struct kern_nexus_init init;
1056 	nexus_name_t    provider_name;
1057 
1058 	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1059 	    &dom_prov);
1060 	if (err != 0) {
1061 		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
1062 		    __func__, type_name, err);
1063 		goto failed;
1064 	}
1065 
1066 	err = kern_nexus_attr_create(&attr);
1067 	if (err != 0) {
1068 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1069 		    __func__, err);
1070 		goto failed;
1071 	}
1072 
1073 	uint64_t slot_buffer_size = pcb->utun_slot_size;
1074 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1075 	VERIFY(err == 0);
1076 
1077 	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1078 	uint64_t tx_ring_size = pcb->utun_tx_fsw_ring_size;
1079 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1080 	VERIFY(err == 0);
1081 	uint64_t rx_ring_size = pcb->utun_rx_fsw_ring_size;
1082 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1083 	VERIFY(err == 0);
1084 	/*
1085 	 * Configure flowswitch to use super-packet (multi-buflet).
1086 	 * This allows flowswitch to perform intra-stack packet aggregation.
1087 	 */
1088 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
1089 	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
1090 	VERIFY(err == 0);
1091 
1092 	snprintf((char *)provider_name, sizeof(provider_name),
1093 	    "com.apple.%s.%s", type_name, ifname);
1094 	err = kern_nexus_controller_register_provider(controller,
1095 	    dom_prov,
1096 	    provider_name,
1097 	    NULL,
1098 	    0,
1099 	    attr,
1100 	    provider);
1101 	kern_nexus_attr_destroy(attr);
1102 	attr = NULL;
1103 	if (err != 0) {
1104 		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
1105 		    __func__, type_name, err);
1106 		goto failed;
1107 	}
1108 	bzero(&init, sizeof(init));
1109 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1110 	err = kern_nexus_controller_alloc_provider_instance(controller,
1111 	    *provider,
1112 	    NULL, NULL,
1113 	    instance, &init);
1114 	if (err != 0) {
1115 		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
1116 		    __func__, type_name, err);
1117 		kern_nexus_controller_deregister_provider(controller,
1118 		    *provider);
1119 		uuid_clear(*provider);
1120 	}
1121 failed:
1122 	return err;
1123 }
1124 
1125 static errno_t
utun_flowswitch_attach(struct utun_pcb * pcb)1126 utun_flowswitch_attach(struct utun_pcb *pcb)
1127 {
1128 	nexus_controller_t controller = kern_nexus_shared_controller();
1129 	errno_t err = 0;
1130 	utun_nx_t nx = &pcb->utun_nx;
1131 
1132 	// Allocate flowswitch
1133 	err = utun_create_fs_provider_and_instance(pcb,
1134 	    "flowswitch",
1135 	    pcb->utun_ifp->if_xname,
1136 	    &nx->fsw_provider,
1137 	    &nx->fsw_instance);
1138 	if (err != 0) {
1139 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
1140 		    __func__);
1141 		goto failed;
1142 	}
1143 
1144 	// Attach flowswitch to device port
1145 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
1146 	    NULL, nx->if_instance,
1147 	    FALSE, &nx->fsw_device);
1148 	if (err != 0) {
1149 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
1150 		goto failed;
1151 	}
1152 
1153 	// Extract the agent UUID and save for later
1154 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
1155 	if (flowswitch_nx != NULL) {
1156 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
1157 		if (flowswitch != NULL) {
1158 			FSW_RLOCK(flowswitch);
1159 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
1160 			FSW_UNLOCK(flowswitch);
1161 		} else {
1162 			os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - flowswitch is NULL\n");
1163 		}
1164 		nx_release(flowswitch_nx);
1165 	} else {
1166 		os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - unable to find flowswitch nexus\n");
1167 	}
1168 
1169 	return 0;
1170 
1171 failed:
1172 	utun_nexus_detach(pcb);
1173 
1174 	errno_t detach_error = 0;
1175 	if ((detach_error = ifnet_detach(pcb->utun_ifp)) != 0) {
1176 		panic("utun_flowswitch_attach - ifnet_detach failed: %d", detach_error);
1177 		/* NOT REACHED */
1178 	}
1179 
1180 	return err;
1181 }
1182 
1183 static errno_t
utun_register_kernel_pipe_nexus(struct utun_pcb * pcb)1184 utun_register_kernel_pipe_nexus(struct utun_pcb *pcb)
1185 {
1186 	nexus_attr_t nxa = NULL;
1187 	errno_t result;
1188 
1189 	lck_mtx_lock(&utun_lock);
1190 	if (utun_ncd_refcount++) {
1191 		lck_mtx_unlock(&utun_lock);
1192 		return 0;
1193 	}
1194 
1195 	result = kern_nexus_controller_create(&utun_ncd);
1196 	if (result) {
1197 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
1198 		    __FUNCTION__, result);
1199 		goto done;
1200 	}
1201 
1202 	uuid_t dom_prov;
1203 	result = kern_nexus_get_default_domain_provider(
1204 		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1205 	if (result) {
1206 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
1207 		    __FUNCTION__, result);
1208 		goto done;
1209 	}
1210 
1211 	struct kern_nexus_provider_init prov_init = {
1212 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1213 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1214 		.nxpi_pre_connect = utun_nexus_pre_connect,
1215 		.nxpi_connected = utun_nexus_connected,
1216 		.nxpi_pre_disconnect = utun_nexus_pre_disconnect,
1217 		.nxpi_disconnected = utun_nexus_disconnected,
1218 		.nxpi_ring_init = utun_kpipe_ring_init,
1219 		.nxpi_ring_fini = utun_kpipe_ring_fini,
1220 		.nxpi_slot_init = NULL,
1221 		.nxpi_slot_fini = NULL,
1222 		.nxpi_sync_tx = utun_kpipe_sync_tx,
1223 		.nxpi_sync_rx = utun_kpipe_sync_rx,
1224 		.nxpi_tx_doorbell = NULL,
1225 	};
1226 
1227 	result = kern_nexus_attr_create(&nxa);
1228 	if (result) {
1229 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1230 		    __FUNCTION__, result);
1231 		goto done;
1232 	}
1233 
1234 	uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1235 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1236 	VERIFY(result == 0);
1237 
1238 	// Reset ring size for kernel pipe nexus to limit memory usage
1239 	uint64_t ring_size =
1240 	    pcb->utun_kpipe_tx_ring_size != 0 ? pcb->utun_kpipe_tx_ring_size :
1241 	    if_utun_ring_size;
1242 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1243 	VERIFY(result == 0);
1244 
1245 	ring_size =
1246 	    pcb->utun_kpipe_rx_ring_size != 0 ? pcb->utun_kpipe_rx_ring_size :
1247 	    if_utun_ring_size;
1248 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1249 	VERIFY(result == 0);
1250 
1251 	result = kern_nexus_controller_register_provider(utun_ncd,
1252 	    dom_prov,
1253 	    (const uint8_t *)"com.apple.nexus.utun.kpipe",
1254 	    &prov_init,
1255 	    sizeof(prov_init),
1256 	    nxa,
1257 	    &utun_kpipe_uuid);
1258 	if (result) {
1259 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
1260 		    __FUNCTION__, result);
1261 		goto done;
1262 	}
1263 
1264 done:
1265 	if (nxa) {
1266 		kern_nexus_attr_destroy(nxa);
1267 	}
1268 
1269 	if (result) {
1270 		if (utun_ncd) {
1271 			kern_nexus_controller_destroy(utun_ncd);
1272 			utun_ncd = NULL;
1273 		}
1274 		utun_ncd_refcount = 0;
1275 	}
1276 
1277 	lck_mtx_unlock(&utun_lock);
1278 
1279 	return result;
1280 }
1281 
1282 static void
utun_unregister_kernel_pipe_nexus(void)1283 utun_unregister_kernel_pipe_nexus(void)
1284 {
1285 	lck_mtx_lock(&utun_lock);
1286 
1287 	VERIFY(utun_ncd_refcount > 0);
1288 
1289 	if (--utun_ncd_refcount == 0) {
1290 		kern_nexus_controller_destroy(utun_ncd);
1291 		utun_ncd = NULL;
1292 	}
1293 
1294 	lck_mtx_unlock(&utun_lock);
1295 }
1296 
1297 // For use by socket option, not internally
1298 static errno_t
utun_disable_channel(struct utun_pcb * pcb)1299 utun_disable_channel(struct utun_pcb *pcb)
1300 {
1301 	errno_t result;
1302 	int enabled;
1303 	uuid_t uuid;
1304 
1305 	/* Wait until all threads in the data paths are done. */
1306 	utun_wait_data_move_drain(pcb);
1307 
1308 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1309 
1310 	enabled = pcb->utun_kpipe_enabled;
1311 	uuid_copy(uuid, pcb->utun_kpipe_uuid);
1312 
1313 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid) == !enabled);
1314 
1315 	pcb->utun_kpipe_enabled = 0;
1316 	uuid_clear(pcb->utun_kpipe_uuid);
1317 
1318 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1319 
1320 	if (enabled) {
1321 		result = kern_nexus_controller_free_provider_instance(utun_ncd, uuid);
1322 	} else {
1323 		result = ENXIO;
1324 	}
1325 
1326 	if (!result) {
1327 		if (pcb->utun_kpipe_pp != NULL) {
1328 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1329 			pcb->utun_kpipe_pp = NULL;
1330 		}
1331 		utun_unregister_kernel_pipe_nexus();
1332 	}
1333 
1334 	return result;
1335 }
1336 
1337 static errno_t
utun_enable_channel(struct utun_pcb * pcb,struct proc * proc)1338 utun_enable_channel(struct utun_pcb *pcb, struct proc *proc)
1339 {
1340 	struct kern_nexus_init init;
1341 	struct kern_pbufpool_init pp_init;
1342 	errno_t result;
1343 
1344 	kauth_cred_t cred = kauth_cred_get();
1345 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
1346 	if (result) {
1347 		return result;
1348 	}
1349 
1350 	result = utun_register_kernel_pipe_nexus(pcb);
1351 	if (result) {
1352 		return result;
1353 	}
1354 
1355 	VERIFY(utun_ncd);
1356 
1357 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1358 
1359 	if (pcb->utun_kpipe_enabled) {
1360 		result = EEXIST; // return success instead?
1361 		goto done;
1362 	}
1363 
1364 	/*
1365 	 * Make sure we can fit packets in the channel buffers and
1366 	 * Allow an extra 4 bytes for the protocol number header in the channel
1367 	 */
1368 	if (pcb->utun_ifp->if_mtu + UTUN_HEADER_SIZE(pcb) > pcb->utun_slot_size) {
1369 		result = EOPNOTSUPP;
1370 		goto done;
1371 	}
1372 
1373 	bzero(&pp_init, sizeof(pp_init));
1374 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1375 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1376 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
1377 	pp_init.kbi_bufsize = pcb->utun_slot_size;
1378 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
1379 	pp_init.kbi_max_frags = 1;
1380 	pp_init.kbi_flags |= KBIF_QUANTUM;
1381 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1382 	    "com.apple.kpipe.%s", pcb->utun_if_xname);
1383 	pp_init.kbi_ctx = NULL;
1384 	pp_init.kbi_ctx_retain = NULL;
1385 	pp_init.kbi_ctx_release = NULL;
1386 
1387 	result = kern_pbufpool_create(&pp_init, &pcb->utun_kpipe_pp,
1388 	    NULL);
1389 	if (result != 0) {
1390 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, result);
1391 		goto done;
1392 	}
1393 
1394 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid));
1395 	bzero(&init, sizeof(init));
1396 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1397 	init.nxi_tx_pbufpool = pcb->utun_kpipe_pp;
1398 	result = kern_nexus_controller_alloc_provider_instance(utun_ncd,
1399 	    utun_kpipe_uuid, pcb, NULL, &pcb->utun_kpipe_uuid, &init);
1400 	if (result) {
1401 		goto done;
1402 	}
1403 
1404 	nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
1405 	result = kern_nexus_controller_bind_provider_instance(utun_ncd,
1406 	    pcb->utun_kpipe_uuid, &port,
1407 	    proc_pid(proc), NULL, NULL, 0, NEXUS_BIND_PID);
1408 	if (result) {
1409 		kern_nexus_controller_free_provider_instance(utun_ncd,
1410 		    pcb->utun_kpipe_uuid);
1411 		uuid_clear(pcb->utun_kpipe_uuid);
1412 		goto done;
1413 	}
1414 
1415 	pcb->utun_kpipe_enabled = 1;
1416 
1417 done:
1418 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1419 
1420 	if (result) {
1421 		if (pcb->utun_kpipe_pp != NULL) {
1422 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1423 			pcb->utun_kpipe_pp = NULL;
1424 		}
1425 		utun_unregister_kernel_pipe_nexus();
1426 	}
1427 
1428 	return result;
1429 }
1430 
1431 #endif // UTUN_NEXUS
1432 
1433 errno_t
utun_register_control(void)1434 utun_register_control(void)
1435 {
1436 	struct kern_ctl_reg kern_ctl;
1437 	errno_t result = 0;
1438 
1439 #if UTUN_NEXUS
1440 	utun_register_nexus();
1441 #endif // UTUN_NEXUS
1442 
1443 	TAILQ_INIT(&utun_head);
1444 
1445 	bzero(&kern_ctl, sizeof(kern_ctl));
1446 	strlcpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
1447 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
1448 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP | CTL_FLAG_REG_EXTENDED; /* Require root */
1449 	kern_ctl.ctl_sendsize = 512 * 1024;
1450 	kern_ctl.ctl_recvsize = 512 * 1024;
1451 	kern_ctl.ctl_setup = utun_ctl_setup;
1452 	kern_ctl.ctl_bind = utun_ctl_bind;
1453 	kern_ctl.ctl_connect = utun_ctl_connect;
1454 	kern_ctl.ctl_disconnect = utun_ctl_disconnect;
1455 	kern_ctl.ctl_send = utun_ctl_send;
1456 	kern_ctl.ctl_setopt = utun_ctl_setopt;
1457 	kern_ctl.ctl_getopt = utun_ctl_getopt;
1458 	kern_ctl.ctl_rcvd = utun_ctl_rcvd;
1459 
1460 	result = ctl_register(&kern_ctl, &utun_kctlref);
1461 	if (result != 0) {
1462 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - ctl_register failed: %d\n", result);
1463 		return result;
1464 	}
1465 
1466 	/* Register the protocol plumbers */
1467 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN,
1468 	    utun_attach_proto, NULL)) != 0) {
1469 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN) failed: %d\n",
1470 		    result);
1471 		ctl_deregister(utun_kctlref);
1472 		return result;
1473 	}
1474 
1475 	/* Register the protocol plumbers */
1476 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN,
1477 	    utun_attach_proto, NULL)) != 0) {
1478 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_UTUN);
1479 		ctl_deregister(utun_kctlref);
1480 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN) failed: %d\n",
1481 		    result);
1482 		return result;
1483 	}
1484 
1485 	return 0;
1486 }
1487 
1488 /* Kernel control functions */
1489 
1490 static inline int
utun_find_by_unit(u_int32_t unit)1491 utun_find_by_unit(u_int32_t unit)
1492 {
1493 	struct utun_pcb *next_pcb = NULL;
1494 	int found = 0;
1495 
1496 	TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1497 		if (next_pcb->utun_unit == unit) {
1498 			found = 1;
1499 			break;
1500 		}
1501 	}
1502 
1503 	return found;
1504 }
1505 
1506 static inline void
utun_free_pcb(struct utun_pcb * pcb,bool locked)1507 utun_free_pcb(struct utun_pcb *pcb, bool locked)
1508 {
1509 #if UTUN_NEXUS
1510 	mbuf_freem_list(pcb->utun_input_chain);
1511 	pcb->utun_input_chain_count = 0;
1512 	lck_mtx_destroy(&pcb->utun_input_chain_lock, &utun_lck_grp);
1513 	lck_mtx_destroy(&pcb->utun_pcb_data_move_lock, &utun_lck_grp);
1514 #endif // UTUN_NEXUS
1515 	lck_rw_destroy(&pcb->utun_pcb_lock, &utun_lck_grp);
1516 	if (!locked) {
1517 		lck_mtx_lock(&utun_lock);
1518 	}
1519 	TAILQ_REMOVE(&utun_head, pcb, utun_chain);
1520 	if (!locked) {
1521 		lck_mtx_unlock(&utun_lock);
1522 	}
1523 	zfree(utun_pcb_zone, pcb);
1524 }
1525 
1526 static errno_t
utun_ctl_setup(u_int32_t * unit,void ** unitinfo)1527 utun_ctl_setup(u_int32_t *unit, void **unitinfo)
1528 {
1529 	if (unit == NULL || unitinfo == NULL) {
1530 		return EINVAL;
1531 	}
1532 
1533 	lck_mtx_lock(&utun_lock);
1534 
1535 	/* Find next available unit */
1536 	if (*unit == 0) {
1537 		*unit = 1;
1538 		while (*unit != ctl_maxunit) {
1539 			if (utun_find_by_unit(*unit)) {
1540 				(*unit)++;
1541 			} else {
1542 				break;
1543 			}
1544 		}
1545 		if (*unit == ctl_maxunit) {
1546 			lck_mtx_unlock(&utun_lock);
1547 			return EBUSY;
1548 		}
1549 	} else if (utun_find_by_unit(*unit)) {
1550 		lck_mtx_unlock(&utun_lock);
1551 		return EBUSY;
1552 	}
1553 
1554 	/* Find some open interface id */
1555 	u_int32_t chosen_unique_id = 1;
1556 	struct utun_pcb *next_pcb = TAILQ_LAST(&utun_head, utun_list);
1557 	if (next_pcb != NULL) {
1558 		/* List was not empty, add one to the last item */
1559 		chosen_unique_id = next_pcb->utun_unique_id + 1;
1560 		next_pcb = NULL;
1561 
1562 		/*
1563 		 * If this wrapped the id number, start looking at
1564 		 * the front of the list for an unused id.
1565 		 */
1566 		if (chosen_unique_id == 0) {
1567 			/* Find the next unused ID */
1568 			chosen_unique_id = 1;
1569 			TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1570 				if (next_pcb->utun_unique_id > chosen_unique_id) {
1571 					/* We found a gap */
1572 					break;
1573 				}
1574 
1575 				chosen_unique_id = next_pcb->utun_unique_id + 1;
1576 			}
1577 		}
1578 	}
1579 
1580 	struct utun_pcb *pcb = zalloc_flags(utun_pcb_zone, Z_WAITOK | Z_ZERO);
1581 
1582 	*unitinfo = pcb;
1583 	pcb->utun_unit = *unit;
1584 	pcb->utun_unique_id = chosen_unique_id;
1585 
1586 	if (next_pcb != NULL) {
1587 		TAILQ_INSERT_BEFORE(next_pcb, pcb, utun_chain);
1588 	} else {
1589 		TAILQ_INSERT_TAIL(&utun_head, pcb, utun_chain);
1590 	}
1591 
1592 	lck_mtx_unlock(&utun_lock);
1593 
1594 	return 0;
1595 }
1596 
1597 static errno_t
utun_ctl_bind(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1598 utun_ctl_bind(kern_ctl_ref kctlref,
1599     struct sockaddr_ctl *sac,
1600     void **unitinfo)
1601 {
1602 	if (*unitinfo == NULL) {
1603 		u_int32_t unit = 0;
1604 		(void)utun_ctl_setup(&unit, unitinfo);
1605 	}
1606 
1607 	struct utun_pcb *pcb = (struct utun_pcb *)*unitinfo;
1608 	if (pcb == NULL) {
1609 		return EINVAL;
1610 	}
1611 
1612 	if (pcb->utun_ctlref != NULL) {
1613 		// Return if bind was already called
1614 		return EINVAL;
1615 	}
1616 
1617 	pcb->utun_ctlref = kctlref;
1618 	pcb->utun_unit = sac->sc_unit;
1619 	pcb->utun_max_pending_packets = 1;
1620 
1621 #if UTUN_NEXUS
1622 	pcb->utun_use_netif = false;
1623 	pcb->utun_attach_fsw = true;
1624 	pcb->utun_netif_connected = false;
1625 	pcb->utun_slot_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1626 	pcb->utun_netif_ring_size = if_utun_ring_size;
1627 	pcb->utun_tx_fsw_ring_size = if_utun_tx_fsw_ring_size;
1628 	pcb->utun_rx_fsw_ring_size = if_utun_rx_fsw_ring_size;
1629 	pcb->utun_input_chain_count = 0;
1630 	lck_mtx_init(&pcb->utun_input_chain_lock, &utun_lck_grp, &utun_lck_attr);
1631 	lck_mtx_init(&pcb->utun_pcb_data_move_lock,
1632 	    &utun_lck_grp, &utun_lck_attr);
1633 #endif // UTUN_NEXUS
1634 
1635 	lck_rw_init(&pcb->utun_pcb_lock, &utun_lck_grp, &utun_lck_attr);
1636 
1637 	return 0;
1638 }
1639 
1640 static errno_t
utun_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1641 utun_ctl_connect(kern_ctl_ref kctlref,
1642     struct sockaddr_ctl *sac,
1643     void **unitinfo)
1644 {
1645 	struct ifnet_init_eparams utun_init = {};
1646 	errno_t result = 0;
1647 
1648 	if (*unitinfo == NULL) {
1649 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1650 	}
1651 
1652 	struct utun_pcb *pcb = *unitinfo;
1653 	if (pcb == NULL) {
1654 		return EINVAL;
1655 	}
1656 
1657 	/* Handle case where utun_ctl_setup() was called, but ipsec_ctl_bind() was not */
1658 	if (pcb->utun_ctlref == NULL) {
1659 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1660 	}
1661 
1662 	snprintf(pcb->utun_if_xname, sizeof(pcb->utun_if_xname), "utun%d", pcb->utun_unit - 1);
1663 	snprintf(pcb->utun_unique_name, sizeof(pcb->utun_unique_name), "utunid%d", pcb->utun_unique_id - 1);
1664 
1665 	/* Create the interface */
1666 	bzero(&utun_init, sizeof(utun_init));
1667 	utun_init.ver = IFNET_INIT_CURRENT_VERSION;
1668 	utun_init.len = sizeof(utun_init);
1669 
1670 #if UTUN_NEXUS
1671 	if (pcb->utun_use_netif) {
1672 		utun_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
1673 		utun_init.tx_headroom = UTUN_IF_HEADROOM_SIZE;
1674 	} else
1675 #endif // UTUN_NEXUS
1676 	{
1677 		utun_init.flags = IFNET_INIT_NX_NOAUTO;
1678 		utun_init.start = utun_start;
1679 		utun_init.framer_extended = utun_framer;
1680 	}
1681 	utun_init.name = "utun";
1682 	utun_init.unit = pcb->utun_unit - 1;
1683 	utun_init.uniqueid = pcb->utun_unique_name;
1684 	utun_init.uniqueid_len = strlen(pcb->utun_unique_name);
1685 	utun_init.family = IFNET_FAMILY_UTUN;
1686 	utun_init.type = IFT_OTHER;
1687 	utun_init.demux = utun_demux;
1688 	utun_init.add_proto = utun_add_proto;
1689 	utun_init.del_proto = utun_del_proto;
1690 	utun_init.softc = pcb;
1691 	utun_init.ioctl = utun_ioctl;
1692 	utun_init.free = utun_detached;
1693 
1694 #if UTUN_NEXUS
1695 	if (pcb->utun_use_netif) {
1696 		result = utun_nexus_ifattach(pcb, &utun_init, &pcb->utun_ifp);
1697 		if (result != 0) {
1698 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_nexus_ifattach failed: %d\n", result);
1699 			utun_free_pcb(pcb, false);
1700 			*unitinfo = NULL;
1701 			return result;
1702 		}
1703 
1704 		if (pcb->utun_attach_fsw) {
1705 			result = utun_flowswitch_attach(pcb);
1706 			if (result != 0) {
1707 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_flowswitch_attach failed: %d\n", result);
1708 				// Do not call utun_free_pcb(). We will be attached already, and will be freed later
1709 				// in utun_detached().
1710 				*unitinfo = NULL;
1711 				return result;
1712 			}
1713 		}
1714 
1715 		/* Attach to bpf */
1716 		bpfattach(pcb->utun_ifp, DLT_RAW, 0);
1717 	} else
1718 #endif // UTUN_NEXUS
1719 	{
1720 		/*
1721 		 * Upon success, this holds an ifnet reference which we will
1722 		 * release via ifnet_release() at final detach time.
1723 		 */
1724 		result = ifnet_allocate_extended(&utun_init, &pcb->utun_ifp);
1725 		if (result != 0) {
1726 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_allocate failed: %d\n", result);
1727 			utun_free_pcb(pcb, false);
1728 			*unitinfo = NULL;
1729 			return result;
1730 		}
1731 
1732 		/* Set flags and additional information. */
1733 		ifnet_set_mtu(pcb->utun_ifp, UTUN_DEFAULT_MTU);
1734 		ifnet_set_flags(pcb->utun_ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
1735 
1736 		/* The interface must generate its own IPv6 LinkLocal address,
1737 		 * if possible following the recommendation of RFC2472 to the 64bit interface ID
1738 		 */
1739 		ifnet_set_eflags(pcb->utun_ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
1740 
1741 		/* Reset the stats in case as the interface may have been recycled */
1742 		struct ifnet_stats_param stats;
1743 		bzero(&stats, sizeof(struct ifnet_stats_param));
1744 		ifnet_set_stat(pcb->utun_ifp, &stats);
1745 
1746 		/* Attach the interface */
1747 		result = ifnet_attach(pcb->utun_ifp, NULL);
1748 		if (result != 0) {
1749 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_attach failed: %d\n", result);
1750 			/* Release reference now since attach failed */
1751 			ifnet_release(pcb->utun_ifp);
1752 			utun_free_pcb(pcb, false);
1753 			*unitinfo = NULL;
1754 			return result;
1755 		}
1756 
1757 		/* Attach to bpf */
1758 		bpfattach(pcb->utun_ifp, DLT_NULL, UTUN_HEADER_SIZE(pcb));
1759 
1760 #if UTUN_NEXUS
1761 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
1762 		UTUN_SET_DATA_PATH_READY(pcb);
1763 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
1764 #endif // UTUN_NEXUS
1765 	}
1766 
1767 	/* The interfaces resoures allocated, mark it as running */
1768 	ifnet_set_flags(pcb->utun_ifp, IFF_RUNNING, IFF_RUNNING);
1769 
1770 	return result;
1771 }
1772 
1773 static errno_t
utun_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)1774 utun_detach_ip(ifnet_t interface,
1775     protocol_family_t protocol,
1776     socket_t pf_socket)
1777 {
1778 	errno_t result = EPROTONOSUPPORT;
1779 
1780 	/* Attempt a detach */
1781 	if (protocol == PF_INET) {
1782 		struct ifreq    ifr;
1783 
1784 		bzero(&ifr, sizeof(ifr));
1785 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1786 		    ifnet_name(interface), ifnet_unit(interface));
1787 
1788 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
1789 	} else if (protocol == PF_INET6) {
1790 		struct in6_ifreq        ifr6;
1791 
1792 		bzero(&ifr6, sizeof(ifr6));
1793 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1794 		    ifnet_name(interface), ifnet_unit(interface));
1795 
1796 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
1797 	}
1798 
1799 	return result;
1800 }
1801 
1802 static void
utun_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)1803 utun_remove_address(ifnet_t interface,
1804     protocol_family_t protocol,
1805     ifaddr_t address,
1806     socket_t pf_socket)
1807 {
1808 	errno_t result = 0;
1809 
1810 	/* Attempt a detach */
1811 	if (protocol == PF_INET) {
1812 		struct ifreq ifr;
1813 
1814 		bzero(&ifr, sizeof(ifr));
1815 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1816 		    ifnet_name(interface), ifnet_unit(interface));
1817 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
1818 		if (result != 0) {
1819 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed: %d", result);
1820 		} else {
1821 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
1822 			if (result != 0) {
1823 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR failed: %d", result);
1824 			}
1825 		}
1826 	} else if (protocol == PF_INET6) {
1827 		struct in6_ifreq ifr6;
1828 
1829 		bzero(&ifr6, sizeof(ifr6));
1830 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1831 		    ifnet_name(interface), ifnet_unit(interface));
1832 		result = ifaddr_address(address, SA(&ifr6.ifr_addr),
1833 		    sizeof(ifr6.ifr_addr));
1834 		if (result != 0) {
1835 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed (v6): %d",
1836 			    result);
1837 		} else {
1838 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
1839 			if (result != 0) {
1840 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR_IN6 failed: %d",
1841 				    result);
1842 			}
1843 		}
1844 	}
1845 }
1846 
1847 static void
utun_cleanup_family(ifnet_t interface,protocol_family_t protocol)1848 utun_cleanup_family(ifnet_t interface,
1849     protocol_family_t protocol)
1850 {
1851 	errno_t result = 0;
1852 	socket_t pf_socket = NULL;
1853 	ifaddr_t *addresses = NULL;
1854 	int i;
1855 
1856 	if (protocol != PF_INET && protocol != PF_INET6) {
1857 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - invalid protocol family %d\n", protocol);
1858 		return;
1859 	}
1860 
1861 	/* Create a socket for removing addresses and detaching the protocol */
1862 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
1863 	if (result != 0) {
1864 		if (result != EAFNOSUPPORT) {
1865 			os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - failed to create %s socket: %d\n",
1866 			    protocol == PF_INET ? "IP" : "IPv6", result);
1867 		}
1868 		goto cleanup;
1869 	}
1870 
1871 	/* always set SS_PRIV, we want to close and detach regardless */
1872 	sock_setpriv(pf_socket, 1);
1873 
1874 	result = utun_detach_ip(interface, protocol, pf_socket);
1875 	if (result == 0 || result == ENXIO) {
1876 		/* We are done! We either detached or weren't attached. */
1877 		goto cleanup;
1878 	} else if (result != EBUSY) {
1879 		/* Uh, not really sure what happened here... */
1880 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1881 		goto cleanup;
1882 	}
1883 
1884 	/*
1885 	 * At this point, we received an EBUSY error. This means there are
1886 	 * addresses attached. We should detach them and then try again.
1887 	 */
1888 	result = ifnet_get_address_list_family(interface, &addresses, protocol);
1889 	if (result != 0) {
1890 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
1891 		    ifnet_name(interface), ifnet_unit(interface),
1892 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
1893 		goto cleanup;
1894 	}
1895 
1896 	for (i = 0; addresses[i] != 0; i++) {
1897 		utun_remove_address(interface, protocol, addresses[i], pf_socket);
1898 	}
1899 	ifnet_free_address_list(addresses);
1900 	addresses = NULL;
1901 
1902 	/*
1903 	 * The addresses should be gone, we should try the remove again.
1904 	 */
1905 	result = utun_detach_ip(interface, protocol, pf_socket);
1906 	if (result != 0 && result != ENXIO) {
1907 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1908 	}
1909 
1910 cleanup:
1911 	if (pf_socket != NULL) {
1912 		sock_close(pf_socket);
1913 	}
1914 
1915 	if (addresses != NULL) {
1916 		ifnet_free_address_list(addresses);
1917 	}
1918 }
1919 
1920 static errno_t
utun_ctl_disconnect(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo)1921 utun_ctl_disconnect(__unused kern_ctl_ref kctlref,
1922     __unused u_int32_t unit,
1923     void *unitinfo)
1924 {
1925 	struct utun_pcb *pcb = unitinfo;
1926 	ifnet_t ifp = NULL;
1927 	errno_t result = 0;
1928 
1929 	if (pcb == NULL) {
1930 		return EINVAL;
1931 	}
1932 
1933 #if UTUN_NEXUS
1934 	/* Wait until all threads in the data paths are done. */
1935 	utun_wait_data_move_drain(pcb);
1936 	// Tell the nexus to stop all rings
1937 	if (pcb->utun_netif_nexus != NULL && pcb->utun_netif_connected) {
1938 		kern_nexus_stop(pcb->utun_netif_nexus);
1939 	}
1940 #endif // UTUN_NEXUS
1941 
1942 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1943 
1944 #if UTUN_NEXUS
1945 	uuid_t kpipe_uuid;
1946 	uuid_copy(kpipe_uuid, pcb->utun_kpipe_uuid);
1947 	uuid_clear(pcb->utun_kpipe_uuid);
1948 	pcb->utun_kpipe_enabled = FALSE;
1949 #endif // UTUN_NEXUS
1950 
1951 	pcb->utun_ctlref = NULL;
1952 
1953 	ifp = pcb->utun_ifp;
1954 	if (ifp != NULL) {
1955 #if UTUN_NEXUS
1956 		// Tell the nexus to stop all rings
1957 		if (pcb->utun_netif_nexus != NULL) {
1958 			/*
1959 			 * Quiesce the interface and flush any pending outbound packets.
1960 			 */
1961 			if_down(ifp);
1962 
1963 			/*
1964 			 * Suspend data movement and wait for IO threads to exit.
1965 			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
1966 			 * do this because utun nexuses are attached/detached separately.
1967 			 */
1968 			ifnet_datamov_suspend_and_drain(ifp);
1969 			if ((result = ifnet_detach(ifp)) != 0) {
1970 				panic("utun_ctl_disconnect - ifnet_detach failed: %d", result);
1971 			}
1972 
1973 			/*
1974 			 * We want to do everything in our power to ensure that the interface
1975 			 * really goes away when the socket is closed. We must remove IP/IPv6
1976 			 * addresses and detach the protocols. Finally, we can remove and
1977 			 * release the interface.
1978 			 */
1979 			utun_cleanup_family(ifp, AF_INET);
1980 			utun_cleanup_family(ifp, AF_INET6);
1981 
1982 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1983 
1984 			if (!uuid_is_null(kpipe_uuid)) {
1985 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
1986 					if (pcb->utun_kpipe_pp != NULL) {
1987 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1988 						pcb->utun_kpipe_pp = NULL;
1989 					}
1990 					utun_unregister_kernel_pipe_nexus();
1991 				}
1992 			}
1993 			utun_nexus_detach(pcb);
1994 
1995 			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
1996 			ifnet_datamov_resume(ifp);
1997 		} else
1998 #endif // UTUN_NEXUS
1999 		{
2000 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2001 
2002 #if UTUN_NEXUS
2003 			if (!uuid_is_null(kpipe_uuid)) {
2004 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
2005 					if (pcb->utun_kpipe_pp != NULL) {
2006 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
2007 						pcb->utun_kpipe_pp = NULL;
2008 					}
2009 					utun_unregister_kernel_pipe_nexus();
2010 				}
2011 			}
2012 #endif // UTUN_NEXUS
2013 
2014 			/*
2015 			 * We want to do everything in our power to ensure that the interface
2016 			 * really goes away when the socket is closed. We must remove IP/IPv6
2017 			 * addresses and detach the protocols. Finally, we can remove and
2018 			 * release the interface.
2019 			 */
2020 			utun_cleanup_family(ifp, AF_INET);
2021 			utun_cleanup_family(ifp, AF_INET6);
2022 
2023 			/*
2024 			 * Detach now; utun_detach() will be called asynchronously once
2025 			 * the I/O reference count drops to 0.  There we will invoke
2026 			 * ifnet_release().
2027 			 */
2028 			if ((result = ifnet_detach(ifp)) != 0) {
2029 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_disconnect - ifnet_detach failed: %d\n", result);
2030 			}
2031 		}
2032 	} else {
2033 		// Bound, but not connected
2034 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2035 		utun_free_pcb(pcb, false);
2036 	}
2037 
2038 	return 0;
2039 }
2040 
2041 static errno_t
utun_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,mbuf_t m,__unused int flags)2042 utun_ctl_send(__unused kern_ctl_ref kctlref,
2043     __unused u_int32_t unit,
2044     void *unitinfo,
2045     mbuf_t m,
2046     __unused int flags)
2047 {
2048 	/*
2049 	 * The userland ABI requires the first four bytes have the protocol family
2050 	 * in network byte order: swap them
2051 	 */
2052 	if (m_pktlen(m) >= (int32_t)UTUN_HEADER_SIZE((struct utun_pcb *)unitinfo)) {
2053 		*(protocol_family_t *)mbuf_data(m) = ntohl(*(protocol_family_t *)mbuf_data(m));
2054 	} else {
2055 		os_log_error(OS_LOG_DEFAULT, "%s - unexpected short mbuf pkt len %d\n", __func__, m_pktlen(m));
2056 	}
2057 
2058 	return utun_pkt_input((struct utun_pcb *)unitinfo, m);
2059 }
2060 
2061 static errno_t
utun_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)2062 utun_ctl_setopt(__unused kern_ctl_ref kctlref,
2063     __unused u_int32_t unit,
2064     void *unitinfo,
2065     int opt,
2066     void *data,
2067     size_t len)
2068 {
2069 	struct utun_pcb *pcb = unitinfo;
2070 	errno_t result = 0;
2071 	/* check for privileges for privileged options */
2072 	switch (opt) {
2073 	case UTUN_OPT_FLAGS:
2074 	case UTUN_OPT_EXT_IFDATA_STATS:
2075 	case UTUN_OPT_SET_DELEGATE_INTERFACE:
2076 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2077 			return EPERM;
2078 		}
2079 		break;
2080 	}
2081 
2082 	switch (opt) {
2083 	case UTUN_OPT_FLAGS:
2084 		if (len != sizeof(u_int32_t)) {
2085 			result = EMSGSIZE;
2086 			break;
2087 		}
2088 		if (pcb->utun_ifp != NULL) {
2089 			// Only can set before connecting
2090 			result = EINVAL;
2091 			break;
2092 		}
2093 		pcb->utun_flags = *(u_int32_t *)data;
2094 		break;
2095 
2096 	case UTUN_OPT_EXT_IFDATA_STATS:
2097 		if (len != sizeof(int)) {
2098 			result = EMSGSIZE;
2099 			break;
2100 		}
2101 		if (pcb->utun_ifp == NULL) {
2102 			// Only can set after connecting
2103 			result = EINVAL;
2104 			break;
2105 		}
2106 		pcb->utun_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2107 		break;
2108 
2109 	case UTUN_OPT_INC_IFDATA_STATS_IN:
2110 	case UTUN_OPT_INC_IFDATA_STATS_OUT: {
2111 		struct utun_stats_param *utsp = (struct utun_stats_param *)data;
2112 
2113 		if (utsp == NULL || len < sizeof(struct utun_stats_param)) {
2114 			result = EINVAL;
2115 			break;
2116 		}
2117 		if (pcb->utun_ifp == NULL) {
2118 			// Only can set after connecting
2119 			result = EINVAL;
2120 			break;
2121 		}
2122 		if (!pcb->utun_ext_ifdata_stats) {
2123 			result = EINVAL;
2124 			break;
2125 		}
2126 		if (opt == UTUN_OPT_INC_IFDATA_STATS_IN) {
2127 			ifnet_stat_increment_in(pcb->utun_ifp, utsp->utsp_packets,
2128 			    utsp->utsp_bytes, utsp->utsp_errors);
2129 		} else {
2130 			ifnet_stat_increment_out(pcb->utun_ifp, utsp->utsp_packets,
2131 			    utsp->utsp_bytes, utsp->utsp_errors);
2132 		}
2133 		break;
2134 	}
2135 	case UTUN_OPT_SET_DELEGATE_INTERFACE: {
2136 		ifnet_t         del_ifp = NULL;
2137 		char            name[IFNAMSIZ];
2138 
2139 		if (len > IFNAMSIZ - 1) {
2140 			result = EMSGSIZE;
2141 			break;
2142 		}
2143 		if (pcb->utun_ifp == NULL) {
2144 			// Only can set after connecting
2145 			result = EINVAL;
2146 			break;
2147 		}
2148 		if (len != 0) {            /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2149 			bcopy(data, name, len);
2150 			name[len] = 0;
2151 			result = ifnet_find_by_name(name, &del_ifp);
2152 		}
2153 		if (result == 0) {
2154 			result = ifnet_set_delegate(pcb->utun_ifp, del_ifp);
2155 			if (del_ifp) {
2156 				ifnet_release(del_ifp);
2157 			}
2158 		}
2159 		break;
2160 	}
2161 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2162 		u_int32_t max_pending_packets = 0;
2163 		if (len != sizeof(u_int32_t)) {
2164 			result = EMSGSIZE;
2165 			break;
2166 		}
2167 		max_pending_packets = *(u_int32_t *)data;
2168 		if (max_pending_packets == 0) {
2169 			result = EINVAL;
2170 			break;
2171 		}
2172 		pcb->utun_max_pending_packets = max_pending_packets;
2173 		break;
2174 	}
2175 #if UTUN_NEXUS
2176 	case UTUN_OPT_ENABLE_CHANNEL: {
2177 		if (len != sizeof(int)) {
2178 			result = EMSGSIZE;
2179 			break;
2180 		}
2181 		if (pcb->utun_ifp == NULL) {
2182 			// Only can set after connecting
2183 			result = EINVAL;
2184 			break;
2185 		}
2186 		if (*(int *)data) {
2187 			result = utun_enable_channel(pcb, current_proc());
2188 		} else {
2189 			result = utun_disable_channel(pcb);
2190 		}
2191 		break;
2192 	}
2193 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2194 		if (len != sizeof(int)) {
2195 			result = EMSGSIZE;
2196 			break;
2197 		}
2198 		if (pcb->utun_ifp == NULL) {
2199 			// Only can set after connecting
2200 			result = EINVAL;
2201 			break;
2202 		}
2203 		if (!if_is_fsw_transport_netagent_enabled()) {
2204 			result = ENOTSUP;
2205 			break;
2206 		}
2207 		if (uuid_is_null(pcb->utun_nx.fsw_agent)) {
2208 			result = ENOENT;
2209 			break;
2210 		}
2211 
2212 		uint32_t flags = netagent_get_flags(pcb->utun_nx.fsw_agent);
2213 
2214 		if (*(int *)data) {
2215 			pcb->utun_needs_netagent = true;
2216 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
2217 			    NETAGENT_FLAG_NEXUS_LISTENER);
2218 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2219 		} else {
2220 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
2221 			    NETAGENT_FLAG_NEXUS_LISTENER);
2222 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2223 			pcb->utun_needs_netagent = false;
2224 		}
2225 		break;
2226 	}
2227 	case UTUN_OPT_ATTACH_FLOWSWITCH: {
2228 		if (len != sizeof(int)) {
2229 			result = EMSGSIZE;
2230 			break;
2231 		}
2232 		if (pcb->utun_ifp != NULL) {
2233 			// Only can set before connecting
2234 			result = EINVAL;
2235 			break;
2236 		}
2237 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2238 		pcb->utun_attach_fsw = !!(*(int *)data);
2239 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2240 		break;
2241 	}
2242 	case UTUN_OPT_ENABLE_NETIF: {
2243 		if (len != sizeof(int)) {
2244 			result = EMSGSIZE;
2245 			break;
2246 		}
2247 		if (pcb->utun_ifp != NULL) {
2248 			// Only can set before connecting
2249 			result = EINVAL;
2250 			break;
2251 		}
2252 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2253 		pcb->utun_use_netif = !!(*(int *)data);
2254 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2255 		break;
2256 	}
2257 	case UTUN_OPT_SLOT_SIZE: {
2258 		if (len != sizeof(u_int32_t)) {
2259 			result = EMSGSIZE;
2260 			break;
2261 		}
2262 		if (pcb->utun_ifp != NULL) {
2263 			// Only can set before connecting
2264 			result = EINVAL;
2265 			break;
2266 		}
2267 		u_int32_t slot_size = *(u_int32_t *)data;
2268 		if (slot_size < UTUN_IF_MIN_SLOT_SIZE ||
2269 		    slot_size > UTUN_IF_MAX_SLOT_SIZE) {
2270 			return EINVAL;
2271 		}
2272 		pcb->utun_slot_size = slot_size;
2273 		break;
2274 	}
2275 	case UTUN_OPT_NETIF_RING_SIZE: {
2276 		if (len != sizeof(u_int32_t)) {
2277 			result = EMSGSIZE;
2278 			break;
2279 		}
2280 		if (pcb->utun_ifp != NULL) {
2281 			// Only can set before connecting
2282 			result = EINVAL;
2283 			break;
2284 		}
2285 		u_int32_t ring_size = *(u_int32_t *)data;
2286 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2287 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2288 			return EINVAL;
2289 		}
2290 		pcb->utun_netif_ring_size = ring_size;
2291 		break;
2292 	}
2293 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2294 		if (len != sizeof(u_int32_t)) {
2295 			result = EMSGSIZE;
2296 			break;
2297 		}
2298 		if (pcb->utun_ifp != NULL) {
2299 			// Only can set before connecting
2300 			result = EINVAL;
2301 			break;
2302 		}
2303 		u_int32_t ring_size = *(u_int32_t *)data;
2304 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2305 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2306 			return EINVAL;
2307 		}
2308 		pcb->utun_tx_fsw_ring_size = ring_size;
2309 		break;
2310 	}
2311 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2312 		if (len != sizeof(u_int32_t)) {
2313 			result = EMSGSIZE;
2314 			break;
2315 		}
2316 		if (pcb->utun_ifp != NULL) {
2317 			// Only can set before connecting
2318 			result = EINVAL;
2319 			break;
2320 		}
2321 		u_int32_t ring_size = *(u_int32_t *)data;
2322 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2323 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2324 			return EINVAL;
2325 		}
2326 		pcb->utun_rx_fsw_ring_size = ring_size;
2327 		break;
2328 	}
2329 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2330 		if (len != sizeof(u_int32_t)) {
2331 			result = EMSGSIZE;
2332 			break;
2333 		}
2334 		if (pcb->utun_ifp != NULL) {
2335 			// Only can set before connecting
2336 			result = EINVAL;
2337 			break;
2338 		}
2339 		u_int32_t ring_size = *(u_int32_t *)data;
2340 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2341 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2342 			return EINVAL;
2343 		}
2344 		pcb->utun_kpipe_tx_ring_size = ring_size;
2345 		break;
2346 	}
2347 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2348 		if (len != sizeof(u_int32_t)) {
2349 			result = EMSGSIZE;
2350 			break;
2351 		}
2352 		if (pcb->utun_ifp != NULL) {
2353 			// Only can set before connecting
2354 			result = EINVAL;
2355 			break;
2356 		}
2357 		u_int32_t ring_size = *(u_int32_t *)data;
2358 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2359 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2360 			return EINVAL;
2361 		}
2362 		pcb->utun_kpipe_rx_ring_size = ring_size;
2363 		break;
2364 	}
2365 #endif // UTUN_NEXUS
2366 	default: {
2367 		result = ENOPROTOOPT;
2368 		break;
2369 	}
2370 	}
2371 
2372 	return result;
2373 }
2374 
2375 static errno_t
utun_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t * len)2376 utun_ctl_getopt(__unused kern_ctl_ref kctlref,
2377     __unused u_int32_t unit,
2378     void *unitinfo,
2379     int opt,
2380     void *data,
2381     size_t *len)
2382 {
2383 	struct utun_pcb *pcb = unitinfo;
2384 	errno_t result = 0;
2385 
2386 	switch (opt) {
2387 	case UTUN_OPT_FLAGS:
2388 		if (*len != sizeof(u_int32_t)) {
2389 			result = EMSGSIZE;
2390 		} else {
2391 			*(u_int32_t *)data = pcb->utun_flags;
2392 		}
2393 		break;
2394 
2395 	case UTUN_OPT_EXT_IFDATA_STATS:
2396 		if (*len != sizeof(int)) {
2397 			result = EMSGSIZE;
2398 		} else {
2399 			*(int *)data = (pcb->utun_ext_ifdata_stats) ? 1 : 0;
2400 		}
2401 		break;
2402 
2403 	case UTUN_OPT_IFNAME:
2404 		if (*len < MIN(strlen(pcb->utun_if_xname) + 1, sizeof(pcb->utun_if_xname))) {
2405 			result = EMSGSIZE;
2406 		} else {
2407 			if (pcb->utun_ifp == NULL) {
2408 				// Only can get after connecting
2409 				result = EINVAL;
2410 				break;
2411 			}
2412 			*len = scnprintf(data, *len, "%s", pcb->utun_if_xname) + 1;
2413 		}
2414 		break;
2415 
2416 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2417 		if (*len != sizeof(u_int32_t)) {
2418 			result = EMSGSIZE;
2419 		} else {
2420 			*((u_int32_t *)data) = pcb->utun_max_pending_packets;
2421 		}
2422 		break;
2423 	}
2424 
2425 #if UTUN_NEXUS
2426 	case UTUN_OPT_ENABLE_CHANNEL: {
2427 		if (*len != sizeof(int)) {
2428 			result = EMSGSIZE;
2429 		} else {
2430 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2431 			*(int *)data = pcb->utun_kpipe_enabled;
2432 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2433 		}
2434 		break;
2435 	}
2436 
2437 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2438 		if (*len != sizeof(int)) {
2439 			result = EMSGSIZE;
2440 		} else {
2441 			*(int *)data = if_check_netagent(pcb->utun_ifp, pcb->utun_nx.fsw_agent);
2442 		}
2443 		break;
2444 	}
2445 
2446 	case UTUN_OPT_ENABLE_NETIF: {
2447 		if (*len != sizeof(int)) {
2448 			result = EMSGSIZE;
2449 		} else {
2450 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2451 			*(int *)data = !!pcb->utun_use_netif;
2452 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2453 		}
2454 		break;
2455 	}
2456 
2457 	case UTUN_OPT_GET_CHANNEL_UUID: {
2458 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2459 		if (uuid_is_null(pcb->utun_kpipe_uuid)) {
2460 			result = ENXIO;
2461 		} else if (*len != sizeof(uuid_t)) {
2462 			result = EMSGSIZE;
2463 		} else {
2464 			uuid_copy(data, pcb->utun_kpipe_uuid);
2465 		}
2466 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2467 		break;
2468 	}
2469 	case UTUN_OPT_SLOT_SIZE: {
2470 		if (*len != sizeof(u_int32_t)) {
2471 			result = EMSGSIZE;
2472 		} else {
2473 			*(u_int32_t *)data = pcb->utun_slot_size;
2474 		}
2475 		break;
2476 	}
2477 	case UTUN_OPT_NETIF_RING_SIZE: {
2478 		if (*len != sizeof(u_int32_t)) {
2479 			result = EMSGSIZE;
2480 		} else {
2481 			*(u_int32_t *)data = pcb->utun_netif_ring_size;
2482 		}
2483 		break;
2484 	}
2485 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2486 		if (*len != sizeof(u_int32_t)) {
2487 			result = EMSGSIZE;
2488 		} else {
2489 			*(u_int32_t *)data = pcb->utun_tx_fsw_ring_size;
2490 		}
2491 		break;
2492 	}
2493 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2494 		if (*len != sizeof(u_int32_t)) {
2495 			result = EMSGSIZE;
2496 		} else {
2497 			*(u_int32_t *)data = pcb->utun_rx_fsw_ring_size;
2498 		}
2499 		break;
2500 	}
2501 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2502 		if (*len != sizeof(u_int32_t)) {
2503 			result = EMSGSIZE;
2504 		} else {
2505 			*(u_int32_t *)data = pcb->utun_kpipe_tx_ring_size;
2506 		}
2507 		break;
2508 	}
2509 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2510 		if (*len != sizeof(u_int32_t)) {
2511 			result = EMSGSIZE;
2512 		} else {
2513 			*(u_int32_t *)data = pcb->utun_kpipe_rx_ring_size;
2514 		}
2515 		break;
2516 	}
2517 #endif // UTUN_NEXUS
2518 
2519 	default:
2520 		result = ENOPROTOOPT;
2521 		break;
2522 	}
2523 
2524 	return result;
2525 }
2526 
2527 static void
utun_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t unit,void * unitinfo,int flags)2528 utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
2529 {
2530 #pragma unused(flags)
2531 	bool reenable_output = false;
2532 	struct utun_pcb *pcb = unitinfo;
2533 	if (pcb == NULL) {
2534 		return;
2535 	}
2536 	ifnet_lock_exclusive(pcb->utun_ifp);
2537 
2538 	u_int32_t utun_packet_cnt;
2539 	errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, &utun_packet_cnt);
2540 	if (error_pc != 0) {
2541 		os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2542 		utun_packet_cnt = 0;
2543 	}
2544 
2545 	if (utun_packet_cnt < pcb->utun_max_pending_packets) {
2546 		reenable_output = true;
2547 	}
2548 
2549 	if (reenable_output) {
2550 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
2551 		if (error != 0) {
2552 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ifnet_enable_output returned error %d\n", error);
2553 		}
2554 	}
2555 	ifnet_lock_done(pcb->utun_ifp);
2556 }
2557 
2558 /* Network Interface functions */
2559 static void
utun_start(ifnet_t interface)2560 utun_start(ifnet_t interface)
2561 {
2562 	mbuf_t data;
2563 	struct utun_pcb *pcb = ifnet_softc(interface);
2564 
2565 	VERIFY(pcb != NULL);
2566 
2567 #if UTUN_NEXUS
2568 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
2569 	if (pcb->utun_kpipe_enabled) {
2570 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2571 		if (!utun_data_move_begin(pcb)) {
2572 			os_log_info(OS_LOG_DEFAULT,
2573 			    "%s: data path stopped for %s\n",
2574 			    __func__, if_name(pcb->utun_ifp));
2575 			return;
2576 		}
2577 		/* It's possible to have channels enabled, but not yet have the channel opened,
2578 		 * in which case the rxring will not be set
2579 		 */
2580 		if (pcb->utun_kpipe_rxring != NULL) {
2581 			kern_channel_notify(pcb->utun_kpipe_rxring, 0);
2582 		}
2583 		utun_data_move_end(pcb);
2584 		return;
2585 	}
2586 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2587 #endif // UTUN_NEXUS
2588 
2589 	for (;;) {
2590 		bool can_accept_packets = true;
2591 		ifnet_lock_shared(pcb->utun_ifp);
2592 
2593 		u_int32_t utun_packet_cnt;
2594 		errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt);
2595 		if (error_pc != 0) {
2596 			os_log_error(OS_LOG_DEFAULT, "utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2597 			utun_packet_cnt = 0;
2598 		}
2599 
2600 		can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets);
2601 		if (!can_accept_packets && pcb->utun_ctlref) {
2602 			u_int32_t difference = 0;
2603 			if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) {
2604 				if (difference > 0) {
2605 					// If the low-water mark has not yet been reached, we still need to enqueue data
2606 					// into the buffer
2607 					can_accept_packets = true;
2608 				}
2609 			}
2610 		}
2611 		if (!can_accept_packets) {
2612 			errno_t error = ifnet_disable_output(interface);
2613 			if (error != 0) {
2614 				os_log_error(OS_LOG_DEFAULT, "utun_start: ifnet_disable_output returned error %d\n", error);
2615 			}
2616 			ifnet_lock_done(pcb->utun_ifp);
2617 			break;
2618 		}
2619 		ifnet_lock_done(pcb->utun_ifp);
2620 		if (ifnet_dequeue(interface, &data) != 0) {
2621 			break;
2622 		}
2623 		if (utun_output(interface, data) != 0) {
2624 			break;
2625 		}
2626 	}
2627 }
2628 
2629 static errno_t
utun_output(ifnet_t interface,mbuf_t data)2630 utun_output(ifnet_t     interface,
2631     mbuf_t data)
2632 {
2633 	struct utun_pcb *pcb = ifnet_softc(interface);
2634 	errno_t result;
2635 
2636 	VERIFY(interface == pcb->utun_ifp);
2637 
2638 #if UTUN_NEXUS
2639 	if (!pcb->utun_use_netif)
2640 #endif // UTUN_NEXUS
2641 	{
2642 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2643 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
2644 		}
2645 	}
2646 
2647 	if (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT) {
2648 		/* flush data */
2649 		mbuf_freem(data);
2650 		return 0;
2651 	}
2652 
2653 	// otherwise, fall thru to ctl_enqueumbuf
2654 	if (pcb->utun_ctlref) {
2655 		int     length;
2656 
2657 		/*
2658 		 * The ABI requires the protocol in network byte order
2659 		 */
2660 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2661 			*(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data));
2662 		}
2663 
2664 		length = mbuf_pkthdr_len(data);
2665 		result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
2666 		if (result != 0) {
2667 			mbuf_freem(data);
2668 			os_log_error(OS_LOG_DEFAULT, "utun_output - ctl_enqueuembuf failed: %d\n", result);
2669 #if UTUN_NEXUS
2670 			if (!pcb->utun_use_netif)
2671 #endif // UTUN_NEXUS
2672 			{
2673 				ifnet_stat_increment_out(interface, 0, 0, 1);
2674 			}
2675 		} else {
2676 #if UTUN_NEXUS
2677 			if (!pcb->utun_use_netif)
2678 #endif // UTUN_NEXUS
2679 			{
2680 				if (!pcb->utun_ext_ifdata_stats) {
2681 					ifnet_stat_increment_out(interface, 1, length, 0);
2682 				}
2683 			}
2684 		}
2685 	} else {
2686 		mbuf_freem(data);
2687 	}
2688 
2689 	return 0;
2690 }
2691 
2692 static errno_t
utun_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)2693 utun_demux(__unused ifnet_t interface,
2694     mbuf_t data,
2695     __unused char *frame_header,
2696     protocol_family_t *protocol)
2697 {
2698 #if UTUN_NEXUS
2699 	struct utun_pcb *pcb = ifnet_softc(interface);
2700 	struct ip *ip;
2701 	u_int ip_version;
2702 #endif
2703 
2704 	while (data != NULL && mbuf_len(data) < 1) {
2705 		data = mbuf_next(data);
2706 	}
2707 
2708 	if (data == NULL) {
2709 		return ENOENT;
2710 	}
2711 
2712 #if UTUN_NEXUS
2713 	if (pcb->utun_use_netif) {
2714 		ip = mtod(data, struct ip *);
2715 		ip_version = ip->ip_v;
2716 
2717 		switch (ip_version) {
2718 		case 4:
2719 			*protocol = PF_INET;
2720 			return 0;
2721 		case 6:
2722 			*protocol = PF_INET6;
2723 			return 0;
2724 		default:
2725 			*protocol = 0;
2726 			break;
2727 		}
2728 	} else
2729 #endif // UTUN_NEXUS
2730 	{
2731 		*protocol = *(u_int32_t *)mbuf_data(data);
2732 	}
2733 
2734 	return 0;
2735 }
2736 
2737 static errno_t
utun_framer(ifnet_t interface,mbuf_t * packet,__unused const struct sockaddr * dest,__unused const char * desk_linkaddr,const char * frame_type,u_int32_t * prepend_len,u_int32_t * postpend_len)2738 utun_framer(ifnet_t interface,
2739     mbuf_t *packet,
2740     __unused const struct sockaddr *dest,
2741     __unused const char *desk_linkaddr,
2742     const char *frame_type,
2743     u_int32_t *prepend_len,
2744     u_int32_t *postpend_len)
2745 {
2746 	struct utun_pcb *pcb = ifnet_softc(interface);
2747 	VERIFY(interface == pcb->utun_ifp);
2748 
2749 	u_int32_t header_length = UTUN_HEADER_SIZE(pcb);
2750 	if (mbuf_prepend(packet, header_length, MBUF_DONTWAIT) != 0) {
2751 		os_log_error(OS_LOG_DEFAULT, "utun_framer - ifnet_output prepend failed\n");
2752 
2753 		ifnet_stat_increment_out(interface, 0, 0, 1);
2754 
2755 		// just	return, because the buffer was freed in mbuf_prepend
2756 		return EJUSTRETURN;
2757 	}
2758 	if (prepend_len != NULL) {
2759 		*prepend_len = header_length;
2760 	}
2761 	if (postpend_len != NULL) {
2762 		*postpend_len = 0;
2763 	}
2764 
2765 	// place protocol number at the beginning of the mbuf
2766 	*(protocol_family_t *)mbuf_data(*packet) = *(protocol_family_t *)(uintptr_t)(size_t)frame_type;
2767 
2768 #if NECP
2769 	// Add process uuid if applicable
2770 	if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
2771 		if (m_pktlen(*packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2772 			u_int8_t *header = (u_int8_t *)mbuf_data(*packet);
2773 			int uuid_err = necp_get_app_uuid_from_packet(*packet, (void *)(header + sizeof(u_int32_t)));
2774 			if (uuid_err != 0) {
2775 				os_log_error(OS_LOG_DEFAULT, "Received app uuid error %d for %s%d\n", uuid_err, ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp));
2776 			}
2777 		} else {
2778 			os_log_error(OS_LOG_DEFAULT, "Cannot set proc uuid for %s%d, size %d < %zu\n", ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp),
2779 			    m_pktlen(*packet), UTUN_HEADER_SIZE(pcb));
2780 		}
2781 	}
2782 #endif // NECP
2783 
2784 	return 0;
2785 }
2786 
2787 static errno_t
utun_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)2788 utun_add_proto(__unused ifnet_t interface,
2789     protocol_family_t protocol,
2790     __unused const struct ifnet_demux_desc *demux_array,
2791     __unused u_int32_t demux_count)
2792 {
2793 	switch (protocol) {
2794 	case PF_INET:
2795 		return 0;
2796 	case PF_INET6:
2797 		return 0;
2798 	default:
2799 		break;
2800 	}
2801 
2802 	return ENOPROTOOPT;
2803 }
2804 
2805 static errno_t
utun_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)2806 utun_del_proto(__unused ifnet_t interface,
2807     __unused protocol_family_t protocol)
2808 {
2809 	return 0;
2810 }
2811 
2812 static errno_t
utun_ioctl(ifnet_t interface,u_long command,void * data)2813 utun_ioctl(ifnet_t interface,
2814     u_long command,
2815     void *data)
2816 {
2817 #if UTUN_NEXUS
2818 	struct utun_pcb *pcb = ifnet_softc(interface);
2819 #endif
2820 	errno_t result = 0;
2821 
2822 	switch (command) {
2823 	case SIOCSIFMTU: {
2824 #if UTUN_NEXUS
2825 		if (pcb->utun_use_netif) {
2826 			// Make sure we can fit packets in the channel buffers
2827 			// Allow for the headroom in the slot
2828 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) + UTUN_IF_HEADROOM_SIZE > pcb->utun_slot_size) {
2829 				result = EINVAL;
2830 			} else {
2831 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
2832 			}
2833 		} else
2834 #endif // UTUN_NEXUS
2835 		{
2836 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
2837 		}
2838 		break;
2839 	}
2840 
2841 	case SIOCSIFFLAGS:
2842 		/* ifioctl() takes care of it */
2843 		break;
2844 
2845 	default:
2846 		result = EOPNOTSUPP;
2847 	}
2848 
2849 	return result;
2850 }
2851 
2852 static void
utun_detached(ifnet_t interface)2853 utun_detached(ifnet_t interface)
2854 {
2855 	struct utun_pcb *pcb = ifnet_softc(interface);
2856 	(void)ifnet_release(interface);
2857 	lck_mtx_lock(&utun_lock);
2858 	utun_free_pcb(pcb, true);
2859 	(void)ifnet_dispose(interface);
2860 	lck_mtx_unlock(&utun_lock);
2861 }
2862 
2863 /* Protocol Handlers */
2864 
2865 static errno_t
utun_proto_input(__unused ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)2866 utun_proto_input(__unused ifnet_t interface,
2867     protocol_family_t protocol,
2868     mbuf_t m,
2869     __unused char *frame_header)
2870 {
2871 	struct utun_pcb *pcb = ifnet_softc(interface);
2872 #if UTUN_NEXUS
2873 	if (!pcb->utun_use_netif)
2874 #endif // UTUN_NEXUS
2875 	{
2876 		mbuf_adj(m, UTUN_HEADER_SIZE(pcb));
2877 	}
2878 	int32_t pktlen = m->m_pkthdr.len;
2879 	if (proto_input(protocol, m) != 0) {
2880 		m_freem(m);
2881 #if UTUN_NEXUS
2882 		if (!pcb->utun_use_netif)
2883 #endif // UTUN_NEXUS
2884 		{
2885 			ifnet_stat_increment_in(interface, 0, 0, 1);
2886 		}
2887 	} else {
2888 #if UTUN_NEXUS
2889 		if (!pcb->utun_use_netif)
2890 #endif // UTUN_NEXUS
2891 		{
2892 			ifnet_stat_increment_in(interface, 1, pktlen, 0);
2893 		}
2894 	}
2895 
2896 	return 0;
2897 }
2898 
2899 static errno_t
utun_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,char * frame_type,__unused char * link_layer_dest)2900 utun_proto_pre_output(__unused ifnet_t interface,
2901     protocol_family_t protocol,
2902     __unused mbuf_t *packet,
2903     __unused const struct sockaddr *dest,
2904     __unused void *route,
2905     char *frame_type,
2906     __unused char *link_layer_dest)
2907 {
2908 	*(protocol_family_t *)(void *)frame_type = protocol;
2909 	return 0;
2910 }
2911 
2912 static errno_t
utun_attach_proto(ifnet_t interface,protocol_family_t protocol)2913 utun_attach_proto(ifnet_t interface,
2914     protocol_family_t protocol)
2915 {
2916 	struct ifnet_attach_proto_param proto;
2917 
2918 	bzero(&proto, sizeof(proto));
2919 	proto.input = utun_proto_input;
2920 	proto.pre_output = utun_proto_pre_output;
2921 
2922 	errno_t result = ifnet_attach_protocol(interface, protocol, &proto);
2923 	if (result != 0 && result != EEXIST) {
2924 		os_log_error(OS_LOG_DEFAULT, "utun_attach_inet - ifnet_attach_protocol %d failed: %d\n",
2925 		    protocol, result);
2926 	}
2927 
2928 	return result;
2929 }
2930 
2931 static errno_t
utun_pkt_input(struct utun_pcb * pcb,mbuf_t packet)2932 utun_pkt_input(struct utun_pcb *pcb, mbuf_t packet)
2933 {
2934 #if UTUN_NEXUS
2935 	if (pcb->utun_use_netif) {
2936 		if (!utun_data_move_begin(pcb)) {
2937 			os_log_info(OS_LOG_DEFAULT,
2938 			    "%s: data path stopped for %s\n",
2939 			    __func__, if_name(pcb->utun_ifp));
2940 			return ENXIO;
2941 		}
2942 
2943 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2944 
2945 		lck_mtx_lock(&pcb->utun_input_chain_lock);
2946 
2947 		if (pcb->utun_input_chain_count > (u_int32_t)if_utun_max_pending_input) {
2948 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
2949 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2950 			utun_data_move_end(pcb);
2951 			return ENOSPC;
2952 		}
2953 
2954 		if (pcb->utun_input_chain != NULL) {
2955 			pcb->utun_input_chain_last->m_nextpkt = packet;
2956 		} else {
2957 			pcb->utun_input_chain = packet;
2958 		}
2959 		pcb->utun_input_chain_count++;
2960 		while (packet->m_nextpkt) {
2961 			VERIFY(packet != packet->m_nextpkt);
2962 			packet = packet->m_nextpkt;
2963 			pcb->utun_input_chain_count++;
2964 		}
2965 		pcb->utun_input_chain_last = packet;
2966 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
2967 
2968 		kern_channel_ring_t rx_ring = pcb->utun_netif_rxring;
2969 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2970 
2971 		if (rx_ring != NULL) {
2972 			kern_channel_notify(rx_ring, 0);
2973 		}
2974 
2975 		utun_data_move_end(pcb);
2976 		return 0;
2977 	} else
2978 #endif // UTUN_NEXUS
2979 	{
2980 		mbuf_pkthdr_setrcvif(packet, pcb->utun_ifp);
2981 
2982 		if (m_pktlen(packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2983 			bpf_tap_in(pcb->utun_ifp, DLT_NULL, packet, 0, 0);
2984 		}
2985 		if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) {
2986 			/* flush data */
2987 			mbuf_freem(packet);
2988 			return 0;
2989 		}
2990 
2991 		errno_t result = 0;
2992 		if (!pcb->utun_ext_ifdata_stats) {
2993 			struct ifnet_stat_increment_param incs = {};
2994 			incs.packets_in = 1;
2995 			incs.bytes_in = mbuf_pkthdr_len(packet);
2996 			result = ifnet_input(pcb->utun_ifp, packet, &incs);
2997 		} else {
2998 			result = ifnet_input(pcb->utun_ifp, packet, NULL);
2999 		}
3000 		if (result != 0) {
3001 			ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
3002 
3003 			os_log_error(OS_LOG_DEFAULT, "%s - ifnet_input failed: %d\n", __FUNCTION__, result);
3004 		}
3005 
3006 		return 0;
3007 	}
3008 }
3009 
3010 #if UTUN_NEXUS
3011 
3012 static errno_t
utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)3013 utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
3014 {
3015 	return 0;
3016 }
3017 
3018 static void
utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)3019 utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
3020 {
3021 	// Ignore
3022 }
3023 
3024 static errno_t
utun_register_nexus(void)3025 utun_register_nexus(void)
3026 {
3027 	const struct kern_nexus_domain_provider_init dp_init = {
3028 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3029 		.nxdpi_flags = 0,
3030 		.nxdpi_init = utun_nxdp_init,
3031 		.nxdpi_fini = utun_nxdp_fini
3032 	};
3033 	errno_t err = 0;
3034 
3035 	/* utun_nxdp_init() is called before this function returns */
3036 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
3037 	    (const uint8_t *) "com.apple.utun",
3038 	    &dp_init, sizeof(dp_init),
3039 	    &utun_nx_dom_prov);
3040 	if (err != 0) {
3041 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
3042 		return err;
3043 	}
3044 	return 0;
3045 }
3046 boolean_t
utun_interface_needs_netagent(ifnet_t interface)3047 utun_interface_needs_netagent(ifnet_t interface)
3048 {
3049 	struct utun_pcb *pcb = NULL;
3050 
3051 	if (interface == NULL) {
3052 		return FALSE;
3053 	}
3054 
3055 	pcb = ifnet_softc(interface);
3056 
3057 	if (pcb == NULL) {
3058 		return FALSE;
3059 	}
3060 
3061 	return pcb->utun_needs_netagent == true;
3062 }
3063 
3064 static errno_t
utun_ifnet_set_attrs(ifnet_t ifp)3065 utun_ifnet_set_attrs(ifnet_t ifp)
3066 {
3067 	/* Set flags and additional information. */
3068 	ifnet_set_mtu(ifp, 1500);
3069 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
3070 
3071 	/* The interface must generate its own IPv6 LinkLocal address,
3072 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
3073 	 */
3074 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
3075 
3076 	return 0;
3077 }
3078 
3079 static errno_t
utun_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)3080 utun_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
3081 {
3082 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3083 	pcb->utun_netif_nexus = nexus;
3084 	return utun_ifnet_set_attrs(ifp);
3085 }
3086 
3087 static errno_t
utun_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)3088 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
3089     proc_t p, kern_nexus_t nexus,
3090     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
3091 {
3092 #pragma unused(nxprov, p)
3093 #pragma unused(nexus, nexus_port, channel, ch_ctx)
3094 	return 0;
3095 }
3096 
3097 static errno_t
utun_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3098 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3099     kern_channel_t channel)
3100 {
3101 #pragma unused(nxprov, channel)
3102 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3103 	boolean_t ok = ifnet_is_attached(pcb->utun_ifp, 1);
3104 	if (pcb->utun_netif_nexus == nexus) {
3105 		pcb->utun_netif_connected = true;
3106 	}
3107 	if (ok) {
3108 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3109 		UTUN_SET_DATA_PATH_READY(pcb);
3110 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3111 	}
3112 	return ok ? 0 : ENXIO;
3113 }
3114 
3115 static void
utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3116 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3117     kern_channel_t channel)
3118 {
3119 #pragma unused(nxprov, channel)
3120 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3121 	/* Wait until all threads in the data paths are done. */
3122 	utun_wait_data_move_drain(pcb);
3123 }
3124 
3125 static void
utun_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3126 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3127     kern_channel_t channel)
3128 {
3129 #pragma unused(nxprov, channel)
3130 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3131 	/* Wait until all threads in the data paths are done. */
3132 	utun_wait_data_move_drain(pcb);
3133 }
3134 
3135 static void
utun_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3136 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3137     kern_channel_t channel)
3138 {
3139 #pragma unused(nxprov, channel)
3140 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3141 	if (pcb->utun_netif_nexus == nexus) {
3142 		pcb->utun_netif_connected = false;
3143 		if (pcb->utun_attach_fsw) {
3144 			// disconnected by flowswitch that was attached by us
3145 			pcb->utun_netif_nexus = NULL;
3146 		}
3147 	}
3148 	ifnet_decr_iorefcnt(pcb->utun_ifp);
3149 }
3150 
3151 static errno_t
utun_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)3152 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3153     kern_channel_t channel, kern_channel_ring_t ring,
3154     boolean_t is_tx_ring, void **ring_ctx)
3155 {
3156 #pragma unused(nxprov)
3157 #pragma unused(channel)
3158 #pragma unused(ring_ctx)
3159 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3160 	if (!is_tx_ring) {
3161 		VERIFY(pcb->utun_kpipe_rxring == NULL);
3162 		pcb->utun_kpipe_rxring = ring;
3163 	} else {
3164 		VERIFY(pcb->utun_kpipe_txring == NULL);
3165 		pcb->utun_kpipe_txring = ring;
3166 	}
3167 	return 0;
3168 }
3169 
3170 static void
utun_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)3171 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3172     kern_channel_ring_t ring)
3173 {
3174 #pragma unused(nxprov)
3175 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3176 	if (pcb->utun_kpipe_rxring == ring) {
3177 		pcb->utun_kpipe_rxring = NULL;
3178 	} else if (pcb->utun_kpipe_txring == ring) {
3179 		pcb->utun_kpipe_txring = NULL;
3180 	}
3181 }
3182 
3183 static errno_t
utun_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)3184 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3185     kern_channel_ring_t tx_ring, uint32_t flags)
3186 {
3187 #pragma unused(nxprov)
3188 #pragma unused(flags)
3189 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3190 
3191 	if (!utun_data_move_begin(pcb)) {
3192 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3193 		    __func__, if_name(pcb->utun_ifp));
3194 		return 0;
3195 	}
3196 
3197 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3198 	int channel_enabled = pcb->utun_kpipe_enabled;
3199 	if (!channel_enabled) {
3200 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3201 		utun_data_move_end(pcb);
3202 		return 0;
3203 	}
3204 
3205 	if (pcb->utun_use_netif) {
3206 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3207 		if (tx_slot == NULL) {
3208 			// Nothing to write, bail
3209 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3210 			utun_data_move_end(pcb);
3211 			return 0;
3212 		}
3213 
3214 		// Signal the netif ring to read
3215 		kern_channel_ring_t rx_ring = pcb->utun_netif_rxring;
3216 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3217 		if (rx_ring != NULL) {
3218 			kern_channel_notify(rx_ring, 0);
3219 		}
3220 	} else {
3221 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3222 
3223 		struct ifnet_stat_increment_param incs = {};
3224 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
3225 		MBUFQ_HEAD(mbufq) mbq;
3226 		MBUFQ_INIT(&mbq);
3227 		kern_channel_slot_t tx_pslot = NULL;
3228 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3229 		while (tx_slot != NULL) {
3230 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3231 
3232 			// Advance TX ring
3233 			tx_pslot = tx_slot;
3234 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3235 
3236 			if (tx_ph == 0) {
3237 				continue;
3238 			}
3239 
3240 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3241 			VERIFY(tx_buf != NULL);
3242 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
3243 			VERIFY(tx_baddr != 0);
3244 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3245 
3246 			size_t length = MIN(kern_packet_get_data_length(tx_ph),
3247 			    pcb->utun_slot_size);
3248 
3249 			mbuf_t data = NULL;
3250 			if (length >= UTUN_HEADER_SIZE(pcb) &&
3251 			    !(pcb->utun_flags & UTUN_FLAGS_NO_INPUT)) {
3252 				errno_t error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_HEADER, &data);
3253 				VERIFY(0 == error);
3254 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_WAITOK);
3255 				VERIFY(0 == error);
3256 				/*
3257 				 * The userland ABI requires the first four bytes have
3258 				 * the protocol family in network byte order: swap them
3259 				 */
3260 				*(uint32_t *)mbuf_data(data) = ntohl(*(uint32_t *)mbuf_data(data));
3261 				mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
3262 				bpf_tap_in(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3263 				incs.packets_in++;
3264 				incs.bytes_in += length;
3265 				MBUFQ_ENQUEUE(&mbq, data);
3266 			}
3267 		}
3268 		if (tx_pslot) {
3269 			kern_channel_advance_slot(tx_ring, tx_pslot);
3270 			tx_ring_stats.kcrsi_slots_transferred = incs.packets_in;
3271 			tx_ring_stats.kcrsi_bytes_transferred = incs.bytes_in;
3272 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3273 			(void) kern_channel_reclaim(tx_ring);
3274 		}
3275 		if (!MBUFQ_EMPTY(&mbq)) {
3276 			(void) ifnet_input_extended(pcb->utun_ifp, MBUFQ_FIRST(&mbq),
3277 			    MBUFQ_LAST(&mbq), &incs);
3278 			MBUFQ_INIT(&mbq);
3279 		}
3280 	}
3281 
3282 	utun_data_move_end(pcb);
3283 	return 0;
3284 }
3285 
3286 static errno_t
utun_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)3287 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3288     kern_channel_ring_t rx_ring, uint32_t flags)
3289 {
3290 #pragma unused(nxprov)
3291 #pragma unused(flags)
3292 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3293 	struct kern_channel_ring_stat_increment rx_ring_stats = {};
3294 
3295 	if (!utun_data_move_begin(pcb)) {
3296 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3297 		    __func__, if_name(pcb->utun_ifp));
3298 		return 0;
3299 	}
3300 
3301 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3302 
3303 	int channel_enabled = pcb->utun_kpipe_enabled;
3304 	if (!channel_enabled) {
3305 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3306 		utun_data_move_end(pcb);
3307 		return 0;
3308 	}
3309 
3310 	/* reclaim user-released slots */
3311 	(void) kern_channel_reclaim(rx_ring);
3312 
3313 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
3314 	if (avail == 0) {
3315 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3316 		utun_data_move_end(pcb);
3317 		return 0;
3318 	}
3319 
3320 	if (pcb->utun_use_netif) {
3321 		kern_channel_ring_t tx_ring = pcb->utun_netif_txring;
3322 		if (tx_ring == NULL ||
3323 		    pcb->utun_netif_nexus == NULL) {
3324 			// Net-If TX ring not set up yet, nothing to read
3325 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3326 			utun_data_move_end(pcb);
3327 			return 0;
3328 		}
3329 
3330 		struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->utun_netif_nexus)->nif_stats;
3331 
3332 		// Unlock utun before entering ring
3333 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3334 
3335 		(void)kr_enter(tx_ring, TRUE);
3336 
3337 		// Lock again after entering and validate
3338 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
3339 		if (tx_ring != pcb->utun_netif_txring) {
3340 			// Ring no longer valid
3341 			// Unlock first, then exit ring
3342 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3343 			kr_exit(tx_ring);
3344 			utun_data_move_end(pcb);
3345 			return 0;
3346 		}
3347 
3348 		struct kern_channel_ring_stat_increment tx_ring_stats;
3349 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
3350 		kern_channel_slot_t tx_pslot = NULL;
3351 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3352 		if (tx_slot == NULL) {
3353 			// Nothing to read, don't bother signalling
3354 			// Unlock first, then exit ring
3355 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3356 			kr_exit(tx_ring);
3357 			utun_data_move_end(pcb);
3358 			return 0;
3359 		}
3360 
3361 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3362 		VERIFY(rx_pp != NULL);
3363 		struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
3364 		VERIFY(tx_pp != NULL);
3365 		kern_channel_slot_t rx_pslot = NULL;
3366 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3367 		kern_packet_t tx_chain_ph = 0;
3368 
3369 		while (rx_slot != NULL && tx_slot != NULL) {
3370 			size_t length;
3371 			kern_buflet_t rx_buf;
3372 			void *rx_baddr;
3373 
3374 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3375 
3376 			/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
3377 			if (tx_ph == 0) {
3378 				// Advance TX ring
3379 				tx_pslot = tx_slot;
3380 				tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3381 				continue;
3382 			}
3383 			(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
3384 			if (tx_chain_ph != 0) {
3385 				kern_packet_append(tx_ph, tx_chain_ph);
3386 			}
3387 			tx_chain_ph = tx_ph;
3388 
3389 			// Advance TX ring
3390 			tx_pslot = tx_slot;
3391 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3392 
3393 			// Allocate rx packet
3394 			kern_packet_t rx_ph = 0;
3395 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3396 			if (__improbable(error != 0)) {
3397 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3398 				    pcb->utun_ifp->if_xname);
3399 				break;
3400 			}
3401 
3402 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3403 			VERIFY(tx_buf != NULL);
3404 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
3405 			VERIFY(tx_baddr != NULL);
3406 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3407 
3408 			bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
3409 
3410 			length = MIN(kern_packet_get_data_length(tx_ph) + UTUN_HEADER_SIZE(pcb),
3411 			    pcb->utun_slot_size);
3412 
3413 			tx_ring_stats.kcrsi_slots_transferred++;
3414 			tx_ring_stats.kcrsi_bytes_transferred += length;
3415 
3416 			if (length < UTUN_HEADER_SIZE(pcb) ||
3417 			    length > pcb->utun_slot_size ||
3418 			    length > PP_BUF_SIZE_DEF(rx_pp) ||
3419 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3420 				/* flush data */
3421 				kern_pbufpool_free(rx_pp, rx_ph);
3422 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: invalid length %zu header_size %zu\n",
3423 				    pcb->utun_ifp->if_xname, length, UTUN_HEADER_SIZE(pcb));
3424 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
3425 				STATS_INC(nifs, NETIF_STATS_DROP);
3426 				continue;
3427 			}
3428 
3429 			/* fillout packet */
3430 			rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3431 			VERIFY(rx_buf != NULL);
3432 			rx_baddr = kern_buflet_get_data_address(rx_buf);
3433 			VERIFY(rx_baddr != NULL);
3434 
3435 			// Find family
3436 			uint32_t af = 0;
3437 			uint8_t vhl = *(uint8_t *)(tx_baddr);
3438 			u_int ip_version = (vhl >> 4);
3439 			switch (ip_version) {
3440 			case 4: {
3441 				af = AF_INET;
3442 				break;
3443 			}
3444 			case 6: {
3445 				af = AF_INET6;
3446 				break;
3447 			}
3448 			default: {
3449 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: unknown ip version %u vhl %u header_size %zu\n",
3450 				    pcb->utun_ifp->if_xname, ip_version, vhl, UTUN_HEADER_SIZE(pcb));
3451 				break;
3452 			}
3453 			}
3454 
3455 			// Copy header
3456 			af = htonl(af);
3457 			memcpy((void *)rx_baddr, &af, sizeof(af));
3458 			if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
3459 				kern_packet_get_euuid(tx_ph, (void *)((uintptr_t)rx_baddr + sizeof(af)));
3460 			}
3461 
3462 			// Copy data from tx to rx
3463 			memcpy((void *)((uintptr_t)rx_baddr + UTUN_HEADER_SIZE(pcb)), (void *)tx_baddr, length - UTUN_HEADER_SIZE(pcb));
3464 			kern_packet_clear_flow_uuid(rx_ph); // zero flow id
3465 
3466 			/* finalize and attach the packet */
3467 			error = kern_buflet_set_data_offset(rx_buf, 0);
3468 			VERIFY(error == 0);
3469 			error = kern_buflet_set_data_length(rx_buf, length);
3470 			VERIFY(error == 0);
3471 			error = kern_packet_finalize(rx_ph);
3472 			VERIFY(error == 0);
3473 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3474 			VERIFY(error == 0);
3475 
3476 			STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
3477 			STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
3478 
3479 			rx_ring_stats.kcrsi_slots_transferred++;
3480 			rx_ring_stats.kcrsi_bytes_transferred += length;
3481 
3482 			rx_pslot = rx_slot;
3483 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3484 		}
3485 
3486 		if (rx_pslot) {
3487 			kern_channel_advance_slot(rx_ring, rx_pslot);
3488 			kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
3489 		}
3490 
3491 		if (tx_chain_ph != 0) {
3492 			kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
3493 		}
3494 
3495 		if (tx_pslot) {
3496 			kern_channel_advance_slot(tx_ring, tx_pslot);
3497 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3498 			(void)kern_channel_reclaim(tx_ring);
3499 		}
3500 
3501 		/* just like utun_ctl_rcvd(), always reenable output */
3502 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
3503 		if (error != 0) {
3504 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
3505 		}
3506 
3507 		// Unlock first, then exit ring
3508 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3509 
3510 		if (tx_pslot != NULL) {
3511 			kern_channel_notify(tx_ring, 0);
3512 		}
3513 		kr_exit(tx_ring);
3514 	} else {
3515 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3516 
3517 		uint32_t mb_cnt = 0;
3518 		uint32_t mb_len = 0;
3519 		struct mbuf *mb_head = NULL;
3520 		struct mbuf *mb_tail = NULL;
3521 
3522 		if (ifnet_dequeue_multi(pcb->utun_ifp, avail, &mb_head,
3523 		    &mb_tail, &mb_cnt, &mb_len) != 0) {
3524 			utun_data_move_end(pcb);
3525 			return 0;
3526 		}
3527 		VERIFY(mb_cnt <= avail);
3528 
3529 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3530 		VERIFY(rx_pp != NULL);
3531 		kern_channel_slot_t rx_pslot = NULL;
3532 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3533 		while (rx_slot) {
3534 			size_t length = 0;
3535 			mbuf_t data = NULL;
3536 			if ((data = mb_head) == NULL) {
3537 				VERIFY(mb_cnt == 0);
3538 				break;
3539 			}
3540 			mb_head = mbuf_nextpkt(mb_head);
3541 			mbuf_setnextpkt(data, NULL);
3542 			VERIFY(mb_cnt != 0);
3543 			--mb_cnt;
3544 			length = mbuf_pkthdr_len(data);
3545 			if (length < UTUN_HEADER_SIZE(pcb) ||
3546 			    length > pcb->utun_slot_size ||
3547 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3548 				/* flush data */
3549 				mbuf_freem(data);
3550 				continue;
3551 			}
3552 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3553 
3554 			// Allocate rx packet
3555 			kern_packet_t rx_ph = 0;
3556 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3557 			if (__improbable(error != 0)) {
3558 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3559 				    pcb->utun_ifp->if_xname);
3560 				break;
3561 			}
3562 
3563 			/*
3564 			 * The ABI requires the protocol in network byte order
3565 			 */
3566 			*(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data));
3567 
3568 			// Fillout rx packet
3569 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3570 			VERIFY(rx_buf != NULL);
3571 			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
3572 			VERIFY(rx_baddr != NULL);
3573 
3574 			// Copy-in data from mbuf to buflet
3575 			mbuf_copydata(data, 0, length, (void *)rx_baddr);
3576 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
3577 
3578 			// Finalize and attach the packet
3579 			error = kern_buflet_set_data_offset(rx_buf, 0);
3580 			VERIFY(error == 0);
3581 			error = kern_buflet_set_data_length(rx_buf, length);
3582 			VERIFY(error == 0);
3583 			error = kern_packet_finalize(rx_ph);
3584 			VERIFY(error == 0);
3585 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3586 			VERIFY(error == 0);
3587 
3588 			rx_ring_stats.kcrsi_slots_transferred++;
3589 			rx_ring_stats.kcrsi_bytes_transferred += length;
3590 
3591 			if (!pcb->utun_ext_ifdata_stats) {
3592 				ifnet_stat_increment_out(pcb->utun_ifp, 1, length, 0);
3593 			}
3594 
3595 			mbuf_freem(data);
3596 
3597 			rx_pslot = rx_slot;
3598 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3599 		}
3600 		if (rx_pslot) {
3601 			kern_channel_advance_slot(rx_ring, rx_pslot);
3602 			kern_channel_increment_ring_stats(rx_ring, &rx_ring_stats);
3603 		}
3604 		if (mb_head != NULL) {
3605 			VERIFY(mb_cnt != 0);
3606 			mbuf_freem_list(mb_head);
3607 		}
3608 	}
3609 
3610 	utun_data_move_end(pcb);
3611 	return 0;
3612 }
3613 
3614 #endif // UTUN_NEXUS
3615 
3616 
/*
 * These are placeholders until the coreTLS kext stops calling them
 */
3620 errno_t utun_ctl_register_dtls(void *reg);
3621 int utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
3622 void utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb);
3623 
3624 errno_t
utun_ctl_register_dtls(void * reg)3625 utun_ctl_register_dtls(void *reg)
3626 {
3627 #pragma unused(reg)
3628 	return 0;
3629 }
3630 
3631 int
utun_pkt_dtls_input(struct utun_pcb * pcb,mbuf_t * pkt,protocol_family_t family)3632 utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family)
3633 {
3634 #pragma unused(pcb)
3635 #pragma unused(pkt)
3636 #pragma unused(family)
3637 	return 0;
3638 }
3639 
/*
 * No-op stub: ignores pcb and does nothing.
 */
void
utun_ctl_disable_crypto_dtls(struct utun_pcb *pcb)
{
#pragma unused(pcb)
	/* Intentionally empty. */
}
3645 
3646 #if UTUN_NEXUS
3647 static boolean_t
utun_data_move_begin(struct utun_pcb * pcb)3648 utun_data_move_begin(struct utun_pcb *pcb)
3649 {
3650 	bool data_path_ready = false;
3651 
3652 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3653 	if ((data_path_ready = UTUN_IS_DATA_PATH_READY(pcb))) {
3654 		pcb->utun_pcb_data_move++;
3655 	}
3656 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3657 
3658 	return data_path_ready;
3659 }
3660 
3661 static void
utun_data_move_end(struct utun_pcb * pcb)3662 utun_data_move_end(struct utun_pcb *pcb)
3663 {
3664 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3665 	VERIFY(pcb->utun_pcb_data_move > 0);
3666 	/*
3667 	 * if there's no more thread moving data, wakeup any
3668 	 * drainers that are blocked waiting for this.
3669 	 */
3670 	if (--pcb->utun_pcb_data_move == 0 && pcb->utun_pcb_drainers > 0) {
3671 		wakeup(&(pcb->utun_pcb_data_move));
3672 	}
3673 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3674 }
3675 
3676 static void
utun_data_move_drain(struct utun_pcb * pcb)3677 utun_data_move_drain(struct utun_pcb *pcb)
3678 {
3679 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3680 	/* data path must already be marked as not ready */
3681 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3682 	pcb->utun_pcb_drainers++;
3683 	while (pcb->utun_pcb_data_move != 0) {
3684 		(void) msleep(&(pcb->utun_pcb_data_move),
3685 		    &pcb->utun_pcb_data_move_lock, (PZERO - 1), __func__, NULL);
3686 	}
3687 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3688 	VERIFY(pcb->utun_pcb_drainers > 0);
3689 	pcb->utun_pcb_drainers--;
3690 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3691 }
3692 
3693 static void
utun_wait_data_move_drain(struct utun_pcb * pcb)3694 utun_wait_data_move_drain(struct utun_pcb *pcb)
3695 {
3696 	/*
3697 	 * Mark the data path as not usable.
3698 	 */
3699 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3700 	UTUN_CLR_DATA_PATH_READY(pcb);
3701 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3702 
3703 	/* Wait until all threads in the data path are done. */
3704 	utun_data_move_drain(pcb);
3705 }
3706 #endif // UTUN_NEXUS
3707