xref: /xnu-8796.101.5/bsd/net/if_utun.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2008-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 
31 /* ----------------------------------------------------------------------------------
32  *   Application of kernel control for interface creation
33  *
34  *   Theory of operation:
35  *   utun (user tunnel) acts as glue between kernel control sockets and network interfaces.
36  *   This kernel control will register an interface for every client that connects.
37  *   ---------------------------------------------------------------------------------- */
38 
39 #include <sys/systm.h>
40 #include <sys/kern_control.h>
41 #include <net/kpi_protocol.h>
42 #include <net/kpi_interface.h>
43 #include <sys/socket.h>
44 #include <net/if.h>
45 #include <net/if_types.h>
46 #include <net/bpf.h>
47 #include <net/if_utun.h>
48 #include <sys/mbuf.h>
49 #include <sys/sockio.h>
50 #include <netinet/in.h>
51 #include <netinet/ip.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/in6_var.h>
54 #include <sys/kauth.h>
55 #include <net/necp.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58 
59 #if SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define UTUN_NEXUS 1
64 #else // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
65 #define UTUN_NEXUS 0
66 #endif // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
67 
#if UTUN_NEXUS
/*
 * File-scope nexus state, compiled only when UTUN_NEXUS is 1
 * (SKYWALK && CONFIG_NEXUS_KERNEL_PIPE).
 */
static nexus_controller_t utun_ncd;
static int utun_ncd_refcount;
static uuid_t utun_kpipe_uuid;
static uuid_t utun_nx_dom_prov;

/*
 * Group of six UUIDs; one instance is embedded in every struct utun_pcb
 * as the utun_nx member.
 * NOTE(review): the "if_" / "fsw_" prefixes presumably stand for the netif
 * and flowswitch nexus provider/instance identifiers -- confirm against the
 * code that populates them.
 */
typedef struct utun_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t fsw_provider;
	uuid_t fsw_instance;
	uuid_t fsw_device;
	uuid_t fsw_agent;
} *utun_nx_t;

#endif // UTUN_NEXUS
84 
85 /* Control block allocated for each kernel control connection */
86 struct utun_pcb {
87 	TAILQ_ENTRY(utun_pcb)   utun_chain;
88 	kern_ctl_ref    utun_ctlref;
89 	ifnet_t                 utun_ifp;
90 	u_int32_t               utun_unit;
91 	u_int32_t               utun_unique_id;
92 	u_int32_t               utun_flags;
93 	int                     utun_ext_ifdata_stats;
94 	u_int32_t               utun_max_pending_packets;
95 	char                    utun_if_xname[IFXNAMSIZ];
96 	char                    utun_unique_name[IFXNAMSIZ];
97 	// PCB lock protects state fields and rings
98 	decl_lck_rw_data(, utun_pcb_lock);
99 	struct mbuf *   utun_input_chain;
100 	struct mbuf *   utun_input_chain_last;
101 	u_int32_t               utun_input_chain_count;
102 	// Input chain lock protects the list of input mbufs
103 	// The input chain lock must be taken AFTER the PCB lock if both are held
104 	lck_mtx_t               utun_input_chain_lock;
105 
106 #if UTUN_NEXUS
107 	// lock to protect utun_pcb_data_move & utun_pcb_drainers
108 	decl_lck_mtx_data(, utun_pcb_data_move_lock);
109 	u_int32_t               utun_pcb_data_move; /* number of data moving contexts */
110 	u_int32_t               utun_pcb_drainers; /* number of threads waiting to drain */
111 	u_int32_t               utun_pcb_data_path_state; /* internal state of interface data path */
112 
113 	struct utun_nx  utun_nx;
114 	int                     utun_kpipe_enabled;
115 	uuid_t                  utun_kpipe_uuid;
116 	void *                  utun_kpipe_rxring;
117 	void *                  utun_kpipe_txring;
118 	kern_pbufpool_t         utun_kpipe_pp;
119 	u_int32_t               utun_kpipe_tx_ring_size;
120 	u_int32_t               utun_kpipe_rx_ring_size;
121 
122 	kern_nexus_t    utun_netif_nexus;
123 	kern_pbufpool_t         utun_netif_pp;
124 	void *                  utun_netif_rxring;
125 	void *                  utun_netif_txring;
126 	uint64_t                utun_netif_txring_size;
127 
128 	u_int32_t               utun_slot_size;
129 	u_int32_t               utun_netif_ring_size;
130 	u_int32_t               utun_tx_fsw_ring_size;
131 	u_int32_t               utun_rx_fsw_ring_size;
132 	// Auto attach flowswitch when netif is enabled. When set to false,
133 	// it allows userspace nexus controller to attach and own flowswitch.
134 	bool                    utun_attach_fsw;
135 	bool                    utun_netif_connected;
136 	bool                    utun_use_netif;
137 	bool                    utun_needs_netagent;
138 #endif // UTUN_NEXUS
139 };
140 
141 /* Kernel Control functions */
142 static errno_t  utun_ctl_setup(u_int32_t *unit, void **unitinfo);
143 static errno_t  utun_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
144     void **unitinfo);
145 static errno_t  utun_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
146     void **unitinfo);
147 static errno_t  utun_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
148     void *unitinfo);
149 static errno_t  utun_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
150     void *unitinfo, mbuf_t m, int flags);
151 static errno_t  utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
152     int opt, void *data, size_t *len);
153 static errno_t  utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
154     int opt, void *data, size_t len);
155 static void             utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
156     int flags);
157 
158 /* Network Interface functions */
159 static void     utun_start(ifnet_t interface);
160 static errno_t  utun_framer(ifnet_t interface, mbuf_t *packet,
161     const struct sockaddr *dest, const char *desk_linkaddr,
162     const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len);
163 static errno_t  utun_output(ifnet_t interface, mbuf_t data);
164 static errno_t  utun_demux(ifnet_t interface, mbuf_t data, char *frame_header,
165     protocol_family_t *protocol);
166 static errno_t  utun_add_proto(ifnet_t interface, protocol_family_t protocol,
167     const struct ifnet_demux_desc *demux_array,
168     u_int32_t demux_count);
169 static errno_t  utun_del_proto(ifnet_t interface, protocol_family_t protocol);
170 static errno_t  utun_ioctl(ifnet_t interface, u_long cmd, void *data);
171 static void             utun_detached(ifnet_t interface);
172 
173 /* Protocol handlers */
174 static errno_t  utun_attach_proto(ifnet_t interface, protocol_family_t proto);
175 static errno_t  utun_proto_input(ifnet_t interface, protocol_family_t protocol,
176     mbuf_t m, char *frame_header);
177 static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
178     mbuf_t *packet, const struct sockaddr *dest, void *route,
179     char *frame_type, char *link_layer_dest);
180 static errno_t utun_pkt_input(struct utun_pcb *pcb, mbuf_t m);
181 
182 /* data movement refcounting functions */
183 #if UTUN_NEXUS
184 static boolean_t utun_data_move_begin(struct utun_pcb *pcb);
185 static void utun_data_move_end(struct utun_pcb *pcb);
186 static void utun_wait_data_move_drain(struct utun_pcb *pcb);
187 
188 /* Data path states */
189 #define UTUN_PCB_DATA_PATH_READY    0x1
190 
191 /* Macros to set/clear/test data path states */
192 #define UTUN_SET_DATA_PATH_READY(_pcb) \
193     ((_pcb)->utun_pcb_data_path_state |= UTUN_PCB_DATA_PATH_READY)
194 #define UTUN_CLR_DATA_PATH_READY(_pcb) \
195     ((_pcb)->utun_pcb_data_path_state &= ~UTUN_PCB_DATA_PATH_READY)
196 #define UTUN_IS_DATA_PATH_READY(_pcb) \
197     (((_pcb)->utun_pcb_data_path_state & UTUN_PCB_DATA_PATH_READY) != 0)
198 
199 #define UTUN_IF_DEFAULT_SLOT_SIZE 2048
200 #define UTUN_IF_DEFAULT_RING_SIZE 64
201 #define UTUN_IF_DEFAULT_TX_FSW_RING_SIZE 64
202 #define UTUN_IF_DEFAULT_RX_FSW_RING_SIZE 128
203 #define UTUN_IF_DEFAULT_BUF_SEG_SIZE    skmem_usr_buf_seg_size
204 #define UTUN_IF_HEADROOM_SIZE 32
205 
206 #define UTUN_IF_MIN_RING_SIZE 8
207 #define UTUN_IF_MAX_RING_SIZE 1024
208 
209 #define UTUN_IF_MIN_SLOT_SIZE 1024
210 #define UTUN_IF_MAX_SLOT_SIZE 4096
211 
212 #define UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT 512
213 
214 static int if_utun_max_pending_input = UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT;
215 
216 static int sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS;
217 static int sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
218 static int sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;
219 
220 static int if_utun_ring_size = UTUN_IF_DEFAULT_RING_SIZE;
221 static int if_utun_tx_fsw_ring_size = UTUN_IF_DEFAULT_TX_FSW_RING_SIZE;
222 static int if_utun_rx_fsw_ring_size = UTUN_IF_DEFAULT_RX_FSW_RING_SIZE;
223 
224 SYSCTL_DECL(_net_utun);
225 SYSCTL_NODE(_net, OID_AUTO, utun, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "UTun");
226 
227 SYSCTL_INT(_net_utun, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_utun_max_pending_input, 0, "");
228 SYSCTL_PROC(_net_utun, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
229     &if_utun_ring_size, UTUN_IF_DEFAULT_RING_SIZE, &sysctl_if_utun_ring_size, "I", "");
230 SYSCTL_PROC(_net_utun, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
231     &if_utun_tx_fsw_ring_size, UTUN_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_utun_tx_fsw_ring_size, "I", "");
232 SYSCTL_PROC(_net_utun, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
233     &if_utun_rx_fsw_ring_size, UTUN_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_utun_rx_fsw_ring_size, "I", "");
234 
235 static errno_t
236 utun_register_nexus(void);
237 
238 static errno_t
239 utun_netif_prepare(__unused kern_nexus_t nexus, ifnet_t ifp);
240 static errno_t
241 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
242     proc_t p, kern_nexus_t nexus,
243     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx);
244 static errno_t
245 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
246     kern_channel_t channel);
247 static void
248 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
249     kern_channel_t channel);
250 static void
251 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
252     kern_channel_t channel);
253 static void
254 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
255     kern_channel_t channel);
256 static errno_t
257 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
258     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
259     void **ring_ctx);
260 static void
261 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
262     kern_channel_ring_t ring);
263 static errno_t
264 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
265     kern_channel_ring_t ring, uint32_t flags);
266 static errno_t
267 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
268     kern_channel_ring_t ring, uint32_t flags);
269 #endif // UTUN_NEXUS
270 
271 #define UTUN_DEFAULT_MTU 1500
272 #define UTUN_HEADER_SIZE(_pcb) (sizeof(u_int32_t) + (((_pcb)->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) ? sizeof(uuid_t) : 0))
273 
274 static kern_ctl_ref     utun_kctlref;
275 static LCK_ATTR_DECLARE(utun_lck_attr, 0, 0);
276 static LCK_GRP_DECLARE(utun_lck_grp, "utun");
277 static LCK_MTX_DECLARE_ATTR(utun_lock, &utun_lck_grp, &utun_lck_attr);
278 
279 TAILQ_HEAD(utun_list, utun_pcb) utun_head;
280 
281 static KALLOC_TYPE_DEFINE(utun_pcb_zone, struct utun_pcb, NET_KT_DEFAULT);
282 
283 #if UTUN_NEXUS
284 
285 static int
286 sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS
287 {
288 #pragma unused(arg1, arg2)
289 	int value = if_utun_ring_size;
290 
291 	int error = sysctl_handle_int(oidp, &value, 0, req);
292 	if (error || !req->newptr) {
293 		return error;
294 	}
295 
296 	if (value < UTUN_IF_MIN_RING_SIZE ||
297 	    value > UTUN_IF_MAX_RING_SIZE) {
298 		return EINVAL;
299 	}
300 
301 	if_utun_ring_size = value;
302 
303 	return 0;
304 }
305 
306 static int
307 sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
308 {
309 #pragma unused(arg1, arg2)
310 	int value = if_utun_tx_fsw_ring_size;
311 
312 	int error = sysctl_handle_int(oidp, &value, 0, req);
313 	if (error || !req->newptr) {
314 		return error;
315 	}
316 
317 	if (value < UTUN_IF_MIN_RING_SIZE ||
318 	    value > UTUN_IF_MAX_RING_SIZE) {
319 		return EINVAL;
320 	}
321 
322 	if_utun_tx_fsw_ring_size = value;
323 
324 	return 0;
325 }
326 
327 static int
328 sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
329 {
330 #pragma unused(arg1, arg2)
331 	int value = if_utun_rx_fsw_ring_size;
332 
333 	int error = sysctl_handle_int(oidp, &value, 0, req);
334 	if (error || !req->newptr) {
335 		return error;
336 	}
337 
338 	if (value < UTUN_IF_MIN_RING_SIZE ||
339 	    value > UTUN_IF_MAX_RING_SIZE) {
340 		return EINVAL;
341 	}
342 
343 	if_utun_rx_fsw_ring_size = value;
344 
345 	return 0;
346 }
347 
348 static errno_t
utun_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)349 utun_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
350     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
351     void **ring_ctx)
352 {
353 #pragma unused(nxprov)
354 #pragma unused(channel)
355 #pragma unused(ring_ctx)
356 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
357 	if (!is_tx_ring) {
358 		VERIFY(pcb->utun_netif_rxring == NULL);
359 		pcb->utun_netif_rxring = ring;
360 	} else {
361 		VERIFY(pcb->utun_netif_txring == NULL);
362 		pcb->utun_netif_txring = ring;
363 	}
364 	return 0;
365 }
366 
367 static void
utun_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)368 utun_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
369     kern_channel_ring_t ring)
370 {
371 #pragma unused(nxprov)
372 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
373 	if (pcb->utun_netif_rxring == ring) {
374 		pcb->utun_netif_rxring = NULL;
375 	} else if (pcb->utun_netif_txring == ring) {
376 		pcb->utun_netif_txring = NULL;
377 	}
378 }
379 
380 static errno_t
utun_netif_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)381 utun_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
382     kern_channel_ring_t tx_ring, uint32_t flags)
383 {
384 #pragma unused(nxprov)
385 #pragma unused(flags)
386 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
387 
388 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
389 
390 	if (!utun_data_move_begin(pcb)) {
391 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
392 		    __func__, if_name(pcb->utun_ifp));
393 		return 0;
394 	}
395 
396 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
397 
398 	struct kern_channel_ring_stat_increment tx_ring_stats;
399 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
400 	kern_channel_slot_t tx_pslot = NULL;
401 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
402 	kern_packet_t tx_chain_ph = 0;
403 
404 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
405 
406 	if (tx_slot == NULL) {
407 		// Nothing to write, don't bother signalling
408 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
409 		utun_data_move_end(pcb);
410 		return 0;
411 	}
412 
413 	if (pcb->utun_kpipe_enabled) {
414 		kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
415 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
416 
417 		// Signal the kernel pipe ring to read
418 		if (rx_ring != NULL) {
419 			kern_channel_notify(rx_ring, 0);
420 		}
421 		utun_data_move_end(pcb);
422 		return 0;
423 	}
424 
425 	// If we're here, we're injecting into the utun kernel control socket
426 	while (tx_slot != NULL) {
427 		size_t length = 0;
428 		mbuf_t data = NULL;
429 
430 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
431 
432 		if (tx_ph == 0) {
433 			// Advance TX ring
434 			tx_pslot = tx_slot;
435 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
436 			continue;
437 		}
438 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
439 		if (tx_chain_ph != 0) {
440 			kern_packet_append(tx_ph, tx_chain_ph);
441 		}
442 		tx_chain_ph = tx_ph;
443 
444 		// Advance TX ring
445 		tx_pslot = tx_slot;
446 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
447 
448 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
449 		VERIFY(tx_buf != NULL);
450 
451 		/* tx_baddr is the absolute buffer address */
452 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
453 		VERIFY(tx_baddr != 0);
454 
455 		bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
456 
457 		uint16_t tx_offset = kern_buflet_get_data_offset(tx_buf);
458 		uint32_t tx_length = kern_buflet_get_data_length(tx_buf);
459 
460 		// The offset must be large enough for the headers
461 		VERIFY(tx_offset >= UTUN_HEADER_SIZE(pcb));
462 
463 		// Find family
464 		uint32_t af = 0;
465 		uint8_t vhl = *(uint8_t *)(tx_baddr + tx_offset);
466 		u_int ip_version = (vhl >> 4);
467 		switch (ip_version) {
468 		case 4: {
469 			af = AF_INET;
470 			break;
471 		}
472 		case 6: {
473 			af = AF_INET6;
474 			break;
475 		}
476 		default: {
477 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s: unknown ip version %u vhl %u tx_offset %u len %u header_size %zu\n",
478 			    pcb->utun_ifp->if_xname, ip_version, vhl, tx_offset, tx_length,
479 			    UTUN_HEADER_SIZE(pcb));
480 			break;
481 		}
482 		}
483 
484 		tx_offset -= UTUN_HEADER_SIZE(pcb);
485 		tx_length += UTUN_HEADER_SIZE(pcb);
486 		tx_baddr += tx_offset;
487 
488 		length = MIN(tx_length, pcb->utun_slot_size);
489 
490 		// Copy in family
491 		memcpy(tx_baddr, &af, sizeof(af));
492 		if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
493 			kern_packet_get_euuid(tx_ph, (void *)(tx_baddr + sizeof(af)));
494 		}
495 
496 		if (length > 0) {
497 			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
498 			if (error == 0) {
499 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
500 				if (error == 0) {
501 					error = utun_output(pcb->utun_ifp, data);
502 					if (error != 0) {
503 						os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - utun_output error %d\n", pcb->utun_ifp->if_xname, error);
504 					}
505 				} else {
506 					os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->utun_ifp->if_xname, length, error);
507 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
508 					STATS_INC(nifs, NETIF_STATS_DROP);
509 					mbuf_freem(data);
510 					data = NULL;
511 				}
512 			} else {
513 				os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->utun_ifp->if_xname, error);
514 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
515 				STATS_INC(nifs, NETIF_STATS_DROP);
516 			}
517 		} else {
518 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - 0 length packet\n", pcb->utun_ifp->if_xname);
519 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
520 			STATS_INC(nifs, NETIF_STATS_DROP);
521 		}
522 
523 		if (data == NULL) {
524 			continue;
525 		}
526 
527 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
528 		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
529 
530 		tx_ring_stats.kcrsi_slots_transferred++;
531 		tx_ring_stats.kcrsi_bytes_transferred += length;
532 	}
533 	if (tx_chain_ph != 0) {
534 		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
535 	}
536 	if (tx_pslot) {
537 		kern_channel_advance_slot(tx_ring, tx_pslot);
538 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
539 		(void)kern_channel_reclaim(tx_ring);
540 	}
541 
542 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
543 	utun_data_move_end(pcb);
544 	return 0;
545 }
546 
547 static errno_t
utun_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)548 utun_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
549     kern_channel_ring_t ring, __unused uint32_t flags)
550 {
551 #pragma unused(nxprov)
552 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
553 	boolean_t more = false;
554 	errno_t rc = 0;
555 
556 	if (!utun_data_move_begin(pcb)) {
557 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
558 		    __func__, if_name(pcb->utun_ifp));
559 		return 0;
560 	}
561 
562 	/*
563 	 * Refill and sync the ring; we may be racing against another thread doing
564 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
565 	 * variant here.
566 	 */
567 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
568 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
569 		os_log_error(OS_LOG_DEFAULT, "%s, tx refill failed %d\n", __func__, rc);
570 	}
571 
572 	(void) kr_enter(ring, TRUE);
573 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
574 
575 	if (pcb->utun_kpipe_enabled) {
576 		uint32_t tx_available = kern_channel_available_slot_count(ring);
577 		if (pcb->utun_netif_txring_size > 0 &&
578 		    tx_available >= pcb->utun_netif_txring_size - 1) {
579 			// No room left in tx ring, disable output for now
580 			errno_t error = ifnet_disable_output(pcb->utun_ifp);
581 			if (error != 0) {
582 				os_log_error(OS_LOG_DEFAULT, "utun_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
583 			}
584 		}
585 	}
586 
587 	if (pcb->utun_kpipe_enabled) {
588 		kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
589 
590 		// Unlock while calling notify
591 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
592 		// Signal the kernel pipe ring to read
593 		if (rx_ring != NULL) {
594 			kern_channel_notify(rx_ring, 0);
595 		}
596 	} else {
597 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
598 	}
599 
600 	kr_exit(ring);
601 	utun_data_move_end(pcb);
602 	return 0;
603 }
604 
605 static errno_t
utun_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)606 utun_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
607     kern_channel_ring_t rx_ring, uint32_t flags)
608 {
609 #pragma unused(nxprov)
610 #pragma unused(flags)
611 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
612 	struct kern_channel_ring_stat_increment rx_ring_stats;
613 
614 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
615 
616 	if (!utun_data_move_begin(pcb)) {
617 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
618 		    __func__, if_name(pcb->utun_ifp));
619 		return 0;
620 	}
621 
622 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
623 
624 	// Reclaim user-released slots
625 	(void) kern_channel_reclaim(rx_ring);
626 
627 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
628 
629 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
630 	if (avail == 0) {
631 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
632 		utun_data_move_end(pcb);
633 		return 0;
634 	}
635 
636 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
637 	VERIFY(rx_pp != NULL);
638 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
639 	kern_channel_slot_t rx_pslot = NULL;
640 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
641 
642 	while (rx_slot != NULL) {
643 		// Check for a waiting packet
644 		lck_mtx_lock(&pcb->utun_input_chain_lock);
645 		mbuf_t data = pcb->utun_input_chain;
646 		if (data == NULL) {
647 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
648 			break;
649 		}
650 
651 		// Allocate rx packet
652 		kern_packet_t rx_ph = 0;
653 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
654 		if (__improbable(error != 0)) {
655 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
656 			STATS_INC(nifs, NETIF_STATS_DROP);
657 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
658 			break;
659 		}
660 
661 		// Advance waiting packets
662 		if (pcb->utun_input_chain_count > 0) {
663 			pcb->utun_input_chain_count--;
664 		}
665 		pcb->utun_input_chain = data->m_nextpkt;
666 		data->m_nextpkt = NULL;
667 		if (pcb->utun_input_chain == NULL) {
668 			pcb->utun_input_chain_last = NULL;
669 		}
670 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
671 
672 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
673 		size_t length = mbuf_pkthdr_len(data);
674 
675 		if (length < header_offset) {
676 			// mbuf is too small
677 			mbuf_freem(data);
678 			kern_pbufpool_free(rx_pp, rx_ph);
679 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
680 			STATS_INC(nifs, NETIF_STATS_DROP);
681 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length too short for header %zu < %zu\n",
682 			    pcb->utun_ifp->if_xname, length, header_offset);
683 			continue;
684 		}
685 
686 		length -= header_offset;
687 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
688 			// Flush data
689 			mbuf_freem(data);
690 			kern_pbufpool_free(rx_pp, rx_ph);
691 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
692 			STATS_INC(nifs, NETIF_STATS_DROP);
693 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length %zu > %u\n",
694 			    pcb->utun_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
695 			continue;
696 		}
697 
698 		mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
699 
700 		// Fillout rx packet
701 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
702 		VERIFY(rx_buf != NULL);
703 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
704 		VERIFY(rx_baddr != NULL);
705 
706 		// Copy-in data from mbuf to buflet
707 		mbuf_copydata(data, header_offset, length, (void *)rx_baddr);
708 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
709 
710 		// Finalize and attach the packet
711 		error = kern_buflet_set_data_offset(rx_buf, 0);
712 		VERIFY(error == 0);
713 		error = kern_buflet_set_data_length(rx_buf, length);
714 		VERIFY(error == 0);
715 		error = kern_packet_set_headroom(rx_ph, 0);
716 		VERIFY(error == 0);
717 		error = kern_packet_finalize(rx_ph);
718 		VERIFY(error == 0);
719 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
720 		VERIFY(error == 0);
721 
722 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
723 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
724 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
725 
726 		rx_ring_stats.kcrsi_slots_transferred++;
727 		rx_ring_stats.kcrsi_bytes_transferred += length;
728 
729 		mbuf_freem(data);
730 
731 		// Advance ring
732 		rx_pslot = rx_slot;
733 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
734 	}
735 
736 	struct kern_channel_ring_stat_increment tx_ring_stats;
737 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
738 	kern_channel_ring_t tx_ring = pcb->utun_kpipe_txring;
739 	kern_channel_slot_t tx_pslot = NULL;
740 	kern_channel_slot_t tx_slot = NULL;
741 	if (tx_ring == NULL) {
742 		// Net-If TX ring not set up yet, nothing to read
743 		goto done;
744 	}
745 	// Unlock utun before entering ring
746 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
747 
748 	(void)kr_enter(tx_ring, TRUE);
749 
750 	// Lock again after entering and validate
751 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
752 	if (tx_ring != pcb->utun_kpipe_txring) {
753 		goto done;
754 	}
755 
756 	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
757 	if (tx_slot == NULL) {
758 		// Nothing to read, don't bother signalling
759 		goto done;
760 	}
761 
762 	while (rx_slot != NULL && tx_slot != NULL) {
763 		// Allocate rx packet
764 		kern_packet_t rx_ph = 0;
765 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
766 
767 		// Advance TX ring
768 		tx_pslot = tx_slot;
769 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
770 
771 		/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
772 		if (tx_ph == 0) {
773 			continue;
774 		}
775 
776 		/* XXX We could try this alloc before advancing the slot to avoid
777 		 * dropping the packet on failure to allocate.
778 		 */
779 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
780 		if (__improbable(error != 0)) {
781 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
782 			STATS_INC(nifs, NETIF_STATS_DROP);
783 			break;
784 		}
785 
786 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
787 		VERIFY(tx_buf != NULL);
788 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
789 		VERIFY(tx_baddr != 0);
790 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
791 
792 		// Check packet length
793 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
794 		uint32_t tx_length = kern_packet_get_data_length(tx_ph);
795 		if (tx_length < header_offset) {
796 			// Packet is too small
797 			kern_pbufpool_free(rx_pp, rx_ph);
798 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
799 			STATS_INC(nifs, NETIF_STATS_DROP);
800 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: packet length too short for header %u < %zu\n",
801 			    pcb->utun_ifp->if_xname, tx_length, header_offset);
802 			continue;
803 		}
804 
805 		size_t length = MIN(tx_length - header_offset,
806 		    pcb->utun_slot_size);
807 
808 		tx_ring_stats.kcrsi_slots_transferred++;
809 		tx_ring_stats.kcrsi_bytes_transferred += length;
810 
811 		// Fillout rx packet
812 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
813 		VERIFY(rx_buf != NULL);
814 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
815 		VERIFY(rx_baddr != NULL);
816 
817 		// Copy-in data from tx to rx
818 		memcpy((void *)rx_baddr, (void *)(tx_baddr + header_offset), length);
819 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
820 
821 		// Finalize and attach the packet
822 		error = kern_buflet_set_data_offset(rx_buf, 0);
823 		VERIFY(error == 0);
824 		error = kern_buflet_set_data_length(rx_buf, length);
825 		VERIFY(error == 0);
826 		error = kern_packet_set_headroom(rx_ph, 0);
827 		VERIFY(error == 0);
828 		error = kern_packet_finalize(rx_ph);
829 		VERIFY(error == 0);
830 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
831 		VERIFY(error == 0);
832 
833 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
834 		STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
835 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
836 
837 		rx_ring_stats.kcrsi_slots_transferred++;
838 		rx_ring_stats.kcrsi_bytes_transferred += length;
839 
840 		rx_pslot = rx_slot;
841 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
842 	}
843 
844 done:
845 	if (rx_pslot) {
846 		kern_channel_advance_slot(rx_ring, rx_pslot);
847 		kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
848 	}
849 
850 	if (tx_pslot) {
851 		kern_channel_advance_slot(tx_ring, tx_pslot);
852 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
853 		(void)kern_channel_reclaim(tx_ring);
854 	}
855 
856 	// Unlock first, then exit ring
857 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
858 	if (tx_ring != NULL) {
859 		if (tx_pslot != NULL) {
860 			kern_channel_notify(tx_ring, 0);
861 		}
862 		kr_exit(tx_ring);
863 	}
864 
865 	utun_data_move_end(pcb);
866 	return 0;
867 }
868 
869 static errno_t
utun_nexus_ifattach(struct utun_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)870 utun_nexus_ifattach(struct utun_pcb *pcb,
871     struct ifnet_init_eparams *init_params,
872     struct ifnet **ifp)
873 {
874 	errno_t err;
875 	nexus_controller_t controller = kern_nexus_shared_controller();
876 	struct kern_nexus_net_init net_init;
877 	struct kern_pbufpool_init pp_init;
878 
879 	nexus_name_t provider_name;
880 	snprintf((char *)provider_name, sizeof(provider_name),
881 	    "com.apple.netif.%s", pcb->utun_if_xname);
882 
883 	struct kern_nexus_provider_init prov_init = {
884 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
885 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
886 		.nxpi_pre_connect = utun_nexus_pre_connect,
887 		.nxpi_connected = utun_nexus_connected,
888 		.nxpi_pre_disconnect = utun_netif_pre_disconnect,
889 		.nxpi_disconnected = utun_nexus_disconnected,
890 		.nxpi_ring_init = utun_netif_ring_init,
891 		.nxpi_ring_fini = utun_netif_ring_fini,
892 		.nxpi_slot_init = NULL,
893 		.nxpi_slot_fini = NULL,
894 		.nxpi_sync_tx = utun_netif_sync_tx,
895 		.nxpi_sync_rx = utun_netif_sync_rx,
896 		.nxpi_tx_doorbell = utun_netif_tx_doorbell,
897 	};
898 
899 	nexus_attr_t nxa = NULL;
900 	err = kern_nexus_attr_create(&nxa);
901 	if (err != 0) {
902 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
903 		    __func__, err);
904 		goto failed;
905 	}
906 
907 	uint64_t slot_buffer_size = pcb->utun_slot_size;
908 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
909 	VERIFY(err == 0);
910 
911 	// Reset ring size for netif nexus to limit memory usage
912 	uint64_t ring_size = pcb->utun_netif_ring_size;
913 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
914 	VERIFY(err == 0);
915 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
916 	VERIFY(err == 0);
917 
918 	pcb->utun_netif_txring_size = ring_size;
919 
920 	bzero(&pp_init, sizeof(pp_init));
921 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
922 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
923 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
924 	pp_init.kbi_bufsize = pcb->utun_slot_size;
925 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
926 	pp_init.kbi_max_frags = 1;
927 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
928 	    "%s", provider_name);
929 	pp_init.kbi_ctx = NULL;
930 	pp_init.kbi_ctx_retain = NULL;
931 	pp_init.kbi_ctx_release = NULL;
932 
933 	err = kern_pbufpool_create(&pp_init, &pcb->utun_netif_pp, NULL);
934 	if (err != 0) {
935 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
936 		goto failed;
937 	}
938 
939 	err = kern_nexus_controller_register_provider(controller,
940 	    utun_nx_dom_prov,
941 	    provider_name,
942 	    &prov_init,
943 	    sizeof(prov_init),
944 	    nxa,
945 	    &pcb->utun_nx.if_provider);
946 	if (err != 0) {
947 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
948 		    __func__, err);
949 		goto failed;
950 	}
951 
952 	bzero(&net_init, sizeof(net_init));
953 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
954 	net_init.nxneti_flags = 0;
955 	net_init.nxneti_eparams = init_params;
956 	net_init.nxneti_lladdr = NULL;
957 	net_init.nxneti_prepare = utun_netif_prepare;
958 	net_init.nxneti_rx_pbufpool = pcb->utun_netif_pp;
959 	net_init.nxneti_tx_pbufpool = pcb->utun_netif_pp;
960 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
961 	    pcb->utun_nx.if_provider,
962 	    pcb,
963 	    NULL,
964 	    &pcb->utun_nx.if_instance,
965 	    &net_init,
966 	    ifp);
967 	if (err != 0) {
968 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
969 		    __func__, err);
970 		kern_nexus_controller_deregister_provider(controller,
971 		    pcb->utun_nx.if_provider);
972 		uuid_clear(pcb->utun_nx.if_provider);
973 		goto failed;
974 	}
975 
976 failed:
977 	if (nxa) {
978 		kern_nexus_attr_destroy(nxa);
979 	}
980 	if (err && pcb->utun_netif_pp != NULL) {
981 		kern_pbufpool_destroy(pcb->utun_netif_pp);
982 		pcb->utun_netif_pp = NULL;
983 	}
984 	return err;
985 }
986 
987 static void
utun_detach_provider_and_instance(uuid_t provider,uuid_t instance)988 utun_detach_provider_and_instance(uuid_t provider, uuid_t instance)
989 {
990 	nexus_controller_t controller = kern_nexus_shared_controller();
991 	errno_t err;
992 
993 	if (!uuid_is_null(instance)) {
994 		err = kern_nexus_controller_free_provider_instance(controller,
995 		    instance);
996 		if (err != 0) {
997 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
998 			    __func__, err);
999 		}
1000 		uuid_clear(instance);
1001 	}
1002 	if (!uuid_is_null(provider)) {
1003 		err = kern_nexus_controller_deregister_provider(controller,
1004 		    provider);
1005 		if (err != 0) {
1006 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1007 		}
1008 		uuid_clear(provider);
1009 	}
1010 	return;
1011 }
1012 
1013 static void
utun_nexus_detach(struct utun_pcb * pcb)1014 utun_nexus_detach(struct utun_pcb *pcb)
1015 {
1016 	utun_nx_t nx = &pcb->utun_nx;
1017 	nexus_controller_t controller = kern_nexus_shared_controller();
1018 	errno_t err;
1019 
1020 	if (!uuid_is_null(nx->fsw_device)) {
1021 		err = kern_nexus_ifdetach(controller,
1022 		    nx->fsw_instance,
1023 		    nx->fsw_device);
1024 		if (err != 0) {
1025 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
1026 			    __func__, err);
1027 		}
1028 	}
1029 
1030 	utun_detach_provider_and_instance(nx->fsw_provider,
1031 	    nx->fsw_instance);
1032 	utun_detach_provider_and_instance(nx->if_provider,
1033 	    nx->if_instance);
1034 
1035 	if (pcb->utun_netif_pp != NULL) {
1036 		kern_pbufpool_destroy(pcb->utun_netif_pp);
1037 		pcb->utun_netif_pp = NULL;
1038 	}
1039 	memset(nx, 0, sizeof(*nx));
1040 }
1041 
1042 static errno_t
utun_create_fs_provider_and_instance(struct utun_pcb * pcb,const char * type_name,const char * ifname,uuid_t * provider,uuid_t * instance)1043 utun_create_fs_provider_and_instance(struct utun_pcb *pcb,
1044     const char *type_name,
1045     const char *ifname,
1046     uuid_t *provider, uuid_t *instance)
1047 {
1048 	nexus_attr_t attr = NULL;
1049 	nexus_controller_t controller = kern_nexus_shared_controller();
1050 	uuid_t dom_prov;
1051 	errno_t err;
1052 	struct kern_nexus_init init;
1053 	nexus_name_t    provider_name;
1054 
1055 	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1056 	    &dom_prov);
1057 	if (err != 0) {
1058 		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
1059 		    __func__, type_name, err);
1060 		goto failed;
1061 	}
1062 
1063 	err = kern_nexus_attr_create(&attr);
1064 	if (err != 0) {
1065 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1066 		    __func__, err);
1067 		goto failed;
1068 	}
1069 
1070 	uint64_t slot_buffer_size = pcb->utun_slot_size;
1071 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1072 	VERIFY(err == 0);
1073 
1074 	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1075 	uint64_t tx_ring_size = pcb->utun_tx_fsw_ring_size;
1076 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1077 	VERIFY(err == 0);
1078 	uint64_t rx_ring_size = pcb->utun_rx_fsw_ring_size;
1079 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1080 	VERIFY(err == 0);
1081 	/*
1082 	 * Configure flowswitch to use super-packet (multi-buflet).
1083 	 * This allows flowswitch to perform intra-stack packet aggregation.
1084 	 */
1085 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
1086 	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
1087 	VERIFY(err == 0);
1088 
1089 	snprintf((char *)provider_name, sizeof(provider_name),
1090 	    "com.apple.%s.%s", type_name, ifname);
1091 	err = kern_nexus_controller_register_provider(controller,
1092 	    dom_prov,
1093 	    provider_name,
1094 	    NULL,
1095 	    0,
1096 	    attr,
1097 	    provider);
1098 	kern_nexus_attr_destroy(attr);
1099 	attr = NULL;
1100 	if (err != 0) {
1101 		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
1102 		    __func__, type_name, err);
1103 		goto failed;
1104 	}
1105 	bzero(&init, sizeof(init));
1106 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1107 	err = kern_nexus_controller_alloc_provider_instance(controller,
1108 	    *provider,
1109 	    NULL, NULL,
1110 	    instance, &init);
1111 	if (err != 0) {
1112 		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
1113 		    __func__, type_name, err);
1114 		kern_nexus_controller_deregister_provider(controller,
1115 		    *provider);
1116 		uuid_clear(*provider);
1117 	}
1118 failed:
1119 	return err;
1120 }
1121 
1122 static errno_t
utun_flowswitch_attach(struct utun_pcb * pcb)1123 utun_flowswitch_attach(struct utun_pcb *pcb)
1124 {
1125 	nexus_controller_t controller = kern_nexus_shared_controller();
1126 	errno_t err = 0;
1127 	utun_nx_t nx = &pcb->utun_nx;
1128 
1129 	// Allocate flowswitch
1130 	err = utun_create_fs_provider_and_instance(pcb,
1131 	    "flowswitch",
1132 	    pcb->utun_ifp->if_xname,
1133 	    &nx->fsw_provider,
1134 	    &nx->fsw_instance);
1135 	if (err != 0) {
1136 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
1137 		    __func__);
1138 		goto failed;
1139 	}
1140 
1141 	// Attach flowswitch to device port
1142 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
1143 	    NULL, nx->if_instance,
1144 	    FALSE, &nx->fsw_device);
1145 	if (err != 0) {
1146 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
1147 		goto failed;
1148 	}
1149 
1150 	// Extract the agent UUID and save for later
1151 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
1152 	if (flowswitch_nx != NULL) {
1153 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
1154 		if (flowswitch != NULL) {
1155 			FSW_RLOCK(flowswitch);
1156 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
1157 			FSW_UNLOCK(flowswitch);
1158 		} else {
1159 			os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - flowswitch is NULL\n");
1160 		}
1161 		nx_release(flowswitch_nx);
1162 	} else {
1163 		os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - unable to find flowswitch nexus\n");
1164 	}
1165 
1166 	return 0;
1167 
1168 failed:
1169 	utun_nexus_detach(pcb);
1170 
1171 	errno_t detach_error = 0;
1172 	if ((detach_error = ifnet_detach(pcb->utun_ifp)) != 0) {
1173 		panic("utun_flowswitch_attach - ifnet_detach failed: %d", detach_error);
1174 		/* NOT REACHED */
1175 	}
1176 
1177 	return err;
1178 }
1179 
1180 static errno_t
utun_register_kernel_pipe_nexus(struct utun_pcb * pcb)1181 utun_register_kernel_pipe_nexus(struct utun_pcb *pcb)
1182 {
1183 	nexus_attr_t nxa = NULL;
1184 	errno_t result;
1185 
1186 	lck_mtx_lock(&utun_lock);
1187 	if (utun_ncd_refcount++) {
1188 		lck_mtx_unlock(&utun_lock);
1189 		return 0;
1190 	}
1191 
1192 	result = kern_nexus_controller_create(&utun_ncd);
1193 	if (result) {
1194 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
1195 		    __FUNCTION__, result);
1196 		goto done;
1197 	}
1198 
1199 	uuid_t dom_prov;
1200 	result = kern_nexus_get_default_domain_provider(
1201 		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1202 	if (result) {
1203 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
1204 		    __FUNCTION__, result);
1205 		goto done;
1206 	}
1207 
1208 	struct kern_nexus_provider_init prov_init = {
1209 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1210 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1211 		.nxpi_pre_connect = utun_nexus_pre_connect,
1212 		.nxpi_connected = utun_nexus_connected,
1213 		.nxpi_pre_disconnect = utun_nexus_pre_disconnect,
1214 		.nxpi_disconnected = utun_nexus_disconnected,
1215 		.nxpi_ring_init = utun_kpipe_ring_init,
1216 		.nxpi_ring_fini = utun_kpipe_ring_fini,
1217 		.nxpi_slot_init = NULL,
1218 		.nxpi_slot_fini = NULL,
1219 		.nxpi_sync_tx = utun_kpipe_sync_tx,
1220 		.nxpi_sync_rx = utun_kpipe_sync_rx,
1221 		.nxpi_tx_doorbell = NULL,
1222 	};
1223 
1224 	result = kern_nexus_attr_create(&nxa);
1225 	if (result) {
1226 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1227 		    __FUNCTION__, result);
1228 		goto done;
1229 	}
1230 
1231 	uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1232 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1233 	VERIFY(result == 0);
1234 
1235 	// Reset ring size for kernel pipe nexus to limit memory usage
1236 	uint64_t ring_size =
1237 	    pcb->utun_kpipe_tx_ring_size != 0 ? pcb->utun_kpipe_tx_ring_size :
1238 	    if_utun_ring_size;
1239 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1240 	VERIFY(result == 0);
1241 
1242 	ring_size =
1243 	    pcb->utun_kpipe_rx_ring_size != 0 ? pcb->utun_kpipe_rx_ring_size :
1244 	    if_utun_ring_size;
1245 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1246 	VERIFY(result == 0);
1247 
1248 	result = kern_nexus_controller_register_provider(utun_ncd,
1249 	    dom_prov,
1250 	    (const uint8_t *)"com.apple.nexus.utun.kpipe",
1251 	    &prov_init,
1252 	    sizeof(prov_init),
1253 	    nxa,
1254 	    &utun_kpipe_uuid);
1255 	if (result) {
1256 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
1257 		    __FUNCTION__, result);
1258 		goto done;
1259 	}
1260 
1261 done:
1262 	if (nxa) {
1263 		kern_nexus_attr_destroy(nxa);
1264 	}
1265 
1266 	if (result) {
1267 		if (utun_ncd) {
1268 			kern_nexus_controller_destroy(utun_ncd);
1269 			utun_ncd = NULL;
1270 		}
1271 		utun_ncd_refcount = 0;
1272 	}
1273 
1274 	lck_mtx_unlock(&utun_lock);
1275 
1276 	return result;
1277 }
1278 
1279 static void
utun_unregister_kernel_pipe_nexus(void)1280 utun_unregister_kernel_pipe_nexus(void)
1281 {
1282 	lck_mtx_lock(&utun_lock);
1283 
1284 	VERIFY(utun_ncd_refcount > 0);
1285 
1286 	if (--utun_ncd_refcount == 0) {
1287 		kern_nexus_controller_destroy(utun_ncd);
1288 		utun_ncd = NULL;
1289 	}
1290 
1291 	lck_mtx_unlock(&utun_lock);
1292 }
1293 
1294 // For use by socket option, not internally
1295 static errno_t
utun_disable_channel(struct utun_pcb * pcb)1296 utun_disable_channel(struct utun_pcb *pcb)
1297 {
1298 	errno_t result;
1299 	int enabled;
1300 	uuid_t uuid;
1301 
1302 	/* Wait until all threads in the data paths are done. */
1303 	utun_wait_data_move_drain(pcb);
1304 
1305 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1306 
1307 	enabled = pcb->utun_kpipe_enabled;
1308 	uuid_copy(uuid, pcb->utun_kpipe_uuid);
1309 
1310 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid) == !enabled);
1311 
1312 	pcb->utun_kpipe_enabled = 0;
1313 	uuid_clear(pcb->utun_kpipe_uuid);
1314 
1315 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1316 
1317 	if (enabled) {
1318 		result = kern_nexus_controller_free_provider_instance(utun_ncd, uuid);
1319 	} else {
1320 		result = ENXIO;
1321 	}
1322 
1323 	if (!result) {
1324 		if (pcb->utun_kpipe_pp != NULL) {
1325 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1326 			pcb->utun_kpipe_pp = NULL;
1327 		}
1328 		utun_unregister_kernel_pipe_nexus();
1329 	}
1330 
1331 	return result;
1332 }
1333 
1334 static errno_t
utun_enable_channel(struct utun_pcb * pcb,struct proc * proc)1335 utun_enable_channel(struct utun_pcb *pcb, struct proc *proc)
1336 {
1337 	struct kern_nexus_init init;
1338 	struct kern_pbufpool_init pp_init;
1339 	errno_t result;
1340 
1341 	kauth_cred_t cred = kauth_cred_get();
1342 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
1343 	if (result) {
1344 		return result;
1345 	}
1346 
1347 	result = utun_register_kernel_pipe_nexus(pcb);
1348 	if (result) {
1349 		return result;
1350 	}
1351 
1352 	VERIFY(utun_ncd);
1353 
1354 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1355 
1356 	if (pcb->utun_kpipe_enabled) {
1357 		result = EEXIST; // return success instead?
1358 		goto done;
1359 	}
1360 
1361 	/*
1362 	 * Make sure we can fit packets in the channel buffers and
1363 	 * Allow an extra 4 bytes for the protocol number header in the channel
1364 	 */
1365 	if (pcb->utun_ifp->if_mtu + UTUN_HEADER_SIZE(pcb) > pcb->utun_slot_size) {
1366 		result = EOPNOTSUPP;
1367 		goto done;
1368 	}
1369 
1370 	bzero(&pp_init, sizeof(pp_init));
1371 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1372 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1373 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
1374 	pp_init.kbi_bufsize = pcb->utun_slot_size;
1375 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
1376 	pp_init.kbi_max_frags = 1;
1377 	pp_init.kbi_flags |= KBIF_QUANTUM;
1378 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1379 	    "com.apple.kpipe.%s", pcb->utun_if_xname);
1380 	pp_init.kbi_ctx = NULL;
1381 	pp_init.kbi_ctx_retain = NULL;
1382 	pp_init.kbi_ctx_release = NULL;
1383 
1384 	result = kern_pbufpool_create(&pp_init, &pcb->utun_kpipe_pp,
1385 	    NULL);
1386 	if (result != 0) {
1387 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, result);
1388 		goto done;
1389 	}
1390 
1391 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid));
1392 	bzero(&init, sizeof(init));
1393 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1394 	init.nxi_tx_pbufpool = pcb->utun_kpipe_pp;
1395 	result = kern_nexus_controller_alloc_provider_instance(utun_ncd,
1396 	    utun_kpipe_uuid, pcb, NULL, &pcb->utun_kpipe_uuid, &init);
1397 	if (result) {
1398 		goto done;
1399 	}
1400 
1401 	nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
1402 	result = kern_nexus_controller_bind_provider_instance(utun_ncd,
1403 	    pcb->utun_kpipe_uuid, &port,
1404 	    proc_pid(proc), NULL, NULL, 0, NEXUS_BIND_PID);
1405 	if (result) {
1406 		kern_nexus_controller_free_provider_instance(utun_ncd,
1407 		    pcb->utun_kpipe_uuid);
1408 		uuid_clear(pcb->utun_kpipe_uuid);
1409 		goto done;
1410 	}
1411 
1412 	pcb->utun_kpipe_enabled = 1;
1413 
1414 done:
1415 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1416 
1417 	if (result) {
1418 		if (pcb->utun_kpipe_pp != NULL) {
1419 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1420 			pcb->utun_kpipe_pp = NULL;
1421 		}
1422 		utun_unregister_kernel_pipe_nexus();
1423 	}
1424 
1425 	return result;
1426 }
1427 
1428 #endif // UTUN_NEXUS
1429 
1430 errno_t
utun_register_control(void)1431 utun_register_control(void)
1432 {
1433 	struct kern_ctl_reg kern_ctl;
1434 	errno_t result = 0;
1435 
1436 #if UTUN_NEXUS
1437 	utun_register_nexus();
1438 #endif // UTUN_NEXUS
1439 
1440 	TAILQ_INIT(&utun_head);
1441 
1442 	bzero(&kern_ctl, sizeof(kern_ctl));
1443 	strlcpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
1444 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
1445 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP | CTL_FLAG_REG_EXTENDED; /* Require root */
1446 	kern_ctl.ctl_sendsize = 512 * 1024;
1447 	kern_ctl.ctl_recvsize = 512 * 1024;
1448 	kern_ctl.ctl_setup = utun_ctl_setup;
1449 	kern_ctl.ctl_bind = utun_ctl_bind;
1450 	kern_ctl.ctl_connect = utun_ctl_connect;
1451 	kern_ctl.ctl_disconnect = utun_ctl_disconnect;
1452 	kern_ctl.ctl_send = utun_ctl_send;
1453 	kern_ctl.ctl_setopt = utun_ctl_setopt;
1454 	kern_ctl.ctl_getopt = utun_ctl_getopt;
1455 	kern_ctl.ctl_rcvd = utun_ctl_rcvd;
1456 
1457 	result = ctl_register(&kern_ctl, &utun_kctlref);
1458 	if (result != 0) {
1459 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - ctl_register failed: %d\n", result);
1460 		return result;
1461 	}
1462 
1463 	/* Register the protocol plumbers */
1464 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN,
1465 	    utun_attach_proto, NULL)) != 0) {
1466 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN) failed: %d\n",
1467 		    result);
1468 		ctl_deregister(utun_kctlref);
1469 		return result;
1470 	}
1471 
1472 	/* Register the protocol plumbers */
1473 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN,
1474 	    utun_attach_proto, NULL)) != 0) {
1475 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_UTUN);
1476 		ctl_deregister(utun_kctlref);
1477 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN) failed: %d\n",
1478 		    result);
1479 		return result;
1480 	}
1481 
1482 	return 0;
1483 }
1484 
1485 /* Kernel control functions */
1486 
1487 static inline int
utun_find_by_unit(u_int32_t unit)1488 utun_find_by_unit(u_int32_t unit)
1489 {
1490 	struct utun_pcb *next_pcb = NULL;
1491 	int found = 0;
1492 
1493 	TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1494 		if (next_pcb->utun_unit == unit) {
1495 			found = 1;
1496 			break;
1497 		}
1498 	}
1499 
1500 	return found;
1501 }
1502 
1503 static inline void
utun_free_pcb(struct utun_pcb * pcb,bool locked)1504 utun_free_pcb(struct utun_pcb *pcb, bool locked)
1505 {
1506 #if UTUN_NEXUS
1507 	mbuf_freem_list(pcb->utun_input_chain);
1508 	pcb->utun_input_chain_count = 0;
1509 	lck_mtx_destroy(&pcb->utun_input_chain_lock, &utun_lck_grp);
1510 	lck_mtx_destroy(&pcb->utun_pcb_data_move_lock, &utun_lck_grp);
1511 #endif // UTUN_NEXUS
1512 	lck_rw_destroy(&pcb->utun_pcb_lock, &utun_lck_grp);
1513 	if (!locked) {
1514 		lck_mtx_lock(&utun_lock);
1515 	}
1516 	TAILQ_REMOVE(&utun_head, pcb, utun_chain);
1517 	if (!locked) {
1518 		lck_mtx_unlock(&utun_lock);
1519 	}
1520 	zfree(utun_pcb_zone, pcb);
1521 }
1522 
1523 static errno_t
utun_ctl_setup(u_int32_t * unit,void ** unitinfo)1524 utun_ctl_setup(u_int32_t *unit, void **unitinfo)
1525 {
1526 	if (unit == NULL || unitinfo == NULL) {
1527 		return EINVAL;
1528 	}
1529 
1530 	lck_mtx_lock(&utun_lock);
1531 
1532 	/* Find next available unit */
1533 	if (*unit == 0) {
1534 		*unit = 1;
1535 		while (*unit != ctl_maxunit) {
1536 			if (utun_find_by_unit(*unit)) {
1537 				(*unit)++;
1538 			} else {
1539 				break;
1540 			}
1541 		}
1542 		if (*unit == ctl_maxunit) {
1543 			lck_mtx_unlock(&utun_lock);
1544 			return EBUSY;
1545 		}
1546 	} else if (utun_find_by_unit(*unit)) {
1547 		lck_mtx_unlock(&utun_lock);
1548 		return EBUSY;
1549 	}
1550 
1551 	/* Find some open interface id */
1552 	u_int32_t chosen_unique_id = 1;
1553 	struct utun_pcb *next_pcb = TAILQ_LAST(&utun_head, utun_list);
1554 	if (next_pcb != NULL) {
1555 		/* List was not empty, add one to the last item */
1556 		chosen_unique_id = next_pcb->utun_unique_id + 1;
1557 		next_pcb = NULL;
1558 
1559 		/*
1560 		 * If this wrapped the id number, start looking at
1561 		 * the front of the list for an unused id.
1562 		 */
1563 		if (chosen_unique_id == 0) {
1564 			/* Find the next unused ID */
1565 			chosen_unique_id = 1;
1566 			TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1567 				if (next_pcb->utun_unique_id > chosen_unique_id) {
1568 					/* We found a gap */
1569 					break;
1570 				}
1571 
1572 				chosen_unique_id = next_pcb->utun_unique_id + 1;
1573 			}
1574 		}
1575 	}
1576 
1577 	struct utun_pcb *pcb = zalloc_flags(utun_pcb_zone, Z_WAITOK | Z_ZERO);
1578 
1579 	*unitinfo = pcb;
1580 	pcb->utun_unit = *unit;
1581 	pcb->utun_unique_id = chosen_unique_id;
1582 
1583 	if (next_pcb != NULL) {
1584 		TAILQ_INSERT_BEFORE(next_pcb, pcb, utun_chain);
1585 	} else {
1586 		TAILQ_INSERT_TAIL(&utun_head, pcb, utun_chain);
1587 	}
1588 
1589 	lck_mtx_unlock(&utun_lock);
1590 
1591 	return 0;
1592 }
1593 
1594 static errno_t
utun_ctl_bind(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1595 utun_ctl_bind(kern_ctl_ref kctlref,
1596     struct sockaddr_ctl *sac,
1597     void **unitinfo)
1598 {
1599 	if (*unitinfo == NULL) {
1600 		u_int32_t unit = 0;
1601 		(void)utun_ctl_setup(&unit, unitinfo);
1602 	}
1603 
1604 	struct utun_pcb *pcb = (struct utun_pcb *)*unitinfo;
1605 	if (pcb == NULL) {
1606 		return EINVAL;
1607 	}
1608 
1609 	pcb->utun_ctlref = kctlref;
1610 	pcb->utun_unit = sac->sc_unit;
1611 	pcb->utun_max_pending_packets = 1;
1612 
1613 #if UTUN_NEXUS
1614 	pcb->utun_use_netif = false;
1615 	pcb->utun_attach_fsw = true;
1616 	pcb->utun_netif_connected = false;
1617 	pcb->utun_slot_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1618 	pcb->utun_netif_ring_size = if_utun_ring_size;
1619 	pcb->utun_tx_fsw_ring_size = if_utun_tx_fsw_ring_size;
1620 	pcb->utun_rx_fsw_ring_size = if_utun_rx_fsw_ring_size;
1621 	pcb->utun_input_chain_count = 0;
1622 	lck_mtx_init(&pcb->utun_input_chain_lock, &utun_lck_grp, &utun_lck_attr);
1623 	lck_mtx_init(&pcb->utun_pcb_data_move_lock,
1624 	    &utun_lck_grp, &utun_lck_attr);
1625 #endif // UTUN_NEXUS
1626 
1627 	lck_rw_init(&pcb->utun_pcb_lock, &utun_lck_grp, &utun_lck_attr);
1628 
1629 	return 0;
1630 }
1631 
1632 static errno_t
utun_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1633 utun_ctl_connect(kern_ctl_ref kctlref,
1634     struct sockaddr_ctl *sac,
1635     void **unitinfo)
1636 {
1637 	struct ifnet_init_eparams utun_init = {};
1638 	errno_t result = 0;
1639 
1640 	if (*unitinfo == NULL) {
1641 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1642 	}
1643 
1644 	struct utun_pcb *pcb = *unitinfo;
1645 	if (pcb == NULL) {
1646 		return EINVAL;
1647 	}
1648 
1649 	/* Handle case where utun_ctl_setup() was called, but ipsec_ctl_bind() was not */
1650 	if (pcb->utun_ctlref == NULL) {
1651 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1652 	}
1653 
1654 	snprintf(pcb->utun_if_xname, sizeof(pcb->utun_if_xname), "utun%d", pcb->utun_unit - 1);
1655 	snprintf(pcb->utun_unique_name, sizeof(pcb->utun_unique_name), "utunid%d", pcb->utun_unique_id - 1);
1656 
1657 	/* Create the interface */
1658 	bzero(&utun_init, sizeof(utun_init));
1659 	utun_init.ver = IFNET_INIT_CURRENT_VERSION;
1660 	utun_init.len = sizeof(utun_init);
1661 
1662 #if UTUN_NEXUS
1663 	if (pcb->utun_use_netif) {
1664 		utun_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
1665 		utun_init.tx_headroom = UTUN_IF_HEADROOM_SIZE;
1666 	} else
1667 #endif // UTUN_NEXUS
1668 	{
1669 		utun_init.flags = IFNET_INIT_NX_NOAUTO;
1670 		utun_init.start = utun_start;
1671 		utun_init.framer_extended = utun_framer;
1672 	}
1673 	utun_init.name = "utun";
1674 	utun_init.unit = pcb->utun_unit - 1;
1675 	utun_init.uniqueid = pcb->utun_unique_name;
1676 	utun_init.uniqueid_len = strlen(pcb->utun_unique_name);
1677 	utun_init.family = IFNET_FAMILY_UTUN;
1678 	utun_init.type = IFT_OTHER;
1679 	utun_init.demux = utun_demux;
1680 	utun_init.add_proto = utun_add_proto;
1681 	utun_init.del_proto = utun_del_proto;
1682 	utun_init.softc = pcb;
1683 	utun_init.ioctl = utun_ioctl;
1684 	utun_init.free = utun_detached;
1685 
1686 #if UTUN_NEXUS
1687 	if (pcb->utun_use_netif) {
1688 		result = utun_nexus_ifattach(pcb, &utun_init, &pcb->utun_ifp);
1689 		if (result != 0) {
1690 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_nexus_ifattach failed: %d\n", result);
1691 			utun_free_pcb(pcb, false);
1692 			*unitinfo = NULL;
1693 			return result;
1694 		}
1695 
1696 		if (pcb->utun_attach_fsw) {
1697 			result = utun_flowswitch_attach(pcb);
1698 			if (result != 0) {
1699 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_flowswitch_attach failed: %d\n", result);
1700 				// Do not call utun_free_pcb(). We will be attached already, and will be freed later
1701 				// in utun_detached().
1702 				*unitinfo = NULL;
1703 				return result;
1704 			}
1705 		}
1706 
1707 		/* Attach to bpf */
1708 		bpfattach(pcb->utun_ifp, DLT_RAW, 0);
1709 	} else
1710 #endif // UTUN_NEXUS
1711 	{
1712 		/*
1713 		 * Upon success, this holds an ifnet reference which we will
1714 		 * release via ifnet_release() at final detach time.
1715 		 */
1716 		result = ifnet_allocate_extended(&utun_init, &pcb->utun_ifp);
1717 		if (result != 0) {
1718 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_allocate failed: %d\n", result);
1719 			utun_free_pcb(pcb, false);
1720 			*unitinfo = NULL;
1721 			return result;
1722 		}
1723 
1724 		/* Set flags and additional information. */
1725 		ifnet_set_mtu(pcb->utun_ifp, UTUN_DEFAULT_MTU);
1726 		ifnet_set_flags(pcb->utun_ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
1727 
1728 		/* The interface must generate its own IPv6 LinkLocal address,
1729 		 * if possible following the recommendation of RFC2472 to the 64bit interface ID
1730 		 */
1731 		ifnet_set_eflags(pcb->utun_ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
1732 
1733 		/* Reset the stats in case as the interface may have been recycled */
1734 		struct ifnet_stats_param stats;
1735 		bzero(&stats, sizeof(struct ifnet_stats_param));
1736 		ifnet_set_stat(pcb->utun_ifp, &stats);
1737 
1738 		/* Attach the interface */
1739 		result = ifnet_attach(pcb->utun_ifp, NULL);
1740 		if (result != 0) {
1741 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_attach failed: %d\n", result);
1742 			/* Release reference now since attach failed */
1743 			ifnet_release(pcb->utun_ifp);
1744 			utun_free_pcb(pcb, false);
1745 			*unitinfo = NULL;
1746 			return result;
1747 		}
1748 
1749 		/* Attach to bpf */
1750 		bpfattach(pcb->utun_ifp, DLT_NULL, UTUN_HEADER_SIZE(pcb));
1751 
1752 #if UTUN_NEXUS
1753 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
1754 		UTUN_SET_DATA_PATH_READY(pcb);
1755 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
1756 #endif // UTUN_NEXUS
1757 	}
1758 
1759 	/* The interfaces resoures allocated, mark it as running */
1760 	ifnet_set_flags(pcb->utun_ifp, IFF_RUNNING, IFF_RUNNING);
1761 
1762 	return result;
1763 }
1764 
1765 static errno_t
utun_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)1766 utun_detach_ip(ifnet_t interface,
1767     protocol_family_t protocol,
1768     socket_t pf_socket)
1769 {
1770 	errno_t result = EPROTONOSUPPORT;
1771 
1772 	/* Attempt a detach */
1773 	if (protocol == PF_INET) {
1774 		struct ifreq    ifr;
1775 
1776 		bzero(&ifr, sizeof(ifr));
1777 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1778 		    ifnet_name(interface), ifnet_unit(interface));
1779 
1780 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
1781 	} else if (protocol == PF_INET6) {
1782 		struct in6_ifreq        ifr6;
1783 
1784 		bzero(&ifr6, sizeof(ifr6));
1785 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1786 		    ifnet_name(interface), ifnet_unit(interface));
1787 
1788 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
1789 	}
1790 
1791 	return result;
1792 }
1793 
1794 static void
utun_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)1795 utun_remove_address(ifnet_t interface,
1796     protocol_family_t protocol,
1797     ifaddr_t address,
1798     socket_t pf_socket)
1799 {
1800 	errno_t result = 0;
1801 
1802 	/* Attempt a detach */
1803 	if (protocol == PF_INET) {
1804 		struct ifreq ifr;
1805 
1806 		bzero(&ifr, sizeof(ifr));
1807 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1808 		    ifnet_name(interface), ifnet_unit(interface));
1809 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
1810 		if (result != 0) {
1811 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed: %d", result);
1812 		} else {
1813 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
1814 			if (result != 0) {
1815 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR failed: %d", result);
1816 			}
1817 		}
1818 	} else if (protocol == PF_INET6) {
1819 		struct in6_ifreq ifr6;
1820 
1821 		bzero(&ifr6, sizeof(ifr6));
1822 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1823 		    ifnet_name(interface), ifnet_unit(interface));
1824 		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
1825 		    sizeof(ifr6.ifr_addr));
1826 		if (result != 0) {
1827 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed (v6): %d",
1828 			    result);
1829 		} else {
1830 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
1831 			if (result != 0) {
1832 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR_IN6 failed: %d",
1833 				    result);
1834 			}
1835 		}
1836 	}
1837 }
1838 
1839 static void
utun_cleanup_family(ifnet_t interface,protocol_family_t protocol)1840 utun_cleanup_family(ifnet_t interface,
1841     protocol_family_t protocol)
1842 {
1843 	errno_t result = 0;
1844 	socket_t pf_socket = NULL;
1845 	ifaddr_t *addresses = NULL;
1846 	int i;
1847 
1848 	if (protocol != PF_INET && protocol != PF_INET6) {
1849 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - invalid protocol family %d\n", protocol);
1850 		return;
1851 	}
1852 
1853 	/* Create a socket for removing addresses and detaching the protocol */
1854 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
1855 	if (result != 0) {
1856 		if (result != EAFNOSUPPORT) {
1857 			os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - failed to create %s socket: %d\n",
1858 			    protocol == PF_INET ? "IP" : "IPv6", result);
1859 		}
1860 		goto cleanup;
1861 	}
1862 
1863 	/* always set SS_PRIV, we want to close and detach regardless */
1864 	sock_setpriv(pf_socket, 1);
1865 
1866 	result = utun_detach_ip(interface, protocol, pf_socket);
1867 	if (result == 0 || result == ENXIO) {
1868 		/* We are done! We either detached or weren't attached. */
1869 		goto cleanup;
1870 	} else if (result != EBUSY) {
1871 		/* Uh, not really sure what happened here... */
1872 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1873 		goto cleanup;
1874 	}
1875 
1876 	/*
1877 	 * At this point, we received an EBUSY error. This means there are
1878 	 * addresses attached. We should detach them and then try again.
1879 	 */
1880 	result = ifnet_get_address_list_family(interface, &addresses, protocol);
1881 	if (result != 0) {
1882 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
1883 		    ifnet_name(interface), ifnet_unit(interface),
1884 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
1885 		goto cleanup;
1886 	}
1887 
1888 	for (i = 0; addresses[i] != 0; i++) {
1889 		utun_remove_address(interface, protocol, addresses[i], pf_socket);
1890 	}
1891 	ifnet_free_address_list(addresses);
1892 	addresses = NULL;
1893 
1894 	/*
1895 	 * The addresses should be gone, we should try the remove again.
1896 	 */
1897 	result = utun_detach_ip(interface, protocol, pf_socket);
1898 	if (result != 0 && result != ENXIO) {
1899 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1900 	}
1901 
1902 cleanup:
1903 	if (pf_socket != NULL) {
1904 		sock_close(pf_socket);
1905 	}
1906 
1907 	if (addresses != NULL) {
1908 		ifnet_free_address_list(addresses);
1909 	}
1910 }
1911 
1912 static errno_t
utun_ctl_disconnect(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo)1913 utun_ctl_disconnect(__unused kern_ctl_ref kctlref,
1914     __unused u_int32_t unit,
1915     void *unitinfo)
1916 {
1917 	struct utun_pcb *pcb = unitinfo;
1918 	ifnet_t ifp = NULL;
1919 	errno_t result = 0;
1920 
1921 	if (pcb == NULL) {
1922 		return EINVAL;
1923 	}
1924 
1925 #if UTUN_NEXUS
1926 	/* Wait until all threads in the data paths are done. */
1927 	utun_wait_data_move_drain(pcb);
1928 	// Tell the nexus to stop all rings
1929 	if (pcb->utun_netif_nexus != NULL && pcb->utun_netif_connected) {
1930 		kern_nexus_stop(pcb->utun_netif_nexus);
1931 	}
1932 #endif // UTUN_NEXUS
1933 
1934 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1935 
1936 #if UTUN_NEXUS
1937 	uuid_t kpipe_uuid;
1938 	uuid_copy(kpipe_uuid, pcb->utun_kpipe_uuid);
1939 	uuid_clear(pcb->utun_kpipe_uuid);
1940 	pcb->utun_kpipe_enabled = FALSE;
1941 #endif // UTUN_NEXUS
1942 
1943 	pcb->utun_ctlref = NULL;
1944 
1945 	ifp = pcb->utun_ifp;
1946 	if (ifp != NULL) {
1947 #if UTUN_NEXUS
1948 		// Tell the nexus to stop all rings
1949 		if (pcb->utun_netif_nexus != NULL) {
1950 			/*
1951 			 * Quiesce the interface and flush any pending outbound packets.
1952 			 */
1953 			if_down(ifp);
1954 
1955 			/*
1956 			 * Suspend data movement and wait for IO threads to exit.
1957 			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
1958 			 * do this because utun nexuses are attached/detached separately.
1959 			 */
1960 			ifnet_datamov_suspend_and_drain(ifp);
1961 			if ((result = ifnet_detach(ifp)) != 0) {
1962 				panic("utun_ctl_disconnect - ifnet_detach failed: %d", result);
1963 			}
1964 
1965 			/*
1966 			 * We want to do everything in our power to ensure that the interface
1967 			 * really goes away when the socket is closed. We must remove IP/IPv6
1968 			 * addresses and detach the protocols. Finally, we can remove and
1969 			 * release the interface.
1970 			 */
1971 			utun_cleanup_family(ifp, AF_INET);
1972 			utun_cleanup_family(ifp, AF_INET6);
1973 
1974 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1975 
1976 			if (!uuid_is_null(kpipe_uuid)) {
1977 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
1978 					if (pcb->utun_kpipe_pp != NULL) {
1979 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1980 						pcb->utun_kpipe_pp = NULL;
1981 					}
1982 					utun_unregister_kernel_pipe_nexus();
1983 				}
1984 			}
1985 			utun_nexus_detach(pcb);
1986 
1987 			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
1988 			ifnet_datamov_resume(ifp);
1989 		} else
1990 #endif // UTUN_NEXUS
1991 		{
1992 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1993 
1994 #if UTUN_NEXUS
1995 			if (!uuid_is_null(kpipe_uuid)) {
1996 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
1997 					if (pcb->utun_kpipe_pp != NULL) {
1998 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1999 						pcb->utun_kpipe_pp = NULL;
2000 					}
2001 					utun_unregister_kernel_pipe_nexus();
2002 				}
2003 			}
2004 #endif // UTUN_NEXUS
2005 
2006 			/*
2007 			 * We want to do everything in our power to ensure that the interface
2008 			 * really goes away when the socket is closed. We must remove IP/IPv6
2009 			 * addresses and detach the protocols. Finally, we can remove and
2010 			 * release the interface.
2011 			 */
2012 			utun_cleanup_family(ifp, AF_INET);
2013 			utun_cleanup_family(ifp, AF_INET6);
2014 
2015 			/*
2016 			 * Detach now; utun_detach() will be called asynchronously once
2017 			 * the I/O reference count drops to 0.  There we will invoke
2018 			 * ifnet_release().
2019 			 */
2020 			if ((result = ifnet_detach(ifp)) != 0) {
2021 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_disconnect - ifnet_detach failed: %d\n", result);
2022 			}
2023 		}
2024 	} else {
2025 		// Bound, but not connected
2026 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2027 		utun_free_pcb(pcb, false);
2028 	}
2029 
2030 	return 0;
2031 }
2032 
2033 static errno_t
utun_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,mbuf_t m,__unused int flags)2034 utun_ctl_send(__unused kern_ctl_ref kctlref,
2035     __unused u_int32_t unit,
2036     void *unitinfo,
2037     mbuf_t m,
2038     __unused int flags)
2039 {
2040 	/*
2041 	 * The userland ABI requires the first four bytes have the protocol family
2042 	 * in network byte order: swap them
2043 	 */
2044 	if (m_pktlen(m) >= (int32_t)UTUN_HEADER_SIZE((struct utun_pcb *)unitinfo)) {
2045 		*(protocol_family_t *)mbuf_data(m) = ntohl(*(protocol_family_t *)mbuf_data(m));
2046 	} else {
2047 		os_log_error(OS_LOG_DEFAULT, "%s - unexpected short mbuf pkt len %d\n", __func__, m_pktlen(m));
2048 	}
2049 
2050 	return utun_pkt_input((struct utun_pcb *)unitinfo, m);
2051 }
2052 
2053 static errno_t
utun_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)2054 utun_ctl_setopt(__unused kern_ctl_ref kctlref,
2055     __unused u_int32_t unit,
2056     void *unitinfo,
2057     int opt,
2058     void *data,
2059     size_t len)
2060 {
2061 	struct utun_pcb *pcb = unitinfo;
2062 	errno_t result = 0;
2063 	/* check for privileges for privileged options */
2064 	switch (opt) {
2065 	case UTUN_OPT_FLAGS:
2066 	case UTUN_OPT_EXT_IFDATA_STATS:
2067 	case UTUN_OPT_SET_DELEGATE_INTERFACE:
2068 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2069 			return EPERM;
2070 		}
2071 		break;
2072 	}
2073 
2074 	switch (opt) {
2075 	case UTUN_OPT_FLAGS:
2076 		if (len != sizeof(u_int32_t)) {
2077 			result = EMSGSIZE;
2078 			break;
2079 		}
2080 		if (pcb->utun_ifp != NULL) {
2081 			// Only can set before connecting
2082 			result = EINVAL;
2083 			break;
2084 		}
2085 		pcb->utun_flags = *(u_int32_t *)data;
2086 		break;
2087 
2088 	case UTUN_OPT_EXT_IFDATA_STATS:
2089 		if (len != sizeof(int)) {
2090 			result = EMSGSIZE;
2091 			break;
2092 		}
2093 		if (pcb->utun_ifp == NULL) {
2094 			// Only can set after connecting
2095 			result = EINVAL;
2096 			break;
2097 		}
2098 		pcb->utun_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2099 		break;
2100 
2101 	case UTUN_OPT_INC_IFDATA_STATS_IN:
2102 	case UTUN_OPT_INC_IFDATA_STATS_OUT: {
2103 		struct utun_stats_param *utsp = (struct utun_stats_param *)data;
2104 
2105 		if (utsp == NULL || len < sizeof(struct utun_stats_param)) {
2106 			result = EINVAL;
2107 			break;
2108 		}
2109 		if (pcb->utun_ifp == NULL) {
2110 			// Only can set after connecting
2111 			result = EINVAL;
2112 			break;
2113 		}
2114 		if (!pcb->utun_ext_ifdata_stats) {
2115 			result = EINVAL;
2116 			break;
2117 		}
2118 		if (opt == UTUN_OPT_INC_IFDATA_STATS_IN) {
2119 			ifnet_stat_increment_in(pcb->utun_ifp, utsp->utsp_packets,
2120 			    utsp->utsp_bytes, utsp->utsp_errors);
2121 		} else {
2122 			ifnet_stat_increment_out(pcb->utun_ifp, utsp->utsp_packets,
2123 			    utsp->utsp_bytes, utsp->utsp_errors);
2124 		}
2125 		break;
2126 	}
2127 	case UTUN_OPT_SET_DELEGATE_INTERFACE: {
2128 		ifnet_t         del_ifp = NULL;
2129 		char            name[IFNAMSIZ];
2130 
2131 		if (len > IFNAMSIZ - 1) {
2132 			result = EMSGSIZE;
2133 			break;
2134 		}
2135 		if (pcb->utun_ifp == NULL) {
2136 			// Only can set after connecting
2137 			result = EINVAL;
2138 			break;
2139 		}
2140 		if (len != 0) {            /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2141 			bcopy(data, name, len);
2142 			name[len] = 0;
2143 			result = ifnet_find_by_name(name, &del_ifp);
2144 		}
2145 		if (result == 0) {
2146 			result = ifnet_set_delegate(pcb->utun_ifp, del_ifp);
2147 			if (del_ifp) {
2148 				ifnet_release(del_ifp);
2149 			}
2150 		}
2151 		break;
2152 	}
2153 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2154 		u_int32_t max_pending_packets = 0;
2155 		if (len != sizeof(u_int32_t)) {
2156 			result = EMSGSIZE;
2157 			break;
2158 		}
2159 		max_pending_packets = *(u_int32_t *)data;
2160 		if (max_pending_packets == 0) {
2161 			result = EINVAL;
2162 			break;
2163 		}
2164 		pcb->utun_max_pending_packets = max_pending_packets;
2165 		break;
2166 	}
2167 #if UTUN_NEXUS
2168 	case UTUN_OPT_ENABLE_CHANNEL: {
2169 		if (len != sizeof(int)) {
2170 			result = EMSGSIZE;
2171 			break;
2172 		}
2173 		if (pcb->utun_ifp == NULL) {
2174 			// Only can set after connecting
2175 			result = EINVAL;
2176 			break;
2177 		}
2178 		if (*(int *)data) {
2179 			result = utun_enable_channel(pcb, current_proc());
2180 		} else {
2181 			result = utun_disable_channel(pcb);
2182 		}
2183 		break;
2184 	}
2185 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2186 		if (len != sizeof(int)) {
2187 			result = EMSGSIZE;
2188 			break;
2189 		}
2190 		if (pcb->utun_ifp == NULL) {
2191 			// Only can set after connecting
2192 			result = EINVAL;
2193 			break;
2194 		}
2195 		if (!if_is_fsw_transport_netagent_enabled()) {
2196 			result = ENOTSUP;
2197 			break;
2198 		}
2199 		if (uuid_is_null(pcb->utun_nx.fsw_agent)) {
2200 			result = ENOENT;
2201 			break;
2202 		}
2203 
2204 		uint32_t flags = netagent_get_flags(pcb->utun_nx.fsw_agent);
2205 
2206 		if (*(int *)data) {
2207 			pcb->utun_needs_netagent = true;
2208 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
2209 			    NETAGENT_FLAG_NEXUS_LISTENER);
2210 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2211 		} else {
2212 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
2213 			    NETAGENT_FLAG_NEXUS_LISTENER);
2214 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2215 			pcb->utun_needs_netagent = false;
2216 		}
2217 		break;
2218 	}
2219 	case UTUN_OPT_ATTACH_FLOWSWITCH: {
2220 		if (len != sizeof(int)) {
2221 			result = EMSGSIZE;
2222 			break;
2223 		}
2224 		if (pcb->utun_ifp != NULL) {
2225 			// Only can set before connecting
2226 			result = EINVAL;
2227 			break;
2228 		}
2229 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2230 		pcb->utun_attach_fsw = !!(*(int *)data);
2231 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2232 		break;
2233 	}
2234 	case UTUN_OPT_ENABLE_NETIF: {
2235 		if (len != sizeof(int)) {
2236 			result = EMSGSIZE;
2237 			break;
2238 		}
2239 		if (pcb->utun_ifp != NULL) {
2240 			// Only can set before connecting
2241 			result = EINVAL;
2242 			break;
2243 		}
2244 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2245 		pcb->utun_use_netif = !!(*(int *)data);
2246 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2247 		break;
2248 	}
2249 	case UTUN_OPT_SLOT_SIZE: {
2250 		if (len != sizeof(u_int32_t)) {
2251 			result = EMSGSIZE;
2252 			break;
2253 		}
2254 		if (pcb->utun_ifp != NULL) {
2255 			// Only can set before connecting
2256 			result = EINVAL;
2257 			break;
2258 		}
2259 		u_int32_t slot_size = *(u_int32_t *)data;
2260 		if (slot_size < UTUN_IF_MIN_SLOT_SIZE ||
2261 		    slot_size > UTUN_IF_MAX_SLOT_SIZE) {
2262 			return EINVAL;
2263 		}
2264 		pcb->utun_slot_size = slot_size;
2265 		break;
2266 	}
2267 	case UTUN_OPT_NETIF_RING_SIZE: {
2268 		if (len != sizeof(u_int32_t)) {
2269 			result = EMSGSIZE;
2270 			break;
2271 		}
2272 		if (pcb->utun_ifp != NULL) {
2273 			// Only can set before connecting
2274 			result = EINVAL;
2275 			break;
2276 		}
2277 		u_int32_t ring_size = *(u_int32_t *)data;
2278 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2279 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2280 			return EINVAL;
2281 		}
2282 		pcb->utun_netif_ring_size = ring_size;
2283 		break;
2284 	}
2285 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2286 		if (len != sizeof(u_int32_t)) {
2287 			result = EMSGSIZE;
2288 			break;
2289 		}
2290 		if (pcb->utun_ifp != NULL) {
2291 			// Only can set before connecting
2292 			result = EINVAL;
2293 			break;
2294 		}
2295 		u_int32_t ring_size = *(u_int32_t *)data;
2296 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2297 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2298 			return EINVAL;
2299 		}
2300 		pcb->utun_tx_fsw_ring_size = ring_size;
2301 		break;
2302 	}
2303 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2304 		if (len != sizeof(u_int32_t)) {
2305 			result = EMSGSIZE;
2306 			break;
2307 		}
2308 		if (pcb->utun_ifp != NULL) {
2309 			// Only can set before connecting
2310 			result = EINVAL;
2311 			break;
2312 		}
2313 		u_int32_t ring_size = *(u_int32_t *)data;
2314 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2315 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2316 			return EINVAL;
2317 		}
2318 		pcb->utun_rx_fsw_ring_size = ring_size;
2319 		break;
2320 	}
2321 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2322 		if (len != sizeof(u_int32_t)) {
2323 			result = EMSGSIZE;
2324 			break;
2325 		}
2326 		if (pcb->utun_ifp != NULL) {
2327 			// Only can set before connecting
2328 			result = EINVAL;
2329 			break;
2330 		}
2331 		u_int32_t ring_size = *(u_int32_t *)data;
2332 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2333 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2334 			return EINVAL;
2335 		}
2336 		pcb->utun_kpipe_tx_ring_size = ring_size;
2337 		break;
2338 	}
2339 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2340 		if (len != sizeof(u_int32_t)) {
2341 			result = EMSGSIZE;
2342 			break;
2343 		}
2344 		if (pcb->utun_ifp != NULL) {
2345 			// Only can set before connecting
2346 			result = EINVAL;
2347 			break;
2348 		}
2349 		u_int32_t ring_size = *(u_int32_t *)data;
2350 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2351 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2352 			return EINVAL;
2353 		}
2354 		pcb->utun_kpipe_rx_ring_size = ring_size;
2355 		break;
2356 	}
2357 #endif // UTUN_NEXUS
2358 	default: {
2359 		result = ENOPROTOOPT;
2360 		break;
2361 	}
2362 	}
2363 
2364 	return result;
2365 }
2366 
2367 static errno_t
utun_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t * len)2368 utun_ctl_getopt(__unused kern_ctl_ref kctlref,
2369     __unused u_int32_t unit,
2370     void *unitinfo,
2371     int opt,
2372     void *data,
2373     size_t *len)
2374 {
2375 	struct utun_pcb *pcb = unitinfo;
2376 	errno_t result = 0;
2377 
2378 	switch (opt) {
2379 	case UTUN_OPT_FLAGS:
2380 		if (*len != sizeof(u_int32_t)) {
2381 			result = EMSGSIZE;
2382 		} else {
2383 			*(u_int32_t *)data = pcb->utun_flags;
2384 		}
2385 		break;
2386 
2387 	case UTUN_OPT_EXT_IFDATA_STATS:
2388 		if (*len != sizeof(int)) {
2389 			result = EMSGSIZE;
2390 		} else {
2391 			*(int *)data = (pcb->utun_ext_ifdata_stats) ? 1 : 0;
2392 		}
2393 		break;
2394 
2395 	case UTUN_OPT_IFNAME:
2396 		if (*len < MIN(strlen(pcb->utun_if_xname) + 1, sizeof(pcb->utun_if_xname))) {
2397 			result = EMSGSIZE;
2398 		} else {
2399 			if (pcb->utun_ifp == NULL) {
2400 				// Only can get after connecting
2401 				result = EINVAL;
2402 				break;
2403 			}
2404 			*len = scnprintf(data, *len, "%s", pcb->utun_if_xname) + 1;
2405 		}
2406 		break;
2407 
2408 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2409 		if (*len != sizeof(u_int32_t)) {
2410 			result = EMSGSIZE;
2411 		} else {
2412 			*((u_int32_t *)data) = pcb->utun_max_pending_packets;
2413 		}
2414 		break;
2415 	}
2416 
2417 #if UTUN_NEXUS
2418 	case UTUN_OPT_ENABLE_CHANNEL: {
2419 		if (*len != sizeof(int)) {
2420 			result = EMSGSIZE;
2421 		} else {
2422 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2423 			*(int *)data = pcb->utun_kpipe_enabled;
2424 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2425 		}
2426 		break;
2427 	}
2428 
2429 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2430 		if (*len != sizeof(int)) {
2431 			result = EMSGSIZE;
2432 		} else {
2433 			*(int *)data = if_check_netagent(pcb->utun_ifp, pcb->utun_nx.fsw_agent);
2434 		}
2435 		break;
2436 	}
2437 
2438 	case UTUN_OPT_ENABLE_NETIF: {
2439 		if (*len != sizeof(int)) {
2440 			result = EMSGSIZE;
2441 		} else {
2442 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2443 			*(int *)data = !!pcb->utun_use_netif;
2444 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2445 		}
2446 		break;
2447 	}
2448 
2449 	case UTUN_OPT_GET_CHANNEL_UUID: {
2450 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2451 		if (uuid_is_null(pcb->utun_kpipe_uuid)) {
2452 			result = ENXIO;
2453 		} else if (*len != sizeof(uuid_t)) {
2454 			result = EMSGSIZE;
2455 		} else {
2456 			uuid_copy(data, pcb->utun_kpipe_uuid);
2457 		}
2458 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2459 		break;
2460 	}
2461 	case UTUN_OPT_SLOT_SIZE: {
2462 		if (*len != sizeof(u_int32_t)) {
2463 			result = EMSGSIZE;
2464 		} else {
2465 			*(u_int32_t *)data = pcb->utun_slot_size;
2466 		}
2467 		break;
2468 	}
2469 	case UTUN_OPT_NETIF_RING_SIZE: {
2470 		if (*len != sizeof(u_int32_t)) {
2471 			result = EMSGSIZE;
2472 		} else {
2473 			*(u_int32_t *)data = pcb->utun_netif_ring_size;
2474 		}
2475 		break;
2476 	}
2477 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2478 		if (*len != sizeof(u_int32_t)) {
2479 			result = EMSGSIZE;
2480 		} else {
2481 			*(u_int32_t *)data = pcb->utun_tx_fsw_ring_size;
2482 		}
2483 		break;
2484 	}
2485 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2486 		if (*len != sizeof(u_int32_t)) {
2487 			result = EMSGSIZE;
2488 		} else {
2489 			*(u_int32_t *)data = pcb->utun_rx_fsw_ring_size;
2490 		}
2491 		break;
2492 	}
2493 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2494 		if (*len != sizeof(u_int32_t)) {
2495 			result = EMSGSIZE;
2496 		} else {
2497 			*(u_int32_t *)data = pcb->utun_kpipe_tx_ring_size;
2498 		}
2499 		break;
2500 	}
2501 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2502 		if (*len != sizeof(u_int32_t)) {
2503 			result = EMSGSIZE;
2504 		} else {
2505 			*(u_int32_t *)data = pcb->utun_kpipe_rx_ring_size;
2506 		}
2507 		break;
2508 	}
2509 #endif // UTUN_NEXUS
2510 
2511 	default:
2512 		result = ENOPROTOOPT;
2513 		break;
2514 	}
2515 
2516 	return result;
2517 }
2518 
2519 static void
utun_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t unit,void * unitinfo,int flags)2520 utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
2521 {
2522 #pragma unused(flags)
2523 	bool reenable_output = false;
2524 	struct utun_pcb *pcb = unitinfo;
2525 	if (pcb == NULL) {
2526 		return;
2527 	}
2528 	ifnet_lock_exclusive(pcb->utun_ifp);
2529 
2530 	u_int32_t utun_packet_cnt;
2531 	errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, &utun_packet_cnt);
2532 	if (error_pc != 0) {
2533 		os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2534 		utun_packet_cnt = 0;
2535 	}
2536 
2537 	if (utun_packet_cnt < pcb->utun_max_pending_packets) {
2538 		reenable_output = true;
2539 	}
2540 
2541 	if (reenable_output) {
2542 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
2543 		if (error != 0) {
2544 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ifnet_enable_output returned error %d\n", error);
2545 		}
2546 	}
2547 	ifnet_lock_done(pcb->utun_ifp);
2548 }
2549 
2550 /* Network Interface functions */
2551 static void
utun_start(ifnet_t interface)2552 utun_start(ifnet_t interface)
2553 {
2554 	mbuf_t data;
2555 	struct utun_pcb *pcb = ifnet_softc(interface);
2556 
2557 	VERIFY(pcb != NULL);
2558 
2559 #if UTUN_NEXUS
2560 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
2561 	if (pcb->utun_kpipe_enabled) {
2562 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2563 		if (!utun_data_move_begin(pcb)) {
2564 			os_log_info(OS_LOG_DEFAULT,
2565 			    "%s: data path stopped for %s\n",
2566 			    __func__, if_name(pcb->utun_ifp));
2567 			return;
2568 		}
2569 		/* It's possible to have channels enabled, but not yet have the channel opened,
2570 		 * in which case the rxring will not be set
2571 		 */
2572 		if (pcb->utun_kpipe_rxring != NULL) {
2573 			kern_channel_notify(pcb->utun_kpipe_rxring, 0);
2574 		}
2575 		utun_data_move_end(pcb);
2576 		return;
2577 	}
2578 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2579 #endif // UTUN_NEXUS
2580 
2581 	for (;;) {
2582 		bool can_accept_packets = true;
2583 		ifnet_lock_shared(pcb->utun_ifp);
2584 
2585 		u_int32_t utun_packet_cnt;
2586 		errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt);
2587 		if (error_pc != 0) {
2588 			os_log_error(OS_LOG_DEFAULT, "utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2589 			utun_packet_cnt = 0;
2590 		}
2591 
2592 		can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets);
2593 		if (!can_accept_packets && pcb->utun_ctlref) {
2594 			u_int32_t difference = 0;
2595 			if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) {
2596 				if (difference > 0) {
2597 					// If the low-water mark has not yet been reached, we still need to enqueue data
2598 					// into the buffer
2599 					can_accept_packets = true;
2600 				}
2601 			}
2602 		}
2603 		if (!can_accept_packets) {
2604 			errno_t error = ifnet_disable_output(interface);
2605 			if (error != 0) {
2606 				os_log_error(OS_LOG_DEFAULT, "utun_start: ifnet_disable_output returned error %d\n", error);
2607 			}
2608 			ifnet_lock_done(pcb->utun_ifp);
2609 			break;
2610 		}
2611 		ifnet_lock_done(pcb->utun_ifp);
2612 		if (ifnet_dequeue(interface, &data) != 0) {
2613 			break;
2614 		}
2615 		if (utun_output(interface, data) != 0) {
2616 			break;
2617 		}
2618 	}
2619 }
2620 
2621 static errno_t
utun_output(ifnet_t interface,mbuf_t data)2622 utun_output(ifnet_t     interface,
2623     mbuf_t data)
2624 {
2625 	struct utun_pcb *pcb = ifnet_softc(interface);
2626 	errno_t result;
2627 
2628 	VERIFY(interface == pcb->utun_ifp);
2629 
2630 #if UTUN_NEXUS
2631 	if (!pcb->utun_use_netif)
2632 #endif // UTUN_NEXUS
2633 	{
2634 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2635 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
2636 		}
2637 	}
2638 
2639 	if (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT) {
2640 		/* flush data */
2641 		mbuf_freem(data);
2642 		return 0;
2643 	}
2644 
2645 	// otherwise, fall thru to ctl_enqueumbuf
2646 	if (pcb->utun_ctlref) {
2647 		int     length;
2648 
2649 		/*
2650 		 * The ABI requires the protocol in network byte order
2651 		 */
2652 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2653 			*(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data));
2654 		}
2655 
2656 		length = mbuf_pkthdr_len(data);
2657 		result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
2658 		if (result != 0) {
2659 			mbuf_freem(data);
2660 			os_log_error(OS_LOG_DEFAULT, "utun_output - ctl_enqueuembuf failed: %d\n", result);
2661 #if UTUN_NEXUS
2662 			if (!pcb->utun_use_netif)
2663 #endif // UTUN_NEXUS
2664 			{
2665 				ifnet_stat_increment_out(interface, 0, 0, 1);
2666 			}
2667 		} else {
2668 #if UTUN_NEXUS
2669 			if (!pcb->utun_use_netif)
2670 #endif // UTUN_NEXUS
2671 			{
2672 				if (!pcb->utun_ext_ifdata_stats) {
2673 					ifnet_stat_increment_out(interface, 1, length, 0);
2674 				}
2675 			}
2676 		}
2677 	} else {
2678 		mbuf_freem(data);
2679 	}
2680 
2681 	return 0;
2682 }
2683 
2684 static errno_t
utun_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)2685 utun_demux(__unused ifnet_t interface,
2686     mbuf_t data,
2687     __unused char *frame_header,
2688     protocol_family_t *protocol)
2689 {
2690 #if UTUN_NEXUS
2691 	struct utun_pcb *pcb = ifnet_softc(interface);
2692 	struct ip *ip;
2693 	u_int ip_version;
2694 #endif
2695 
2696 	while (data != NULL && mbuf_len(data) < 1) {
2697 		data = mbuf_next(data);
2698 	}
2699 
2700 	if (data == NULL) {
2701 		return ENOENT;
2702 	}
2703 
2704 #if UTUN_NEXUS
2705 	if (pcb->utun_use_netif) {
2706 		ip = mtod(data, struct ip *);
2707 		ip_version = ip->ip_v;
2708 
2709 		switch (ip_version) {
2710 		case 4:
2711 			*protocol = PF_INET;
2712 			return 0;
2713 		case 6:
2714 			*protocol = PF_INET6;
2715 			return 0;
2716 		default:
2717 			*protocol = 0;
2718 			break;
2719 		}
2720 	} else
2721 #endif // UTUN_NEXUS
2722 	{
2723 		*protocol = *(u_int32_t *)mbuf_data(data);
2724 	}
2725 
2726 	return 0;
2727 }
2728 
2729 static errno_t
utun_framer(ifnet_t interface,mbuf_t * packet,__unused const struct sockaddr * dest,__unused const char * desk_linkaddr,const char * frame_type,u_int32_t * prepend_len,u_int32_t * postpend_len)2730 utun_framer(ifnet_t interface,
2731     mbuf_t *packet,
2732     __unused const struct sockaddr *dest,
2733     __unused const char *desk_linkaddr,
2734     const char *frame_type,
2735     u_int32_t *prepend_len,
2736     u_int32_t *postpend_len)
2737 {
2738 	struct utun_pcb *pcb = ifnet_softc(interface);
2739 	VERIFY(interface == pcb->utun_ifp);
2740 
2741 	u_int32_t header_length = UTUN_HEADER_SIZE(pcb);
2742 	if (mbuf_prepend(packet, header_length, MBUF_DONTWAIT) != 0) {
2743 		os_log_error(OS_LOG_DEFAULT, "utun_framer - ifnet_output prepend failed\n");
2744 
2745 		ifnet_stat_increment_out(interface, 0, 0, 1);
2746 
2747 		// just	return, because the buffer was freed in mbuf_prepend
2748 		return EJUSTRETURN;
2749 	}
2750 	if (prepend_len != NULL) {
2751 		*prepend_len = header_length;
2752 	}
2753 	if (postpend_len != NULL) {
2754 		*postpend_len = 0;
2755 	}
2756 
2757 	// place protocol number at the beginning of the mbuf
2758 	*(protocol_family_t *)mbuf_data(*packet) = *(protocol_family_t *)(uintptr_t)(size_t)frame_type;
2759 
2760 #if NECP
2761 	// Add process uuid if applicable
2762 	if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
2763 		if (m_pktlen(*packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2764 			u_int8_t *header = (u_int8_t *)mbuf_data(*packet);
2765 			int uuid_err = necp_get_app_uuid_from_packet(*packet, (void *)(header + sizeof(u_int32_t)));
2766 			if (uuid_err != 0) {
2767 				os_log_error(OS_LOG_DEFAULT, "Received app uuid error %d for %s%d\n", uuid_err, ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp));
2768 			}
2769 		} else {
2770 			os_log_error(OS_LOG_DEFAULT, "Cannot set proc uuid for %s%d, size %d < %zu\n", ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp),
2771 			    m_pktlen(*packet), UTUN_HEADER_SIZE(pcb));
2772 		}
2773 	}
2774 #endif // NECP
2775 
2776 	return 0;
2777 }
2778 
2779 static errno_t
utun_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)2780 utun_add_proto(__unused ifnet_t interface,
2781     protocol_family_t protocol,
2782     __unused const struct ifnet_demux_desc *demux_array,
2783     __unused u_int32_t demux_count)
2784 {
2785 	switch (protocol) {
2786 	case PF_INET:
2787 		return 0;
2788 	case PF_INET6:
2789 		return 0;
2790 	default:
2791 		break;
2792 	}
2793 
2794 	return ENOPROTOOPT;
2795 }
2796 
2797 static errno_t
utun_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)2798 utun_del_proto(__unused ifnet_t interface,
2799     __unused protocol_family_t protocol)
2800 {
2801 	return 0;
2802 }
2803 
2804 static errno_t
utun_ioctl(ifnet_t interface,u_long command,void * data)2805 utun_ioctl(ifnet_t interface,
2806     u_long command,
2807     void *data)
2808 {
2809 #if UTUN_NEXUS
2810 	struct utun_pcb *pcb = ifnet_softc(interface);
2811 #endif
2812 	errno_t result = 0;
2813 
2814 	switch (command) {
2815 	case SIOCSIFMTU: {
2816 #if UTUN_NEXUS
2817 		if (pcb->utun_use_netif) {
2818 			// Make sure we can fit packets in the channel buffers
2819 			// Allow for the headroom in the slot
2820 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) + UTUN_IF_HEADROOM_SIZE > pcb->utun_slot_size) {
2821 				result = EINVAL;
2822 			} else {
2823 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
2824 			}
2825 		} else
2826 #endif // UTUN_NEXUS
2827 		{
2828 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
2829 		}
2830 		break;
2831 	}
2832 
2833 	case SIOCSIFFLAGS:
2834 		/* ifioctl() takes care of it */
2835 		break;
2836 
2837 	default:
2838 		result = EOPNOTSUPP;
2839 	}
2840 
2841 	return result;
2842 }
2843 
2844 static void
utun_detached(ifnet_t interface)2845 utun_detached(ifnet_t interface)
2846 {
2847 	struct utun_pcb *pcb = ifnet_softc(interface);
2848 	(void)ifnet_release(interface);
2849 	lck_mtx_lock(&utun_lock);
2850 	utun_free_pcb(pcb, true);
2851 	(void)ifnet_dispose(interface);
2852 	lck_mtx_unlock(&utun_lock);
2853 }
2854 
2855 /* Protocol Handlers */
2856 
2857 static errno_t
utun_proto_input(__unused ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)2858 utun_proto_input(__unused ifnet_t interface,
2859     protocol_family_t protocol,
2860     mbuf_t m,
2861     __unused char *frame_header)
2862 {
2863 	struct utun_pcb *pcb = ifnet_softc(interface);
2864 #if UTUN_NEXUS
2865 	if (!pcb->utun_use_netif)
2866 #endif // UTUN_NEXUS
2867 	{
2868 		mbuf_adj(m, UTUN_HEADER_SIZE(pcb));
2869 	}
2870 	int32_t pktlen = m->m_pkthdr.len;
2871 	if (proto_input(protocol, m) != 0) {
2872 		m_freem(m);
2873 #if UTUN_NEXUS
2874 		if (!pcb->utun_use_netif)
2875 #endif // UTUN_NEXUS
2876 		{
2877 			ifnet_stat_increment_in(interface, 0, 0, 1);
2878 		}
2879 	} else {
2880 #if UTUN_NEXUS
2881 		if (!pcb->utun_use_netif)
2882 #endif // UTUN_NEXUS
2883 		{
2884 			ifnet_stat_increment_in(interface, 1, pktlen, 0);
2885 		}
2886 	}
2887 
2888 	return 0;
2889 }
2890 
2891 static errno_t
utun_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,char * frame_type,__unused char * link_layer_dest)2892 utun_proto_pre_output(__unused ifnet_t interface,
2893     protocol_family_t protocol,
2894     __unused mbuf_t *packet,
2895     __unused const struct sockaddr *dest,
2896     __unused void *route,
2897     char *frame_type,
2898     __unused char *link_layer_dest)
2899 {
2900 	*(protocol_family_t *)(void *)frame_type = protocol;
2901 	return 0;
2902 }
2903 
2904 static errno_t
utun_attach_proto(ifnet_t interface,protocol_family_t protocol)2905 utun_attach_proto(ifnet_t interface,
2906     protocol_family_t protocol)
2907 {
2908 	struct ifnet_attach_proto_param proto;
2909 
2910 	bzero(&proto, sizeof(proto));
2911 	proto.input = utun_proto_input;
2912 	proto.pre_output = utun_proto_pre_output;
2913 
2914 	errno_t result = ifnet_attach_protocol(interface, protocol, &proto);
2915 	if (result != 0 && result != EEXIST) {
2916 		os_log_error(OS_LOG_DEFAULT, "utun_attach_inet - ifnet_attach_protocol %d failed: %d\n",
2917 		    protocol, result);
2918 	}
2919 
2920 	return result;
2921 }
2922 
2923 static errno_t
utun_pkt_input(struct utun_pcb * pcb,mbuf_t packet)2924 utun_pkt_input(struct utun_pcb *pcb, mbuf_t packet)
2925 {
2926 #if UTUN_NEXUS
2927 	if (pcb->utun_use_netif) {
2928 		if (!utun_data_move_begin(pcb)) {
2929 			os_log_info(OS_LOG_DEFAULT,
2930 			    "%s: data path stopped for %s\n",
2931 			    __func__, if_name(pcb->utun_ifp));
2932 			return ENXIO;
2933 		}
2934 
2935 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2936 
2937 		lck_mtx_lock(&pcb->utun_input_chain_lock);
2938 
2939 		if (pcb->utun_input_chain_count > (u_int32_t)if_utun_max_pending_input) {
2940 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
2941 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2942 			utun_data_move_end(pcb);
2943 			return ENOSPC;
2944 		}
2945 
2946 		if (pcb->utun_input_chain != NULL) {
2947 			pcb->utun_input_chain_last->m_nextpkt = packet;
2948 		} else {
2949 			pcb->utun_input_chain = packet;
2950 		}
2951 		pcb->utun_input_chain_count++;
2952 		while (packet->m_nextpkt) {
2953 			VERIFY(packet != packet->m_nextpkt);
2954 			packet = packet->m_nextpkt;
2955 			pcb->utun_input_chain_count++;
2956 		}
2957 		pcb->utun_input_chain_last = packet;
2958 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
2959 
2960 		kern_channel_ring_t rx_ring = pcb->utun_netif_rxring;
2961 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2962 
2963 		if (rx_ring != NULL) {
2964 			kern_channel_notify(rx_ring, 0);
2965 		}
2966 
2967 		utun_data_move_end(pcb);
2968 		return 0;
2969 	} else
2970 #endif // UTUN_NEXUS
2971 	{
2972 		mbuf_pkthdr_setrcvif(packet, pcb->utun_ifp);
2973 
2974 		if (m_pktlen(packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2975 			bpf_tap_in(pcb->utun_ifp, DLT_NULL, packet, 0, 0);
2976 		}
2977 		if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) {
2978 			/* flush data */
2979 			mbuf_freem(packet);
2980 			return 0;
2981 		}
2982 
2983 		errno_t result = 0;
2984 		if (!pcb->utun_ext_ifdata_stats) {
2985 			struct ifnet_stat_increment_param incs = {};
2986 			incs.packets_in = 1;
2987 			incs.bytes_in = mbuf_pkthdr_len(packet);
2988 			result = ifnet_input(pcb->utun_ifp, packet, &incs);
2989 		} else {
2990 			result = ifnet_input(pcb->utun_ifp, packet, NULL);
2991 		}
2992 		if (result != 0) {
2993 			ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
2994 
2995 			os_log_error(OS_LOG_DEFAULT, "%s - ifnet_input failed: %d\n", __FUNCTION__, result);
2996 		}
2997 
2998 		return 0;
2999 	}
3000 }
3001 
3002 #if UTUN_NEXUS
3003 
3004 static errno_t
utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)3005 utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
3006 {
3007 	return 0;
3008 }
3009 
3010 static void
utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)3011 utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
3012 {
3013 	// Ignore
3014 }
3015 
3016 static errno_t
utun_register_nexus(void)3017 utun_register_nexus(void)
3018 {
3019 	const struct kern_nexus_domain_provider_init dp_init = {
3020 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3021 		.nxdpi_flags = 0,
3022 		.nxdpi_init = utun_nxdp_init,
3023 		.nxdpi_fini = utun_nxdp_fini
3024 	};
3025 	errno_t err = 0;
3026 
3027 	/* utun_nxdp_init() is called before this function returns */
3028 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
3029 	    (const uint8_t *) "com.apple.utun",
3030 	    &dp_init, sizeof(dp_init),
3031 	    &utun_nx_dom_prov);
3032 	if (err != 0) {
3033 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
3034 		return err;
3035 	}
3036 	return 0;
3037 }
3038 boolean_t
utun_interface_needs_netagent(ifnet_t interface)3039 utun_interface_needs_netagent(ifnet_t interface)
3040 {
3041 	struct utun_pcb *pcb = NULL;
3042 
3043 	if (interface == NULL) {
3044 		return FALSE;
3045 	}
3046 
3047 	pcb = ifnet_softc(interface);
3048 
3049 	if (pcb == NULL) {
3050 		return FALSE;
3051 	}
3052 
3053 	return pcb->utun_needs_netagent == true;
3054 }
3055 
3056 static errno_t
utun_ifnet_set_attrs(ifnet_t ifp)3057 utun_ifnet_set_attrs(ifnet_t ifp)
3058 {
3059 	/* Set flags and additional information. */
3060 	ifnet_set_mtu(ifp, 1500);
3061 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
3062 
3063 	/* The interface must generate its own IPv6 LinkLocal address,
3064 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
3065 	 */
3066 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
3067 
3068 	return 0;
3069 }
3070 
3071 static errno_t
utun_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)3072 utun_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
3073 {
3074 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3075 	pcb->utun_netif_nexus = nexus;
3076 	return utun_ifnet_set_attrs(ifp);
3077 }
3078 
3079 static errno_t
utun_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)3080 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
3081     proc_t p, kern_nexus_t nexus,
3082     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
3083 {
3084 #pragma unused(nxprov, p)
3085 #pragma unused(nexus, nexus_port, channel, ch_ctx)
3086 	return 0;
3087 }
3088 
3089 static errno_t
utun_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3090 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3091     kern_channel_t channel)
3092 {
3093 #pragma unused(nxprov, channel)
3094 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3095 	boolean_t ok = ifnet_is_attached(pcb->utun_ifp, 1);
3096 	if (pcb->utun_netif_nexus == nexus) {
3097 		pcb->utun_netif_connected = true;
3098 	}
3099 	if (ok) {
3100 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3101 		UTUN_SET_DATA_PATH_READY(pcb);
3102 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3103 	}
3104 	return ok ? 0 : ENXIO;
3105 }
3106 
3107 static void
utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3108 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3109     kern_channel_t channel)
3110 {
3111 #pragma unused(nxprov, channel)
3112 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3113 	/* Wait until all threads in the data paths are done. */
3114 	utun_wait_data_move_drain(pcb);
3115 }
3116 
3117 static void
utun_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3118 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3119     kern_channel_t channel)
3120 {
3121 #pragma unused(nxprov, channel)
3122 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3123 	/* Wait until all threads in the data paths are done. */
3124 	utun_wait_data_move_drain(pcb);
3125 }
3126 
3127 static void
utun_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3128 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3129     kern_channel_t channel)
3130 {
3131 #pragma unused(nxprov, channel)
3132 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3133 	if (pcb->utun_netif_nexus == nexus) {
3134 		pcb->utun_netif_connected = false;
3135 		if (pcb->utun_attach_fsw) {
3136 			// disconnected by flowswitch that was attached by us
3137 			pcb->utun_netif_nexus = NULL;
3138 		}
3139 	}
3140 	ifnet_decr_iorefcnt(pcb->utun_ifp);
3141 }
3142 
3143 static errno_t
utun_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)3144 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3145     kern_channel_t channel, kern_channel_ring_t ring,
3146     boolean_t is_tx_ring, void **ring_ctx)
3147 {
3148 #pragma unused(nxprov)
3149 #pragma unused(channel)
3150 #pragma unused(ring_ctx)
3151 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3152 	if (!is_tx_ring) {
3153 		VERIFY(pcb->utun_kpipe_rxring == NULL);
3154 		pcb->utun_kpipe_rxring = ring;
3155 	} else {
3156 		VERIFY(pcb->utun_kpipe_txring == NULL);
3157 		pcb->utun_kpipe_txring = ring;
3158 	}
3159 	return 0;
3160 }
3161 
3162 static void
utun_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)3163 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3164     kern_channel_ring_t ring)
3165 {
3166 #pragma unused(nxprov)
3167 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3168 	if (pcb->utun_kpipe_rxring == ring) {
3169 		pcb->utun_kpipe_rxring = NULL;
3170 	} else if (pcb->utun_kpipe_txring == ring) {
3171 		pcb->utun_kpipe_txring = NULL;
3172 	}
3173 }
3174 
3175 static errno_t
utun_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)3176 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3177     kern_channel_ring_t tx_ring, uint32_t flags)
3178 {
3179 #pragma unused(nxprov)
3180 #pragma unused(flags)
3181 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3182 
3183 	if (!utun_data_move_begin(pcb)) {
3184 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3185 		    __func__, if_name(pcb->utun_ifp));
3186 		return 0;
3187 	}
3188 
3189 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3190 	int channel_enabled = pcb->utun_kpipe_enabled;
3191 	if (!channel_enabled) {
3192 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3193 		utun_data_move_end(pcb);
3194 		return 0;
3195 	}
3196 
3197 	if (pcb->utun_use_netif) {
3198 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3199 		if (tx_slot == NULL) {
3200 			// Nothing to write, bail
3201 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3202 			utun_data_move_end(pcb);
3203 			return 0;
3204 		}
3205 
3206 		// Signal the netif ring to read
3207 		kern_channel_ring_t rx_ring = pcb->utun_netif_rxring;
3208 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3209 		if (rx_ring != NULL) {
3210 			kern_channel_notify(rx_ring, 0);
3211 		}
3212 	} else {
3213 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3214 
3215 		struct ifnet_stat_increment_param incs = {};
3216 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
3217 		MBUFQ_HEAD(mbufq) mbq;
3218 		MBUFQ_INIT(&mbq);
3219 		kern_channel_slot_t tx_pslot = NULL;
3220 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3221 		while (tx_slot != NULL) {
3222 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3223 
3224 			// Advance TX ring
3225 			tx_pslot = tx_slot;
3226 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3227 
3228 			if (tx_ph == 0) {
3229 				continue;
3230 			}
3231 
3232 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3233 			VERIFY(tx_buf != NULL);
3234 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
3235 			VERIFY(tx_baddr != 0);
3236 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3237 
3238 			size_t length = MIN(kern_packet_get_data_length(tx_ph),
3239 			    pcb->utun_slot_size);
3240 
3241 			mbuf_t data = NULL;
3242 			if (length >= UTUN_HEADER_SIZE(pcb) &&
3243 			    !(pcb->utun_flags & UTUN_FLAGS_NO_INPUT)) {
3244 				errno_t error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_HEADER, &data);
3245 				VERIFY(0 == error);
3246 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_WAITOK);
3247 				VERIFY(0 == error);
3248 				/*
3249 				 * The userland ABI requires the first four bytes have
3250 				 * the protocol family in network byte order: swap them
3251 				 */
3252 				*(uint32_t *)mbuf_data(data) = ntohl(*(uint32_t *)mbuf_data(data));
3253 				mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
3254 				bpf_tap_in(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3255 				incs.packets_in++;
3256 				incs.bytes_in += length;
3257 				MBUFQ_ENQUEUE(&mbq, data);
3258 			}
3259 		}
3260 		if (tx_pslot) {
3261 			kern_channel_advance_slot(tx_ring, tx_pslot);
3262 			tx_ring_stats.kcrsi_slots_transferred = incs.packets_in;
3263 			tx_ring_stats.kcrsi_bytes_transferred = incs.bytes_in;
3264 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3265 			(void) kern_channel_reclaim(tx_ring);
3266 		}
3267 		if (!MBUFQ_EMPTY(&mbq)) {
3268 			(void) ifnet_input_extended(pcb->utun_ifp, MBUFQ_FIRST(&mbq),
3269 			    MBUFQ_LAST(&mbq), &incs);
3270 			MBUFQ_INIT(&mbq);
3271 		}
3272 	}
3273 
3274 	utun_data_move_end(pcb);
3275 	return 0;
3276 }
3277 
3278 static errno_t
utun_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)3279 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3280     kern_channel_ring_t rx_ring, uint32_t flags)
3281 {
3282 #pragma unused(nxprov)
3283 #pragma unused(flags)
3284 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3285 	struct kern_channel_ring_stat_increment rx_ring_stats = {};
3286 
3287 	if (!utun_data_move_begin(pcb)) {
3288 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3289 		    __func__, if_name(pcb->utun_ifp));
3290 		return 0;
3291 	}
3292 
3293 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3294 
3295 	int channel_enabled = pcb->utun_kpipe_enabled;
3296 	if (!channel_enabled) {
3297 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3298 		utun_data_move_end(pcb);
3299 		return 0;
3300 	}
3301 
3302 	/* reclaim user-released slots */
3303 	(void) kern_channel_reclaim(rx_ring);
3304 
3305 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
3306 	if (avail == 0) {
3307 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3308 		utun_data_move_end(pcb);
3309 		return 0;
3310 	}
3311 
3312 	if (pcb->utun_use_netif) {
3313 		kern_channel_ring_t tx_ring = pcb->utun_netif_txring;
3314 		if (tx_ring == NULL ||
3315 		    pcb->utun_netif_nexus == NULL) {
3316 			// Net-If TX ring not set up yet, nothing to read
3317 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3318 			utun_data_move_end(pcb);
3319 			return 0;
3320 		}
3321 
3322 		struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->utun_netif_nexus)->nif_stats;
3323 
3324 		// Unlock utun before entering ring
3325 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3326 
3327 		(void)kr_enter(tx_ring, TRUE);
3328 
3329 		// Lock again after entering and validate
3330 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
3331 		if (tx_ring != pcb->utun_netif_txring) {
3332 			// Ring no longer valid
3333 			// Unlock first, then exit ring
3334 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3335 			kr_exit(tx_ring);
3336 			utun_data_move_end(pcb);
3337 			return 0;
3338 		}
3339 
3340 		struct kern_channel_ring_stat_increment tx_ring_stats;
3341 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
3342 		kern_channel_slot_t tx_pslot = NULL;
3343 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3344 		if (tx_slot == NULL) {
3345 			// Nothing to read, don't bother signalling
3346 			// Unlock first, then exit ring
3347 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3348 			kr_exit(tx_ring);
3349 			utun_data_move_end(pcb);
3350 			return 0;
3351 		}
3352 
3353 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3354 		VERIFY(rx_pp != NULL);
3355 		struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
3356 		VERIFY(tx_pp != NULL);
3357 		kern_channel_slot_t rx_pslot = NULL;
3358 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3359 		kern_packet_t tx_chain_ph = 0;
3360 
3361 		while (rx_slot != NULL && tx_slot != NULL) {
3362 			size_t length;
3363 			kern_buflet_t rx_buf;
3364 			void *rx_baddr;
3365 
3366 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3367 
3368 			/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
3369 			if (tx_ph == 0) {
3370 				// Advance TX ring
3371 				tx_pslot = tx_slot;
3372 				tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3373 				continue;
3374 			}
3375 			(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
3376 			if (tx_chain_ph != 0) {
3377 				kern_packet_append(tx_ph, tx_chain_ph);
3378 			}
3379 			tx_chain_ph = tx_ph;
3380 
3381 			// Advance TX ring
3382 			tx_pslot = tx_slot;
3383 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3384 
3385 			// Allocate rx packet
3386 			kern_packet_t rx_ph = 0;
3387 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3388 			if (__improbable(error != 0)) {
3389 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3390 				    pcb->utun_ifp->if_xname);
3391 				break;
3392 			}
3393 
3394 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3395 			VERIFY(tx_buf != NULL);
3396 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
3397 			VERIFY(tx_baddr != NULL);
3398 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3399 
3400 			bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
3401 
3402 			length = MIN(kern_packet_get_data_length(tx_ph) + UTUN_HEADER_SIZE(pcb),
3403 			    pcb->utun_slot_size);
3404 
3405 			tx_ring_stats.kcrsi_slots_transferred++;
3406 			tx_ring_stats.kcrsi_bytes_transferred += length;
3407 
3408 			if (length < UTUN_HEADER_SIZE(pcb) ||
3409 			    length > pcb->utun_slot_size ||
3410 			    length > PP_BUF_SIZE_DEF(rx_pp) ||
3411 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3412 				/* flush data */
3413 				kern_pbufpool_free(rx_pp, rx_ph);
3414 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: invalid length %zu header_size %zu\n",
3415 				    pcb->utun_ifp->if_xname, length, UTUN_HEADER_SIZE(pcb));
3416 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
3417 				STATS_INC(nifs, NETIF_STATS_DROP);
3418 				continue;
3419 			}
3420 
3421 			/* fillout packet */
3422 			rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3423 			VERIFY(rx_buf != NULL);
3424 			rx_baddr = kern_buflet_get_data_address(rx_buf);
3425 			VERIFY(rx_baddr != NULL);
3426 
3427 			// Find family
3428 			uint32_t af = 0;
3429 			uint8_t vhl = *(uint8_t *)(tx_baddr);
3430 			u_int ip_version = (vhl >> 4);
3431 			switch (ip_version) {
3432 			case 4: {
3433 				af = AF_INET;
3434 				break;
3435 			}
3436 			case 6: {
3437 				af = AF_INET6;
3438 				break;
3439 			}
3440 			default: {
3441 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: unknown ip version %u vhl %u header_size %zu\n",
3442 				    pcb->utun_ifp->if_xname, ip_version, vhl, UTUN_HEADER_SIZE(pcb));
3443 				break;
3444 			}
3445 			}
3446 
3447 			// Copy header
3448 			af = htonl(af);
3449 			memcpy((void *)rx_baddr, &af, sizeof(af));
3450 			if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
3451 				kern_packet_get_euuid(tx_ph, (void *)((uintptr_t)rx_baddr + sizeof(af)));
3452 			}
3453 
3454 			// Copy data from tx to rx
3455 			memcpy((void *)((uintptr_t)rx_baddr + UTUN_HEADER_SIZE(pcb)), (void *)tx_baddr, length - UTUN_HEADER_SIZE(pcb));
3456 			kern_packet_clear_flow_uuid(rx_ph); // zero flow id
3457 
3458 			/* finalize and attach the packet */
3459 			error = kern_buflet_set_data_offset(rx_buf, 0);
3460 			VERIFY(error == 0);
3461 			error = kern_buflet_set_data_length(rx_buf, length);
3462 			VERIFY(error == 0);
3463 			error = kern_packet_finalize(rx_ph);
3464 			VERIFY(error == 0);
3465 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3466 			VERIFY(error == 0);
3467 
3468 			STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
3469 			STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
3470 
3471 			rx_ring_stats.kcrsi_slots_transferred++;
3472 			rx_ring_stats.kcrsi_bytes_transferred += length;
3473 
3474 			rx_pslot = rx_slot;
3475 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3476 		}
3477 
3478 		if (rx_pslot) {
3479 			kern_channel_advance_slot(rx_ring, rx_pslot);
3480 			kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
3481 		}
3482 
3483 		if (tx_chain_ph != 0) {
3484 			kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
3485 		}
3486 
3487 		if (tx_pslot) {
3488 			kern_channel_advance_slot(tx_ring, tx_pslot);
3489 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3490 			(void)kern_channel_reclaim(tx_ring);
3491 		}
3492 
3493 		/* just like utun_ctl_rcvd(), always reenable output */
3494 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
3495 		if (error != 0) {
3496 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
3497 		}
3498 
3499 		// Unlock first, then exit ring
3500 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3501 
3502 		if (tx_pslot != NULL) {
3503 			kern_channel_notify(tx_ring, 0);
3504 		}
3505 		kr_exit(tx_ring);
3506 	} else {
3507 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3508 
3509 		uint32_t mb_cnt = 0;
3510 		uint32_t mb_len = 0;
3511 		struct mbuf *mb_head = NULL;
3512 		struct mbuf *mb_tail = NULL;
3513 
3514 		if (ifnet_dequeue_multi(pcb->utun_ifp, avail, &mb_head,
3515 		    &mb_tail, &mb_cnt, &mb_len) != 0) {
3516 			utun_data_move_end(pcb);
3517 			return 0;
3518 		}
3519 		VERIFY(mb_cnt <= avail);
3520 
3521 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3522 		VERIFY(rx_pp != NULL);
3523 		kern_channel_slot_t rx_pslot = NULL;
3524 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3525 		while (rx_slot) {
3526 			size_t length = 0;
3527 			mbuf_t data = NULL;
3528 			if ((data = mb_head) == NULL) {
3529 				VERIFY(mb_cnt == 0);
3530 				break;
3531 			}
3532 			mb_head = mbuf_nextpkt(mb_head);
3533 			mbuf_setnextpkt(data, NULL);
3534 			VERIFY(mb_cnt != 0);
3535 			--mb_cnt;
3536 			length = mbuf_pkthdr_len(data);
3537 			if (length < UTUN_HEADER_SIZE(pcb) ||
3538 			    length > pcb->utun_slot_size ||
3539 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3540 				/* flush data */
3541 				mbuf_freem(data);
3542 				continue;
3543 			}
3544 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3545 
3546 			// Allocate rx packet
3547 			kern_packet_t rx_ph = 0;
3548 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3549 			if (__improbable(error != 0)) {
3550 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3551 				    pcb->utun_ifp->if_xname);
3552 				break;
3553 			}
3554 
3555 			/*
3556 			 * The ABI requires the protocol in network byte order
3557 			 */
3558 			*(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data));
3559 
3560 			// Fillout rx packet
3561 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3562 			VERIFY(rx_buf != NULL);
3563 			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
3564 			VERIFY(rx_baddr != NULL);
3565 
3566 			// Copy-in data from mbuf to buflet
3567 			mbuf_copydata(data, 0, length, (void *)rx_baddr);
3568 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
3569 
3570 			// Finalize and attach the packet
3571 			error = kern_buflet_set_data_offset(rx_buf, 0);
3572 			VERIFY(error == 0);
3573 			error = kern_buflet_set_data_length(rx_buf, length);
3574 			VERIFY(error == 0);
3575 			error = kern_packet_finalize(rx_ph);
3576 			VERIFY(error == 0);
3577 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3578 			VERIFY(error == 0);
3579 
3580 			rx_ring_stats.kcrsi_slots_transferred++;
3581 			rx_ring_stats.kcrsi_bytes_transferred += length;
3582 
3583 			if (!pcb->utun_ext_ifdata_stats) {
3584 				ifnet_stat_increment_out(pcb->utun_ifp, 1, length, 0);
3585 			}
3586 
3587 			mbuf_freem(data);
3588 
3589 			rx_pslot = rx_slot;
3590 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3591 		}
3592 		if (rx_pslot) {
3593 			kern_channel_advance_slot(rx_ring, rx_pslot);
3594 			kern_channel_increment_ring_stats(rx_ring, &rx_ring_stats);
3595 		}
3596 		if (mb_head != NULL) {
3597 			VERIFY(mb_cnt != 0);
3598 			mbuf_freem_list(mb_head);
3599 		}
3600 	}
3601 
3602 	utun_data_move_end(pcb);
3603 	return 0;
3604 }
3605 
3606 #endif // UTUN_NEXUS
3607 
3608 
/*
 * Prototypes for the DTLS placeholder functions defined below.
 * These are placeholders until the coreTLS kext stops calling them.
 */
errno_t utun_ctl_register_dtls(void *reg);
int utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
void utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb);
3615 
3616 errno_t
utun_ctl_register_dtls(void * reg)3617 utun_ctl_register_dtls(void *reg)
3618 {
3619 #pragma unused(reg)
3620 	return 0;
3621 }
3622 
3623 int
utun_pkt_dtls_input(struct utun_pcb * pcb,mbuf_t * pkt,protocol_family_t family)3624 utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family)
3625 {
3626 #pragma unused(pcb)
3627 #pragma unused(pkt)
3628 #pragma unused(family)
3629 	return 0;
3630 }
3631 
/*
 * DTLS placeholder: ignores `pcb` and does nothing.
 */
void
utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb)
{
#pragma unused(pcb)
	return;
}
3637 
3638 #if UTUN_NEXUS
3639 static boolean_t
utun_data_move_begin(struct utun_pcb * pcb)3640 utun_data_move_begin(struct utun_pcb *pcb)
3641 {
3642 	bool data_path_ready = false;
3643 
3644 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3645 	if ((data_path_ready = UTUN_IS_DATA_PATH_READY(pcb))) {
3646 		pcb->utun_pcb_data_move++;
3647 	}
3648 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3649 
3650 	return data_path_ready;
3651 }
3652 
3653 static void
utun_data_move_end(struct utun_pcb * pcb)3654 utun_data_move_end(struct utun_pcb *pcb)
3655 {
3656 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3657 	VERIFY(pcb->utun_pcb_data_move > 0);
3658 	/*
3659 	 * if there's no more thread moving data, wakeup any
3660 	 * drainers that are blocked waiting for this.
3661 	 */
3662 	if (--pcb->utun_pcb_data_move == 0 && pcb->utun_pcb_drainers > 0) {
3663 		wakeup(&(pcb->utun_pcb_data_move));
3664 	}
3665 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3666 }
3667 
3668 static void
utun_data_move_drain(struct utun_pcb * pcb)3669 utun_data_move_drain(struct utun_pcb *pcb)
3670 {
3671 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3672 	/* data path must already be marked as not ready */
3673 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3674 	pcb->utun_pcb_drainers++;
3675 	while (pcb->utun_pcb_data_move != 0) {
3676 		(void) msleep(&(pcb->utun_pcb_data_move),
3677 		    &pcb->utun_pcb_data_move_lock, (PZERO - 1), __func__, NULL);
3678 	}
3679 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3680 	VERIFY(pcb->utun_pcb_drainers > 0);
3681 	pcb->utun_pcb_drainers--;
3682 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3683 }
3684 
3685 static void
utun_wait_data_move_drain(struct utun_pcb * pcb)3686 utun_wait_data_move_drain(struct utun_pcb *pcb)
3687 {
3688 	/*
3689 	 * Mark the data path as not usable.
3690 	 */
3691 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3692 	UTUN_CLR_DATA_PATH_READY(pcb);
3693 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3694 
3695 	/* Wait until all threads in the data path are done. */
3696 	utun_data_move_drain(pcb);
3697 }
3698 #endif // UTUN_NEXUS
3699