xref: /xnu-8792.61.2/bsd/net/if_utun.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2008-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 
30 
31 /* ----------------------------------------------------------------------------------
32  *   Application of kernel control for interface creation
33  *
34  *   Theory of operation:
35  *   utun (user tunnel) acts as glue between kernel control sockets and network interfaces.
36  *   This kernel control will register an interface for every client that connects.
37  *   ---------------------------------------------------------------------------------- */
38 
39 #include <sys/systm.h>
40 #include <sys/kern_control.h>
41 #include <net/kpi_protocol.h>
42 #include <net/kpi_interface.h>
43 #include <sys/socket.h>
44 #include <net/if.h>
45 #include <net/if_types.h>
46 #include <net/bpf.h>
47 #include <net/if_utun.h>
48 #include <sys/mbuf.h>
49 #include <sys/sockio.h>
50 #include <netinet/in.h>
51 #include <netinet/ip.h>
52 #include <netinet6/in6_var.h>
53 #include <netinet6/in6_var.h>
54 #include <sys/kauth.h>
55 #include <net/necp.h>
56 #include <kern/zalloc.h>
57 #include <os/log.h>
58 
59 #if SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
60 #include <skywalk/os_skywalk_private.h>
61 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
62 #include <skywalk/nexus/netif/nx_netif.h>
63 #define UTUN_NEXUS 1
64 #else // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
65 #define UTUN_NEXUS 0
66 #endif // SKYWALK && CONFIG_NEXUS_KERNEL_PIPE
67 
#if UTUN_NEXUS
/*
 * File-scope nexus state, not tied to any single utun_pcb.
 * NOTE(review): utun_ncd_refcount presumably counts users of utun_ncd;
 * confirm against the code that creates/destroys the controller.
 */
static nexus_controller_t utun_ncd;
static int utun_ncd_refcount;
static uuid_t utun_kpipe_uuid;
static uuid_t utun_nx_dom_prov;

/*
 * Set of UUIDs kept per interface (embedded in struct utun_pcb as utun_nx).
 * NOTE(review): by naming, if_* identify the netif nexus provider/instance
 * and fsw_* the flowswitch provider/instance/device/agent; confirm against
 * the attach/detach code.
 */
typedef struct utun_nx {
	uuid_t if_provider;
	uuid_t if_instance;
	uuid_t fsw_provider;
	uuid_t fsw_instance;
	uuid_t fsw_device;
	uuid_t fsw_agent;
} *utun_nx_t;

#endif // UTUN_NEXUS
84 
85 /* Control block allocated for each kernel control connection */
86 struct utun_pcb {
87 	TAILQ_ENTRY(utun_pcb)   utun_chain;
88 	kern_ctl_ref    utun_ctlref;
89 	ifnet_t                 utun_ifp;
90 	u_int32_t               utun_unit;
91 	u_int32_t               utun_unique_id;
92 	u_int32_t               utun_flags;
93 	int                     utun_ext_ifdata_stats;
94 	u_int32_t               utun_max_pending_packets;
95 	char                    utun_if_xname[IFXNAMSIZ];
96 	char                    utun_unique_name[IFXNAMSIZ];
97 	// PCB lock protects state fields and rings
98 	decl_lck_rw_data(, utun_pcb_lock);
99 	struct mbuf *   utun_input_chain;
100 	struct mbuf *   utun_input_chain_last;
101 	u_int32_t               utun_input_chain_count;
102 	// Input chain lock protects the list of input mbufs
103 	// The input chain lock must be taken AFTER the PCB lock if both are held
104 	lck_mtx_t               utun_input_chain_lock;
105 
106 #if UTUN_NEXUS
107 	// lock to protect utun_pcb_data_move & utun_pcb_drainers
108 	decl_lck_mtx_data(, utun_pcb_data_move_lock);
109 	u_int32_t               utun_pcb_data_move; /* number of data moving contexts */
110 	u_int32_t               utun_pcb_drainers; /* number of threads waiting to drain */
111 	u_int32_t               utun_pcb_data_path_state; /* internal state of interface data path */
112 
113 	struct utun_nx  utun_nx;
114 	int                     utun_kpipe_enabled;
115 	uuid_t                  utun_kpipe_uuid;
116 	void *                  utun_kpipe_rxring;
117 	void *                  utun_kpipe_txring;
118 	kern_pbufpool_t         utun_kpipe_pp;
119 	u_int32_t               utun_kpipe_tx_ring_size;
120 	u_int32_t               utun_kpipe_rx_ring_size;
121 
122 	kern_nexus_t    utun_netif_nexus;
123 	kern_pbufpool_t         utun_netif_pp;
124 	void *                  utun_netif_rxring;
125 	void *                  utun_netif_txring;
126 	uint64_t                utun_netif_txring_size;
127 
128 	u_int32_t               utun_slot_size;
129 	u_int32_t               utun_netif_ring_size;
130 	u_int32_t               utun_tx_fsw_ring_size;
131 	u_int32_t               utun_rx_fsw_ring_size;
132 	// Auto attach flowswitch when netif is enabled. When set to false,
133 	// it allows userspace nexus controller to attach and own flowswitch.
134 	bool                    utun_attach_fsw;
135 	bool                    utun_netif_connected;
136 	bool                    utun_use_netif;
137 	bool                    utun_needs_netagent;
138 #endif // UTUN_NEXUS
139 };
140 
141 /* Kernel Control functions */
142 static errno_t  utun_ctl_setup(u_int32_t *unit, void **unitinfo);
143 static errno_t  utun_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
144     void **unitinfo);
145 static errno_t  utun_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
146     void **unitinfo);
147 static errno_t  utun_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
148     void *unitinfo);
149 static errno_t  utun_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
150     void *unitinfo, mbuf_t m, int flags);
151 static errno_t  utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
152     int opt, void *data, size_t *len);
153 static errno_t  utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
154     int opt, void *data, size_t len);
155 static void             utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
156     int flags);
157 
158 /* Network Interface functions */
159 static void     utun_start(ifnet_t interface);
160 static errno_t  utun_framer(ifnet_t interface, mbuf_t *packet,
161     const struct sockaddr *dest, const char *desk_linkaddr,
162     const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len);
163 static errno_t  utun_output(ifnet_t interface, mbuf_t data);
164 static errno_t  utun_demux(ifnet_t interface, mbuf_t data, char *frame_header,
165     protocol_family_t *protocol);
166 static errno_t  utun_add_proto(ifnet_t interface, protocol_family_t protocol,
167     const struct ifnet_demux_desc *demux_array,
168     u_int32_t demux_count);
169 static errno_t  utun_del_proto(ifnet_t interface, protocol_family_t protocol);
170 static errno_t  utun_ioctl(ifnet_t interface, u_long cmd, void *data);
171 static void             utun_detached(ifnet_t interface);
172 
173 /* Protocol handlers */
174 static errno_t  utun_attach_proto(ifnet_t interface, protocol_family_t proto);
175 static errno_t  utun_proto_input(ifnet_t interface, protocol_family_t protocol,
176     mbuf_t m, char *frame_header);
177 static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
178     mbuf_t *packet, const struct sockaddr *dest, void *route,
179     char *frame_type, char *link_layer_dest);
180 static errno_t utun_pkt_input(struct utun_pcb *pcb, mbuf_t m);
181 
/* data movement refcounting functions */
#if UTUN_NEXUS
/*
 * Callers in this file bracket data path work with begin/end and bail out
 * when utun_data_move_begin() returns false ("data path stopped").
 */
static boolean_t utun_data_move_begin(struct utun_pcb *pcb);
static void utun_data_move_end(struct utun_pcb *pcb);
static void utun_wait_data_move_drain(struct utun_pcb *pcb);

/* Data path states (bits of utun_pcb_data_path_state) */
#define UTUN_PCB_DATA_PATH_READY    0x1

/* Macros to set/clear/test data path states */
#define UTUN_SET_DATA_PATH_READY(_pcb) \
    ((_pcb)->utun_pcb_data_path_state |= UTUN_PCB_DATA_PATH_READY)
#define UTUN_CLR_DATA_PATH_READY(_pcb) \
    ((_pcb)->utun_pcb_data_path_state &= ~UTUN_PCB_DATA_PATH_READY)
#define UTUN_IS_DATA_PATH_READY(_pcb) \
    (((_pcb)->utun_pcb_data_path_state & UTUN_PCB_DATA_PATH_READY) != 0)

/* Default slot (buffer) size and ring sizes */
#define UTUN_IF_DEFAULT_SLOT_SIZE 2048
#define UTUN_IF_DEFAULT_RING_SIZE 64
#define UTUN_IF_DEFAULT_TX_FSW_RING_SIZE 64
#define UTUN_IF_DEFAULT_RX_FSW_RING_SIZE 128
#define UTUN_IF_DEFAULT_BUF_SEG_SIZE    skmem_usr_buf_seg_size
#define UTUN_IF_HEADROOM_SIZE 32

/* Inclusive bounds enforced by the ring size sysctl handlers */
#define UTUN_IF_MIN_RING_SIZE 8
#define UTUN_IF_MAX_RING_SIZE 1024

#define UTUN_IF_MIN_SLOT_SIZE 1024
#define UTUN_IF_MAX_SLOT_SIZE 4096

#define UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT 512

/* Exported read/write as net.utun.max_pending_input (no range check) */
static int if_utun_max_pending_input = UTUN_DEFAULT_MAX_PENDING_INPUT_COUNT;

static int sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;

/* Backing storage for the range-checked net.utun.*ring_size sysctls */
static int if_utun_ring_size = UTUN_IF_DEFAULT_RING_SIZE;
static int if_utun_tx_fsw_ring_size = UTUN_IF_DEFAULT_TX_FSW_RING_SIZE;
static int if_utun_rx_fsw_ring_size = UTUN_IF_DEFAULT_RX_FSW_RING_SIZE;

SYSCTL_DECL(_net_utun);
SYSCTL_NODE(_net, OID_AUTO, utun, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "UTun");

SYSCTL_INT(_net_utun, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_utun_max_pending_input, 0, "");
SYSCTL_PROC(_net_utun, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_utun_ring_size, UTUN_IF_DEFAULT_RING_SIZE, &sysctl_if_utun_ring_size, "I", "");
SYSCTL_PROC(_net_utun, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_utun_tx_fsw_ring_size, UTUN_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_utun_tx_fsw_ring_size, "I", "");
SYSCTL_PROC(_net_utun, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_utun_rx_fsw_ring_size, UTUN_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_utun_rx_fsw_ring_size, "I", "");

static errno_t
utun_register_nexus(void);

/* Nexus provider callbacks */
static errno_t
utun_netif_prepare(__unused kern_nexus_t nexus, ifnet_t ifp);
static errno_t
utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx);
static errno_t
utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel);
static void
utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel);
static void
utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel);
static void
utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel);
static errno_t
utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx);
static void
utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring);
static errno_t
utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags);
static errno_t
utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring, uint32_t flags);
#endif // UTUN_NEXUS
270 
271 #define UTUN_DEFAULT_MTU 1500
272 #define UTUN_HEADER_SIZE(_pcb) (sizeof(u_int32_t) + (((_pcb)->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) ? sizeof(uuid_t) : 0))
273 
274 static kern_ctl_ref     utun_kctlref;
275 static LCK_ATTR_DECLARE(utun_lck_attr, 0, 0);
276 static LCK_GRP_DECLARE(utun_lck_grp, "utun");
277 static LCK_MTX_DECLARE_ATTR(utun_lock, &utun_lck_grp, &utun_lck_attr);
278 
279 TAILQ_HEAD(utun_list, utun_pcb) utun_head;
280 
281 static ZONE_DEFINE(utun_pcb_zone, "net.if_utun",
282     sizeof(struct utun_pcb), ZC_ZFREE_CLEARMEM);
283 
284 #if UTUN_NEXUS
285 
286 static int
287 sysctl_if_utun_ring_size SYSCTL_HANDLER_ARGS
288 {
289 #pragma unused(arg1, arg2)
290 	int value = if_utun_ring_size;
291 
292 	int error = sysctl_handle_int(oidp, &value, 0, req);
293 	if (error || !req->newptr) {
294 		return error;
295 	}
296 
297 	if (value < UTUN_IF_MIN_RING_SIZE ||
298 	    value > UTUN_IF_MAX_RING_SIZE) {
299 		return EINVAL;
300 	}
301 
302 	if_utun_ring_size = value;
303 
304 	return 0;
305 }
306 
307 static int
308 sysctl_if_utun_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2)
311 	int value = if_utun_tx_fsw_ring_size;
312 
313 	int error = sysctl_handle_int(oidp, &value, 0, req);
314 	if (error || !req->newptr) {
315 		return error;
316 	}
317 
318 	if (value < UTUN_IF_MIN_RING_SIZE ||
319 	    value > UTUN_IF_MAX_RING_SIZE) {
320 		return EINVAL;
321 	}
322 
323 	if_utun_tx_fsw_ring_size = value;
324 
325 	return 0;
326 }
327 
328 static int
329 sysctl_if_utun_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 	int value = if_utun_rx_fsw_ring_size;
333 
334 	int error = sysctl_handle_int(oidp, &value, 0, req);
335 	if (error || !req->newptr) {
336 		return error;
337 	}
338 
339 	if (value < UTUN_IF_MIN_RING_SIZE ||
340 	    value > UTUN_IF_MAX_RING_SIZE) {
341 		return EINVAL;
342 	}
343 
344 	if_utun_rx_fsw_ring_size = value;
345 
346 	return 0;
347 }
348 
349 static errno_t
utun_netif_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)350 utun_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
351     kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
352     void **ring_ctx)
353 {
354 #pragma unused(nxprov)
355 #pragma unused(channel)
356 #pragma unused(ring_ctx)
357 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
358 	if (!is_tx_ring) {
359 		VERIFY(pcb->utun_netif_rxring == NULL);
360 		pcb->utun_netif_rxring = ring;
361 	} else {
362 		VERIFY(pcb->utun_netif_txring == NULL);
363 		pcb->utun_netif_txring = ring;
364 	}
365 	return 0;
366 }
367 
368 static void
utun_netif_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)369 utun_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
370     kern_channel_ring_t ring)
371 {
372 #pragma unused(nxprov)
373 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
374 	if (pcb->utun_netif_rxring == ring) {
375 		pcb->utun_netif_rxring = NULL;
376 	} else if (pcb->utun_netif_txring == ring) {
377 		pcb->utun_netif_txring = NULL;
378 	}
379 }
380 
381 static errno_t
utun_netif_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)382 utun_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
383     kern_channel_ring_t tx_ring, uint32_t flags)
384 {
385 #pragma unused(nxprov)
386 #pragma unused(flags)
387 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
388 
389 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
390 
391 	if (!utun_data_move_begin(pcb)) {
392 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
393 		    __func__, if_name(pcb->utun_ifp));
394 		return 0;
395 	}
396 
397 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
398 
399 	struct kern_channel_ring_stat_increment tx_ring_stats;
400 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
401 	kern_channel_slot_t tx_pslot = NULL;
402 	kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
403 	kern_packet_t tx_chain_ph = 0;
404 
405 	STATS_INC(nifs, NETIF_STATS_TX_SYNC);
406 
407 	if (tx_slot == NULL) {
408 		// Nothing to write, don't bother signalling
409 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
410 		utun_data_move_end(pcb);
411 		return 0;
412 	}
413 
414 	if (pcb->utun_kpipe_enabled) {
415 		kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
416 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
417 
418 		// Signal the kernel pipe ring to read
419 		if (rx_ring != NULL) {
420 			kern_channel_notify(rx_ring, 0);
421 		}
422 		utun_data_move_end(pcb);
423 		return 0;
424 	}
425 
426 	// If we're here, we're injecting into the utun kernel control socket
427 	while (tx_slot != NULL) {
428 		size_t length = 0;
429 		mbuf_t data = NULL;
430 
431 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
432 
433 		if (tx_ph == 0) {
434 			// Advance TX ring
435 			tx_pslot = tx_slot;
436 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
437 			continue;
438 		}
439 		(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
440 		if (tx_chain_ph != 0) {
441 			kern_packet_append(tx_ph, tx_chain_ph);
442 		}
443 		tx_chain_ph = tx_ph;
444 
445 		// Advance TX ring
446 		tx_pslot = tx_slot;
447 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
448 
449 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
450 		VERIFY(tx_buf != NULL);
451 
452 		/* tx_baddr is the absolute buffer address */
453 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
454 		VERIFY(tx_baddr != 0);
455 
456 		bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
457 
458 		uint16_t tx_offset = kern_buflet_get_data_offset(tx_buf);
459 		uint32_t tx_length = kern_buflet_get_data_length(tx_buf);
460 
461 		// The offset must be large enough for the headers
462 		VERIFY(tx_offset >= UTUN_HEADER_SIZE(pcb));
463 
464 		// Find family
465 		uint32_t af = 0;
466 		uint8_t vhl = *(uint8_t *)(tx_baddr + tx_offset);
467 		u_int ip_version = (vhl >> 4);
468 		switch (ip_version) {
469 		case 4: {
470 			af = AF_INET;
471 			break;
472 		}
473 		case 6: {
474 			af = AF_INET6;
475 			break;
476 		}
477 		default: {
478 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s: unknown ip version %u vhl %u tx_offset %u len %u header_size %zu\n",
479 			    pcb->utun_ifp->if_xname, ip_version, vhl, tx_offset, tx_length,
480 			    UTUN_HEADER_SIZE(pcb));
481 			break;
482 		}
483 		}
484 
485 		tx_offset -= UTUN_HEADER_SIZE(pcb);
486 		tx_length += UTUN_HEADER_SIZE(pcb);
487 		tx_baddr += tx_offset;
488 
489 		length = MIN(tx_length, pcb->utun_slot_size);
490 
491 		// Copy in family
492 		memcpy(tx_baddr, &af, sizeof(af));
493 		if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
494 			kern_packet_get_euuid(tx_ph, (void *)(tx_baddr + sizeof(af)));
495 		}
496 
497 		if (length > 0) {
498 			errno_t error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
499 			if (error == 0) {
500 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
501 				if (error == 0) {
502 					error = utun_output(pcb->utun_ifp, data);
503 					if (error != 0) {
504 						os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - utun_output error %d\n", pcb->utun_ifp->if_xname, error);
505 					}
506 				} else {
507 					os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->utun_ifp->if_xname, length, error);
508 					STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
509 					STATS_INC(nifs, NETIF_STATS_DROP);
510 					mbuf_freem(data);
511 					data = NULL;
512 				}
513 			} else {
514 				os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->utun_ifp->if_xname, error);
515 				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
516 				STATS_INC(nifs, NETIF_STATS_DROP);
517 			}
518 		} else {
519 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_tx %s - 0 length packet\n", pcb->utun_ifp->if_xname);
520 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
521 			STATS_INC(nifs, NETIF_STATS_DROP);
522 		}
523 
524 		if (data == NULL) {
525 			continue;
526 		}
527 
528 		STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
529 		STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
530 
531 		tx_ring_stats.kcrsi_slots_transferred++;
532 		tx_ring_stats.kcrsi_bytes_transferred += length;
533 	}
534 	if (tx_chain_ph != 0) {
535 		kern_pbufpool_free_chain(tx_ring->ckr_pp, tx_chain_ph);
536 	}
537 	if (tx_pslot) {
538 		kern_channel_advance_slot(tx_ring, tx_pslot);
539 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
540 		(void)kern_channel_reclaim(tx_ring);
541 	}
542 
543 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
544 	utun_data_move_end(pcb);
545 	return 0;
546 }
547 
548 static errno_t
utun_netif_tx_doorbell(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring,__unused uint32_t flags)549 utun_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
550     kern_channel_ring_t ring, __unused uint32_t flags)
551 {
552 #pragma unused(nxprov)
553 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
554 	boolean_t more = false;
555 	errno_t rc = 0;
556 
557 	if (!utun_data_move_begin(pcb)) {
558 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
559 		    __func__, if_name(pcb->utun_ifp));
560 		return 0;
561 	}
562 
563 	/*
564 	 * Refill and sync the ring; we may be racing against another thread doing
565 	 * an RX sync that also wants to do kr_enter(), and so use the blocking
566 	 * variant here.
567 	 */
568 	rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
569 	if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
570 		os_log_error(OS_LOG_DEFAULT, "%s, tx refill failed %d\n", __func__, rc);
571 	}
572 
573 	(void) kr_enter(ring, TRUE);
574 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
575 
576 	if (pcb->utun_kpipe_enabled) {
577 		uint32_t tx_available = kern_channel_available_slot_count(ring);
578 		if (pcb->utun_netif_txring_size > 0 &&
579 		    tx_available >= pcb->utun_netif_txring_size - 1) {
580 			// No room left in tx ring, disable output for now
581 			errno_t error = ifnet_disable_output(pcb->utun_ifp);
582 			if (error != 0) {
583 				os_log_error(OS_LOG_DEFAULT, "utun_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
584 			}
585 		}
586 	}
587 
588 	if (pcb->utun_kpipe_enabled) {
589 		kern_channel_ring_t rx_ring = pcb->utun_kpipe_rxring;
590 
591 		// Unlock while calling notify
592 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
593 		// Signal the kernel pipe ring to read
594 		if (rx_ring != NULL) {
595 			kern_channel_notify(rx_ring, 0);
596 		}
597 	} else {
598 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
599 	}
600 
601 	kr_exit(ring);
602 	utun_data_move_end(pcb);
603 	return 0;
604 }
605 
606 static errno_t
utun_netif_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)607 utun_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
608     kern_channel_ring_t rx_ring, uint32_t flags)
609 {
610 #pragma unused(nxprov)
611 #pragma unused(flags)
612 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
613 	struct kern_channel_ring_stat_increment rx_ring_stats;
614 
615 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
616 
617 	if (!utun_data_move_begin(pcb)) {
618 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
619 		    __func__, if_name(pcb->utun_ifp));
620 		return 0;
621 	}
622 
623 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
624 
625 	// Reclaim user-released slots
626 	(void) kern_channel_reclaim(rx_ring);
627 
628 	STATS_INC(nifs, NETIF_STATS_RX_SYNC);
629 
630 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
631 	if (avail == 0) {
632 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
633 		utun_data_move_end(pcb);
634 		return 0;
635 	}
636 
637 	struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
638 	VERIFY(rx_pp != NULL);
639 	bzero(&rx_ring_stats, sizeof(rx_ring_stats));
640 	kern_channel_slot_t rx_pslot = NULL;
641 	kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
642 
643 	while (rx_slot != NULL) {
644 		// Check for a waiting packet
645 		lck_mtx_lock(&pcb->utun_input_chain_lock);
646 		mbuf_t data = pcb->utun_input_chain;
647 		if (data == NULL) {
648 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
649 			break;
650 		}
651 
652 		// Allocate rx packet
653 		kern_packet_t rx_ph = 0;
654 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
655 		if (__improbable(error != 0)) {
656 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
657 			STATS_INC(nifs, NETIF_STATS_DROP);
658 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
659 			break;
660 		}
661 
662 		// Advance waiting packets
663 		if (pcb->utun_input_chain_count > 0) {
664 			pcb->utun_input_chain_count--;
665 		}
666 		pcb->utun_input_chain = data->m_nextpkt;
667 		data->m_nextpkt = NULL;
668 		if (pcb->utun_input_chain == NULL) {
669 			pcb->utun_input_chain_last = NULL;
670 		}
671 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
672 
673 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
674 		size_t length = mbuf_pkthdr_len(data);
675 
676 		if (length < header_offset) {
677 			// mbuf is too small
678 			mbuf_freem(data);
679 			kern_pbufpool_free(rx_pp, rx_ph);
680 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
681 			STATS_INC(nifs, NETIF_STATS_DROP);
682 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length too short for header %zu < %zu\n",
683 			    pcb->utun_ifp->if_xname, length, header_offset);
684 			continue;
685 		}
686 
687 		length -= header_offset;
688 		if (length > PP_BUF_SIZE_DEF(rx_pp)) {
689 			// Flush data
690 			mbuf_freem(data);
691 			kern_pbufpool_free(rx_pp, rx_ph);
692 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
693 			STATS_INC(nifs, NETIF_STATS_DROP);
694 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: legacy packet length %zu > %u\n",
695 			    pcb->utun_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
696 			continue;
697 		}
698 
699 		mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
700 
701 		// Fillout rx packet
702 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
703 		VERIFY(rx_buf != NULL);
704 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
705 		VERIFY(rx_baddr != NULL);
706 
707 		// Copy-in data from mbuf to buflet
708 		mbuf_copydata(data, header_offset, length, (void *)rx_baddr);
709 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
710 
711 		// Finalize and attach the packet
712 		error = kern_buflet_set_data_offset(rx_buf, 0);
713 		VERIFY(error == 0);
714 		error = kern_buflet_set_data_length(rx_buf, length);
715 		VERIFY(error == 0);
716 		error = kern_packet_set_headroom(rx_ph, 0);
717 		VERIFY(error == 0);
718 		error = kern_packet_finalize(rx_ph);
719 		VERIFY(error == 0);
720 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
721 		VERIFY(error == 0);
722 
723 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
724 		STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
725 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
726 
727 		rx_ring_stats.kcrsi_slots_transferred++;
728 		rx_ring_stats.kcrsi_bytes_transferred += length;
729 
730 		mbuf_freem(data);
731 
732 		// Advance ring
733 		rx_pslot = rx_slot;
734 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
735 	}
736 
737 	struct kern_channel_ring_stat_increment tx_ring_stats;
738 	bzero(&tx_ring_stats, sizeof(tx_ring_stats));
739 	kern_channel_ring_t tx_ring = pcb->utun_kpipe_txring;
740 	kern_channel_slot_t tx_pslot = NULL;
741 	kern_channel_slot_t tx_slot = NULL;
742 	if (tx_ring == NULL) {
743 		// Net-If TX ring not set up yet, nothing to read
744 		goto done;
745 	}
746 	// Unlock utun before entering ring
747 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
748 
749 	(void)kr_enter(tx_ring, TRUE);
750 
751 	// Lock again after entering and validate
752 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
753 	if (tx_ring != pcb->utun_kpipe_txring) {
754 		goto done;
755 	}
756 
757 	tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
758 	if (tx_slot == NULL) {
759 		// Nothing to read, don't bother signalling
760 		goto done;
761 	}
762 
763 	while (rx_slot != NULL && tx_slot != NULL) {
764 		// Allocate rx packet
765 		kern_packet_t rx_ph = 0;
766 		kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
767 
768 		// Advance TX ring
769 		tx_pslot = tx_slot;
770 		tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
771 
772 		/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
773 		if (tx_ph == 0) {
774 			continue;
775 		}
776 
777 		/* XXX We could try this alloc before advancing the slot to avoid
778 		 * dropping the packet on failure to allocate.
779 		 */
780 		errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
781 		if (__improbable(error != 0)) {
782 			STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
783 			STATS_INC(nifs, NETIF_STATS_DROP);
784 			break;
785 		}
786 
787 		kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
788 		VERIFY(tx_buf != NULL);
789 		uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
790 		VERIFY(tx_baddr != 0);
791 		tx_baddr += kern_buflet_get_data_offset(tx_buf);
792 
793 		// Check packet length
794 		size_t header_offset = UTUN_HEADER_SIZE(pcb);
795 		uint32_t tx_length = kern_packet_get_data_length(tx_ph);
796 		if (tx_length < header_offset) {
797 			// Packet is too small
798 			kern_pbufpool_free(rx_pp, rx_ph);
799 			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
800 			STATS_INC(nifs, NETIF_STATS_DROP);
801 			os_log_error(OS_LOG_DEFAULT, "utun_netif_sync_rx %s: packet length too short for header %u < %zu\n",
802 			    pcb->utun_ifp->if_xname, tx_length, header_offset);
803 			continue;
804 		}
805 
806 		size_t length = MIN(tx_length - header_offset,
807 		    pcb->utun_slot_size);
808 
809 		tx_ring_stats.kcrsi_slots_transferred++;
810 		tx_ring_stats.kcrsi_bytes_transferred += length;
811 
812 		// Fillout rx packet
813 		kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
814 		VERIFY(rx_buf != NULL);
815 		void *rx_baddr = kern_buflet_get_data_address(rx_buf);
816 		VERIFY(rx_baddr != NULL);
817 
818 		// Copy-in data from tx to rx
819 		memcpy((void *)rx_baddr, (void *)(tx_baddr + header_offset), length);
820 		kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
821 
822 		// Finalize and attach the packet
823 		error = kern_buflet_set_data_offset(rx_buf, 0);
824 		VERIFY(error == 0);
825 		error = kern_buflet_set_data_length(rx_buf, length);
826 		VERIFY(error == 0);
827 		error = kern_packet_set_headroom(rx_ph, 0);
828 		VERIFY(error == 0);
829 		error = kern_packet_finalize(rx_ph);
830 		VERIFY(error == 0);
831 		error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
832 		VERIFY(error == 0);
833 
834 		STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
835 		STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
836 		bpf_tap_packet_in(pcb->utun_ifp, DLT_RAW, rx_ph, NULL, 0);
837 
838 		rx_ring_stats.kcrsi_slots_transferred++;
839 		rx_ring_stats.kcrsi_bytes_transferred += length;
840 
841 		rx_pslot = rx_slot;
842 		rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
843 	}
844 
845 done:
846 	if (rx_pslot) {
847 		kern_channel_advance_slot(rx_ring, rx_pslot);
848 		kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
849 	}
850 
851 	if (tx_pslot) {
852 		kern_channel_advance_slot(tx_ring, tx_pslot);
853 		kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
854 		(void)kern_channel_reclaim(tx_ring);
855 	}
856 
857 	// Unlock first, then exit ring
858 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
859 	if (tx_ring != NULL) {
860 		if (tx_pslot != NULL) {
861 			kern_channel_notify(tx_ring, 0);
862 		}
863 		kr_exit(tx_ring);
864 	}
865 
866 	utun_data_move_end(pcb);
867 	return 0;
868 }
869 
870 static errno_t
utun_nexus_ifattach(struct utun_pcb * pcb,struct ifnet_init_eparams * init_params,struct ifnet ** ifp)871 utun_nexus_ifattach(struct utun_pcb *pcb,
872     struct ifnet_init_eparams *init_params,
873     struct ifnet **ifp)
874 {
875 	errno_t err;
876 	nexus_controller_t controller = kern_nexus_shared_controller();
877 	struct kern_nexus_net_init net_init;
878 	struct kern_pbufpool_init pp_init;
879 
880 	nexus_name_t provider_name;
881 	snprintf((char *)provider_name, sizeof(provider_name),
882 	    "com.apple.netif.%s", pcb->utun_if_xname);
883 
884 	struct kern_nexus_provider_init prov_init = {
885 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
886 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
887 		.nxpi_pre_connect = utun_nexus_pre_connect,
888 		.nxpi_connected = utun_nexus_connected,
889 		.nxpi_pre_disconnect = utun_netif_pre_disconnect,
890 		.nxpi_disconnected = utun_nexus_disconnected,
891 		.nxpi_ring_init = utun_netif_ring_init,
892 		.nxpi_ring_fini = utun_netif_ring_fini,
893 		.nxpi_slot_init = NULL,
894 		.nxpi_slot_fini = NULL,
895 		.nxpi_sync_tx = utun_netif_sync_tx,
896 		.nxpi_sync_rx = utun_netif_sync_rx,
897 		.nxpi_tx_doorbell = utun_netif_tx_doorbell,
898 	};
899 
900 	nexus_attr_t nxa = NULL;
901 	err = kern_nexus_attr_create(&nxa);
902 	if (err != 0) {
903 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
904 		    __func__, err);
905 		goto failed;
906 	}
907 
908 	uint64_t slot_buffer_size = pcb->utun_slot_size;
909 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
910 	VERIFY(err == 0);
911 
912 	// Reset ring size for netif nexus to limit memory usage
913 	uint64_t ring_size = pcb->utun_netif_ring_size;
914 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
915 	VERIFY(err == 0);
916 	err = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
917 	VERIFY(err == 0);
918 
919 	pcb->utun_netif_txring_size = ring_size;
920 
921 	bzero(&pp_init, sizeof(pp_init));
922 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
923 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
924 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
925 	pp_init.kbi_bufsize = pcb->utun_slot_size;
926 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
927 	pp_init.kbi_max_frags = 1;
928 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
929 	    "%s", provider_name);
930 	pp_init.kbi_ctx = NULL;
931 	pp_init.kbi_ctx_retain = NULL;
932 	pp_init.kbi_ctx_release = NULL;
933 
934 	err = kern_pbufpool_create(&pp_init, &pcb->utun_netif_pp, NULL);
935 	if (err != 0) {
936 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, err);
937 		goto failed;
938 	}
939 
940 	err = kern_nexus_controller_register_provider(controller,
941 	    utun_nx_dom_prov,
942 	    provider_name,
943 	    &prov_init,
944 	    sizeof(prov_init),
945 	    nxa,
946 	    &pcb->utun_nx.if_provider);
947 	if (err != 0) {
948 		os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
949 		    __func__, err);
950 		goto failed;
951 	}
952 
953 	bzero(&net_init, sizeof(net_init));
954 	net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
955 	net_init.nxneti_flags = 0;
956 	net_init.nxneti_eparams = init_params;
957 	net_init.nxneti_lladdr = NULL;
958 	net_init.nxneti_prepare = utun_netif_prepare;
959 	net_init.nxneti_rx_pbufpool = pcb->utun_netif_pp;
960 	net_init.nxneti_tx_pbufpool = pcb->utun_netif_pp;
961 	err = kern_nexus_controller_alloc_net_provider_instance(controller,
962 	    pcb->utun_nx.if_provider,
963 	    pcb,
964 	    NULL,
965 	    &pcb->utun_nx.if_instance,
966 	    &net_init,
967 	    ifp);
968 	if (err != 0) {
969 		os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
970 		    __func__, err);
971 		kern_nexus_controller_deregister_provider(controller,
972 		    pcb->utun_nx.if_provider);
973 		uuid_clear(pcb->utun_nx.if_provider);
974 		goto failed;
975 	}
976 
977 failed:
978 	if (nxa) {
979 		kern_nexus_attr_destroy(nxa);
980 	}
981 	if (err && pcb->utun_netif_pp != NULL) {
982 		kern_pbufpool_destroy(pcb->utun_netif_pp);
983 		pcb->utun_netif_pp = NULL;
984 	}
985 	return err;
986 }
987 
988 static void
utun_detach_provider_and_instance(uuid_t provider,uuid_t instance)989 utun_detach_provider_and_instance(uuid_t provider, uuid_t instance)
990 {
991 	nexus_controller_t controller = kern_nexus_shared_controller();
992 	errno_t err;
993 
994 	if (!uuid_is_null(instance)) {
995 		err = kern_nexus_controller_free_provider_instance(controller,
996 		    instance);
997 		if (err != 0) {
998 			os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
999 			    __func__, err);
1000 		}
1001 		uuid_clear(instance);
1002 	}
1003 	if (!uuid_is_null(provider)) {
1004 		err = kern_nexus_controller_deregister_provider(controller,
1005 		    provider);
1006 		if (err != 0) {
1007 			os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
1008 		}
1009 		uuid_clear(provider);
1010 	}
1011 	return;
1012 }
1013 
1014 static void
utun_nexus_detach(struct utun_pcb * pcb)1015 utun_nexus_detach(struct utun_pcb *pcb)
1016 {
1017 	utun_nx_t nx = &pcb->utun_nx;
1018 	nexus_controller_t controller = kern_nexus_shared_controller();
1019 	errno_t err;
1020 
1021 	if (!uuid_is_null(nx->fsw_device)) {
1022 		err = kern_nexus_ifdetach(controller,
1023 		    nx->fsw_instance,
1024 		    nx->fsw_device);
1025 		if (err != 0) {
1026 			os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
1027 			    __func__, err);
1028 		}
1029 	}
1030 
1031 	utun_detach_provider_and_instance(nx->fsw_provider,
1032 	    nx->fsw_instance);
1033 	utun_detach_provider_and_instance(nx->if_provider,
1034 	    nx->if_instance);
1035 
1036 	if (pcb->utun_netif_pp != NULL) {
1037 		kern_pbufpool_destroy(pcb->utun_netif_pp);
1038 		pcb->utun_netif_pp = NULL;
1039 	}
1040 	memset(nx, 0, sizeof(*nx));
1041 }
1042 
1043 static errno_t
utun_create_fs_provider_and_instance(struct utun_pcb * pcb,const char * type_name,const char * ifname,uuid_t * provider,uuid_t * instance)1044 utun_create_fs_provider_and_instance(struct utun_pcb *pcb,
1045     const char *type_name,
1046     const char *ifname,
1047     uuid_t *provider, uuid_t *instance)
1048 {
1049 	nexus_attr_t attr = NULL;
1050 	nexus_controller_t controller = kern_nexus_shared_controller();
1051 	uuid_t dom_prov;
1052 	errno_t err;
1053 	struct kern_nexus_init init;
1054 	nexus_name_t    provider_name;
1055 
1056 	err = kern_nexus_get_default_domain_provider(NEXUS_TYPE_FLOW_SWITCH,
1057 	    &dom_prov);
1058 	if (err != 0) {
1059 		os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
1060 		    __func__, type_name, err);
1061 		goto failed;
1062 	}
1063 
1064 	err = kern_nexus_attr_create(&attr);
1065 	if (err != 0) {
1066 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1067 		    __func__, err);
1068 		goto failed;
1069 	}
1070 
1071 	uint64_t slot_buffer_size = pcb->utun_slot_size;
1072 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1073 	VERIFY(err == 0);
1074 
1075 	// Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
1076 	uint64_t tx_ring_size = pcb->utun_tx_fsw_ring_size;
1077 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, tx_ring_size);
1078 	VERIFY(err == 0);
1079 	uint64_t rx_ring_size = pcb->utun_rx_fsw_ring_size;
1080 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, rx_ring_size);
1081 	VERIFY(err == 0);
1082 	/*
1083 	 * Configure flowswitch to use super-packet (multi-buflet).
1084 	 * This allows flowswitch to perform intra-stack packet aggregation.
1085 	 */
1086 	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
1087 	    NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
1088 	VERIFY(err == 0);
1089 
1090 	snprintf((char *)provider_name, sizeof(provider_name),
1091 	    "com.apple.%s.%s", type_name, ifname);
1092 	err = kern_nexus_controller_register_provider(controller,
1093 	    dom_prov,
1094 	    provider_name,
1095 	    NULL,
1096 	    0,
1097 	    attr,
1098 	    provider);
1099 	kern_nexus_attr_destroy(attr);
1100 	attr = NULL;
1101 	if (err != 0) {
1102 		os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
1103 		    __func__, type_name, err);
1104 		goto failed;
1105 	}
1106 	bzero(&init, sizeof(init));
1107 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1108 	err = kern_nexus_controller_alloc_provider_instance(controller,
1109 	    *provider,
1110 	    NULL, NULL,
1111 	    instance, &init);
1112 	if (err != 0) {
1113 		os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
1114 		    __func__, type_name, err);
1115 		kern_nexus_controller_deregister_provider(controller,
1116 		    *provider);
1117 		uuid_clear(*provider);
1118 	}
1119 failed:
1120 	return err;
1121 }
1122 
1123 static errno_t
utun_flowswitch_attach(struct utun_pcb * pcb)1124 utun_flowswitch_attach(struct utun_pcb *pcb)
1125 {
1126 	nexus_controller_t controller = kern_nexus_shared_controller();
1127 	errno_t err = 0;
1128 	utun_nx_t nx = &pcb->utun_nx;
1129 
1130 	// Allocate flowswitch
1131 	err = utun_create_fs_provider_and_instance(pcb,
1132 	    "flowswitch",
1133 	    pcb->utun_ifp->if_xname,
1134 	    &nx->fsw_provider,
1135 	    &nx->fsw_instance);
1136 	if (err != 0) {
1137 		os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n",
1138 		    __func__);
1139 		goto failed;
1140 	}
1141 
1142 	// Attach flowswitch to device port
1143 	err = kern_nexus_ifattach(controller, nx->fsw_instance,
1144 	    NULL, nx->if_instance,
1145 	    FALSE, &nx->fsw_device);
1146 	if (err != 0) {
1147 		os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
1148 		goto failed;
1149 	}
1150 
1151 	// Extract the agent UUID and save for later
1152 	struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
1153 	if (flowswitch_nx != NULL) {
1154 		struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
1155 		if (flowswitch != NULL) {
1156 			FSW_RLOCK(flowswitch);
1157 			uuid_copy(nx->fsw_agent, flowswitch->fsw_agent_uuid);
1158 			FSW_UNLOCK(flowswitch);
1159 		} else {
1160 			os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - flowswitch is NULL\n");
1161 		}
1162 		nx_release(flowswitch_nx);
1163 	} else {
1164 		os_log_error(OS_LOG_DEFAULT, "utun_flowswitch_attach - unable to find flowswitch nexus\n");
1165 	}
1166 
1167 	return 0;
1168 
1169 failed:
1170 	utun_nexus_detach(pcb);
1171 
1172 	errno_t detach_error = 0;
1173 	if ((detach_error = ifnet_detach(pcb->utun_ifp)) != 0) {
1174 		panic("utun_flowswitch_attach - ifnet_detach failed: %d", detach_error);
1175 		/* NOT REACHED */
1176 	}
1177 
1178 	return err;
1179 }
1180 
1181 static errno_t
utun_register_kernel_pipe_nexus(struct utun_pcb * pcb)1182 utun_register_kernel_pipe_nexus(struct utun_pcb *pcb)
1183 {
1184 	nexus_attr_t nxa = NULL;
1185 	errno_t result;
1186 
1187 	lck_mtx_lock(&utun_lock);
1188 	if (utun_ncd_refcount++) {
1189 		lck_mtx_unlock(&utun_lock);
1190 		return 0;
1191 	}
1192 
1193 	result = kern_nexus_controller_create(&utun_ncd);
1194 	if (result) {
1195 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
1196 		    __FUNCTION__, result);
1197 		goto done;
1198 	}
1199 
1200 	uuid_t dom_prov;
1201 	result = kern_nexus_get_default_domain_provider(
1202 		NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
1203 	if (result) {
1204 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
1205 		    __FUNCTION__, result);
1206 		goto done;
1207 	}
1208 
1209 	struct kern_nexus_provider_init prov_init = {
1210 		.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1211 		.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
1212 		.nxpi_pre_connect = utun_nexus_pre_connect,
1213 		.nxpi_connected = utun_nexus_connected,
1214 		.nxpi_pre_disconnect = utun_nexus_pre_disconnect,
1215 		.nxpi_disconnected = utun_nexus_disconnected,
1216 		.nxpi_ring_init = utun_kpipe_ring_init,
1217 		.nxpi_ring_fini = utun_kpipe_ring_fini,
1218 		.nxpi_slot_init = NULL,
1219 		.nxpi_slot_fini = NULL,
1220 		.nxpi_sync_tx = utun_kpipe_sync_tx,
1221 		.nxpi_sync_rx = utun_kpipe_sync_rx,
1222 		.nxpi_tx_doorbell = NULL,
1223 	};
1224 
1225 	result = kern_nexus_attr_create(&nxa);
1226 	if (result) {
1227 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
1228 		    __FUNCTION__, result);
1229 		goto done;
1230 	}
1231 
1232 	uint64_t slot_buffer_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1233 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
1234 	VERIFY(result == 0);
1235 
1236 	// Reset ring size for kernel pipe nexus to limit memory usage
1237 	uint64_t ring_size =
1238 	    pcb->utun_kpipe_tx_ring_size != 0 ? pcb->utun_kpipe_tx_ring_size :
1239 	    if_utun_ring_size;
1240 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
1241 	VERIFY(result == 0);
1242 
1243 	ring_size =
1244 	    pcb->utun_kpipe_rx_ring_size != 0 ? pcb->utun_kpipe_rx_ring_size :
1245 	    if_utun_ring_size;
1246 	result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
1247 	VERIFY(result == 0);
1248 
1249 	result = kern_nexus_controller_register_provider(utun_ncd,
1250 	    dom_prov,
1251 	    (const uint8_t *)"com.apple.nexus.utun.kpipe",
1252 	    &prov_init,
1253 	    sizeof(prov_init),
1254 	    nxa,
1255 	    &utun_kpipe_uuid);
1256 	if (result) {
1257 		os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
1258 		    __FUNCTION__, result);
1259 		goto done;
1260 	}
1261 
1262 done:
1263 	if (nxa) {
1264 		kern_nexus_attr_destroy(nxa);
1265 	}
1266 
1267 	if (result) {
1268 		if (utun_ncd) {
1269 			kern_nexus_controller_destroy(utun_ncd);
1270 			utun_ncd = NULL;
1271 		}
1272 		utun_ncd_refcount = 0;
1273 	}
1274 
1275 	lck_mtx_unlock(&utun_lock);
1276 
1277 	return result;
1278 }
1279 
1280 static void
utun_unregister_kernel_pipe_nexus(void)1281 utun_unregister_kernel_pipe_nexus(void)
1282 {
1283 	lck_mtx_lock(&utun_lock);
1284 
1285 	VERIFY(utun_ncd_refcount > 0);
1286 
1287 	if (--utun_ncd_refcount == 0) {
1288 		kern_nexus_controller_destroy(utun_ncd);
1289 		utun_ncd = NULL;
1290 	}
1291 
1292 	lck_mtx_unlock(&utun_lock);
1293 }
1294 
1295 // For use by socket option, not internally
1296 static errno_t
utun_disable_channel(struct utun_pcb * pcb)1297 utun_disable_channel(struct utun_pcb *pcb)
1298 {
1299 	errno_t result;
1300 	int enabled;
1301 	uuid_t uuid;
1302 
1303 	/* Wait until all threads in the data paths are done. */
1304 	utun_wait_data_move_drain(pcb);
1305 
1306 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1307 
1308 	enabled = pcb->utun_kpipe_enabled;
1309 	uuid_copy(uuid, pcb->utun_kpipe_uuid);
1310 
1311 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid) == !enabled);
1312 
1313 	pcb->utun_kpipe_enabled = 0;
1314 	uuid_clear(pcb->utun_kpipe_uuid);
1315 
1316 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1317 
1318 	if (enabled) {
1319 		result = kern_nexus_controller_free_provider_instance(utun_ncd, uuid);
1320 	} else {
1321 		result = ENXIO;
1322 	}
1323 
1324 	if (!result) {
1325 		if (pcb->utun_kpipe_pp != NULL) {
1326 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1327 			pcb->utun_kpipe_pp = NULL;
1328 		}
1329 		utun_unregister_kernel_pipe_nexus();
1330 	}
1331 
1332 	return result;
1333 }
1334 
1335 static errno_t
utun_enable_channel(struct utun_pcb * pcb,struct proc * proc)1336 utun_enable_channel(struct utun_pcb *pcb, struct proc *proc)
1337 {
1338 	struct kern_nexus_init init;
1339 	struct kern_pbufpool_init pp_init;
1340 	errno_t result;
1341 
1342 	kauth_cred_t cred = kauth_cred_get();
1343 	result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
1344 	if (result) {
1345 		return result;
1346 	}
1347 
1348 	result = utun_register_kernel_pipe_nexus(pcb);
1349 	if (result) {
1350 		return result;
1351 	}
1352 
1353 	VERIFY(utun_ncd);
1354 
1355 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1356 
1357 	if (pcb->utun_kpipe_enabled) {
1358 		result = EEXIST; // return success instead?
1359 		goto done;
1360 	}
1361 
1362 	/*
1363 	 * Make sure we can fit packets in the channel buffers and
1364 	 * Allow an extra 4 bytes for the protocol number header in the channel
1365 	 */
1366 	if (pcb->utun_ifp->if_mtu + UTUN_HEADER_SIZE(pcb) > pcb->utun_slot_size) {
1367 		result = EOPNOTSUPP;
1368 		goto done;
1369 	}
1370 
1371 	bzero(&pp_init, sizeof(pp_init));
1372 	pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
1373 	pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
1374 	pp_init.kbi_packets = pcb->utun_netif_ring_size * 2;
1375 	pp_init.kbi_bufsize = pcb->utun_slot_size;
1376 	pp_init.kbi_buf_seg_size = UTUN_IF_DEFAULT_BUF_SEG_SIZE;
1377 	pp_init.kbi_max_frags = 1;
1378 	pp_init.kbi_flags |= KBIF_QUANTUM;
1379 	(void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
1380 	    "com.apple.kpipe.%s", pcb->utun_if_xname);
1381 	pp_init.kbi_ctx = NULL;
1382 	pp_init.kbi_ctx_retain = NULL;
1383 	pp_init.kbi_ctx_release = NULL;
1384 
1385 	result = kern_pbufpool_create(&pp_init, &pcb->utun_kpipe_pp,
1386 	    NULL);
1387 	if (result != 0) {
1388 		os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n", __func__, result);
1389 		goto done;
1390 	}
1391 
1392 	VERIFY(uuid_is_null(pcb->utun_kpipe_uuid));
1393 	bzero(&init, sizeof(init));
1394 	init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
1395 	init.nxi_tx_pbufpool = pcb->utun_kpipe_pp;
1396 	result = kern_nexus_controller_alloc_provider_instance(utun_ncd,
1397 	    utun_kpipe_uuid, pcb, NULL, &pcb->utun_kpipe_uuid, &init);
1398 	if (result) {
1399 		goto done;
1400 	}
1401 
1402 	nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
1403 	result = kern_nexus_controller_bind_provider_instance(utun_ncd,
1404 	    pcb->utun_kpipe_uuid, &port,
1405 	    proc_pid(proc), NULL, NULL, 0, NEXUS_BIND_PID);
1406 	if (result) {
1407 		kern_nexus_controller_free_provider_instance(utun_ncd,
1408 		    pcb->utun_kpipe_uuid);
1409 		uuid_clear(pcb->utun_kpipe_uuid);
1410 		goto done;
1411 	}
1412 
1413 	pcb->utun_kpipe_enabled = 1;
1414 
1415 done:
1416 	lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1417 
1418 	if (result) {
1419 		if (pcb->utun_kpipe_pp != NULL) {
1420 			kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1421 			pcb->utun_kpipe_pp = NULL;
1422 		}
1423 		utun_unregister_kernel_pipe_nexus();
1424 	}
1425 
1426 	return result;
1427 }
1428 
1429 #endif // UTUN_NEXUS
1430 
1431 errno_t
utun_register_control(void)1432 utun_register_control(void)
1433 {
1434 	struct kern_ctl_reg kern_ctl;
1435 	errno_t result = 0;
1436 
1437 #if UTUN_NEXUS
1438 	utun_register_nexus();
1439 #endif // UTUN_NEXUS
1440 
1441 	TAILQ_INIT(&utun_head);
1442 
1443 	bzero(&kern_ctl, sizeof(kern_ctl));
1444 	strlcpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
1445 	kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
1446 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP | CTL_FLAG_REG_EXTENDED; /* Require root */
1447 	kern_ctl.ctl_sendsize = 512 * 1024;
1448 	kern_ctl.ctl_recvsize = 512 * 1024;
1449 	kern_ctl.ctl_setup = utun_ctl_setup;
1450 	kern_ctl.ctl_bind = utun_ctl_bind;
1451 	kern_ctl.ctl_connect = utun_ctl_connect;
1452 	kern_ctl.ctl_disconnect = utun_ctl_disconnect;
1453 	kern_ctl.ctl_send = utun_ctl_send;
1454 	kern_ctl.ctl_setopt = utun_ctl_setopt;
1455 	kern_ctl.ctl_getopt = utun_ctl_getopt;
1456 	kern_ctl.ctl_rcvd = utun_ctl_rcvd;
1457 
1458 	result = ctl_register(&kern_ctl, &utun_kctlref);
1459 	if (result != 0) {
1460 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - ctl_register failed: %d\n", result);
1461 		return result;
1462 	}
1463 
1464 	/* Register the protocol plumbers */
1465 	if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN,
1466 	    utun_attach_proto, NULL)) != 0) {
1467 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_UTUN) failed: %d\n",
1468 		    result);
1469 		ctl_deregister(utun_kctlref);
1470 		return result;
1471 	}
1472 
1473 	/* Register the protocol plumbers */
1474 	if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN,
1475 	    utun_attach_proto, NULL)) != 0) {
1476 		proto_unregister_plumber(PF_INET, IFNET_FAMILY_UTUN);
1477 		ctl_deregister(utun_kctlref);
1478 		os_log_error(OS_LOG_DEFAULT, "utun_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_UTUN) failed: %d\n",
1479 		    result);
1480 		return result;
1481 	}
1482 
1483 	return 0;
1484 }
1485 
1486 /* Kernel control functions */
1487 
1488 static inline int
utun_find_by_unit(u_int32_t unit)1489 utun_find_by_unit(u_int32_t unit)
1490 {
1491 	struct utun_pcb *next_pcb = NULL;
1492 	int found = 0;
1493 
1494 	TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1495 		if (next_pcb->utun_unit == unit) {
1496 			found = 1;
1497 			break;
1498 		}
1499 	}
1500 
1501 	return found;
1502 }
1503 
1504 static inline void
utun_free_pcb(struct utun_pcb * pcb,bool locked)1505 utun_free_pcb(struct utun_pcb *pcb, bool locked)
1506 {
1507 #if UTUN_NEXUS
1508 	mbuf_freem_list(pcb->utun_input_chain);
1509 	pcb->utun_input_chain_count = 0;
1510 	lck_mtx_destroy(&pcb->utun_input_chain_lock, &utun_lck_grp);
1511 	lck_mtx_destroy(&pcb->utun_pcb_data_move_lock, &utun_lck_grp);
1512 #endif // UTUN_NEXUS
1513 	lck_rw_destroy(&pcb->utun_pcb_lock, &utun_lck_grp);
1514 	if (!locked) {
1515 		lck_mtx_lock(&utun_lock);
1516 	}
1517 	TAILQ_REMOVE(&utun_head, pcb, utun_chain);
1518 	if (!locked) {
1519 		lck_mtx_unlock(&utun_lock);
1520 	}
1521 	zfree(utun_pcb_zone, pcb);
1522 }
1523 
1524 static errno_t
utun_ctl_setup(u_int32_t * unit,void ** unitinfo)1525 utun_ctl_setup(u_int32_t *unit, void **unitinfo)
1526 {
1527 	if (unit == NULL || unitinfo == NULL) {
1528 		return EINVAL;
1529 	}
1530 
1531 	lck_mtx_lock(&utun_lock);
1532 
1533 	/* Find next available unit */
1534 	if (*unit == 0) {
1535 		*unit = 1;
1536 		while (*unit != ctl_maxunit) {
1537 			if (utun_find_by_unit(*unit)) {
1538 				(*unit)++;
1539 			} else {
1540 				break;
1541 			}
1542 		}
1543 		if (*unit == ctl_maxunit) {
1544 			lck_mtx_unlock(&utun_lock);
1545 			return EBUSY;
1546 		}
1547 	} else if (utun_find_by_unit(*unit)) {
1548 		lck_mtx_unlock(&utun_lock);
1549 		return EBUSY;
1550 	}
1551 
1552 	/* Find some open interface id */
1553 	u_int32_t chosen_unique_id = 1;
1554 	struct utun_pcb *next_pcb = TAILQ_LAST(&utun_head, utun_list);
1555 	if (next_pcb != NULL) {
1556 		/* List was not empty, add one to the last item */
1557 		chosen_unique_id = next_pcb->utun_unique_id + 1;
1558 		next_pcb = NULL;
1559 
1560 		/*
1561 		 * If this wrapped the id number, start looking at
1562 		 * the front of the list for an unused id.
1563 		 */
1564 		if (chosen_unique_id == 0) {
1565 			/* Find the next unused ID */
1566 			chosen_unique_id = 1;
1567 			TAILQ_FOREACH(next_pcb, &utun_head, utun_chain) {
1568 				if (next_pcb->utun_unique_id > chosen_unique_id) {
1569 					/* We found a gap */
1570 					break;
1571 				}
1572 
1573 				chosen_unique_id = next_pcb->utun_unique_id + 1;
1574 			}
1575 		}
1576 	}
1577 
1578 	struct utun_pcb *pcb = zalloc_flags(utun_pcb_zone, Z_WAITOK | Z_ZERO);
1579 
1580 	*unitinfo = pcb;
1581 	pcb->utun_unit = *unit;
1582 	pcb->utun_unique_id = chosen_unique_id;
1583 
1584 	if (next_pcb != NULL) {
1585 		TAILQ_INSERT_BEFORE(next_pcb, pcb, utun_chain);
1586 	} else {
1587 		TAILQ_INSERT_TAIL(&utun_head, pcb, utun_chain);
1588 	}
1589 
1590 	lck_mtx_unlock(&utun_lock);
1591 
1592 	return 0;
1593 }
1594 
1595 static errno_t
utun_ctl_bind(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1596 utun_ctl_bind(kern_ctl_ref kctlref,
1597     struct sockaddr_ctl *sac,
1598     void **unitinfo)
1599 {
1600 	if (*unitinfo == NULL) {
1601 		u_int32_t unit = 0;
1602 		(void)utun_ctl_setup(&unit, unitinfo);
1603 	}
1604 
1605 	struct utun_pcb *pcb = (struct utun_pcb *)*unitinfo;
1606 	if (pcb == NULL) {
1607 		return EINVAL;
1608 	}
1609 
1610 	pcb->utun_ctlref = kctlref;
1611 	pcb->utun_unit = sac->sc_unit;
1612 	pcb->utun_max_pending_packets = 1;
1613 
1614 #if UTUN_NEXUS
1615 	pcb->utun_use_netif = false;
1616 	pcb->utun_attach_fsw = true;
1617 	pcb->utun_netif_connected = false;
1618 	pcb->utun_slot_size = UTUN_IF_DEFAULT_SLOT_SIZE;
1619 	pcb->utun_netif_ring_size = if_utun_ring_size;
1620 	pcb->utun_tx_fsw_ring_size = if_utun_tx_fsw_ring_size;
1621 	pcb->utun_rx_fsw_ring_size = if_utun_rx_fsw_ring_size;
1622 	pcb->utun_input_chain_count = 0;
1623 	lck_mtx_init(&pcb->utun_input_chain_lock, &utun_lck_grp, &utun_lck_attr);
1624 	lck_mtx_init(&pcb->utun_pcb_data_move_lock,
1625 	    &utun_lck_grp, &utun_lck_attr);
1626 #endif // UTUN_NEXUS
1627 
1628 	lck_rw_init(&pcb->utun_pcb_lock, &utun_lck_grp, &utun_lck_attr);
1629 
1630 	return 0;
1631 }
1632 
1633 static errno_t
utun_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)1634 utun_ctl_connect(kern_ctl_ref kctlref,
1635     struct sockaddr_ctl *sac,
1636     void **unitinfo)
1637 {
1638 	struct ifnet_init_eparams utun_init = {};
1639 	errno_t result = 0;
1640 
1641 	if (*unitinfo == NULL) {
1642 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1643 	}
1644 
1645 	struct utun_pcb *pcb = *unitinfo;
1646 	if (pcb == NULL) {
1647 		return EINVAL;
1648 	}
1649 
1650 	/* Handle case where utun_ctl_setup() was called, but ipsec_ctl_bind() was not */
1651 	if (pcb->utun_ctlref == NULL) {
1652 		(void)utun_ctl_bind(kctlref, sac, unitinfo);
1653 	}
1654 
1655 	snprintf(pcb->utun_if_xname, sizeof(pcb->utun_if_xname), "utun%d", pcb->utun_unit - 1);
1656 	snprintf(pcb->utun_unique_name, sizeof(pcb->utun_unique_name), "utunid%d", pcb->utun_unique_id - 1);
1657 
1658 	/* Create the interface */
1659 	bzero(&utun_init, sizeof(utun_init));
1660 	utun_init.ver = IFNET_INIT_CURRENT_VERSION;
1661 	utun_init.len = sizeof(utun_init);
1662 
1663 #if UTUN_NEXUS
1664 	if (pcb->utun_use_netif) {
1665 		utun_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
1666 		utun_init.tx_headroom = UTUN_IF_HEADROOM_SIZE;
1667 	} else
1668 #endif // UTUN_NEXUS
1669 	{
1670 		utun_init.flags = IFNET_INIT_NX_NOAUTO;
1671 		utun_init.start = utun_start;
1672 		utun_init.framer_extended = utun_framer;
1673 	}
1674 	utun_init.name = "utun";
1675 	utun_init.unit = pcb->utun_unit - 1;
1676 	utun_init.uniqueid = pcb->utun_unique_name;
1677 	utun_init.uniqueid_len = strlen(pcb->utun_unique_name);
1678 	utun_init.family = IFNET_FAMILY_UTUN;
1679 	utun_init.type = IFT_OTHER;
1680 	utun_init.demux = utun_demux;
1681 	utun_init.add_proto = utun_add_proto;
1682 	utun_init.del_proto = utun_del_proto;
1683 	utun_init.softc = pcb;
1684 	utun_init.ioctl = utun_ioctl;
1685 	utun_init.free = utun_detached;
1686 
1687 #if UTUN_NEXUS
1688 	if (pcb->utun_use_netif) {
1689 		result = utun_nexus_ifattach(pcb, &utun_init, &pcb->utun_ifp);
1690 		if (result != 0) {
1691 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_nexus_ifattach failed: %d\n", result);
1692 			utun_free_pcb(pcb, false);
1693 			*unitinfo = NULL;
1694 			return result;
1695 		}
1696 
1697 		if (pcb->utun_attach_fsw) {
1698 			result = utun_flowswitch_attach(pcb);
1699 			if (result != 0) {
1700 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - utun_flowswitch_attach failed: %d\n", result);
1701 				// Do not call utun_free_pcb(). We will be attached already, and will be freed later
1702 				// in utun_detached().
1703 				*unitinfo = NULL;
1704 				return result;
1705 			}
1706 		}
1707 
1708 		/* Attach to bpf */
1709 		bpfattach(pcb->utun_ifp, DLT_RAW, 0);
1710 	} else
1711 #endif // UTUN_NEXUS
1712 	{
1713 		/*
1714 		 * Upon success, this holds an ifnet reference which we will
1715 		 * release via ifnet_release() at final detach time.
1716 		 */
1717 		result = ifnet_allocate_extended(&utun_init, &pcb->utun_ifp);
1718 		if (result != 0) {
1719 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_allocate failed: %d\n", result);
1720 			utun_free_pcb(pcb, false);
1721 			*unitinfo = NULL;
1722 			return result;
1723 		}
1724 
1725 		/* Set flags and additional information. */
1726 		ifnet_set_mtu(pcb->utun_ifp, UTUN_DEFAULT_MTU);
1727 		ifnet_set_flags(pcb->utun_ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
1728 
1729 		/* The interface must generate its own IPv6 LinkLocal address,
1730 		 * if possible following the recommendation of RFC2472 to the 64bit interface ID
1731 		 */
1732 		ifnet_set_eflags(pcb->utun_ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
1733 
1734 		/* Reset the stats in case as the interface may have been recycled */
1735 		struct ifnet_stats_param stats;
1736 		bzero(&stats, sizeof(struct ifnet_stats_param));
1737 		ifnet_set_stat(pcb->utun_ifp, &stats);
1738 
1739 		/* Attach the interface */
1740 		result = ifnet_attach(pcb->utun_ifp, NULL);
1741 		if (result != 0) {
1742 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_connect - ifnet_attach failed: %d\n", result);
1743 			/* Release reference now since attach failed */
1744 			ifnet_release(pcb->utun_ifp);
1745 			utun_free_pcb(pcb, false);
1746 			*unitinfo = NULL;
1747 			return result;
1748 		}
1749 
1750 		/* Attach to bpf */
1751 		bpfattach(pcb->utun_ifp, DLT_NULL, UTUN_HEADER_SIZE(pcb));
1752 
1753 #if UTUN_NEXUS
1754 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
1755 		UTUN_SET_DATA_PATH_READY(pcb);
1756 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
1757 #endif // UTUN_NEXUS
1758 	}
1759 
1760 	/* The interfaces resoures allocated, mark it as running */
1761 	ifnet_set_flags(pcb->utun_ifp, IFF_RUNNING, IFF_RUNNING);
1762 
1763 	return result;
1764 }
1765 
1766 static errno_t
utun_detach_ip(ifnet_t interface,protocol_family_t protocol,socket_t pf_socket)1767 utun_detach_ip(ifnet_t interface,
1768     protocol_family_t protocol,
1769     socket_t pf_socket)
1770 {
1771 	errno_t result = EPROTONOSUPPORT;
1772 
1773 	/* Attempt a detach */
1774 	if (protocol == PF_INET) {
1775 		struct ifreq    ifr;
1776 
1777 		bzero(&ifr, sizeof(ifr));
1778 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1779 		    ifnet_name(interface), ifnet_unit(interface));
1780 
1781 		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
1782 	} else if (protocol == PF_INET6) {
1783 		struct in6_ifreq        ifr6;
1784 
1785 		bzero(&ifr6, sizeof(ifr6));
1786 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1787 		    ifnet_name(interface), ifnet_unit(interface));
1788 
1789 		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
1790 	}
1791 
1792 	return result;
1793 }
1794 
1795 static void
utun_remove_address(ifnet_t interface,protocol_family_t protocol,ifaddr_t address,socket_t pf_socket)1796 utun_remove_address(ifnet_t interface,
1797     protocol_family_t protocol,
1798     ifaddr_t address,
1799     socket_t pf_socket)
1800 {
1801 	errno_t result = 0;
1802 
1803 	/* Attempt a detach */
1804 	if (protocol == PF_INET) {
1805 		struct ifreq ifr;
1806 
1807 		bzero(&ifr, sizeof(ifr));
1808 		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
1809 		    ifnet_name(interface), ifnet_unit(interface));
1810 		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
1811 		if (result != 0) {
1812 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed: %d", result);
1813 		} else {
1814 			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
1815 			if (result != 0) {
1816 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR failed: %d", result);
1817 			}
1818 		}
1819 	} else if (protocol == PF_INET6) {
1820 		struct in6_ifreq ifr6;
1821 
1822 		bzero(&ifr6, sizeof(ifr6));
1823 		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
1824 		    ifnet_name(interface), ifnet_unit(interface));
1825 		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
1826 		    sizeof(ifr6.ifr_addr));
1827 		if (result != 0) {
1828 			os_log_error(OS_LOG_DEFAULT, "utun_remove_address - ifaddr_address failed (v6): %d",
1829 			    result);
1830 		} else {
1831 			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
1832 			if (result != 0) {
1833 				os_log_error(OS_LOG_DEFAULT, "utun_remove_address - SIOCDIFADDR_IN6 failed: %d",
1834 				    result);
1835 			}
1836 		}
1837 	}
1838 }
1839 
1840 static void
utun_cleanup_family(ifnet_t interface,protocol_family_t protocol)1841 utun_cleanup_family(ifnet_t interface,
1842     protocol_family_t protocol)
1843 {
1844 	errno_t result = 0;
1845 	socket_t pf_socket = NULL;
1846 	ifaddr_t *addresses = NULL;
1847 	int i;
1848 
1849 	if (protocol != PF_INET && protocol != PF_INET6) {
1850 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - invalid protocol family %d\n", protocol);
1851 		return;
1852 	}
1853 
1854 	/* Create a socket for removing addresses and detaching the protocol */
1855 	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
1856 	if (result != 0) {
1857 		if (result != EAFNOSUPPORT) {
1858 			os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - failed to create %s socket: %d\n",
1859 			    protocol == PF_INET ? "IP" : "IPv6", result);
1860 		}
1861 		goto cleanup;
1862 	}
1863 
1864 	/* always set SS_PRIV, we want to close and detach regardless */
1865 	sock_setpriv(pf_socket, 1);
1866 
1867 	result = utun_detach_ip(interface, protocol, pf_socket);
1868 	if (result == 0 || result == ENXIO) {
1869 		/* We are done! We either detached or weren't attached. */
1870 		goto cleanup;
1871 	} else if (result != EBUSY) {
1872 		/* Uh, not really sure what happened here... */
1873 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1874 		goto cleanup;
1875 	}
1876 
1877 	/*
1878 	 * At this point, we received an EBUSY error. This means there are
1879 	 * addresses attached. We should detach them and then try again.
1880 	 */
1881 	result = ifnet_get_address_list_family(interface, &addresses, protocol);
1882 	if (result != 0) {
1883 		os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
1884 		    ifnet_name(interface), ifnet_unit(interface),
1885 		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
1886 		goto cleanup;
1887 	}
1888 
1889 	for (i = 0; addresses[i] != 0; i++) {
1890 		utun_remove_address(interface, protocol, addresses[i], pf_socket);
1891 	}
1892 	ifnet_free_address_list(addresses);
1893 	addresses = NULL;
1894 
1895 	/*
1896 	 * The addresses should be gone, we should try the remove again.
1897 	 */
1898 	result = utun_detach_ip(interface, protocol, pf_socket);
1899 	if (result != 0 && result != ENXIO) {
1900 		os_log_error(OS_LOG_DEFAULT, "utun_cleanup_family - utun_detach_ip failed: %d\n", result);
1901 	}
1902 
1903 cleanup:
1904 	if (pf_socket != NULL) {
1905 		sock_close(pf_socket);
1906 	}
1907 
1908 	if (addresses != NULL) {
1909 		ifnet_free_address_list(addresses);
1910 	}
1911 }
1912 
1913 static errno_t
utun_ctl_disconnect(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo)1914 utun_ctl_disconnect(__unused kern_ctl_ref kctlref,
1915     __unused u_int32_t unit,
1916     void *unitinfo)
1917 {
1918 	struct utun_pcb *pcb = unitinfo;
1919 	ifnet_t ifp = NULL;
1920 	errno_t result = 0;
1921 
1922 	if (pcb == NULL) {
1923 		return EINVAL;
1924 	}
1925 
1926 #if UTUN_NEXUS
1927 	/* Wait until all threads in the data paths are done. */
1928 	utun_wait_data_move_drain(pcb);
1929 	// Tell the nexus to stop all rings
1930 	if (pcb->utun_netif_nexus != NULL && pcb->utun_netif_connected) {
1931 		kern_nexus_stop(pcb->utun_netif_nexus);
1932 	}
1933 #endif // UTUN_NEXUS
1934 
1935 	lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
1936 
1937 #if UTUN_NEXUS
1938 	uuid_t kpipe_uuid;
1939 	uuid_copy(kpipe_uuid, pcb->utun_kpipe_uuid);
1940 	uuid_clear(pcb->utun_kpipe_uuid);
1941 	pcb->utun_kpipe_enabled = FALSE;
1942 #endif // UTUN_NEXUS
1943 
1944 	pcb->utun_ctlref = NULL;
1945 
1946 	ifp = pcb->utun_ifp;
1947 	if (ifp != NULL) {
1948 #if UTUN_NEXUS
1949 		// Tell the nexus to stop all rings
1950 		if (pcb->utun_netif_nexus != NULL) {
1951 			/*
1952 			 * Quiesce the interface and flush any pending outbound packets.
1953 			 */
1954 			if_down(ifp);
1955 
1956 			/*
1957 			 * Suspend data movement and wait for IO threads to exit.
1958 			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
1959 			 * do this because utun nexuses are attached/detached separately.
1960 			 */
1961 			ifnet_datamov_suspend_and_drain(ifp);
1962 			if ((result = ifnet_detach(ifp)) != 0) {
1963 				panic("utun_ctl_disconnect - ifnet_detach failed: %d", result);
1964 			}
1965 
1966 			/*
1967 			 * We want to do everything in our power to ensure that the interface
1968 			 * really goes away when the socket is closed. We must remove IP/IPv6
1969 			 * addresses and detach the protocols. Finally, we can remove and
1970 			 * release the interface.
1971 			 */
1972 			utun_cleanup_family(ifp, AF_INET);
1973 			utun_cleanup_family(ifp, AF_INET6);
1974 
1975 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1976 
1977 			if (!uuid_is_null(kpipe_uuid)) {
1978 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
1979 					if (pcb->utun_kpipe_pp != NULL) {
1980 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
1981 						pcb->utun_kpipe_pp = NULL;
1982 					}
1983 					utun_unregister_kernel_pipe_nexus();
1984 				}
1985 			}
1986 			utun_nexus_detach(pcb);
1987 
1988 			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
1989 			ifnet_datamov_resume(ifp);
1990 		} else
1991 #endif // UTUN_NEXUS
1992 		{
1993 			lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
1994 
1995 #if UTUN_NEXUS
1996 			if (!uuid_is_null(kpipe_uuid)) {
1997 				if (kern_nexus_controller_free_provider_instance(utun_ncd, kpipe_uuid) == 0) {
1998 					if (pcb->utun_kpipe_pp != NULL) {
1999 						kern_pbufpool_destroy(pcb->utun_kpipe_pp);
2000 						pcb->utun_kpipe_pp = NULL;
2001 					}
2002 					utun_unregister_kernel_pipe_nexus();
2003 				}
2004 			}
2005 #endif // UTUN_NEXUS
2006 
2007 			/*
2008 			 * We want to do everything in our power to ensure that the interface
2009 			 * really goes away when the socket is closed. We must remove IP/IPv6
2010 			 * addresses and detach the protocols. Finally, we can remove and
2011 			 * release the interface.
2012 			 */
2013 			utun_cleanup_family(ifp, AF_INET);
2014 			utun_cleanup_family(ifp, AF_INET6);
2015 
2016 			/*
2017 			 * Detach now; utun_detach() will be called asynchronously once
2018 			 * the I/O reference count drops to 0.  There we will invoke
2019 			 * ifnet_release().
2020 			 */
2021 			if ((result = ifnet_detach(ifp)) != 0) {
2022 				os_log_error(OS_LOG_DEFAULT, "utun_ctl_disconnect - ifnet_detach failed: %d\n", result);
2023 			}
2024 		}
2025 	} else {
2026 		// Bound, but not connected
2027 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2028 		utun_free_pcb(pcb, false);
2029 	}
2030 
2031 	return 0;
2032 }
2033 
2034 static errno_t
utun_ctl_send(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,mbuf_t m,__unused int flags)2035 utun_ctl_send(__unused kern_ctl_ref kctlref,
2036     __unused u_int32_t unit,
2037     void *unitinfo,
2038     mbuf_t m,
2039     __unused int flags)
2040 {
2041 	/*
2042 	 * The userland ABI requires the first four bytes have the protocol family
2043 	 * in network byte order: swap them
2044 	 */
2045 	if (m_pktlen(m) >= (int32_t)UTUN_HEADER_SIZE((struct utun_pcb *)unitinfo)) {
2046 		*(protocol_family_t *)mbuf_data(m) = ntohl(*(protocol_family_t *)mbuf_data(m));
2047 	} else {
2048 		os_log_error(OS_LOG_DEFAULT, "%s - unexpected short mbuf pkt len %d\n", __func__, m_pktlen(m));
2049 	}
2050 
2051 	return utun_pkt_input((struct utun_pcb *)unitinfo, m);
2052 }
2053 
2054 static errno_t
utun_ctl_setopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t len)2055 utun_ctl_setopt(__unused kern_ctl_ref kctlref,
2056     __unused u_int32_t unit,
2057     void *unitinfo,
2058     int opt,
2059     void *data,
2060     size_t len)
2061 {
2062 	struct utun_pcb *pcb = unitinfo;
2063 	errno_t result = 0;
2064 	/* check for privileges for privileged options */
2065 	switch (opt) {
2066 	case UTUN_OPT_FLAGS:
2067 	case UTUN_OPT_EXT_IFDATA_STATS:
2068 	case UTUN_OPT_SET_DELEGATE_INTERFACE:
2069 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
2070 			return EPERM;
2071 		}
2072 		break;
2073 	}
2074 
2075 	switch (opt) {
2076 	case UTUN_OPT_FLAGS:
2077 		if (len != sizeof(u_int32_t)) {
2078 			result = EMSGSIZE;
2079 			break;
2080 		}
2081 		if (pcb->utun_ifp != NULL) {
2082 			// Only can set before connecting
2083 			result = EINVAL;
2084 			break;
2085 		}
2086 		pcb->utun_flags = *(u_int32_t *)data;
2087 		break;
2088 
2089 	case UTUN_OPT_EXT_IFDATA_STATS:
2090 		if (len != sizeof(int)) {
2091 			result = EMSGSIZE;
2092 			break;
2093 		}
2094 		if (pcb->utun_ifp == NULL) {
2095 			// Only can set after connecting
2096 			result = EINVAL;
2097 			break;
2098 		}
2099 		pcb->utun_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
2100 		break;
2101 
2102 	case UTUN_OPT_INC_IFDATA_STATS_IN:
2103 	case UTUN_OPT_INC_IFDATA_STATS_OUT: {
2104 		struct utun_stats_param *utsp = (struct utun_stats_param *)data;
2105 
2106 		if (utsp == NULL || len < sizeof(struct utun_stats_param)) {
2107 			result = EINVAL;
2108 			break;
2109 		}
2110 		if (pcb->utun_ifp == NULL) {
2111 			// Only can set after connecting
2112 			result = EINVAL;
2113 			break;
2114 		}
2115 		if (!pcb->utun_ext_ifdata_stats) {
2116 			result = EINVAL;
2117 			break;
2118 		}
2119 		if (opt == UTUN_OPT_INC_IFDATA_STATS_IN) {
2120 			ifnet_stat_increment_in(pcb->utun_ifp, utsp->utsp_packets,
2121 			    utsp->utsp_bytes, utsp->utsp_errors);
2122 		} else {
2123 			ifnet_stat_increment_out(pcb->utun_ifp, utsp->utsp_packets,
2124 			    utsp->utsp_bytes, utsp->utsp_errors);
2125 		}
2126 		break;
2127 	}
2128 	case UTUN_OPT_SET_DELEGATE_INTERFACE: {
2129 		ifnet_t         del_ifp = NULL;
2130 		char            name[IFNAMSIZ];
2131 
2132 		if (len > IFNAMSIZ - 1) {
2133 			result = EMSGSIZE;
2134 			break;
2135 		}
2136 		if (pcb->utun_ifp == NULL) {
2137 			// Only can set after connecting
2138 			result = EINVAL;
2139 			break;
2140 		}
2141 		if (len != 0) {            /* if len==0, del_ifp will be NULL causing the delegate to be removed */
2142 			bcopy(data, name, len);
2143 			name[len] = 0;
2144 			result = ifnet_find_by_name(name, &del_ifp);
2145 		}
2146 		if (result == 0) {
2147 			result = ifnet_set_delegate(pcb->utun_ifp, del_ifp);
2148 			if (del_ifp) {
2149 				ifnet_release(del_ifp);
2150 			}
2151 		}
2152 		break;
2153 	}
2154 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2155 		u_int32_t max_pending_packets = 0;
2156 		if (len != sizeof(u_int32_t)) {
2157 			result = EMSGSIZE;
2158 			break;
2159 		}
2160 		max_pending_packets = *(u_int32_t *)data;
2161 		if (max_pending_packets == 0) {
2162 			result = EINVAL;
2163 			break;
2164 		}
2165 		pcb->utun_max_pending_packets = max_pending_packets;
2166 		break;
2167 	}
2168 #if UTUN_NEXUS
2169 	case UTUN_OPT_ENABLE_CHANNEL: {
2170 		if (len != sizeof(int)) {
2171 			result = EMSGSIZE;
2172 			break;
2173 		}
2174 		if (pcb->utun_ifp == NULL) {
2175 			// Only can set after connecting
2176 			result = EINVAL;
2177 			break;
2178 		}
2179 		if (*(int *)data) {
2180 			result = utun_enable_channel(pcb, current_proc());
2181 		} else {
2182 			result = utun_disable_channel(pcb);
2183 		}
2184 		break;
2185 	}
2186 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2187 		if (len != sizeof(int)) {
2188 			result = EMSGSIZE;
2189 			break;
2190 		}
2191 		if (pcb->utun_ifp == NULL) {
2192 			// Only can set after connecting
2193 			result = EINVAL;
2194 			break;
2195 		}
2196 		if (!if_is_fsw_transport_netagent_enabled()) {
2197 			result = ENOTSUP;
2198 			break;
2199 		}
2200 		if (uuid_is_null(pcb->utun_nx.fsw_agent)) {
2201 			result = ENOENT;
2202 			break;
2203 		}
2204 
2205 		uint32_t flags = netagent_get_flags(pcb->utun_nx.fsw_agent);
2206 
2207 		if (*(int *)data) {
2208 			pcb->utun_needs_netagent = true;
2209 			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
2210 			    NETAGENT_FLAG_NEXUS_LISTENER);
2211 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2212 		} else {
2213 			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
2214 			    NETAGENT_FLAG_NEXUS_LISTENER);
2215 			result = netagent_set_flags(pcb->utun_nx.fsw_agent, flags);
2216 			pcb->utun_needs_netagent = false;
2217 		}
2218 		break;
2219 	}
2220 	case UTUN_OPT_ATTACH_FLOWSWITCH: {
2221 		if (len != sizeof(int)) {
2222 			result = EMSGSIZE;
2223 			break;
2224 		}
2225 		if (pcb->utun_ifp != NULL) {
2226 			// Only can set before connecting
2227 			result = EINVAL;
2228 			break;
2229 		}
2230 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2231 		pcb->utun_attach_fsw = !!(*(int *)data);
2232 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2233 		break;
2234 	}
2235 	case UTUN_OPT_ENABLE_NETIF: {
2236 		if (len != sizeof(int)) {
2237 			result = EMSGSIZE;
2238 			break;
2239 		}
2240 		if (pcb->utun_ifp != NULL) {
2241 			// Only can set before connecting
2242 			result = EINVAL;
2243 			break;
2244 		}
2245 		lck_rw_lock_exclusive(&pcb->utun_pcb_lock);
2246 		pcb->utun_use_netif = !!(*(int *)data);
2247 		lck_rw_unlock_exclusive(&pcb->utun_pcb_lock);
2248 		break;
2249 	}
2250 	case UTUN_OPT_SLOT_SIZE: {
2251 		if (len != sizeof(u_int32_t)) {
2252 			result = EMSGSIZE;
2253 			break;
2254 		}
2255 		if (pcb->utun_ifp != NULL) {
2256 			// Only can set before connecting
2257 			result = EINVAL;
2258 			break;
2259 		}
2260 		u_int32_t slot_size = *(u_int32_t *)data;
2261 		if (slot_size < UTUN_IF_MIN_SLOT_SIZE ||
2262 		    slot_size > UTUN_IF_MAX_SLOT_SIZE) {
2263 			return EINVAL;
2264 		}
2265 		pcb->utun_slot_size = slot_size;
2266 		break;
2267 	}
2268 	case UTUN_OPT_NETIF_RING_SIZE: {
2269 		if (len != sizeof(u_int32_t)) {
2270 			result = EMSGSIZE;
2271 			break;
2272 		}
2273 		if (pcb->utun_ifp != NULL) {
2274 			// Only can set before connecting
2275 			result = EINVAL;
2276 			break;
2277 		}
2278 		u_int32_t ring_size = *(u_int32_t *)data;
2279 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2280 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2281 			return EINVAL;
2282 		}
2283 		pcb->utun_netif_ring_size = ring_size;
2284 		break;
2285 	}
2286 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2287 		if (len != sizeof(u_int32_t)) {
2288 			result = EMSGSIZE;
2289 			break;
2290 		}
2291 		if (pcb->utun_ifp != NULL) {
2292 			// Only can set before connecting
2293 			result = EINVAL;
2294 			break;
2295 		}
2296 		u_int32_t ring_size = *(u_int32_t *)data;
2297 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2298 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2299 			return EINVAL;
2300 		}
2301 		pcb->utun_tx_fsw_ring_size = ring_size;
2302 		break;
2303 	}
2304 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2305 		if (len != sizeof(u_int32_t)) {
2306 			result = EMSGSIZE;
2307 			break;
2308 		}
2309 		if (pcb->utun_ifp != NULL) {
2310 			// Only can set before connecting
2311 			result = EINVAL;
2312 			break;
2313 		}
2314 		u_int32_t ring_size = *(u_int32_t *)data;
2315 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2316 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2317 			return EINVAL;
2318 		}
2319 		pcb->utun_rx_fsw_ring_size = ring_size;
2320 		break;
2321 	}
2322 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2323 		if (len != sizeof(u_int32_t)) {
2324 			result = EMSGSIZE;
2325 			break;
2326 		}
2327 		if (pcb->utun_ifp != NULL) {
2328 			// Only can set before connecting
2329 			result = EINVAL;
2330 			break;
2331 		}
2332 		u_int32_t ring_size = *(u_int32_t *)data;
2333 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2334 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2335 			return EINVAL;
2336 		}
2337 		pcb->utun_kpipe_tx_ring_size = ring_size;
2338 		break;
2339 	}
2340 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2341 		if (len != sizeof(u_int32_t)) {
2342 			result = EMSGSIZE;
2343 			break;
2344 		}
2345 		if (pcb->utun_ifp != NULL) {
2346 			// Only can set before connecting
2347 			result = EINVAL;
2348 			break;
2349 		}
2350 		u_int32_t ring_size = *(u_int32_t *)data;
2351 		if (ring_size < UTUN_IF_MIN_RING_SIZE ||
2352 		    ring_size > UTUN_IF_MAX_RING_SIZE) {
2353 			return EINVAL;
2354 		}
2355 		pcb->utun_kpipe_rx_ring_size = ring_size;
2356 		break;
2357 	}
2358 #endif // UTUN_NEXUS
2359 	default: {
2360 		result = ENOPROTOOPT;
2361 		break;
2362 	}
2363 	}
2364 
2365 	return result;
2366 }
2367 
2368 static errno_t
utun_ctl_getopt(__unused kern_ctl_ref kctlref,__unused u_int32_t unit,void * unitinfo,int opt,void * data,size_t * len)2369 utun_ctl_getopt(__unused kern_ctl_ref kctlref,
2370     __unused u_int32_t unit,
2371     void *unitinfo,
2372     int opt,
2373     void *data,
2374     size_t *len)
2375 {
2376 	struct utun_pcb *pcb = unitinfo;
2377 	errno_t result = 0;
2378 
2379 	switch (opt) {
2380 	case UTUN_OPT_FLAGS:
2381 		if (*len != sizeof(u_int32_t)) {
2382 			result = EMSGSIZE;
2383 		} else {
2384 			*(u_int32_t *)data = pcb->utun_flags;
2385 		}
2386 		break;
2387 
2388 	case UTUN_OPT_EXT_IFDATA_STATS:
2389 		if (*len != sizeof(int)) {
2390 			result = EMSGSIZE;
2391 		} else {
2392 			*(int *)data = (pcb->utun_ext_ifdata_stats) ? 1 : 0;
2393 		}
2394 		break;
2395 
2396 	case UTUN_OPT_IFNAME:
2397 		if (*len < MIN(strlen(pcb->utun_if_xname) + 1, sizeof(pcb->utun_if_xname))) {
2398 			result = EMSGSIZE;
2399 		} else {
2400 			if (pcb->utun_ifp == NULL) {
2401 				// Only can get after connecting
2402 				result = EINVAL;
2403 				break;
2404 			}
2405 			*len = scnprintf(data, *len, "%s", pcb->utun_if_xname) + 1;
2406 		}
2407 		break;
2408 
2409 	case UTUN_OPT_MAX_PENDING_PACKETS: {
2410 		if (*len != sizeof(u_int32_t)) {
2411 			result = EMSGSIZE;
2412 		} else {
2413 			*((u_int32_t *)data) = pcb->utun_max_pending_packets;
2414 		}
2415 		break;
2416 	}
2417 
2418 #if UTUN_NEXUS
2419 	case UTUN_OPT_ENABLE_CHANNEL: {
2420 		if (*len != sizeof(int)) {
2421 			result = EMSGSIZE;
2422 		} else {
2423 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2424 			*(int *)data = pcb->utun_kpipe_enabled;
2425 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2426 		}
2427 		break;
2428 	}
2429 
2430 	case UTUN_OPT_ENABLE_FLOWSWITCH: {
2431 		if (*len != sizeof(int)) {
2432 			result = EMSGSIZE;
2433 		} else {
2434 			*(int *)data = if_check_netagent(pcb->utun_ifp, pcb->utun_nx.fsw_agent);
2435 		}
2436 		break;
2437 	}
2438 
2439 	case UTUN_OPT_ENABLE_NETIF: {
2440 		if (*len != sizeof(int)) {
2441 			result = EMSGSIZE;
2442 		} else {
2443 			lck_rw_lock_shared(&pcb->utun_pcb_lock);
2444 			*(int *)data = !!pcb->utun_use_netif;
2445 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2446 		}
2447 		break;
2448 	}
2449 
2450 	case UTUN_OPT_GET_CHANNEL_UUID: {
2451 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2452 		if (uuid_is_null(pcb->utun_kpipe_uuid)) {
2453 			result = ENXIO;
2454 		} else if (*len != sizeof(uuid_t)) {
2455 			result = EMSGSIZE;
2456 		} else {
2457 			uuid_copy(data, pcb->utun_kpipe_uuid);
2458 		}
2459 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2460 		break;
2461 	}
2462 	case UTUN_OPT_SLOT_SIZE: {
2463 		if (*len != sizeof(u_int32_t)) {
2464 			result = EMSGSIZE;
2465 		} else {
2466 			*(u_int32_t *)data = pcb->utun_slot_size;
2467 		}
2468 		break;
2469 	}
2470 	case UTUN_OPT_NETIF_RING_SIZE: {
2471 		if (*len != sizeof(u_int32_t)) {
2472 			result = EMSGSIZE;
2473 		} else {
2474 			*(u_int32_t *)data = pcb->utun_netif_ring_size;
2475 		}
2476 		break;
2477 	}
2478 	case UTUN_OPT_TX_FSW_RING_SIZE: {
2479 		if (*len != sizeof(u_int32_t)) {
2480 			result = EMSGSIZE;
2481 		} else {
2482 			*(u_int32_t *)data = pcb->utun_tx_fsw_ring_size;
2483 		}
2484 		break;
2485 	}
2486 	case UTUN_OPT_RX_FSW_RING_SIZE: {
2487 		if (*len != sizeof(u_int32_t)) {
2488 			result = EMSGSIZE;
2489 		} else {
2490 			*(u_int32_t *)data = pcb->utun_rx_fsw_ring_size;
2491 		}
2492 		break;
2493 	}
2494 	case UTUN_OPT_KPIPE_TX_RING_SIZE: {
2495 		if (*len != sizeof(u_int32_t)) {
2496 			result = EMSGSIZE;
2497 		} else {
2498 			*(u_int32_t *)data = pcb->utun_kpipe_tx_ring_size;
2499 		}
2500 		break;
2501 	}
2502 	case UTUN_OPT_KPIPE_RX_RING_SIZE: {
2503 		if (*len != sizeof(u_int32_t)) {
2504 			result = EMSGSIZE;
2505 		} else {
2506 			*(u_int32_t *)data = pcb->utun_kpipe_rx_ring_size;
2507 		}
2508 		break;
2509 	}
2510 #endif // UTUN_NEXUS
2511 
2512 	default:
2513 		result = ENOPROTOOPT;
2514 		break;
2515 	}
2516 
2517 	return result;
2518 }
2519 
2520 static void
utun_ctl_rcvd(kern_ctl_ref kctlref,u_int32_t unit,void * unitinfo,int flags)2521 utun_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int flags)
2522 {
2523 #pragma unused(flags)
2524 	bool reenable_output = false;
2525 	struct utun_pcb *pcb = unitinfo;
2526 	if (pcb == NULL) {
2527 		return;
2528 	}
2529 	ifnet_lock_exclusive(pcb->utun_ifp);
2530 
2531 	u_int32_t utun_packet_cnt;
2532 	errno_t error_pc = ctl_getenqueuepacketcount(kctlref, unit, &utun_packet_cnt);
2533 	if (error_pc != 0) {
2534 		os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2535 		utun_packet_cnt = 0;
2536 	}
2537 
2538 	if (utun_packet_cnt < pcb->utun_max_pending_packets) {
2539 		reenable_output = true;
2540 	}
2541 
2542 	if (reenable_output) {
2543 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
2544 		if (error != 0) {
2545 			os_log_error(OS_LOG_DEFAULT, "utun_ctl_rcvd: ifnet_enable_output returned error %d\n", error);
2546 		}
2547 	}
2548 	ifnet_lock_done(pcb->utun_ifp);
2549 }
2550 
2551 /* Network Interface functions */
2552 static void
utun_start(ifnet_t interface)2553 utun_start(ifnet_t interface)
2554 {
2555 	mbuf_t data;
2556 	struct utun_pcb *pcb = ifnet_softc(interface);
2557 
2558 	VERIFY(pcb != NULL);
2559 
2560 #if UTUN_NEXUS
2561 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
2562 	if (pcb->utun_kpipe_enabled) {
2563 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2564 		if (!utun_data_move_begin(pcb)) {
2565 			os_log_info(OS_LOG_DEFAULT,
2566 			    "%s: data path stopped for %s\n",
2567 			    __func__, if_name(pcb->utun_ifp));
2568 			return;
2569 		}
2570 		/* It's possible to have channels enabled, but not yet have the channel opened,
2571 		 * in which case the rxring will not be set
2572 		 */
2573 		if (pcb->utun_kpipe_rxring != NULL) {
2574 			kern_channel_notify(pcb->utun_kpipe_rxring, 0);
2575 		}
2576 		utun_data_move_end(pcb);
2577 		return;
2578 	}
2579 	lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2580 #endif // UTUN_NEXUS
2581 
2582 	for (;;) {
2583 		bool can_accept_packets = true;
2584 		ifnet_lock_shared(pcb->utun_ifp);
2585 
2586 		u_int32_t utun_packet_cnt;
2587 		errno_t error_pc = ctl_getenqueuepacketcount(pcb->utun_ctlref, pcb->utun_unit, &utun_packet_cnt);
2588 		if (error_pc != 0) {
2589 			os_log_error(OS_LOG_DEFAULT, "utun_start: ctl_getenqueuepacketcount returned error %d\n", error_pc);
2590 			utun_packet_cnt = 0;
2591 		}
2592 
2593 		can_accept_packets = (utun_packet_cnt < pcb->utun_max_pending_packets);
2594 		if (!can_accept_packets && pcb->utun_ctlref) {
2595 			u_int32_t difference = 0;
2596 			if (ctl_getenqueuereadable(pcb->utun_ctlref, pcb->utun_unit, &difference) == 0) {
2597 				if (difference > 0) {
2598 					// If the low-water mark has not yet been reached, we still need to enqueue data
2599 					// into the buffer
2600 					can_accept_packets = true;
2601 				}
2602 			}
2603 		}
2604 		if (!can_accept_packets) {
2605 			errno_t error = ifnet_disable_output(interface);
2606 			if (error != 0) {
2607 				os_log_error(OS_LOG_DEFAULT, "utun_start: ifnet_disable_output returned error %d\n", error);
2608 			}
2609 			ifnet_lock_done(pcb->utun_ifp);
2610 			break;
2611 		}
2612 		ifnet_lock_done(pcb->utun_ifp);
2613 		if (ifnet_dequeue(interface, &data) != 0) {
2614 			break;
2615 		}
2616 		if (utun_output(interface, data) != 0) {
2617 			break;
2618 		}
2619 	}
2620 }
2621 
2622 static errno_t
utun_output(ifnet_t interface,mbuf_t data)2623 utun_output(ifnet_t     interface,
2624     mbuf_t data)
2625 {
2626 	struct utun_pcb *pcb = ifnet_softc(interface);
2627 	errno_t result;
2628 
2629 	VERIFY(interface == pcb->utun_ifp);
2630 
2631 #if UTUN_NEXUS
2632 	if (!pcb->utun_use_netif)
2633 #endif // UTUN_NEXUS
2634 	{
2635 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2636 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
2637 		}
2638 	}
2639 
2640 	if (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT) {
2641 		/* flush data */
2642 		mbuf_freem(data);
2643 		return 0;
2644 	}
2645 
2646 	// otherwise, fall thru to ctl_enqueumbuf
2647 	if (pcb->utun_ctlref) {
2648 		int     length;
2649 
2650 		/*
2651 		 * The ABI requires the protocol in network byte order
2652 		 */
2653 		if (m_pktlen(data) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2654 			*(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data));
2655 		}
2656 
2657 		length = mbuf_pkthdr_len(data);
2658 		result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR);
2659 		if (result != 0) {
2660 			mbuf_freem(data);
2661 			os_log_error(OS_LOG_DEFAULT, "utun_output - ctl_enqueuembuf failed: %d\n", result);
2662 #if UTUN_NEXUS
2663 			if (!pcb->utun_use_netif)
2664 #endif // UTUN_NEXUS
2665 			{
2666 				ifnet_stat_increment_out(interface, 0, 0, 1);
2667 			}
2668 		} else {
2669 #if UTUN_NEXUS
2670 			if (!pcb->utun_use_netif)
2671 #endif // UTUN_NEXUS
2672 			{
2673 				if (!pcb->utun_ext_ifdata_stats) {
2674 					ifnet_stat_increment_out(interface, 1, length, 0);
2675 				}
2676 			}
2677 		}
2678 	} else {
2679 		mbuf_freem(data);
2680 	}
2681 
2682 	return 0;
2683 }
2684 
2685 static errno_t
utun_demux(__unused ifnet_t interface,mbuf_t data,__unused char * frame_header,protocol_family_t * protocol)2686 utun_demux(__unused ifnet_t interface,
2687     mbuf_t data,
2688     __unused char *frame_header,
2689     protocol_family_t *protocol)
2690 {
2691 #if UTUN_NEXUS
2692 	struct utun_pcb *pcb = ifnet_softc(interface);
2693 	struct ip *ip;
2694 	u_int ip_version;
2695 #endif
2696 
2697 	while (data != NULL && mbuf_len(data) < 1) {
2698 		data = mbuf_next(data);
2699 	}
2700 
2701 	if (data == NULL) {
2702 		return ENOENT;
2703 	}
2704 
2705 #if UTUN_NEXUS
2706 	if (pcb->utun_use_netif) {
2707 		ip = mtod(data, struct ip *);
2708 		ip_version = ip->ip_v;
2709 
2710 		switch (ip_version) {
2711 		case 4:
2712 			*protocol = PF_INET;
2713 			return 0;
2714 		case 6:
2715 			*protocol = PF_INET6;
2716 			return 0;
2717 		default:
2718 			*protocol = 0;
2719 			break;
2720 		}
2721 	} else
2722 #endif // UTUN_NEXUS
2723 	{
2724 		*protocol = *(u_int32_t *)mbuf_data(data);
2725 	}
2726 
2727 	return 0;
2728 }
2729 
2730 static errno_t
utun_framer(ifnet_t interface,mbuf_t * packet,__unused const struct sockaddr * dest,__unused const char * desk_linkaddr,const char * frame_type,u_int32_t * prepend_len,u_int32_t * postpend_len)2731 utun_framer(ifnet_t interface,
2732     mbuf_t *packet,
2733     __unused const struct sockaddr *dest,
2734     __unused const char *desk_linkaddr,
2735     const char *frame_type,
2736     u_int32_t *prepend_len,
2737     u_int32_t *postpend_len)
2738 {
2739 	struct utun_pcb *pcb = ifnet_softc(interface);
2740 	VERIFY(interface == pcb->utun_ifp);
2741 
2742 	u_int32_t header_length = UTUN_HEADER_SIZE(pcb);
2743 	if (mbuf_prepend(packet, header_length, MBUF_DONTWAIT) != 0) {
2744 		os_log_error(OS_LOG_DEFAULT, "utun_framer - ifnet_output prepend failed\n");
2745 
2746 		ifnet_stat_increment_out(interface, 0, 0, 1);
2747 
2748 		// just	return, because the buffer was freed in mbuf_prepend
2749 		return EJUSTRETURN;
2750 	}
2751 	if (prepend_len != NULL) {
2752 		*prepend_len = header_length;
2753 	}
2754 	if (postpend_len != NULL) {
2755 		*postpend_len = 0;
2756 	}
2757 
2758 	// place protocol number at the beginning of the mbuf
2759 	*(protocol_family_t *)mbuf_data(*packet) = *(protocol_family_t *)(uintptr_t)(size_t)frame_type;
2760 
2761 #if NECP
2762 	// Add process uuid if applicable
2763 	if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
2764 		if (m_pktlen(*packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2765 			u_int8_t *header = (u_int8_t *)mbuf_data(*packet);
2766 			int uuid_err = necp_get_app_uuid_from_packet(*packet, (void *)(header + sizeof(u_int32_t)));
2767 			if (uuid_err != 0) {
2768 				os_log_error(OS_LOG_DEFAULT, "Received app uuid error %d for %s%d\n", uuid_err, ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp));
2769 			}
2770 		} else {
2771 			os_log_error(OS_LOG_DEFAULT, "Cannot set proc uuid for %s%d, size %d < %zu\n", ifnet_name(pcb->utun_ifp), ifnet_unit(pcb->utun_ifp),
2772 			    m_pktlen(*packet), UTUN_HEADER_SIZE(pcb));
2773 		}
2774 	}
2775 #endif // NECP
2776 
2777 	return 0;
2778 }
2779 
2780 static errno_t
utun_add_proto(__unused ifnet_t interface,protocol_family_t protocol,__unused const struct ifnet_demux_desc * demux_array,__unused u_int32_t demux_count)2781 utun_add_proto(__unused ifnet_t interface,
2782     protocol_family_t protocol,
2783     __unused const struct ifnet_demux_desc *demux_array,
2784     __unused u_int32_t demux_count)
2785 {
2786 	switch (protocol) {
2787 	case PF_INET:
2788 		return 0;
2789 	case PF_INET6:
2790 		return 0;
2791 	default:
2792 		break;
2793 	}
2794 
2795 	return ENOPROTOOPT;
2796 }
2797 
2798 static errno_t
utun_del_proto(__unused ifnet_t interface,__unused protocol_family_t protocol)2799 utun_del_proto(__unused ifnet_t interface,
2800     __unused protocol_family_t protocol)
2801 {
2802 	return 0;
2803 }
2804 
2805 static errno_t
utun_ioctl(ifnet_t interface,u_long command,void * data)2806 utun_ioctl(ifnet_t interface,
2807     u_long command,
2808     void *data)
2809 {
2810 #if UTUN_NEXUS
2811 	struct utun_pcb *pcb = ifnet_softc(interface);
2812 #endif
2813 	errno_t result = 0;
2814 
2815 	switch (command) {
2816 	case SIOCSIFMTU: {
2817 #if UTUN_NEXUS
2818 		if (pcb->utun_use_netif) {
2819 			// Make sure we can fit packets in the channel buffers
2820 			// Allow for the headroom in the slot
2821 			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) + UTUN_IF_HEADROOM_SIZE > pcb->utun_slot_size) {
2822 				result = EINVAL;
2823 			} else {
2824 				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
2825 			}
2826 		} else
2827 #endif // UTUN_NEXUS
2828 		{
2829 			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
2830 		}
2831 		break;
2832 	}
2833 
2834 	case SIOCSIFFLAGS:
2835 		/* ifioctl() takes care of it */
2836 		break;
2837 
2838 	default:
2839 		result = EOPNOTSUPP;
2840 	}
2841 
2842 	return result;
2843 }
2844 
2845 static void
utun_detached(ifnet_t interface)2846 utun_detached(ifnet_t interface)
2847 {
2848 	struct utun_pcb *pcb = ifnet_softc(interface);
2849 	(void)ifnet_release(interface);
2850 	lck_mtx_lock(&utun_lock);
2851 	utun_free_pcb(pcb, true);
2852 	(void)ifnet_dispose(interface);
2853 	lck_mtx_unlock(&utun_lock);
2854 }
2855 
2856 /* Protocol Handlers */
2857 
2858 static errno_t
utun_proto_input(__unused ifnet_t interface,protocol_family_t protocol,mbuf_t m,__unused char * frame_header)2859 utun_proto_input(__unused ifnet_t interface,
2860     protocol_family_t protocol,
2861     mbuf_t m,
2862     __unused char *frame_header)
2863 {
2864 	struct utun_pcb *pcb = ifnet_softc(interface);
2865 #if UTUN_NEXUS
2866 	if (!pcb->utun_use_netif)
2867 #endif // UTUN_NEXUS
2868 	{
2869 		mbuf_adj(m, UTUN_HEADER_SIZE(pcb));
2870 	}
2871 	int32_t pktlen = m->m_pkthdr.len;
2872 	if (proto_input(protocol, m) != 0) {
2873 		m_freem(m);
2874 #if UTUN_NEXUS
2875 		if (!pcb->utun_use_netif)
2876 #endif // UTUN_NEXUS
2877 		{
2878 			ifnet_stat_increment_in(interface, 0, 0, 1);
2879 		}
2880 	} else {
2881 #if UTUN_NEXUS
2882 		if (!pcb->utun_use_netif)
2883 #endif // UTUN_NEXUS
2884 		{
2885 			ifnet_stat_increment_in(interface, 1, pktlen, 0);
2886 		}
2887 	}
2888 
2889 	return 0;
2890 }
2891 
2892 static errno_t
utun_proto_pre_output(__unused ifnet_t interface,protocol_family_t protocol,__unused mbuf_t * packet,__unused const struct sockaddr * dest,__unused void * route,char * frame_type,__unused char * link_layer_dest)2893 utun_proto_pre_output(__unused ifnet_t interface,
2894     protocol_family_t protocol,
2895     __unused mbuf_t *packet,
2896     __unused const struct sockaddr *dest,
2897     __unused void *route,
2898     char *frame_type,
2899     __unused char *link_layer_dest)
2900 {
2901 	*(protocol_family_t *)(void *)frame_type = protocol;
2902 	return 0;
2903 }
2904 
2905 static errno_t
utun_attach_proto(ifnet_t interface,protocol_family_t protocol)2906 utun_attach_proto(ifnet_t interface,
2907     protocol_family_t protocol)
2908 {
2909 	struct ifnet_attach_proto_param proto;
2910 
2911 	bzero(&proto, sizeof(proto));
2912 	proto.input = utun_proto_input;
2913 	proto.pre_output = utun_proto_pre_output;
2914 
2915 	errno_t result = ifnet_attach_protocol(interface, protocol, &proto);
2916 	if (result != 0 && result != EEXIST) {
2917 		os_log_error(OS_LOG_DEFAULT, "utun_attach_inet - ifnet_attach_protocol %d failed: %d\n",
2918 		    protocol, result);
2919 	}
2920 
2921 	return result;
2922 }
2923 
2924 static errno_t
utun_pkt_input(struct utun_pcb * pcb,mbuf_t packet)2925 utun_pkt_input(struct utun_pcb *pcb, mbuf_t packet)
2926 {
2927 #if UTUN_NEXUS
2928 	if (pcb->utun_use_netif) {
2929 		if (!utun_data_move_begin(pcb)) {
2930 			os_log_info(OS_LOG_DEFAULT,
2931 			    "%s: data path stopped for %s\n",
2932 			    __func__, if_name(pcb->utun_ifp));
2933 			return ENXIO;
2934 		}
2935 
2936 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
2937 
2938 		lck_mtx_lock(&pcb->utun_input_chain_lock);
2939 
2940 		if (pcb->utun_input_chain_count > (u_int32_t)if_utun_max_pending_input) {
2941 			lck_mtx_unlock(&pcb->utun_input_chain_lock);
2942 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2943 			utun_data_move_end(pcb);
2944 			return ENOSPC;
2945 		}
2946 
2947 		if (pcb->utun_input_chain != NULL) {
2948 			pcb->utun_input_chain_last->m_nextpkt = packet;
2949 		} else {
2950 			pcb->utun_input_chain = packet;
2951 		}
2952 		pcb->utun_input_chain_count++;
2953 		while (packet->m_nextpkt) {
2954 			VERIFY(packet != packet->m_nextpkt);
2955 			packet = packet->m_nextpkt;
2956 			pcb->utun_input_chain_count++;
2957 		}
2958 		pcb->utun_input_chain_last = packet;
2959 		lck_mtx_unlock(&pcb->utun_input_chain_lock);
2960 
2961 		kern_channel_ring_t rx_ring = pcb->utun_netif_rxring;
2962 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
2963 
2964 		if (rx_ring != NULL) {
2965 			kern_channel_notify(rx_ring, 0);
2966 		}
2967 
2968 		utun_data_move_end(pcb);
2969 		return 0;
2970 	} else
2971 #endif // UTUN_NEXUS
2972 	{
2973 		mbuf_pkthdr_setrcvif(packet, pcb->utun_ifp);
2974 
2975 		if (m_pktlen(packet) >= (int32_t)UTUN_HEADER_SIZE(pcb)) {
2976 			bpf_tap_in(pcb->utun_ifp, DLT_NULL, packet, 0, 0);
2977 		}
2978 		if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) {
2979 			/* flush data */
2980 			mbuf_freem(packet);
2981 			return 0;
2982 		}
2983 
2984 		errno_t result = 0;
2985 		if (!pcb->utun_ext_ifdata_stats) {
2986 			struct ifnet_stat_increment_param incs = {};
2987 			incs.packets_in = 1;
2988 			incs.bytes_in = mbuf_pkthdr_len(packet);
2989 			result = ifnet_input(pcb->utun_ifp, packet, &incs);
2990 		} else {
2991 			result = ifnet_input(pcb->utun_ifp, packet, NULL);
2992 		}
2993 		if (result != 0) {
2994 			ifnet_stat_increment_in(pcb->utun_ifp, 0, 0, 1);
2995 
2996 			os_log_error(OS_LOG_DEFAULT, "%s - ifnet_input failed: %d\n", __FUNCTION__, result);
2997 		}
2998 
2999 		return 0;
3000 	}
3001 }
3002 
3003 #if UTUN_NEXUS
3004 
3005 static errno_t
utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)3006 utun_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
3007 {
3008 	return 0;
3009 }
3010 
3011 static void
utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)3012 utun_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
3013 {
3014 	// Ignore
3015 }
3016 
3017 static errno_t
utun_register_nexus(void)3018 utun_register_nexus(void)
3019 {
3020 	const struct kern_nexus_domain_provider_init dp_init = {
3021 		.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3022 		.nxdpi_flags = 0,
3023 		.nxdpi_init = utun_nxdp_init,
3024 		.nxdpi_fini = utun_nxdp_fini
3025 	};
3026 	errno_t err = 0;
3027 
3028 	/* utun_nxdp_init() is called before this function returns */
3029 	err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
3030 	    (const uint8_t *) "com.apple.utun",
3031 	    &dp_init, sizeof(dp_init),
3032 	    &utun_nx_dom_prov);
3033 	if (err != 0) {
3034 		os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
3035 		return err;
3036 	}
3037 	return 0;
3038 }
3039 boolean_t
utun_interface_needs_netagent(ifnet_t interface)3040 utun_interface_needs_netagent(ifnet_t interface)
3041 {
3042 	struct utun_pcb *pcb = NULL;
3043 
3044 	if (interface == NULL) {
3045 		return FALSE;
3046 	}
3047 
3048 	pcb = ifnet_softc(interface);
3049 
3050 	if (pcb == NULL) {
3051 		return FALSE;
3052 	}
3053 
3054 	return pcb->utun_needs_netagent == true;
3055 }
3056 
3057 static errno_t
utun_ifnet_set_attrs(ifnet_t ifp)3058 utun_ifnet_set_attrs(ifnet_t ifp)
3059 {
3060 	/* Set flags and additional information. */
3061 	ifnet_set_mtu(ifp, 1500);
3062 	ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
3063 
3064 	/* The interface must generate its own IPv6 LinkLocal address,
3065 	 * if possible following the recommendation of RFC2472 to the 64bit interface ID
3066 	 */
3067 	ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);
3068 
3069 	return 0;
3070 }
3071 
3072 static errno_t
utun_netif_prepare(kern_nexus_t nexus,ifnet_t ifp)3073 utun_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
3074 {
3075 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3076 	pcb->utun_netif_nexus = nexus;
3077 	return utun_ifnet_set_attrs(ifp);
3078 }
3079 
3080 static errno_t
utun_nexus_pre_connect(kern_nexus_provider_t nxprov,proc_t p,kern_nexus_t nexus,nexus_port_t nexus_port,kern_channel_t channel,void ** ch_ctx)3081 utun_nexus_pre_connect(kern_nexus_provider_t nxprov,
3082     proc_t p, kern_nexus_t nexus,
3083     nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
3084 {
3085 #pragma unused(nxprov, p)
3086 #pragma unused(nexus, nexus_port, channel, ch_ctx)
3087 	return 0;
3088 }
3089 
3090 static errno_t
utun_nexus_connected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3091 utun_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3092     kern_channel_t channel)
3093 {
3094 #pragma unused(nxprov, channel)
3095 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3096 	boolean_t ok = ifnet_is_attached(pcb->utun_ifp, 1);
3097 	if (pcb->utun_netif_nexus == nexus) {
3098 		pcb->utun_netif_connected = true;
3099 	}
3100 	if (ok) {
3101 		lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3102 		UTUN_SET_DATA_PATH_READY(pcb);
3103 		lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3104 	}
3105 	return ok ? 0 : ENXIO;
3106 }
3107 
3108 static void
utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3109 utun_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3110     kern_channel_t channel)
3111 {
3112 #pragma unused(nxprov, channel)
3113 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3114 	/* Wait until all threads in the data paths are done. */
3115 	utun_wait_data_move_drain(pcb);
3116 }
3117 
3118 static void
utun_netif_pre_disconnect(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3119 utun_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3120     kern_channel_t channel)
3121 {
3122 #pragma unused(nxprov, channel)
3123 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3124 	/* Wait until all threads in the data paths are done. */
3125 	utun_wait_data_move_drain(pcb);
3126 }
3127 
3128 static void
utun_nexus_disconnected(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel)3129 utun_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3130     kern_channel_t channel)
3131 {
3132 #pragma unused(nxprov, channel)
3133 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3134 	if (pcb->utun_netif_nexus == nexus) {
3135 		pcb->utun_netif_connected = false;
3136 		if (pcb->utun_attach_fsw) {
3137 			// disconnected by flowswitch that was attached by us
3138 			pcb->utun_netif_nexus = NULL;
3139 		}
3140 	}
3141 	ifnet_decr_iorefcnt(pcb->utun_ifp);
3142 }
3143 
3144 static errno_t
utun_kpipe_ring_init(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_t channel,kern_channel_ring_t ring,boolean_t is_tx_ring,void ** ring_ctx)3145 utun_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3146     kern_channel_t channel, kern_channel_ring_t ring,
3147     boolean_t is_tx_ring, void **ring_ctx)
3148 {
3149 #pragma unused(nxprov)
3150 #pragma unused(channel)
3151 #pragma unused(ring_ctx)
3152 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3153 	if (!is_tx_ring) {
3154 		VERIFY(pcb->utun_kpipe_rxring == NULL);
3155 		pcb->utun_kpipe_rxring = ring;
3156 	} else {
3157 		VERIFY(pcb->utun_kpipe_txring == NULL);
3158 		pcb->utun_kpipe_txring = ring;
3159 	}
3160 	return 0;
3161 }
3162 
3163 static void
utun_kpipe_ring_fini(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t ring)3164 utun_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3165     kern_channel_ring_t ring)
3166 {
3167 #pragma unused(nxprov)
3168 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3169 	if (pcb->utun_kpipe_rxring == ring) {
3170 		pcb->utun_kpipe_rxring = NULL;
3171 	} else if (pcb->utun_kpipe_txring == ring) {
3172 		pcb->utun_kpipe_txring = NULL;
3173 	}
3174 }
3175 
3176 static errno_t
utun_kpipe_sync_tx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t tx_ring,uint32_t flags)3177 utun_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3178     kern_channel_ring_t tx_ring, uint32_t flags)
3179 {
3180 #pragma unused(nxprov)
3181 #pragma unused(flags)
3182 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3183 
3184 	if (!utun_data_move_begin(pcb)) {
3185 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3186 		    __func__, if_name(pcb->utun_ifp));
3187 		return 0;
3188 	}
3189 
3190 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3191 	int channel_enabled = pcb->utun_kpipe_enabled;
3192 	if (!channel_enabled) {
3193 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3194 		utun_data_move_end(pcb);
3195 		return 0;
3196 	}
3197 
3198 	if (pcb->utun_use_netif) {
3199 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3200 		if (tx_slot == NULL) {
3201 			// Nothing to write, bail
3202 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3203 			utun_data_move_end(pcb);
3204 			return 0;
3205 		}
3206 
3207 		// Signal the netif ring to read
3208 		kern_channel_ring_t rx_ring = pcb->utun_netif_rxring;
3209 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3210 		if (rx_ring != NULL) {
3211 			kern_channel_notify(rx_ring, 0);
3212 		}
3213 	} else {
3214 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3215 
3216 		struct ifnet_stat_increment_param incs = {};
3217 		struct kern_channel_ring_stat_increment tx_ring_stats = {};
3218 		MBUFQ_HEAD(mbufq) mbq;
3219 		MBUFQ_INIT(&mbq);
3220 		kern_channel_slot_t tx_pslot = NULL;
3221 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3222 		while (tx_slot != NULL) {
3223 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3224 
3225 			// Advance TX ring
3226 			tx_pslot = tx_slot;
3227 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3228 
3229 			if (tx_ph == 0) {
3230 				continue;
3231 			}
3232 
3233 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3234 			VERIFY(tx_buf != NULL);
3235 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
3236 			VERIFY(tx_baddr != 0);
3237 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3238 
3239 			size_t length = MIN(kern_packet_get_data_length(tx_ph),
3240 			    pcb->utun_slot_size);
3241 
3242 			mbuf_t data = NULL;
3243 			if (length >= UTUN_HEADER_SIZE(pcb) &&
3244 			    !(pcb->utun_flags & UTUN_FLAGS_NO_INPUT)) {
3245 				errno_t error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_HEADER, &data);
3246 				VERIFY(0 == error);
3247 				error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_WAITOK);
3248 				VERIFY(0 == error);
3249 				/*
3250 				 * The userland ABI requires the first four bytes have
3251 				 * the protocol family in network byte order: swap them
3252 				 */
3253 				*(uint32_t *)mbuf_data(data) = ntohl(*(uint32_t *)mbuf_data(data));
3254 				mbuf_pkthdr_setrcvif(data, pcb->utun_ifp);
3255 				bpf_tap_in(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3256 				incs.packets_in++;
3257 				incs.bytes_in += length;
3258 				MBUFQ_ENQUEUE(&mbq, data);
3259 			}
3260 		}
3261 		if (tx_pslot) {
3262 			kern_channel_advance_slot(tx_ring, tx_pslot);
3263 			tx_ring_stats.kcrsi_slots_transferred = incs.packets_in;
3264 			tx_ring_stats.kcrsi_bytes_transferred = incs.bytes_in;
3265 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3266 			(void) kern_channel_reclaim(tx_ring);
3267 		}
3268 		if (!MBUFQ_EMPTY(&mbq)) {
3269 			(void) ifnet_input_extended(pcb->utun_ifp, MBUFQ_FIRST(&mbq),
3270 			    MBUFQ_LAST(&mbq), &incs);
3271 			MBUFQ_INIT(&mbq);
3272 		}
3273 	}
3274 
3275 	utun_data_move_end(pcb);
3276 	return 0;
3277 }
3278 
3279 static errno_t
utun_kpipe_sync_rx(kern_nexus_provider_t nxprov,kern_nexus_t nexus,kern_channel_ring_t rx_ring,uint32_t flags)3280 utun_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
3281     kern_channel_ring_t rx_ring, uint32_t flags)
3282 {
3283 #pragma unused(nxprov)
3284 #pragma unused(flags)
3285 	struct utun_pcb *pcb = kern_nexus_get_context(nexus);
3286 	struct kern_channel_ring_stat_increment rx_ring_stats = {};
3287 
3288 	if (!utun_data_move_begin(pcb)) {
3289 		os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n",
3290 		    __func__, if_name(pcb->utun_ifp));
3291 		return 0;
3292 	}
3293 
3294 	lck_rw_lock_shared(&pcb->utun_pcb_lock);
3295 
3296 	int channel_enabled = pcb->utun_kpipe_enabled;
3297 	if (!channel_enabled) {
3298 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3299 		utun_data_move_end(pcb);
3300 		return 0;
3301 	}
3302 
3303 	/* reclaim user-released slots */
3304 	(void) kern_channel_reclaim(rx_ring);
3305 
3306 	uint32_t avail = kern_channel_available_slot_count(rx_ring);
3307 	if (avail == 0) {
3308 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3309 		utun_data_move_end(pcb);
3310 		return 0;
3311 	}
3312 
3313 	if (pcb->utun_use_netif) {
3314 		kern_channel_ring_t tx_ring = pcb->utun_netif_txring;
3315 		if (tx_ring == NULL ||
3316 		    pcb->utun_netif_nexus == NULL) {
3317 			// Net-If TX ring not set up yet, nothing to read
3318 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3319 			utun_data_move_end(pcb);
3320 			return 0;
3321 		}
3322 
3323 		struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->utun_netif_nexus)->nif_stats;
3324 
3325 		// Unlock utun before entering ring
3326 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3327 
3328 		(void)kr_enter(tx_ring, TRUE);
3329 
3330 		// Lock again after entering and validate
3331 		lck_rw_lock_shared(&pcb->utun_pcb_lock);
3332 		if (tx_ring != pcb->utun_netif_txring) {
3333 			// Ring no longer valid
3334 			// Unlock first, then exit ring
3335 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3336 			kr_exit(tx_ring);
3337 			utun_data_move_end(pcb);
3338 			return 0;
3339 		}
3340 
3341 		struct kern_channel_ring_stat_increment tx_ring_stats;
3342 		bzero(&tx_ring_stats, sizeof(tx_ring_stats));
3343 		kern_channel_slot_t tx_pslot = NULL;
3344 		kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
3345 		if (tx_slot == NULL) {
3346 			// Nothing to read, don't bother signalling
3347 			// Unlock first, then exit ring
3348 			lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3349 			kr_exit(tx_ring);
3350 			utun_data_move_end(pcb);
3351 			return 0;
3352 		}
3353 
3354 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3355 		VERIFY(rx_pp != NULL);
3356 		struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
3357 		VERIFY(tx_pp != NULL);
3358 		kern_channel_slot_t rx_pslot = NULL;
3359 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3360 		kern_packet_t tx_chain_ph = 0;
3361 
3362 		while (rx_slot != NULL && tx_slot != NULL) {
3363 			size_t length;
3364 			kern_buflet_t rx_buf;
3365 			void *rx_baddr;
3366 
3367 			kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
3368 
3369 			/* Skip slot if packet is zero-length or marked as dropped (QUMF_DROPPED) */
3370 			if (tx_ph == 0) {
3371 				// Advance TX ring
3372 				tx_pslot = tx_slot;
3373 				tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3374 				continue;
3375 			}
3376 			(void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
3377 			if (tx_chain_ph != 0) {
3378 				kern_packet_append(tx_ph, tx_chain_ph);
3379 			}
3380 			tx_chain_ph = tx_ph;
3381 
3382 			// Advance TX ring
3383 			tx_pslot = tx_slot;
3384 			tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
3385 
3386 			// Allocate rx packet
3387 			kern_packet_t rx_ph = 0;
3388 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3389 			if (__improbable(error != 0)) {
3390 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3391 				    pcb->utun_ifp->if_xname);
3392 				break;
3393 			}
3394 
3395 			kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
3396 			VERIFY(tx_buf != NULL);
3397 			uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
3398 			VERIFY(tx_baddr != NULL);
3399 			tx_baddr += kern_buflet_get_data_offset(tx_buf);
3400 
3401 			bpf_tap_packet_out(pcb->utun_ifp, DLT_RAW, tx_ph, NULL, 0);
3402 
3403 			length = MIN(kern_packet_get_data_length(tx_ph) + UTUN_HEADER_SIZE(pcb),
3404 			    pcb->utun_slot_size);
3405 
3406 			tx_ring_stats.kcrsi_slots_transferred++;
3407 			tx_ring_stats.kcrsi_bytes_transferred += length;
3408 
3409 			if (length < UTUN_HEADER_SIZE(pcb) ||
3410 			    length > pcb->utun_slot_size ||
3411 			    length > PP_BUF_SIZE_DEF(rx_pp) ||
3412 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3413 				/* flush data */
3414 				kern_pbufpool_free(rx_pp, rx_ph);
3415 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: invalid length %zu header_size %zu\n",
3416 				    pcb->utun_ifp->if_xname, length, UTUN_HEADER_SIZE(pcb));
3417 				STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
3418 				STATS_INC(nifs, NETIF_STATS_DROP);
3419 				continue;
3420 			}
3421 
3422 			/* fillout packet */
3423 			rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3424 			VERIFY(rx_buf != NULL);
3425 			rx_baddr = kern_buflet_get_data_address(rx_buf);
3426 			VERIFY(rx_baddr != NULL);
3427 
3428 			// Find family
3429 			uint32_t af = 0;
3430 			uint8_t vhl = *(uint8_t *)(tx_baddr);
3431 			u_int ip_version = (vhl >> 4);
3432 			switch (ip_version) {
3433 			case 4: {
3434 				af = AF_INET;
3435 				break;
3436 			}
3437 			case 6: {
3438 				af = AF_INET6;
3439 				break;
3440 			}
3441 			default: {
3442 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: unknown ip version %u vhl %u header_size %zu\n",
3443 				    pcb->utun_ifp->if_xname, ip_version, vhl, UTUN_HEADER_SIZE(pcb));
3444 				break;
3445 			}
3446 			}
3447 
3448 			// Copy header
3449 			af = htonl(af);
3450 			memcpy((void *)rx_baddr, &af, sizeof(af));
3451 			if (pcb->utun_flags & UTUN_FLAGS_ENABLE_PROC_UUID) {
3452 				kern_packet_get_euuid(tx_ph, (void *)((uintptr_t)rx_baddr + sizeof(af)));
3453 			}
3454 
3455 			// Copy data from tx to rx
3456 			memcpy((void *)((uintptr_t)rx_baddr + UTUN_HEADER_SIZE(pcb)), (void *)tx_baddr, length - UTUN_HEADER_SIZE(pcb));
3457 			kern_packet_clear_flow_uuid(rx_ph); // zero flow id
3458 
3459 			/* finalize and attach the packet */
3460 			error = kern_buflet_set_data_offset(rx_buf, 0);
3461 			VERIFY(error == 0);
3462 			error = kern_buflet_set_data_length(rx_buf, length);
3463 			VERIFY(error == 0);
3464 			error = kern_packet_finalize(rx_ph);
3465 			VERIFY(error == 0);
3466 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3467 			VERIFY(error == 0);
3468 
3469 			STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
3470 			STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
3471 
3472 			rx_ring_stats.kcrsi_slots_transferred++;
3473 			rx_ring_stats.kcrsi_bytes_transferred += length;
3474 
3475 			rx_pslot = rx_slot;
3476 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3477 		}
3478 
3479 		if (rx_pslot) {
3480 			kern_channel_advance_slot(rx_ring, rx_pslot);
3481 			kern_channel_increment_ring_net_stats(rx_ring, pcb->utun_ifp, &rx_ring_stats);
3482 		}
3483 
3484 		if (tx_chain_ph != 0) {
3485 			kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
3486 		}
3487 
3488 		if (tx_pslot) {
3489 			kern_channel_advance_slot(tx_ring, tx_pslot);
3490 			kern_channel_increment_ring_net_stats(tx_ring, pcb->utun_ifp, &tx_ring_stats);
3491 			(void)kern_channel_reclaim(tx_ring);
3492 		}
3493 
3494 		/* just like utun_ctl_rcvd(), always reenable output */
3495 		errno_t error = ifnet_enable_output(pcb->utun_ifp);
3496 		if (error != 0) {
3497 			os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
3498 		}
3499 
3500 		// Unlock first, then exit ring
3501 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3502 
3503 		if (tx_pslot != NULL) {
3504 			kern_channel_notify(tx_ring, 0);
3505 		}
3506 		kr_exit(tx_ring);
3507 	} else {
3508 		lck_rw_unlock_shared(&pcb->utun_pcb_lock);
3509 
3510 		uint32_t mb_cnt = 0;
3511 		uint32_t mb_len = 0;
3512 		struct mbuf *mb_head = NULL;
3513 		struct mbuf *mb_tail = NULL;
3514 
3515 		if (ifnet_dequeue_multi(pcb->utun_ifp, avail, &mb_head,
3516 		    &mb_tail, &mb_cnt, &mb_len) != 0) {
3517 			utun_data_move_end(pcb);
3518 			return 0;
3519 		}
3520 		VERIFY(mb_cnt <= avail);
3521 
3522 		struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
3523 		VERIFY(rx_pp != NULL);
3524 		kern_channel_slot_t rx_pslot = NULL;
3525 		kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
3526 		while (rx_slot) {
3527 			size_t length = 0;
3528 			mbuf_t data = NULL;
3529 			if ((data = mb_head) == NULL) {
3530 				VERIFY(mb_cnt == 0);
3531 				break;
3532 			}
3533 			mb_head = mbuf_nextpkt(mb_head);
3534 			mbuf_setnextpkt(data, NULL);
3535 			VERIFY(mb_cnt != 0);
3536 			--mb_cnt;
3537 			length = mbuf_pkthdr_len(data);
3538 			if (length < UTUN_HEADER_SIZE(pcb) ||
3539 			    length > pcb->utun_slot_size ||
3540 			    (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT)) {
3541 				/* flush data */
3542 				mbuf_freem(data);
3543 				continue;
3544 			}
3545 			bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0);
3546 
3547 			// Allocate rx packet
3548 			kern_packet_t rx_ph = 0;
3549 			errno_t error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
3550 			if (__improbable(error != 0)) {
3551 				os_log_error(OS_LOG_DEFAULT, "utun_kpipe_sync_rx %s: failed to allocate packet\n",
3552 				    pcb->utun_ifp->if_xname);
3553 				break;
3554 			}
3555 
3556 			/*
3557 			 * The ABI requires the protocol in network byte order
3558 			 */
3559 			*(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data));
3560 
3561 			// Fillout rx packet
3562 			kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
3563 			VERIFY(rx_buf != NULL);
3564 			void *rx_baddr = kern_buflet_get_data_address(rx_buf);
3565 			VERIFY(rx_baddr != NULL);
3566 
3567 			// Copy-in data from mbuf to buflet
3568 			mbuf_copydata(data, 0, length, (void *)rx_baddr);
3569 			kern_packet_clear_flow_uuid(rx_ph);     // Zero flow id
3570 
3571 			// Finalize and attach the packet
3572 			error = kern_buflet_set_data_offset(rx_buf, 0);
3573 			VERIFY(error == 0);
3574 			error = kern_buflet_set_data_length(rx_buf, length);
3575 			VERIFY(error == 0);
3576 			error = kern_packet_finalize(rx_ph);
3577 			VERIFY(error == 0);
3578 			error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
3579 			VERIFY(error == 0);
3580 
3581 			rx_ring_stats.kcrsi_slots_transferred++;
3582 			rx_ring_stats.kcrsi_bytes_transferred += length;
3583 
3584 			if (!pcb->utun_ext_ifdata_stats) {
3585 				ifnet_stat_increment_out(pcb->utun_ifp, 1, length, 0);
3586 			}
3587 
3588 			mbuf_freem(data);
3589 
3590 			rx_pslot = rx_slot;
3591 			rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
3592 		}
3593 		if (rx_pslot) {
3594 			kern_channel_advance_slot(rx_ring, rx_pslot);
3595 			kern_channel_increment_ring_stats(rx_ring, &rx_ring_stats);
3596 		}
3597 		if (mb_head != NULL) {
3598 			VERIFY(mb_cnt != 0);
3599 			mbuf_freem_list(mb_head);
3600 		}
3601 	}
3602 
3603 	utun_data_move_end(pcb);
3604 	return 0;
3605 }
3606 
3607 #endif // UTUN_NEXUS
3608 
3609 
3610 /*
3611  * These are place holders until coreTLS kext stops calling them
3612  */
3613 errno_t utun_ctl_register_dtls(void *reg);
3614 int utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
3615 void utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb);
3616 
3617 errno_t
utun_ctl_register_dtls(void * reg)3618 utun_ctl_register_dtls(void *reg)
3619 {
3620 #pragma unused(reg)
3621 	return 0;
3622 }
3623 
3624 int
utun_pkt_dtls_input(struct utun_pcb * pcb,mbuf_t * pkt,protocol_family_t family)3625 utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family)
3626 {
3627 #pragma unused(pcb)
3628 #pragma unused(pkt)
3629 #pragma unused(family)
3630 	return 0;
3631 }
3632 
/*
 * Placeholder kept for external callers: does nothing.
 */
void
utun_ctl_disable_crypto_dtls(struct utun_pcb   *pcb)
{
	(void)pcb;
}
3638 
3639 #if UTUN_NEXUS
3640 static boolean_t
utun_data_move_begin(struct utun_pcb * pcb)3641 utun_data_move_begin(struct utun_pcb *pcb)
3642 {
3643 	bool data_path_ready = false;
3644 
3645 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3646 	if ((data_path_ready = UTUN_IS_DATA_PATH_READY(pcb))) {
3647 		pcb->utun_pcb_data_move++;
3648 	}
3649 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3650 
3651 	return data_path_ready;
3652 }
3653 
3654 static void
utun_data_move_end(struct utun_pcb * pcb)3655 utun_data_move_end(struct utun_pcb *pcb)
3656 {
3657 	lck_mtx_lock_spin(&pcb->utun_pcb_data_move_lock);
3658 	VERIFY(pcb->utun_pcb_data_move > 0);
3659 	/*
3660 	 * if there's no more thread moving data, wakeup any
3661 	 * drainers that are blocked waiting for this.
3662 	 */
3663 	if (--pcb->utun_pcb_data_move == 0 && pcb->utun_pcb_drainers > 0) {
3664 		wakeup(&(pcb->utun_pcb_data_move));
3665 	}
3666 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3667 }
3668 
3669 static void
utun_data_move_drain(struct utun_pcb * pcb)3670 utun_data_move_drain(struct utun_pcb *pcb)
3671 {
3672 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3673 	/* data path must already be marked as not ready */
3674 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3675 	pcb->utun_pcb_drainers++;
3676 	while (pcb->utun_pcb_data_move != 0) {
3677 		(void) msleep(&(pcb->utun_pcb_data_move),
3678 		    &pcb->utun_pcb_data_move_lock, (PZERO - 1), __func__, NULL);
3679 	}
3680 	VERIFY(!UTUN_IS_DATA_PATH_READY(pcb));
3681 	VERIFY(pcb->utun_pcb_drainers > 0);
3682 	pcb->utun_pcb_drainers--;
3683 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3684 }
3685 
3686 static void
utun_wait_data_move_drain(struct utun_pcb * pcb)3687 utun_wait_data_move_drain(struct utun_pcb *pcb)
3688 {
3689 	/*
3690 	 * Mark the data path as not usable.
3691 	 */
3692 	lck_mtx_lock(&pcb->utun_pcb_data_move_lock);
3693 	UTUN_CLR_DATA_PATH_READY(pcb);
3694 	lck_mtx_unlock(&pcb->utun_pcb_data_move_lock);
3695 
3696 	/* Wait until all threads in the data path are done. */
3697 	utun_data_move_drain(pcb);
3698 }
3699 #endif // UTUN_NEXUS
3700