xref: /xnu-11417.140.69/bsd/netinet/flow_divert.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2012-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #include <net/sockaddr_utils.h>
73 #if CONTENT_FILTER
74 #include <net/content_filter.h>
75 #endif /* CONTENT_FILTER */
76 
/* Flow state flags kept in the 'flags' field of struct flow_divert_pcb */
#define FLOW_DIVERT_CONNECT_STARTED             0x00000001
#define FLOW_DIVERT_READ_CLOSED                 0x00000002
#define FLOW_DIVERT_WRITE_CLOSED                0x00000004
#define FLOW_DIVERT_TUNNEL_RD_CLOSED            0x00000008
#define FLOW_DIVERT_TUNNEL_WR_CLOSED            0x00000010
#define FLOW_DIVERT_HAS_HMAC                    0x00000040
#define FLOW_DIVERT_NOTIFY_ON_RECEIVED          0x00000080
#define FLOW_DIVERT_IMPLICIT_CONNECT            0x00000100
#define FLOW_DIVERT_DID_SET_LOCAL_ADDR          0x00000200
#define FLOW_DIVERT_HAS_TOKEN                   0x00000400
#define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR       0x00000800
#define FLOW_DIVERT_FLOW_IS_TRANSPARENT         0x00001000

/* Logging helpers; each message is prefixed with the PCB's flow hash */
#define FDLOG(level, pcb, format, ...) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)

#define FDLOG0(level, pcb, msg) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)

/* PCB reference counting; dropping the last reference destroys the PCB */
#define FDRETAIN(pcb)                   if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count)
#define FDRELEASE(pcb)                                                                                                          \
	do {                                                                                                                                    \
	        if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) {       \
	                flow_divert_pcb_destroy(pcb);                                                                   \
	        }                                                                                                                                       \
	} while (0)

/* Group reference counting; dropping the last reference destroys the group */
#define FDGRP_RETAIN(grp)  if ((grp) != NULL) OSIncrementAtomic(&(grp)->ref_count)
#define FDGRP_RELEASE(grp) if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) flow_divert_group_destroy(grp)

/* Per-PCB mutex */
#define FDLOCK(pcb)                             lck_mtx_lock(&(pcb)->mtx)
#define FDUNLOCK(pcb)                           lck_mtx_unlock(&(pcb)->mtx)

#define FD_CTL_SENDBUFF_SIZE                    (128 * 1024)

#define GROUP_BIT_CTL_ENQUEUE_BLOCKED           0

/* Limits on group indices and on token name/key/trie sizes */
#define GROUP_COUNT_MAX                         31
#define FLOW_DIVERT_MAX_NAME_SIZE               4096
#define FLOW_DIVERT_MAX_KEY_SIZE                1024
#define FLOW_DIVERT_MAX_TRIE_MEMORY             (1024 * 1024)

/* Accessors for the signing-ID prefix trie (see flow_divert_trie_insert) */
#define CHILD_MAP_SIZE                  256
#define NULL_TRIE_IDX                   0xffff
#define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
#define TRIE_CHILD(t, i, b)             (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
#define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])

#define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
/* Placeholder PCB (hash 0) used for logging when no flow context exists */
static struct flow_divert_pcb           nil_pcb;

/* Lock attributes/group shared by all flow divert locks */
static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
/* Protects the group array, active count, and in-process group list below */
static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
    &flow_divert_mtx_attr);

/* Groups whose control unit is >= FLOW_DIVERT_IN_PROCESS_UNIT_MIN, searched by unit */
static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;

/* Array of groups indexed by control unit (units 1..GROUP_COUNT_MAX-1) */
static struct flow_divert_group         **g_flow_divert_groups __indexable = NULL;
static uint32_t                         g_active_group_count    = 0;

static  errno_t                         g_init_result           = 0;

/* Kernel control reference for the flow divert control socket */
static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;

/* Protocol switch / user-request tables for diverted TCP and UDP over v4/v6 */
static struct protosw                   g_flow_divert_in_protosw;
static struct pr_usrreqs                g_flow_divert_in_usrreqs;
static struct protosw                   g_flow_divert_in_udp_protosw;
static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
static struct ip6protosw                g_flow_divert_in6_protosw;
static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
static struct ip6protosw                g_flow_divert_in6_udp_protosw;
static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;

/* Cached pointers to the regular TCP/UDP protocol switches */
static struct protosw                   *g_tcp_protosw          = NULL;
static struct ip6protosw                *g_tcp6_protosw         = NULL;
static struct protosw                   *g_udp_protosw          = NULL;
static struct ip6protosw                *g_udp6_protosw         = NULL;

/* Typed allocation zones for groups and PCBs */
static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
    NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
    NET_KT_DEFAULT);
161 
/* Forward declarations */

static errno_t
flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);

static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr *addr);

static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr);

struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer);

static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);

static void flow_divert_group_destroy(struct flow_divert_group *group);
178 
179 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)180 flow_divert_syslog_type_to_oslog_type(int syslog_type)
181 {
182 	switch (syslog_type) {
183 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
184 	case LOG_INFO: return OS_LOG_TYPE_INFO;
185 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
186 	default: return OS_LOG_TYPE_DEFAULT;
187 	}
188 }
189 
190 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)191 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
192 {
193 	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
194 }
195 
/* Red-black tree of PCBs per group, keyed by flow hash (flow_divert_pcb_cmp) */
RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
198 
199 static const char *
flow_divert_packet_type2str(uint8_t packet_type)200 flow_divert_packet_type2str(uint8_t packet_type)
201 {
202 	switch (packet_type) {
203 	case FLOW_DIVERT_PKT_CONNECT:
204 		return "connect";
205 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
206 		return "connect result";
207 	case FLOW_DIVERT_PKT_DATA:
208 		return "data";
209 	case FLOW_DIVERT_PKT_CLOSE:
210 		return "close";
211 	case FLOW_DIVERT_PKT_READ_NOTIFY:
212 		return "read notification";
213 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
214 		return "properties update";
215 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
216 		return "app map create";
217 	default:
218 		return "unknown";
219 	}
220 }
221 
/*
 * Lock a diverted socket on behalf of the plugin and record that the
 * plugin currently holds the lock.
 */
static inline void
flow_divert_lock_socket(struct socket *so, struct flow_divert_pcb *fd_cb)
{
	socket_lock(so, 0);
	/* Set only after the lock is held, so the flag is protected by it */
	fd_cb->plugin_locked = true;
}
228 
/*
 * Clear the plugin-holds-lock marker and release the socket lock.
 * The flag is cleared before unlocking so it is never observed true
 * without the lock actually being held.
 */
static inline void
flow_divert_unlock_socket(struct socket *so, struct flow_divert_pcb *fd_cb)
{
	fd_cb->plugin_locked = false;
	socket_unlock(so, 0);
}
235 
/*
 * Find the PCB with the given flow hash in a group's PCB tree.
 *
 * Returns the PCB with a reference held (caller must FDRELEASE), or
 * NULL if no PCB with that hash exists in the group.
 */
static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
{
	struct flow_divert_pcb  key_item;
	struct flow_divert_pcb  *fd_cb          = NULL;

	/* Only the hash is consulted by the tree's compare function */
	key_item.hash = hash;

	lck_rw_lock_shared(&group->lck);
	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
	FDRETAIN(fd_cb);        /* FDRETAIN is a no-op on NULL */
	lck_rw_done(&group->lck);

	return fd_cb;
}
251 
252 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)253 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
254 {
255 	struct flow_divert_group *group = NULL;
256 	lck_rw_lock_shared(&g_flow_divert_group_lck);
257 	if (g_active_group_count == 0) {
258 		if (fd_cb != NULL) {
259 			FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
260 		}
261 	} else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
262 		FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
263 	} else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
264 		if (g_flow_divert_groups == NULL) {
265 			if (fd_cb != NULL) {
266 				FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
267 			}
268 		} else {
269 			group = g_flow_divert_groups[ctl_unit];
270 			if (group == NULL) {
271 				if (fd_cb != NULL) {
272 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
273 				}
274 			} else {
275 				FDGRP_RETAIN(group);
276 			}
277 		}
278 	} else {
279 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
280 			if (fd_cb != NULL) {
281 				FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
282 			}
283 		} else {
284 			struct flow_divert_group *group_cursor = NULL;
285 			TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
286 				if (group_cursor->ctl_unit == ctl_unit) {
287 					group = group_cursor;
288 					break;
289 				}
290 			}
291 			if (group == NULL) {
292 				if (fd_cb != NULL) {
293 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
294 				}
295 			} else if (fd_cb != NULL &&
296 			    (fd_cb->so == NULL ||
297 			    group_cursor->in_process_pid != fd_cb->so->last_pid)) {
298 				FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
299 				    ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
300 				group = NULL;
301 			} else {
302 				FDGRP_RETAIN(group);
303 			}
304 		}
305 	}
306 	lck_rw_done(&g_flow_divert_group_lck);
307 	return group;
308 }
309 
/*
 * Insert a PCB into a group's PCB tree, keyed by the PCB's flow hash.
 * On success the group takes a reference on the PCB.
 *
 * Returns EEXIST if the group already contains a PCB with the same hash,
 * or ENOENT if the group has been marked defunct.
 */
static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
{
	int error = 0;
	lck_rw_lock_exclusive(&group->lck);
	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
		/* RB_INSERT returns NULL on success, the colliding element otherwise */
		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
			fd_cb->group = group;
			fd_cb->control_group_unit = group->ctl_unit;
			FDRETAIN(fd_cb); /* The group now has a reference */
		} else {
			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
			error = EEXIST;
		}
	} else {
		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
		error = ENOENT;
	}
	lck_rw_done(&group->lck);
	return error;
}
331 
/*
 * Generate a flow hash for the PCB and insert it into the group
 * registered at ctl_unit.
 *
 * The hash is derived from a monotonically increasing key plus a random
 * value.  Before inserting, the candidate hash is checked against every
 * other non-in-process group so hashes stay unique across groups; a
 * collision in the target group itself is caught by the insert.  On any
 * collision (EEXIST) a new hash is generated, up to 3 retries.
 */
static errno_t
flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
{
	errno_t error = 0;
	struct flow_divert_group *group = NULL;
	static uint32_t g_nextkey = 1;
	static uint32_t g_hash_seed = 0;
	int try_count = 0;

	group = flow_divert_group_lookup(ctl_unit, fd_cb);
	if (group == NULL) {
		return ENOENT;
	}

	do {
		uint32_t key[2];
		uint32_t idx;

		key[0] = g_nextkey++;
		key[1] = RandomULong();

		/* Lazily seed the flow hash function on first use */
		if (g_hash_seed == 0) {
			g_hash_seed = RandomULong();
		}

		error = 0;
		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);

		/* Check for a hash collision in every other non-in-process group */
		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
			if (idx == ctl_unit) {
				continue;
			}
			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
			if (curr_group != NULL) {
				lck_rw_lock_shared(&curr_group->lck);
				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
					error = EEXIST;
				}
				lck_rw_done(&curr_group->lck);
				FDGRP_RELEASE(curr_group);
			}
		}

		if (error == 0) {
			/* The insert detects a collision within the target group itself */
			error = flow_divert_pcb_insert(fd_cb, group);
		}
	} while (error == EEXIST && try_count++ < 3);

	if (error == EEXIST) {
		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
		fd_cb->hash = 0;
	}

	FDGRP_RELEASE(group);
	return error;
}
388 
/*
 * Allocate and initialize a new flow divert PCB for the given socket.
 * The returned PCB carries one reference representing the socket's
 * ownership; its log level is inherited from the global nil_pcb.
 */
static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)
{
	struct flow_divert_pcb  *new_pcb = NULL;

	/* Z_WAITOK: allocation may block; Z_ZERO: all fields start zeroed */
	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
	new_pcb->so = so;
	new_pcb->log_level = nil_pcb.log_level;

	FDRETAIN(new_pcb);      /* Represents the socket's reference */

	return new_pcb;
}
403 
/*
 * Free a PCB and everything it owns: any buffered connect token and
 * connect packet, application data, and the saved original remote
 * endpoint.  Called from FDRELEASE when the last reference is dropped.
 */
static void
flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
{
	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %llu, tunnel tx %llu, tunnel rx %llu",
	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);

	if (fd_cb->connect_token != NULL) {
		mbuf_freem(fd_cb->connect_token);
	}
	if (fd_cb->connect_packet != NULL) {
		mbuf_freem(fd_cb->connect_packet);
	}
	if (fd_cb->app_data != NULL) {
		kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
	}
	if (fd_cb->original_remote_endpoint != NULL) {
		free_sockaddr(fd_cb->original_remote_endpoint);
	}
	zfree(flow_divert_pcb_zone, fd_cb);
}
424 
/*
 * Remove a PCB from its group's PCB tree (if it is in one) and drop the
 * reference the group held on it.  Safe to call on a PCB that was never
 * inserted (fd_cb->group == NULL).
 */
static void
flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
{
	if (fd_cb->group != NULL) {
		struct flow_divert_group *group = fd_cb->group;
		lck_rw_lock_exclusive(&group->lck);
		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
		fd_cb->group = NULL;
		FDRELEASE(fd_cb);                               /* Release the group's reference */
		lck_rw_done(&group->lck);
	}
}
438 
/*
 * Allocate a packet header mbuf and lay down the flow divert packet
 * header: the packet type and this flow's connection ID (the PCB hash,
 * in network byte order).
 *
 * On success *packet holds the new mbuf; on any failure *packet is NULL
 * and the error is returned.
 */
static int
flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_ref_t *packet)
{
	struct flow_divert_packet_header        hdr;
	int                                     error           = 0;

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		return error;
	}

	hdr.packet_type = packet_type;
	hdr.conn_id = htonl(fd_cb->hash);

	/* Lay down the header */
	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
		mbuf_freem(*packet);
		*packet = NULL;
		return error;
	}

	return 0;
}
465 
/*
 * Append one type-length-value element to the end of a packet.  The
 * length field is written in network byte order.  Errors are logged
 * against nil_pcb because no flow context is available here.
 */
static int
flow_divert_packet_append_tlv(mbuf_ref_t packet, uint8_t type, uint32_t length, const void __sized_by(length) *value)
{
	uint32_t        net_length      = htonl(length);
	int                     error           = 0;

	/* Type byte */
	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
		return error;
	}

	/* Length, big-endian */
	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
		return error;
	}

	/* Value bytes */
	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
	if (error) {
		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
		return error;
	}

	return error;
}
492 
/*
 * Scan a packet's TLV region, starting at 'offset', for a TLV of the
 * given type.
 *
 * If 'next' is non-zero the TLV at 'offset' is skipped before searching,
 * which finds the next occurrence of the type.
 *
 * Returns the offset of the matching TLV's type byte, or -1 with *err
 * set if the end of the packet is reached without a match.
 */
static int
flow_divert_packet_find_tlv(mbuf_ref_t packet, int offset, uint8_t type, int *err, int next)
{
	size_t      cursor      = offset;
	int         error       = 0;
	uint32_t    curr_length = 0;
	uint8_t     curr_type   = 0;

	*err = 0;

	do {
		if (!next) {
			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
			if (error) {
				/* Ran off the end of the packet without finding the type */
				*err = ENOENT;
				return -1;
			}
		} else {
			/* Skip the TLV at 'offset' by pretending it did not match */
			next = 0;
			curr_type = FLOW_DIVERT_TLV_NIL;
		}

		if (curr_type != type) {
			/* Advance past this TLV: type byte, length field, then value */
			cursor += sizeof(curr_type);
			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
			if (error) {
				*err = error;
				return -1;
			}

			cursor += (sizeof(curr_length) + ntohl(curr_length));
		}
	} while (curr_type != type);

	return (int)cursor;
}
529 
/*
 * Copy the value of the first TLV of the given type out of a packet.
 *
 * offset:   where in the packet to start searching.
 * buff:     if non-NULL, receives up to buff_len bytes of the value
 *           (buffer is zero-filled first; longer values are truncated).
 * val_size: if non-NULL, receives the full encoded length of the value.
 *
 * Returns 0 on success, EINVAL if the encoded length would run past the
 * end of the packet, or an error from TLV lookup / mbuf copying.
 */
static int
flow_divert_packet_get_tlv(mbuf_ref_t packet, int offset, uint8_t type, size_t buff_len, void *buff __sized_by(buff_len), uint32_t *val_size)
{
	int         error      = 0;
	uint32_t    length     = 0;
	int         tlv_offset = 0;

	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
	if (tlv_offset < 0) {
		return error;
	}

	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
	if (error) {
		return error;
	}

	length = ntohl(length);

	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);

	/* Reject lengths that claim more data than the packet contains */
	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
		return EINVAL;
	}

	if (val_size != NULL) {
		*val_size = length;
	}

	if (buff != NULL && buff_len > 0) {
		memset(buff, 0, buff_len);
		size_t to_copy = (length < buff_len) ? length : buff_len;
		error = mbuf_copydata(packet, data_offset, to_copy, buff);
		if (error) {
			return error;
		}
	}

	return 0;
}
571 
/*
 * Compute an HMAC (SHA-1) over the entire mbuf chain of 'packet' using
 * the group's token key.  'hmac' must have room for SHA_DIGEST_LENGTH
 * bytes.  Returns ENOPROTOOPT if the crypto function table or the
 * group's key is not available.
 */
static int
flow_divert_packet_compute_hmac(mbuf_ref_t packet, struct flow_divert_group *group, uint8_t *hmac)
{
	mbuf_ref_t  curr_mbuf       = packet;

	if (g_crypto_funcs == NULL || group->token_key == NULL) {
		return ENOPROTOOPT;
	}

	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);

	/* Feed every mbuf in the chain into the HMAC */
	while (curr_mbuf != NULL) {
		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mtod(curr_mbuf, void *));
		curr_mbuf = mbuf_next(curr_mbuf);
	}

	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);

	return 0;
}
593 
594 static int
flow_divert_packet_verify_hmac(mbuf_ref_t packet,uint32_t ctl_unit)595 flow_divert_packet_verify_hmac(mbuf_ref_t packet, uint32_t ctl_unit)
596 {
597 	int error = 0;
598 	struct flow_divert_group *group = NULL;
599 	int hmac_offset;
600 	uint8_t packet_hmac[SHA_DIGEST_LENGTH];
601 	uint8_t computed_hmac[SHA_DIGEST_LENGTH];
602 	mbuf_ref_t tail;
603 
604 	group = flow_divert_group_lookup(ctl_unit, NULL);
605 	if (group == NULL) {
606 		FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
607 		return ENOPROTOOPT;
608 	}
609 
610 	lck_rw_lock_shared(&group->lck);
611 
612 	if (group->token_key == NULL) {
613 		error = ENOPROTOOPT;
614 		goto done;
615 	}
616 
617 	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
618 	if (hmac_offset < 0) {
619 		goto done;
620 	}
621 
622 	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
623 	if (error) {
624 		goto done;
625 	}
626 
627 	/* Chop off the HMAC TLV */
628 	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
629 	if (error) {
630 		goto done;
631 	}
632 
633 	mbuf_free(tail);
634 
635 	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
636 	if (error) {
637 		goto done;
638 	}
639 
640 	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
641 		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
642 		error = EINVAL;
643 		goto done;
644 	}
645 
646 done:
647 	if (group != NULL) {
648 		lck_rw_done(&group->lck);
649 		FDGRP_RELEASE(group);
650 	}
651 	return error;
652 }
653 
/*
 * Attribute one packet and 'data_len' bytes of traffic to the socket's
 * inpcb statistics, counted under the last outbound interface's
 * stats_functional_type.  'send' selects the tx counters; otherwise the
 * rx counters are updated.  Also refreshes the inpcb activity bitmap.
 */
static void
flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
{
	struct inpcb *inp = NULL;
	struct ifnet *ifp = NULL;
	stats_functional_type ifnet_count_type = stats_functional_type_none;

	inp = sotoinpcb(fd_cb->so);
	if (inp == NULL) {
		return;
	}

	/* Pick the last outbound interface for the socket's address family */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if (ifp != NULL) {
		ifnet_count_type = IFNET_COUNT_TYPE(ifp);
	}

	if (send) {
		INP_ADD_STAT(inp, ifnet_count_type, txpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, txbytes, data_len);
	} else {
		INP_ADD_STAT(inp, ifnet_count_type, rxpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, rxbytes, data_len);
	}
	inp_set_activity_bitmap(inp);
}
684 
685 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)686 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
687 {
688 	struct inpcb *inp = sotoinpcb(fd_cb->so);
689 	if (INP_NO_CELLULAR(inp)) {
690 		struct ifnet *ifp = NULL;
691 		if (inp->inp_vflag & INP_IPV4) {
692 			ifp = inp->inp_last_outifp;
693 		} else if (inp->inp_vflag & INP_IPV6) {
694 			ifp = inp->in6p_last_outifp;
695 		}
696 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
697 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
698 			return EHOSTUNREACH;
699 		}
700 	}
701 	return 0;
702 }
703 
704 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)705 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
706 {
707 	struct inpcb *inp = sotoinpcb(fd_cb->so);
708 	if (INP_NO_EXPENSIVE(inp)) {
709 		struct ifnet *ifp = NULL;
710 		if (inp->inp_vflag & INP_IPV4) {
711 			ifp = inp->inp_last_outifp;
712 		} else if (inp->inp_vflag & INP_IPV6) {
713 			ifp = inp->in6p_last_outifp;
714 		}
715 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
716 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
717 			return EHOSTUNREACH;
718 		}
719 	}
720 	return 0;
721 }
722 
723 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)724 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
725 {
726 	struct inpcb *inp = sotoinpcb(fd_cb->so);
727 	if (INP_NO_CONSTRAINED(inp)) {
728 		struct ifnet *ifp = NULL;
729 		if (inp->inp_vflag & INP_IPV4) {
730 			ifp = inp->inp_last_outifp;
731 		} else if (inp->inp_vflag & INP_IPV6) {
732 			ifp = inp->in6p_last_outifp;
733 		}
734 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
735 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
736 			return EHOSTUNREACH;
737 		}
738 	}
739 	return 0;
740 }
741 
742 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,bool tunnel,bool flush_snd)743 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
744 {
745 	if (how != SHUT_RD) {
746 		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
747 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
748 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
749 			if (flush_snd) {
750 				/* If the tunnel is not accepting writes any more, then flush the send buffer */
751 				sbflush(&fd_cb->so->so_snd);
752 			}
753 		}
754 	}
755 	if (how != SHUT_WR) {
756 		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
757 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
758 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
759 		}
760 	}
761 }
762 
763 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)764 trie_node_alloc(struct flow_divert_trie *trie)
765 {
766 	if (trie->nodes_free_next < trie->nodes_count) {
767 		uint16_t node_idx = trie->nodes_free_next++;
768 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
769 		return node_idx;
770 	} else {
771 		return NULL_TRIE_IDX;
772 	}
773 }
774 
775 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)776 trie_child_map_alloc(struct flow_divert_trie *trie)
777 {
778 	if (trie->child_maps_free_next < trie->child_maps_count) {
779 		return trie->child_maps_free_next++;
780 	} else {
781 		return NULL_TRIE_IDX;
782 	}
783 }
784 
785 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)786 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
787 {
788 	uint16_t start = trie->bytes_free_next;
789 	if (start + bytes_size <= trie->bytes_count) {
790 		if (start != bytes_idx) {
791 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
792 		}
793 		trie->bytes_free_next += bytes_size;
794 		return start;
795 	} else {
796 		return NULL_TRIE_IDX;
797 	}
798 }
799 
800 static uint16_t
flow_divert_trie_insert(struct flow_divert_trie * trie,uint16_t string_start,size_t string_len)801 flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
802 {
803 	uint16_t current = trie->root;
804 	uint16_t child = trie->root;
805 	uint16_t string_end = string_start + (uint16_t)string_len;
806 	uint16_t string_idx = string_start;
807 	uint16_t string_remainder = (uint16_t)string_len;
808 
809 	while (child != NULL_TRIE_IDX) {
810 		uint16_t parent = current;
811 		uint16_t node_idx;
812 		uint16_t current_end;
813 
814 		current = child;
815 		child = NULL_TRIE_IDX;
816 
817 		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
818 
819 		for (node_idx = TRIE_NODE(trie, current).start;
820 		    node_idx < current_end &&
821 		    string_idx < string_end &&
822 		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
823 		    node_idx++, string_idx++) {
824 			;
825 		}
826 
827 		string_remainder = string_end - string_idx;
828 
829 		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
830 			/*
831 			 * We did not reach the end of the current node's string.
832 			 * We need to split the current node into two:
833 			 *   1. A new node that contains the prefix of the node that matches
834 			 *      the prefix of the string being inserted.
835 			 *   2. The current node modified to point to the remainder
836 			 *      of the current node's string.
837 			 */
838 			uint16_t prefix = trie_node_alloc(trie);
839 			if (prefix == NULL_TRIE_IDX) {
840 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
841 				return NULL_TRIE_IDX;
842 			}
843 
844 			/*
845 			 * Prefix points to the portion of the current nodes's string that has matched
846 			 * the input string thus far.
847 			 */
848 			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
849 			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);
850 
851 			/*
852 			 * Prefix has the current node as the child corresponding to the first byte
853 			 * after the split.
854 			 */
855 			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
856 			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
857 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
858 				return NULL_TRIE_IDX;
859 			}
860 			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
861 
862 			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
863 			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
864 
865 			/* Current node is adjusted to point to the remainder */
866 			TRIE_NODE(trie, current).start = node_idx;
867 			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
868 
869 			/* We want to insert the new leaf (if any) as a child of the prefix */
870 			current = prefix;
871 		}
872 
873 		if (string_remainder > 0) {
874 			/*
875 			 * We still have bytes in the string that have not been matched yet.
876 			 * If the current node has children, iterate to the child corresponding
877 			 * to the next byte in the string.
878 			 */
879 			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
880 				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
881 			}
882 		}
883 	} /* while (child != NULL_TRIE_IDX) */
884 
885 	if (string_remainder > 0) {
886 		/* Add a new leaf containing the remainder of the string */
887 		uint16_t leaf = trie_node_alloc(trie);
888 		if (leaf == NULL_TRIE_IDX) {
889 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
890 			return NULL_TRIE_IDX;
891 		}
892 
893 		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
894 		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
895 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
896 			return NULL_TRIE_IDX;
897 		}
898 		TRIE_NODE(trie, leaf).length = string_remainder;
899 
900 		/* Set the new leaf as the child of the current node */
901 		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
902 			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
903 			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
904 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
905 				return NULL_TRIE_IDX;
906 			}
907 		}
908 		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
909 		current = leaf;
910 	} /* else duplicate or this string is a prefix of one of the existing strings */
911 
912 	return current;
913 }
914 
915 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
916 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes __sized_by (string_bytes_count),__unused size_t string_bytes_count)917 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes __sized_by(string_bytes_count), __unused size_t string_bytes_count)
918 {
919 	uint16_t current = trie->root;
920 	uint16_t string_idx = 0;
921 
922 	while (current != NULL_TRIE_IDX) {
923 		uint16_t next = NULL_TRIE_IDX;
924 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
925 		uint16_t node_idx;
926 
927 		for (node_idx = TRIE_NODE(trie, current).start;
928 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
929 		    node_idx++, string_idx++) {
930 			;
931 		}
932 
933 		if (node_idx == node_end) {
934 			if (string_bytes[string_idx] == '\0') {
935 				return current; /* Got an exact match */
936 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
937 			    0 == strlcmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
938 				return current; /* Got an apple webclip id prefix match */
939 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
940 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
941 			}
942 		}
943 		current = next;
944 	}
945 
946 	return NULL_TRIE_IDX;
947 }
948 
949 struct uuid_search_info {
950 	uuid_t      target_uuid;
951 	char        *found_signing_id __sized_by(found_signing_id_size);
952 	boolean_t   found_multiple_signing_ids;
953 	proc_t      found_proc;
954 	size_t      found_signing_id_size;
955 };
956 
957 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)958 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
959 {
960 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
961 	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
962 
963 	if (info->found_signing_id != NULL) {
964 		if (!info->found_multiple_signing_ids) {
965 			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
966 			info->found_proc = p;
967 			result = PROC_CLAIMED_DONE;
968 		} else {
969 			uuid_string_t uuid_str;
970 			uuid_unparse(info->target_uuid, uuid_str);
971 			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
972 		}
973 		kfree_data_sized_by(info->found_signing_id, info->found_signing_id_size);
974 	}
975 
976 	if (result == PROC_RETURNED_DONE) {
977 		uuid_string_t uuid_str;
978 		uuid_unparse(info->target_uuid, uuid_str);
979 		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
980 	}
981 
982 	return result;
983 }
984 
985 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)986 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
987 {
988 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
989 	int include = 0;
990 
991 	if (info->found_multiple_signing_ids) {
992 		return include;
993 	}
994 
995 	const unsigned char * p_uuid = proc_executableuuid_addr(p);
996 	include = (uuid_compare(p_uuid, info->target_uuid) == 0);
997 	if (include) {
998 		const char *signing_id __null_terminated = cs_identity_get(p);
999 		if (signing_id != NULL) {
1000 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
1001 			size_t signing_id_size = strlen(signing_id) + 1;
1002 			if (info->found_signing_id == NULL) {
1003 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
1004 				info->found_signing_id_size = signing_id_size;
1005 				strlcpy(info->found_signing_id, signing_id, signing_id_size);
1006 			} else if (strlcmp(info->found_signing_id, signing_id, info->found_signing_id_size)) {
1007 				info->found_multiple_signing_ids = TRUE;
1008 			}
1009 		} else {
1010 			info->found_multiple_signing_ids = TRUE;
1011 		}
1012 		include = !info->found_multiple_signing_ids;
1013 	}
1014 
1015 	return include;
1016 }
1017 
1018 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)1019 flow_divert_find_proc_by_uuid(uuid_t uuid)
1020 {
1021 	struct uuid_search_info info;
1022 
1023 	if (LOG_INFO <= nil_pcb.log_level) {
1024 		uuid_string_t uuid_str;
1025 		uuid_unparse(uuid, uuid_str);
1026 		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
1027 	}
1028 
1029 	memset(&info, 0, sizeof(info));
1030 	info.found_proc = PROC_NULL;
1031 	uuid_copy(info.target_uuid, uuid);
1032 
1033 	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
1034 
1035 	return info.found_proc;
1036 }
1037 
/*
 * Append identification TLVs for a single process (signing ID, cdhash, and
 * audit token) to the connect packet. When is_effective is true and the
 * group has an app map, the process's signing identifier must match an
 * entry in the group's signing_id_trie or EPERM is returned. The proc is
 * locked for the duration of the TLV appends.
 */
static int
flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet, bool is_effective)
{
	int error = 0;
	uint8_t *cdhash = NULL;
	audit_token_t audit_token = {};
	const char *proc_cs_id __null_terminated = signing_id;

	proc_lock(proc);

	/* No signing ID supplied by the token: derive one from the proc's code signature */
	if (proc_cs_id == NULL) {
		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
			proc_cs_id = cs_identity_get(proc);
		} else {
			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
		}
	}

	/* For the effective process, validate the signing ID against the group's app map */
	if (is_effective) {
		lck_rw_lock_shared(&fd_cb->group->lck);
		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
			if (proc_cs_id != NULL) {
				size_t proc_cs_id_size = strlen(proc_cs_id) + 1;
				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)__unsafe_null_terminated_to_indexable(proc_cs_id), proc_cs_id_size);
				if (result == NULL_TRIE_IDX) {
					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
					error = EPERM;
				} else {
					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
				}
			} else {
				/* No signing ID to check against the app map */
				error = EPERM;
			}
		}
		lck_rw_done(&fd_cb->group->lck);
	}

	if (error != 0) {
		goto done;
	}

	/*
	 * If signing_id is not NULL then it came from the flow divert token and will be added
	 * as part of the token, so there is no need to add it here.
	 */
	if (signing_id == NULL && proc_cs_id != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
		    (uint32_t)strlen(proc_cs_id),
		    __terminated_by_to_indexable(proc_cs_id));
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
			goto done;
		}
	}

	/* Append the code directory hash, if available */
	cdhash = cs_get_cdhash(proc);
	if (cdhash != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
		    SHA1_RESULTLEN,
		    cdhash);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
			goto done;
		}
	} else {
		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
	}

	/* Append the task's audit token; failure to do so is logged but not fatal */
	task_t task __single = proc_task(proc);
	if (task != TASK_NULL) {
		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
		if (rc == KERN_SUCCESS) {
			int append_error = flow_divert_packet_append_tlv(connect_packet,
			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
			    sizeof(audit_token_t),
			    &audit_token);
			if (append_error) {
				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
			}
		}
	}

done:
	proc_unlock(proc);

	return error;
}
1128 
1129 static int
flow_divert_add_all_proc_info(struct flow_divert_pcb * fd_cb,struct socket * so,proc_t proc,const char * signing_id __null_terminated,mbuf_ref_t connect_packet)1130 flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet)
1131 {
1132 	int error = 0;
1133 	proc_t effective_proc = PROC_NULL;
1134 	proc_t responsible_proc = PROC_NULL;
1135 	proc_t real_proc = proc_find(so->last_pid);
1136 	bool release_real_proc = true;
1137 
1138 	proc_t src_proc = PROC_NULL;
1139 	proc_t real_src_proc = PROC_NULL;
1140 
1141 	if (real_proc == PROC_NULL) {
1142 		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
1143 		release_real_proc = false;
1144 		real_proc = proc;
1145 		if (real_proc == PROC_NULL) {
1146 			real_proc = current_proc();
1147 		}
1148 	}
1149 
1150 	if (so->so_flags & SOF_DELEGATED) {
1151 		if (proc_getpid(real_proc) != so->e_pid) {
1152 			effective_proc = proc_find(so->e_pid);
1153 		} else {
1154 			const unsigned char * real_proc_uuid = proc_executableuuid_addr(real_proc);
1155 			if (uuid_compare(real_proc_uuid, so->e_uuid)) {
1156 				effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
1157 			}
1158 		}
1159 	}
1160 
1161 #if defined(XNU_TARGET_OS_OSX)
1162 	lck_rw_lock_shared(&fd_cb->group->lck);
1163 	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1164 		if (so->so_rpid > 0) {
1165 			responsible_proc = proc_find(so->so_rpid);
1166 		}
1167 	}
1168 	lck_rw_done(&fd_cb->group->lck);
1169 #endif
1170 
1171 	real_src_proc = real_proc;
1172 
1173 	if (responsible_proc != PROC_NULL) {
1174 		src_proc = responsible_proc;
1175 		if (effective_proc != NULL) {
1176 			real_src_proc = effective_proc;
1177 		}
1178 	} else if (effective_proc != PROC_NULL) {
1179 		src_proc = effective_proc;
1180 	} else {
1181 		src_proc = real_proc;
1182 	}
1183 
1184 	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
1185 	if (error != 0) {
1186 		goto done;
1187 	}
1188 
1189 	if (real_src_proc != NULL && real_src_proc != src_proc) {
1190 		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
1191 		if (error != 0) {
1192 			goto done;
1193 		}
1194 	}
1195 
1196 done:
1197 	if (responsible_proc != PROC_NULL) {
1198 		proc_rele(responsible_proc);
1199 	}
1200 
1201 	if (effective_proc != PROC_NULL) {
1202 		proc_rele(effective_proc);
1203 	}
1204 
1205 	if (real_proc != PROC_NULL && release_real_proc) {
1206 		proc_rele(real_proc);
1207 	}
1208 
1209 	return error;
1210 }
1211 
/*
 * Send a packet to the flow divert provider over the group's kernel control
 * socket. If the control socket cannot take the packet right now (earlier
 * packets are already queued, or the enqueue returns ENOBUFS), the packet is
 * placed on the group's send queue for later flushing and 0 is returned.
 * Returns ECONNABORTED/EHOSTUNREACH when the pcb has no provider group.
 */
static int
flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet)
{
	int             error;

	if (fd_cb->group == NULL) {
		FDLOG0(LOG_ERR, fd_cb, "no provider, cannot send packet");
		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			error = ECONNABORTED;
		} else {
			error = EHOSTUNREACH;
		}
		return error;
	}

	lck_rw_lock_shared(&fd_cb->group->lck);

	/* Only try the control socket directly if nothing is already queued (preserves ordering) */
	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
		if (error) {
			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
		}
	} else {
		error = ENOBUFS;
	}

	if (error == ENOBUFS) {
		/*
		 * Queue the packet for later. lck_rw_lock_shared_to_exclusive() drops the
		 * lock entirely on failure, so re-acquire it exclusively in that case.
		 */
		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
			lck_rw_lock_exclusive(&fd_cb->group->lck);
		}
		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
		error = 0;
		/* Mark the group so the queue is flushed when the control socket drains */
		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
	}

	lck_rw_done(&fd_cb->group->lck);

	return error;
}
1251 
1252 static void
flow_divert_append_domain_name(char * domain_name __null_terminated,void * ctx)1253 flow_divert_append_domain_name(char *domain_name __null_terminated, void *ctx)
1254 {
1255 	mbuf_ref_t packet = (mbuf_ref_t)ctx;
1256 	size_t domain_name_length = 0;
1257 
1258 	if (packet == NULL || domain_name == NULL) {
1259 		return;
1260 	}
1261 
1262 	domain_name_length = strlen(domain_name);
1263 	if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1264 		int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, __terminated_by_to_indexable(domain_name));
1265 		if (error) {
1266 			FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1267 		}
1268 	}
1269 }
1270 
/*
 * Build the CONNECT packet sent to the flow divert provider for a new flow.
 * The packet carries process identity TLVs, traffic class, flow type, the
 * target endpoint (either from the connect token or from 'to'), the local
 * endpoint, outbound interface, flags, and a content filter ID. On success,
 * ownership of the packet transfers to *out_connect_packet; on failure the
 * partially built packet is freed. Consumes fd_cb->connect_token when one
 * is present.
 */
static int
flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_ref_t *out_connect_packet)
{
	int                     error           = 0;
	int                     flow_type       = 0;
	char *                  signing_id __indexable = NULL;
	uint32_t                sid_size        = 0;
	mbuf_ref_t              connect_packet  = NULL;
	cfil_sock_id_t          cfil_sock_id    = CFIL_SOCK_ID_NONE;
	const void              *cfil_id        = NULL;
	size_t                  cfil_id_size    = 0;
	struct inpcb            *inp            = sotoinpcb(so);
	struct ifnet            *ifp            = NULL;
	uint32_t                flags           = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
	if (error) {
		goto done;
	}

	/* Extract the signing ID from an HMAC-verified connect token, if present */
	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (find_error == 0 && sid_size > 0) {
			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
			if (signing_id != NULL) {
				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
			}
		}
	}

	// TODO: remove ternary operator after rdar://121487109 is fixed
	error = flow_divert_add_all_proc_info(fd_cb, so, p, NULL == signing_id ? NULL : __unsafe_null_terminated_from_indexable(signing_id), connect_packet);

	if (signing_id != NULL) {
		kfree_data(signing_id, sid_size + 1);
	}

	if (error) {
		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
	    sizeof(fd_cb->so->so_traffic_class),
	    &fd_cb->so->so_traffic_class);
	if (error) {
		goto done;
	}

	/* Only TCP (stream) and UDP (datagram) flows are supported */
	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
	} else {
		error = EINVAL;
		goto done;
	}
	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_FLOW_TYPE,
	    sizeof(flow_type),
	    &flow_type);

	if (error) {
		goto done;
	}

	if (fd_cb->connect_token != NULL) {
		/* The token already contains the target endpoint TLVs; splice it in whole */
		unsigned int token_len = m_length(fd_cb->connect_token);
		mbuf_concatenate(connect_packet, fd_cb->connect_token);
		mbuf_pkthdr_adjustlen(connect_packet, token_len);
		fd_cb->connect_token = NULL;
	} else {
		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
		if (error) {
			goto done;
		}

		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
	}

	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
		if (error) {
			goto done;
		}
	}

	/* Determine the outbound interface; fall back to the bound interface for bound sockets */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if ((inp->inp_flags & INP_BOUND_IF) ||
	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
		if (ifp == NULL) {
			ifp = inp->inp_boundifp;
		}
	}
	if (ifp != NULL) {
		uint32_t flow_if_index = ifp->if_index;
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
		    sizeof(flow_if_index), &flow_if_index);
		if (error) {
			goto done;
		}
	}

	/* TCP Fast Open capability */
	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
	}

	if (flags != 0) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
		if (error) {
			goto done;
		}
	}

	/* Attach a content filter identifier: the cfil sock ID, or the NECP client UUID when cfil is skipped */
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
	} else {
		cfil_sock_id = cfil_sock_id_from_socket(so);
	}

	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
		cfil_id = &cfil_sock_id;
		cfil_id_size = sizeof(cfil_sock_id);
	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
		cfil_id = &inp->necp_client_uuid;
		cfil_id_size = sizeof(inp->necp_client_uuid);
	}

	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
		if (error) {
			goto done;
		}
	}

done:
	if (!error) {
		*out_connect_packet = connect_packet;
	} else if (connect_packet != NULL) {
		mbuf_freem(connect_packet);
	}

	return error;
}
1423 
1424 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1425 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1426 {
1427 	int             error                   = 0;
1428 	mbuf_ref_t      connect_packet          = fd_cb->connect_packet;
1429 	mbuf_ref_t      saved_connect_packet    = NULL;
1430 
1431 	if (connect_packet != NULL) {
1432 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1433 		if (error) {
1434 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1435 			goto done;
1436 		}
1437 
1438 		error = flow_divert_send_packet(fd_cb, connect_packet);
1439 		if (error) {
1440 			goto done;
1441 		}
1442 
1443 		fd_cb->connect_packet = saved_connect_packet;
1444 		saved_connect_packet = NULL;
1445 	} else {
1446 		error = ENOENT;
1447 	}
1448 done:
1449 	if (saved_connect_packet != NULL) {
1450 		mbuf_freem(saved_connect_packet);
1451 	}
1452 
1453 	return error;
1454 }
1455 
1456 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1457 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1458 {
1459 	int             error       = 0;
1460 	mbuf_ref_t      packet      = NULL;
1461 	int             rbuff_space = 0;
1462 
1463 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1464 	if (error) {
1465 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1466 		goto done;
1467 	}
1468 
1469 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1470 	if (rbuff_space < 0) {
1471 		rbuff_space = 0;
1472 	}
1473 	rbuff_space = htonl(rbuff_space);
1474 	error = flow_divert_packet_append_tlv(packet,
1475 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1476 	    sizeof(rbuff_space),
1477 	    &rbuff_space);
1478 	if (error) {
1479 		goto done;
1480 	}
1481 
1482 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1483 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
1484 		if (error) {
1485 			goto done;
1486 		}
1487 	}
1488 
1489 	error = flow_divert_send_packet(fd_cb, packet);
1490 	if (error) {
1491 		goto done;
1492 	}
1493 
1494 done:
1495 	if (error && packet != NULL) {
1496 		mbuf_freem(packet);
1497 	}
1498 
1499 	return error;
1500 }
1501 
1502 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1503 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1504 {
1505 	int         error   = 0;
1506 	mbuf_ref_t  packet  = NULL;
1507 	uint32_t    zero    = 0;
1508 
1509 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1510 	if (error) {
1511 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1512 		goto done;
1513 	}
1514 
1515 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1516 	if (error) {
1517 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1518 		goto done;
1519 	}
1520 
1521 	how = htonl(how);
1522 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1523 	if (error) {
1524 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1525 		goto done;
1526 	}
1527 
1528 	error = flow_divert_send_packet(fd_cb, packet);
1529 	if (error) {
1530 		goto done;
1531 	}
1532 
1533 done:
1534 	if (error && packet != NULL) {
1535 		mbuf_freem(packet);
1536 	}
1537 
1538 	return error;
1539 }
1540 
1541 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1542 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1543 {
1544 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1545 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1546 		return SHUT_RDWR;
1547 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1548 		return SHUT_RD;
1549 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1550 		return SHUT_WR;
1551 	}
1552 
1553 	return -1;
1554 }
1555 
/*
 * Determine which close messages, if any, need to be sent to the tunnel and
 * send them, recording the directions in which the tunnel has been closed.
 * No close message is sent while data remains in the socket's send buffer.
 */
1560 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1561 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1562 {
1563 	int             how             = -1;
1564 
1565 	/* Do not send any close messages if there is still data in the send buffer */
1566 	if (fd_cb->so->so_snd.sb_cc == 0) {
1567 		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1568 			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1569 			how = SHUT_RD;
1570 		}
1571 		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1572 			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1573 			if (how == SHUT_RD) {
1574 				how = SHUT_RDWR;
1575 			} else {
1576 				how = SHUT_WR;
1577 			}
1578 		}
1579 	}
1580 
1581 	if (how != -1) {
1582 		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1583 		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1584 			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1585 			if (how != SHUT_RD) {
1586 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1587 			}
1588 			if (how != SHUT_WR) {
1589 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1590 			}
1591 		}
1592 	}
1593 }
1594 
1595 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_ref_t data,size_t data_len)1596 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len)
1597 {
1598 	mbuf_ref_t  packet = NULL;
1599 	mbuf_ref_t  last   = NULL;
1600 	int         error  = 0;
1601 
1602 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1603 	if (error || packet == NULL) {
1604 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1605 		goto done;
1606 	}
1607 
1608 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1609 		last = m_last(packet);
1610 		mbuf_setnext(last, data);
1611 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1612 	} else {
1613 		data_len = 0;
1614 	}
1615 	error = flow_divert_send_packet(fd_cb, packet);
1616 	if (error == 0 && data_len > 0) {
1617 		fd_cb->bytes_sent += data_len;
1618 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1619 	}
1620 
1621 done:
1622 	if (error) {
1623 		if (last != NULL) {
1624 			mbuf_setnext(last, NULL);
1625 		}
1626 		if (packet != NULL) {
1627 			mbuf_freem(packet);
1628 		}
1629 	}
1630 
1631 	return error;
1632 }
1633 
/*
 * Send a DATA packet carrying one datagram (or one fragment of a datagram)
 * to the provider. Optional TLVs: the target endpoint (when toaddr is set)
 * and an is-fragment marker. The datagram-size TLV is always appended
 * (zero for non-first fragments). The caller's data chain is consumed on
 * success and detached on failure so the caller retains ownership.
 */
static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
{
	mbuf_ref_t  packet = NULL;
	mbuf_ref_t  last   = NULL;
	int         error  = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	if (toaddr != NULL) {
		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
			goto done;
		}
	}
	/* The fragment marker TLV is only present on fragments */
	if (is_fragment) {
		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
			goto done;
		}
	}

	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
		goto done;
	}

	/* Chain the payload onto the packet header, if there is any */
	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		if (last != NULL) {
			/* Detach the caller's data so it is not freed along with the packet */
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1693 
1694 static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb * fd_cb,mbuf_ref_t datagram,size_t datagram_len,struct sockaddr * toaddr)1695 flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_ref_t datagram, size_t datagram_len, struct sockaddr *toaddr)
1696 {
1697 	mbuf_ref_t  next_data       = datagram;
1698 	size_t      remaining_len   = datagram_len;
1699 	mbuf_ref_t  remaining_data  = NULL;
1700 	int         error           = 0;
1701 	bool        first           = true;
1702 
1703 	while (remaining_len > 0 && next_data != NULL) {
1704 		size_t to_send = remaining_len;
1705 		remaining_data = NULL;
1706 
1707 		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
1708 			to_send = FLOW_DIVERT_CHUNK_SIZE;
1709 			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
1710 			if (error) {
1711 				break;
1712 			}
1713 		}
1714 
1715 		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
1716 		if (error) {
1717 			break;
1718 		}
1719 
1720 		first = false;
1721 		remaining_len -= to_send;
1722 		next_data = remaining_data;
1723 	}
1724 
1725 	if (error) {
1726 		if (next_data != NULL) {
1727 			mbuf_freem(next_data);
1728 		}
1729 		if (remaining_data != NULL) {
1730 			mbuf_freem(remaining_data);
1731 		}
1732 	}
1733 	return error;
1734 }
1735 
/*
 * Send as much buffered data as possible from the socket's send buffer to
 * the flow divert provider.  Stream sockets copy out successive chunks of
 * at most FLOW_DIVERT_CHUNK_SIZE bytes and drop only what was actually
 * sent; datagram sockets send one buffered record at a time, fragmenting
 * records larger than a single chunk.  When 'force' is set, the provider's
 * send window is ignored.
 */
static void
flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
{
	size_t      to_send;
	size_t      sent    = 0;
	int         error   = 0;
	mbuf_ref_t  buffer;

	to_send = fd_cb->so->so_snd.sb_cc;
	buffer = fd_cb->so->so_snd.sb_mb;

	if (buffer == NULL && to_send > 0) {
		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
		return;
	}

	/* Ignore the send window if force is enabled */
	if (!force && (to_send > fd_cb->send_window)) {
		to_send = fd_cb->send_window;
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		/* Stream: walk the buffer in chunk-sized pieces */
		while (sent < to_send) {
			mbuf_ref_t  data;
			size_t      data_len;

			data_len = to_send - sent;
			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
				data_len = FLOW_DIVERT_CHUNK_SIZE;
			}

			/* Copy (rather than consume) so the buffer stays intact on failure */
			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
				break;
			}

			/* flow_divert_send_data_packet owns 'data' on success */
			error = flow_divert_send_data_packet(fd_cb, data, data_len);
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}

			sent += data_len;
		}
		/* Drop only what was actually sent, then wake any blocked writers */
		sbdrop(&fd_cb->so->so_snd, (int)sent);
		sowwakeup(fd_cb->so);
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		mbuf_ref_t  data;
		mbuf_ref_t  m;
		size_t      data_len;

		/* Datagram: send one buffered record per iteration */
		while (buffer) {
			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);

			m = buffer;
			if (toaddr != NULL) {
				/* look for data in the chain */
				do {
					m = m->m_next;
					if (m != NULL && m->m_type == MT_DATA) {
						break;
					}
				} while (m);
				if (m == NULL) {
					/* unexpected */
					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
					goto move_on;
				}
			}
			data_len = mbuf_pkthdr_len(m);
			if (data_len > 0) {
				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
					break;
				}
			} else {
				data = NULL;
			}
			/* Datagrams larger than one chunk must be fragmented */
			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
			} else {
				/* the fragmenting path consumes 'data' even on failure */
				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
				data = NULL;
			}
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}
			sent += data_len;
move_on:
			/* Advance to the next record and drop the one just handled */
			buffer = buffer->m_nextpkt;
			(void) sbdroprecord(&(fd_cb->so->so_snd));
		}
	}

	if (sent > 0) {
		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
		/* Charge the bytes sent against the provider's send window */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}
	}
}
1847 
/*
 * Send application data to the flow divert provider, constrained by the
 * send window, buffering whatever cannot be sent immediately in the
 * socket's send buffer.  Consumes 'data' on all paths.  Returns 0 on
 * success, or ENOBUFS if data had to be dropped because the send buffer
 * was full.
 */
static int
flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_size, struct sockaddr *toaddr)
{
	size_t to_send = data_size;
	int error = 0;

	if (to_send > fd_cb->send_window) {
		to_send = fd_cb->send_window;
	}

	if (fd_cb->so->so_snd.sb_cc > 0) {
		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		size_t sent = 0;
		mbuf_ref_t remaining_data = data;
		size_t remaining_size = data_size;
		mbuf_ref_t pkt_data = NULL;
		/* Send up to 'to_send' bytes in chunks of at most FLOW_DIVERT_CHUNK_SIZE */
		while (sent < to_send && remaining_data != NULL && remaining_size > 0) {
			size_t  pkt_data_len;

			pkt_data = remaining_data;

			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
			} else {
				pkt_data_len = to_send - sent;
			}

			if (pkt_data_len < remaining_size) {
				/* Split off the chunk to send; the tail stays in remaining_data */
				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
					pkt_data = NULL;
					break;
				}
				remaining_size -= pkt_data_len;
			} else {
				/* The entire remainder fits in this chunk */
				remaining_data = NULL;
				remaining_size = 0;
			}

			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
			if (error) {
				break;
			}

			pkt_data = NULL;
			sent += pkt_data_len;
		}

		/* Charge what was sent against the send window */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}

		/* A send failure is not fatal here; unsent data is buffered below */
		error = 0;

		if (pkt_data != NULL) {
			/* Buffer the chunk that failed to send, if there is room */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(pkt_data);
				error = ENOBUFS;
			}
		}

		if (remaining_data != NULL) {
			/* Buffer whatever exceeded the send window */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(remaining_data);
				error = ENOBUFS;
			}
		}
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		int send_dgram_error = 0;
		/* Datagrams are all-or-nothing; zero-length datagrams are always sent */
		if (to_send || data_size == 0) {
			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
			} else {
				/* the fragmenting path consumes 'data' even on failure */
				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
				data = NULL;
			}
			if (send_dgram_error) {
				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
			} else {
				if (data_size >= fd_cb->send_window) {
					fd_cb->send_window = 0;
				} else {
					fd_cb->send_window -= data_size;
				}
				data = NULL;
			}
		}

		if (data != NULL) {
			/* buffer it */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (toaddr != NULL) {
					int append_error = 0;
					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
					}
				} else {
					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
					}
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
				mbuf_freem(data);
			}
		}
	}

	return error;
}
1978 
1979 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1980 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1981 {
1982 	int         error  = 0;
1983 	mbuf_ref_t  packet = NULL;
1984 
1985 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1986 	if (error) {
1987 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1988 		goto done;
1989 	}
1990 
1991 	error = flow_divert_send_packet(fd_cb, packet);
1992 	if (error) {
1993 		goto done;
1994 	}
1995 
1996 done:
1997 	if (error && packet != NULL) {
1998 		mbuf_freem(packet);
1999 	}
2000 
2001 	return error;
2002 }
2003 
2004 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)2005 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
2006 {
2007 	int         error  = 0;
2008 	mbuf_ref_t  packet = NULL;
2009 
2010 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
2011 	if (error) {
2012 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
2013 		goto done;
2014 	}
2015 
2016 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2017 	if (error) {
2018 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2019 		goto done;
2020 	}
2021 
2022 	error = flow_divert_send_packet(fd_cb, packet);
2023 	if (error) {
2024 		goto done;
2025 	}
2026 
2027 done:
2028 	if (error && packet != NULL) {
2029 		mbuf_freem(packet);
2030 	}
2031 
2032 	return error;
2033 }
2034 
2035 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2036 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2037 {
2038 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2039 
2040 	if (local_endpoint->sa_family == AF_INET6) {
2041 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2042 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2043 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2044 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2045 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2046 		}
2047 		if (inp->inp_lport == 0) {
2048 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2049 		}
2050 	} else if (local_endpoint->sa_family == AF_INET) {
2051 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2052 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2053 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2054 		}
2055 		if (inp->inp_lport == 0) {
2056 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
2057 		}
2058 	}
2059 }
2060 
2061 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2062 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2063 {
2064 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2065 
2066 	if (remote_endpoint->sa_family == AF_INET6) {
2067 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2068 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2069 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2070 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2071 		}
2072 		if (inp->inp_fport == 0) {
2073 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2074 		}
2075 	} else if (remote_endpoint->sa_family == AF_INET) {
2076 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
2077 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2078 		}
2079 		if (inp->inp_fport == 0) {
2080 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2081 		}
2082 	}
2083 }
2084 
/*
 * Determine the next kernel control unit to use for a flow, updating
 * *ctl_unit and *aggregate_unit in place so successive calls walk the
 * list of candidate units.  *is_aggregate is set when the returned unit
 * still has fallbacks remaining.  Returns the chosen unit, or 0 when no
 * unit is available.
 */
static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
{
	uint32_t result = *ctl_unit;

	// There are two models supported for deriving control units:
	// 1. A series of flow divert units that allow "transparently" failing
	//    over to the next unit. For this model, the aggregate_unit contains list
	//    of all control units (between 1 and 30) masked over each other.
	// 2. An indication that in-process flow divert should be preferred, with
	//    an out of process flow divert to fail over to. For this model, the
	//    ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
	//    is returned first, with the unpacked aggregate unit returned as a
	//    fallback.
	*is_aggregate = false;
	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
		bool found_unit = false;
		if (pid != 0) {
			// Look for an in-process group that is already open, and use that unit
			struct flow_divert_group *group = NULL;
			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
				if (group->in_process_pid == pid) {
					// Found an in-process group for our same PID, use it
					found_unit = true;
					result = group->ctl_unit;
					break;
				}
			}

			// If an in-process group isn't open yet, send a signal up through NECP to request one
			if (!found_unit) {
				necp_client_request_in_process_flow_divert(pid);
			}
		}

		// If a unit was found, return it
		if (found_unit) {
			if (aggregate_unit != NULL && *aggregate_unit != 0) {
				*is_aggregate = true;
			}
			// The next time around, the aggregate unit values will be picked up
			*ctl_unit = 0;
			return result;
		}

		// If no unit was found, fall through and clear out the ctl_unit
		result = 0;
		*ctl_unit = 0;
	}

	if (aggregate_unit != NULL && *aggregate_unit != 0) {
		uint32_t counter;
		struct flow_divert_group *lower_order_group = NULL;

		// Scan the aggregate bitmask (bit N => unit N+1) and pick the open
		// group with the lowest order value
		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
			if ((*aggregate_unit) & (1 << counter)) {
				struct flow_divert_group *group = NULL;
				group = flow_divert_group_lookup(counter + 1, NULL);

				if (group != NULL) {
					if (lower_order_group == NULL) {
						lower_order_group = group;
					} else if ((group->order < lower_order_group->order)) {
						lower_order_group = group;
					}
				}
			}
		}

		if (lower_order_group != NULL) {
			// Remove the chosen unit from the mask so the next call moves on
			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
			*is_aggregate = true;
			return lower_order_group->ctl_unit;
		} else {
			// No group in the mask is open; stop iterating
			*ctl_unit = 0;
			return result;
		}
	} else {
		*ctl_unit = 0;
		return result;
	}
}
2167 
/*
 * Fail the flow over to the next flow divert control group derived from
 * its aggregate unit list, and re-send the connect packet through the
 * new group.  Returns 0 on success; EALREADY if the next unit equals the
 * current one, or ENOENT when no usable group remains (callers then
 * disable flow divert for the socket).
 */
static int
flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
{
	int error = 0;
	uint32_t policy_control_unit = fd_cb->policy_control_unit;

	/* Detach from the current group before trying the next one */
	flow_divert_pcb_remove(fd_cb);

	do {
		struct flow_divert_group *next_group = NULL;
		bool is_aggregate = false;
		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);

		if (fd_cb->control_group_unit == next_ctl_unit) {
			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
			error = EALREADY;
			break;
		}

		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
			error = ENOENT;
			break;
		}

		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
		if (next_group == NULL) {
			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
			continue;
		}

		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);

		error = flow_divert_pcb_insert(fd_cb, next_group);
		if (error == 0) {
			/* Flag whether further fallbacks remain behind this unit */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}
		}
		FDGRP_RELEASE(next_group);
	} while (fd_cb->group == NULL);

	if (fd_cb->group == NULL) {
		return error ? error : ENOENT;
	}

	error = flow_divert_send_connect_packet(fd_cb);
	if (error) {
		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
		flow_divert_pcb_remove(fd_cb);
		error = ENOENT;
	}

	return error;
}
2225 
/*
 * Disable flow divert for this flow: restore the socket's original inpcb
 * state and protocol, dis-associate the flow divert PCB, re-issue the
 * connect if one was explicitly requested, and replay any data buffered
 * in the send buffer using the socket's original protocol.
 */
static int
flow_divert_disable(struct flow_divert_pcb *fd_cb)
{
	struct socket *so = NULL;
	mbuf_ref_t buffer;
	int error = 0;
	proc_t last_proc = NULL;
	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
	struct inpcb *inp = NULL;

	so = fd_cb->so;
	if (so == NULL) {
		goto done;
	}

	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");

	/* Restore the IP state */
	inp = sotoinpcb(so);
	inp->inp_vflag = fd_cb->original_vflag;
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;
	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
	inp->inp_fifscope = IFSCOPE_NONE;
	inp->in6p_fport = 0;
	/* If flow divert set the local address, clear it out */
	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
		inp->inp_lifscope = IFSCOPE_NONE;
	}
	inp->inp_last_outifp = fd_cb->original_last_outifp;
	inp->in6p_last_outifp = fd_cb->original_last_outifp6;

	/* Dis-associate the socket */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
	so->so_fd_pcb = NULL;
	fd_cb->so = NULL;

	FDRELEASE(fd_cb); /* Release the socket's reference */

	/* Revert back to the original protocol */
	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));

	/* Reset the socket state to avoid confusing NECP */
	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);

	last_proc = proc_find(so->last_pid);

	if (do_connect) {
		/* Connect using the original protocol */
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
			goto done;
		}
	}

	buffer = so->so_snd.sb_mb;
	if (buffer == NULL) {
		/* No buffered data, done */
		goto done;
	}

	/* Send any buffered data using the original protocol */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		mbuf_ref_t  data_to_send = NULL;
		size_t      data_len     = so->so_snd.sb_cc;

		/* Copy out the entire send buffer as one chain, then flush it */
		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
			goto done;
		}

		sbflush(&so->so_snd);

		if (data_to_send->m_flags & M_PKTHDR) {
			mbuf_pkthdr_setlen(data_to_send, data_len);
		}

		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
		    0,
		    data_to_send,
		    NULL,
		    NULL,
		    (last_proc != NULL ? last_proc : current_proc()));

		/* EWOULDBLOCK just means the data was queued, not an error */
		if (error && error != EWOULDBLOCK) {
			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
		} else {
			error = 0;
		}
	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct sockbuf *sb = &so->so_snd;
		MBUFQ_HEAD(send_queue_head) send_queue;
		MBUFQ_INIT(&send_queue);

		/* Flush the send buffer, moving all records to a temporary queue */
		while (sb->sb_mb != NULL) {
			mbuf_ref_t record = sb->sb_mb;
			mbuf_ref_t m = record;
			sb->sb_mb = sb->sb_mb->m_nextpkt;
			while (m != NULL) {
				sbfree(sb, m);
				m = m->m_next;
			}
			record->m_nextpkt = NULL;
			MBUFQ_ENQUEUE(&send_queue, record);
		}
		SB_EMPTY_FIXUP(sb);

		while (!MBUFQ_EMPTY(&send_queue)) {
			mbuf_ref_t next_record = MBUFQ_FIRST(&send_queue);
			mbuf_ref_t addr = NULL;
			mbuf_ref_t control = NULL;
			mbuf_ref_t last_control = NULL;
			mbuf_ref_t data = NULL;
			mbuf_ref_t m = next_record;
			struct sockaddr *to_endpoint = NULL;

			MBUFQ_DEQUEUE(&send_queue, next_record);

			/* Locate the address, control, and data mbufs within the record */
			while (m != NULL) {
				if (m->m_type == MT_SONAME) {
					addr = m;
				} else if (m->m_type == MT_CONTROL) {
					if (control == NULL) {
						control = m;
					}
					last_control = m;
				} else if (m->m_type == MT_DATA) {
					data = m;
					break;
				}
				m = m->m_next;
			}

			/* Only pass an explicit destination when no connect was issued */
			if (addr != NULL && !do_connect) {
				to_endpoint = flow_divert_get_buffered_target_address(addr);
				if (to_endpoint == NULL) {
					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
				}
			}

			if (data == NULL) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
				mbuf_freem(next_record);
				continue;
			}

			if (!(data->m_flags & M_PKTHDR)) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
				mbuf_freem(next_record);
				continue;
			}

			/* Detach data and control from the record before handing them off */
			if (addr != NULL) {
				addr->m_next = NULL;
			}

			if (last_control != NULL) {
				last_control->m_next = NULL;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    0,
			    data,
			    to_endpoint,
			    control,
			    (last_proc != NULL ? last_proc : current_proc()));

			if (addr != NULL) {
				mbuf_freem(addr);
			}

			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
			}
		}
	}
done:
	if (last_proc != NULL) {
		proc_rele(last_proc);
	}

	return error;
}
2416 
/*
 * Scope the flow's socket to the given output interface index, and
 * optionally derive a new local address appropriate for that interface.
 * No-op if the socket is gone, out_if_index is not positive, or the
 * socket is already scoped to that interface.
 */
static void
flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
{
	struct socket           *so             = NULL;
	struct inpcb            *inp            = NULL;
	struct ifnet            *current_ifp    = NULL;
	struct ifnet * __single new_ifp         = NULL;
	int                     error           = 0;

	so = fd_cb->so;
	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);

	if (out_if_index <= 0) {
		return;
	}

	if (inp->inp_vflag & INP_IPV6) {
		current_ifp = inp->in6p_last_outifp;
	} else {
		current_ifp = inp->inp_last_outifp;
	}

	if (current_ifp != NULL) {
		if (current_ifp->if_index == out_if_index) {
			/* No change */
			return;
		}

		/* Scope the socket to the given interface */
		error = inp_bindif(inp, out_if_index, &new_ifp);
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
			return;
		}

		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
			/* Get the appropriate address for the given interface */
			if (inp->inp_vflag & INP_IPV6) {
				inp->in6p_laddr = sa6_any.sin6_addr;
				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
			} else {
				inp->inp_laddr.s_addr = INADDR_ANY;
				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
			}

			if (error != 0) {
				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
			}
		}
	} else {
		/* No current interface: just resolve the index under the ifnet head lock */
		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(out_if_index)) {
			new_ifp = ifindex2ifnet[out_if_index];
		}
		ifnet_head_done();
	}

	/* Update the "last interface" of the socket */
	if (new_ifp != NULL) {
		if (inp->inp_vflag & INP_IPV6) {
			inp->in6p_last_outifp = new_ifp;
		} else {
			inp->inp_last_outifp = new_ifp;
		}

#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
		}
#endif /* SKYWALK */
	}
}
2493 
/*
 * Handle a connect-result packet from the flow divert provider.  Parses
 * the result TLVs (error code, send window, control unit, local/remote
 * endpoints, output interface index, application data), applies them to
 * the PCB and inpcb, and then either completes the connection, fails over
 * to the next control group, or tears the flow down on error.
 */
static void
flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t connect_error = 0;
	uint32_t ctl_unit = 0;
	int error = 0;
	union sockaddr_in_4_6 local_endpoint = {};
	union sockaddr_in_4_6 remote_endpoint = {};
	int out_if_index = 0;
	uint32_t send_window = 0;
	uint32_t app_data_length = 0;
	struct inpcb *inp = NULL;
	struct socket *so = fd_cb->so;
	bool local_address_is_valid = false;

	memset(&local_endpoint, 0, sizeof(local_endpoint));
	memset(&remote_endpoint, 0, sizeof(remote_endpoint));

	/* The error code and send window TLVs are mandatory */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
		return;
	}

	connect_error = ntohl(connect_error);
	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
		return;
	}

	/* The remaining TLVs are optional */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
	}

	/* First pass only fetches the app data length; contents are copied below */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
	}

	/* Missing optional TLVs are not errors */
	error = 0;

	if (!SO_IS_DIVERTED(so)) {
		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
		return;
	}

	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
		FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
		return;
	}

	inp = sotoinpcb(so);

	if (connect_error || error) {
		goto set_socket_state;
	}

	if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
		if (local_endpoint.sa.sa_family == AF_INET) {
			local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
			if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
				local_address_is_valid = true;
				fd_cb->local_endpoint = local_endpoint;
				inp->inp_laddr.s_addr = INADDR_ANY;
			} else {
				/* Address unusable; keep only the port */
				fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
			}
		} else if (local_endpoint.sa.sa_family == AF_INET6) {
			local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
			if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
				local_address_is_valid = true;
				fd_cb->local_endpoint = local_endpoint;
				inp->in6p_laddr = sa6_any.sin6_addr;
			} else {
				/* Address unusable; keep only the port */
				fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
			}
		}
	}

	/* Derive a new local address only if the provider didn't give a valid one */
	flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
	flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));

	if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
		if (remote_endpoint.sa.sa_family == AF_INET) {
			remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
		} else if (remote_endpoint.sa.sa_family == AF_INET6) {
			remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
		}
		flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
	}

	if (app_data_length > 0) {
		uint8_t * app_data = NULL;
		app_data = kalloc_data(app_data_length, Z_WAITOK);
		if (app_data != NULL) {
			error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
			if (error == 0) {
				FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
				/* Replace any app data from an earlier result */
				if (fd_cb->app_data != NULL) {
					kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
				}
				fd_cb->app_data = app_data;
				fd_cb->app_data_length = app_data_length;
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
				kfree_data(app_data, app_data_length);
			}
		} else {
			FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
		}
	}

	if (error) {
		goto set_socket_state;
	}

	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto set_socket_state;
	}

	/* A non-zero control unit moves the flow to a different group */
	ctl_unit = ntohl(ctl_unit);
	if (ctl_unit > 0) {
		int insert_error = 0;
		struct flow_divert_group *grp = NULL;

		if (ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
			error = EINVAL;
			goto set_socket_state;
		}

		grp = flow_divert_group_lookup(ctl_unit, fd_cb);
		if (grp == NULL) {
			error = ECONNRESET;
			goto set_socket_state;
		}

		flow_divert_pcb_remove(fd_cb);
		insert_error = flow_divert_pcb_insert(fd_cb, grp);
		FDGRP_RELEASE(grp);

		if (insert_error != 0) {
			error = ECONNRESET;
			goto set_socket_state;
		}
	}

	fd_cb->send_window = ntohl(send_window);

set_socket_state:
	if (!connect_error && !error) {
		FDLOG0(LOG_INFO, fd_cb, "sending connect result");
		error = flow_divert_send_connect_result(fd_cb);
	}

	if (connect_error || error) {
		if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
			/* The plugin rejected the flow and the control unit is an aggregation of multiple plugins, try to move to the next one */
			error = flow_divert_try_next_group(fd_cb);
			if (error && fd_cb->policy_control_unit == 0) {
				/* No more plugins available, disable flow divert */
				error = flow_divert_disable(fd_cb);
			}

			if (error == 0) {
				return;
			}
			so->so_error = (uint16_t)error;
		} else if (!connect_error) {
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
			so->so_error = (uint16_t)error;
			/* The plugin did not close the flow, so notify the plugin */
			flow_divert_send_close_if_needed(fd_cb);
		} else {
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			so->so_error = (uint16_t)connect_error;
		}
		flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
	} else {
#if NECP
		/* Update NECP client with connected five-tuple */
		if (!uuid_is_null(inp->necp_client_uuid)) {
			/* necp_client_assign_from_socket must be called unlocked */
			socket_unlock(so, 0);
			necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
			socket_lock(so, 0);
			if (!SO_IS_DIVERTED(so)) {
				/* The socket was closed while it was unlocked */
				return;
			}
		}
#endif /* NECP */

		flow_divert_send_buffered_data(fd_cb, FALSE);
		soisconnected(so);
	}

	/* We don't need the connect packet any more */
	if (fd_cb->connect_packet != NULL) {
		mbuf_freem(fd_cb->connect_packet);
		fd_cb->connect_packet = NULL;
	}

	/* We don't need the original remote endpoint any more */
	free_sockaddr(fd_cb->original_remote_endpoint);
}
2719 
2720 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2721 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2722 {
2723 	uint32_t close_error = 0;
2724 	int error = 0;
2725 	int how = 0;
2726 	struct socket *so = fd_cb->so;
2727 	bool is_connected = (SOCK_TYPE(so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2728 
2729 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2730 	if (error) {
2731 		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2732 		return;
2733 	}
2734 
2735 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2736 	if (error) {
2737 		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2738 		return;
2739 	}
2740 
2741 	how = ntohl(how);
2742 
2743 	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2744 
2745 	if (!SO_IS_DIVERTED(so)) {
2746 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2747 		return;
2748 	}
2749 
2750 	so->so_error = (uint16_t)ntohl(close_error);
2751 
2752 	flow_divert_update_closed_state(fd_cb, how, true, true);
2753 
2754 	/* Only do this for connected flows because "shutdown by peer" doesn't make sense for unconnected datagram flows */
2755 	how = flow_divert_tunnel_how_closed(fd_cb);
2756 	if (how == SHUT_RDWR) {
2757 		flow_divert_disconnect_socket(so, is_connected, true);
2758 	} else if (how == SHUT_RD && is_connected) {
2759 		socantrcvmore(so);
2760 	} else if (how == SHUT_WR && is_connected) {
2761 		socantsendmore(so);
2762 	}
2763 }
2764 
2765 static mbuf_ref_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2766 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2767 {
2768 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2769 	bool need_recvdstaddr = false;
2770 	/* Socket flow tracking needs to see the local address */
2771 	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2772 	if ((inp->inp_vflag & INP_IPV4) &&
2773 	    fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2774 	    ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2775 		return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2776 	} else if ((inp->inp_vflag & INP_IPV6) &&
2777 	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2778 	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2779 		struct in6_pktinfo pi6;
2780 		memset(&pi6, 0, sizeof(pi6));
2781 		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2782 
2783 		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2784 	}
2785 	return NULL;
2786 }
2787 
/*
 * Handle a DATA packet from the provider: deliver the payload that follows
 * <offset> (and, for datagram sockets, an optional remote-address TLV) to
 * the socket's receive buffer.
 *
 * Returns 0 on success or an errno; ENOBUFS indicates the receive buffer
 * was full, in which case delivery is refused and a read notification is
 * deferred until the app reads some data (see flow_divert_rcvd and
 * FLOW_DIVERT_NOTIFY_ON_RECEIVED).
 */
static int
flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, size_t offset)
{
	int error = 0;
	struct socket *so = fd_cb->so;
	mbuf_ref_t data = NULL;
	size_t  data_size;
	struct sockaddr_storage remote_address;
	boolean_t got_remote_sa = FALSE;
	boolean_t appended = FALSE;
	boolean_t append_success = FALSE;

	if (!SO_IS_DIVERTED(so)) {
		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
		return error;
	}

	if (sbspace(&so->so_rcv) == 0) {
		/* Receive buffer full: flag the PCB so flow_divert_rcvd() sends a
		 * read notification once the app frees up some space. */
		error = ENOBUFS;
		fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
		FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
		return error;
	}

	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		uint32_t val_size = 0;

		/* check if we got remote address with data */
		memset(&remote_address, 0, sizeof(remote_address));
		error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
		if (error || val_size > sizeof(remote_address)) {
			FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
			error = 0;
		} else {
			/* Clamp ss_len so later copies can't read past the storage */
			if (remote_address.ss_len > sizeof(remote_address)) {
				remote_address.ss_len = sizeof(remote_address);
			}
			/* validate the address */
			if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
				got_remote_sa = TRUE;
			} else {
				FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
			}
			/* Skip the address TLV (type byte + length word + value) to reach the payload */
			offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
		}
	}

	data_size = (mbuf_pkthdr_len(packet) - offset);

	if (so->so_state & SS_CANTRCVMORE) {
		FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
		return error;
	}

	if (SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) {
		FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(so));
		return error;
	}

	FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);

	/* Split the payload off of the packet header; on success `data` owns it
	 * and is consumed by the sbappend* calls below. */
	error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
	if (error || data == NULL) {
		FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
		return error;
	}

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		appended = (sbappendstream(&so->so_rcv, data) != 0);
		append_success = TRUE;
	} else {
		/* Datagram: append with the sender's address — the TLV address if one
		 * was provided, otherwise the connected peer address — plus an
		 * optional control mbuf carrying the local address. */
		struct sockaddr * __single append_sa = NULL;
		mbuf_ref_t mctl;

		if (got_remote_sa == TRUE) {
			error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
		} else {
			if (SOCK_CHECK_DOM(so, AF_INET6)) {
				error = in6_mapped_peeraddr(so, &append_sa);
			} else {
				error = in_getpeeraddr(so, &append_sa);
			}
		}
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
		}

		mctl = flow_divert_create_control_mbuf(fd_cb);
		int append_error = 0;
		appended = sbappendaddr(&so->so_rcv, append_sa, data, mctl, &append_error);
		if (appended || append_error == 0) {
			append_success = TRUE;
		} else {
			FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
		}

		free_sockaddr(append_sa);
	}

	if (append_success) {
		/* Account for the bytes even if no wakeup is needed */
		fd_cb->bytes_received += data_size;
		flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
	}

	if (appended) {
		sorwakeup(so);
	}

	return error;
}
2898 
2899 static void
flow_divert_handle_read_notification(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2900 flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2901 {
2902 	uint32_t        read_count              = 0;
2903 	int             error                   = 0;
2904 
2905 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
2906 	if (error) {
2907 		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
2908 		return;
2909 	}
2910 
2911 	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
2912 
2913 	if (!SO_IS_DIVERTED(fd_cb->so)) {
2914 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
2915 		return;
2916 	}
2917 
2918 	fd_cb->send_window += ntohl(read_count);
2919 	flow_divert_send_buffered_data(fd_cb, FALSE);
2920 }
2921 
2922 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_ref_t packet,int offset)2923 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
2924 {
2925 	int error         = 0;
2926 	uint32_t key_size = 0;
2927 	int log_level     = 0;
2928 	uint32_t flags    = 0;
2929 	int32_t order     = FLOW_DIVERT_ORDER_LAST;
2930 
2931 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2932 	if (error) {
2933 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2934 		return;
2935 	}
2936 
2937 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2938 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2939 		return;
2940 	}
2941 
2942 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2943 	if (!error) {
2944 		nil_pcb.log_level = (uint8_t)log_level;
2945 	}
2946 
2947 	lck_rw_lock_exclusive(&group->lck);
2948 
2949 	if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2950 		FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2951 		lck_rw_done(&group->lck);
2952 		return;
2953 	}
2954 
2955 	if (group->token_key != NULL) {
2956 		kfree_data_sized_by(group->token_key, group->token_key_size);
2957 	}
2958 
2959 	group->token_key = kalloc_data(key_size, Z_WAITOK);
2960 	group->token_key_size = key_size;
2961 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2962 	if (error) {
2963 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2964 		kfree_data_sized_by(group->token_key, group->token_key_size);
2965 		lck_rw_done(&group->lck);
2966 		return;
2967 	}
2968 
2969 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2970 	if (!error) {
2971 		group->flags = flags;
2972 	}
2973 
2974 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
2975 	if (!error) {
2976 		FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
2977 		group->order = order;
2978 	}
2979 
2980 	lck_rw_done(&group->lck);
2981 }
2982 
2983 static void
flow_divert_handle_properties_update(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2984 flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2985 {
2986 	int error = 0;
2987 	int out_if_index = 0;
2988 	uint32_t app_data_length = 0;
2989 	struct socket *so = fd_cb->so;
2990 
2991 	FDLOG0(LOG_INFO, fd_cb, "received a properties update");
2992 
2993 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2994 	if (error) {
2995 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
2996 	}
2997 
2998 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2999 	if (error) {
3000 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
3001 	}
3002 
3003 	if (!SO_IS_DIVERTED(so)) {
3004 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
3005 		return;
3006 	}
3007 
3008 	if (out_if_index > 0) {
3009 		flow_divert_scope(fd_cb, out_if_index, true);
3010 		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
3011 	}
3012 
3013 	if (app_data_length > 0) {
3014 		uint8_t * app_data __indexable = NULL;
3015 		app_data = kalloc_data(app_data_length, Z_WAITOK);
3016 		if (app_data != NULL) {
3017 			error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
3018 			if (error == 0) {
3019 				if (fd_cb->app_data != NULL) {
3020 					kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
3021 				}
3022 				fd_cb->app_data = app_data;
3023 				fd_cb->app_data_length = app_data_length;
3024 			} else {
3025 				FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
3026 				kfree_data(app_data, app_data_length);
3027 			}
3028 		} else {
3029 			FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
3030 		}
3031 	}
3032 }
3033 
/*
 * Handle an APP_MAP_CREATE message: replace the group's signing-identifier
 * trie with one built from the signing ID TLVs in the packet.  The packet
 * supplies a prefix count used to size the node and child-map pools; all
 * trie memory is carved out of a single allocation (nodes, then child
 * maps, then bytes).  On any failure the group is left with an empty trie.
 */
static void
flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
{
	size_t                  bytes_mem_size      = 0;
	size_t                  child_maps_mem_size = 0;
	size_t                  nodes_mem_size      = 0;
	size_t                  trie_memory_size    = 0;
	int                     cursor              = 0;
	int                     error               = 0;
	struct flow_divert_trie new_trie;
	int                     insert_error        = 0;
	int                     prefix_count        = -1;
	int                     signing_id_count    = 0;
	size_t                  bytes_count         = 0;
	size_t                  nodes_count         = 0;
	size_t                  maps_count          = 0;

	lck_rw_lock_exclusive(&group->lck);

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	memset(&new_trie, 0, sizeof(new_trie));

	/* Get the number of shared prefixes in the new set of signing ID strings */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);

	if (prefix_count < 0 || error) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
		lck_rw_done(&group->lck);
		return;
	}

	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
			signing_id_count = 0;
			break;
		}
		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
			signing_id_count = 0;
			break;
		}
		signing_id_count++;
	}

	if (signing_id_count == 0) {
		lck_rw_done(&group->lck);
		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
		return;
	}

	/* All counts are checked with overflow-safe arithmetic, then bounded by
	 * UINT16_MAX because trie indices are 16-bit (NULL_TRIE_IDX sentinel). */
	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
		return;
	}

	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
		return;
	}

	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
		lck_rw_done(&group->lck);
		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
		return;
	}

	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
	    nodes_count, maps_count, bytes_count);

	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
		lck_rw_done(&group->lck);
		return;
	}

	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
		lck_rw_done(&group->lck);
		return;
	}

	/* Single allocation partitioned into three pools: nodes | child maps | bytes */
	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
	new_trie.memory_size = trie_memory_size;
	if (new_trie.memory == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
		lck_rw_done(&group->lck);
		return;
	}

	/* Initialize the free lists */
	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
	new_trie.nodes_count = (uint16_t)nodes_count;

	new_trie.nodes_free_next = 0;
	memset(new_trie.nodes, 0, nodes_mem_size);

	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
	new_trie.child_maps_count = (uint16_t)maps_count;
	new_trie.child_maps_size = child_maps_mem_size;

	new_trie.child_maps_free_next = 0;
	/* 0xff fill makes every child-map slot start as NULL_TRIE_IDX */
	memset(new_trie.child_maps, 0xff, child_maps_mem_size);

	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
	new_trie.bytes_count = (uint16_t)bytes_count;

	new_trie.bytes_free_next = 0;
	memset(new_trie.bytes, 0, bytes_mem_size);

	/* The root is an empty node */
	new_trie.root = trie_node_alloc(&new_trie);

	/* Add each signing ID to the trie */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
			insert_error = EINVAL;
			break;
		}
		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
			uint16_t new_node_idx;
			/* Copy the signing ID into the bytes pool, then insert it */
			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
			if (error) {
				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
				insert_error = EINVAL;
				break;
			}
			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
			if (new_node_idx == NULL_TRIE_IDX) {
				insert_error = EINVAL;
				break;
			}
		} else {
			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
			insert_error = ENOBUFS;
			break;
		}
	}

	/* Install the new trie only if every signing ID was inserted */
	if (!insert_error) {
		group->signing_id_trie = new_trie;
	} else {
		kfree_data_sized_by(new_trie.memory, new_trie.memory_size);
	}

	lck_rw_done(&group->lck);
}
3203 
/*
 * Respond to a FLOW_STATES_REQUEST from the provider: build a single
 * FLOW_DIVERT_PKT_FLOW_STATES packet containing one FLOW_STATE TLV per
 * flow in the group and enqueue it on the group's kernel control unit.
 */
static void
flow_divert_handle_flow_states_request(struct flow_divert_group *group)
{
	struct flow_divert_pcb *fd_cb;
	mbuf_ref_t packet = NULL;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;
	int error = 0;
	uint32_t ctl_unit = 0;

	SLIST_INIT(&tmp_list);

	error = flow_divert_packet_init(&nil_pcb, FLOW_DIVERT_PKT_FLOW_STATES, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_packet_init failed: %d, cannot send flow states", error);
		return;
	}

	lck_rw_lock_shared(&group->lck);

	if (!MBUFQ_EMPTY(&group->send_queue)) {
		FDLOG0(LOG_WARNING, &nil_pcb, "flow_divert_handle_flow_states_request: group send queue is not empty");
	}

	ctl_unit = group->ctl_unit;

	/* Collect retained PCB references under the shared group lock; the
	 * per-flow work below is done after the lock is dropped (NOTE(review):
	 * presumably so the group lock is never held while taking per-flow
	 * PCB/socket locks — confirm the intended lock ordering). */
	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	lck_rw_done(&group->lck);

	SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
		FDLOCK(fd_cb);
		/* A NULL so means the socket already detached from this PCB */
		if (fd_cb->so != NULL) {
			struct flow_divert_flow_state state = {};
			struct socket *so = fd_cb->so;
			flow_divert_lock_socket(so, fd_cb);

			state.conn_id = fd_cb->hash;
			state.bytes_written_by_app = fd_cb->bytes_written_by_app;
			state.bytes_sent = fd_cb->bytes_sent;
			state.bytes_received = fd_cb->bytes_received;
			state.send_window = fd_cb->send_window;
			state.send_buffer_bytes = so->so_snd.sb_cc;

			error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_FLOW_STATE, sizeof(state), &state);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to add a flow state: %d", error);
			}

			flow_divert_unlock_socket(so, fd_cb);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);
	}

	/* On enqueue failure the packet must be freed here; on success the
	 * control socket owns it */
	error = ctl_enqueuembuf(g_flow_divert_kctl_ref, ctl_unit, packet, CTL_DATA_EOR);
	if (error) {
		FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_handle_flow_states_request: ctl_enqueuembuf returned an error: %d", error);
		mbuf_freem(packet);
	}
}
3267 
/*
 * Process one packet received from a flow divert provider on the kernel
 * control socket.  Packets whose connection ID is 0 are group-level
 * messages; all others are dispatched to the matching PCB's handler with
 * both the PCB lock and the socket lock held.  The packet mbuf is always
 * freed here, regardless of outcome.
 */
static int
flow_divert_input(mbuf_ref_t packet, struct flow_divert_group *group)
{
	struct flow_divert_packet_header    hdr;
	int                                 error  = 0;
	struct flow_divert_pcb              *fd_cb;

	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
		error = EINVAL;
		goto done;
	}

	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
		error = ENOBUFS;
		goto done;
	}

	/* conn_id arrives in network byte order */
	hdr.conn_id = ntohl(hdr.conn_id);

	if (hdr.conn_id == 0) {
		/* Group-level control messages are not tied to any flow */
		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_GROUP_INIT:
			flow_divert_handle_group_init(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_FLOW_STATES_REQUEST:
			flow_divert_handle_flow_states_request(group);
			break;
		default:
			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}
		goto done;
	}

	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
	if (fd_cb == NULL) {
		/* CLOSE/READ_NOTIFY for an already-gone flow is expected churn, so
		 * don't log those */
		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
		}
		goto done;
	}

	FDLOCK(fd_cb);
	/* A NULL so means the socket already detached from this PCB */
	if (fd_cb->so != NULL) {
		struct socket *so = fd_cb->so;
		flow_divert_lock_socket(so, fd_cb);

		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_CONNECT_RESULT:
			flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_CLOSE:
			flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_DATA:
			error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_READ_NOTIFY:
			flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
			flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
			break;
		default:
			FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}

		flow_divert_unlock_socket(so, fd_cb);
	}
	FDUNLOCK(fd_cb);

	FDRELEASE(fd_cb);

done:
	mbuf_freem(packet);
	return error;
}
3352 
/*
 * Tear down every flow in a group: mark the group defunct, drain its send
 * queue, and abort each flow's socket with ECONNABORTED.  Called when the
 * group's control connection goes away.
 */
static void
flow_divert_close_all(struct flow_divert_group *group)
{
	struct flow_divert_pcb                  *fd_cb;
	SLIST_HEAD(, flow_divert_pcb)   tmp_list;

	SLIST_INIT(&tmp_list);

	lck_rw_lock_exclusive(&group->lck);

	MBUFQ_DRAIN(&group->send_queue);

	/* Collect retained PCB references under the group lock; each flow is
	 * torn down after the lock is dropped (NOTE(review): presumably so the
	 * group lock is never held while taking per-flow PCB/socket locks —
	 * confirm the intended lock ordering). */
	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	/* Prevents future (re)initialization; see flow_divert_handle_group_init */
	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;

	lck_rw_done(&group->lck);

	while (!SLIST_EMPTY(&tmp_list)) {
		fd_cb = SLIST_FIRST(&tmp_list);
		FDLOCK(fd_cb);
		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
		/* A NULL so means the socket already detached from this PCB */
		if (fd_cb->so != NULL) {
			struct socket *so = fd_cb->so;
			flow_divert_lock_socket(so, fd_cb);
			flow_divert_pcb_remove(fd_cb);
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			so->so_error = ECONNABORTED;
			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
			flow_divert_unlock_socket(so, fd_cb);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);
	}
}
3391 
/*
 * Detach flow divert from a socket: flush any buffered data toward the
 * provider, notify it of the close, remove the PCB from its group, and
 * sever the socket<->PCB linkage before dropping the socket's reference.
 */
void
flow_divert_detach(struct socket *so)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return;
	}

	/* Unlink the socket from the PCB first so nothing else diverts through it */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_fd_pcb = NULL;

	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);

	if (fd_cb->group != NULL) {
		/* Last-ditch effort to send any buffered data */
		flow_divert_send_buffered_data(fd_cb, TRUE);
		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
		flow_divert_send_close_if_needed(fd_cb);
		/* Remove from the group */
		flow_divert_pcb_remove(fd_cb);
	}

	sbflush(&so->so_snd);
	sbflush(&so->so_rcv);

	flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);

	/* Clear fd_cb->so under the PCB lock.  When the plugin doesn't already
	 * hold that lock, the socket lock is dropped before taking it
	 * (NOTE(review): inferred from the unlock/FDLOCK dance — this looks
	 * like PCB-lock-before-socket-lock ordering; confirm). */
	if (!fd_cb->plugin_locked) {
		socket_unlock(so, 0);
		FDLOCK(fd_cb);
	}
	fd_cb->so = NULL;
	if (!fd_cb->plugin_locked) {
		FDUNLOCK(fd_cb);
		socket_lock(so, 0);
	}

	FDRELEASE(fd_cb);       /* Release the socket's reference */
}
3432 
3433 static int
flow_divert_close(struct socket * so)3434 flow_divert_close(struct socket *so)
3435 {
3436 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3437 
3438 	if (!SO_IS_DIVERTED(so)) {
3439 		return EINVAL;
3440 	}
3441 
3442 	FDLOG0(LOG_INFO, fd_cb, "Closing");
3443 
3444 	if (SOCK_TYPE(so) == SOCK_STREAM) {
3445 		soisdisconnecting(so);
3446 		sbflush(&so->so_rcv);
3447 	}
3448 
3449 	FDRETAIN(fd_cb);
3450 
3451 	flow_divert_send_buffered_data(fd_cb, TRUE);
3452 	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
3453 	flow_divert_send_close_if_needed(fd_cb);
3454 
3455 	/* Remove from the group */
3456 	flow_divert_pcb_remove(fd_cb);
3457 
3458 	flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
3459 
3460 	FDRELEASE(fd_cb);
3461 
3462 	return 0;
3463 }
3464 
3465 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3466 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3467     sae_connid_t cid __unused)
3468 {
3469 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3470 		return EINVAL;
3471 	}
3472 
3473 	return flow_divert_close(so);
3474 }
3475 
3476 static int
flow_divert_shutdown(struct socket * so)3477 flow_divert_shutdown(struct socket *so)
3478 {
3479 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3480 
3481 	if (!SO_IS_DIVERTED(so)) {
3482 		return EINVAL;
3483 	}
3484 
3485 	FDLOG0(LOG_INFO, fd_cb, "Can't send more");
3486 
3487 	socantsendmore(so);
3488 
3489 	FDRETAIN(fd_cb);
3490 
3491 	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
3492 	flow_divert_send_close_if_needed(fd_cb);
3493 	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
3494 		flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
3495 	}
3496 
3497 	FDRELEASE(fd_cb);
3498 
3499 	return 0;
3500 }
3501 
3502 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3503 flow_divert_rcvd(struct socket *so, int flags __unused)
3504 {
3505 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3506 	int space = 0;
3507 
3508 	if (!SO_IS_DIVERTED(so)) {
3509 		return EINVAL;
3510 	}
3511 
3512 	space = sbspace(&so->so_rcv);
3513 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3514 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3515 	    (space > 0) &&
3516 	    flow_divert_send_read_notification(fd_cb) == 0) {
3517 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3518 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3519 	}
3520 
3521 	return 0;
3522 }
3523 
3524 static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet,struct sockaddr * toaddr)3525 flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr)
3526 {
3527 	int error = 0;
3528 	int port  = 0;
3529 
3530 	if (!flow_divert_is_sockaddr_valid(toaddr)) {
3531 		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3532 		error = EINVAL;
3533 		goto done;
3534 	}
3535 
3536 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, SA_BYTES(toaddr));
3537 	if (error) {
3538 		goto done;
3539 	}
3540 
3541 	if (toaddr->sa_family == AF_INET) {
3542 		port = ntohs((satosin(toaddr))->sin_port);
3543 	} else {
3544 		port = ntohs((satosin6(toaddr))->sin6_port);
3545 	}
3546 
3547 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3548 	if (error) {
3549 		goto done;
3550 	}
3551 
3552 done:
3553 	return error;
3554 }
3555 
3556 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer)3557 flow_divert_get_buffered_target_address(mbuf_ref_t buffer)
3558 {
3559 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3560 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3561 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3562 			return toaddr;
3563 		}
3564 	}
3565 	return NULL;
3566 }
3567 
3568 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3569 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3570 {
3571 	switch (addr->sa_family) {
3572 	case AF_INET:
3573 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3574 			return FALSE;
3575 		}
3576 		break;
3577 	case AF_INET6:
3578 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3579 			return FALSE;
3580 		}
3581 		break;
3582 	default:
3583 		return FALSE;
3584 	}
3585 	return TRUE;
3586 }
3587 
3588 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3589 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3590     struct sockaddr **dup)
3591 {
3592 	int                                             error           = 0;
3593 	struct sockaddr                 *result;
3594 	struct sockaddr_storage ss;
3595 
3596 	if (addr != NULL) {
3597 		result = addr;
3598 	} else {
3599 		memset(&ss, 0, sizeof(ss));
3600 		ss.ss_family = family;
3601 		if (ss.ss_family == AF_INET) {
3602 			ss.ss_len = sizeof(struct sockaddr_in);
3603 		} else if (ss.ss_family == AF_INET6) {
3604 			ss.ss_len = sizeof(struct sockaddr_in6);
3605 		} else {
3606 			error = EINVAL;
3607 		}
3608 		result = (struct sockaddr *)&ss;
3609 	}
3610 
3611 	if (!error) {
3612 		*dup = dup_sockaddr(result, 1);
3613 		if (*dup == NULL) {
3614 			error = ENOBUFS;
3615 		}
3616 	}
3617 
3618 	return error;
3619 }
3620 
3621 static void
flow_divert_disconnect_socket(struct socket * so,bool is_connected,bool delay_if_needed)3622 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
3623 {
3624 	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
3625 		soisdisconnected(so);
3626 	}
3627 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
3628 		if (delay_if_needed) {
3629 			cfil_sock_is_dead(so);
3630 		} else {
3631 			struct inpcb *inp = sotoinpcb(so);
3632 			if (SOCK_CHECK_DOM(so, PF_INET6)) {
3633 				in6_pcbdetach(inp);
3634 			} else {
3635 				in_pcbdetach(inp);
3636 			}
3637 		}
3638 	}
3639 }
3640 
3641 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3642 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3643 {
3644 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3645 
3646 	if (!SO_IS_DIVERTED(so)) {
3647 		return EINVAL;
3648 	}
3649 
3650 	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3651 		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3652 			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3653 		}
3654 	}
3655 
3656 	if (SOCK_DOM(so) == PF_INET) {
3657 		return g_tcp_protosw->pr_ctloutput(so, sopt);
3658 	} else if (SOCK_DOM(so) == PF_INET6) {
3659 		return g_tcp6_protosw->pr_ctloutput(so, sopt);
3660 	}
3661 	return 0;
3662 }
3663 
/*
 * Common connect path, used both by explicit connect() (implicit == false)
 * and by implicit connects triggered by the first send on an unconnected
 * datagram socket (implicit == true).
 *
 * On the first call this builds the connect packet: it validates and saves
 * the original remote endpoint, selects a local address/interface for the
 * destination, and stashes the packet in fd_cb->connect_packet.  The packet
 * is sent immediately unless the socket holds pre-connect data
 * (SOF1_PRECONNECT_DATA), in which case sending is deferred until the first
 * send/receive (see flow_divert_preconnect).  Later calls just send the
 * saved packet.
 */
static errno_t
flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
	int                     error           = 0;
	struct inpcb            *inp            = sotoinpcb(so);
	struct sockaddr_in      *sinp;
	mbuf_ref_t              connect_packet  = NULL;
	int                     do_send         = 1;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	/* Without a control group there is no provider to connect through. */
	if (fd_cb->group == NULL) {
		error = ENETUNREACH;
		goto done;
	}

	if (inp == NULL) {
		error = EINVAL;
		goto done;
	} else if (inp->inp_state == INPCB_STATE_DEAD) {
		/* Surface any pending socket error, consuming it. */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
		} else {
			error = EINVAL;
		}
		goto done;
	}

	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
		error = EALREADY;
		goto done;
	}

	FDLOG0(LOG_INFO, fd_cb, "Connecting");

	/* First connect attempt: build and save the connect packet. */
	if (fd_cb->connect_packet == NULL) {
		struct sockaddr_in sin = {};
		struct ifnet * __single ifp = NULL;

		if (to == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
			error = EINVAL;
			goto done;
		}

		if (!flow_divert_is_sockaddr_valid(to)) {
			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
			error = EINVAL;
			goto done;
		}

		/* Remember the caller's endpoint and inpcb state so they can
		 * be restored if the flow is later torn down. */
		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
		if (fd_cb->original_remote_endpoint == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
			error = ENOMEM;
			goto done;
		}
		fd_cb->original_vflag = inp->inp_vflag;
		fd_cb->original_last_outifp = inp->inp_last_outifp;
		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;

		/* IPv4 multicast destinations are not supported. */
		sinp = (struct sockaddr_in *)(void *)to;
		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto done;
		}

		/* On a dual-stack socket, rewrite a v4-mapped IPv6 destination
		 * as plain IPv4 (the stack temporary `sin` outlives its use
		 * below because `to` only aliases it within this scope). */
		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
			struct sockaddr_in6 sin6 = {};
			sin6.sin6_family = AF_INET6;
			sin6.sin6_len = sizeof(struct sockaddr_in6);
			sin6.sin6_port = satosin6(to)->sin6_port;
			sin6.sin6_addr = satosin6(to)->sin6_addr;
			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
				in6_sin6_2_sin(&sin, &sin6);
				to = (struct sockaddr *)&sin;
			}
		}

		if (to->sa_family == AF_INET6) {
			struct sockaddr_in6 *to6 = satosin6(to);

			/* Pin the inpcb to IPv6 and pick a local address/route. */
			inp->inp_vflag &= ~INP_IPV4;
			inp->inp_vflag |= INP_IPV6;
			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
			if (error) {
				/* Local-address selection failure is fatal only
				 * for transparent flows with a specific dest. */
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				/* in6_pcbladdr returned a referenced ifp; record and release. */
				inp->in6p_last_outifp = ifp;
				ifnet_release(ifp);
			}

			/* Move any embedded scope id out of the address words
			 * into sin6_scope_id (local endpoint first, then dest). */
			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
			    in6_embedded_scope &&
			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
			}

			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
			    in6_embedded_scope &&
			    to6->sin6_addr.s6_addr16[1] != 0) {
				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
				to6->sin6_addr.s6_addr16[1] = 0;
			}
		} else if (to->sa_family == AF_INET) {
			/* Pin the inpcb to IPv4 and pick a local address/route. */
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
			fd_cb->local_endpoint.sin.sin_family = AF_INET;
			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->inp_last_outifp = ifp;
				ifnet_release(ifp);
			}
		} else {
			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
		}

		/* NOTE(review): `||` collapses these checks to 0/1, so a
		 * failure surfaces as error 1 (EPERM) rather than the
		 * checker's own code — confirm this is intended. */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
		    !implicit || /* connect() was called or */
		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
		}

		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
		if (error) {
			goto done;
		}

		/* Explicit connects (and all TCP) fix the endpoints now;
		 * implicit datagram flows leave them to be set later. */
		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
			flow_divert_set_remote_endpoint(fd_cb, to);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (implicit) {
			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
		}

		/* With pre-connect data, defer sending until first send/recv. */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
			do_send = 0;
		}

		/* Ownership of the packet moves to the PCB. */
		fd_cb->connect_packet = connect_packet;
		connect_packet = NULL;
	} else {
		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
	}

	if (do_send) {
		error = flow_divert_send_connect_packet(fd_cb);
		if (error) {
			goto done;
		}

		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
	}

	/* Token-less datagram flows are considered connected right away;
	 * everything else waits for the provider's connect result. */
	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
		soisconnected(so);
	} else {
		soisconnecting(so);
	}

done:
	return error;
}
3862 
3863 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3864 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3865 {
3866 #if CONTENT_FILTER
3867 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3868 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3869 		if (error != 0) {
3870 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3871 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3872 			return error;
3873 		}
3874 	}
3875 #endif /* CONTENT_FILTER */
3876 
3877 	return flow_divert_connect_out_internal(so, to, p, false);
3878 }
3879 
/*
 * Shared implementation for connectx(2) on flow-diverted sockets.
 *
 * Optionally binds the socket to the requested interface, starts the
 * connect, and — if `auio` carries initial data — sends it, dropping the
 * socket lock around the potentially blocking send.  On success with
 * `pcid` non-NULL, connection id 1 is reported (one connection per socket).
 */
static int
flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
    struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
{
	struct inpcb *inp = sotoinpcb(so);
	int error;

	if (inp == NULL) {
		return EINVAL;
	}

	VERIFY(dst != NULL);

#if CONTENT_FILTER && NECP
	/* Token-based stream flows refresh their NECP policy with the
	 * destination before any content filter is attached. */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
	}
#endif /* CONTENT_FILTER && NECP */

	/* bind socket to the specified interface, if requested */
	if (ifscope != IFSCOPE_NONE &&
	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
		return error;
	}

	error = flow_divert_connect_out(so, dst, p);

	if (error != 0) {
		return error;
	}

	/* if there is data, send it */
	if (auio != NULL) {
		user_ssize_t datalen = 0;

		/* Drop the socket lock across the send; pru_sosend may block. */
		socket_unlock(so, 0);

		VERIFY(bytes_written != NULL);

		datalen = uio_resid(auio);
		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
		socket_lock(so, 0);

		/* Report how much was actually queued, even on EWOULDBLOCK. */
		if (error == 0 || error == EWOULDBLOCK) {
			*bytes_written = datalen - uio_resid(auio);
		}

		/*
		 * sosend returns EWOULDBLOCK if it's a non-blocking
		 * socket or a timeout occurred (this allows to return
		 * the amount of queued data through sendit()).
		 *
		 * However, connectx() returns EINPROGRESS in case of a
		 * blocking socket. So we change the return value here.
		 */
		if (error == EWOULDBLOCK) {
			error = EINPROGRESS;
		}
	}

	if (error == 0 && pcid != NULL) {
		*pcid = 1;      /* there is only 1 connection for a TCP */
	}

	return error;
}
3948 
3949 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3950 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3951     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3952     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3953     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3954 {
3955 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3956 }
3957 
3958 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3959 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3960     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3961     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3962     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3963 {
3964 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3965 }
3966 
/*
 * pru_send handler for diverted sockets: frames the application's data and
 * hands it to the flow divert provider instead of the network stack.
 *
 * Performs an implicit connect (using `to`, or the content-filter-saved
 * remote address for filtered UDP) if the flow has not started connecting
 * yet.  Both `data` and `control` are always consumed, on success and on
 * every error path.
 */
static errno_t
flow_divert_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
{
	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
	int                     error   = 0;
	struct inpcb            *inp;
#if CONTENT_FILTER
	struct m_tag *cfil_tag = NULL;
#endif

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		error = ECONNRESET;
		goto done;
	}

	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
		/* The provider considers this datagram flow to be closed, so no data can be sent */
		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
		error = EHOSTUNREACH;
		goto done;
	}

#if CONTENT_FILTER
	/*
	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
	 * retrieve the CFIL saved remote address from the mbuf and use it.
	 */
	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
		struct sockaddr * __single cfil_faddr = NULL;
		/* cfil_tag must stay alive while `to` aliases cfil_faddr;
		 * it is freed at `done`. */
		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
		if (cfil_tag) {
			to = (struct sockaddr *)(void *)cfil_faddr;
		}
		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
	}
#endif

	/* Implicit connect */
	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
		FDLOG0(LOG_INFO, fd_cb, "implicit connect");

		error = flow_divert_connect_out_internal(so, to, p, true);
		if (error) {
			goto done;
		}
	} else {
		/* NOTE(review): `||` collapses these checks to 0/1, so a
		 * failure surfaces as error 1 (EPERM) rather than the
		 * checker's own code — confirm this is intended. */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}
	}

	if (data != NULL) {
		/* Compute the payload size: the packet header carries it when
		 * present, otherwise sum the chain's segment lengths. */
		size_t data_size = 0;
		if (mbuf_flags(data) & M_PKTHDR) {
			data_size = mbuf_pkthdr_len(data);
		} else {
			for (mbuf_t blob = data; blob != NULL; blob = mbuf_next(blob)) {
				data_size += mbuf_len(blob);
			}
		}

		FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", data_size);
		fd_cb->bytes_written_by_app += data_size;

		error = flow_divert_send_app_data(fd_cb, data, data_size, to);

		/* flow_divert_send_app_data consumed the mbuf chain;
		 * don't free it again at `done`. */
		data = NULL;

		if (error) {
			goto done;
		}
	}

	/* PRUS_EOF means "send then shut down the write side". */
	if (flags & PRUS_EOF) {
		flow_divert_shutdown(so);
	}

done:
	if (data) {
		mbuf_freem(data);
	}
	if (control) {
		mbuf_freem(control);
	}
#if CONTENT_FILTER
	if (cfil_tag) {
		m_tag_free(cfil_tag);
	}
#endif

	return error;
}
4067 
4068 static int
flow_divert_preconnect(struct socket * so)4069 flow_divert_preconnect(struct socket *so)
4070 {
4071 	int error = 0;
4072 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4073 
4074 	if (!SO_IS_DIVERTED(so)) {
4075 		return EINVAL;
4076 	}
4077 
4078 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4079 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4080 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
4081 		if (error) {
4082 			return error;
4083 		}
4084 
4085 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4086 	}
4087 
4088 	soclearfastopen(so);
4089 
4090 	return error;
4091 }
4092 
4093 static void
flow_divert_set_protosw(struct socket * so)4094 flow_divert_set_protosw(struct socket *so)
4095 {
4096 	if (SOCK_DOM(so) == PF_INET) {
4097 		so->so_proto = &g_flow_divert_in_protosw;
4098 	} else {
4099 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4100 	}
4101 }
4102 
4103 static void
flow_divert_set_udp_protosw(struct socket * so)4104 flow_divert_set_udp_protosw(struct socket *so)
4105 {
4106 	if (SOCK_DOM(so) == PF_INET) {
4107 		so->so_proto = &g_flow_divert_in_udp_protosw;
4108 	} else {
4109 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4110 	}
4111 }
4112 
4113 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_ref_t data,struct sockaddr * to,mbuf_ref_t control,struct proc * p)4114 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
4115 {
4116 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4117 	struct inpcb *inp;
4118 	int error = 0;
4119 
4120 	inp = sotoinpcb(so);
4121 	if (inp == NULL) {
4122 		error = EINVAL;
4123 		goto done;
4124 	}
4125 
4126 	if (fd_cb == NULL) {
4127 		error = flow_divert_pcb_init(so);
4128 		fd_cb  = so->so_fd_pcb;
4129 		if (error != 0 || fd_cb == NULL) {
4130 			goto done;
4131 		}
4132 	}
4133 	return flow_divert_data_out(so, flags, data, to, control, p);
4134 
4135 done:
4136 	if (data) {
4137 		mbuf_freem(data);
4138 	}
4139 	if (control) {
4140 		mbuf_freem(control);
4141 	}
4142 
4143 	return error;
4144 }
4145 
/*
 * Create a flow divert PCB for `so` and attach it to a control group.
 *
 * flow_divert_derive_kernel_control_unit() selects a group unit from the
 * socket's last_pid and the supplied policy/aggregate units; the loop
 * retries while insertion reports ENOENT (no such group yet) until a group
 * accepts the PCB or a hard error occurs.  On success the socket's protosw
 * is switched to the flow divert variant for its type; on failure the PCB
 * reference is dropped.
 */
static errno_t
flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
{
	errno_t error = 0;
	struct flow_divert_pcb *fd_cb = NULL;
	uint32_t agg_unit = aggregate_unit;
	uint32_t policy_control_unit = ctl_unit;
	bool is_aggregate = false;

	/* Already diverted — nothing to do. */
	if (so->so_flags & SOF_FLOW_DIVERT) {
		return EALREADY;
	}

	fd_cb = flow_divert_pcb_create(so);
	if (fd_cb == NULL) {
		return ENOMEM;
	}

	do {
		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
		/* Valid units are 1..GROUP_COUNT_MAX-1 or in-process units
		 * at/above FLOW_DIVERT_IN_PROCESS_UNIT_MIN. */
		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
			error = EINVAL;
			break;
		}

		error = flow_divert_add_to_group(fd_cb, group_unit);
		if (error == 0) {
			/* The group took the PCB: wire it to the socket and
			 * record how the unit was derived. */
			so->so_fd_pcb = fd_cb;
			so->so_flags |= SOF_FLOW_DIVERT;
			fd_cb->control_group_unit = group_unit;
			fd_cb->policy_control_unit = ctl_unit;
			fd_cb->aggregate_unit = agg_unit;
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}

			/* Route future socket operations through flow divert. */
			if (SOCK_TYPE(so) == SOCK_STREAM) {
				flow_divert_set_protosw(so);
			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
				flow_divert_set_udp_protosw(so);
			}

			FDLOG0(LOG_INFO, fd_cb, "Created");
		} else if (error != ENOENT) {
			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
		}
	} while (error == ENOENT); /* retry with the next candidate group */

	if (error != 0) {
		FDRELEASE(fd_cb);
	}

	return error;
}
4203 
4204 errno_t
flow_divert_pcb_init(struct socket * so)4205 flow_divert_pcb_init(struct socket *so)
4206 {
4207 	struct inpcb *inp = sotoinpcb(so);
4208 	uint32_t aggregate_units = 0;
4209 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4210 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4211 }
4212 
/*
 * setsockopt handler that installs a flow divert token on a socket.
 *
 * The token is a TLV blob minted by the provider: it names the control
 * unit (and optionally a key unit, aggregate unit, and log level) and may
 * carry an HMAC.  After validating the socket (TCP/UDP over IPv4/IPv6,
 * TCP must still be CLOSED) and verifying the HMAC when a valid control
 * unit is present, the flow divert PCB is initialized and the token mbuf's
 * ownership moves to the PCB (fd_cb->connect_token).
 */
errno_t
flow_divert_token_set(struct socket *so, struct sockopt *sopt)
{
	uint32_t        ctl_unit        = 0;
	uint32_t        key_unit        = 0;
	uint32_t        aggregate_unit  = 0;
	int             error           = 0;
	int             hmac_error      = 0;
	mbuf_ref_t      token           = NULL;

	/* A socket can only carry one token. */
	if (so->so_flags & SOF_FLOW_DIVERT) {
		error = EALREADY;
		goto done;
	}

	if (g_init_result) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
		error = ENOPROTOOPT;
		goto done;
	}

	/* Only TCP or UDP over IPv4/IPv6 can be diverted; a TCP socket
	 * must not have started its state machine yet. */
	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
		error = EINVAL;
		goto done;
	} else {
		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
			struct tcpcb *tp = sototcpcb(so);
			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
				error = EINVAL;
				goto done;
			}
		}
	}

	/* Copy the caller's token into an mbuf chain. */
	error = soopt_getm(sopt, &token);
	if (error) {
		token = NULL;
		goto done;
	}

	error = soopt_mcopyin(sopt, token);
	if (error) {
		token = NULL;
		goto done;
	}

	/* Optional key unit: selects which group's key verifies the HMAC.
	 * Out-of-range values fall back to the control unit. */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
	if (!error) {
		key_unit = ntohl(key_unit);
		if (key_unit >= GROUP_COUNT_MAX) {
			key_unit = 0;
		}
	} else if (error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
		goto done;
	} else {
		key_unit = 0;
	}

	/* The control unit TLV is mandatory. */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
		goto done;
	}

	/* Aggregate unit is optional (ENOENT tolerated). */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
	if (error && error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
		goto done;
	}

	/* A valid kernel control unit is required */
	ctl_unit = ntohl(ctl_unit);
	aggregate_unit = ntohl(aggregate_unit);

	/* Verify the token's HMAC against the keyed group (ENOENT means
	 * the group has no key — tolerated; tracked via hmac_error below). */
	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
		if (hmac_error && hmac_error != ENOENT) {
			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
			error = hmac_error;
			goto done;
		}
	}

	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
	if (error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		int log_level = LOG_NOTICE;

		/* Optional per-flow log level override; absence is not an error. */
		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
		if (error == 0) {
			fd_cb->log_level = (uint8_t)log_level;
		}
		error = 0;

		/* Token ownership moves to the PCB; don't free it below. */
		fd_cb->connect_token = token;
		token = NULL;

		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
	}

	/* hmac_error == 0 means the HMAC was actually verified (not just
	 * skipped), so record that on the PCB. */
	if (hmac_error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		if (fd_cb != NULL) {
			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
		}
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4330 
/*
 * getsockopt handler that mints a flow divert token describing this flow:
 * control unit, flow id, optional app data, key unit, and an HMAC computed
 * with the control group's key.  Calling with a NULL value buffer just
 * reports the token's size.
 */
errno_t
flow_divert_token_get(struct socket *so, struct sockopt *sopt)
{
	uint32_t                    ctl_unit;
	int                         error                   = 0;
	uint8_t                     hmac[SHA_DIGEST_LENGTH];
	struct flow_divert_pcb      *fd_cb                  = so->so_fd_pcb;
	mbuf_ref_t                  token                   = NULL;
	struct flow_divert_group    *control_group          = NULL;

	if (!SO_IS_DIVERTED(so)) {
		error = EINVAL;
		goto done;
	}

	/* No group means no provider to mint a token against. */
	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto done;
	}

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		goto done;
	}

	/* TLV integers are carried in network byte order. */
	ctl_unit = htonl(fd_cb->group->ctl_unit);

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
	if (error) {
		goto done;
	}

	/* Include opaque app data if the flow carries any. */
	if (fd_cb->app_data != NULL) {
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
		if (error) {
			goto done;
		}
	}

	/* Sign the token with the control group's key, holding the group's
	 * lock shared while its key material is read. */
	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
	if (control_group != NULL) {
		lck_rw_lock_shared(&control_group->lck);
		ctl_unit = htonl(control_group->ctl_unit);
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
		if (!error) {
			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
		}
		lck_rw_done(&control_group->lck);
		FDGRP_RELEASE(control_group);
	} else {
		error = ENOPROTOOPT;
	}

	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
	if (error) {
		goto done;
	}

	if (sopt->sopt_val == USER_ADDR_NULL) {
		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
		sopt->sopt_valsize = mbuf_pkthdr_len(token);
		goto done;
	}

	error = soopt_mcopyout(sopt, token);
	if (error) {
		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
		goto done;
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4418 
/*
 * Tear down a control group: zero and free its token key material, free
 * the signing-id trie, and return the group to its zone.  The group's
 * rwlock is held exclusively while the contents are torn down; the final
 * zfree happens after the lock is dropped, so the caller must hold the
 * last reference.
 */
void
flow_divert_group_destroy(struct flow_divert_group *group)
{
	lck_rw_lock_exclusive(&group->lck);

	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);

	/* Zeroize the key before freeing — it is secret material. */
	if (group->token_key != NULL) {
		memset(group->token_key, 0, group->token_key_size);
		kfree_data_sized_by(group->token_key, group->token_key_size);
	}

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	lck_rw_done(&group->lck);

	zfree(flow_divert_group_zone, group);
}
4442 
4443 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4444 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4445 {
4446 	struct flow_divert_group *new_group = NULL;
4447 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4448 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4449 	RB_INIT(&new_group->pcb_tree);
4450 	new_group->ctl_unit = unit;
4451 	new_group->in_process_pid = pid;
4452 	MBUFQ_INIT(&new_group->send_queue);
4453 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4454 	new_group->ref_count = 1;
4455 	new_group->order = FLOW_DIVERT_ORDER_LAST;
4456 	return new_group;
4457 }
4458 
/*
 * Kernel-control setup callback: allocate a group for a new control
 * connection and pick its unit number.
 *
 * A request for FLOW_DIVERT_IN_PROCESS_UNIT gets the lowest free unit in
 * the in-process range (the sorted list is scanned for a gap).  Any other
 * request requires root and takes the first free slot in the global group
 * array.  *unitinfo receives the new group (or NULL on failure).
 */
static errno_t
flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
{
	if (unit == NULL || unitinfo == NULL) {
		return EINVAL;
	}

	struct flow_divert_group *new_group = NULL;
	errno_t error = 0;
	/* NOTE(review): the group list/array and g_active_group_count are
	 * modified below while holding this lock *shared* — confirm an
	 * exclusive hold is not required here. */
	lck_rw_lock_shared(&g_flow_divert_group_lck);
	if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
		// Return next unused in-process unit
		u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
		struct flow_divert_group *group_next = NULL;
		TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
			if (group_next->ctl_unit > unit_cursor) {
				// Found a gap, lets fill it in
				break;
			}
			unit_cursor = group_next->ctl_unit + 1;
			if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
				break;
			}
		}
		if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
			error = EBUSY;
		} else {
			*unit = unit_cursor;
			new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
			/* Keep the list sorted by unit: insert before the first
			 * group with a higher unit, else append. */
			if (group_next != NULL) {
				TAILQ_INSERT_BEFORE(group_next, new_group, chain);
			} else {
				TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
			}
			g_active_group_count++;
		}
	} else {
		/* Non-in-process groups are a privileged resource. */
		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
			error = EPERM;
		} else {
			/* Lazily allocate the global group array. */
			if (g_flow_divert_groups == NULL) {
				g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
				    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
			}

			// Return next unused group unit
			bool found_unused_unit = false;
			u_int32_t unit_cursor;
			for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
				struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
				if (group == NULL) {
					// Open slot, assign this one
					*unit = unit_cursor;
					new_group = flow_divert_allocate_group(*unit, 0);
					g_flow_divert_groups[*unit] = new_group;
					found_unused_unit = true;
					g_active_group_count++;
					break;
				}
			}
			if (!found_unused_unit) {
				error = EBUSY;
			}
		}
	}
	lck_rw_done(&g_flow_divert_group_lck);

	/* NULL on failure; flow_divert_kctl_connect validates this later. */
	*unitinfo = new_group;

	return error;
}
4530 
4531 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4532 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4533 {
4534 	if (unitinfo == NULL) {
4535 		return EINVAL;
4536 	}
4537 
4538 	// Just validate. The group will already have been allocated.
4539 	struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4540 	if (group == NULL || sac->sc_unit != group->ctl_unit) {
4541 		FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4542 		    sac->sc_unit, group ? group->ctl_unit : 0);
4543 		return EINVAL;
4544 	}
4545 
4546 	return 0;
4547 }
4548 
4549 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4550 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4551 {
4552 	struct flow_divert_group    *group  = NULL;
4553 	errno_t                     error   = 0;
4554 
4555 	if (unitinfo == NULL) {
4556 		return 0;
4557 	}
4558 
4559 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4560 
4561 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4562 
4563 	if (g_active_group_count == 0) {
4564 		panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4565 		    unit, g_active_group_count);
4566 	}
4567 
4568 	if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4569 		if (unit >= GROUP_COUNT_MAX) {
4570 			return EINVAL;
4571 		}
4572 
4573 		if (g_flow_divert_groups == NULL) {
4574 			panic("flow divert group %u is disconnecting, but groups array is NULL",
4575 			    unit);
4576 		}
4577 		group = g_flow_divert_groups[unit];
4578 
4579 		if (group != (struct flow_divert_group *)unitinfo) {
4580 			panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4581 		}
4582 
4583 		g_flow_divert_groups[unit] = NULL;
4584 	} else {
4585 		group = (struct flow_divert_group *)unitinfo;
4586 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4587 			panic("flow divert group %u is disconnecting, but in-process group list is empty",
4588 			    unit);
4589 		}
4590 
4591 		TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4592 	}
4593 
4594 	g_active_group_count--;
4595 
4596 	if (g_active_group_count == 0) {
4597 		kfree_type(struct flow_divert_group *,
4598 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4599 		g_flow_divert_groups = NULL;
4600 	}
4601 
4602 	lck_rw_done(&g_flow_divert_group_lck);
4603 
4604 	if (group != NULL) {
4605 		flow_divert_close_all(group);
4606 		FDGRP_RELEASE(group);
4607 	} else {
4608 		error = EINVAL;
4609 	}
4610 
4611 	return error;
4612 }
4613 
4614 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_ref_t m,__unused int flags)4615 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_ref_t m, __unused int flags)
4616 {
4617 	errno_t error = 0;
4618 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4619 	if (group != NULL) {
4620 		error = flow_divert_input(m, group);
4621 		FDGRP_RELEASE(group);
4622 	} else {
4623 		error = ENOENT;
4624 	}
4625 	return error;
4626 }
4627 
4628 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4629 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4630 {
4631 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4632 	if (group == NULL) {
4633 		return;
4634 	}
4635 
4636 	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4637 		struct flow_divert_pcb                  *fd_cb;
4638 		SLIST_HEAD(, flow_divert_pcb)   tmp_list;
4639 
4640 		lck_rw_lock_exclusive(&group->lck);
4641 
4642 		while (!MBUFQ_EMPTY(&group->send_queue)) {
4643 			mbuf_ref_t next_packet;
4644 			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4645 			next_packet = MBUFQ_FIRST(&group->send_queue);
4646 			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4647 			if (error) {
4648 				FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4649 				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4650 				lck_rw_done(&group->lck);
4651 				return;
4652 			}
4653 			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4654 		}
4655 
4656 		SLIST_INIT(&tmp_list);
4657 
4658 		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4659 			FDRETAIN(fd_cb);
4660 			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4661 		}
4662 
4663 		lck_rw_done(&group->lck);
4664 
4665 		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4666 			FDLOCK(fd_cb);
4667 			if (fd_cb->so != NULL) {
4668 				struct socket *so = fd_cb->so;
4669 				flow_divert_lock_socket(so, fd_cb);
4670 				if (fd_cb->group != NULL) {
4671 					flow_divert_send_buffered_data(fd_cb, FALSE);
4672 				}
4673 				flow_divert_unlock_socket(so, fd_cb);
4674 			}
4675 			FDUNLOCK(fd_cb);
4676 			FDRELEASE(fd_cb);
4677 		}
4678 	}
4679 
4680 	FDGRP_RELEASE(group);
4681 }
4682 
4683 static int
flow_divert_kctl_init(void)4684 flow_divert_kctl_init(void)
4685 {
4686 	struct kern_ctl_reg     ctl_reg;
4687 	int                     result;
4688 
4689 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4690 
4691 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4692 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4693 
4694 	// Do not restrict to privileged processes. flow_divert_kctl_setup checks
4695 	// permissions separately.
4696 	ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4697 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4698 
4699 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4700 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4701 	ctl_reg.ctl_send = flow_divert_kctl_send;
4702 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4703 	ctl_reg.ctl_setup = flow_divert_kctl_setup;
4704 
4705 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4706 
4707 	if (result) {
4708 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4709 		return result;
4710 	}
4711 
4712 	return 0;
4713 }
4714 
4715 void
flow_divert_init(void)4716 flow_divert_init(void)
4717 {
4718 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4719 	nil_pcb.log_level = LOG_NOTICE;
4720 
4721 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4722 
4723 	VERIFY(g_tcp_protosw != NULL);
4724 
4725 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4726 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4727 
4728 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4729 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4730 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4731 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4732 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4733 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4734 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4735 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4736 
4737 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4738 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4739 
4740 	/*
4741 	 * Socket filters shouldn't attach/detach to/from this protosw
4742 	 * since pr_protosw is to be used instead, which points to the
4743 	 * real protocol; if they do, it is a bug and we should panic.
4744 	 */
4745 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4746 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4747 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4748 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4749 
4750 	/* UDP */
4751 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4752 	VERIFY(g_udp_protosw != NULL);
4753 
4754 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4755 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4756 
4757 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4758 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4759 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4760 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4761 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4762 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4763 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4764 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4765 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4766 
4767 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4768 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4769 
4770 	/*
4771 	 * Socket filters shouldn't attach/detach to/from this protosw
4772 	 * since pr_protosw is to be used instead, which points to the
4773 	 * real protocol; if they do, it is a bug and we should panic.
4774 	 */
4775 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4776 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4777 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4778 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4779 
4780 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4781 
4782 	VERIFY(g_tcp6_protosw != NULL);
4783 
4784 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4785 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4786 
4787 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4788 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4789 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4790 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4791 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4792 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4793 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4794 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4795 
4796 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4797 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4798 	/*
4799 	 * Socket filters shouldn't attach/detach to/from this protosw
4800 	 * since pr_protosw is to be used instead, which points to the
4801 	 * real protocol; if they do, it is a bug and we should panic.
4802 	 */
4803 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4804 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4805 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4806 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4807 
4808 	/* UDP6 */
4809 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4810 
4811 	VERIFY(g_udp6_protosw != NULL);
4812 
4813 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4814 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4815 
4816 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4817 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4818 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4819 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4820 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4821 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4822 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4823 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4824 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4825 
4826 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4827 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4828 	/*
4829 	 * Socket filters shouldn't attach/detach to/from this protosw
4830 	 * since pr_protosw is to be used instead, which points to the
4831 	 * real protocol; if they do, it is a bug and we should panic.
4832 	 */
4833 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4834 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4835 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4836 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4837 
4838 	TAILQ_INIT(&g_flow_divert_in_process_group_list);
4839 
4840 	g_init_result = flow_divert_kctl_init();
4841 	if (g_init_result) {
4842 		goto done;
4843 	}
4844 
4845 done:
4846 	if (g_init_result != 0) {
4847 		if (g_flow_divert_kctl_ref != NULL) {
4848 			ctl_deregister(g_flow_divert_kctl_ref);
4849 			g_flow_divert_kctl_ref = NULL;
4850 		}
4851 	}
4852 }
4853