xref: /xnu-11215.41.3/bsd/netinet/flow_divert.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #if CONTENT_FILTER
73 #include <net/content_filter.h>
74 #endif /* CONTENT_FILTER */
75 
/* Per-PCB state flags, stored in fd_cb->flags */
#define FLOW_DIVERT_CONNECT_STARTED             0x00000001
#define FLOW_DIVERT_READ_CLOSED                 0x00000002
#define FLOW_DIVERT_WRITE_CLOSED                0x00000004
#define FLOW_DIVERT_TUNNEL_RD_CLOSED            0x00000008
#define FLOW_DIVERT_TUNNEL_WR_CLOSED            0x00000010
#define FLOW_DIVERT_HAS_HMAC                    0x00000040
#define FLOW_DIVERT_NOTIFY_ON_RECEIVED          0x00000080
#define FLOW_DIVERT_IMPLICIT_CONNECT            0x00000100
#define FLOW_DIVERT_DID_SET_LOCAL_ADDR          0x00000200
#define FLOW_DIVERT_HAS_TOKEN                   0x00000400
#define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR       0x00000800
#define FLOW_DIVERT_FLOW_IS_TRANSPARENT         0x00001000

/*
 * Logging helpers. Every message is prefixed with the PCB's connection
 * hash so log lines for a single flow can be correlated. FDLOG takes
 * printf-style arguments; FDLOG0 takes a bare message.
 */
#define FDLOG(level, pcb, format, ...) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)

#define FDLOG0(level, pcb, msg) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)

/*
 * PCB reference counting. FDRELEASE destroys the PCB when the last
 * reference is dropped (OSDecrementAtomic returns the pre-decrement
 * value, so 1 means this was the final reference). Both macros are
 * no-ops on NULL.
 */
#define FDRETAIN(pcb)                   if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count)
#define FDRELEASE(pcb)                                                                                                          \
	do {                                                                                                                                    \
	        if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) {       \
	                flow_divert_pcb_destroy(pcb);                                                                   \
	        }                                                                                                                                       \
	} while (0)

/* Group reference counting, same last-reference convention as FDRELEASE */
#define FDGRP_RETAIN(grp)  if ((grp) != NULL) OSIncrementAtomic(&(grp)->ref_count)
#define FDGRP_RELEASE(grp) if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) flow_divert_group_destroy(grp)

/* Per-PCB mutex */
#define FDLOCK(pcb)                             lck_mtx_lock(&(pcb)->mtx)
#define FDUNLOCK(pcb)                           lck_mtx_unlock(&(pcb)->mtx)

#define FD_CTL_SENDBUFF_SIZE                    (128 * 1024)

#define GROUP_BIT_CTL_ENQUEUE_BLOCKED           0

#define GROUP_COUNT_MAX                         31
#define FLOW_DIVERT_MAX_NAME_SIZE               4096
#define FLOW_DIVERT_MAX_KEY_SIZE                1024
#define FLOW_DIVERT_MAX_TRIE_MEMORY             (1024 * 1024)

/*
 * Accessors for the signing-identifier prefix trie. Nodes, child maps
 * and string bytes all live in flat arrays indexed by uint16_t, with
 * NULL_TRIE_IDX as the "no entry" sentinel.
 */
#define CHILD_MAP_SIZE                  256
#define NULL_TRIE_IDX                   0xffff
#define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
#define TRIE_CHILD(t, i, b)             (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
#define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])

#define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)

/* Placeholder PCB used for logging when no real PCB is available */
static struct flow_divert_pcb           nil_pcb;

static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
    &flow_divert_mtx_attr);

/* List of in-process groups (control units >= FLOW_DIVERT_IN_PROCESS_UNIT_MIN) */
static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;

/* Array of non-in-process groups, indexed by control unit; protected by g_flow_divert_group_lck */
static struct flow_divert_group         **g_flow_divert_groups __indexable = NULL;
static uint32_t                         g_active_group_count    = 0;

static  errno_t                         g_init_result           = 0;

static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;

/* protosw/usrreqs templates installed on diverted sockets */
static struct protosw                   g_flow_divert_in_protosw;
static struct pr_usrreqs                g_flow_divert_in_usrreqs;
static struct protosw                   g_flow_divert_in_udp_protosw;
static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
static struct ip6protosw                g_flow_divert_in6_protosw;
static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
static struct ip6protosw                g_flow_divert_in6_udp_protosw;
static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;

/* Original transport protosw pointers, saved so calls can be forwarded */
static struct protosw                   *g_tcp_protosw          = NULL;
static struct ip6protosw                *g_tcp6_protosw         = NULL;
static struct protosw                   *g_udp_protosw          = NULL;
static struct ip6protosw                *g_udp6_protosw         = NULL;

static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
    NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
    NET_KT_DEFAULT);

/* Forward declarations */
static errno_t
flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);

static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr *addr);

static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr);

struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer);

static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);

static void flow_divert_group_destroy(struct flow_divert_group *group);
177 
178 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)179 flow_divert_syslog_type_to_oslog_type(int syslog_type)
180 {
181 	switch (syslog_type) {
182 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
183 	case LOG_INFO: return OS_LOG_TYPE_INFO;
184 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
185 	default: return OS_LOG_TYPE_DEFAULT;
186 	}
187 }
188 
/*
 * Ordering function for the per-group PCB red-black tree. PCBs are
 * keyed by their 32-bit connection hash; memcmp gives a consistent
 * (host-memory byte order) total order, which is all the tree needs.
 */
static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
{
	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
}

RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
197 
198 static const char *
flow_divert_packet_type2str(uint8_t packet_type)199 flow_divert_packet_type2str(uint8_t packet_type)
200 {
201 	switch (packet_type) {
202 	case FLOW_DIVERT_PKT_CONNECT:
203 		return "connect";
204 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
205 		return "connect result";
206 	case FLOW_DIVERT_PKT_DATA:
207 		return "data";
208 	case FLOW_DIVERT_PKT_CLOSE:
209 		return "close";
210 	case FLOW_DIVERT_PKT_READ_NOTIFY:
211 		return "read notification";
212 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
213 		return "properties update";
214 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
215 		return "app map create";
216 	default:
217 		return "unknown";
218 	}
219 }
220 
/*
 * Look up the PCB with the given connection hash in <group>'s PCB tree.
 * Returns the PCB with a reference added (caller must FDRELEASE it),
 * or NULL if no PCB with that hash exists in the group.
 */
static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
{
	struct flow_divert_pcb  key_item;
	struct flow_divert_pcb  *fd_cb          = NULL;

	key_item.hash = hash;

	/* The group rwlock protects the tree; shared is enough for a lookup */
	lck_rw_lock_shared(&group->lck);
	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
	FDRETAIN(fd_cb);        /* no-op when fd_cb is NULL */
	lck_rw_done(&group->lck);

	return fd_cb;
}
236 
237 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)238 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
239 {
240 	struct flow_divert_group *group = NULL;
241 	lck_rw_lock_shared(&g_flow_divert_group_lck);
242 	if (g_active_group_count == 0) {
243 		if (fd_cb != NULL) {
244 			FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
245 		}
246 	} else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
247 		FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
248 	} else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
249 		if (g_flow_divert_groups == NULL) {
250 			if (fd_cb != NULL) {
251 				FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
252 			}
253 		} else {
254 			group = g_flow_divert_groups[ctl_unit];
255 			if (group == NULL) {
256 				if (fd_cb != NULL) {
257 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
258 				}
259 			} else {
260 				FDGRP_RETAIN(group);
261 			}
262 		}
263 	} else {
264 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
265 			if (fd_cb != NULL) {
266 				FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
267 			}
268 		} else {
269 			struct flow_divert_group *group_cursor = NULL;
270 			TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
271 				if (group_cursor->ctl_unit == ctl_unit) {
272 					group = group_cursor;
273 					break;
274 				}
275 			}
276 			if (group == NULL) {
277 				if (fd_cb != NULL) {
278 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
279 				}
280 			} else if (fd_cb != NULL &&
281 			    (fd_cb->so == NULL ||
282 			    group_cursor->in_process_pid != fd_cb->so->last_pid)) {
283 				FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
284 				    ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
285 				group = NULL;
286 			} else {
287 				FDGRP_RETAIN(group);
288 			}
289 		}
290 	}
291 	lck_rw_done(&g_flow_divert_group_lck);
292 	return group;
293 }
294 
/*
 * Insert <fd_cb> into <group>'s PCB tree, keyed by fd_cb->hash.
 * On success the group takes its own reference on the PCB.
 * Returns EEXIST if a PCB with the same hash is already present, or
 * ENOENT if the group has been marked defunct.
 */
static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
{
	int error = 0;
	lck_rw_lock_exclusive(&group->lck);
	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
		/* RB_INSERT returns NULL on success, the colliding element otherwise */
		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
			fd_cb->group = group;
			fd_cb->control_group_unit = group->ctl_unit;
			FDRETAIN(fd_cb); /* The group now has a reference */
		} else {
			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
			error = EEXIST;
		}
	} else {
		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
		error = ENOENT;
	}
	lck_rw_done(&group->lck);
	return error;
}
316 
/*
 * Assign <fd_cb> a connection hash that is unique across all groups and
 * insert it into the group identified by <ctl_unit>.
 *
 * The hash is derived from a monotonically increasing key plus random
 * input. Before inserting, every other non-in-process group is scanned
 * for a PCB with the same hash; on a collision (EEXIST) a new hash is
 * generated, up to 3 retries.
 *
 * Returns 0 on success, ENOENT if the group does not exist, or EEXIST
 * if a unique hash could not be found.
 */
static errno_t
flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
{
	errno_t error = 0;
	struct flow_divert_group *group = NULL;
	static uint32_t g_nextkey = 1;
	static uint32_t g_hash_seed = 0;
	int try_count = 0;

	group = flow_divert_group_lookup(ctl_unit, fd_cb);
	if (group == NULL) {
		return ENOENT;
	}

	do {
		uint32_t key[2];
		uint32_t idx;

		key[0] = g_nextkey++;
		key[1] = RandomULong();

		/* Lazily seed the flow hash on first use */
		if (g_hash_seed == 0) {
			g_hash_seed = RandomULong();
		}

		error = 0;
		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);

		/* Check every other non-in-process group for a hash collision */
		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
			if (idx == ctl_unit) {
				continue;
			}
			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
			if (curr_group != NULL) {
				lck_rw_lock_shared(&curr_group->lck);
				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
					error = EEXIST;
				}
				lck_rw_done(&curr_group->lck);
				FDGRP_RELEASE(curr_group);
			}
		}

		/* Insertion itself can also report EEXIST for a collision in the target group */
		if (error == 0) {
			error = flow_divert_pcb_insert(fd_cb, group);
		}
	} while (error == EEXIST && try_count++ < 3);

	if (error == EEXIST) {
		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
		fd_cb->hash = 0;
	}

	FDGRP_RELEASE(group);
	return error;
}
373 
374 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)375 flow_divert_pcb_create(socket_t so)
376 {
377 	struct flow_divert_pcb  *new_pcb = NULL;
378 
379 	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
380 	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
381 	new_pcb->so = so;
382 	new_pcb->log_level = nil_pcb.log_level;
383 
384 	FDRETAIN(new_pcb);      /* Represents the socket's reference */
385 
386 	return new_pcb;
387 }
388 
/*
 * Free a PCB once its last reference has been dropped (called only
 * from FDRELEASE). Releases the buffered token and connect packet,
 * the application data blob, and the saved remote endpoint before
 * returning the PCB to its zone.
 */
static void
flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
{
	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %llu, tunnel tx %llu, tunnel rx %llu",
	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);

	if (fd_cb->connect_token != NULL) {
		mbuf_freem(fd_cb->connect_token);
	}
	if (fd_cb->connect_packet != NULL) {
		mbuf_freem(fd_cb->connect_packet);
	}
	if (fd_cb->app_data != NULL) {
		kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
	}
	if (fd_cb->original_remote_endpoint != NULL) {
		free_sockaddr(fd_cb->original_remote_endpoint);
	}
	zfree(flow_divert_pcb_zone, fd_cb);
}
409 
/*
 * Detach <fd_cb> from its group, if it has one: remove it from the
 * group's PCB tree and drop the reference the group held on it.
 * No-op when the PCB is not in a group.
 */
static void
flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
{
	if (fd_cb->group != NULL) {
		struct flow_divert_group *group = fd_cb->group;
		lck_rw_lock_exclusive(&group->lck);
		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
		fd_cb->group = NULL;
		FDRELEASE(fd_cb);                               /* Release the group's reference */
		lck_rw_done(&group->lck);
	}
}
423 
/*
 * Allocate a new control packet mbuf in *packet and lay down the flow
 * divert packet header (packet type + connection ID in network byte
 * order). On failure *packet is left NULL and an errno is returned.
 */
static int
flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_ref_t *packet)
{
	struct flow_divert_packet_header        hdr;
	int                                     error           = 0;

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		return error;
	}

	hdr.packet_type = packet_type;
	hdr.conn_id = htonl(fd_cb->hash);

	/* Lay down the header */
	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
		mbuf_freem(*packet);
		*packet = NULL;
		return error;
	}

	return 0;
}
450 
/*
 * Append a TLV (1-byte type, 4-byte big-endian length, <length> bytes
 * of value) to the end of <packet>. Returns 0 on success or the
 * mbuf_copyback error. On failure the packet may contain a partially
 * written TLV; callers treat any error as fatal for the packet.
 */
static int
flow_divert_packet_append_tlv(mbuf_ref_t packet, uint8_t type, uint32_t length, const void *value)
{
	uint32_t        net_length      = htonl(length);
	int                     error           = 0;

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
		return error;
	}

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
		return error;
	}

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
	if (error) {
		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
		return error;
	}

	return error;
}
477 
/*
 * Scan the TLVs in <packet> starting at <offset> for one with the given
 * <type>. If <next> is non-zero, the TLV at <offset> itself is skipped
 * and the search starts with the following TLV.
 *
 * Returns the byte offset of the matching TLV's type field, or -1 with
 * *err set when the search runs off the end of the packet.
 */
static int
flow_divert_packet_find_tlv(mbuf_ref_t packet, int offset, uint8_t type, int *err, int next)
{
	size_t      cursor      = offset;
	int         error       = 0;
	uint32_t    curr_length = 0;
	uint8_t     curr_type   = 0;

	*err = 0;

	do {
		if (!next) {
			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
			if (error) {
				*err = ENOENT;
				return -1;
			}
		} else {
			/*
			 * Skip the TLV at the starting offset: pretend its type
			 * is NIL so the advance branch below steps past it.
			 */
			next = 0;
			curr_type = FLOW_DIVERT_TLV_NIL;
		}

		if (curr_type != type) {
			/* Advance past this TLV: type byte + length field + value */
			cursor += sizeof(curr_type);
			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
			if (error) {
				/* NOTE(review): reports the raw mbuf error here but ENOENT above — inconsistent, though callers only test for non-zero */
				*err = error;
				return -1;
			}

			cursor += (sizeof(curr_length) + ntohl(curr_length));
		}
	} while (curr_type != type);

	return (int)cursor;
}
514 
/*
 * Find the TLV of the given <type> at or after <offset> and copy its
 * value out.
 *
 * If <val_size> is non-NULL it receives the TLV's full value length,
 * even when that exceeds <buff_len>. If <buff> is non-NULL, up to
 * <buff_len> bytes of the value are copied into it (zero-filled first,
 * so short values leave trailing zeros). Returns 0 on success, an
 * errno from the TLV search/copy, or EINVAL if the TLV's declared
 * length runs past the end of the packet.
 */
static int
flow_divert_packet_get_tlv(mbuf_ref_t packet, int offset, uint8_t type, size_t buff_len, void *buff __sized_by(buff_len), uint32_t *val_size)
{
	int         error      = 0;
	uint32_t    length     = 0;
	int         tlv_offset = 0;

	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
	if (tlv_offset < 0) {
		return error;
	}

	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
	if (error) {
		return error;
	}

	length = ntohl(length);

	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);

	/* Reject TLVs whose declared length overruns the packet */
	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
		return EINVAL;
	}

	if (val_size != NULL) {
		*val_size = length;
	}

	if (buff != NULL && buff_len > 0) {
		memset(buff, 0, buff_len);
		/* Copy at most buff_len bytes; longer values are truncated silently */
		size_t to_copy = (length < buff_len) ? length : buff_len;
		error = mbuf_copydata(packet, data_offset, to_copy, buff);
		if (error) {
			return error;
		}
	}

	return 0;
}
556 
/*
 * Compute the HMAC-SHA1 of the entire mbuf chain of <packet> using the
 * group's token key, writing SHA_DIGEST_LENGTH bytes into <hmac>.
 * Returns ENOPROTOOPT if the crypto functions are not registered or
 * the group has no token key.
 */
static int
flow_divert_packet_compute_hmac(mbuf_ref_t packet, struct flow_divert_group *group, uint8_t *hmac)
{
	mbuf_ref_t  curr_mbuf       = packet;

	if (g_crypto_funcs == NULL || group->token_key == NULL) {
		return ENOPROTOOPT;
	}

	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);

	/* Feed every mbuf in the chain into the HMAC */
	while (curr_mbuf != NULL) {
		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
		curr_mbuf = mbuf_next(curr_mbuf);
	}

	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);

	return 0;
}
578 
579 static int
flow_divert_packet_verify_hmac(mbuf_ref_t packet,uint32_t ctl_unit)580 flow_divert_packet_verify_hmac(mbuf_ref_t packet, uint32_t ctl_unit)
581 {
582 	int error = 0;
583 	struct flow_divert_group *group = NULL;
584 	int hmac_offset;
585 	uint8_t packet_hmac[SHA_DIGEST_LENGTH];
586 	uint8_t computed_hmac[SHA_DIGEST_LENGTH];
587 	mbuf_ref_t tail;
588 
589 	group = flow_divert_group_lookup(ctl_unit, NULL);
590 	if (group == NULL) {
591 		FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
592 		return ENOPROTOOPT;
593 	}
594 
595 	lck_rw_lock_shared(&group->lck);
596 
597 	if (group->token_key == NULL) {
598 		error = ENOPROTOOPT;
599 		goto done;
600 	}
601 
602 	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
603 	if (hmac_offset < 0) {
604 		goto done;
605 	}
606 
607 	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
608 	if (error) {
609 		goto done;
610 	}
611 
612 	/* Chop off the HMAC TLV */
613 	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
614 	if (error) {
615 		goto done;
616 	}
617 
618 	mbuf_free(tail);
619 
620 	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
621 	if (error) {
622 		goto done;
623 	}
624 
625 	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
626 		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
627 		error = EINVAL;
628 		goto done;
629 	}
630 
631 done:
632 	if (group != NULL) {
633 		lck_rw_done(&group->lck);
634 		FDGRP_RELEASE(group);
635 	}
636 	return error;
637 }
638 
/*
 * Account <data_len> bytes of diverted traffic (one logical packet) to
 * the socket's inpcb statistics, attributed to the last outbound
 * interface's counter type. <send> selects the tx counters, otherwise
 * the rx counters are updated. Also refreshes the inpcb activity bitmap.
 */
static void
flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
{
	struct inpcb *inp = NULL;
	struct ifnet *ifp = NULL;
	stats_functional_type ifnet_count_type = stats_functional_type_none;

	inp = sotoinpcb(fd_cb->so);
	if (inp == NULL) {
		return;
	}

	/* Pick the last output interface for the socket's address family */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if (ifp != NULL) {
		ifnet_count_type = IFNET_COUNT_TYPE(ifp);
	}

	if (send) {
		INP_ADD_STAT(inp, ifnet_count_type, txpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, txbytes, data_len);
	} else {
		INP_ADD_STAT(inp, ifnet_count_type, rxpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, rxbytes, data_len);
	}
	inp_set_activity_bitmap(inp);
}
669 
670 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)671 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
672 {
673 	struct inpcb *inp = sotoinpcb(fd_cb->so);
674 	if (INP_NO_CELLULAR(inp)) {
675 		struct ifnet *ifp = NULL;
676 		if (inp->inp_vflag & INP_IPV4) {
677 			ifp = inp->inp_last_outifp;
678 		} else if (inp->inp_vflag & INP_IPV6) {
679 			ifp = inp->in6p_last_outifp;
680 		}
681 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
682 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
683 			return EHOSTUNREACH;
684 		}
685 	}
686 	return 0;
687 }
688 
689 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)690 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
691 {
692 	struct inpcb *inp = sotoinpcb(fd_cb->so);
693 	if (INP_NO_EXPENSIVE(inp)) {
694 		struct ifnet *ifp = NULL;
695 		if (inp->inp_vflag & INP_IPV4) {
696 			ifp = inp->inp_last_outifp;
697 		} else if (inp->inp_vflag & INP_IPV6) {
698 			ifp = inp->in6p_last_outifp;
699 		}
700 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
701 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
702 			return EHOSTUNREACH;
703 		}
704 	}
705 	return 0;
706 }
707 
708 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)709 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
710 {
711 	struct inpcb *inp = sotoinpcb(fd_cb->so);
712 	if (INP_NO_CONSTRAINED(inp)) {
713 		struct ifnet *ifp = NULL;
714 		if (inp->inp_vflag & INP_IPV4) {
715 			ifp = inp->inp_last_outifp;
716 		} else if (inp->inp_vflag & INP_IPV6) {
717 			ifp = inp->in6p_last_outifp;
718 		}
719 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
720 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
721 			return EHOSTUNREACH;
722 		}
723 	}
724 	return 0;
725 }
726 
/*
 * Update the PCB's read/write closed flags for a shutdown of direction
 * <how> (SHUT_RD / SHUT_WR / SHUT_RDWR semantics: any value other than
 * SHUT_RD closes the write side, any value other than SHUT_WR closes
 * the read side).
 *
 * The corresponding tunnel-side flag is also set when <tunnel> is true
 * or when the connection was never started. <flush_snd> additionally
 * flushes the send buffer once the tunnel stops accepting writes.
 */
static void
flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
{
	if (how != SHUT_RD) {
		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
			if (flush_snd) {
				/* If the tunnel is not accepting writes any more, then flush the send buffer */
				sbflush(&fd_cb->so->so_snd);
			}
		}
	}
	if (how != SHUT_WR) {
		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
		}
	}
}
747 
748 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)749 trie_node_alloc(struct flow_divert_trie *trie)
750 {
751 	if (trie->nodes_free_next < trie->nodes_count) {
752 		uint16_t node_idx = trie->nodes_free_next++;
753 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
754 		return node_idx;
755 	} else {
756 		return NULL_TRIE_IDX;
757 	}
758 }
759 
760 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)761 trie_child_map_alloc(struct flow_divert_trie *trie)
762 {
763 	if (trie->child_maps_free_next < trie->child_maps_count) {
764 		return trie->child_maps_free_next++;
765 	} else {
766 		return NULL_TRIE_IDX;
767 	}
768 }
769 
770 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)771 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
772 {
773 	uint16_t start = trie->bytes_free_next;
774 	if (start + bytes_size <= trie->bytes_count) {
775 		if (start != bytes_idx) {
776 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
777 		}
778 		trie->bytes_free_next += bytes_size;
779 		return start;
780 	} else {
781 		return NULL_TRIE_IDX;
782 	}
783 }
784 
/*
 * Insert the string of <string_len> bytes starting at index
 * <string_start> in the trie's byte array into the compressed prefix
 * trie. Walks down from the root matching as many bytes as possible,
 * splitting an existing node when the match ends mid-node, and finally
 * appending a new leaf for any unmatched remainder.
 *
 * Returns the index of the node that terminates the string (the new
 * leaf, or an existing node when the string is a duplicate/prefix), or
 * NULL_TRIE_IDX if the trie ran out of nodes, child maps, or bytes.
 */
static uint16_t
flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
{
	uint16_t current = trie->root;
	uint16_t child = trie->root;
	uint16_t string_end = string_start + (uint16_t)string_len;
	uint16_t string_idx = string_start;
	uint16_t string_remainder = (uint16_t)string_len;

	while (child != NULL_TRIE_IDX) {
		uint16_t parent = current;
		uint16_t node_idx;
		uint16_t current_end;

		current = child;
		child = NULL_TRIE_IDX;

		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;

		/* Match bytes of the current node against the input string */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < current_end &&
		    string_idx < string_end &&
		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
		    node_idx++, string_idx++) {
			;
		}

		string_remainder = string_end - string_idx;

		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
			/*
			 * We did not reach the end of the current node's string.
			 * We need to split the current node into two:
			 *   1. A new node that contains the prefix of the node that matches
			 *      the prefix of the string being inserted.
			 *   2. The current node modified to point to the remainder
			 *      of the current node's string.
			 */
			uint16_t prefix = trie_node_alloc(trie);
			if (prefix == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
				return NULL_TRIE_IDX;
			}

			/*
			 * Prefix points to the portion of the current nodes's string that has matched
			 * the input string thus far.
			 */
			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);

			/*
			 * Prefix has the current node as the child corresponding to the first byte
			 * after the split.
			 */
			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
				return NULL_TRIE_IDX;
			}
			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;

			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;

			/* Current node is adjusted to point to the remainder */
			TRIE_NODE(trie, current).start = node_idx;
			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;

			/* We want to insert the new leaf (if any) as a child of the prefix */
			current = prefix;
		}

		if (string_remainder > 0) {
			/*
			 * We still have bytes in the string that have not been matched yet.
			 * If the current node has children, iterate to the child corresponding
			 * to the next byte in the string.
			 */
			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
			}
		}
	} /* while (child != NULL_TRIE_IDX) */

	if (string_remainder > 0) {
		/* Add a new leaf containing the remainder of the string */
		uint16_t leaf = trie_node_alloc(trie);
		if (leaf == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}

		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}
		TRIE_NODE(trie, leaf).length = string_remainder;

		/* Set the new leaf as the child of the current node */
		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
				return NULL_TRIE_IDX;
			}
		}
		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
		current = leaf;
	} /* else duplicate or this string is a prefix of one of the existing strings */

	return current;
}
899 
900 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
901 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes __sized_by (string_bytes_count),__unused size_t string_bytes_count)902 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes __sized_by(string_bytes_count), __unused size_t string_bytes_count)
903 {
904 	uint16_t current = trie->root;
905 	uint16_t string_idx = 0;
906 
907 	while (current != NULL_TRIE_IDX) {
908 		uint16_t next = NULL_TRIE_IDX;
909 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
910 		uint16_t node_idx;
911 
912 		for (node_idx = TRIE_NODE(trie, current).start;
913 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
914 		    node_idx++, string_idx++) {
915 			;
916 		}
917 
918 		if (node_idx == node_end) {
919 			if (string_bytes[string_idx] == '\0') {
920 				return current; /* Got an exact match */
921 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
922 			    0 == strlcmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
923 				return current; /* Got an apple webclip id prefix match */
924 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
925 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
926 			}
927 		}
928 		current = next;
929 	}
930 
931 	return NULL_TRIE_IDX;
932 }
933 
/* Shared state for the proc_iterate filter/callout pair used to find a
 * process by its executable UUID. */
struct uuid_search_info {
	uuid_t      target_uuid;                /* executable UUID being searched for */
	/* signing identifier of the first matching process (kalloc'd; freed by the callout) */
	char        *found_signing_id __sized_by(found_signing_id_size);
	boolean_t   found_multiple_signing_ids; /* TRUE once matches disagree on the signing ID */
	proc_t      found_proc;                 /* the claimed process, or PROC_NULL */
	size_t      found_signing_id_size;      /* allocated size of found_signing_id */
};
941 
942 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)943 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
944 {
945 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
946 	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
947 
948 	if (info->found_signing_id != NULL) {
949 		if (!info->found_multiple_signing_ids) {
950 			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
951 			info->found_proc = p;
952 			result = PROC_CLAIMED_DONE;
953 		} else {
954 			uuid_string_t uuid_str;
955 			uuid_unparse(info->target_uuid, uuid_str);
956 			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
957 		}
958 		kfree_data_sized_by(info->found_signing_id, info->found_signing_id_size);
959 	}
960 
961 	if (result == PROC_RETURNED_DONE) {
962 		uuid_string_t uuid_str;
963 		uuid_unparse(info->target_uuid, uuid_str);
964 		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
965 	}
966 
967 	return result;
968 }
969 
970 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)971 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
972 {
973 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
974 	int include = 0;
975 
976 	if (info->found_multiple_signing_ids) {
977 		return include;
978 	}
979 
980 	const unsigned char * p_uuid = proc_executableuuid_addr(p);
981 	include = (uuid_compare(p_uuid, info->target_uuid) == 0);
982 	if (include) {
983 		const char *signing_id __null_terminated = cs_identity_get(p);
984 		if (signing_id != NULL) {
985 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
986 			size_t signing_id_size = strlen(signing_id) + 1;
987 			if (info->found_signing_id == NULL) {
988 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
989 				info->found_signing_id_size = signing_id_size;
990 				strlcpy(info->found_signing_id, signing_id, signing_id_size);
991 			} else if (strlcmp(info->found_signing_id, signing_id, info->found_signing_id_size)) {
992 				info->found_multiple_signing_ids = TRUE;
993 			}
994 		} else {
995 			info->found_multiple_signing_ids = TRUE;
996 		}
997 		include = !info->found_multiple_signing_ids;
998 	}
999 
1000 	return include;
1001 }
1002 
/*
 * Find the process whose executable UUID matches `uuid`.
 *
 * Iterates over all processes; the filter and callout cooperate through a
 * uuid_search_info so a process is only claimed when every matching
 * process shares a single signing identifier.
 *
 * Returns the matching proc or PROC_NULL. The caller releases the result
 * with proc_rele() (see flow_divert_add_all_proc_info).
 */
static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)
{
	struct uuid_search_info info;

	if (LOG_INFO <= nil_pcb.log_level) {
		uuid_string_t uuid_str;
		uuid_unparse(uuid, uuid_str);
		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
	}

	memset(&info, 0, sizeof(info));
	info.found_proc = PROC_NULL;
	uuid_copy(info.target_uuid, uuid);

	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);

	return info.found_proc;
}
1022 
/*
 * Append identity information about `proc` to the connect packet: signing
 * identifier (unless one was already supplied via the connect token),
 * cdhash, and audit token. When is_effective is true the signing ID is
 * additionally validated against the group's signing-ID trie (unless the
 * group has FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP set).
 *
 * Returns 0 on success, EPERM if validation fails, or an error from
 * flow_divert_packet_append_tlv. Called and returns with `proc` unlocked.
 */
static int
flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet, bool is_effective)
{
	int error = 0;
	uint8_t *cdhash = NULL;
	audit_token_t audit_token = {};
	const char *proc_cs_id __null_terminated = signing_id;

	proc_lock(proc);

	/* No signing ID supplied: take it from the process's (valid) code signature. */
	if (proc_cs_id == NULL) {
		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
			proc_cs_id = cs_identity_get(proc);
		} else {
			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
		}
	}

	if (is_effective) {
		/* Validate the signing ID against the group's trie of allowed identifiers. */
		lck_rw_lock_shared(&fd_cb->group->lck);
		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
			if (proc_cs_id != NULL) {
				size_t proc_cs_id_size = strlen(proc_cs_id) + 1;
				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)__unsafe_null_terminated_to_indexable(proc_cs_id), proc_cs_id_size);
				if (result == NULL_TRIE_IDX) {
					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
					error = EPERM;
				} else {
					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
				}
			} else {
				/* No signing identifier at all: reject. */
				error = EPERM;
			}
		}
		lck_rw_done(&fd_cb->group->lck);
	}

	if (error != 0) {
		goto done;
	}

	/*
	 * If signing_id is not NULL then it came from the flow divert token and will be added
	 * as part of the token, so there is no need to add it here.
	 */
	if (signing_id == NULL && proc_cs_id != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
		    (uint32_t)strlen(proc_cs_id),
		    __terminated_by_to_indexable(proc_cs_id));
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
			goto done;
		}
	}

	/* Append the code directory hash; a missing cdhash is logged but not fatal. */
	cdhash = cs_get_cdhash(proc);
	if (cdhash != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
		    SHA1_RESULTLEN,
		    cdhash);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
			goto done;
		}
	} else {
		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
	}

	/* Append the task's audit token; append failure is logged but not fatal. */
	task_t task __single = proc_task(proc);
	if (task != TASK_NULL) {
		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
		if (rc == KERN_SUCCESS) {
			int append_error = flow_divert_packet_append_tlv(connect_packet,
			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
			    sizeof(audit_token_t),
			    &audit_token);
			if (append_error) {
				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
			}
		}
	}

done:
	proc_unlock(proc);

	return error;
}
1113 
/*
 * Append identity information for the processes associated with this flow
 * to the connect packet: the "effective" source process (responsible >
 * delegated > real, in that priority) and, when it differs, the real
 * source process as well.
 *
 * Returns 0 on success or the first error from flow_divert_add_proc_info.
 */
static int
flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet)
{
	int error = 0;
	proc_t effective_proc = PROC_NULL;
	proc_t responsible_proc = PROC_NULL;
	proc_t real_proc = proc_find(so->last_pid);
	/* Tracks whether real_proc holds a proc_find() reference we must release. */
	bool release_real_proc = true;

	proc_t src_proc = PROC_NULL;
	proc_t real_src_proc = PROC_NULL;

	/* If the socket's last_pid can no longer be found, fall back to the
	 * supplied proc (or the current proc); those are borrowed, not referenced. */
	if (real_proc == PROC_NULL) {
		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
		release_real_proc = false;
		real_proc = proc;
		if (real_proc == PROC_NULL) {
			real_proc = current_proc();
		}
	}

	/* For delegated sockets, look up the process the socket is delegated to,
	 * by pid or — when the pids match but the UUIDs differ — by UUID. */
	if (so->so_flags & SOF_DELEGATED) {
		if (proc_getpid(real_proc) != so->e_pid) {
			effective_proc = proc_find(so->e_pid);
		} else {
			const unsigned char * real_proc_uuid = proc_executableuuid_addr(real_proc);
			if (uuid_compare(real_proc_uuid, so->e_uuid)) {
				effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
			}
		}
	}

#if defined(XNU_TARGET_OS_OSX)
	/* On macOS, also consider the responsible process when the group uses an app map. */
	lck_rw_lock_shared(&fd_cb->group->lck);
	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
		if (so->so_rpid > 0) {
			responsible_proc = proc_find(so->so_rpid);
		}
	}
	lck_rw_done(&fd_cb->group->lck);
#endif

	real_src_proc = real_proc;

	/* Pick the effective source process: responsible > delegated > real. */
	if (responsible_proc != PROC_NULL) {
		src_proc = responsible_proc;
		if (effective_proc != NULL) {
			real_src_proc = effective_proc;
		}
	} else if (effective_proc != PROC_NULL) {
		src_proc = effective_proc;
	} else {
		src_proc = real_proc;
	}

	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
	if (error != 0) {
		goto done;
	}

	/* Also report the real source process when it differs from the effective one. */
	if (real_src_proc != NULL && real_src_proc != src_proc) {
		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
		if (error != 0) {
			goto done;
		}
	}

done:
	if (responsible_proc != PROC_NULL) {
		proc_rele(responsible_proc);
	}

	if (effective_proc != PROC_NULL) {
		proc_rele(effective_proc);
	}

	/* Only release real_proc if we took a reference via proc_find() above. */
	if (real_proc != PROC_NULL && release_real_proc) {
		proc_rele(real_proc);
	}

	return error;
}
1196 
/*
 * Send a packet to the flow divert provider over the kernel control
 * socket. If the control socket has no room (or packets are already
 * queued), the packet is put on the group's send queue to be flushed
 * later, and 0 is returned.
 *
 * If the flow has no provider group, the flow is torn down and
 * ECONNABORTED (stream) or EHOSTUNREACH (datagram) is returned.
 * On a non-zero return the caller retains ownership of `packet`.
 */
static int
flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet)
{
	int             error;

	/* No provider group: the flow cannot make progress, so tear it down. */
	if (fd_cb->group == NULL) {
		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, false);
		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			error = ECONNABORTED;
		} else {
			error = EHOSTUNREACH;
		}
		fd_cb->so->so_error = (uint16_t)error;
		return error;
	}

	lck_rw_lock_shared(&fd_cb->group->lck);

	/* Only enqueue directly when nothing is already queued, to preserve ordering. */
	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
		if (error) {
			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
		}
	} else {
		error = ENOBUFS;
	}

	if (error == ENOBUFS) {
		/* Control socket is backed up: queue the packet for a later flush. */
		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
			/* The upgrade failed and dropped the lock; re-acquire it exclusively. */
			lck_rw_lock_exclusive(&fd_cb->group->lck);
		}
		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
		error = 0;
		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
	}

	lck_rw_done(&fd_cb->group->lck);

	return error;
}
1239 
1240 static void
flow_divert_append_domain_name(char * domain_name __null_terminated,void * ctx)1241 flow_divert_append_domain_name(char *domain_name __null_terminated, void *ctx)
1242 {
1243 	mbuf_ref_t packet = (mbuf_ref_t)ctx;
1244 	size_t domain_name_length = 0;
1245 
1246 	if (packet == NULL || domain_name == NULL) {
1247 		return;
1248 	}
1249 
1250 	domain_name_length = strlen(domain_name);
1251 	if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1252 		int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, __terminated_by_to_indexable(domain_name));
1253 		if (error) {
1254 			FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1255 		}
1256 	}
1257 }
1258 
/*
 * Build the CONNECT packet for this flow: process identity (signing ID,
 * cdhash, audit token), traffic class, flow type, target endpoint (from
 * the connect token when present, otherwise from `to`), local address,
 * outbound interface, token flags, and content-filter ID.
 *
 * On success *out_connect_packet holds the finished packet; on failure
 * the packet is freed and an error is returned.
 */
static int
flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_ref_t *out_connect_packet)
{
	int                     error           = 0;
	int                     flow_type       = 0;
	char *                  signing_id __indexable = NULL;
	uint32_t                sid_size        = 0;
	mbuf_ref_t              connect_packet  = NULL;
	cfil_sock_id_t          cfil_sock_id    = CFIL_SOCK_ID_NONE;
	const void              *cfil_id        = NULL;
	size_t                  cfil_id_size    = 0;
	struct inpcb            *inp            = sotoinpcb(so);
	struct ifnet            *ifp            = NULL;
	uint32_t                flags           = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
	if (error) {
		goto done;
	}

	/* Extract the signing ID from the (HMAC-verified) connect token, if present. */
	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (find_error == 0 && sid_size > 0) {
			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
			if (signing_id != NULL) {
				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
			}
		}
	}

	// TODO: remove ternary operator after rdar://121487109 is fixed
	error = flow_divert_add_all_proc_info(fd_cb, so, p, NULL == signing_id ? NULL : __unsafe_null_terminated_from_indexable(signing_id), connect_packet);

	if (signing_id != NULL) {
		kfree_data(signing_id, sid_size + 1);
	}

	if (error) {
		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
	    sizeof(fd_cb->so->so_traffic_class),
	    &fd_cb->so->so_traffic_class);
	if (error) {
		goto done;
	}

	/* Flow type follows the socket type: stream -> TCP, datagram -> UDP. */
	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
	} else {
		error = EINVAL;
		goto done;
	}
	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_FLOW_TYPE,
	    sizeof(flow_type),
	    &flow_type);

	if (error) {
		goto done;
	}

	if (fd_cb->connect_token != NULL) {
		/* The token already carries the target endpoint; splice it in whole. */
		unsigned int token_len = m_length(fd_cb->connect_token);
		mbuf_concatenate(connect_packet, fd_cb->connect_token);
		mbuf_pkthdr_adjustlen(connect_packet, token_len);
		fd_cb->connect_token = NULL; /* ownership transferred to connect_packet */
	} else {
		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
		if (error) {
			goto done;
		}

		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
	}

	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
		if (error) {
			goto done;
		}
	}

	/* Determine the outbound interface, preferring the last output interface. */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	/* Mark the flow as bound when it is bound to an interface or a local address. */
	if ((inp->inp_flags & INP_BOUND_IF) ||
	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
		if (ifp == NULL) {
			ifp = inp->inp_boundifp;
		}
	}
	if (ifp != NULL) {
		uint32_t flow_if_index = ifp->if_index;
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
		    sizeof(flow_if_index), &flow_if_index);
		if (error) {
			goto done;
		}
	}

	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
	}

	if (flags != 0) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
		if (error) {
			goto done;
		}
	}

	/* Attach a content filter ID so the provider can correlate with CFIL. */
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
	} else {
		cfil_sock_id = cfil_sock_id_from_socket(so);
	}

	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
		cfil_id = &cfil_sock_id;
		cfil_id_size = sizeof(cfil_sock_id);
	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
		/* No CFIL socket ID; fall back to the NECP client UUID. */
		cfil_id = &inp->necp_client_uuid;
		cfil_id_size = sizeof(inp->necp_client_uuid);
	}

	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
		if (error) {
			goto done;
		}
	}

done:
	if (!error) {
		*out_connect_packet = connect_packet;
	} else if (connect_packet != NULL) {
		mbuf_freem(connect_packet);
	}

	return error;
}
1411 
1412 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1413 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1414 {
1415 	int             error                   = 0;
1416 	mbuf_ref_t      connect_packet          = fd_cb->connect_packet;
1417 	mbuf_ref_t      saved_connect_packet    = NULL;
1418 
1419 	if (connect_packet != NULL) {
1420 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1421 		if (error) {
1422 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1423 			goto done;
1424 		}
1425 
1426 		error = flow_divert_send_packet(fd_cb, connect_packet);
1427 		if (error) {
1428 			goto done;
1429 		}
1430 
1431 		fd_cb->connect_packet = saved_connect_packet;
1432 		saved_connect_packet = NULL;
1433 	} else {
1434 		error = ENOENT;
1435 	}
1436 done:
1437 	if (saved_connect_packet != NULL) {
1438 		mbuf_freem(saved_connect_packet);
1439 	}
1440 
1441 	return error;
1442 }
1443 
1444 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1445 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1446 {
1447 	int             error       = 0;
1448 	mbuf_ref_t      packet      = NULL;
1449 	int             rbuff_space = 0;
1450 
1451 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1452 	if (error) {
1453 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1454 		goto done;
1455 	}
1456 
1457 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1458 	if (rbuff_space < 0) {
1459 		rbuff_space = 0;
1460 	}
1461 	rbuff_space = htonl(rbuff_space);
1462 	error = flow_divert_packet_append_tlv(packet,
1463 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1464 	    sizeof(rbuff_space),
1465 	    &rbuff_space);
1466 	if (error) {
1467 		goto done;
1468 	}
1469 
1470 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1471 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1472 		if (error) {
1473 			goto done;
1474 		}
1475 	}
1476 
1477 	error = flow_divert_send_packet(fd_cb, packet);
1478 	if (error) {
1479 		goto done;
1480 	}
1481 
1482 done:
1483 	if (error && packet != NULL) {
1484 		mbuf_freem(packet);
1485 	}
1486 
1487 	return error;
1488 }
1489 
1490 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1491 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1492 {
1493 	int         error   = 0;
1494 	mbuf_ref_t  packet  = NULL;
1495 	uint32_t    zero    = 0;
1496 
1497 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1498 	if (error) {
1499 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1500 		goto done;
1501 	}
1502 
1503 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1504 	if (error) {
1505 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1506 		goto done;
1507 	}
1508 
1509 	how = htonl(how);
1510 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1511 	if (error) {
1512 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1513 		goto done;
1514 	}
1515 
1516 	error = flow_divert_send_packet(fd_cb, packet);
1517 	if (error) {
1518 		goto done;
1519 	}
1520 
1521 done:
1522 	if (error && packet != NULL) {
1523 		mbuf_free(packet);
1524 	}
1525 
1526 	return error;
1527 }
1528 
1529 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1530 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1531 {
1532 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1533 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1534 		return SHUT_RDWR;
1535 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1536 		return SHUT_RD;
1537 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1538 		return SHUT_WR;
1539 	}
1540 
1541 	return -1;
1542 }
1543 
1544 /*
1545  * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1546  * writes. Returns FALSE otherwise.
1547  */
1548 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1549 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1550 {
1551 	int             how             = -1;
1552 
1553 	/* Do not send any close messages if there is still data in the send buffer */
1554 	if (fd_cb->so->so_snd.sb_cc == 0) {
1555 		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1556 			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1557 			how = SHUT_RD;
1558 		}
1559 		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1560 			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1561 			if (how == SHUT_RD) {
1562 				how = SHUT_RDWR;
1563 			} else {
1564 				how = SHUT_WR;
1565 			}
1566 		}
1567 	}
1568 
1569 	if (how != -1) {
1570 		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1571 		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1572 			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1573 			if (how != SHUT_RD) {
1574 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1575 			}
1576 			if (how != SHUT_WR) {
1577 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1578 			}
1579 		}
1580 	}
1581 
1582 	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
1583 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
1584 	}
1585 }
1586 
1587 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_ref_t data,size_t data_len)1588 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len)
1589 {
1590 	mbuf_ref_t  packet = NULL;
1591 	mbuf_ref_t  last   = NULL;
1592 	int         error  = 0;
1593 
1594 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1595 	if (error || packet == NULL) {
1596 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1597 		goto done;
1598 	}
1599 
1600 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1601 		last = m_last(packet);
1602 		mbuf_setnext(last, data);
1603 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1604 	} else {
1605 		data_len = 0;
1606 	}
1607 	error = flow_divert_send_packet(fd_cb, packet);
1608 	if (error == 0 && data_len > 0) {
1609 		fd_cb->bytes_sent += data_len;
1610 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1611 	}
1612 
1613 done:
1614 	if (error) {
1615 		if (last != NULL) {
1616 			mbuf_setnext(last, NULL);
1617 		}
1618 		if (packet != NULL) {
1619 			mbuf_freem(packet);
1620 		}
1621 	}
1622 
1623 	return error;
1624 }
1625 
/*
 * Send one datagram (or one fragment of a datagram) to the provider as a
 * DATA packet. `toaddr`, when non-NULL, is appended as the target
 * endpoint; `is_fragment` marks fragments and `datagram_size` carries the
 * total datagram length (passed as 0 on all but the first fragment by the
 * fragmenting caller). On success the packet, with `data` chained onto
 * it, is consumed; on failure `data` is detached so the caller can free it.
 */
static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
{
	mbuf_ref_t  packet = NULL;
	mbuf_ref_t  last   = NULL;
	int         error  = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	/* Append the destination address, when one was supplied. */
	if (toaddr != NULL) {
		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
			goto done;
		}
	}
	if (is_fragment) {
		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
			goto done;
		}
	}

	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
		goto done;
	}

	/* Chain the payload onto the packet header when there is payload to send. */
	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the caller-owned payload before freeing the header chain. */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1685 
/*
 * Split a datagram larger than FLOW_DIVERT_CHUNK_SIZE into chunk-sized
 * fragments and send each one. Only the first fragment carries the
 * target address and the total datagram size. Consumes `datagram` in all
 * cases: fragments are either sent or freed on the error path.
 */
static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_ref_t datagram, size_t datagram_len, struct sockaddr *toaddr)
{
	mbuf_ref_t  next_data       = datagram;
	size_t      remaining_len   = datagram_len;
	mbuf_ref_t  remaining_data  = NULL;
	int         error           = 0;
	bool        first           = true;

	while (remaining_len > 0 && next_data != NULL) {
		size_t to_send = remaining_len;
		remaining_data = NULL;

		/* Carve off at most one chunk; the rest stays in remaining_data. */
		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
			to_send = FLOW_DIVERT_CHUNK_SIZE;
			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
			if (error) {
				break;
			}
		}

		/* Only the first fragment carries the address and full datagram size. */
		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
		if (error) {
			break;
		}

		first = false;
		remaining_len -= to_send;
		next_data = remaining_data;
	}

	if (error) {
		/* On failure the current fragment was detached by the callee; free
		 * it along with whatever has not been sent yet. */
		if (next_data != NULL) {
			mbuf_freem(next_data);
		}
		if (remaining_data != NULL) {
			mbuf_freem(remaining_data);
		}
	}
	return error;
}
1727 
static void
flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
{
	size_t      to_send;
	size_t      sent    = 0;
	int         error   = 0;
	mbuf_ref_t  buffer;

	/*
	 * Flush data queued in the socket's send buffer out over the flow
	 * divert control channel.  Stream sockets are drained byte-wise in
	 * FLOW_DIVERT_CHUNK_SIZE pieces; datagram sockets are drained one
	 * buffered record (one datagram) at a time.  The peer's send window
	 * is honored unless 'force' is set, and is decremented below by the
	 * number of bytes actually handed off.
	 */
	to_send = fd_cb->so->so_snd.sb_cc;
	buffer = fd_cb->so->so_snd.sb_mb;

	if (buffer == NULL && to_send > 0) {
		/* Inconsistent sockbuf accounting; bail rather than dereference NULL */
		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
		return;
	}

	/* Ignore the send window if force is enabled */
	if (!force && (to_send > fd_cb->send_window)) {
		to_send = fd_cb->send_window;
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		/* Copy out and send up to 'to_send' bytes in chunk-sized packets */
		while (sent < to_send) {
			mbuf_ref_t  data;
			size_t      data_len;

			data_len = to_send - sent;
			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
				data_len = FLOW_DIVERT_CHUNK_SIZE;
			}

			/* Copy (not remove) from the sockbuf at the current offset */
			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
				break;
			}

			/* On success 'data' is consumed by the packet; on failure we free it */
			error = flow_divert_send_data_packet(fd_cb, data, data_len);
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}

			sent += data_len;
		}
		/* Drop only the bytes that were actually sent, then wake writers */
		sbdrop(&fd_cb->so->so_snd, (int)sent);
		sowwakeup(fd_cb->so);
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		mbuf_ref_t  data;
		mbuf_ref_t  m;
		size_t      data_len;

		/* One buffered record per datagram; each record may lead with an
		 * MT_SONAME mbuf carrying the target address. */
		while (buffer) {
			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);

			m = buffer;
			if (toaddr != NULL) {
				/* look for data in the chain */
				do {
					m = m->m_next;
					if (m != NULL && m->m_type == MT_DATA) {
						break;
					}
				} while (m);
				if (m == NULL) {
					/* unexpected */
					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
					goto move_on;
				}
			}
			data_len = mbuf_pkthdr_len(m);
			if (data_len > 0) {
				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
					break;
				}
			} else {
				/* Zero-length datagram: send with no payload */
				data = NULL;
			}
			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
			} else {
				/* Fragmenting path takes ownership of 'data' even on error */
				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
				data = NULL;
			}
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}
			sent += data_len;
move_on:
			/* Advance before dropping, since sbdroprecord frees the record */
			buffer = buffer->m_nextpkt;
			(void) sbdroprecord(&(fd_cb->so->so_snd));
		}
	}

	if (sent > 0) {
		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
		/* Shrink the send window, clamping at zero */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}
	}
}
1839 
static int
flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_size, struct sockaddr *toaddr)
{
	/*
	 * Send application data for this flow, either immediately (within the
	 * peer's send window) or by queueing it in the socket's send buffer
	 * for a later flow_divert_send_buffered_data() pass.  Ownership of
	 * 'data' is taken by this function on all paths.  Returns 0 or an
	 * errno value (ENOBUFS when data had to be dropped for lack of
	 * sockbuf space on the stream path).
	 */
	size_t to_send = data_size;
	int error = 0;

	if (to_send > fd_cb->send_window) {
		to_send = fd_cb->send_window;
	}

	if (fd_cb->so->so_snd.sb_cc > 0) {
		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		size_t sent = 0;
		mbuf_ref_t remaining_data = data;
		size_t remaining_size = data_size;
		mbuf_ref_t pkt_data = NULL;
		/* Peel off chunk-sized packets from the front of the chain until
		 * the window portion is sent or the chain is exhausted. */
		while (sent < to_send && remaining_data != NULL && remaining_size > 0) {
			size_t  pkt_data_len;

			pkt_data = remaining_data;

			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
			} else {
				pkt_data_len = to_send - sent;
			}

			if (pkt_data_len < remaining_size) {
				/* Split: pkt_data keeps the first pkt_data_len bytes,
				 * remaining_data gets the rest. */
				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
					pkt_data = NULL;
					break;
				}
				remaining_size -= pkt_data_len;
			} else {
				/* Whole remainder fits in this packet */
				remaining_data = NULL;
				remaining_size = 0;
			}

			/* On failure, pkt_data is still ours; it is buffered below */
			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
			if (error) {
				break;
			}

			pkt_data = NULL;
			sent += pkt_data_len;
		}

		/* Account for what was sent against the window, clamping at zero */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}

		/* Send-path errors are absorbed; unsent data is buffered instead */
		error = 0;

		if (pkt_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(pkt_data);
				error = ENOBUFS;
			}
		}

		if (remaining_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(remaining_data);
				error = ENOBUFS;
			}
		}
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		int send_dgram_error = 0;
		/* Datagrams are not split across the window: send the whole
		 * datagram if any window is available (or if it is empty). */
		if (to_send || data_size == 0) {
			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
			} else {
				/* Fragmenting path takes ownership of 'data' */
				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
				data = NULL;
			}
			if (send_dgram_error) {
				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
			} else {
				if (data_size >= fd_cb->send_window) {
					fd_cb->send_window = 0;
				} else {
					fd_cb->send_window -= data_size;
				}
				data = NULL;
			}
		}

		if (data != NULL) {
			/* buffer it */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (toaddr != NULL) {
					int append_error = 0;
					/* Record carries the target address for later replay */
					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
					}
				} else {
					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
					}
				}
			} else {
				/* No sockbuf space either: the datagram is dropped */
				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
				mbuf_freem(data);
			}
		}
	}

	return error;
}
1970 
1971 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1972 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1973 {
1974 	int         error  = 0;
1975 	mbuf_ref_t  packet = NULL;
1976 
1977 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1978 	if (error) {
1979 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1980 		goto done;
1981 	}
1982 
1983 	error = flow_divert_send_packet(fd_cb, packet);
1984 	if (error) {
1985 		goto done;
1986 	}
1987 
1988 done:
1989 	if (error && packet != NULL) {
1990 		mbuf_free(packet);
1991 	}
1992 
1993 	return error;
1994 }
1995 
1996 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1997 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1998 {
1999 	int         error  = 0;
2000 	mbuf_ref_t  packet = NULL;
2001 
2002 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
2003 	if (error) {
2004 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
2005 		goto done;
2006 	}
2007 
2008 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2009 	if (error) {
2010 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2011 		goto done;
2012 	}
2013 
2014 	error = flow_divert_send_packet(fd_cb, packet);
2015 	if (error) {
2016 		goto done;
2017 	}
2018 
2019 done:
2020 	if (error && packet != NULL) {
2021 		mbuf_free(packet);
2022 	}
2023 
2024 	return error;
2025 }
2026 
2027 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2028 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2029 {
2030 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2031 
2032 	if (local_endpoint->sa_family == AF_INET6) {
2033 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2034 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2035 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2036 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2037 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2038 		}
2039 		if (inp->inp_lport == 0) {
2040 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2041 		}
2042 	} else if (local_endpoint->sa_family == AF_INET) {
2043 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2044 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2045 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2046 		}
2047 		if (inp->inp_lport == 0) {
2048 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
2049 		}
2050 	}
2051 }
2052 
2053 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2054 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2055 {
2056 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2057 
2058 	if (remote_endpoint->sa_family == AF_INET6) {
2059 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2060 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2061 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2062 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2063 		}
2064 		if (inp->inp_fport == 0) {
2065 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2066 		}
2067 	} else if (remote_endpoint->sa_family == AF_INET) {
2068 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
2069 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2070 		}
2071 		if (inp->inp_fport == 0) {
2072 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2073 		}
2074 	}
2075 }
2076 
/*
 * Derive the next kernel control unit to use for a flow.
 *
 * 'ctl_unit' and 'aggregate_unit' are in/out parameters: they are
 * consumed/cleared as units are handed out, so repeated calls walk the
 * failover sequence.  '*is_aggregate' is set when the returned unit came
 * from (or coexists with) an aggregate mask.  Returns the chosen control
 * unit, or 0 when none is available.
 */
static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
{
	uint32_t result = *ctl_unit;

	// There are two models supported for deriving control units:
	// 1. A series of flow divert units that allow "transparently" failing
	//    over to the next unit. For this model, the aggregate_unit contains list
	//    of all control units (between 1 and 30) masked over each other.
	// 2. An indication that in-process flow divert should be preferred, with
	//    an out of process flow divert to fail over to. For this model, the
	//    ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
	//    is returned first, with the unpacked aggregate unit returned as a
	//    fallback.
	*is_aggregate = false;
	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
		bool found_unit = false;
		if (pid != 0) {
			// Look for an in-process group that is already open, and use that unit
			struct flow_divert_group *group = NULL;
			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
				if (group->in_process_pid == pid) {
					// Found an in-process group for our same PID, use it
					found_unit = true;
					result = group->ctl_unit;
					break;
				}
			}

			// If an in-process group isn't open yet, send a signal up through NECP to request one
			if (!found_unit) {
				necp_client_request_in_process_flow_divert(pid);
			}
		}

		// If a unit was found, return it
		if (found_unit) {
			if (aggregate_unit != NULL && *aggregate_unit != 0) {
				*is_aggregate = true;
			}
			// The next time around, the aggregate unit values will be picked up
			*ctl_unit = 0;
			return result;
		}

		// If no unit was found, fall through and clear out the ctl_unit
		result = 0;
		*ctl_unit = 0;
	}

	if (aggregate_unit != NULL && *aggregate_unit != 0) {
		uint32_t counter;
		struct flow_divert_group *lower_order_group = NULL;

		// Scan the aggregate mask (bit N => control unit N+1) and pick the
		// existing group with the lowest 'order' value.
		// NOTE(review): the groups returned by flow_divert_group_lookup()
		// here are not explicitly released in this loop — presumably a
		// lookup with a NULL fd_cb does not retain; confirm against
		// flow_divert_group_lookup's implementation.
		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
			if ((*aggregate_unit) & (1 << counter)) {
				struct flow_divert_group *group = NULL;
				group = flow_divert_group_lookup(counter + 1, NULL);

				if (group != NULL) {
					if (lower_order_group == NULL) {
						lower_order_group = group;
					} else if ((group->order < lower_order_group->order)) {
						lower_order_group = group;
					}
				}
			}
		}

		if (lower_order_group != NULL) {
			// Consume the chosen unit's bit so the next call moves on
			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
			*is_aggregate = true;
			return lower_order_group->ctl_unit;
		} else {
			*ctl_unit = 0;
			return result;
		}
	} else {
		*ctl_unit = 0;
		return result;
	}
}
2159 
/*
 * Fail the flow over to the next flow divert group in its derived
 * sequence and re-send the connect packet through that group.
 *
 * Removes the pcb from its current group, walks candidate control units
 * via flow_divert_derive_kernel_control_unit() until an insert succeeds
 * or the candidates are exhausted, then re-issues the connect.  Returns
 * 0 on success; EALREADY if the next unit equals the current one, or
 * ENOENT (or another errno) when no usable group remains — in which
 * case the pcb is left detached from any group.
 */
static int
flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
{
	int error = 0;
	uint32_t policy_control_unit = fd_cb->policy_control_unit;

	flow_divert_pcb_remove(fd_cb);

	do {
		struct flow_divert_group *next_group = NULL;
		bool is_aggregate = false;
		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);

		if (fd_cb->control_group_unit == next_ctl_unit) {
			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
			error = EALREADY;
			break;
		}

		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
			error = ENOENT;
			break;
		}

		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
		if (next_group == NULL) {
			/* Group not open; try the next candidate unit */
			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
			continue;
		}

		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);

		error = flow_divert_pcb_insert(fd_cb, next_group);
		if (error == 0) {
			/* Track whether this flow is part of a transparent failover set */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}
		}
		/* Drop the reference taken by flow_divert_group_lookup() */
		FDGRP_RELEASE(next_group);
	} while (fd_cb->group == NULL);

	if (fd_cb->group == NULL) {
		return error ? error : ENOENT;
	}

	error = flow_divert_send_connect_packet(fd_cb);
	if (error) {
		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
		flow_divert_pcb_remove(fd_cb);
		error = ENOENT;
	}

	return error;
}
2217 
/*
 * Detach flow divert from the socket and fall back to the socket's
 * original protocol.  Restores the pcb's pre-divert addressing state,
 * severs the socket<->fd_cb association, reconnects through the
 * original protocol (unless the connect was implicit), and replays any
 * data still queued in the send buffer.  On failure the socket is
 * disconnected with so_error set.
 */
static void
flow_divert_disable(struct flow_divert_pcb *fd_cb)
{
	struct socket *so = NULL;
	mbuf_ref_t buffer;
	int error = 0;
	proc_t last_proc = NULL;
	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
	struct inpcb *inp = NULL;

	so = fd_cb->so;
	if (so == NULL) {
		goto done;
	}

	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");

	/* Restore the IP state */
	inp = sotoinpcb(so);
	inp->inp_vflag = fd_cb->original_vflag;
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;
	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
	inp->inp_fifscope = IFSCOPE_NONE;
	inp->in6p_fport = 0;
	/* If flow divert set the local address, clear it out */
	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
		inp->inp_lifscope = IFSCOPE_NONE;
	}
	inp->inp_last_outifp = fd_cb->original_last_outifp;
	inp->in6p_last_outifp = fd_cb->original_last_outifp6;

	/* Dis-associate the socket */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
	so->so_fd_pcb = NULL;
	fd_cb->so = NULL;

	FDRELEASE(fd_cb); /* Release the socket's reference */

	/* Revert back to the original protocol */
	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));

	/* Reset the socket state to avoid confusing NECP */
	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);

	/* May return NULL if the process is gone; fall back to current_proc() below */
	last_proc = proc_find(so->last_pid);

	if (do_connect) {
		/* Connect using the original protocol */
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
			goto done;
		}
	}

	buffer = so->so_snd.sb_mb;
	if (buffer == NULL) {
		/* No buffered data, done */
		goto done;
	}

	/* Send any buffered data using the original protocol */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		mbuf_ref_t  data_to_send = NULL;
		size_t      data_len     = so->so_snd.sb_cc;

		/* Copy the whole buffered stream, then flush the sockbuf */
		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
			goto done;
		}

		sbflush(&so->so_snd);

		if (data_to_send->m_flags & M_PKTHDR) {
			mbuf_pkthdr_setlen(data_to_send, data_len);
		}

		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
		    0,
		    data_to_send,
		    NULL,
		    NULL,
		    (last_proc != NULL ? last_proc : current_proc()));

		if (error && error != EWOULDBLOCK) {
			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
		} else {
			/* EWOULDBLOCK is expected for a non-blocking send; not fatal */
			error = 0;
		}
	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct sockbuf *sb = &so->so_snd;
		MBUFQ_HEAD(send_queue_head) send_queue;
		MBUFQ_INIT(&send_queue);

		/* Flush the send buffer, moving all records to a temporary queue */
		while (sb->sb_mb != NULL) {
			mbuf_ref_t record = sb->sb_mb;
			mbuf_ref_t m = record;
			sb->sb_mb = sb->sb_mb->m_nextpkt;
			while (m != NULL) {
				/* Adjust sockbuf accounting for each mbuf removed */
				sbfree(sb, m);
				m = m->m_next;
			}
			record->m_nextpkt = NULL;
			MBUFQ_ENQUEUE(&send_queue, record);
		}
		SB_EMPTY_FIXUP(sb);

		/* Replay each buffered record as one datagram send */
		while (!MBUFQ_EMPTY(&send_queue)) {
			mbuf_ref_t next_record = MBUFQ_FIRST(&send_queue);
			mbuf_ref_t addr = NULL;
			mbuf_ref_t control = NULL;
			mbuf_ref_t last_control = NULL;
			mbuf_ref_t data = NULL;
			mbuf_ref_t m = next_record;
			struct sockaddr *to_endpoint = NULL;

			MBUFQ_DEQUEUE(&send_queue, next_record);

			/* Locate the record's address, control, and data mbufs */
			while (m != NULL) {
				if (m->m_type == MT_SONAME) {
					addr = m;
				} else if (m->m_type == MT_CONTROL) {
					if (control == NULL) {
						control = m;
					}
					last_control = m;
				} else if (m->m_type == MT_DATA) {
					data = m;
					break;
				}
				m = m->m_next;
			}

			/* Only pass an explicit destination when the socket was not connected */
			if (addr != NULL && !do_connect) {
				to_endpoint = flow_divert_get_buffered_target_address(addr);
				if (to_endpoint == NULL) {
					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
				}
			}

			if (data == NULL) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
				mbuf_freem(next_record);
				continue;
			}

			if (!(data->m_flags & M_PKTHDR)) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
				mbuf_freem(next_record);
				continue;
			}

			/* Detach addr/control from the data so each can be handed off separately */
			if (addr != NULL) {
				addr->m_next = NULL;
			}

			if (last_control != NULL) {
				last_control->m_next = NULL;
			}

			/* pru_send consumes 'data' and 'control'; 'addr' stays ours */
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    0,
			    data,
			    to_endpoint,
			    control,
			    (last_proc != NULL ? last_proc : current_proc()));

			if (addr != NULL) {
				mbuf_freem(addr);
			}

			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
			}
		}
	}
done:
	if (last_proc != NULL) {
		proc_rele(last_proc);
	}

	if (error && so != NULL) {
		so->so_error = (uint16_t)error;
		flow_divert_disconnect_socket(so, do_connect, false);
	}
}
2411 
/*
 * Scope the flow's socket to the interface with index 'out_if_index'.
 * If the socket was already scoped to a different interface, it is
 * re-bound via inp_bindif(); when 'derive_new_address' is also set, a
 * fresh local address appropriate for the new interface is derived from
 * the original remote endpoint.  No-op when there is no socket or the
 * index is not positive.
 */
static void
flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
{
	struct socket           *so             = NULL;
	struct inpcb            *inp            = NULL;
	struct ifnet            *current_ifp    = NULL;
	struct ifnet * __single new_ifp         = NULL;
	int                     error           = 0;

	so = fd_cb->so;
	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);

	if (out_if_index <= 0) {
		return;
	}

	/* Pick the address-family-appropriate "last output interface" */
	if (inp->inp_vflag & INP_IPV6) {
		current_ifp = inp->in6p_last_outifp;
	} else {
		current_ifp = inp->inp_last_outifp;
	}

	if (current_ifp != NULL) {
		if (current_ifp->if_index == out_if_index) {
			/* No change */
			return;
		}

		/* Scope the socket to the given interface */
		error = inp_bindif(inp, out_if_index, &new_ifp);
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
			return;
		}

		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
			/* Get the appropriate address for the given interface */
			if (inp->inp_vflag & INP_IPV6) {
				/* Reset the local address first so the lookup re-derives it */
				inp->in6p_laddr = sa6_any.sin6_addr;
				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
			} else {
				inp->inp_laddr.s_addr = INADDR_ANY;
				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
			}

			/* Non-fatal: the flow keeps its previous local endpoint */
			if (error != 0) {
				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
			}
		}
	} else {
		/* Not previously scoped: just resolve the ifnet for the index */
		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(out_if_index)) {
			new_ifp = ifindex2ifnet[out_if_index];
		}
		ifnet_head_done();
	}

	/* Update the "last interface" of the socket */
	if (new_ifp != NULL) {
		if (inp->inp_vflag & INP_IPV6) {
			inp->in6p_last_outifp = new_ifp;
		} else {
			inp->inp_last_outifp = new_ifp;
		}

#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
		}
#endif /* SKYWALK */
	}
}
2488 
/*
 * Handle a FLOW_DIVERT_PKT_CONNECT_RESULT message from the agent.
 * Parses the result TLVs (error code, send window, optional control
 * unit / local / remote endpoints / output interface / app data) from
 * 'packet' starting at 'offset', then, under the pcb and socket locks,
 * applies them: updates addressing and scoping, optionally migrates the
 * pcb to a new control group, and either completes the connection
 * (soisconnected) or tears it down / fails over on error.
 */
static void
flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t              connect_error   = 0;
	uint32_t              ctl_unit        = 0;
	int                   error           = 0;
	union sockaddr_in_4_6 local_endpoint  = {};
	union sockaddr_in_4_6 remote_endpoint = {};
	int                   out_if_index    = 0;
	uint32_t              send_window     = 0;
	uint32_t              app_data_length = 0;

	memset(&local_endpoint, 0, sizeof(local_endpoint));
	memset(&remote_endpoint, 0, sizeof(remote_endpoint));

	/* The error code and send window TLVs are mandatory; the rest are optional */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
		return;
	}

	connect_error = ntohl(connect_error);
	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
		return;
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
	}

	/* Only the length is queried here; the data is copied out later if present */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
	}

	/* Optional-TLV lookup failures are not errors; reset before proceeding */
	error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		struct inpcb *inp = NULL;
		struct socket *so = fd_cb->so;
		bool local_address_is_valid = false;

		socket_lock(so, 1);

		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
			goto done;
		}

		inp = sotoinpcb(so);

		if (connect_error || error) {
			goto set_socket_state;
		}

		/* Adopt the agent-provided local endpoint; an unspecified address
		 * still contributes its port. */
		if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
			if (local_endpoint.sa.sa_family == AF_INET) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->inp_laddr.s_addr = INADDR_ANY;
				} else {
					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
				}
			} else if (local_endpoint.sa.sa_family == AF_INET6) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->in6p_laddr = sa6_any.sin6_addr;
				} else {
					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
				}
			}
		}

		/* Re-scope to the agent's output interface; derive a local address
		 * only if the agent did not supply a usable one. */
		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));

		if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
			if (remote_endpoint.sa.sa_family == AF_INET) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
			}
			flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
		}

		/* Copy out any application data carried in the result, replacing
		 * whatever was stored before. */
		if (app_data_length > 0) {
			uint8_t * app_data = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
					if (fd_cb->app_data != NULL) {
						kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
			}
		}

		if (error) {
			goto set_socket_state;
		}

		if (fd_cb->group == NULL) {
			error = EINVAL;
			goto set_socket_state;
		}

		/* If the agent redirected us to a different control unit, move the
		 * pcb to that group now. */
		ctl_unit = ntohl(ctl_unit);
		if (ctl_unit > 0) {
			int insert_error = 0;
			struct flow_divert_group *grp = NULL;

			if (ctl_unit >= GROUP_COUNT_MAX) {
				FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
				error = EINVAL;
				goto set_socket_state;
			}

			grp = flow_divert_group_lookup(ctl_unit, fd_cb);
			if (grp == NULL) {
				error = ECONNRESET;
				goto set_socket_state;
			}

			flow_divert_pcb_remove(fd_cb);
			insert_error = flow_divert_pcb_insert(fd_cb, grp);
			FDGRP_RELEASE(grp);

			if (insert_error != 0) {
				error = ECONNRESET;
				goto set_socket_state;
			}
		}

		fd_cb->send_window = ntohl(send_window);

set_socket_state:
		if (!connect_error && !error) {
			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
			error = flow_divert_send_connect_result(fd_cb);
		}

		if (connect_error || error) {
			/* Remote connect error: try the next group in the failover
			 * sequence before giving up. */
			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
				error = flow_divert_try_next_group(fd_cb);
				if (error && fd_cb->policy_control_unit == 0) {
					flow_divert_disable(fd_cb);
					goto done;
				} else if (error == 0) {
					goto done;
				}
			}

			if (!connect_error) {
				/* Local failure: close our side and tell the agent */
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
				so->so_error = (uint16_t)error;
				flow_divert_send_close_if_needed(fd_cb);
			} else {
				/* Remote failure: both sides are already closed */
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
				so->so_error = (uint16_t)connect_error;
			}
			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		} else {
#if NECP
			/* Update NECP client with connected five-tuple */
			if (!uuid_is_null(inp->necp_client_uuid)) {
				/* Must drop the socket lock for the NECP call; re-check the
				 * divert attachment after re-acquiring it. */
				socket_unlock(so, 0);
				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
				socket_lock(so, 0);
				if (!(so->so_flags & SOF_FLOW_DIVERT)) {
					/* The socket was closed while it was unlocked */
					goto done;
				}
			}
#endif /* NECP */

			flow_divert_send_buffered_data(fd_cb, FALSE);
			soisconnected(so);
		}

		/* We don't need the connect packet any more */
		if (fd_cb->connect_packet != NULL) {
			mbuf_freem(fd_cb->connect_packet);
			fd_cb->connect_packet = NULL;
		}

		/* We don't need the original remote endpoint any more */
		free_sockaddr(fd_cb->original_remote_endpoint);
done:
		socket_unlock(so, 1);
	}
	FDUNLOCK(fd_cb);
}
2720 
/*
 * Handles a close message from the flow divert provider.
 *
 * Reads the error code and shutdown direction ("how") TLVs from the packet,
 * records the error on the attached socket, marks the tunnel side of the
 * flow as closed, and then either fully disconnects the socket or performs
 * a half-shutdown of the appropriate direction.
 */
static void
flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t        close_error                     = 0;
	int                     error                   = 0;
	int                     how                     = 0;

	/* Both TLVs are required; bail out without touching the flow if either is missing */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
		return;
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
		return;
	}

	how = ntohl(how);

	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		/*
		 * Datagram flows that were connected implicitly (i.e. never explicitly
		 * connected by the app) are not treated as connected here.
		 * NOTE(review): fd_cb->so is read before socket_lock() is taken —
		 * presumably safe under FDLOCK; confirm.
		 */
		bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
			goto done;
		}

		fd_cb->so->so_error = (uint16_t)ntohl(close_error);

		/* Record that the tunnel closed the requested direction(s) */
		flow_divert_update_closed_state(fd_cb, how, true, true);

		/* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
		how = flow_divert_tunnel_how_closed(fd_cb);
		if (how == SHUT_RDWR) {
			flow_divert_disconnect_socket(fd_cb->so, is_connected, true);
		} else if (how == SHUT_RD && is_connected) {
			socantrcvmore(fd_cb->so);
		} else if (how == SHUT_WR && is_connected) {
			socantsendmore(fd_cb->so);
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
2772 
/*
 * Builds a control mbuf carrying the flow's local address, for delivery to
 * the app alongside received datagrams.
 *
 * Returns an IP_RECVDSTADDR control message for IPv4 or an IPV6_PKTINFO
 * control message for IPv6, but only when the app asked for it via the
 * corresponding socket option or when socket flow tracking is enabled
 * (which needs to observe the local address). Returns NULL otherwise.
 */
static mbuf_ref_t
flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
{
	struct inpcb *inp = sotoinpcb(fd_cb->so);
	bool need_recvdstaddr = false;
	/* Socket flow tracking needs to see the local address */
	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
	if ((inp->inp_vflag & INP_IPV4) &&
	    fd_cb->local_endpoint.sa.sa_family == AF_INET &&
	    ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
		return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
	} else if ((inp->inp_vflag & INP_IPV6) &&
	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
		struct in6_pktinfo pi6;
		memset(&pi6, 0, sizeof(pi6));
		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;

		/* ipi6_ifindex is intentionally left zero; only the address is reported */
		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
	}
	return NULL;
}
2795 
/*
 * Handles a data message from the flow divert provider by appending the
 * payload (everything in the packet past `offset`) to the attached socket's
 * receive buffer.
 *
 * For datagram sockets an optional remote-address TLV may precede the
 * payload; if present and valid it is delivered to the app with the data.
 * If the receive buffer is full, ENOBUFS is returned and the flow is
 * flagged so that a read notification is sent once the app drains data
 * (see flow_divert_rcvd()).
 *
 * Returns 0 on success or an errno value. Note: the caller retains
 * ownership of `packet`; only the split-off tail is consumed here.
 */
static int
flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, size_t offset)
{
	int error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		mbuf_ref_t data = NULL;
		size_t  data_size;
		struct sockaddr_storage remote_address;
		boolean_t got_remote_sa = FALSE;
		boolean_t appended = FALSE;
		boolean_t append_success = FALSE;

		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
			goto done;
		}

		/* No room: ask to be told when the app reads, and push back on the provider */
		if (sbspace(&fd_cb->so->so_rcv) == 0) {
			error = ENOBUFS;
			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
			uint32_t val_size = 0;

			/* check if we got remote address with data */
			memset(&remote_address, 0, sizeof(remote_address));
			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
			if (error || val_size > sizeof(remote_address)) {
				/* The remote address TLV is optional; its absence is not an error */
				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
				error = 0;
			} else {
				/* Clamp a possibly-overstated ss_len before validating */
				if (remote_address.ss_len > sizeof(remote_address)) {
					remote_address.ss_len = sizeof(remote_address);
				}
				/* validate the address */
				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
					got_remote_sa = TRUE;
				} else {
					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
				}
				/* Skip past the TLV (type byte + length word + value) to reach the payload */
				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
			}
		}

		data_size = (mbuf_pkthdr_len(packet) - offset);

		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
			goto done;
		}

		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);

		/* Detach the payload from the packet header; `data` is consumed below */
		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
		if (error || data == NULL) {
			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
			append_success = TRUE;
		} else {
			struct sockaddr * __single append_sa = NULL;
			mbuf_ref_t mctl;

			/* Use the provider-supplied remote address if any, else the peer address */
			if (got_remote_sa == TRUE) {
				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
			} else {
				if (SOCK_CHECK_DOM(fd_cb->so, AF_INET6)) {
					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
				} else {
					error = in_getpeeraddr(fd_cb->so, &append_sa);
				}
			}
			if (error) {
				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
			}

			mctl = flow_divert_create_control_mbuf(fd_cb);
			int append_error = 0;
			/*
			 * NOTE(review): append_error == 0 with appended == FALSE is treated as
			 * success for accounting purposes — presumably sbappendaddr freed the
			 * chain without an error in that case; confirm against sbappendaddr.
			 */
			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
			if (appended || append_error == 0) {
				append_success = TRUE;
			} else {
				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
			}

			free_sockaddr(append_sa);
		}

		/* Account for the received bytes even if no wakeup is needed */
		if (append_success) {
			fd_cb->bytes_received += data_size;
			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
		}

		/* Wake up any reader only if data actually landed in the receive buffer */
		if (appended) {
			sorwakeup(fd_cb->so);
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);

	return error;
}
2914 
/*
 * Handles a read notification from the flow divert provider.
 *
 * The provider reports how many bytes it has consumed; that count is added
 * to the flow's send window, and any data buffered on the socket's send
 * queue is then (re)attempted.
 */
static void
flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t        read_count              = 0;
	int             error                   = 0;

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
		return;
	}

	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
			goto done;
		}

		/* Open the send window by the number of bytes the provider consumed */
		fd_cb->send_window += ntohl(read_count);
		flow_divert_send_buffered_data(fd_cb, FALSE);
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
2945 
2946 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_ref_t packet,int offset)2947 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
2948 {
2949 	int error         = 0;
2950 	uint32_t key_size = 0;
2951 	int log_level     = 0;
2952 	uint32_t flags    = 0;
2953 	int32_t order     = FLOW_DIVERT_ORDER_LAST;
2954 
2955 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2956 	if (error) {
2957 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2958 		return;
2959 	}
2960 
2961 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2962 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2963 		return;
2964 	}
2965 
2966 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2967 	if (!error) {
2968 		nil_pcb.log_level = (uint8_t)log_level;
2969 	}
2970 
2971 	lck_rw_lock_exclusive(&group->lck);
2972 
2973 	if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2974 		FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2975 		lck_rw_done(&group->lck);
2976 		return;
2977 	}
2978 
2979 	if (group->token_key != NULL) {
2980 		kfree_data_sized_by(group->token_key, group->token_key_size);
2981 	}
2982 
2983 	group->token_key = kalloc_data(key_size, Z_WAITOK);
2984 	group->token_key_size = key_size;
2985 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2986 	if (error) {
2987 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2988 		kfree_data_sized_by(group->token_key, group->token_key_size);
2989 		lck_rw_done(&group->lck);
2990 		return;
2991 	}
2992 
2993 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2994 	if (!error) {
2995 		group->flags = flags;
2996 	}
2997 
2998 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
2999 	if (!error) {
3000 		FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
3001 		group->order = order;
3002 	}
3003 
3004 	lck_rw_done(&group->lck);
3005 }
3006 
/*
 * Handles a properties update message from the flow divert provider.
 *
 * Two optional TLVs may be present: an output interface index, which
 * re-scopes the flow and recomputes its local endpoint, and an application
 * data blob, which replaces any previously-stored application data on the
 * flow. Missing TLVs are simply ignored.
 */
static void
flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	int         error           = 0;
	int         out_if_index    = 0;
	uint32_t    app_data_length = 0;

	FDLOG0(LOG_INFO, fd_cb, "received a properties update");

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
	}

	/* Probe for the app data TLV's length first; the data is copied below under the locks */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
	}

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
			goto done;
		}

		/* Re-scope the flow to the new output interface and refresh the local endpoint */
		if (out_if_index > 0) {
			flow_divert_scope(fd_cb, out_if_index, true);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (app_data_length > 0) {
			uint8_t * app_data __indexable = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					/* Replace any previously-stored application data */
					if (fd_cb->app_data != NULL) {
						kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
			}
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
3064 
/*
 * Handles an app map create message from the flow divert provider.
 *
 * Replaces the group's signing-identifier trie with one built from the
 * signing ID TLVs in the packet. The build is two-pass: the first pass
 * counts the signing IDs and their total byte length (to size the trie
 * memory exactly), the second pass copies each ID into the trie's byte
 * pool and inserts it. On any insertion error the new trie's memory is
 * discarded and the group is left with an empty trie (the old trie is
 * always destroyed up front).
 */
static void
flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
{
	size_t                  bytes_mem_size      = 0;
	size_t                  child_maps_mem_size = 0;
	size_t                  nodes_mem_size      = 0;
	size_t                  trie_memory_size    = 0;
	int                     cursor              = 0;
	int                     error               = 0;
	struct flow_divert_trie new_trie;
	int                     insert_error        = 0;
	int                     prefix_count        = -1;
	int                     signing_id_count    = 0;
	size_t                  bytes_count         = 0;
	size_t                  nodes_count         = 0;
	size_t                  maps_count          = 0;

	lck_rw_lock_exclusive(&group->lck);

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	memset(&new_trie, 0, sizeof(new_trie));

	/* Get the number of shared prefixes in the new set of signing ID strings */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);

	if (prefix_count < 0 || error) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
		lck_rw_done(&group->lck);
		return;
	}

	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
			/* A zero count below aborts the whole update */
			signing_id_count = 0;
			break;
		}
		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
			signing_id_count = 0;
			break;
		}
		signing_id_count++;
	}

	if (signing_id_count == 0) {
		lck_rw_done(&group->lck);
		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
		return;
	}

	/* All size computations are overflow-checked since the counts come from the packet */
	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
		return;
	}

	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node's child map */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
		return;
	}

	/* The trie stores counts and indices as uint16_t, so everything must fit in 16 bits */
	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
		lck_rw_done(&group->lck);
		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
		return;
	}

	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
	    nodes_count, maps_count, bytes_count);

	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
		lck_rw_done(&group->lck);
		return;
	}

	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
		lck_rw_done(&group->lck);
		return;
	}

	/* One allocation backs the node array, the child maps, and the byte pool */
	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
	new_trie.memory_size = trie_memory_size;
	if (new_trie.memory == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
		lck_rw_done(&group->lck);
		return;
	}

	/* Initialize the free lists */
	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
	new_trie.nodes_count = (uint16_t)nodes_count;

	new_trie.nodes_free_next = 0;
	memset(new_trie.nodes, 0, nodes_mem_size);

	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
	new_trie.child_maps_count = (uint16_t)maps_count;
	new_trie.child_maps_size = child_maps_mem_size;

	new_trie.child_maps_free_next = 0;
	/* 0xff fills every child-map slot with NULL_TRIE_IDX */
	memset(new_trie.child_maps, 0xff, child_maps_mem_size);

	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
	new_trie.bytes_count = (uint16_t)bytes_count;

	new_trie.bytes_free_next = 0;
	memset(new_trie.bytes, 0, bytes_mem_size);

	/* The root is an empty node */
	new_trie.root = trie_node_alloc(&new_trie);

	/* Add each signing ID to the trie */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
			insert_error = EINVAL;
			break;
		}
		/* Re-check the size against the remaining byte pool; the packet could be inconsistent with pass one */
		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
			uint16_t new_node_idx;
			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
			if (error) {
				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
				insert_error = EINVAL;
				break;
			}
			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
			if (new_node_idx == NULL_TRIE_IDX) {
				insert_error = EINVAL;
				break;
			}
		} else {
			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
			insert_error = ENOBUFS;
			break;
		}
	}

	/* Install the new trie on success; otherwise free it and leave the group's trie empty */
	if (!insert_error) {
		group->signing_id_trie = new_trie;
	} else {
		kfree_data_sized_by(new_trie.memory, new_trie.memory_size);
	}

	lck_rw_done(&group->lck);
}
3234 
/*
 * Handles a flow states request from the flow divert provider.
 *
 * Builds a single FLOW_STATES packet containing one flow-state TLV per flow
 * in the group (connection ID, byte counters, send window, and send buffer
 * occupancy) and enqueues it on the group's kernel control unit.
 *
 * To avoid taking socket locks while holding the group lock, each PCB is
 * retained into a temporary list under the (shared) group lock, and the
 * per-flow state is gathered afterwards under each socket's own lock.
 */
static void
flow_divert_handle_flow_states_request(struct flow_divert_group *group)
{
	struct flow_divert_pcb *fd_cb;
	mbuf_ref_t packet = NULL;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;
	int error = 0;
	uint32_t ctl_unit = 0;

	SLIST_INIT(&tmp_list);

	error = flow_divert_packet_init(&nil_pcb, FLOW_DIVERT_PKT_FLOW_STATES, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_packet_init failed: %d, cannot send flow states", error);
		return;
	}

	lck_rw_lock_shared(&group->lck);

	if (!MBUFQ_EMPTY(&group->send_queue)) {
		FDLOG0(LOG_WARNING, &nil_pcb, "flow_divert_handle_flow_states_request: group send queue is not empty");
	}

	/* Capture the control unit while the lock is held; it is used after the lock is dropped */
	ctl_unit = group->ctl_unit;

	/* Retain every flow in the group so it stays valid after the group lock is released */
	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	lck_rw_done(&group->lck);

	SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
		FDLOCK(fd_cb);
		if (fd_cb->so != NULL) {
			struct flow_divert_flow_state state = {};
			socket_lock(fd_cb->so, 0);

			state.conn_id = fd_cb->hash;
			state.bytes_written_by_app = fd_cb->bytes_written_by_app;
			state.bytes_sent = fd_cb->bytes_sent;
			state.bytes_received = fd_cb->bytes_received;
			state.send_window = fd_cb->send_window;
			state.send_buffer_bytes = fd_cb->so->so_snd.sb_cc;

			error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_FLOW_STATE, sizeof(state), &state);
			if (error) {
				/* Skip this flow but keep reporting the others */
				FDLOG(LOG_ERR, fd_cb, "Failed to add a flow state: %d", error);
			}

			socket_unlock(fd_cb->so, 0);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);
	}

	/* ctl_enqueuembuf consumes the packet on success; free it ourselves on failure */
	error = ctl_enqueuembuf(g_flow_divert_kctl_ref, ctl_unit, packet, CTL_DATA_EOR);
	if (error) {
		FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_handle_flow_states_request: ctl_enqueuembuf returned an error: %d", error);
		mbuf_freem(packet);
	}
}
3297 
/*
 * Entry point for packets arriving from the flow divert provider on a
 * group's kernel control unit.
 *
 * Validates and parses the packet header, then dispatches on packet type:
 * messages with a zero connection ID are group-level (group init, app map
 * create, flow states request); all others are looked up by connection ID
 * and routed to the matching flow's handler.
 *
 * Always frees `packet` before returning, regardless of outcome.
 */
static int
flow_divert_input(mbuf_ref_t packet, struct flow_divert_group *group)
{
	struct flow_divert_packet_header    hdr;
	int                                 error  = 0;
	struct flow_divert_pcb              *fd_cb;

	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
		error = EINVAL;
		goto done;
	}

	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
		error = ENOBUFS;
		goto done;
	}

	hdr.conn_id = ntohl(hdr.conn_id);

	/* A connection ID of zero identifies a group-level message */
	if (hdr.conn_id == 0) {
		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_GROUP_INIT:
			flow_divert_handle_group_init(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_FLOW_STATES_REQUEST:
			flow_divert_handle_flow_states_request(group);
			break;
		default:
			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}
		goto done;
	}

	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
	if (fd_cb == NULL) {
		/* Close and read-notify for a vanished flow are expected races; don't log them */
		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
		}
		goto done;
	}

	switch (hdr.packet_type) {
	case FLOW_DIVERT_PKT_CONNECT_RESULT:
		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_CLOSE:
		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_DATA:
		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_READ_NOTIFY:
		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
		break;
	default:
		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
		break;
	}

	/* Drop the reference taken by flow_divert_pcb_lookup() */
	FDRELEASE(fd_cb);

done:
	mbuf_freem(packet);
	return error;
}
3373 
/*
 * Aborts every flow in a group and marks the group defunct.
 *
 * Called when the group's control connection goes away. The group's send
 * queue is drained and each flow's socket is disconnected with
 * ECONNABORTED. PCBs are retained into a temporary list under the group
 * lock and processed afterwards, so that socket locks are never taken
 * while the group lock is held.
 */
static void
flow_divert_close_all(struct flow_divert_group *group)
{
	struct flow_divert_pcb                  *fd_cb;
	SLIST_HEAD(, flow_divert_pcb)   tmp_list;

	SLIST_INIT(&tmp_list);

	lck_rw_lock_exclusive(&group->lck);

	MBUFQ_DRAIN(&group->send_queue);

	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	/* Prevent any future (re)initialization of this group */
	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;

	lck_rw_done(&group->lck);

	while (!SLIST_EMPTY(&tmp_list)) {
		fd_cb = SLIST_FIRST(&tmp_list);
		FDLOCK(fd_cb);
		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
		if (fd_cb->so != NULL) {
			socket_lock(fd_cb->so, 0);
			flow_divert_pcb_remove(fd_cb);
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			fd_cb->so->so_error = ECONNABORTED;
			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
			socket_unlock(fd_cb->so, 0);
		}
		FDUNLOCK(fd_cb);
		/* Drop the reference taken while walking the tree */
		FDRELEASE(fd_cb);
	}
}
3411 
/*
 * Detaches flow divert from a socket.
 *
 * Flushes any buffered outbound data, tells the provider the flow is
 * closing, removes the flow from its group, severs the PCB<->socket link,
 * and drops the socket's reference on the PCB. Called with the socket
 * locked.
 */
void
flow_divert_detach(struct socket *so)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return;
	}

	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_fd_pcb = NULL;

	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);

	if (fd_cb->group != NULL) {
		/* Last-ditch effort to send any buffered data */
		flow_divert_send_buffered_data(fd_cb, TRUE);

		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
		flow_divert_send_close_if_needed(fd_cb);
		/* Remove from the group */
		flow_divert_pcb_remove(fd_cb);
	}

	/*
	 * Clear fd_cb->so under FDLOCK. The socket lock is dropped first,
	 * presumably to honor the FDLOCK-before-socket-lock ordering used by
	 * the packet handlers above — confirm before reordering.
	 */
	socket_unlock(so, 0);
	FDLOCK(fd_cb);
	fd_cb->so = NULL;
	FDUNLOCK(fd_cb);
	socket_lock(so, 0);

	FDRELEASE(fd_cb);       /* Release the socket's reference */
}
3444 
/*
 * Closes a flow-diverted socket.
 *
 * For stream sockets, begins the disconnect and discards any unread
 * received data. Buffered outbound data gets one final send attempt, the
 * provider is notified that the flow is closed in both directions, and the
 * flow is removed from its group.
 *
 * Returns 0 on success or EINVAL if the socket is not diverted.
 */
static int
flow_divert_close(struct socket *so)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Closing");

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		soisdisconnecting(so);
		sbflush(&so->so_rcv);
	}

	flow_divert_send_buffered_data(fd_cb, TRUE);
	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	/* Remove from the group */
	flow_divert_pcb_remove(fd_cb);

	return 0;
}
3470 
3471 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3472 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3473     sae_connid_t cid __unused)
3474 {
3475 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3476 		return EINVAL;
3477 	}
3478 
3479 	return flow_divert_close(so);
3480 }
3481 
/*
 * shutdown handler for flow-diverted sockets.
 *
 * Marks the socket as unable to send more data, records the write-side
 * close on the flow, and notifies the provider if needed. Only the write
 * direction is shut down here.
 *
 * Returns 0 on success or EINVAL if the socket is not diverted.
 */
static int
flow_divert_shutdown(struct socket *so)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Can't send more");

	socantsendmore(so);

	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	return 0;
}
3500 
3501 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3502 flow_divert_rcvd(struct socket *so, int flags __unused)
3503 {
3504 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3505 	int space = 0;
3506 
3507 	if (!SO_IS_DIVERTED(so)) {
3508 		return EINVAL;
3509 	}
3510 
3511 	space = sbspace(&so->so_rcv);
3512 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3513 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3514 	    (space > 0) &&
3515 	    flow_divert_send_read_notification(fd_cb) == 0) {
3516 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3517 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3518 	}
3519 
3520 	return 0;
3521 }
3522 
3523 static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet,struct sockaddr * toaddr)3524 flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr)
3525 {
3526 	int error = 0;
3527 	int port  = 0;
3528 
3529 	if (!flow_divert_is_sockaddr_valid(toaddr)) {
3530 		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3531 		error = EINVAL;
3532 		goto done;
3533 	}
3534 
3535 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
3536 	if (error) {
3537 		goto done;
3538 	}
3539 
3540 	if (toaddr->sa_family == AF_INET) {
3541 		port = ntohs((satosin(toaddr))->sin_port);
3542 	} else {
3543 		port = ntohs((satosin6(toaddr))->sin6_port);
3544 	}
3545 
3546 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3547 	if (error) {
3548 		goto done;
3549 	}
3550 
3551 done:
3552 	return error;
3553 }
3554 
3555 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer)3556 flow_divert_get_buffered_target_address(mbuf_ref_t buffer)
3557 {
3558 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3559 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3560 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3561 			return toaddr;
3562 		}
3563 	}
3564 	return NULL;
3565 }
3566 
3567 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3568 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3569 {
3570 	switch (addr->sa_family) {
3571 	case AF_INET:
3572 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3573 			return FALSE;
3574 		}
3575 		break;
3576 	case AF_INET6:
3577 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3578 			return FALSE;
3579 		}
3580 		break;
3581 	default:
3582 		return FALSE;
3583 	}
3584 	return TRUE;
3585 }
3586 
3587 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3588 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3589     struct sockaddr **dup)
3590 {
3591 	int                                             error           = 0;
3592 	struct sockaddr                 *result;
3593 	struct sockaddr_storage ss;
3594 
3595 	if (addr != NULL) {
3596 		result = addr;
3597 	} else {
3598 		memset(&ss, 0, sizeof(ss));
3599 		ss.ss_family = family;
3600 		if (ss.ss_family == AF_INET) {
3601 			ss.ss_len = sizeof(struct sockaddr_in);
3602 		} else if (ss.ss_family == AF_INET6) {
3603 			ss.ss_len = sizeof(struct sockaddr_in6);
3604 		} else {
3605 			error = EINVAL;
3606 		}
3607 		result = (struct sockaddr *)&ss;
3608 	}
3609 
3610 	if (!error) {
3611 		*dup = dup_sockaddr(result, 1);
3612 		if (*dup == NULL) {
3613 			error = ENOBUFS;
3614 		}
3615 	}
3616 
3617 	return error;
3618 }
3619 
/*
 * Mark a diverted socket disconnected and, for datagram sockets, retire the
 * underlying inpcb: detach it from NetworkStatistics, release cached routes,
 * and either defer the final teardown (via content filter) or schedule the
 * PCB for immediate garbage collection.
 *
 * is_connected:    force the disconnected state even for non-stream sockets.
 * delay_if_needed: ask content filter whether teardown must be deferred
 *                  instead of marking the PCB dead right away.
 */
static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
{
	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
		soisdisconnected(so);
	}
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct inpcb *inp = sotoinpcb(so);
		/* SOF_PCBCLEARING means teardown already happened; don't repeat it. */
		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
			/*
			 * Let NetworkStatistics know this PCB is going away
			 * before we detach it.
			 */
			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
				nstat_pcb_detach(inp);
			}

			/* Drop any cached route before the PCB is retired. */
			if (SOCK_DOM(so) == PF_INET6) {
				ROUTE_RELEASE(&inp->in6p_route);
			} else {
				ROUTE_RELEASE(&inp->inp_route);
			}
			if (delay_if_needed) {
				(void) cfil_sock_is_dead(so);
			} else {
				inp->inp_state = INPCB_STATE_DEAD;
				inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
			}
			/* makes sure we're not called twice from so_close */
			so->so_flags |= SOF_PCBCLEARING;
		}
	}
}
3653 
3654 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3655 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3656 {
3657 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3658 
3659 	if (!SO_IS_DIVERTED(so)) {
3660 		return EINVAL;
3661 	}
3662 
3663 	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3664 		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3665 			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3666 		}
3667 	}
3668 
3669 	if (SOCK_DOM(so) == PF_INET) {
3670 		return g_tcp_protosw->pr_ctloutput(so, sopt);
3671 	} else if (SOCK_DOM(so) == PF_INET6) {
3672 		return g_tcp6_protosw->pr_ctloutput(so, sopt);
3673 	}
3674 	return 0;
3675 }
3676 
/*
 * Core connect path for a diverted socket.  Builds (or re-sends) the connect
 * packet for the provider, selects a local endpoint, and transitions the
 * socket into the connecting/connected state.
 *
 * implicit: true when invoked from the data path (sendto on an unconnected
 *           datagram socket) rather than an explicit connect() call.
 */
static errno_t
flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
	int                     error           = 0;
	struct inpcb            *inp            = sotoinpcb(so);
	struct sockaddr_in      *sinp;
	mbuf_ref_t              connect_packet  = NULL;
	int                     do_send         = 1;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	/* A control group is required to reach the provider. */
	if (fd_cb->group == NULL) {
		error = ENETUNREACH;
		goto done;
	}

	if (inp == NULL) {
		error = EINVAL;
		goto done;
	} else if (inp->inp_state == INPCB_STATE_DEAD) {
		/* Surface a pending socket error, if any, instead of a generic EINVAL. */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
		} else {
			error = EINVAL;
		}
		goto done;
	}

	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
		error = EALREADY;
		goto done;
	}

	FDLOG0(LOG_INFO, fd_cb, "Connecting");

	/* First call: build and save the connect packet. */
	if (fd_cb->connect_packet == NULL) {
		struct sockaddr_in sin = {};
		struct ifnet * __single ifp = NULL;

		if (to == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
			error = EINVAL;
			goto done;
		}

		if (!flow_divert_is_sockaddr_valid(to)) {
			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
			error = EINVAL;
			goto done;
		}

		/* Remember the original destination and inp state so they can be restored later. */
		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
		if (fd_cb->original_remote_endpoint == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
			error = ENOMEM;
			goto done;
		}
		fd_cb->original_vflag = inp->inp_vflag;
		fd_cb->original_last_outifp = inp->inp_last_outifp;
		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;

		/* IPv4 multicast destinations are not supported over flow divert. */
		sinp = (struct sockaddr_in *)(void *)to;
		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto done;
		}

		/* Convert a v4-mapped IPv6 destination to plain IPv4 on dual-stack sockets. */
		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
			struct sockaddr_in6 sin6 = {};
			sin6.sin6_family = AF_INET6;
			sin6.sin6_len = sizeof(struct sockaddr_in6);
			sin6.sin6_port = satosin6(to)->sin6_port;
			sin6.sin6_addr = satosin6(to)->sin6_addr;
			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
				in6_sin6_2_sin(&sin, &sin6);
				to = (struct sockaddr *)&sin;
			}
		}

		if (to->sa_family == AF_INET6) {
			struct sockaddr_in6 *to6 = satosin6(to);

			inp->inp_vflag &= ~INP_IPV4;
			inp->inp_vflag |= INP_IPV6;
			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
				/* Only transparent flows with a specific destination treat this as fatal. */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->in6p_last_outifp = ifp;
				ifnet_release(ifp);
			}

			/* Move any embedded scope out of the address into sin6_scope_id. */
			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
			    in6_embedded_scope &&
			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
			}

			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
			    in6_embedded_scope &&
			    to6->sin6_addr.s6_addr16[1] != 0) {
				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
				to6->sin6_addr.s6_addr16[1] = 0;
			}
		} else if (to->sa_family == AF_INET) {
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
			fd_cb->local_endpoint.sin.sin_family = AF_INET;
			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
				/* Only transparent flows with a specific destination treat this as fatal. */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->inp_last_outifp = ifp;
				ifnet_release(ifp);
			}
		} else {
			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
		}

		/* Non-zero when any interface constraint (cellular/expensive/constrained) is violated. */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
		    !implicit || /* connect() was called or */
		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
		}

		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
		if (error) {
			goto done;
		}

		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
			flow_divert_set_remote_endpoint(fd_cb, to);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (implicit) {
			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
		}

		/* With TCP fast-open style preconnect data, defer sending until first send/receive. */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
			do_send = 0;
		}

		/* Ownership of the packet moves to the PCB. */
		fd_cb->connect_packet = connect_packet;
		connect_packet = NULL;
	} else {
		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
	}

	if (do_send) {
		error = flow_divert_send_connect_packet(fd_cb);
		if (error) {
			goto done;
		}

		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
	}

	/* Token-less datagram flows are considered connected immediately. */
	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
		soisconnected(so);
	} else {
		soisconnecting(so);
	}

done:
	return error;
}
3875 
3876 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3877 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3878 {
3879 #if CONTENT_FILTER
3880 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3881 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3882 		if (error != 0) {
3883 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3884 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3885 			return error;
3886 		}
3887 	}
3888 #endif /* CONTENT_FILTER */
3889 
3890 	return flow_divert_connect_out_internal(so, to, p, false);
3891 }
3892 
/*
 * Shared connectx() implementation for diverted sockets: optionally binds
 * the flow to an interface scope, starts the connect, and, if the caller
 * supplied preconnect data, sends it and reports how much was queued.
 * Returns EINPROGRESS (not EWOULDBLOCK) for in-flight blocking connects,
 * matching connectx() semantics.
 */
static int
flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
    struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
{
	struct inpcb *inp = sotoinpcb(so);
	int error;

	if (inp == NULL) {
		return EINVAL;
	}

	VERIFY(dst != NULL);

#if CONTENT_FILTER && NECP
	/* Re-evaluate NECP policy for token-based stream flows without a content filter. */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
	}
#endif /* CONTENT_FILTER && NECP */

	/* bind socket to the specified interface, if requested */
	if (ifscope != IFSCOPE_NONE &&
	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
		return error;
	}

	error = flow_divert_connect_out(so, dst, p);

	if (error != 0) {
		return error;
	}

	/* if there is data, send it */
	if (auio != NULL) {
		user_ssize_t datalen = 0;

		/* pru_sosend takes its own locks; drop ours across the call. */
		socket_unlock(so, 0);

		VERIFY(bytes_written != NULL);

		datalen = uio_resid(auio);
		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
		socket_lock(so, 0);

		if (error == 0 || error == EWOULDBLOCK) {
			*bytes_written = datalen - uio_resid(auio);
		}

		/*
		 * sosend returns EWOULDBLOCK if it's a non-blocking
		 * socket or a timeout occurred (this allows to return
		 * the amount of queued data through sendit()).
		 *
		 * However, connectx() returns EINPROGRESS in case of a
		 * blocking socket. So we change the return value here.
		 */
		if (error == EWOULDBLOCK) {
			error = EINPROGRESS;
		}
	}

	if (error == 0 && pcid != NULL) {
		*pcid = 1;      /* there is only 1 connection for a TCP */
	}

	return error;
}
3961 
/* connectx() entry point for diverted PF_INET sockets; the source address is ignored. */
static int
flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
    uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
{
	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
}
3970 
/* connectx() entry point for diverted PF_INET6 sockets; the source address is ignored. */
static int
flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
    uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
{
	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
}
3979 
/*
 * pru_send handler for diverted sockets.  Performs an implicit connect if
 * needed, then hands the data to the provider.  'data' and 'control' are
 * consumed: flow_divert_send_app_data() takes ownership of 'data' (it is
 * NULLed here afterwards), and anything left over is freed at 'done'.
 *
 * NOTE(review): the early !SO_IS_DIVERTED return does not free data/control
 * — presumably callers retain ownership on that path; verify against the
 * pru_send contract.
 */
static errno_t
flow_divert_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
{
	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
	int                     error   = 0;
	struct inpcb            *inp;
#if CONTENT_FILTER
	struct m_tag *cfil_tag = NULL;
#endif

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		error = ECONNRESET;
		goto done;
	}

	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
		/* The provider considers this datagram flow to be closed, so no data can be sent */
		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
		error = EHOSTUNREACH;
		goto done;
	}

#if CONTENT_FILTER
	/*
	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
	 * retrieve the CFIL saved remote address from the mbuf and use it.
	 */
	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
		struct sockaddr * __single cfil_faddr = NULL;
		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
		if (cfil_tag) {
			to = (struct sockaddr *)(void *)cfil_faddr;
		}
		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
	}
#endif

	/* Implicit connect */
	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
		FDLOG0(LOG_INFO, fd_cb, "implicit connect");

		error = flow_divert_connect_out_internal(so, to, p, true);
		if (error) {
			goto done;
		}
	} else {
		/* Non-zero when any interface constraint (cellular/expensive/constrained) is violated. */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}
	}

	if (data != NULL) {
		size_t data_size = 0;
		/* Without a packet header, walk the chain to total up the length. */
		if (mbuf_flags(data) & M_PKTHDR) {
			data_size = mbuf_pkthdr_len(data);
		} else {
			for (mbuf_t blob = data; blob != NULL; blob = mbuf_next(blob)) {
				data_size += mbuf_len(blob);
			}
		}

		FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", data_size);
		fd_cb->bytes_written_by_app += data_size;

		error = flow_divert_send_app_data(fd_cb, data, data_size, to);

		/* Ownership of 'data' passed to flow_divert_send_app_data(); don't free it below. */
		data = NULL;

		if (error) {
			goto done;
		}
	}

	if (flags & PRUS_EOF) {
		flow_divert_shutdown(so);
	}

done:
	if (data) {
		mbuf_freem(data);
	}
	if (control) {
		mbuf_free(control);
	}
#if CONTENT_FILTER
	if (cfil_tag) {
		m_tag_free(cfil_tag);
	}
#endif

	return error;
}
4080 
4081 static int
flow_divert_preconnect(struct socket * so)4082 flow_divert_preconnect(struct socket *so)
4083 {
4084 	int error = 0;
4085 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4086 
4087 	if (!SO_IS_DIVERTED(so)) {
4088 		return EINVAL;
4089 	}
4090 
4091 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4092 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4093 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
4094 		if (error) {
4095 			return error;
4096 		}
4097 
4098 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4099 	}
4100 
4101 	soclearfastopen(so);
4102 
4103 	return error;
4104 }
4105 
4106 static void
flow_divert_set_protosw(struct socket * so)4107 flow_divert_set_protosw(struct socket *so)
4108 {
4109 	if (SOCK_DOM(so) == PF_INET) {
4110 		so->so_proto = &g_flow_divert_in_protosw;
4111 	} else {
4112 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4113 	}
4114 }
4115 
4116 static void
flow_divert_set_udp_protosw(struct socket * so)4117 flow_divert_set_udp_protosw(struct socket *so)
4118 {
4119 	if (SOCK_DOM(so) == PF_INET) {
4120 		so->so_proto = &g_flow_divert_in_udp_protosw;
4121 	} else {
4122 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4123 	}
4124 }
4125 
/*
 * Data-path entry for sockets being diverted at send time: lazily creates
 * the flow-divert PCB if one is not yet attached, then forwards to
 * flow_divert_data_out().  'data' and 'control' are consumed on all paths
 * (flow_divert_data_out() takes ownership on success; they are freed here
 * if PCB creation fails).
 */
errno_t
flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
{
	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
	struct inpcb *inp;
	int error = 0;

	inp = sotoinpcb(so);
	if (inp == NULL) {
		return EINVAL;
	}

	/* No PCB yet: set one up based on the socket's NECP policy. */
	if (fd_cb == NULL) {
		error = flow_divert_pcb_init(so);
		fd_cb  = so->so_fd_pcb;
		if (error != 0 || fd_cb == NULL) {
			goto done;
		}
	}
	return flow_divert_data_out(so, flags, data, to, control, p);

done:
	if (data) {
		mbuf_freem(data);
	}
	if (control) {
		mbuf_free(control);
	}

	return error;
}
4157 
/*
 * Create a flow-divert PCB for the socket and insert it into a group derived
 * from the supplied control/aggregate units.  Retries unit derivation while
 * flow_divert_add_to_group() reports ENOENT.  On success the PCB is attached
 * to the socket (SOF_FLOW_DIVERT set) and the socket's protosw is swapped to
 * the flow-divert variant; on failure the PCB reference is released.
 */
static errno_t
flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
{
	errno_t error = 0;
	struct flow_divert_pcb *fd_cb = NULL;
	uint32_t agg_unit = aggregate_unit;
	uint32_t policy_control_unit = ctl_unit;
	bool is_aggregate = false;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		return EALREADY;
	}

	fd_cb = flow_divert_pcb_create(so);
	if (fd_cb == NULL) {
		return ENOMEM;
	}

	do {
		/* Derivation consumes units from policy_control_unit/agg_unit on each pass. */
		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
			error = EINVAL;
			break;
		}

		error = flow_divert_add_to_group(fd_cb, group_unit);
		if (error == 0) {
			so->so_fd_pcb = fd_cb;
			so->so_flags |= SOF_FLOW_DIVERT;
			fd_cb->control_group_unit = group_unit;
			fd_cb->policy_control_unit = ctl_unit;
			fd_cb->aggregate_unit = agg_unit;
			/* Aggregate-derived flows are transparent to the application. */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}

			if (SOCK_TYPE(so) == SOCK_STREAM) {
				flow_divert_set_protosw(so);
			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
				flow_divert_set_udp_protosw(so);
			}

			FDLOG0(LOG_INFO, fd_cb, "Created");
		} else if (error != ENOENT) {
			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
		}
	} while (error == ENOENT);   /* the chosen group vanished; derive another */

	if (error != 0) {
		FDRELEASE(fd_cb);
	}

	return error;
}
4215 
4216 errno_t
flow_divert_pcb_init(struct socket * so)4217 flow_divert_pcb_init(struct socket *so)
4218 {
4219 	struct inpcb *inp = sotoinpcb(so);
4220 	uint32_t aggregate_units = 0;
4221 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4222 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4223 }
4224 
/*
 * setsockopt handler that activates flow divert on a socket from an opaque
 * token: copies the token in, extracts the key/control/aggregate unit TLVs,
 * verifies the token's HMAC when a valid control unit is present, then
 * initializes the flow-divert PCB and stashes the token for the connect
 * packet.  Only unconnected TCP/UDP sockets over IPv4/IPv6 are eligible.
 */
errno_t
flow_divert_token_set(struct socket *so, struct sockopt *sopt)
{
	uint32_t        ctl_unit        = 0;
	uint32_t        key_unit        = 0;
	uint32_t        aggregate_unit  = 0;
	int             error           = 0;
	int             hmac_error      = 0;
	mbuf_ref_t      token           = NULL;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		error = EALREADY;
		goto done;
	}

	if (g_init_result) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
		error = ENOPROTOOPT;
		goto done;
	}

	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
		error = EINVAL;
		goto done;
	} else {
		/* A TCP socket must not have started connecting yet. */
		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
			struct tcpcb *tp = sototcpcb(so);
			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
				error = EINVAL;
				goto done;
			}
		}
	}

	error = soopt_getm(sopt, &token);
	if (error) {
		token = NULL;
		goto done;
	}

	/* soopt_mcopyin frees the mbuf on failure, so drop our reference. */
	error = soopt_mcopyin(sopt, token);
	if (error) {
		token = NULL;
		goto done;
	}

	/* The key unit TLV is optional; an out-of-range value is treated as absent. */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
	if (!error) {
		key_unit = ntohl(key_unit);
		if (key_unit >= GROUP_COUNT_MAX) {
			key_unit = 0;
		}
	} else if (error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
		goto done;
	} else {
		key_unit = 0;
	}

	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
		goto done;
	}

	/* The aggregate unit TLV is optional. */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
	if (error && error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
		goto done;
	}

	/* A valid kernel control unit is required */
	ctl_unit = ntohl(ctl_unit);
	aggregate_unit = ntohl(aggregate_unit);

	/* Verify the token's HMAC with the key unit's key (falling back to the control unit). */
	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
		if (hmac_error && hmac_error != ENOENT) {
			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
			error = hmac_error;
			goto done;
		}
	}

	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
	if (error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		int log_level = LOG_NOTICE;

		/* An optional log-level TLV overrides the PCB's default. */
		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
		if (error == 0) {
			fd_cb->log_level = (uint8_t)log_level;
		}
		error = 0;

		/* The PCB takes ownership of the token; it is sent with the connect packet. */
		fd_cb->connect_token = token;
		token = NULL;

		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
	}

	if (hmac_error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		if (fd_cb != NULL) {
			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
		}
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4342 
/*
 * getsockopt handler that produces a transferable token describing this
 * flow: control unit, flow ID, optional app data, key unit, and an HMAC
 * computed with the control group's key (group lock held shared while the
 * key is read).  A NULL option buffer just reports the token's size.
 */
errno_t
flow_divert_token_get(struct socket *so, struct sockopt *sopt)
{
	uint32_t                    ctl_unit;
	int                         error                   = 0;
	uint8_t                     hmac[SHA_DIGEST_LENGTH];
	struct flow_divert_pcb      *fd_cb                  = so->so_fd_pcb;
	mbuf_ref_t                  token                   = NULL;
	struct flow_divert_group    *control_group          = NULL;

	if (!SO_IS_DIVERTED(so)) {
		error = EINVAL;
		goto done;
	}

	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto done;
	}

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		goto done;
	}

	ctl_unit = htonl(fd_cb->group->ctl_unit);

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
	if (error) {
		goto done;
	}

	if (fd_cb->app_data != NULL) {
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
		if (error) {
			goto done;
		}
	}

	/* Sign the token with the control group's key while holding the group lock shared. */
	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
	if (control_group != NULL) {
		lck_rw_lock_shared(&control_group->lck);
		ctl_unit = htonl(control_group->ctl_unit);
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
		if (!error) {
			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
		}
		lck_rw_done(&control_group->lck);
		FDGRP_RELEASE(control_group);
	} else {
		error = ENOPROTOOPT;
	}

	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
	if (error) {
		goto done;
	}

	if (sopt->sopt_val == USER_ADDR_NULL) {
		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
		sopt->sopt_valsize = mbuf_pkthdr_len(token);
		goto done;
	}

	error = soopt_mcopyout(sopt, token);
	if (error) {
		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
		goto done;
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4430 
/*
 * Destroy a flow-divert group: scrub and free its HMAC key material, free
 * and reset its signing-ID trie, then return the group to its zone.  The
 * group lock is held exclusively while the internals are torn down.
 * (kfree_data_sized_by() also clears the pointer/size fields it is given.)
 */
void
flow_divert_group_destroy(struct flow_divert_group *group)
{
	lck_rw_lock_exclusive(&group->lck);

	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);

	if (group->token_key != NULL) {
		/* Zero the key before freeing so key material doesn't linger. */
		memset(group->token_key, 0, group->token_key_size);
		kfree_data_sized_by(group->token_key, group->token_key_size);
	}

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	lck_rw_done(&group->lck);

	zfree(flow_divert_group_zone, group);
}
4454 
4455 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4456 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4457 {
4458 	struct flow_divert_group *new_group = NULL;
4459 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4460 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4461 	RB_INIT(&new_group->pcb_tree);
4462 	new_group->ctl_unit = unit;
4463 	new_group->in_process_pid = pid;
4464 	MBUFQ_INIT(&new_group->send_queue);
4465 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4466 	new_group->ref_count = 1;
4467 	new_group->order = FLOW_DIVERT_ORDER_LAST;
4468 	return new_group;
4469 }
4470 
4471 static errno_t
flow_divert_kctl_setup(u_int32_t * unit,void ** unitinfo)4472 flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
4473 {
4474 	if (unit == NULL || unitinfo == NULL) {
4475 		return EINVAL;
4476 	}
4477 
4478 	struct flow_divert_group *new_group = NULL;
4479 	errno_t error = 0;
4480 	lck_rw_lock_shared(&g_flow_divert_group_lck);
4481 	if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
4482 		// Return next unused in-process unit
4483 		u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
4484 		struct flow_divert_group *group_next = NULL;
4485 		TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
4486 			if (group_next->ctl_unit > unit_cursor) {
4487 				// Found a gap, lets fill it in
4488 				break;
4489 			}
4490 			unit_cursor = group_next->ctl_unit + 1;
4491 			if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4492 				break;
4493 			}
4494 		}
4495 		if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4496 			error = EBUSY;
4497 		} else {
4498 			*unit = unit_cursor;
4499 			new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
4500 			if (group_next != NULL) {
4501 				TAILQ_INSERT_BEFORE(group_next, new_group, chain);
4502 			} else {
4503 				TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
4504 			}
4505 			g_active_group_count++;
4506 		}
4507 	} else {
4508 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4509 			error = EPERM;
4510 		} else {
4511 			if (g_flow_divert_groups == NULL) {
4512 				g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4513 				    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4514 			}
4515 
4516 			// Return next unused group unit
4517 			bool found_unused_unit = false;
4518 			u_int32_t unit_cursor;
4519 			for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
4520 				struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
4521 				if (group == NULL) {
4522 					// Open slot, assign this one
4523 					*unit = unit_cursor;
4524 					new_group = flow_divert_allocate_group(*unit, 0);
4525 					g_flow_divert_groups[*unit] = new_group;
4526 					found_unused_unit = true;
4527 					g_active_group_count++;
4528 					break;
4529 				}
4530 			}
4531 			if (!found_unused_unit) {
4532 				error = EBUSY;
4533 			}
4534 		}
4535 	}
4536 	lck_rw_done(&g_flow_divert_group_lck);
4537 
4538 	*unitinfo = new_group;
4539 
4540 	return error;
4541 }
4542 
4543 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4544 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4545 {
4546 	if (unitinfo == NULL) {
4547 		return EINVAL;
4548 	}
4549 
4550 	// Just validate. The group will already have been allocated.
4551 	struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4552 	if (group == NULL || sac->sc_unit != group->ctl_unit) {
4553 		FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4554 		    sac->sc_unit, group ? group->ctl_unit : 0);
4555 		return EINVAL;
4556 	}
4557 
4558 	return 0;
4559 }
4560 
4561 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4562 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4563 {
4564 	struct flow_divert_group    *group  = NULL;
4565 	errno_t                     error   = 0;
4566 
4567 	if (unitinfo == NULL) {
4568 		return 0;
4569 	}
4570 
4571 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4572 
4573 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4574 
4575 	if (g_active_group_count == 0) {
4576 		panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4577 		    unit, g_active_group_count);
4578 	}
4579 
4580 	if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4581 		if (unit >= GROUP_COUNT_MAX) {
4582 			return EINVAL;
4583 		}
4584 
4585 		if (g_flow_divert_groups == NULL) {
4586 			panic("flow divert group %u is disconnecting, but groups array is NULL",
4587 			    unit);
4588 		}
4589 		group = g_flow_divert_groups[unit];
4590 
4591 		if (group != (struct flow_divert_group *)unitinfo) {
4592 			panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4593 		}
4594 
4595 		g_flow_divert_groups[unit] = NULL;
4596 	} else {
4597 		group = (struct flow_divert_group *)unitinfo;
4598 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4599 			panic("flow divert group %u is disconnecting, but in-process group list is empty",
4600 			    unit);
4601 		}
4602 
4603 		TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4604 	}
4605 
4606 	g_active_group_count--;
4607 
4608 	if (g_active_group_count == 0) {
4609 		kfree_type(struct flow_divert_group *,
4610 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4611 		g_flow_divert_groups = NULL;
4612 	}
4613 
4614 	lck_rw_done(&g_flow_divert_group_lck);
4615 
4616 	if (group != NULL) {
4617 		flow_divert_close_all(group);
4618 		FDGRP_RELEASE(group);
4619 	} else {
4620 		error = EINVAL;
4621 	}
4622 
4623 	return error;
4624 }
4625 
4626 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_ref_t m,__unused int flags)4627 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_ref_t m, __unused int flags)
4628 {
4629 	errno_t error = 0;
4630 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4631 	if (group != NULL) {
4632 		error = flow_divert_input(m, group);
4633 		FDGRP_RELEASE(group);
4634 	} else {
4635 		error = ENOENT;
4636 	}
4637 	return error;
4638 }
4639 
/*
 * Kernel control "receive space available" callback, invoked when the
 * user-space agent has drained data from the control socket's receive
 * buffer. If an earlier ctl_enqueuembuf() failed (the group's
 * GROUP_BIT_CTL_ENQUEUE_BLOCKED bit was set), retry sending any packets
 * parked on the group's send_queue, then give every flow in the group a
 * chance to push its buffered data.
 */
static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
{
	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
	if (group == NULL) {
		return;
	}

	/*
	 * OSTestAndClear atomically clears the bit and returns true if it
	 * was already clear — so the body runs only when an enqueue had
	 * previously been blocked.
	 */
	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
		struct flow_divert_pcb                  *fd_cb;
		SLIST_HEAD(, flow_divert_pcb)   tmp_list;

		lck_rw_lock_exclusive(&group->lck);

		/* First drain packets that previously failed to enqueue */
		while (!MBUFQ_EMPTY(&group->send_queue)) {
			mbuf_ref_t next_packet;
			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
			next_packet = MBUFQ_FIRST(&group->send_queue);
			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
			if (error) {
				/* Still no room: re-mark blocked and wait for the next rcvd callback */
				FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
				lck_rw_done(&group->lck);
				return;
			}
			/* Dequeue only after a successful enqueue so the packet is never lost */
			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
		}

		SLIST_INIT(&tmp_list);

		/* Snapshot the group's flows (each with a ref) so they can be walked unlocked */
		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
			FDRETAIN(fd_cb);
			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
		}

		lck_rw_done(&group->lck);

		/*
		 * Let each flow try to send its buffered data. Lock order here
		 * is flow lock, then socket lock, with the group lock dropped.
		 * NOTE(review): FDRELEASE inside SLIST_FOREACH — safe only if the
		 * release cannot free the entry before the iterator reads its
		 * next pointer; SLIST_FOREACH_SAFE would be more defensive.
		 */
		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
			FDLOCK(fd_cb);
			if (fd_cb->so != NULL) {
				socket_lock(fd_cb->so, 0);
				if (fd_cb->group != NULL) {
					flow_divert_send_buffered_data(fd_cb, FALSE);
				}
				socket_unlock(fd_cb->so, 0);
			}
			FDUNLOCK(fd_cb);
			FDRELEASE(fd_cb);
		}
	}

	FDGRP_RELEASE(group);
}
4693 
4694 static int
flow_divert_kctl_init(void)4695 flow_divert_kctl_init(void)
4696 {
4697 	struct kern_ctl_reg     ctl_reg;
4698 	int                     result;
4699 
4700 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4701 
4702 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4703 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4704 
4705 	// Do not restrict to privileged processes. flow_divert_kctl_setup checks
4706 	// permissions separately.
4707 	ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4708 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4709 
4710 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4711 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4712 	ctl_reg.ctl_send = flow_divert_kctl_send;
4713 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4714 	ctl_reg.ctl_setup = flow_divert_kctl_setup;
4715 
4716 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4717 
4718 	if (result) {
4719 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4720 		return result;
4721 	}
4722 
4723 	return 0;
4724 }
4725 
4726 void
flow_divert_init(void)4727 flow_divert_init(void)
4728 {
4729 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4730 	nil_pcb.log_level = LOG_NOTICE;
4731 
4732 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4733 
4734 	VERIFY(g_tcp_protosw != NULL);
4735 
4736 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4737 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4738 
4739 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4740 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4741 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4742 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4743 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4744 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4745 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4746 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4747 
4748 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4749 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4750 
4751 	/*
4752 	 * Socket filters shouldn't attach/detach to/from this protosw
4753 	 * since pr_protosw is to be used instead, which points to the
4754 	 * real protocol; if they do, it is a bug and we should panic.
4755 	 */
4756 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4757 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4758 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4759 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4760 
4761 	/* UDP */
4762 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4763 	VERIFY(g_udp_protosw != NULL);
4764 
4765 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4766 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4767 
4768 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4769 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4770 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4771 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4772 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4773 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4774 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4775 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4776 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4777 
4778 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4779 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4780 
4781 	/*
4782 	 * Socket filters shouldn't attach/detach to/from this protosw
4783 	 * since pr_protosw is to be used instead, which points to the
4784 	 * real protocol; if they do, it is a bug and we should panic.
4785 	 */
4786 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4787 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4788 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4789 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4790 
4791 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4792 
4793 	VERIFY(g_tcp6_protosw != NULL);
4794 
4795 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4796 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4797 
4798 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4799 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4800 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4801 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4802 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4803 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4804 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4805 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4806 
4807 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4808 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4809 	/*
4810 	 * Socket filters shouldn't attach/detach to/from this protosw
4811 	 * since pr_protosw is to be used instead, which points to the
4812 	 * real protocol; if they do, it is a bug and we should panic.
4813 	 */
4814 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4815 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4816 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4817 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4818 
4819 	/* UDP6 */
4820 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4821 
4822 	VERIFY(g_udp6_protosw != NULL);
4823 
4824 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4825 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4826 
4827 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4828 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4829 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4830 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4831 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4832 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4833 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4834 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4835 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4836 
4837 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4838 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4839 	/*
4840 	 * Socket filters shouldn't attach/detach to/from this protosw
4841 	 * since pr_protosw is to be used instead, which points to the
4842 	 * real protocol; if they do, it is a bug and we should panic.
4843 	 */
4844 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4845 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4846 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4847 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4848 
4849 	TAILQ_INIT(&g_flow_divert_in_process_group_list);
4850 
4851 	g_init_result = flow_divert_kctl_init();
4852 	if (g_init_result) {
4853 		goto done;
4854 	}
4855 
4856 done:
4857 	if (g_init_result != 0) {
4858 		if (g_flow_divert_kctl_ref != NULL) {
4859 			ctl_deregister(g_flow_divert_kctl_ref);
4860 			g_flow_divert_kctl_ref = NULL;
4861 		}
4862 	}
4863 }
4864