xref: /xnu-10063.101.15/bsd/netinet/flow_divert.c (revision 94d3b452840153a99b38a3a9659680b2a006908e)
1 /*
2  * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #if CONTENT_FILTER
73 #include <net/content_filter.h>
74 #endif /* CONTENT_FILTER */
75 
/*
 * Per-flow state bits. The *_CLOSED bits and FLOW_DIVERT_CONNECT_STARTED are
 * OR-ed into and tested against fd_cb->flags (see
 * flow_divert_update_closed_state()).
 * NOTE(review): the remaining bits presumably live in the same field; bit
 * 0x00000020 is not assigned in this list.
 */
76 #define FLOW_DIVERT_CONNECT_STARTED             0x00000001
77 #define FLOW_DIVERT_READ_CLOSED                 0x00000002
78 #define FLOW_DIVERT_WRITE_CLOSED                0x00000004
79 #define FLOW_DIVERT_TUNNEL_RD_CLOSED    0x00000008
80 #define FLOW_DIVERT_TUNNEL_WR_CLOSED    0x00000010
81 #define FLOW_DIVERT_HAS_HMAC            0x00000040
82 #define FLOW_DIVERT_NOTIFY_ON_RECEIVED  0x00000080
83 #define FLOW_DIVERT_IMPLICIT_CONNECT    0x00000100
84 #define FLOW_DIVERT_DID_SET_LOCAL_ADDR  0x00000200
85 #define FLOW_DIVERT_HAS_TOKEN           0x00000400
86 #define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
87 #define FLOW_DIVERT_FLOW_IS_TRANSPARENT   0x00001000
88 
/*
 * Log a printf-style message through os_log at the os_log type that
 * corresponds to the syslog-style `level`. The message is prefixed with the
 * PCB's hash, so `pcb` is dereferenced and must not be NULL. At least one
 * variadic argument is expected after `format`; use FDLOG0 otherwise.
 */
89 #define FDLOG(level, pcb, format, ...) \
90 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)
91 
/* Same as FDLOG, for a message that takes no format arguments. */
92 #define FDLOG0(level, pcb, msg) \
93 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)
94 
/*
 * Reference counting helpers for PCBs and groups.
 *
 * Each macro tolerates a NULL pointer. The RELEASE variants destroy the
 * object when OSDecrementAtomic() reports that the previous count was 1,
 * i.e. the reference being dropped was the last one.
 *
 * All four are wrapped in do { } while (0) so that they behave as a single
 * statement; a bare `if` expansion would capture an `else` that follows the
 * macro invocation. Note that the argument is evaluated more than once, so
 * it must not have side effects.
 */
#define FDRETAIN(pcb)                                                   \
	do {                                                            \
	        if ((pcb) != NULL) {                                    \
	                OSIncrementAtomic(&(pcb)->ref_count);           \
	        }                                                       \
	} while (0)

#define FDRELEASE(pcb)                                                          \
	do {                                                                    \
	        if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) { \
	                flow_divert_pcb_destroy(pcb);                           \
	        }                                                               \
	} while (0)

#define FDGRP_RETAIN(grp)                                               \
	do {                                                            \
	        if ((grp) != NULL) {                                    \
	                OSIncrementAtomic(&(grp)->ref_count);           \
	        }                                                       \
	} while (0)

#define FDGRP_RELEASE(grp)                                                      \
	do {                                                                    \
	        if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) { \
	                flow_divert_group_destroy(grp);                         \
	        }                                                               \
	} while (0)
105 
/* Acquire / release the per-PCB mutex. */
106 #define FDLOCK(pcb)                                             lck_mtx_lock(&(pcb)->mtx)
107 #define FDUNLOCK(pcb)                                   lck_mtx_unlock(&(pcb)->mtx)
108 
/* NOTE(review): presumably the send buffer size of the control socket -- confirm at the use site. */
109 #define FD_CTL_SENDBUFF_SIZE                    (128 * 1024)
110 
111 #define GROUP_BIT_CTL_ENQUEUE_BLOCKED   0
112 
/*
 * Control units in [1, GROUP_COUNT_MAX) index g_flow_divert_groups; units from
 * GROUP_COUNT_MAX up to FLOW_DIVERT_IN_PROCESS_UNIT_MIN are rejected by
 * flow_divert_group_lookup().
 */
113 #define GROUP_COUNT_MAX                                 31
114 #define FLOW_DIVERT_MAX_NAME_SIZE               4096
115 #define FLOW_DIVERT_MAX_KEY_SIZE                1024
116 #define FLOW_DIVERT_MAX_TRIE_MEMORY             (1024 * 1024)
117 
/*
 * One node of the trie. `start` and `length` describe the node's string as a
 * range of the trie's byte array (read through TRIE_BYTE); `child_map` is the
 * index of the node's child map, or NULL_TRIE_IDX when it has none.
 */
118 struct flow_divert_trie_node {
119 	uint16_t start;
120 	uint16_t length;
121 	uint16_t child_map;
122 };
123 
/*
 * Trie accessors. A child map is a run of CHILD_MAP_SIZE entries inside
 * (t)->child_maps, indexed by the next byte value. TRIE_CHILD must only be
 * used on a node whose child_map is not NULL_TRIE_IDX.
 */
124 #define CHILD_MAP_SIZE                  256
125 #define NULL_TRIE_IDX                   0xffff
126 #define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
127 #define TRIE_CHILD(t, i, b)             (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
128 #define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])
129 
/* True when the socket is flagged SOF_FLOW_DIVERT and has a flow divert PCB attached. */
130 #define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
131 
/*
 * Stand-in PCB: passed to FDLOG/FDLOG0 by code that has no real PCB at hand,
 * and the source of the initial log_level for PCBs made by
 * flow_divert_pcb_create().
 */
132 static struct flow_divert_pcb           nil_pcb;
133 
/* Lock attribute and group shared by the locks created in this file. */
134 static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
135 static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
/*
 * Read/write lock held (shared) by flow_divert_group_lookup() while it reads
 * g_active_group_count, g_flow_divert_groups and the in-process group list.
 */
136 static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
137     &flow_divert_mtx_attr);
138 
/* Groups for control units >= FLOW_DIVERT_IN_PROCESS_UNIT_MIN; searched linearly by ctl_unit. */
139 static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;
140 
/* Table of groups indexed by control unit (units below GROUP_COUNT_MAX); NULL until allocated. */
141 static struct flow_divert_group         **g_flow_divert_groups  = NULL;
142 static uint32_t                         g_active_group_count    = 0;
143 
144 static  errno_t                         g_init_result           = 0;
145 
146 static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;
147 
/*
 * NOTE(review): presumably flow-divert copies of the TCP/UDP IPv4/IPv6 protocol
 * switch entries and their user-request tables; they are filled in elsewhere.
 */
148 static struct protosw                   g_flow_divert_in_protosw;
149 static struct pr_usrreqs                g_flow_divert_in_usrreqs;
150 static struct protosw                   g_flow_divert_in_udp_protosw;
151 static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
152 static struct ip6protosw                g_flow_divert_in6_protosw;
153 static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
154 static struct ip6protosw                g_flow_divert_in6_udp_protosw;
155 static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;
156 
/* NOTE(review): presumably the original protocol switch entries; assigned elsewhere. */
157 static struct protosw                   *g_tcp_protosw          = NULL;
158 static struct ip6protosw                *g_tcp6_protosw         = NULL;
159 static struct protosw                   *g_udp_protosw          = NULL;
160 static struct ip6protosw                *g_udp6_protosw         = NULL;
161 
/* Typed allocation zones for groups and PCBs. */
162 static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
163     NET_KT_DEFAULT);
164 static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
165     NET_KT_DEFAULT);
166 
/* Forward declarations; the definitions are not part of this section of the file. */
167 static errno_t
168 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
169 
170 static boolean_t
171 flow_divert_is_sockaddr_valid(struct sockaddr *addr);
172 
173 static int
174 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr);
175 
/* Unlike its neighbours, this one is declared without `static`. */
176 struct sockaddr *
177 flow_divert_get_buffered_target_address(mbuf_t buffer);
178 
179 static void
180 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);
181 
182 static void flow_divert_group_destroy(struct flow_divert_group *group);
183 
184 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)185 flow_divert_syslog_type_to_oslog_type(int syslog_type)
186 {
187 	switch (syslog_type) {
188 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
189 	case LOG_INFO: return OS_LOG_TYPE_INFO;
190 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
191 	default: return OS_LOG_TYPE_DEFAULT;
192 	}
193 }
194 
195 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)196 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
197 {
198 	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
199 }
200 
/*
 * Declare and generate the red-black tree routines for fd_pcb_tree: a tree of
 * struct flow_divert_pcb linked through rb_link and ordered by
 * flow_divert_pcb_cmp().
 */
201 RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
202 RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
203 
204 static const char *
flow_divert_packet_type2str(uint8_t packet_type)205 flow_divert_packet_type2str(uint8_t packet_type)
206 {
207 	switch (packet_type) {
208 	case FLOW_DIVERT_PKT_CONNECT:
209 		return "connect";
210 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
211 		return "connect result";
212 	case FLOW_DIVERT_PKT_DATA:
213 		return "data";
214 	case FLOW_DIVERT_PKT_CLOSE:
215 		return "close";
216 	case FLOW_DIVERT_PKT_READ_NOTIFY:
217 		return "read notification";
218 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
219 		return "properties update";
220 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
221 		return "app map create";
222 	default:
223 		return "unknown";
224 	}
225 }
226 
227 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)228 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
229 {
230 	struct flow_divert_pcb  key_item;
231 	struct flow_divert_pcb  *fd_cb          = NULL;
232 
233 	key_item.hash = hash;
234 
235 	lck_rw_lock_shared(&group->lck);
236 	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
237 	FDRETAIN(fd_cb);
238 	lck_rw_done(&group->lck);
239 
240 	return fd_cb;
241 }
242 
243 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)244 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
245 {
246 	struct flow_divert_group *group = NULL;
247 	lck_rw_lock_shared(&g_flow_divert_group_lck);
248 	if (g_active_group_count == 0) {
249 		if (fd_cb != NULL) {
250 			FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
251 		}
252 	} else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
253 		FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
254 	} else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
255 		if (g_flow_divert_groups == NULL) {
256 			if (fd_cb != NULL) {
257 				FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
258 			}
259 		} else {
260 			group = g_flow_divert_groups[ctl_unit];
261 			if (group == NULL) {
262 				if (fd_cb != NULL) {
263 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
264 				}
265 			} else {
266 				FDGRP_RETAIN(group);
267 			}
268 		}
269 	} else {
270 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
271 			if (fd_cb != NULL) {
272 				FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
273 			}
274 		} else {
275 			struct flow_divert_group *group_cursor = NULL;
276 			TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
277 				if (group_cursor->ctl_unit == ctl_unit) {
278 					group = group_cursor;
279 					break;
280 				}
281 			}
282 			if (group == NULL) {
283 				if (fd_cb != NULL) {
284 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
285 				}
286 			} else if (fd_cb != NULL &&
287 			    (fd_cb->so == NULL ||
288 			    group_cursor->in_process_pid != fd_cb->so->last_pid)) {
289 				FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
290 				    ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
291 				group = NULL;
292 			} else {
293 				FDGRP_RETAIN(group);
294 			}
295 		}
296 	}
297 	lck_rw_done(&g_flow_divert_group_lck);
298 	return group;
299 }
300 
301 static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb * fd_cb,struct flow_divert_group * group)302 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
303 {
304 	int error = 0;
305 	lck_rw_lock_exclusive(&group->lck);
306 	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
307 		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
308 			fd_cb->group = group;
309 			fd_cb->control_group_unit = group->ctl_unit;
310 			FDRETAIN(fd_cb); /* The group now has a reference */
311 		} else {
312 			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
313 			error = EEXIST;
314 		}
315 	} else {
316 		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
317 		error = ENOENT;
318 	}
319 	lck_rw_done(&group->lck);
320 	return error;
321 }
322 
323 static errno_t
flow_divert_add_to_group(struct flow_divert_pcb * fd_cb,uint32_t ctl_unit)324 flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
325 {
326 	errno_t error = 0;
327 	struct flow_divert_group *group = NULL;
328 	static uint32_t g_nextkey = 1;
329 	static uint32_t g_hash_seed = 0;
330 	int try_count = 0;
331 
332 	group = flow_divert_group_lookup(ctl_unit, fd_cb);
333 	if (group == NULL) {
334 		return ENOENT;
335 	}
336 
337 	do {
338 		uint32_t key[2];
339 		uint32_t idx;
340 
341 		key[0] = g_nextkey++;
342 		key[1] = RandomULong();
343 
344 		if (g_hash_seed == 0) {
345 			g_hash_seed = RandomULong();
346 		}
347 
348 		error = 0;
349 		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);
350 
351 		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
352 			if (idx == ctl_unit) {
353 				continue;
354 			}
355 			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
356 			if (curr_group != NULL) {
357 				lck_rw_lock_shared(&curr_group->lck);
358 				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
359 					error = EEXIST;
360 				}
361 				lck_rw_done(&curr_group->lck);
362 				FDGRP_RELEASE(curr_group);
363 			}
364 		}
365 
366 		if (error == 0) {
367 			error = flow_divert_pcb_insert(fd_cb, group);
368 		}
369 	} while (error == EEXIST && try_count++ < 3);
370 
371 	if (error == EEXIST) {
372 		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
373 		fd_cb->hash = 0;
374 	}
375 
376 	FDGRP_RELEASE(group);
377 	return error;
378 }
379 
380 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)381 flow_divert_pcb_create(socket_t so)
382 {
383 	struct flow_divert_pcb  *new_pcb = NULL;
384 
385 	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
386 	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
387 	new_pcb->so = so;
388 	new_pcb->log_level = nil_pcb.log_level;
389 
390 	FDRETAIN(new_pcb);      /* Represents the socket's reference */
391 
392 	return new_pcb;
393 }
394 
395 static void
flow_divert_pcb_destroy(struct flow_divert_pcb * fd_cb)396 flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
397 {
398 	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %llu, tunnel tx %llu, tunnel rx %llu",
399 	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);
400 
401 	if (fd_cb->connect_token != NULL) {
402 		mbuf_freem(fd_cb->connect_token);
403 	}
404 	if (fd_cb->connect_packet != NULL) {
405 		mbuf_freem(fd_cb->connect_packet);
406 	}
407 	if (fd_cb->app_data != NULL) {
408 		kfree_data(fd_cb->app_data, fd_cb->app_data_length);
409 	}
410 	if (fd_cb->original_remote_endpoint != NULL) {
411 		free_sockaddr(fd_cb->original_remote_endpoint);
412 	}
413 	zfree(flow_divert_pcb_zone, fd_cb);
414 }
415 
416 static void
flow_divert_pcb_remove(struct flow_divert_pcb * fd_cb)417 flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
418 {
419 	if (fd_cb->group != NULL) {
420 		struct flow_divert_group *group = fd_cb->group;
421 		lck_rw_lock_exclusive(&group->lck);
422 		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
423 		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
424 		fd_cb->group = NULL;
425 		FDRELEASE(fd_cb);                               /* Release the group's reference */
426 		lck_rw_done(&group->lck);
427 	}
428 }
429 
430 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_t * packet)431 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_t *packet)
432 {
433 	struct flow_divert_packet_header        hdr;
434 	int                                     error           = 0;
435 
436 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
437 	if (error) {
438 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
439 		return error;
440 	}
441 
442 	hdr.packet_type = packet_type;
443 	hdr.conn_id = htonl(fd_cb->hash);
444 
445 	/* Lay down the header */
446 	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
447 	if (error) {
448 		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
449 		mbuf_freem(*packet);
450 		*packet = NULL;
451 		return error;
452 	}
453 
454 	return 0;
455 }
456 
457 static int
flow_divert_packet_append_tlv(mbuf_t packet,uint8_t type,uint32_t length,const void * value)458 flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, uint32_t length, const void *value)
459 {
460 	uint32_t        net_length      = htonl(length);
461 	int                     error           = 0;
462 
463 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
464 	if (error) {
465 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
466 		return error;
467 	}
468 
469 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
470 	if (error) {
471 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
472 		return error;
473 	}
474 
475 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
476 	if (error) {
477 		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
478 		return error;
479 	}
480 
481 	return error;
482 }
483 
484 static int
flow_divert_packet_find_tlv(mbuf_t packet,int offset,uint8_t type,int * err,int next)485 flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next)
486 {
487 	size_t          cursor                  = offset;
488 	int                     error                   = 0;
489 	uint32_t        curr_length;
490 	uint8_t         curr_type;
491 
492 	*err = 0;
493 
494 	do {
495 		if (!next) {
496 			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
497 			if (error) {
498 				*err = ENOENT;
499 				return -1;
500 			}
501 		} else {
502 			next = 0;
503 			curr_type = FLOW_DIVERT_TLV_NIL;
504 		}
505 
506 		if (curr_type != type) {
507 			cursor += sizeof(curr_type);
508 			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
509 			if (error) {
510 				*err = error;
511 				return -1;
512 			}
513 
514 			cursor += (sizeof(curr_length) + ntohl(curr_length));
515 		}
516 	} while (curr_type != type);
517 
518 	return (int)cursor;
519 }
520 
521 static int
flow_divert_packet_get_tlv(mbuf_t packet,int offset,uint8_t type,size_t buff_len,void * buff,uint32_t * val_size)522 flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, uint32_t *val_size)
523 {
524 	int                     error           = 0;
525 	uint32_t        length;
526 	int                     tlv_offset;
527 
528 	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
529 	if (tlv_offset < 0) {
530 		return error;
531 	}
532 
533 	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
534 	if (error) {
535 		return error;
536 	}
537 
538 	length = ntohl(length);
539 
540 	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);
541 
542 	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
543 		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
544 		return EINVAL;
545 	}
546 
547 	if (val_size != NULL) {
548 		*val_size = length;
549 	}
550 
551 	if (buff != NULL && buff_len > 0) {
552 		memset(buff, 0, buff_len);
553 		size_t to_copy = (length < buff_len) ? length : buff_len;
554 		error = mbuf_copydata(packet, data_offset, to_copy, buff);
555 		if (error) {
556 			return error;
557 		}
558 	}
559 
560 	return 0;
561 }
562 
563 static int
flow_divert_packet_compute_hmac(mbuf_t packet,struct flow_divert_group * group,uint8_t * hmac)564 flow_divert_packet_compute_hmac(mbuf_t packet, struct flow_divert_group *group, uint8_t *hmac)
565 {
566 	mbuf_t  curr_mbuf       = packet;
567 
568 	if (g_crypto_funcs == NULL || group->token_key == NULL) {
569 		return ENOPROTOOPT;
570 	}
571 
572 	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
573 	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);
574 
575 	while (curr_mbuf != NULL) {
576 		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
577 		curr_mbuf = mbuf_next(curr_mbuf);
578 	}
579 
580 	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);
581 
582 	return 0;
583 }
584 
585 static int
flow_divert_packet_verify_hmac(mbuf_t packet,uint32_t ctl_unit)586 flow_divert_packet_verify_hmac(mbuf_t packet, uint32_t ctl_unit)
587 {
588 	int error = 0;
589 	struct flow_divert_group *group = NULL;
590 	int hmac_offset;
591 	uint8_t packet_hmac[SHA_DIGEST_LENGTH];
592 	uint8_t computed_hmac[SHA_DIGEST_LENGTH];
593 	mbuf_t tail;
594 
595 	group = flow_divert_group_lookup(ctl_unit, NULL);
596 	if (group == NULL) {
597 		FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
598 		return ENOPROTOOPT;
599 	}
600 
601 	lck_rw_lock_shared(&group->lck);
602 
603 	if (group->token_key == NULL) {
604 		error = ENOPROTOOPT;
605 		goto done;
606 	}
607 
608 	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
609 	if (hmac_offset < 0) {
610 		goto done;
611 	}
612 
613 	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
614 	if (error) {
615 		goto done;
616 	}
617 
618 	/* Chop off the HMAC TLV */
619 	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
620 	if (error) {
621 		goto done;
622 	}
623 
624 	mbuf_free(tail);
625 
626 	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
627 	if (error) {
628 		goto done;
629 	}
630 
631 	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
632 		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
633 		error = EINVAL;
634 		goto done;
635 	}
636 
637 done:
638 	if (group != NULL) {
639 		lck_rw_done(&group->lck);
640 		FDGRP_RELEASE(group);
641 	}
642 	return error;
643 }
644 
645 static void
flow_divert_add_data_statistics(struct flow_divert_pcb * fd_cb,size_t data_len,Boolean send)646 flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
647 {
648 	struct inpcb *inp = NULL;
649 	struct ifnet *ifp = NULL;
650 	Boolean cell = FALSE;
651 	Boolean wifi = FALSE;
652 	Boolean wired = FALSE;
653 
654 	inp = sotoinpcb(fd_cb->so);
655 	if (inp == NULL) {
656 		return;
657 	}
658 
659 	if (inp->inp_vflag & INP_IPV4) {
660 		ifp = inp->inp_last_outifp;
661 	} else if (inp->inp_vflag & INP_IPV6) {
662 		ifp = inp->in6p_last_outifp;
663 	}
664 	if (ifp != NULL) {
665 		cell = IFNET_IS_CELLULAR(ifp);
666 		wifi = (!cell && IFNET_IS_WIFI(ifp));
667 		wired = (!wifi && IFNET_IS_WIRED(ifp));
668 	}
669 
670 	if (send) {
671 		INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1);
672 		INP_ADD_STAT(inp, cell, wifi, wired, txbytes, data_len);
673 	} else {
674 		INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1);
675 		INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, data_len);
676 	}
677 	inp_set_activity_bitmap(inp);
678 }
679 
680 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)681 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
682 {
683 	struct inpcb *inp = sotoinpcb(fd_cb->so);
684 	if (INP_NO_CELLULAR(inp)) {
685 		struct ifnet *ifp = NULL;
686 		if (inp->inp_vflag & INP_IPV4) {
687 			ifp = inp->inp_last_outifp;
688 		} else if (inp->inp_vflag & INP_IPV6) {
689 			ifp = inp->in6p_last_outifp;
690 		}
691 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
692 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
693 			return EHOSTUNREACH;
694 		}
695 	}
696 	return 0;
697 }
698 
699 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)700 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
701 {
702 	struct inpcb *inp = sotoinpcb(fd_cb->so);
703 	if (INP_NO_EXPENSIVE(inp)) {
704 		struct ifnet *ifp = NULL;
705 		if (inp->inp_vflag & INP_IPV4) {
706 			ifp = inp->inp_last_outifp;
707 		} else if (inp->inp_vflag & INP_IPV6) {
708 			ifp = inp->in6p_last_outifp;
709 		}
710 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
711 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
712 			return EHOSTUNREACH;
713 		}
714 	}
715 	return 0;
716 }
717 
718 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)719 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
720 {
721 	struct inpcb *inp = sotoinpcb(fd_cb->so);
722 	if (INP_NO_CONSTRAINED(inp)) {
723 		struct ifnet *ifp = NULL;
724 		if (inp->inp_vflag & INP_IPV4) {
725 			ifp = inp->inp_last_outifp;
726 		} else if (inp->inp_vflag & INP_IPV6) {
727 			ifp = inp->in6p_last_outifp;
728 		}
729 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
730 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
731 			return EHOSTUNREACH;
732 		}
733 	}
734 	return 0;
735 }
736 
737 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,bool tunnel,bool flush_snd)738 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
739 {
740 	if (how != SHUT_RD) {
741 		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
742 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
743 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
744 			if (flush_snd) {
745 				/* If the tunnel is not accepting writes any more, then flush the send buffer */
746 				sbflush(&fd_cb->so->so_snd);
747 			}
748 		}
749 	}
750 	if (how != SHUT_WR) {
751 		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
752 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
753 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
754 		}
755 	}
756 }
757 
758 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)759 trie_node_alloc(struct flow_divert_trie *trie)
760 {
761 	if (trie->nodes_free_next < trie->nodes_count) {
762 		uint16_t node_idx = trie->nodes_free_next++;
763 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
764 		return node_idx;
765 	} else {
766 		return NULL_TRIE_IDX;
767 	}
768 }
769 
770 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)771 trie_child_map_alloc(struct flow_divert_trie *trie)
772 {
773 	if (trie->child_maps_free_next < trie->child_maps_count) {
774 		return trie->child_maps_free_next++;
775 	} else {
776 		return NULL_TRIE_IDX;
777 	}
778 }
779 
780 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)781 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
782 {
783 	uint16_t start = trie->bytes_free_next;
784 	if (start + bytes_size <= trie->bytes_count) {
785 		if (start != bytes_idx) {
786 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
787 		}
788 		trie->bytes_free_next += bytes_size;
789 		return start;
790 	} else {
791 		return NULL_TRIE_IDX;
792 	}
793 }
794 
795 static uint16_t
flow_divert_trie_insert(struct flow_divert_trie * trie,uint16_t string_start,size_t string_len)796 flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
797 {
798 	uint16_t current = trie->root;
799 	uint16_t child = trie->root;
800 	uint16_t string_end = string_start + (uint16_t)string_len;
801 	uint16_t string_idx = string_start;
802 	uint16_t string_remainder = (uint16_t)string_len;
803 
804 	while (child != NULL_TRIE_IDX) {
805 		uint16_t parent = current;
806 		uint16_t node_idx;
807 		uint16_t current_end;
808 
809 		current = child;
810 		child = NULL_TRIE_IDX;
811 
812 		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
813 
814 		for (node_idx = TRIE_NODE(trie, current).start;
815 		    node_idx < current_end &&
816 		    string_idx < string_end &&
817 		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
818 		    node_idx++, string_idx++) {
819 			;
820 		}
821 
822 		string_remainder = string_end - string_idx;
823 
824 		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
825 			/*
826 			 * We did not reach the end of the current node's string.
827 			 * We need to split the current node into two:
828 			 *   1. A new node that contains the prefix of the node that matches
829 			 *      the prefix of the string being inserted.
830 			 *   2. The current node modified to point to the remainder
831 			 *      of the current node's string.
832 			 */
833 			uint16_t prefix = trie_node_alloc(trie);
834 			if (prefix == NULL_TRIE_IDX) {
835 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
836 				return NULL_TRIE_IDX;
837 			}
838 
839 			/*
840 			 * Prefix points to the portion of the current nodes's string that has matched
841 			 * the input string thus far.
842 			 */
843 			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
844 			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);
845 
846 			/*
847 			 * Prefix has the current node as the child corresponding to the first byte
848 			 * after the split.
849 			 */
850 			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
851 			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
852 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
853 				return NULL_TRIE_IDX;
854 			}
855 			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
856 
857 			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
858 			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
859 
860 			/* Current node is adjusted to point to the remainder */
861 			TRIE_NODE(trie, current).start = node_idx;
862 			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
863 
864 			/* We want to insert the new leaf (if any) as a child of the prefix */
865 			current = prefix;
866 		}
867 
868 		if (string_remainder > 0) {
869 			/*
870 			 * We still have bytes in the string that have not been matched yet.
871 			 * If the current node has children, iterate to the child corresponding
872 			 * to the next byte in the string.
873 			 */
874 			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
875 				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
876 			}
877 		}
878 	} /* while (child != NULL_TRIE_IDX) */
879 
880 	if (string_remainder > 0) {
881 		/* Add a new leaf containing the remainder of the string */
882 		uint16_t leaf = trie_node_alloc(trie);
883 		if (leaf == NULL_TRIE_IDX) {
884 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
885 			return NULL_TRIE_IDX;
886 		}
887 
888 		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
889 		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
890 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
891 			return NULL_TRIE_IDX;
892 		}
893 		TRIE_NODE(trie, leaf).length = string_remainder;
894 
895 		/* Set the new leaf as the child of the current node */
896 		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
897 			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
898 			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
899 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
900 				return NULL_TRIE_IDX;
901 			}
902 		}
903 		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
904 		current = leaf;
905 	} /* else duplicate or this string is a prefix of one of the existing strings */
906 
907 	return current;
908 }
909 
910 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
911 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes)912 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
913 {
914 	uint16_t current = trie->root;
915 	uint16_t string_idx = 0;
916 
917 	while (current != NULL_TRIE_IDX) {
918 		uint16_t next = NULL_TRIE_IDX;
919 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
920 		uint16_t node_idx;
921 
922 		for (node_idx = TRIE_NODE(trie, current).start;
923 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
924 		    node_idx++, string_idx++) {
925 			;
926 		}
927 
928 		if (node_idx == node_end) {
929 			if (string_bytes[string_idx] == '\0') {
930 				return current; /* Got an exact match */
931 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
932 			    0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
933 				return current; /* Got an apple webclip id prefix match */
934 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
935 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
936 			}
937 		}
938 		current = next;
939 	}
940 
941 	return NULL_TRIE_IDX;
942 }
943 
944 struct uuid_search_info {
945 	uuid_t target_uuid;
946 	char *found_signing_id;
947 	boolean_t found_multiple_signing_ids;
948 	proc_t found_proc;
949 };
950 
951 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)952 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
953 {
954 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
955 	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
956 
957 	if (info->found_signing_id != NULL) {
958 		if (!info->found_multiple_signing_ids) {
959 			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
960 			info->found_proc = p;
961 			result = PROC_CLAIMED_DONE;
962 		} else {
963 			uuid_string_t uuid_str;
964 			uuid_unparse(info->target_uuid, uuid_str);
965 			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
966 		}
967 		kfree_data(info->found_signing_id, strlen(info->found_signing_id) + 1);
968 		info->found_signing_id = NULL;
969 	}
970 
971 	if (result == PROC_RETURNED_DONE) {
972 		uuid_string_t uuid_str;
973 		uuid_unparse(info->target_uuid, uuid_str);
974 		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
975 	}
976 
977 	return result;
978 }
979 
980 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)981 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
982 {
983 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
984 	int include = 0;
985 
986 	if (info->found_multiple_signing_ids) {
987 		return include;
988 	}
989 
990 	include = (uuid_compare(proc_executableuuid_addr(p), info->target_uuid) == 0);
991 	if (include) {
992 		const char *signing_id = cs_identity_get(p);
993 		if (signing_id != NULL) {
994 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
995 			size_t signing_id_size = strlen(signing_id) + 1;
996 			if (info->found_signing_id == NULL) {
997 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
998 				memcpy(info->found_signing_id, signing_id, signing_id_size);
999 			} else if (memcmp(signing_id, info->found_signing_id, signing_id_size)) {
1000 				info->found_multiple_signing_ids = TRUE;
1001 			}
1002 		} else {
1003 			info->found_multiple_signing_ids = TRUE;
1004 		}
1005 		include = !info->found_multiple_signing_ids;
1006 	}
1007 
1008 	return include;
1009 }
1010 
1011 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)1012 flow_divert_find_proc_by_uuid(uuid_t uuid)
1013 {
1014 	struct uuid_search_info info;
1015 
1016 	if (LOG_INFO <= nil_pcb.log_level) {
1017 		uuid_string_t uuid_str;
1018 		uuid_unparse(uuid, uuid_str);
1019 		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
1020 	}
1021 
1022 	memset(&info, 0, sizeof(info));
1023 	info.found_proc = PROC_NULL;
1024 	uuid_copy(info.target_uuid, uuid);
1025 
1026 	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
1027 
1028 	return info.found_proc;
1029 }
1030 
1031 static int
flow_divert_add_proc_info(struct flow_divert_pcb * fd_cb,proc_t proc,const char * signing_id,mbuf_t connect_packet,bool is_effective)1032 flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id, mbuf_t connect_packet, bool is_effective)
1033 {
1034 	int error = 0;
1035 	uint8_t *cdhash = NULL;
1036 	audit_token_t audit_token = {};
1037 	const char *proc_cs_id = signing_id;
1038 
1039 	proc_lock(proc);
1040 
1041 	if (proc_cs_id == NULL) {
1042 		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
1043 			proc_cs_id = cs_identity_get(proc);
1044 		} else {
1045 			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
1046 		}
1047 	}
1048 
1049 	if (is_effective) {
1050 		lck_rw_lock_shared(&fd_cb->group->lck);
1051 		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1052 			if (proc_cs_id != NULL) {
1053 				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)proc_cs_id);
1054 				if (result == NULL_TRIE_IDX) {
1055 					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
1056 					error = EPERM;
1057 				} else {
1058 					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
1059 				}
1060 			} else {
1061 				error = EPERM;
1062 			}
1063 		}
1064 		lck_rw_done(&fd_cb->group->lck);
1065 	}
1066 
1067 	if (error != 0) {
1068 		goto done;
1069 	}
1070 
1071 	/*
1072 	 * If signing_id is not NULL then it came from the flow divert token and will be added
1073 	 * as part of the token, so there is no need to add it here.
1074 	 */
1075 	if (signing_id == NULL && proc_cs_id != NULL) {
1076 		error = flow_divert_packet_append_tlv(connect_packet,
1077 		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
1078 		    (uint32_t)strlen(proc_cs_id),
1079 		    proc_cs_id);
1080 		if (error != 0) {
1081 			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
1082 			goto done;
1083 		}
1084 	}
1085 
1086 	cdhash = cs_get_cdhash(proc);
1087 	if (cdhash != NULL) {
1088 		error = flow_divert_packet_append_tlv(connect_packet,
1089 		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
1090 		    SHA1_RESULTLEN,
1091 		    cdhash);
1092 		if (error) {
1093 			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
1094 			goto done;
1095 		}
1096 	} else {
1097 		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
1098 	}
1099 
1100 	task_t task = proc_task(proc);
1101 	if (task != TASK_NULL) {
1102 		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
1103 		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
1104 		if (rc == KERN_SUCCESS) {
1105 			int append_error = flow_divert_packet_append_tlv(connect_packet,
1106 			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
1107 			    sizeof(audit_token_t),
1108 			    &audit_token);
1109 			if (append_error) {
1110 				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
1111 			}
1112 		}
1113 	}
1114 
1115 done:
1116 	proc_unlock(proc);
1117 
1118 	return error;
1119 }
1120 
1121 static int
flow_divert_add_all_proc_info(struct flow_divert_pcb * fd_cb,struct socket * so,proc_t proc,const char * signing_id,mbuf_t connect_packet)1122 flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id, mbuf_t connect_packet)
1123 {
1124 	int error = 0;
1125 	proc_t effective_proc = PROC_NULL;
1126 	proc_t responsible_proc = PROC_NULL;
1127 	proc_t real_proc = proc_find(so->last_pid);
1128 	bool release_real_proc = true;
1129 
1130 	proc_t src_proc = PROC_NULL;
1131 	proc_t real_src_proc = PROC_NULL;
1132 
1133 	if (real_proc == PROC_NULL) {
1134 		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
1135 		release_real_proc = false;
1136 		real_proc = proc;
1137 		if (real_proc == PROC_NULL) {
1138 			real_proc = current_proc();
1139 		}
1140 	}
1141 
1142 	if (so->so_flags & SOF_DELEGATED) {
1143 		if (proc_getpid(real_proc) != so->e_pid) {
1144 			effective_proc = proc_find(so->e_pid);
1145 		} else if (uuid_compare(proc_executableuuid_addr(real_proc), so->e_uuid)) {
1146 			effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
1147 		}
1148 	}
1149 
1150 #if defined(XNU_TARGET_OS_OSX)
1151 	lck_rw_lock_shared(&fd_cb->group->lck);
1152 	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1153 		if (so->so_rpid > 0) {
1154 			responsible_proc = proc_find(so->so_rpid);
1155 		}
1156 	}
1157 	lck_rw_done(&fd_cb->group->lck);
1158 #endif
1159 
1160 	real_src_proc = real_proc;
1161 
1162 	if (responsible_proc != PROC_NULL) {
1163 		src_proc = responsible_proc;
1164 		if (effective_proc != NULL) {
1165 			real_src_proc = effective_proc;
1166 		}
1167 	} else if (effective_proc != PROC_NULL) {
1168 		src_proc = effective_proc;
1169 	} else {
1170 		src_proc = real_proc;
1171 	}
1172 
1173 	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
1174 	if (error != 0) {
1175 		goto done;
1176 	}
1177 
1178 	if (real_src_proc != NULL && real_src_proc != src_proc) {
1179 		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
1180 		if (error != 0) {
1181 			goto done;
1182 		}
1183 	}
1184 
1185 done:
1186 	if (responsible_proc != PROC_NULL) {
1187 		proc_rele(responsible_proc);
1188 	}
1189 
1190 	if (effective_proc != PROC_NULL) {
1191 		proc_rele(effective_proc);
1192 	}
1193 
1194 	if (real_proc != PROC_NULL && release_real_proc) {
1195 		proc_rele(real_proc);
1196 	}
1197 
1198 	return error;
1199 }
1200 
1201 static int
flow_divert_send_packet(struct flow_divert_pcb * fd_cb,mbuf_t packet)1202 flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet)
1203 {
1204 	int             error;
1205 
1206 	if (fd_cb->group == NULL) {
1207 		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
1208 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, false);
1209 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
1210 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1211 			error = ECONNABORTED;
1212 		} else {
1213 			error = EHOSTUNREACH;
1214 		}
1215 		fd_cb->so->so_error = (uint16_t)error;
1216 		return error;
1217 	}
1218 
1219 	lck_rw_lock_shared(&fd_cb->group->lck);
1220 
1221 	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
1222 		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
1223 		if (error) {
1224 			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
1225 		}
1226 	} else {
1227 		error = ENOBUFS;
1228 	}
1229 
1230 	if (error == ENOBUFS) {
1231 		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
1232 			lck_rw_lock_exclusive(&fd_cb->group->lck);
1233 		}
1234 		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
1235 		error = 0;
1236 		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
1237 	}
1238 
1239 	lck_rw_done(&fd_cb->group->lck);
1240 
1241 	return error;
1242 }
1243 
1244 static void
flow_divert_append_domain_name(char * domain_name,void * ctx)1245 flow_divert_append_domain_name(char *domain_name, void *ctx)
1246 {
1247 	mbuf_t packet = (mbuf_t)ctx;
1248 	size_t domain_name_length = 0;
1249 
1250 	if (packet == NULL || domain_name == NULL) {
1251 		return;
1252 	}
1253 
1254 	domain_name_length = strlen(domain_name);
1255 	if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1256 		int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, domain_name);
1257 		if (error) {
1258 			FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1259 		}
1260 	}
1261 }
1262 
1263 static int
flow_divert_create_connect_packet(struct flow_divert_pcb * fd_cb,struct sockaddr * to,struct socket * so,proc_t p,mbuf_t * out_connect_packet)1264 flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_t *out_connect_packet)
1265 {
1266 	int                     error                   = 0;
1267 	int                     flow_type               = 0;
1268 	char                    *signing_id = NULL;
1269 	uint32_t                sid_size = 0;
1270 	mbuf_t                  connect_packet = NULL;
1271 	cfil_sock_id_t          cfil_sock_id            = CFIL_SOCK_ID_NONE;
1272 	const void              *cfil_id                = NULL;
1273 	size_t                  cfil_id_size            = 0;
1274 	struct inpcb            *inp = sotoinpcb(so);
1275 	struct ifnet *ifp = NULL;
1276 	uint32_t flags = 0;
1277 
1278 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
1279 	if (error) {
1280 		goto done;
1281 	}
1282 
1283 	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
1284 		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1285 		if (find_error == 0 && sid_size > 0) {
1286 			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
1287 			if (signing_id != NULL) {
1288 				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
1289 				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
1290 			}
1291 		}
1292 	}
1293 
1294 	error = flow_divert_add_all_proc_info(fd_cb, so, p, signing_id, connect_packet);
1295 
1296 	if (signing_id != NULL) {
1297 		kfree_data(signing_id, sid_size + 1);
1298 	}
1299 
1300 	if (error) {
1301 		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
1302 		goto done;
1303 	}
1304 
1305 	error = flow_divert_packet_append_tlv(connect_packet,
1306 	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
1307 	    sizeof(fd_cb->so->so_traffic_class),
1308 	    &fd_cb->so->so_traffic_class);
1309 	if (error) {
1310 		goto done;
1311 	}
1312 
1313 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1314 		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
1315 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1316 		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
1317 	} else {
1318 		error = EINVAL;
1319 		goto done;
1320 	}
1321 	error = flow_divert_packet_append_tlv(connect_packet,
1322 	    FLOW_DIVERT_TLV_FLOW_TYPE,
1323 	    sizeof(flow_type),
1324 	    &flow_type);
1325 
1326 	if (error) {
1327 		goto done;
1328 	}
1329 
1330 	if (fd_cb->connect_token != NULL) {
1331 		unsigned int token_len = m_length(fd_cb->connect_token);
1332 		mbuf_concatenate(connect_packet, fd_cb->connect_token);
1333 		mbuf_pkthdr_adjustlen(connect_packet, token_len);
1334 		fd_cb->connect_token = NULL;
1335 	} else {
1336 		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
1337 		if (error) {
1338 			goto done;
1339 		}
1340 
1341 		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
1342 	}
1343 
1344 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1345 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1346 		if (error) {
1347 			goto done;
1348 		}
1349 	}
1350 
1351 	if (inp->inp_vflag & INP_IPV4) {
1352 		ifp = inp->inp_last_outifp;
1353 	} else if (inp->inp_vflag & INP_IPV6) {
1354 		ifp = inp->in6p_last_outifp;
1355 	}
1356 	if (ifp != NULL) {
1357 		uint32_t flow_if_index = ifp->if_index;
1358 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
1359 		    sizeof(flow_if_index), &flow_if_index);
1360 		if (error) {
1361 			goto done;
1362 		}
1363 	}
1364 
1365 	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
1366 		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
1367 	}
1368 
1369 	if ((inp->inp_flags & INP_BOUND_IF) ||
1370 	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
1371 	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
1372 		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
1373 	}
1374 
1375 	if (flags != 0) {
1376 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
1377 		if (error) {
1378 			goto done;
1379 		}
1380 	}
1381 
1382 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
1383 		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
1384 	} else {
1385 		cfil_sock_id = cfil_sock_id_from_socket(so);
1386 	}
1387 
1388 	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
1389 		cfil_id = &cfil_sock_id;
1390 		cfil_id_size = sizeof(cfil_sock_id);
1391 	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
1392 		cfil_id = &inp->necp_client_uuid;
1393 		cfil_id_size = sizeof(inp->necp_client_uuid);
1394 	}
1395 
1396 	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
1397 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
1398 		if (error) {
1399 			goto done;
1400 		}
1401 	}
1402 
1403 done:
1404 	if (!error) {
1405 		*out_connect_packet = connect_packet;
1406 	} else if (connect_packet != NULL) {
1407 		mbuf_freem(connect_packet);
1408 	}
1409 
1410 	return error;
1411 }
1412 
1413 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1414 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1415 {
1416 	int error = 0;
1417 	mbuf_t connect_packet = fd_cb->connect_packet;
1418 	mbuf_t saved_connect_packet = NULL;
1419 
1420 	if (connect_packet != NULL) {
1421 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1422 		if (error) {
1423 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1424 			goto done;
1425 		}
1426 
1427 		error = flow_divert_send_packet(fd_cb, connect_packet);
1428 		if (error) {
1429 			goto done;
1430 		}
1431 
1432 		fd_cb->connect_packet = saved_connect_packet;
1433 		saved_connect_packet = NULL;
1434 	} else {
1435 		error = ENOENT;
1436 	}
1437 done:
1438 	if (saved_connect_packet != NULL) {
1439 		mbuf_freem(saved_connect_packet);
1440 	}
1441 
1442 	return error;
1443 }
1444 
1445 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1446 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1447 {
1448 	int             error                   = 0;
1449 	mbuf_t  packet                  = NULL;
1450 	int             rbuff_space             = 0;
1451 
1452 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1453 	if (error) {
1454 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1455 		goto done;
1456 	}
1457 
1458 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1459 	if (rbuff_space < 0) {
1460 		rbuff_space = 0;
1461 	}
1462 	rbuff_space = htonl(rbuff_space);
1463 	error = flow_divert_packet_append_tlv(packet,
1464 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1465 	    sizeof(rbuff_space),
1466 	    &rbuff_space);
1467 	if (error) {
1468 		goto done;
1469 	}
1470 
1471 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1472 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1473 		if (error) {
1474 			goto done;
1475 		}
1476 	}
1477 
1478 	error = flow_divert_send_packet(fd_cb, packet);
1479 	if (error) {
1480 		goto done;
1481 	}
1482 
1483 done:
1484 	if (error && packet != NULL) {
1485 		mbuf_freem(packet);
1486 	}
1487 
1488 	return error;
1489 }
1490 
1491 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1492 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1493 {
1494 	int             error   = 0;
1495 	mbuf_t  packet  = NULL;
1496 	uint32_t        zero    = 0;
1497 
1498 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1499 	if (error) {
1500 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1501 		goto done;
1502 	}
1503 
1504 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1505 	if (error) {
1506 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1507 		goto done;
1508 	}
1509 
1510 	how = htonl(how);
1511 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1512 	if (error) {
1513 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1514 		goto done;
1515 	}
1516 
1517 	error = flow_divert_send_packet(fd_cb, packet);
1518 	if (error) {
1519 		goto done;
1520 	}
1521 
1522 done:
1523 	if (error && packet != NULL) {
1524 		mbuf_free(packet);
1525 	}
1526 
1527 	return error;
1528 }
1529 
1530 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1531 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1532 {
1533 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1534 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1535 		return SHUT_RDWR;
1536 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1537 		return SHUT_RD;
1538 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1539 		return SHUT_WR;
1540 	}
1541 
1542 	return -1;
1543 }
1544 
1545 /*
1546  * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1547  * writes. Returns FALSE otherwise.
1548  */
1549 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1550 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1551 {
1552 	int             how             = -1;
1553 
1554 	/* Do not send any close messages if there is still data in the send buffer */
1555 	if (fd_cb->so->so_snd.sb_cc == 0) {
1556 		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1557 			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1558 			how = SHUT_RD;
1559 		}
1560 		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1561 			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1562 			if (how == SHUT_RD) {
1563 				how = SHUT_RDWR;
1564 			} else {
1565 				how = SHUT_WR;
1566 			}
1567 		}
1568 	}
1569 
1570 	if (how != -1) {
1571 		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1572 		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1573 			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1574 			if (how != SHUT_RD) {
1575 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1576 			}
1577 			if (how != SHUT_WR) {
1578 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1579 			}
1580 		}
1581 	}
1582 
1583 	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
1584 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
1585 	}
1586 }
1587 
1588 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_t data,size_t data_len)1589 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len)
1590 {
1591 	mbuf_t packet = NULL;
1592 	mbuf_t last = NULL;
1593 	int error = 0;
1594 
1595 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1596 	if (error || packet == NULL) {
1597 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1598 		goto done;
1599 	}
1600 
1601 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1602 		last = m_last(packet);
1603 		mbuf_setnext(last, data);
1604 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1605 	} else {
1606 		data_len = 0;
1607 	}
1608 	error = flow_divert_send_packet(fd_cb, packet);
1609 	if (error == 0 && data_len > 0) {
1610 		fd_cb->bytes_sent += data_len;
1611 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1612 	}
1613 
1614 done:
1615 	if (error) {
1616 		if (last != NULL) {
1617 			mbuf_setnext(last, NULL);
1618 		}
1619 		if (packet != NULL) {
1620 			mbuf_freem(packet);
1621 		}
1622 	}
1623 
1624 	return error;
1625 }
1626 
1627 static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb * fd_cb,mbuf_t data,size_t data_len,struct sockaddr * toaddr,Boolean is_fragment,size_t datagram_size)1628 flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
1629 {
1630 	mbuf_t packet = NULL;
1631 	mbuf_t last = NULL;
1632 	int error = 0;
1633 
1634 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1635 	if (error || packet == NULL) {
1636 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1637 		goto done;
1638 	}
1639 
1640 	if (toaddr != NULL) {
1641 		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
1642 		if (error) {
1643 			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
1644 			goto done;
1645 		}
1646 	}
1647 	if (is_fragment) {
1648 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
1649 		if (error) {
1650 			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
1651 			goto done;
1652 		}
1653 	}
1654 
1655 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
1656 	if (error) {
1657 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
1658 		goto done;
1659 	}
1660 
1661 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1662 		last = m_last(packet);
1663 		mbuf_setnext(last, data);
1664 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1665 	} else {
1666 		data_len = 0;
1667 	}
1668 	error = flow_divert_send_packet(fd_cb, packet);
1669 	if (error == 0 && data_len > 0) {
1670 		fd_cb->bytes_sent += data_len;
1671 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1672 	}
1673 
1674 done:
1675 	if (error) {
1676 		if (last != NULL) {
1677 			mbuf_setnext(last, NULL);
1678 		}
1679 		if (packet != NULL) {
1680 			mbuf_freem(packet);
1681 		}
1682 	}
1683 
1684 	return error;
1685 }
1686 
1687 static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb * fd_cb,mbuf_t datagram,size_t datagram_len,struct sockaddr * toaddr)1688 flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_t datagram, size_t datagram_len, struct sockaddr *toaddr)
1689 {
1690 	mbuf_t next_data = datagram;
1691 	size_t remaining_len = datagram_len;
1692 	mbuf_t remaining_data = NULL;
1693 	int error = 0;
1694 	bool first = true;
1695 
1696 	while (remaining_len > 0 && next_data != NULL) {
1697 		size_t to_send = remaining_len;
1698 		remaining_data = NULL;
1699 
1700 		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
1701 			to_send = FLOW_DIVERT_CHUNK_SIZE;
1702 			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
1703 			if (error) {
1704 				break;
1705 			}
1706 		}
1707 
1708 		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
1709 		if (error) {
1710 			break;
1711 		}
1712 
1713 		first = false;
1714 		remaining_len -= to_send;
1715 		next_data = remaining_data;
1716 	}
1717 
1718 	if (error) {
1719 		if (next_data != NULL) {
1720 			mbuf_freem(next_data);
1721 		}
1722 		if (remaining_data != NULL) {
1723 			mbuf_freem(remaining_data);
1724 		}
1725 	}
1726 	return error;
1727 }
1728 
1729 static void
flow_divert_send_buffered_data(struct flow_divert_pcb * fd_cb,Boolean force)1730 flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
1731 {
1732 	size_t  to_send;
1733 	size_t  sent    = 0;
1734 	int             error   = 0;
1735 	mbuf_t  buffer;
1736 
1737 	to_send = fd_cb->so->so_snd.sb_cc;
1738 	buffer = fd_cb->so->so_snd.sb_mb;
1739 
1740 	if (buffer == NULL && to_send > 0) {
1741 		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
1742 		return;
1743 	}
1744 
1745 	/* Ignore the send window if force is enabled */
1746 	if (!force && (to_send > fd_cb->send_window)) {
1747 		to_send = fd_cb->send_window;
1748 	}
1749 
1750 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1751 		while (sent < to_send) {
1752 			mbuf_t  data;
1753 			size_t  data_len;
1754 
1755 			data_len = to_send - sent;
1756 			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
1757 				data_len = FLOW_DIVERT_CHUNK_SIZE;
1758 			}
1759 
1760 			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
1761 			if (error) {
1762 				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1763 				break;
1764 			}
1765 
1766 			error = flow_divert_send_data_packet(fd_cb, data, data_len);
1767 			if (error) {
1768 				if (data != NULL) {
1769 					mbuf_freem(data);
1770 				}
1771 				break;
1772 			}
1773 
1774 			sent += data_len;
1775 		}
1776 		sbdrop(&fd_cb->so->so_snd, (int)sent);
1777 		sowwakeup(fd_cb->so);
1778 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1779 		mbuf_t data;
1780 		mbuf_t m;
1781 		size_t data_len;
1782 
1783 		while (buffer) {
1784 			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);
1785 
1786 			m = buffer;
1787 			if (toaddr != NULL) {
1788 				/* look for data in the chain */
1789 				do {
1790 					m = m->m_next;
1791 					if (m != NULL && m->m_type == MT_DATA) {
1792 						break;
1793 					}
1794 				} while (m);
1795 				if (m == NULL) {
1796 					/* unexpected */
1797 					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
1798 					goto move_on;
1799 				}
1800 			}
1801 			data_len = mbuf_pkthdr_len(m);
1802 			if (data_len > 0) {
1803 				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
1804 				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
1805 				if (error) {
1806 					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1807 					break;
1808 				}
1809 			} else {
1810 				data = NULL;
1811 			}
1812 			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
1813 				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
1814 			} else {
1815 				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
1816 				data = NULL;
1817 			}
1818 			if (error) {
1819 				if (data != NULL) {
1820 					mbuf_freem(data);
1821 				}
1822 				break;
1823 			}
1824 			sent += data_len;
1825 move_on:
1826 			buffer = buffer->m_nextpkt;
1827 			(void) sbdroprecord(&(fd_cb->so->so_snd));
1828 		}
1829 	}
1830 
1831 	if (sent > 0) {
1832 		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
1833 		if (fd_cb->send_window >= sent) {
1834 			fd_cb->send_window -= sent;
1835 		} else {
1836 			fd_cb->send_window = 0;
1837 		}
1838 	}
1839 }
1840 
1841 static int
flow_divert_send_app_data(struct flow_divert_pcb * fd_cb,mbuf_t data,size_t data_size,struct sockaddr * toaddr)1842 flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_size, struct sockaddr *toaddr)
1843 {
1844 	size_t to_send = data_size;
1845 	int error = 0;
1846 
1847 	if (to_send > fd_cb->send_window) {
1848 		to_send = fd_cb->send_window;
1849 	}
1850 
1851 	if (fd_cb->so->so_snd.sb_cc > 0) {
1852 		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
1853 	}
1854 
1855 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1856 		size_t sent = 0;
1857 		mbuf_t remaining_data = data;
1858 		size_t remaining_size = data_size;
1859 		mbuf_t pkt_data = NULL;
1860 
1861 		while (sent < to_send && remaining_data != NULL && remaining_size > 0) {
1862 			size_t  pkt_data_len;
1863 
1864 			pkt_data = remaining_data;
1865 
1866 			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
1867 				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
1868 			} else {
1869 				pkt_data_len = to_send - sent;
1870 			}
1871 
1872 			if (pkt_data_len < remaining_size) {
1873 				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
1874 				if (error) {
1875 					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
1876 					pkt_data = NULL;
1877 					break;
1878 				}
1879 				remaining_size -= pkt_data_len;
1880 			} else {
1881 				remaining_data = NULL;
1882 				remaining_size = 0;
1883 			}
1884 
1885 			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
1886 			if (error) {
1887 				break;
1888 			}
1889 
1890 			pkt_data = NULL;
1891 			sent += pkt_data_len;
1892 		}
1893 
1894 		if (fd_cb->send_window >= sent) {
1895 			fd_cb->send_window -= sent;
1896 		} else {
1897 			fd_cb->send_window = 0;
1898 		}
1899 
1900 		error = 0;
1901 
1902 		if (pkt_data != NULL) {
1903 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1904 				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
1905 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
1906 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1907 				}
1908 			} else {
1909 				mbuf_freem(pkt_data);
1910 				error = ENOBUFS;
1911 			}
1912 		}
1913 
1914 		if (remaining_data != NULL) {
1915 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1916 				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
1917 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
1918 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1919 				}
1920 			} else {
1921 				mbuf_freem(remaining_data);
1922 				error = ENOBUFS;
1923 			}
1924 		}
1925 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1926 		int send_dgram_error = 0;
1927 		if (to_send || data_size == 0) {
1928 			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
1929 				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
1930 			} else {
1931 				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
1932 				data = NULL;
1933 			}
1934 			if (send_dgram_error) {
1935 				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
1936 			} else {
1937 				if (data_size >= fd_cb->send_window) {
1938 					fd_cb->send_window = 0;
1939 				} else {
1940 					fd_cb->send_window -= data_size;
1941 				}
1942 				data = NULL;
1943 			}
1944 		}
1945 
1946 		if (data != NULL) {
1947 			/* buffer it */
1948 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1949 				if (toaddr != NULL) {
1950 					int append_error = 0;
1951 					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
1952 						FDLOG(LOG_ERR, fd_cb,
1953 						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
1954 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
1955 					}
1956 				} else {
1957 					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
1958 						FDLOG(LOG_ERR, fd_cb,
1959 						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
1960 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1961 					}
1962 				}
1963 			} else {
1964 				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
1965 				mbuf_freem(data);
1966 			}
1967 		}
1968 	}
1969 
1970 	return error;
1971 }
1972 
1973 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1974 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1975 {
1976 	int error = 0;
1977 	mbuf_t packet = NULL;
1978 
1979 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1980 	if (error) {
1981 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1982 		goto done;
1983 	}
1984 
1985 	error = flow_divert_send_packet(fd_cb, packet);
1986 	if (error) {
1987 		goto done;
1988 	}
1989 
1990 done:
1991 	if (error && packet != NULL) {
1992 		mbuf_free(packet);
1993 	}
1994 
1995 	return error;
1996 }
1997 
1998 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1999 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
2000 {
2001 	int             error           = 0;
2002 	mbuf_t  packet          = NULL;
2003 
2004 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
2005 	if (error) {
2006 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
2007 		goto done;
2008 	}
2009 
2010 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2011 	if (error) {
2012 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2013 		goto done;
2014 	}
2015 
2016 	error = flow_divert_send_packet(fd_cb, packet);
2017 	if (error) {
2018 		goto done;
2019 	}
2020 
2021 done:
2022 	if (error && packet != NULL) {
2023 		mbuf_free(packet);
2024 	}
2025 
2026 	return error;
2027 }
2028 
2029 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2030 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2031 {
2032 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2033 
2034 	if (local_endpoint->sa_family == AF_INET6) {
2035 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2036 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2037 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2038 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2039 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2040 		}
2041 		if (inp->inp_lport == 0) {
2042 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2043 		}
2044 	} else if (local_endpoint->sa_family == AF_INET) {
2045 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2046 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2047 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2048 		}
2049 		if (inp->inp_lport == 0) {
2050 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
2051 		}
2052 	}
2053 }
2054 
2055 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2056 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2057 {
2058 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2059 
2060 	if (remote_endpoint->sa_family == AF_INET6) {
2061 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2062 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2063 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2064 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2065 		}
2066 		if (inp->inp_fport == 0) {
2067 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2068 		}
2069 	} else if (remote_endpoint->sa_family == AF_INET) {
2070 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
2071 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2072 		}
2073 		if (inp->inp_fport == 0) {
2074 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2075 		}
2076 	}
2077 }
2078 
2079 static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid,uint32_t * ctl_unit,uint32_t * aggregate_unit,bool * is_aggregate)2080 flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
2081 {
2082 	uint32_t result = *ctl_unit;
2083 
2084 	// There are two models supported for deriving control units:
2085 	// 1. A series of flow divert units that allow "transparently" failing
2086 	//    over to the next unit. For this model, the aggregate_unit contains list
2087 	//    of all control units (between 1 and 30) masked over each other.
2088 	// 2. An indication that in-process flow divert should be preferred, with
2089 	//    an out of process flow divert to fail over to. For this model, the
2090 	//    ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
2091 	//    is returned first, with the unpacked aggregate unit returned as a
2092 	//    fallback.
2093 	*is_aggregate = false;
2094 	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
2095 		bool found_unit = false;
2096 		if (pid != 0) {
2097 			// Look for an in-process group that is already open, and use that unit
2098 			struct flow_divert_group *group = NULL;
2099 			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
2100 				if (group->in_process_pid == pid) {
2101 					// Found an in-process group for our same PID, use it
2102 					found_unit = true;
2103 					result = group->ctl_unit;
2104 					break;
2105 				}
2106 			}
2107 
2108 			// If an in-process group isn't open yet, send a signal up through NECP to request one
2109 			if (!found_unit) {
2110 				necp_client_request_in_process_flow_divert(pid);
2111 			}
2112 		}
2113 
2114 		// If a unit was found, return it
2115 		if (found_unit) {
2116 			if (aggregate_unit != NULL && *aggregate_unit != 0) {
2117 				*is_aggregate = true;
2118 			}
2119 			// The next time around, the aggregate unit values will be picked up
2120 			*ctl_unit = 0;
2121 			return result;
2122 		}
2123 
2124 		// If no unit was found, fall through and clear out the ctl_unit
2125 		result = 0;
2126 		*ctl_unit = 0;
2127 	}
2128 
2129 	if (aggregate_unit != NULL && *aggregate_unit != 0) {
2130 		uint32_t counter;
2131 		struct flow_divert_group *lower_order_group = NULL;
2132 
2133 		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
2134 			if ((*aggregate_unit) & (1 << counter)) {
2135 				struct flow_divert_group *group = NULL;
2136 				group = flow_divert_group_lookup(counter + 1, NULL);
2137 
2138 				if (group != NULL) {
2139 					if (lower_order_group == NULL) {
2140 						lower_order_group = group;
2141 					} else if ((group->order < lower_order_group->order)) {
2142 						lower_order_group = group;
2143 					}
2144 				}
2145 			}
2146 		}
2147 
2148 		if (lower_order_group != NULL) {
2149 			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
2150 			*is_aggregate = true;
2151 			return lower_order_group->ctl_unit;
2152 		} else {
2153 			*ctl_unit = 0;
2154 			return result;
2155 		}
2156 	} else {
2157 		*ctl_unit = 0;
2158 		return result;
2159 	}
2160 }
2161 
2162 static int
flow_divert_try_next_group(struct flow_divert_pcb * fd_cb)2163 flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
2164 {
2165 	int error = 0;
2166 	uint32_t policy_control_unit = fd_cb->policy_control_unit;
2167 
2168 	flow_divert_pcb_remove(fd_cb);
2169 
2170 	do {
2171 		struct flow_divert_group *next_group = NULL;
2172 		bool is_aggregate = false;
2173 		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);
2174 
2175 		if (fd_cb->control_group_unit == next_ctl_unit) {
2176 			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
2177 			error = EALREADY;
2178 			break;
2179 		}
2180 
2181 		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
2182 			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
2183 			error = ENOENT;
2184 			break;
2185 		}
2186 
2187 		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
2188 		if (next_group == NULL) {
2189 			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
2190 			continue;
2191 		}
2192 
2193 		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);
2194 
2195 		error = flow_divert_pcb_insert(fd_cb, next_group);
2196 		if (error == 0) {
2197 			if (is_aggregate) {
2198 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
2199 			} else {
2200 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
2201 			}
2202 		}
2203 		FDGRP_RELEASE(next_group);
2204 	} while (fd_cb->group == NULL);
2205 
2206 	if (fd_cb->group == NULL) {
2207 		return error ? error : ENOENT;
2208 	}
2209 
2210 	error = flow_divert_send_connect_packet(fd_cb);
2211 	if (error) {
2212 		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
2213 		flow_divert_pcb_remove(fd_cb);
2214 		error = ENOENT;
2215 	}
2216 
2217 	return error;
2218 }
2219 
2220 static void
flow_divert_disable(struct flow_divert_pcb * fd_cb)2221 flow_divert_disable(struct flow_divert_pcb *fd_cb)
2222 {
2223 	struct socket *so = NULL;
2224 	mbuf_t  buffer;
2225 	int error = 0;
2226 	proc_t last_proc = NULL;
2227 	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
2228 	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
2229 	struct inpcb *inp = NULL;
2230 
2231 	so = fd_cb->so;
2232 	if (so == NULL) {
2233 		goto done;
2234 	}
2235 
2236 	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");
2237 
2238 	/* Restore the IP state */
2239 	inp = sotoinpcb(so);
2240 	inp->inp_vflag = fd_cb->original_vflag;
2241 	inp->inp_faddr.s_addr = INADDR_ANY;
2242 	inp->inp_fport = 0;
2243 	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
2244 	inp->inp_fifscope = IFSCOPE_NONE;
2245 	inp->in6p_fport = 0;
2246 	/* If flow divert set the local address, clear it out */
2247 	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
2248 		inp->inp_laddr.s_addr = INADDR_ANY;
2249 		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
2250 		inp->inp_lifscope = IFSCOPE_NONE;
2251 	}
2252 	inp->inp_last_outifp = fd_cb->original_last_outifp;
2253 	inp->in6p_last_outifp = fd_cb->original_last_outifp6;
2254 
2255 	/* Dis-associate the socket */
2256 	so->so_flags &= ~SOF_FLOW_DIVERT;
2257 	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
2258 	so->so_fd_pcb = NULL;
2259 	fd_cb->so = NULL;
2260 
2261 	FDRELEASE(fd_cb); /* Release the socket's reference */
2262 
2263 	/* Revert back to the original protocol */
2264 	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));
2265 
2266 	/* Reset the socket state to avoid confusing NECP */
2267 	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);
2268 
2269 	last_proc = proc_find(so->last_pid);
2270 
2271 	if (do_connect) {
2272 		/* Connect using the original protocol */
2273 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
2274 		if (error) {
2275 			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
2276 			goto done;
2277 		}
2278 	}
2279 
2280 	buffer = so->so_snd.sb_mb;
2281 	if (buffer == NULL) {
2282 		/* No buffered data, done */
2283 		goto done;
2284 	}
2285 
2286 	/* Send any buffered data using the original protocol */
2287 	if (SOCK_TYPE(so) == SOCK_STREAM) {
2288 		mbuf_t data_to_send = NULL;
2289 		size_t data_len = so->so_snd.sb_cc;
2290 
2291 		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
2292 		if (error) {
2293 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
2294 			goto done;
2295 		}
2296 
2297 		sbflush(&so->so_snd);
2298 
2299 		if (data_to_send->m_flags & M_PKTHDR) {
2300 			mbuf_pkthdr_setlen(data_to_send, data_len);
2301 		}
2302 
2303 		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2304 		    0,
2305 		    data_to_send,
2306 		    NULL,
2307 		    NULL,
2308 		    (last_proc != NULL ? last_proc : current_proc()));
2309 
2310 		if (error && error != EWOULDBLOCK) {
2311 			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
2312 		} else {
2313 			error = 0;
2314 		}
2315 	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
2316 		struct sockbuf *sb = &so->so_snd;
2317 		MBUFQ_HEAD(send_queue_head) send_queue;
2318 		MBUFQ_INIT(&send_queue);
2319 
2320 		/* Flush the send buffer, moving all records to a temporary queue */
2321 		while (sb->sb_mb != NULL) {
2322 			mbuf_t record = sb->sb_mb;
2323 			mbuf_t m = record;
2324 			sb->sb_mb = sb->sb_mb->m_nextpkt;
2325 			while (m != NULL) {
2326 				sbfree(sb, m);
2327 				m = m->m_next;
2328 			}
2329 			record->m_nextpkt = NULL;
2330 			MBUFQ_ENQUEUE(&send_queue, record);
2331 		}
2332 		SB_EMPTY_FIXUP(sb);
2333 
2334 		while (!MBUFQ_EMPTY(&send_queue)) {
2335 			mbuf_t next_record = MBUFQ_FIRST(&send_queue);
2336 			mbuf_t addr = NULL;
2337 			mbuf_t control = NULL;
2338 			mbuf_t last_control = NULL;
2339 			mbuf_t data = NULL;
2340 			mbuf_t m = next_record;
2341 			struct sockaddr *to_endpoint = NULL;
2342 
2343 			MBUFQ_DEQUEUE(&send_queue, next_record);
2344 
2345 			while (m != NULL) {
2346 				if (m->m_type == MT_SONAME) {
2347 					addr = m;
2348 				} else if (m->m_type == MT_CONTROL) {
2349 					if (control == NULL) {
2350 						control = m;
2351 					}
2352 					last_control = m;
2353 				} else if (m->m_type == MT_DATA) {
2354 					data = m;
2355 					break;
2356 				}
2357 				m = m->m_next;
2358 			}
2359 
2360 			if (addr != NULL && !do_connect) {
2361 				to_endpoint = flow_divert_get_buffered_target_address(addr);
2362 				if (to_endpoint == NULL) {
2363 					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
2364 				}
2365 			}
2366 
2367 			if (data == NULL) {
2368 				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
2369 				mbuf_freem(next_record);
2370 				continue;
2371 			}
2372 
2373 			if (!(data->m_flags & M_PKTHDR)) {
2374 				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
2375 				mbuf_freem(next_record);
2376 				continue;
2377 			}
2378 
2379 			if (addr != NULL) {
2380 				addr->m_next = NULL;
2381 			}
2382 
2383 			if (last_control != NULL) {
2384 				last_control->m_next = NULL;
2385 			}
2386 
2387 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2388 			    0,
2389 			    data,
2390 			    to_endpoint,
2391 			    control,
2392 			    (last_proc != NULL ? last_proc : current_proc()));
2393 
2394 			if (addr != NULL) {
2395 				mbuf_freem(addr);
2396 			}
2397 
2398 			if (error) {
2399 				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
2400 			}
2401 		}
2402 	}
2403 done:
2404 	if (last_proc != NULL) {
2405 		proc_rele(last_proc);
2406 	}
2407 
2408 	if (error && so != NULL) {
2409 		so->so_error = (uint16_t)error;
2410 		flow_divert_disconnect_socket(so, do_connect, false);
2411 	}
2412 }
2413 
2414 static void
flow_divert_scope(struct flow_divert_pcb * fd_cb,int out_if_index,bool derive_new_address)2415 flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
2416 {
2417 	struct socket *so = NULL;
2418 	struct inpcb *inp = NULL;
2419 	struct ifnet *current_ifp = NULL;
2420 	struct ifnet *new_ifp = NULL;
2421 	int error = 0;
2422 
2423 	so = fd_cb->so;
2424 	if (so == NULL) {
2425 		return;
2426 	}
2427 
2428 	inp = sotoinpcb(so);
2429 
2430 	if (out_if_index <= 0) {
2431 		return;
2432 	}
2433 
2434 	if (inp->inp_vflag & INP_IPV6) {
2435 		current_ifp = inp->in6p_last_outifp;
2436 	} else {
2437 		current_ifp = inp->inp_last_outifp;
2438 	}
2439 
2440 	if (current_ifp != NULL) {
2441 		if (current_ifp->if_index == out_if_index) {
2442 			/* No change */
2443 			return;
2444 		}
2445 
2446 		/* Scope the socket to the given interface */
2447 		error = inp_bindif(inp, out_if_index, &new_ifp);
2448 		if (error != 0) {
2449 			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
2450 			return;
2451 		}
2452 
2453 		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
2454 			/* Get the appropriate address for the given interface */
2455 			if (inp->inp_vflag & INP_IPV6) {
2456 				inp->in6p_laddr = sa6_any.sin6_addr;
2457 				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
2458 			} else {
2459 				inp->inp_laddr.s_addr = INADDR_ANY;
2460 				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
2461 			}
2462 
2463 			if (error != 0) {
2464 				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
2465 			}
2466 		}
2467 	} else {
2468 		ifnet_head_lock_shared();
2469 		if (IF_INDEX_IN_RANGE(out_if_index)) {
2470 			new_ifp = ifindex2ifnet[out_if_index];
2471 		}
2472 		ifnet_head_done();
2473 	}
2474 
2475 	/* Update the "last interface" of the socket */
2476 	if (new_ifp != NULL) {
2477 		if (inp->inp_vflag & INP_IPV6) {
2478 			inp->in6p_last_outifp = new_ifp;
2479 		} else {
2480 			inp->inp_last_outifp = new_ifp;
2481 		}
2482 
2483 #if SKYWALK
2484 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2485 			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
2486 		}
2487 #endif /* SKYWALK */
2488 	}
2489 }
2490 
2491 static void
flow_divert_handle_connect_result(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2492 flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2493 {
2494 	uint32_t                                        connect_error = 0;
2495 	uint32_t                                        ctl_unit                        = 0;
2496 	int                                                     error                           = 0;
2497 	union sockaddr_in_4_6 local_endpoint = {};
2498 	union sockaddr_in_4_6 remote_endpoint = {};
2499 	int                                                     out_if_index            = 0;
2500 	uint32_t                                        send_window;
2501 	uint32_t                                        app_data_length         = 0;
2502 
2503 	memset(&local_endpoint, 0, sizeof(local_endpoint));
2504 	memset(&remote_endpoint, 0, sizeof(remote_endpoint));
2505 
2506 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
2507 	if (error) {
2508 		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
2509 		return;
2510 	}
2511 
2512 	connect_error = ntohl(connect_error);
2513 	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);
2514 
2515 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
2516 	if (error) {
2517 		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
2518 		return;
2519 	}
2520 
2521 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
2522 	if (error) {
2523 		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
2524 	}
2525 
2526 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sa), NULL);
2527 	if (error) {
2528 		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
2529 	}
2530 
2531 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sa), NULL);
2532 	if (error) {
2533 		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2534 	}
2535 
2536 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2537 	if (error) {
2538 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
2539 	}
2540 
2541 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2542 	if (error) {
2543 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
2544 	}
2545 
2546 	error = 0;
2547 
2548 	FDLOCK(fd_cb);
2549 	if (fd_cb->so != NULL) {
2550 		struct inpcb *inp = NULL;
2551 		struct socket *so = fd_cb->so;
2552 		bool local_address_is_valid = false;
2553 
2554 		socket_lock(so, 1);
2555 
2556 		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2557 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
2558 			goto done;
2559 		}
2560 
2561 		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
2562 			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
2563 			goto done;
2564 		}
2565 
2566 		inp = sotoinpcb(so);
2567 
2568 		if (connect_error || error) {
2569 			goto set_socket_state;
2570 		}
2571 
2572 		if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
2573 			if (local_endpoint.sa.sa_family == AF_INET) {
2574 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2575 				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
2576 					local_address_is_valid = true;
2577 					fd_cb->local_endpoint = local_endpoint;
2578 					inp->inp_laddr.s_addr = INADDR_ANY;
2579 				} else {
2580 					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
2581 				}
2582 			} else if (local_endpoint.sa.sa_family == AF_INET6) {
2583 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2584 				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
2585 					local_address_is_valid = true;
2586 					fd_cb->local_endpoint = local_endpoint;
2587 					inp->in6p_laddr = sa6_any.sin6_addr;
2588 				} else {
2589 					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
2590 				}
2591 			}
2592 		}
2593 
2594 		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
2595 		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
2596 
2597 		if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
2598 			if (remote_endpoint.sa.sa_family == AF_INET) {
2599 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2600 			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
2601 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2602 			}
2603 			flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
2604 		}
2605 
2606 		if (app_data_length > 0) {
2607 			uint8_t *app_data = NULL;
2608 			app_data = kalloc_data(app_data_length, Z_WAITOK);
2609 			if (app_data != NULL) {
2610 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2611 				if (error == 0) {
2612 					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
2613 					if (fd_cb->app_data != NULL) {
2614 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
2615 					}
2616 					fd_cb->app_data = app_data;
2617 					fd_cb->app_data_length = app_data_length;
2618 				} else {
2619 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
2620 					kfree_data(app_data, app_data_length);
2621 				}
2622 			} else {
2623 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
2624 			}
2625 		}
2626 
2627 		if (error) {
2628 			goto set_socket_state;
2629 		}
2630 
2631 		if (fd_cb->group == NULL) {
2632 			error = EINVAL;
2633 			goto set_socket_state;
2634 		}
2635 
2636 		ctl_unit = ntohl(ctl_unit);
2637 		if (ctl_unit > 0) {
2638 			int insert_error = 0;
2639 			struct flow_divert_group *grp = NULL;
2640 
2641 			if (ctl_unit >= GROUP_COUNT_MAX) {
2642 				FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
2643 				error = EINVAL;
2644 				goto set_socket_state;
2645 			}
2646 
2647 			grp = flow_divert_group_lookup(ctl_unit, fd_cb);
2648 			if (grp == NULL) {
2649 				error = ECONNRESET;
2650 				goto set_socket_state;
2651 			}
2652 
2653 			flow_divert_pcb_remove(fd_cb);
2654 			insert_error = flow_divert_pcb_insert(fd_cb, grp);
2655 			FDGRP_RELEASE(grp);
2656 
2657 			if (insert_error != 0) {
2658 				error = ECONNRESET;
2659 				goto set_socket_state;
2660 			}
2661 		}
2662 
2663 		fd_cb->send_window = ntohl(send_window);
2664 
2665 set_socket_state:
2666 		if (!connect_error && !error) {
2667 			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
2668 			error = flow_divert_send_connect_result(fd_cb);
2669 		}
2670 
2671 		if (connect_error || error) {
2672 			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
2673 				error = flow_divert_try_next_group(fd_cb);
2674 				if (error && fd_cb->policy_control_unit == 0) {
2675 					flow_divert_disable(fd_cb);
2676 					goto done;
2677 				} else if (error == 0) {
2678 					goto done;
2679 				}
2680 			}
2681 
2682 			if (!connect_error) {
2683 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
2684 				so->so_error = (uint16_t)error;
2685 				flow_divert_send_close_if_needed(fd_cb);
2686 			} else {
2687 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
2688 				so->so_error = (uint16_t)connect_error;
2689 			}
2690 			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
2691 		} else {
2692 #if NECP
2693 			/* Update NECP client with connected five-tuple */
2694 			if (!uuid_is_null(inp->necp_client_uuid)) {
2695 				socket_unlock(so, 0);
2696 				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
2697 				socket_lock(so, 0);
2698 				if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2699 					/* The socket was closed while it was unlocked */
2700 					goto done;
2701 				}
2702 			}
2703 #endif /* NECP */
2704 
2705 			flow_divert_send_buffered_data(fd_cb, FALSE);
2706 			soisconnected(so);
2707 		}
2708 
2709 		/* We don't need the connect packet any more */
2710 		if (fd_cb->connect_packet != NULL) {
2711 			mbuf_freem(fd_cb->connect_packet);
2712 			fd_cb->connect_packet = NULL;
2713 		}
2714 
2715 		/* We don't need the original remote endpoint any more */
2716 		free_sockaddr(fd_cb->original_remote_endpoint);
2717 done:
2718 		socket_unlock(so, 1);
2719 	}
2720 	FDUNLOCK(fd_cb);
2721 }
2722 
2723 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2724 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2725 {
2726 	uint32_t        close_error                     = 0;
2727 	int                     error                   = 0;
2728 	int                     how                     = 0;
2729 
2730 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2731 	if (error) {
2732 		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2733 		return;
2734 	}
2735 
2736 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2737 	if (error) {
2738 		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2739 		return;
2740 	}
2741 
2742 	how = ntohl(how);
2743 
2744 	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2745 
2746 	FDLOCK(fd_cb);
2747 	if (fd_cb->so != NULL) {
2748 		bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2749 		socket_lock(fd_cb->so, 0);
2750 
2751 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2752 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2753 			goto done;
2754 		}
2755 
2756 		fd_cb->so->so_error = (uint16_t)ntohl(close_error);
2757 
2758 		flow_divert_update_closed_state(fd_cb, how, true, true);
2759 
2760 		/* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
2761 		how = flow_divert_tunnel_how_closed(fd_cb);
2762 		if (how == SHUT_RDWR) {
2763 			flow_divert_disconnect_socket(fd_cb->so, is_connected, true);
2764 		} else if (how == SHUT_RD && is_connected) {
2765 			socantrcvmore(fd_cb->so);
2766 		} else if (how == SHUT_WR && is_connected) {
2767 			socantsendmore(fd_cb->so);
2768 		}
2769 done:
2770 		socket_unlock(fd_cb->so, 0);
2771 	}
2772 	FDUNLOCK(fd_cb);
2773 }
2774 
2775 static mbuf_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2776 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2777 {
2778 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2779 	bool need_recvdstaddr = false;
2780 	/* Socket flow tracking needs to see the local address */
2781 	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2782 	if ((inp->inp_vflag & INP_IPV4) &&
2783 	    fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2784 	    ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2785 		return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2786 	} else if ((inp->inp_vflag & INP_IPV6) &&
2787 	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2788 	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2789 		struct in6_pktinfo pi6;
2790 		memset(&pi6, 0, sizeof(pi6));
2791 		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2792 
2793 		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2794 	}
2795 	return NULL;
2796 }
2797 
2798 static int
flow_divert_handle_data(struct flow_divert_pcb * fd_cb,mbuf_t packet,size_t offset)2799 flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
2800 {
2801 	int error = 0;
2802 
2803 	FDLOCK(fd_cb);
2804 	if (fd_cb->so != NULL) {
2805 		mbuf_t  data            = NULL;
2806 		size_t  data_size;
2807 		struct sockaddr_storage remote_address;
2808 		boolean_t got_remote_sa = FALSE;
2809 		boolean_t appended = FALSE;
2810 		boolean_t append_success = FALSE;
2811 
2812 		socket_lock(fd_cb->so, 0);
2813 
2814 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2815 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
2816 			goto done;
2817 		}
2818 
2819 		if (sbspace(&fd_cb->so->so_rcv) == 0) {
2820 			error = ENOBUFS;
2821 			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
2822 			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
2823 			goto done;
2824 		}
2825 
2826 		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
2827 			uint32_t val_size = 0;
2828 
2829 			/* check if we got remote address with data */
2830 			memset(&remote_address, 0, sizeof(remote_address));
2831 			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
2832 			if (error || val_size > sizeof(remote_address)) {
2833 				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2834 				error = 0;
2835 			} else {
2836 				if (remote_address.ss_len > sizeof(remote_address)) {
2837 					remote_address.ss_len = sizeof(remote_address);
2838 				}
2839 				/* validate the address */
2840 				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
2841 					got_remote_sa = TRUE;
2842 				} else {
2843 					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
2844 				}
2845 				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
2846 			}
2847 		}
2848 
2849 		data_size = (mbuf_pkthdr_len(packet) - offset);
2850 
2851 		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
2852 			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
2853 			goto done;
2854 		}
2855 
2856 		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
2857 			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
2858 			goto done;
2859 		}
2860 
2861 		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
2862 
2863 		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
2864 		if (error || data == NULL) {
2865 			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
2866 			goto done;
2867 		}
2868 
2869 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
2870 			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
2871 			append_success = TRUE;
2872 		} else {
2873 			struct sockaddr *append_sa = NULL;
2874 			mbuf_t mctl;
2875 
2876 			if (got_remote_sa == TRUE) {
2877 				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
2878 			} else {
2879 				if (SOCK_CHECK_DOM(fd_cb->so, AF_INET6)) {
2880 					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
2881 				} else {
2882 					error = in_getpeeraddr(fd_cb->so, &append_sa);
2883 				}
2884 			}
2885 			if (error) {
2886 				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
2887 			}
2888 
2889 			mctl = flow_divert_create_control_mbuf(fd_cb);
2890 			int append_error = 0;
2891 			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
2892 			if (appended || append_error == 0) {
2893 				append_success = TRUE;
2894 			} else {
2895 				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
2896 			}
2897 
2898 			free_sockaddr(append_sa);
2899 		}
2900 
2901 		if (append_success) {
2902 			fd_cb->bytes_received += data_size;
2903 			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
2904 		}
2905 
2906 		if (appended) {
2907 			sorwakeup(fd_cb->so);
2908 		}
2909 done:
2910 		socket_unlock(fd_cb->so, 0);
2911 	}
2912 	FDUNLOCK(fd_cb);
2913 
2914 	return error;
2915 }
2916 
2917 static void
flow_divert_handle_read_notification(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2918 flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2919 {
2920 	uint32_t        read_count              = 0;
2921 	int             error                   = 0;
2922 
2923 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
2924 	if (error) {
2925 		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
2926 		return;
2927 	}
2928 
2929 	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
2930 
2931 	FDLOCK(fd_cb);
2932 	if (fd_cb->so != NULL) {
2933 		socket_lock(fd_cb->so, 0);
2934 
2935 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2936 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
2937 			goto done;
2938 		}
2939 
2940 		fd_cb->send_window += ntohl(read_count);
2941 		flow_divert_send_buffered_data(fd_cb, FALSE);
2942 done:
2943 		socket_unlock(fd_cb->so, 0);
2944 	}
2945 	FDUNLOCK(fd_cb);
2946 }
2947 
2948 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_t packet,int offset)2949 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset)
2950 {
2951 	int error         = 0;
2952 	uint32_t key_size = 0;
2953 	int log_level     = 0;
2954 	uint32_t flags    = 0;
2955 	int32_t order     = FLOW_DIVERT_ORDER_LAST;
2956 
2957 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2958 	if (error) {
2959 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2960 		return;
2961 	}
2962 
2963 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2964 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2965 		return;
2966 	}
2967 
2968 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2969 	if (!error) {
2970 		nil_pcb.log_level = (uint8_t)log_level;
2971 	}
2972 
2973 	lck_rw_lock_exclusive(&group->lck);
2974 
2975 	if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2976 		FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2977 		lck_rw_done(&group->lck);
2978 		return;
2979 	}
2980 
2981 	if (group->token_key != NULL) {
2982 		kfree_data(group->token_key, group->token_key_size);
2983 		group->token_key = NULL;
2984 	}
2985 
2986 	group->token_key = kalloc_data(key_size, Z_WAITOK);
2987 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2988 	if (error) {
2989 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2990 		kfree_data(group->token_key, key_size);
2991 		group->token_key = NULL;
2992 		lck_rw_done(&group->lck);
2993 		return;
2994 	}
2995 
2996 	group->token_key_size = key_size;
2997 
2998 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2999 	if (!error) {
3000 		group->flags = flags;
3001 	}
3002 
3003 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
3004 	if (!error) {
3005 		FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
3006 		group->order = order;
3007 	}
3008 
3009 	lck_rw_done(&group->lck);
3010 }
3011 
3012 static void
flow_divert_handle_properties_update(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)3013 flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
3014 {
3015 	int                                                     error                           = 0;
3016 	int                                                     out_if_index            = 0;
3017 	uint32_t                                        app_data_length         = 0;
3018 
3019 	FDLOG0(LOG_INFO, fd_cb, "received a properties update");
3020 
3021 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
3022 	if (error) {
3023 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
3024 	}
3025 
3026 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
3027 	if (error) {
3028 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
3029 	}
3030 
3031 	FDLOCK(fd_cb);
3032 	if (fd_cb->so != NULL) {
3033 		socket_lock(fd_cb->so, 0);
3034 
3035 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
3036 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
3037 			goto done;
3038 		}
3039 
3040 		if (out_if_index > 0) {
3041 			flow_divert_scope(fd_cb, out_if_index, true);
3042 			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
3043 		}
3044 
3045 		if (app_data_length > 0) {
3046 			uint8_t *app_data = NULL;
3047 			app_data = kalloc_data(app_data_length, Z_WAITOK);
3048 			if (app_data != NULL) {
3049 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
3050 				if (error == 0) {
3051 					if (fd_cb->app_data != NULL) {
3052 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
3053 					}
3054 					fd_cb->app_data = app_data;
3055 					fd_cb->app_data_length = app_data_length;
3056 				} else {
3057 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
3058 					kfree_data(app_data, app_data_length);
3059 				}
3060 			} else {
3061 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
3062 			}
3063 		}
3064 done:
3065 		socket_unlock(fd_cb->so, 0);
3066 	}
3067 	FDUNLOCK(fd_cb);
3068 }
3069 
3070 static void
flow_divert_handle_app_map_create(struct flow_divert_group * group,mbuf_t packet,int offset)3071 flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_t packet, int offset)
3072 {
3073 	size_t bytes_mem_size;
3074 	size_t child_maps_mem_size;
3075 	size_t nodes_mem_size;
3076 	size_t trie_memory_size = 0;
3077 	int cursor;
3078 	int error = 0;
3079 	struct flow_divert_trie new_trie;
3080 	int insert_error = 0;
3081 	int prefix_count = -1;
3082 	int signing_id_count = 0;
3083 	size_t bytes_count = 0;
3084 	size_t nodes_count = 0;
3085 	size_t maps_count = 0;
3086 
3087 	lck_rw_lock_exclusive(&group->lck);
3088 
3089 	/* Re-set the current trie */
3090 	if (group->signing_id_trie.memory != NULL) {
3091 		kfree_data_addr(group->signing_id_trie.memory);
3092 	}
3093 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
3094 	group->signing_id_trie.root = NULL_TRIE_IDX;
3095 
3096 	memset(&new_trie, 0, sizeof(new_trie));
3097 
3098 	/* Get the number of shared prefixes in the new set of signing ID strings */
3099 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
3100 
3101 	if (prefix_count < 0 || error) {
3102 		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
3103 		lck_rw_done(&group->lck);
3104 		return;
3105 	}
3106 
3107 	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
3108 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
3109 	    cursor >= 0;
3110 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
3111 		uint32_t sid_size = 0;
3112 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
3113 		if (error || sid_size == 0) {
3114 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
3115 			signing_id_count = 0;
3116 			break;
3117 		}
3118 		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
3119 			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
3120 			signing_id_count = 0;
3121 			break;
3122 		}
3123 		signing_id_count++;
3124 	}
3125 
3126 	if (signing_id_count == 0) {
3127 		lck_rw_done(&group->lck);
3128 		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
3129 		return;
3130 	}
3131 
3132 	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
3133 		lck_rw_done(&group->lck);
3134 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
3135 		return;
3136 	}
3137 
3138 	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
3139 		lck_rw_done(&group->lck);
3140 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
3141 		return;
3142 	}
3143 
3144 	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
3145 		lck_rw_done(&group->lck);
3146 		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
3147 		return;
3148 	}
3149 
3150 	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
3151 	    nodes_count, maps_count, bytes_count);
3152 
3153 	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
3154 	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
3155 	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
3156 	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
3157 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
3158 		lck_rw_done(&group->lck);
3159 		return;
3160 	}
3161 
3162 	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
3163 		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
3164 		lck_rw_done(&group->lck);
3165 		return;
3166 	}
3167 
3168 	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
3169 	if (new_trie.memory == NULL) {
3170 		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
3171 		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
3172 		lck_rw_done(&group->lck);
3173 		return;
3174 	}
3175 
3176 	new_trie.bytes_count = (uint16_t)bytes_count;
3177 	new_trie.nodes_count = (uint16_t)nodes_count;
3178 	new_trie.child_maps_count = (uint16_t)maps_count;
3179 
3180 	/* Initialize the free lists */
3181 	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
3182 	new_trie.nodes_free_next = 0;
3183 	memset(new_trie.nodes, 0, nodes_mem_size);
3184 
3185 	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
3186 	new_trie.child_maps_free_next = 0;
3187 	memset(new_trie.child_maps, 0xff, child_maps_mem_size);
3188 
3189 	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
3190 	new_trie.bytes_free_next = 0;
3191 	memset(new_trie.bytes, 0, bytes_mem_size);
3192 
3193 	/* The root is an empty node */
3194 	new_trie.root = trie_node_alloc(&new_trie);
3195 
3196 	/* Add each signing ID to the trie */
3197 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
3198 	    cursor >= 0;
3199 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
3200 		uint32_t sid_size = 0;
3201 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
3202 		if (error || sid_size == 0) {
3203 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
3204 			insert_error = EINVAL;
3205 			break;
3206 		}
3207 		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
3208 			uint16_t new_node_idx;
3209 			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
3210 			if (error) {
3211 				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
3212 				insert_error = EINVAL;
3213 				break;
3214 			}
3215 			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
3216 			if (new_node_idx == NULL_TRIE_IDX) {
3217 				insert_error = EINVAL;
3218 				break;
3219 			}
3220 		} else {
3221 			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
3222 			insert_error = ENOBUFS;
3223 			break;
3224 		}
3225 	}
3226 
3227 	if (!insert_error) {
3228 		group->signing_id_trie = new_trie;
3229 	} else {
3230 		kfree_data(new_trie.memory, trie_memory_size);
3231 	}
3232 
3233 	lck_rw_done(&group->lck);
3234 }
3235 
3236 static void
flow_divert_handle_flow_states_request(struct flow_divert_group * group)3237 flow_divert_handle_flow_states_request(struct flow_divert_group *group)
3238 {
3239 	struct flow_divert_pcb *fd_cb;
3240 	mbuf_t packet = NULL;
3241 	SLIST_HEAD(, flow_divert_pcb) tmp_list;
3242 	int error = 0;
3243 	uint32_t ctl_unit = 0;
3244 
3245 	SLIST_INIT(&tmp_list);
3246 
3247 	error = flow_divert_packet_init(&nil_pcb, FLOW_DIVERT_PKT_FLOW_STATES, &packet);
3248 	if (error || packet == NULL) {
3249 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_packet_init failed: %d, cannot send flow states", error);
3250 		return;
3251 	}
3252 
3253 	lck_rw_lock_shared(&group->lck);
3254 
3255 	if (!MBUFQ_EMPTY(&group->send_queue)) {
3256 		FDLOG0(LOG_WARNING, &nil_pcb, "flow_divert_handle_flow_states_request: group send queue is not empty");
3257 	}
3258 
3259 	ctl_unit = group->ctl_unit;
3260 
3261 	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3262 		FDRETAIN(fd_cb);
3263 		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3264 	}
3265 
3266 	lck_rw_done(&group->lck);
3267 
3268 	SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
3269 		FDLOCK(fd_cb);
3270 		if (fd_cb->so != NULL) {
3271 			struct flow_divert_flow_state state = {};
3272 			socket_lock(fd_cb->so, 0);
3273 
3274 			state.conn_id = fd_cb->hash;
3275 			state.bytes_written_by_app = fd_cb->bytes_written_by_app;
3276 			state.bytes_sent = fd_cb->bytes_sent;
3277 			state.bytes_received = fd_cb->bytes_received;
3278 			state.send_window = fd_cb->send_window;
3279 			state.send_buffer_bytes = fd_cb->so->so_snd.sb_cc;
3280 
3281 			error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_FLOW_STATE, sizeof(state), &state);
3282 			if (error) {
3283 				FDLOG(LOG_ERR, fd_cb, "Failed to add a flow state: %d", error);
3284 			}
3285 
3286 			socket_unlock(fd_cb->so, 0);
3287 		}
3288 		FDUNLOCK(fd_cb);
3289 		FDRELEASE(fd_cb);
3290 	}
3291 
3292 	error = ctl_enqueuembuf(g_flow_divert_kctl_ref, ctl_unit, packet, CTL_DATA_EOR);
3293 	if (error) {
3294 		FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_handle_flow_states_request: ctl_enqueuembuf returned an error: %d", error);
3295 		mbuf_freem(packet);
3296 	}
3297 }
3298 
3299 static int
flow_divert_input(mbuf_t packet,struct flow_divert_group * group)3300 flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
3301 {
3302 	struct flow_divert_packet_header        hdr;
3303 	int                                                                     error           = 0;
3304 	struct flow_divert_pcb                          *fd_cb;
3305 
3306 	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
3307 		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
3308 		error = EINVAL;
3309 		goto done;
3310 	}
3311 
3312 	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
3313 	if (error) {
3314 		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
3315 		error = ENOBUFS;
3316 		goto done;
3317 	}
3318 
3319 	hdr.conn_id = ntohl(hdr.conn_id);
3320 
3321 	if (hdr.conn_id == 0) {
3322 		switch (hdr.packet_type) {
3323 		case FLOW_DIVERT_PKT_GROUP_INIT:
3324 			flow_divert_handle_group_init(group, packet, sizeof(hdr));
3325 			break;
3326 		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
3327 			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
3328 			break;
3329 		case FLOW_DIVERT_PKT_FLOW_STATES_REQUEST:
3330 			flow_divert_handle_flow_states_request(group);
3331 			break;
3332 		default:
3333 			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
3334 			break;
3335 		}
3336 		goto done;
3337 	}
3338 
3339 	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
3340 	if (fd_cb == NULL) {
3341 		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
3342 			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
3343 		}
3344 		goto done;
3345 	}
3346 
3347 	switch (hdr.packet_type) {
3348 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
3349 		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
3350 		break;
3351 	case FLOW_DIVERT_PKT_CLOSE:
3352 		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
3353 		break;
3354 	case FLOW_DIVERT_PKT_DATA:
3355 		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
3356 		break;
3357 	case FLOW_DIVERT_PKT_READ_NOTIFY:
3358 		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
3359 		break;
3360 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
3361 		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
3362 		break;
3363 	default:
3364 		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
3365 		break;
3366 	}
3367 
3368 	FDRELEASE(fd_cb);
3369 
3370 done:
3371 	mbuf_freem(packet);
3372 	return error;
3373 }
3374 
3375 static void
flow_divert_close_all(struct flow_divert_group * group)3376 flow_divert_close_all(struct flow_divert_group *group)
3377 {
3378 	struct flow_divert_pcb                  *fd_cb;
3379 	SLIST_HEAD(, flow_divert_pcb)   tmp_list;
3380 
3381 	SLIST_INIT(&tmp_list);
3382 
3383 	lck_rw_lock_exclusive(&group->lck);
3384 
3385 	MBUFQ_DRAIN(&group->send_queue);
3386 
3387 	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3388 		FDRETAIN(fd_cb);
3389 		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3390 	}
3391 
3392 	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;
3393 
3394 	lck_rw_done(&group->lck);
3395 
3396 	while (!SLIST_EMPTY(&tmp_list)) {
3397 		fd_cb = SLIST_FIRST(&tmp_list);
3398 		FDLOCK(fd_cb);
3399 		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
3400 		if (fd_cb->so != NULL) {
3401 			socket_lock(fd_cb->so, 0);
3402 			flow_divert_pcb_remove(fd_cb);
3403 			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
3404 			fd_cb->so->so_error = ECONNABORTED;
3405 			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
3406 			socket_unlock(fd_cb->so, 0);
3407 		}
3408 		FDUNLOCK(fd_cb);
3409 		FDRELEASE(fd_cb);
3410 	}
3411 }
3412 
3413 void
flow_divert_detach(struct socket * so)3414 flow_divert_detach(struct socket *so)
3415 {
3416 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3417 
3418 	if (!SO_IS_DIVERTED(so)) {
3419 		return;
3420 	}
3421 
3422 	so->so_flags &= ~SOF_FLOW_DIVERT;
3423 	so->so_fd_pcb = NULL;
3424 
3425 	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);
3426 
3427 	if (fd_cb->group != NULL) {
3428 		/* Last-ditch effort to send any buffered data */
3429 		flow_divert_send_buffered_data(fd_cb, TRUE);
3430 
3431 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
3432 		flow_divert_send_close_if_needed(fd_cb);
3433 		/* Remove from the group */
3434 		flow_divert_pcb_remove(fd_cb);
3435 	}
3436 
3437 	socket_unlock(so, 0);
3438 	FDLOCK(fd_cb);
3439 	fd_cb->so = NULL;
3440 	FDUNLOCK(fd_cb);
3441 	socket_lock(so, 0);
3442 
3443 	FDRELEASE(fd_cb);       /* Release the socket's reference */
3444 }
3445 
3446 static int
flow_divert_close(struct socket * so)3447 flow_divert_close(struct socket *so)
3448 {
3449 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3450 
3451 	if (!SO_IS_DIVERTED(so)) {
3452 		return EINVAL;
3453 	}
3454 
3455 	FDLOG0(LOG_INFO, fd_cb, "Closing");
3456 
3457 	if (SOCK_TYPE(so) == SOCK_STREAM) {
3458 		soisdisconnecting(so);
3459 		sbflush(&so->so_rcv);
3460 	}
3461 
3462 	flow_divert_send_buffered_data(fd_cb, TRUE);
3463 	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
3464 	flow_divert_send_close_if_needed(fd_cb);
3465 
3466 	/* Remove from the group */
3467 	flow_divert_pcb_remove(fd_cb);
3468 
3469 	return 0;
3470 }
3471 
3472 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3473 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3474     sae_connid_t cid __unused)
3475 {
3476 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3477 		return EINVAL;
3478 	}
3479 
3480 	return flow_divert_close(so);
3481 }
3482 
3483 static int
flow_divert_shutdown(struct socket * so)3484 flow_divert_shutdown(struct socket *so)
3485 {
3486 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3487 
3488 	if (!SO_IS_DIVERTED(so)) {
3489 		return EINVAL;
3490 	}
3491 
3492 	FDLOG0(LOG_INFO, fd_cb, "Can't send more");
3493 
3494 	socantsendmore(so);
3495 
3496 	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
3497 	flow_divert_send_close_if_needed(fd_cb);
3498 
3499 	return 0;
3500 }
3501 
3502 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3503 flow_divert_rcvd(struct socket *so, int flags __unused)
3504 {
3505 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3506 	int space = 0;
3507 
3508 	if (!SO_IS_DIVERTED(so)) {
3509 		return EINVAL;
3510 	}
3511 
3512 	space = sbspace(&so->so_rcv);
3513 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3514 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3515 	    (space > 0) &&
3516 	    flow_divert_send_read_notification(fd_cb) == 0) {
3517 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3518 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3519 	}
3520 
3521 	return 0;
3522 }
3523 
3524 static int
flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet,struct sockaddr * toaddr)3525 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr)
3526 {
3527 	int error = 0;
3528 	int port  = 0;
3529 
3530 	if (!flow_divert_is_sockaddr_valid(toaddr)) {
3531 		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3532 		error = EINVAL;
3533 		goto done;
3534 	}
3535 
3536 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
3537 	if (error) {
3538 		goto done;
3539 	}
3540 
3541 	if (toaddr->sa_family == AF_INET) {
3542 		port = ntohs((satosin(toaddr))->sin_port);
3543 	} else {
3544 		port = ntohs((satosin6(toaddr))->sin6_port);
3545 	}
3546 
3547 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3548 	if (error) {
3549 		goto done;
3550 	}
3551 
3552 done:
3553 	return error;
3554 }
3555 
3556 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_t buffer)3557 flow_divert_get_buffered_target_address(mbuf_t buffer)
3558 {
3559 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3560 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3561 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3562 			return toaddr;
3563 		}
3564 	}
3565 	return NULL;
3566 }
3567 
3568 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3569 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3570 {
3571 	switch (addr->sa_family) {
3572 	case AF_INET:
3573 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3574 			return FALSE;
3575 		}
3576 		break;
3577 	case AF_INET6:
3578 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3579 			return FALSE;
3580 		}
3581 		break;
3582 	default:
3583 		return FALSE;
3584 	}
3585 	return TRUE;
3586 }
3587 
3588 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3589 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3590     struct sockaddr **dup)
3591 {
3592 	int                                             error           = 0;
3593 	struct sockaddr                 *result;
3594 	struct sockaddr_storage ss;
3595 
3596 	if (addr != NULL) {
3597 		result = addr;
3598 	} else {
3599 		memset(&ss, 0, sizeof(ss));
3600 		ss.ss_family = family;
3601 		if (ss.ss_family == AF_INET) {
3602 			ss.ss_len = sizeof(struct sockaddr_in);
3603 		} else if (ss.ss_family == AF_INET6) {
3604 			ss.ss_len = sizeof(struct sockaddr_in6);
3605 		} else {
3606 			error = EINVAL;
3607 		}
3608 		result = (struct sockaddr *)&ss;
3609 	}
3610 
3611 	if (!error) {
3612 		*dup = dup_sockaddr(result, 1);
3613 		if (*dup == NULL) {
3614 			error = ENOBUFS;
3615 		}
3616 	}
3617 
3618 	return error;
3619 }
3620 
3621 static void
flow_divert_disconnect_socket(struct socket * so,bool is_connected,bool delay_if_needed)3622 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
3623 {
3624 	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
3625 		soisdisconnected(so);
3626 	}
3627 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
3628 		struct inpcb *inp = sotoinpcb(so);
3629 		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
3630 			/*
3631 			 * Let NetworkStatistics know this PCB is going away
3632 			 * before we detach it.
3633 			 */
3634 			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
3635 				nstat_pcb_detach(inp);
3636 			}
3637 
3638 			if (SOCK_DOM(so) == PF_INET6) {
3639 				ROUTE_RELEASE(&inp->in6p_route);
3640 			} else {
3641 				ROUTE_RELEASE(&inp->inp_route);
3642 			}
3643 			if (delay_if_needed) {
3644 				(void) cfil_sock_is_dead(so);
3645 			} else {
3646 				inp->inp_state = INPCB_STATE_DEAD;
3647 				inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
3648 			}
3649 			/* makes sure we're not called twice from so_close */
3650 			so->so_flags |= SOF_PCBCLEARING;
3651 		}
3652 	}
3653 }
3654 
3655 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3656 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3657 {
3658 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3659 
3660 	if (!SO_IS_DIVERTED(so)) {
3661 		return EINVAL;
3662 	}
3663 
3664 	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3665 		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3666 			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3667 		}
3668 	}
3669 
3670 	if (SOCK_DOM(so) == PF_INET) {
3671 		return g_tcp_protosw->pr_ctloutput(so, sopt);
3672 	} else if (SOCK_DOM(so) == PF_INET6) {
3673 		return g_tcp6_protosw->pr_ctloutput(so, sopt);
3674 	}
3675 	return 0;
3676 }
3677 
3678 static errno_t
flow_divert_connect_out_internal(struct socket * so,struct sockaddr * to,proc_t p,bool implicit)3679 flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
3680 {
3681 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3682 	int                                             error   = 0;
3683 	struct inpcb                    *inp    = sotoinpcb(so);
3684 	struct sockaddr_in              *sinp;
3685 	mbuf_t                                  connect_packet = NULL;
3686 	int                                             do_send = 1;
3687 
3688 	if (!SO_IS_DIVERTED(so)) {
3689 		return EINVAL;
3690 	}
3691 
3692 	if (fd_cb->group == NULL) {
3693 		error = ENETUNREACH;
3694 		goto done;
3695 	}
3696 
3697 	if (inp == NULL) {
3698 		error = EINVAL;
3699 		goto done;
3700 	} else if (inp->inp_state == INPCB_STATE_DEAD) {
3701 		if (so->so_error) {
3702 			error = so->so_error;
3703 			so->so_error = 0;
3704 		} else {
3705 			error = EINVAL;
3706 		}
3707 		goto done;
3708 	}
3709 
3710 	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3711 		error = EALREADY;
3712 		goto done;
3713 	}
3714 
3715 	FDLOG0(LOG_INFO, fd_cb, "Connecting");
3716 
3717 	if (fd_cb->connect_packet == NULL) {
3718 		struct sockaddr_in sin = {};
3719 		struct ifnet *ifp = NULL;
3720 
3721 		if (to == NULL) {
3722 			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
3723 			error = EINVAL;
3724 			goto done;
3725 		}
3726 
3727 		if (!flow_divert_is_sockaddr_valid(to)) {
3728 			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
3729 			error = EINVAL;
3730 			goto done;
3731 		}
3732 
3733 		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
3734 		if (fd_cb->original_remote_endpoint == NULL) {
3735 			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
3736 			error = ENOMEM;
3737 			goto done;
3738 		}
3739 		fd_cb->original_vflag = inp->inp_vflag;
3740 		fd_cb->original_last_outifp = inp->inp_last_outifp;
3741 		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;
3742 
3743 		sinp = (struct sockaddr_in *)(void *)to;
3744 		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
3745 			error = EAFNOSUPPORT;
3746 			goto done;
3747 		}
3748 
3749 		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
3750 			struct sockaddr_in6 sin6 = {};
3751 			sin6.sin6_family = AF_INET6;
3752 			sin6.sin6_len = sizeof(struct sockaddr_in6);
3753 			sin6.sin6_port = satosin6(to)->sin6_port;
3754 			sin6.sin6_addr = satosin6(to)->sin6_addr;
3755 			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
3756 				in6_sin6_2_sin(&sin, &sin6);
3757 				to = (struct sockaddr *)&sin;
3758 			}
3759 		}
3760 
3761 		if (to->sa_family == AF_INET6) {
3762 			struct sockaddr_in6 *to6 = satosin6(to);
3763 
3764 			inp->inp_vflag &= ~INP_IPV4;
3765 			inp->inp_vflag |= INP_IPV6;
3766 			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
3767 			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
3768 			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
3769 			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
3770 			if (error) {
3771 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
3772 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
3773 					error = 0;
3774 				} else {
3775 					goto done;
3776 				}
3777 			}
3778 			if (ifp != NULL) {
3779 				inp->in6p_last_outifp = ifp;
3780 				ifnet_release(ifp);
3781 			}
3782 
3783 			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
3784 			    in6_embedded_scope &&
3785 			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
3786 				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
3787 				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
3788 			}
3789 
3790 			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
3791 			    in6_embedded_scope &&
3792 			    to6->sin6_addr.s6_addr16[1] != 0) {
3793 				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
3794 				to6->sin6_addr.s6_addr16[1] = 0;
3795 			}
3796 		} else if (to->sa_family == AF_INET) {
3797 			inp->inp_vflag |= INP_IPV4;
3798 			inp->inp_vflag &= ~INP_IPV6;
3799 			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
3800 			fd_cb->local_endpoint.sin.sin_family = AF_INET;
3801 			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
3802 			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
3803 			if (error) {
3804 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
3805 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
3806 					error = 0;
3807 				} else {
3808 					goto done;
3809 				}
3810 			}
3811 			if (ifp != NULL) {
3812 				inp->inp_last_outifp = ifp;
3813 				ifnet_release(ifp);
3814 			}
3815 		} else {
3816 			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
3817 		}
3818 
3819 		error = flow_divert_check_no_cellular(fd_cb) ||
3820 		    flow_divert_check_no_expensive(fd_cb) ||
3821 		    flow_divert_check_no_constrained(fd_cb);
3822 		if (error) {
3823 			goto done;
3824 		}
3825 
3826 		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
3827 		    !implicit || /* connect() was called or */
3828 		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
3829 		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
3830 			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
3831 		}
3832 
3833 		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
3834 		if (error) {
3835 			goto done;
3836 		}
3837 
3838 		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
3839 			flow_divert_set_remote_endpoint(fd_cb, to);
3840 			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
3841 		}
3842 
3843 		if (implicit) {
3844 			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
3845 		}
3846 
3847 		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
3848 			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
3849 			do_send = 0;
3850 		}
3851 
3852 		fd_cb->connect_packet = connect_packet;
3853 		connect_packet = NULL;
3854 	} else {
3855 		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
3856 	}
3857 
3858 	if (do_send) {
3859 		error = flow_divert_send_connect_packet(fd_cb);
3860 		if (error) {
3861 			goto done;
3862 		}
3863 
3864 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3865 	}
3866 
3867 	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
3868 		soisconnected(so);
3869 	} else {
3870 		soisconnecting(so);
3871 	}
3872 
3873 done:
3874 	return error;
3875 }
3876 
3877 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3878 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3879 {
3880 #if CONTENT_FILTER
3881 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3882 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3883 		if (error != 0) {
3884 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3885 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3886 			return error;
3887 		}
3888 	}
3889 #endif /* CONTENT_FILTER */
3890 
3891 	return flow_divert_connect_out_internal(so, to, p, false);
3892 }
3893 
3894 static int
flow_divert_connectx_out_common(struct socket * so,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_connid_t * pcid,struct uio * auio,user_ssize_t * bytes_written)3895 flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
3896     struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
3897 {
3898 	struct inpcb *inp = sotoinpcb(so);
3899 	int error;
3900 
3901 	if (inp == NULL) {
3902 		return EINVAL;
3903 	}
3904 
3905 	VERIFY(dst != NULL);
3906 
3907 #if CONTENT_FILTER && NECP
3908 	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3909 	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
3910 	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3911 		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
3912 	}
3913 #endif /* CONTENT_FILTER */
3914 
3915 	/* bind socket to the specified interface, if requested */
3916 	if (ifscope != IFSCOPE_NONE &&
3917 	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
3918 		return error;
3919 	}
3920 
3921 	error = flow_divert_connect_out(so, dst, p);
3922 
3923 	if (error != 0) {
3924 		return error;
3925 	}
3926 
3927 	/* if there is data, send it */
3928 	if (auio != NULL) {
3929 		user_ssize_t datalen = 0;
3930 
3931 		socket_unlock(so, 0);
3932 
3933 		VERIFY(bytes_written != NULL);
3934 
3935 		datalen = uio_resid(auio);
3936 		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
3937 		socket_lock(so, 0);
3938 
3939 		if (error == 0 || error == EWOULDBLOCK) {
3940 			*bytes_written = datalen - uio_resid(auio);
3941 		}
3942 
3943 		/*
3944 		 * sosend returns EWOULDBLOCK if it's a non-blocking
3945 		 * socket or a timeout occured (this allows to return
3946 		 * the amount of queued data through sendit()).
3947 		 *
3948 		 * However, connectx() returns EINPROGRESS in case of a
3949 		 * blocking socket. So we change the return value here.
3950 		 */
3951 		if (error == EWOULDBLOCK) {
3952 			error = EINPROGRESS;
3953 		}
3954 	}
3955 
3956 	if (error == 0 && pcid != NULL) {
3957 		*pcid = 1;      /* there is only 1 connection for a TCP */
3958 	}
3959 
3960 	return error;
3961 }
3962 
3963 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3964 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3965     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3966     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3967     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3968 {
3969 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3970 }
3971 
3972 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3973 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3974     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3975     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3976     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3977 {
3978 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3979 }
3980 
3981 static errno_t
flow_divert_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)3982 flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
3983 {
3984 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3985 	int                                             error   = 0;
3986 	struct inpcb *inp;
3987 #if CONTENT_FILTER
3988 	struct m_tag *cfil_tag = NULL;
3989 #endif
3990 
3991 	if (!SO_IS_DIVERTED(so)) {
3992 		return EINVAL;
3993 	}
3994 
3995 	inp = sotoinpcb(so);
3996 	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
3997 		error = ECONNRESET;
3998 		goto done;
3999 	}
4000 
4001 	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
4002 		/* The provider considers this datagram flow to be closed, so no data can be sent */
4003 		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
4004 		error = EHOSTUNREACH;
4005 		goto done;
4006 	}
4007 
4008 #if CONTENT_FILTER
4009 	/*
4010 	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
4011 	 * retrieve the CFIL saved remote address from the mbuf and use it.
4012 	 */
4013 	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
4014 		struct sockaddr *cfil_faddr = NULL;
4015 		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
4016 		if (cfil_tag) {
4017 			to = (struct sockaddr *)(void *)cfil_faddr;
4018 		}
4019 		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
4020 	}
4021 #endif
4022 
4023 	/* Implicit connect */
4024 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4025 		FDLOG0(LOG_INFO, fd_cb, "implicit connect");
4026 
4027 		error = flow_divert_connect_out_internal(so, to, p, true);
4028 		if (error) {
4029 			goto done;
4030 		}
4031 	} else {
4032 		error = flow_divert_check_no_cellular(fd_cb) ||
4033 		    flow_divert_check_no_expensive(fd_cb) ||
4034 		    flow_divert_check_no_constrained(fd_cb);
4035 		if (error) {
4036 			goto done;
4037 		}
4038 	}
4039 
4040 	if (data != NULL) {
4041 		size_t data_size = 0;
4042 		if (mbuf_flags(data) & M_PKTHDR) {
4043 			data_size = mbuf_pkthdr_len(data);
4044 		} else {
4045 			for (mbuf_t blob = data; blob != NULL; blob = mbuf_next(blob)) {
4046 				data_size += mbuf_len(blob);
4047 			}
4048 		}
4049 
4050 		FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", data_size);
4051 		fd_cb->bytes_written_by_app += data_size;
4052 
4053 		error = flow_divert_send_app_data(fd_cb, data, data_size, to);
4054 
4055 		data = NULL;
4056 
4057 		if (error) {
4058 			goto done;
4059 		}
4060 	}
4061 
4062 	if (flags & PRUS_EOF) {
4063 		flow_divert_shutdown(so);
4064 	}
4065 
4066 done:
4067 	if (data) {
4068 		mbuf_freem(data);
4069 	}
4070 	if (control) {
4071 		mbuf_free(control);
4072 	}
4073 #if CONTENT_FILTER
4074 	if (cfil_tag) {
4075 		m_tag_free(cfil_tag);
4076 	}
4077 #endif
4078 
4079 	return error;
4080 }
4081 
4082 static int
flow_divert_preconnect(struct socket * so)4083 flow_divert_preconnect(struct socket *so)
4084 {
4085 	int error = 0;
4086 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4087 
4088 	if (!SO_IS_DIVERTED(so)) {
4089 		return EINVAL;
4090 	}
4091 
4092 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4093 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4094 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
4095 		if (error) {
4096 			return error;
4097 		}
4098 
4099 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4100 	}
4101 
4102 	soclearfastopen(so);
4103 
4104 	return error;
4105 }
4106 
4107 static void
flow_divert_set_protosw(struct socket * so)4108 flow_divert_set_protosw(struct socket *so)
4109 {
4110 	if (SOCK_DOM(so) == PF_INET) {
4111 		so->so_proto = &g_flow_divert_in_protosw;
4112 	} else {
4113 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4114 	}
4115 }
4116 
4117 static void
flow_divert_set_udp_protosw(struct socket * so)4118 flow_divert_set_udp_protosw(struct socket *so)
4119 {
4120 	if (SOCK_DOM(so) == PF_INET) {
4121 		so->so_proto = &g_flow_divert_in_udp_protosw;
4122 	} else {
4123 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4124 	}
4125 }
4126 
4127 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)4128 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
4129 {
4130 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4131 	struct inpcb *inp;
4132 	int error = 0;
4133 
4134 	inp = sotoinpcb(so);
4135 	if (inp == NULL) {
4136 		return EINVAL;
4137 	}
4138 
4139 	if (fd_cb == NULL) {
4140 		error = flow_divert_pcb_init(so);
4141 		fd_cb  = so->so_fd_pcb;
4142 		if (error != 0 || fd_cb == NULL) {
4143 			goto done;
4144 		}
4145 	}
4146 	return flow_divert_data_out(so, flags, data, to, control, p);
4147 
4148 done:
4149 	if (data) {
4150 		mbuf_freem(data);
4151 	}
4152 	if (control) {
4153 		mbuf_free(control);
4154 	}
4155 
4156 	return error;
4157 }
4158 
4159 static errno_t
flow_divert_pcb_init_internal(struct socket * so,uint32_t ctl_unit,uint32_t aggregate_unit)4160 flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
4161 {
4162 	errno_t error = 0;
4163 	struct flow_divert_pcb *fd_cb = NULL;
4164 	uint32_t agg_unit = aggregate_unit;
4165 	uint32_t policy_control_unit = ctl_unit;
4166 	bool is_aggregate = false;
4167 
4168 	if (so->so_flags & SOF_FLOW_DIVERT) {
4169 		return EALREADY;
4170 	}
4171 
4172 	fd_cb = flow_divert_pcb_create(so);
4173 	if (fd_cb == NULL) {
4174 		return ENOMEM;
4175 	}
4176 
4177 	do {
4178 		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
4179 		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
4180 			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
4181 			error = EINVAL;
4182 			break;
4183 		}
4184 
4185 		error = flow_divert_add_to_group(fd_cb, group_unit);
4186 		if (error == 0) {
4187 			so->so_fd_pcb = fd_cb;
4188 			so->so_flags |= SOF_FLOW_DIVERT;
4189 			fd_cb->control_group_unit = group_unit;
4190 			fd_cb->policy_control_unit = ctl_unit;
4191 			fd_cb->aggregate_unit = agg_unit;
4192 			if (is_aggregate) {
4193 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
4194 			} else {
4195 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
4196 			}
4197 
4198 			if (SOCK_TYPE(so) == SOCK_STREAM) {
4199 				flow_divert_set_protosw(so);
4200 			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
4201 				flow_divert_set_udp_protosw(so);
4202 			}
4203 
4204 			FDLOG0(LOG_INFO, fd_cb, "Created");
4205 		} else if (error != ENOENT) {
4206 			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
4207 		}
4208 	} while (error == ENOENT);
4209 
4210 	if (error != 0) {
4211 		FDRELEASE(fd_cb);
4212 	}
4213 
4214 	return error;
4215 }
4216 
4217 errno_t
flow_divert_pcb_init(struct socket * so)4218 flow_divert_pcb_init(struct socket *so)
4219 {
4220 	struct inpcb *inp = sotoinpcb(so);
4221 	uint32_t aggregate_units = 0;
4222 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4223 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4224 }
4225 
4226 errno_t
flow_divert_token_set(struct socket * so,struct sockopt * sopt)4227 flow_divert_token_set(struct socket *so, struct sockopt *sopt)
4228 {
4229 	uint32_t ctl_unit = 0;
4230 	uint32_t key_unit = 0;
4231 	uint32_t aggregate_unit = 0;
4232 	int error = 0;
4233 	int hmac_error = 0;
4234 	mbuf_t token = NULL;
4235 
4236 	if (so->so_flags & SOF_FLOW_DIVERT) {
4237 		error = EALREADY;
4238 		goto done;
4239 	}
4240 
4241 	if (g_init_result) {
4242 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
4243 		error = ENOPROTOOPT;
4244 		goto done;
4245 	}
4246 
4247 	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
4248 	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
4249 	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
4250 		error = EINVAL;
4251 		goto done;
4252 	} else {
4253 		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
4254 			struct tcpcb *tp = sototcpcb(so);
4255 			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
4256 				error = EINVAL;
4257 				goto done;
4258 			}
4259 		}
4260 	}
4261 
4262 	error = soopt_getm(sopt, &token);
4263 	if (error) {
4264 		token = NULL;
4265 		goto done;
4266 	}
4267 
4268 	error = soopt_mcopyin(sopt, token);
4269 	if (error) {
4270 		token = NULL;
4271 		goto done;
4272 	}
4273 
4274 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
4275 	if (!error) {
4276 		key_unit = ntohl(key_unit);
4277 		if (key_unit >= GROUP_COUNT_MAX) {
4278 			key_unit = 0;
4279 		}
4280 	} else if (error != ENOENT) {
4281 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
4282 		goto done;
4283 	} else {
4284 		key_unit = 0;
4285 	}
4286 
4287 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
4288 	if (error) {
4289 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
4290 		goto done;
4291 	}
4292 
4293 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
4294 	if (error && error != ENOENT) {
4295 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
4296 		goto done;
4297 	}
4298 
4299 	/* A valid kernel control unit is required */
4300 	ctl_unit = ntohl(ctl_unit);
4301 	aggregate_unit = ntohl(aggregate_unit);
4302 
4303 	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
4304 		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
4305 		if (hmac_error && hmac_error != ENOENT) {
4306 			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
4307 			error = hmac_error;
4308 			goto done;
4309 		}
4310 	}
4311 
4312 	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
4313 	if (error == 0) {
4314 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4315 		int log_level = LOG_NOTICE;
4316 
4317 		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
4318 		if (error == 0) {
4319 			fd_cb->log_level = (uint8_t)log_level;
4320 		}
4321 		error = 0;
4322 
4323 		fd_cb->connect_token = token;
4324 		token = NULL;
4325 
4326 		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
4327 	}
4328 
4329 	if (hmac_error == 0) {
4330 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4331 		if (fd_cb != NULL) {
4332 			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
4333 		}
4334 	}
4335 
4336 done:
4337 	if (token != NULL) {
4338 		mbuf_freem(token);
4339 	}
4340 
4341 	return error;
4342 }
4343 
4344 errno_t
flow_divert_token_get(struct socket * so,struct sockopt * sopt)4345 flow_divert_token_get(struct socket *so, struct sockopt *sopt)
4346 {
4347 	uint32_t                                        ctl_unit;
4348 	int                                                     error                                           = 0;
4349 	uint8_t                                         hmac[SHA_DIGEST_LENGTH];
4350 	struct flow_divert_pcb          *fd_cb                                          = so->so_fd_pcb;
4351 	mbuf_t                                          token                                           = NULL;
4352 	struct flow_divert_group        *control_group                          = NULL;
4353 
4354 	if (!SO_IS_DIVERTED(so)) {
4355 		error = EINVAL;
4356 		goto done;
4357 	}
4358 
4359 	if (fd_cb->group == NULL) {
4360 		error = EINVAL;
4361 		goto done;
4362 	}
4363 
4364 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
4365 	if (error) {
4366 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
4367 		goto done;
4368 	}
4369 
4370 	ctl_unit = htonl(fd_cb->group->ctl_unit);
4371 
4372 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
4373 	if (error) {
4374 		goto done;
4375 	}
4376 
4377 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
4378 	if (error) {
4379 		goto done;
4380 	}
4381 
4382 	if (fd_cb->app_data != NULL) {
4383 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
4384 		if (error) {
4385 			goto done;
4386 		}
4387 	}
4388 
4389 	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
4390 	if (control_group != NULL) {
4391 		lck_rw_lock_shared(&control_group->lck);
4392 		ctl_unit = htonl(control_group->ctl_unit);
4393 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
4394 		if (!error) {
4395 			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
4396 		}
4397 		lck_rw_done(&control_group->lck);
4398 		FDGRP_RELEASE(control_group);
4399 	} else {
4400 		error = ENOPROTOOPT;
4401 	}
4402 
4403 	if (error) {
4404 		goto done;
4405 	}
4406 
4407 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
4408 	if (error) {
4409 		goto done;
4410 	}
4411 
4412 	if (sopt->sopt_val == USER_ADDR_NULL) {
4413 		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
4414 		sopt->sopt_valsize = mbuf_pkthdr_len(token);
4415 		goto done;
4416 	}
4417 
4418 	error = soopt_mcopyout(sopt, token);
4419 	if (error) {
4420 		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
4421 		goto done;
4422 	}
4423 
4424 done:
4425 	if (token != NULL) {
4426 		mbuf_freem(token);
4427 	}
4428 
4429 	return error;
4430 }
4431 
4432 void
flow_divert_group_destroy(struct flow_divert_group * group)4433 flow_divert_group_destroy(struct flow_divert_group *group)
4434 {
4435 	lck_rw_lock_exclusive(&group->lck);
4436 
4437 	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);
4438 
4439 	if (group->token_key != NULL) {
4440 		memset(group->token_key, 0, group->token_key_size);
4441 		kfree_data(group->token_key, group->token_key_size);
4442 		group->token_key = NULL;
4443 		group->token_key_size = 0;
4444 	}
4445 
4446 	/* Re-set the current trie */
4447 	if (group->signing_id_trie.memory != NULL) {
4448 		kfree_data_addr(group->signing_id_trie.memory);
4449 	}
4450 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
4451 	group->signing_id_trie.root = NULL_TRIE_IDX;
4452 
4453 	lck_rw_done(&group->lck);
4454 
4455 	zfree(flow_divert_group_zone, group);
4456 }
4457 
4458 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4459 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4460 {
4461 	struct flow_divert_group *new_group = NULL;
4462 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4463 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4464 	RB_INIT(&new_group->pcb_tree);
4465 	new_group->ctl_unit = unit;
4466 	new_group->in_process_pid = pid;
4467 	MBUFQ_INIT(&new_group->send_queue);
4468 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4469 	new_group->ref_count = 1;
4470 	new_group->order = FLOW_DIVERT_ORDER_LAST;
4471 	return new_group;
4472 }
4473 
4474 static errno_t
flow_divert_kctl_setup(u_int32_t * unit,void ** unitinfo)4475 flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
4476 {
4477 	if (unit == NULL || unitinfo == NULL) {
4478 		return EINVAL;
4479 	}
4480 
4481 	struct flow_divert_group *new_group = NULL;
4482 	errno_t error = 0;
4483 	lck_rw_lock_shared(&g_flow_divert_group_lck);
4484 	if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
4485 		// Return next unused in-process unit
4486 		u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
4487 		struct flow_divert_group *group_next = NULL;
4488 		TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
4489 			if (group_next->ctl_unit > unit_cursor) {
4490 				// Found a gap, lets fill it in
4491 				break;
4492 			}
4493 			unit_cursor = group_next->ctl_unit + 1;
4494 			if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4495 				break;
4496 			}
4497 		}
4498 		if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4499 			error = EBUSY;
4500 		} else {
4501 			*unit = unit_cursor;
4502 			new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
4503 			if (group_next != NULL) {
4504 				TAILQ_INSERT_BEFORE(group_next, new_group, chain);
4505 			} else {
4506 				TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
4507 			}
4508 			g_active_group_count++;
4509 		}
4510 	} else {
4511 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4512 			error = EPERM;
4513 		} else {
4514 			if (g_flow_divert_groups == NULL) {
4515 				g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4516 				    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4517 			}
4518 
4519 			// Return next unused group unit
4520 			bool found_unused_unit = false;
4521 			u_int32_t unit_cursor;
4522 			for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
4523 				struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
4524 				if (group == NULL) {
4525 					// Open slot, assign this one
4526 					*unit = unit_cursor;
4527 					new_group = flow_divert_allocate_group(*unit, 0);
4528 					g_flow_divert_groups[*unit] = new_group;
4529 					found_unused_unit = true;
4530 					g_active_group_count++;
4531 					break;
4532 				}
4533 			}
4534 			if (!found_unused_unit) {
4535 				error = EBUSY;
4536 			}
4537 		}
4538 	}
4539 	lck_rw_done(&g_flow_divert_group_lck);
4540 
4541 	*unitinfo = new_group;
4542 
4543 	return error;
4544 }
4545 
4546 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4547 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4548 {
4549 	if (unitinfo == NULL) {
4550 		return EINVAL;
4551 	}
4552 
4553 	// Just validate. The group will already have been allocated.
4554 	struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4555 	if (group == NULL || sac->sc_unit != group->ctl_unit) {
4556 		FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4557 		    sac->sc_unit, group ? group->ctl_unit : 0);
4558 		return EINVAL;
4559 	}
4560 
4561 	return 0;
4562 }
4563 
4564 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4565 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4566 {
4567 	struct flow_divert_group        *group  = NULL;
4568 	errno_t                                         error   = 0;
4569 
4570 	if (unitinfo == NULL) {
4571 		return 0;
4572 	}
4573 
4574 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4575 
4576 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4577 
4578 	if (g_active_group_count == 0) {
4579 		panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4580 		    unit, g_active_group_count);
4581 	}
4582 
4583 	if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4584 		if (unit >= GROUP_COUNT_MAX) {
4585 			return EINVAL;
4586 		}
4587 
4588 		if (g_flow_divert_groups == NULL) {
4589 			panic("flow divert group %u is disconnecting, but groups array is NULL",
4590 			    unit);
4591 		}
4592 		group = g_flow_divert_groups[unit];
4593 
4594 		if (group != (struct flow_divert_group *)unitinfo) {
4595 			panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4596 		}
4597 
4598 		g_flow_divert_groups[unit] = NULL;
4599 	} else {
4600 		group = (struct flow_divert_group *)unitinfo;
4601 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4602 			panic("flow divert group %u is disconnecting, but in-process group list is empty",
4603 			    unit);
4604 		}
4605 
4606 		TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4607 	}
4608 
4609 	g_active_group_count--;
4610 
4611 	if (g_active_group_count == 0) {
4612 		kfree_type(struct flow_divert_group *,
4613 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4614 		g_flow_divert_groups = NULL;
4615 	}
4616 
4617 	lck_rw_done(&g_flow_divert_group_lck);
4618 
4619 	if (group != NULL) {
4620 		flow_divert_close_all(group);
4621 		FDGRP_RELEASE(group);
4622 	} else {
4623 		error = EINVAL;
4624 	}
4625 
4626 	return error;
4627 }
4628 
4629 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)4630 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_t m, __unused int flags)
4631 {
4632 	errno_t error = 0;
4633 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4634 	if (group != NULL) {
4635 		error = flow_divert_input(m, group);
4636 		FDGRP_RELEASE(group);
4637 	} else {
4638 		error = ENOENT;
4639 	}
4640 	return error;
4641 }
4642 
4643 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4644 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4645 {
4646 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4647 	if (group == NULL) {
4648 		return;
4649 	}
4650 
4651 	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4652 		struct flow_divert_pcb                  *fd_cb;
4653 		SLIST_HEAD(, flow_divert_pcb)   tmp_list;
4654 
4655 		lck_rw_lock_exclusive(&group->lck);
4656 
4657 		while (!MBUFQ_EMPTY(&group->send_queue)) {
4658 			mbuf_t next_packet;
4659 			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4660 			next_packet = MBUFQ_FIRST(&group->send_queue);
4661 			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4662 			if (error) {
4663 				FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4664 				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4665 				lck_rw_done(&group->lck);
4666 				return;
4667 			}
4668 			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4669 		}
4670 
4671 		SLIST_INIT(&tmp_list);
4672 
4673 		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4674 			FDRETAIN(fd_cb);
4675 			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4676 		}
4677 
4678 		lck_rw_done(&group->lck);
4679 
4680 		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4681 			FDLOCK(fd_cb);
4682 			if (fd_cb->so != NULL) {
4683 				socket_lock(fd_cb->so, 0);
4684 				if (fd_cb->group != NULL) {
4685 					flow_divert_send_buffered_data(fd_cb, FALSE);
4686 				}
4687 				socket_unlock(fd_cb->so, 0);
4688 			}
4689 			FDUNLOCK(fd_cb);
4690 			FDRELEASE(fd_cb);
4691 		}
4692 	}
4693 
4694 	FDGRP_RELEASE(group);
4695 }
4696 
4697 static int
flow_divert_kctl_init(void)4698 flow_divert_kctl_init(void)
4699 {
4700 	struct kern_ctl_reg     ctl_reg;
4701 	int                     result;
4702 
4703 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4704 
4705 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4706 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4707 
4708 	// Do not restrict to privileged processes. flow_divert_kctl_setup checks
4709 	// permissions separately.
4710 	ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4711 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4712 
4713 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4714 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4715 	ctl_reg.ctl_send = flow_divert_kctl_send;
4716 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4717 	ctl_reg.ctl_setup = flow_divert_kctl_setup;
4718 
4719 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4720 
4721 	if (result) {
4722 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4723 		return result;
4724 	}
4725 
4726 	return 0;
4727 }
4728 
4729 void
flow_divert_init(void)4730 flow_divert_init(void)
4731 {
4732 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4733 	nil_pcb.log_level = LOG_NOTICE;
4734 
4735 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4736 
4737 	VERIFY(g_tcp_protosw != NULL);
4738 
4739 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4740 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4741 
4742 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4743 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4744 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4745 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4746 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4747 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4748 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4749 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4750 
4751 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4752 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4753 
4754 	/*
4755 	 * Socket filters shouldn't attach/detach to/from this protosw
4756 	 * since pr_protosw is to be used instead, which points to the
4757 	 * real protocol; if they do, it is a bug and we should panic.
4758 	 */
4759 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4760 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4761 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4762 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4763 
4764 	/* UDP */
4765 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4766 	VERIFY(g_udp_protosw != NULL);
4767 
4768 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4769 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4770 
4771 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4772 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4773 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4774 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4775 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4776 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4777 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4778 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4779 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4780 
4781 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4782 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4783 
4784 	/*
4785 	 * Socket filters shouldn't attach/detach to/from this protosw
4786 	 * since pr_protosw is to be used instead, which points to the
4787 	 * real protocol; if they do, it is a bug and we should panic.
4788 	 */
4789 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4790 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4791 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4792 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4793 
4794 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4795 
4796 	VERIFY(g_tcp6_protosw != NULL);
4797 
4798 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4799 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4800 
4801 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4802 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4803 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4804 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4805 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4806 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4807 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4808 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4809 
4810 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4811 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4812 	/*
4813 	 * Socket filters shouldn't attach/detach to/from this protosw
4814 	 * since pr_protosw is to be used instead, which points to the
4815 	 * real protocol; if they do, it is a bug and we should panic.
4816 	 */
4817 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4818 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4819 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4820 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4821 
4822 	/* UDP6 */
4823 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4824 
4825 	VERIFY(g_udp6_protosw != NULL);
4826 
4827 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4828 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4829 
4830 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4831 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4832 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4833 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4834 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4835 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4836 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4837 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4838 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4839 
4840 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4841 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4842 	/*
4843 	 * Socket filters shouldn't attach/detach to/from this protosw
4844 	 * since pr_protosw is to be used instead, which points to the
4845 	 * real protocol; if they do, it is a bug and we should panic.
4846 	 */
4847 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4848 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4849 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4850 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4851 
4852 	TAILQ_INIT(&g_flow_divert_in_process_group_list);
4853 
4854 	g_init_result = flow_divert_kctl_init();
4855 	if (g_init_result) {
4856 		goto done;
4857 	}
4858 
4859 done:
4860 	if (g_init_result != 0) {
4861 		if (g_flow_divert_kctl_ref != NULL) {
4862 			ctl_deregister(g_flow_divert_kctl_ref);
4863 			g_flow_divert_kctl_ref = NULL;
4864 		}
4865 	}
4866 }
4867