xref: /xnu-12377.81.4/bsd/netinet/flow_divert.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2012-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #include <net/sockaddr_utils.h>
73 #if CONTENT_FILTER
74 #include <net/content_filter.h>
75 #endif /* CONTENT_FILTER */
76 
/*
 * Per-flow state bits. flow_divert_update_closed_state() sets the four
 * *_CLOSED bits in flow_divert_pcb.flags and tests
 * FLOW_DIVERT_CONNECT_STARTED there. The remaining bits are presumably
 * stored in the same field (TODO confirm against their users).
 * Note: the value 0x00000020 is not assigned to any flag here.
 */
77 #define FLOW_DIVERT_CONNECT_STARTED             0x00000001
78 #define FLOW_DIVERT_READ_CLOSED                 0x00000002
79 #define FLOW_DIVERT_WRITE_CLOSED                0x00000004
80 #define FLOW_DIVERT_TUNNEL_RD_CLOSED            0x00000008
81 #define FLOW_DIVERT_TUNNEL_WR_CLOSED            0x00000010
82 #define FLOW_DIVERT_HAS_HMAC                    0x00000040
83 #define FLOW_DIVERT_NOTIFY_ON_RECEIVED          0x00000080
84 #define FLOW_DIVERT_IMPLICIT_CONNECT            0x00000100
85 #define FLOW_DIVERT_DID_SET_LOCAL_ADDR          0x00000200
86 #define FLOW_DIVERT_HAS_TOKEN                   0x00000400
87 #define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR       0x00000800
88 #define FLOW_DIVERT_FLOW_IS_TRANSPARENT         0x00001000
89 
/*
 * FDLOG: log a formatted message through os_log_with_type() on
 * OS_LOG_DEFAULT. `level` is a syslog priority that is translated with
 * flow_divert_syslog_type_to_oslog_type(). The message is prefixed with
 * the pcb's hash, so `pcb` is dereferenced unconditionally and must not
 * be NULL. At least one argument must follow `format`.
 */
90 #define FDLOG(level, pcb, format, ...) \
91 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)
92 
/* FDLOG0: same as FDLOG, for a message that takes no format arguments. */
93 #define FDLOG0(level, pcb, msg) \
94 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)
95 
/*
 * Reference counting helpers for flow_divert_pcb (FDRETAIN/FDRELEASE) and
 * flow_divert_group (FDGRP_RETAIN/FDGRP_RELEASE).
 *
 * All four accept NULL and do nothing in that case. The release macros
 * destroy the object when the value returned by OSDecrementAtomic() (the
 * count before the decrement) is 1, i.e. when the last reference is dropped.
 *
 * Every macro is wrapped in do { } while (0) so that it behaves as a single
 * statement: a bare `if (...) stmt` expansion would silently capture an
 * `else` that follows the macro invocation.
 */
#define FDRETAIN(pcb)                                                   \
	do {                                                            \
	        if ((pcb) != NULL) {                                    \
	                OSIncrementAtomic(&(pcb)->ref_count);           \
	        }                                                       \
	} while (0)

#define FDRELEASE(pcb)                                                  \
	do {                                                            \
	        if ((pcb) != NULL &&                                    \
	            1 == OSDecrementAtomic(&(pcb)->ref_count)) {        \
	                flow_divert_pcb_destroy(pcb);                   \
	        }                                                       \
	} while (0)

#define FDGRP_RETAIN(grp)                                               \
	do {                                                            \
	        if ((grp) != NULL) {                                    \
	                OSIncrementAtomic(&(grp)->ref_count);           \
	        }                                                       \
	} while (0)

#define FDGRP_RELEASE(grp)                                              \
	do {                                                            \
	        if ((grp) != NULL &&                                    \
	            1 == OSDecrementAtomic(&(grp)->ref_count)) {        \
	                flow_divert_group_destroy(grp);                 \
	        }                                                       \
	} while (0)
106 
/* Lock / unlock the per-pcb mutex (flow_divert_pcb.mtx). */
107 #define FDLOCK(pcb)                             lck_mtx_lock(&(pcb)->mtx)
108 #define FDUNLOCK(pcb)                           lck_mtx_unlock(&(pcb)->mtx)
109 
/* 128 KiB; presumably the send buffer size of the control socket (TODO confirm at the use site). */
110 #define FD_CTL_SENDBUFF_SIZE                    (128 * 1024)
111 
112 #define GROUP_BIT_CTL_ENQUEUE_BLOCKED           0
113 
/*
 * GROUP_COUNT_MAX is the exclusive upper bound for control units that
 * flow_divert_group_lookup() resolves through the g_flow_divert_groups array.
 */
114 #define GROUP_COUNT_MAX                         31
115 #define FLOW_DIVERT_MAX_NAME_SIZE               4096
116 #define FLOW_DIVERT_MAX_KEY_SIZE                1024
117 #define FLOW_DIVERT_MAX_TRIE_MEMORY             (1024 * 1024)
118 
/*
 * Trie accessors. Nodes, child maps and bytes are addressed by 16-bit
 * indices; NULL_TRIE_IDX is the "no index" sentinel returned by the trie
 * allocators. Each child map is a run of CHILD_MAP_SIZE entries inside
 * (t)->child_maps, selected by the node's child_map index and then indexed
 * by a byte value `b`.
 */
119 #define CHILD_MAP_SIZE                  256
120 #define NULL_TRIE_IDX                   0xffff
121 #define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
122 #define TRIE_CHILD(t, i, b)             (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
123 #define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])
124 
/* True when the socket has SOF_FLOW_DIVERT set and a non-NULL so_fd_pcb. */
125 #define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
126 
/*
 * Placeholder pcb: passed to FDLOG/FDLOG0 when no real pcb is available,
 * and the source of the initial log_level in flow_divert_pcb_create().
 */
127 static struct flow_divert_pcb           nil_pcb;
128 
129 static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
130 static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
/* Read/write lock taken (shared) by flow_divert_group_lookup() around the group tables below. */
131 static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
132     &flow_divert_mtx_attr);
133 
/* Groups with a control unit >= FLOW_DIVERT_IN_PROCESS_UNIT_MIN; searched linearly by ctl_unit. */
134 static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;
135 
/* Groups with a control unit below FLOW_DIVERT_IN_PROCESS_UNIT_MIN, indexed by control unit; NULL until allocated. */
136 static struct flow_divert_group         **g_flow_divert_groups __indexable = NULL;
137 static uint32_t                         g_active_group_count    = 0;
138 
139 static  errno_t                         g_init_result           = 0;
140 
141 static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;
142 
/* protosw / usrreqs storage for the diverted TCP and UDP variants (IPv4 and IPv6). */
143 static struct protosw                   g_flow_divert_in_protosw;
144 static struct pr_usrreqs                g_flow_divert_in_usrreqs;
145 static struct protosw                   g_flow_divert_in_udp_protosw;
146 static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
147 static struct ip6protosw                g_flow_divert_in6_protosw;
148 static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
149 static struct ip6protosw                g_flow_divert_in6_udp_protosw;
150 static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;
151 
/* NOTE(review): presumably the original (non-diverted) protocol switches — confirm where they are assigned. */
152 static struct protosw                   *g_tcp_protosw          = NULL;
153 static struct ip6protosw                *g_tcp6_protosw         = NULL;
154 static struct protosw                   *g_udp_protosw          = NULL;
155 static struct ip6protosw                *g_udp6_protosw         = NULL;
156 
/* Typed allocation zones for groups and pcbs (see zalloc_flags/zfree in pcb create/destroy). */
157 static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
158     NET_KT_DEFAULT);
159 static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
160     NET_KT_DEFAULT);
161 
/*
 * Forward declarations. The definitions are not adjacent to these
 * prototypes; flow_divert_group_destroy() must be declared here because
 * FDGRP_RELEASE expands to a call to it.
 * Note: flow_divert_get_buffered_target_address() is the only one declared
 * without `static`, i.e. it has external linkage.
 */
162 static errno_t
163 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
164 
165 static boolean_t
166 flow_divert_is_sockaddr_valid(struct sockaddr *addr);
167 
168 static int
169 flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr);
170 
171 struct sockaddr *
172 flow_divert_get_buffered_target_address(mbuf_ref_t buffer);
173 
174 static void
175 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);
176 
177 static void flow_divert_group_destroy(struct flow_divert_group *group);
178 
179 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)180 flow_divert_syslog_type_to_oslog_type(int syslog_type)
181 {
182 	switch (syslog_type) {
183 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
184 	case LOG_INFO: return OS_LOG_TYPE_INFO;
185 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
186 	default: return OS_LOG_TYPE_DEFAULT;
187 	}
188 }
189 
190 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)191 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
192 {
193 	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
194 }
195 
196 RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
197 RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
198 
199 static const char *
flow_divert_packet_type2str(uint8_t packet_type)200 flow_divert_packet_type2str(uint8_t packet_type)
201 {
202 	switch (packet_type) {
203 	case FLOW_DIVERT_PKT_CONNECT:
204 		return "connect";
205 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
206 		return "connect result";
207 	case FLOW_DIVERT_PKT_DATA:
208 		return "data";
209 	case FLOW_DIVERT_PKT_CLOSE:
210 		return "close";
211 	case FLOW_DIVERT_PKT_READ_NOTIFY:
212 		return "read notification";
213 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
214 		return "properties update";
215 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
216 		return "app map create";
217 	default:
218 		return "unknown";
219 	}
220 }
221 
222 static inline void
flow_divert_lock_socket(struct socket * so,struct flow_divert_pcb * fd_cb)223 flow_divert_lock_socket(struct socket *so, struct flow_divert_pcb *fd_cb)
224 {
225 	socket_lock(so, 0);
226 	fd_cb->plugin_locked = true;
227 }
228 
229 static inline void
flow_divert_unlock_socket(struct socket * so,struct flow_divert_pcb * fd_cb)230 flow_divert_unlock_socket(struct socket *so, struct flow_divert_pcb *fd_cb)
231 {
232 	fd_cb->plugin_locked = false;
233 	socket_unlock(so, 0);
234 }
235 
236 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)237 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
238 {
239 	struct flow_divert_pcb  key_item;
240 	struct flow_divert_pcb  *fd_cb          = NULL;
241 
242 	key_item.hash = hash;
243 
244 	lck_rw_lock_shared(&group->lck);
245 	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
246 	FDRETAIN(fd_cb);
247 	lck_rw_done(&group->lck);
248 
249 	return fd_cb;
250 }
251 
252 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)253 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
254 {
255 	struct flow_divert_group *group = NULL;
256 	lck_rw_lock_shared(&g_flow_divert_group_lck);
257 	if (g_active_group_count == 0) {
258 		if (fd_cb != NULL) {
259 			FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
260 		}
261 	} else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
262 		FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
263 	} else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
264 		if (g_flow_divert_groups == NULL) {
265 			if (fd_cb != NULL) {
266 				FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
267 			}
268 		} else {
269 			group = g_flow_divert_groups[ctl_unit];
270 			if (group == NULL) {
271 				if (fd_cb != NULL) {
272 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
273 				}
274 			} else {
275 				FDGRP_RETAIN(group);
276 			}
277 		}
278 	} else {
279 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
280 			if (fd_cb != NULL) {
281 				FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
282 			}
283 		} else {
284 			struct flow_divert_group *group_cursor = NULL;
285 			TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
286 				if (group_cursor->ctl_unit == ctl_unit) {
287 					group = group_cursor;
288 					break;
289 				}
290 			}
291 			if (group == NULL) {
292 				if (fd_cb != NULL) {
293 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
294 				}
295 			} else if (fd_cb != NULL &&
296 			    (fd_cb->so == NULL ||
297 			    group_cursor->in_process_pid != fd_cb->so->last_pid)) {
298 				FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
299 				    ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
300 				group = NULL;
301 			} else {
302 				FDGRP_RETAIN(group);
303 			}
304 		}
305 	}
306 	lck_rw_done(&g_flow_divert_group_lck);
307 	return group;
308 }
309 
310 static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb * fd_cb,struct flow_divert_group * group)311 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
312 {
313 	int error = 0;
314 	lck_rw_lock_exclusive(&group->lck);
315 	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
316 		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
317 			fd_cb->group = group;
318 			fd_cb->control_group_unit = group->ctl_unit;
319 			FDRETAIN(fd_cb); /* The group now has a reference */
320 		} else {
321 			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
322 			error = EEXIST;
323 		}
324 	} else {
325 		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
326 		error = ENOENT;
327 	}
328 	lck_rw_done(&group->lck);
329 	return error;
330 }
331 
332 static errno_t
flow_divert_add_to_group(struct flow_divert_pcb * fd_cb,uint32_t ctl_unit)333 flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
334 {
335 	errno_t error = 0;
336 	struct flow_divert_group *group = NULL;
337 	static uint32_t g_nextkey = 1;
338 	static uint32_t g_hash_seed = 0;
339 	int try_count = 0;
340 
341 	group = flow_divert_group_lookup(ctl_unit, fd_cb);
342 	if (group == NULL) {
343 		return ENOENT;
344 	}
345 
346 	do {
347 		uint32_t key[2];
348 		uint32_t idx;
349 
350 		key[0] = g_nextkey++;
351 		key[1] = RandomULong();
352 
353 		if (g_hash_seed == 0) {
354 			g_hash_seed = RandomULong();
355 		}
356 
357 		error = 0;
358 		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);
359 
360 		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
361 			if (idx == ctl_unit) {
362 				continue;
363 			}
364 			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
365 			if (curr_group != NULL) {
366 				lck_rw_lock_shared(&curr_group->lck);
367 				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
368 					error = EEXIST;
369 				}
370 				lck_rw_done(&curr_group->lck);
371 				FDGRP_RELEASE(curr_group);
372 			}
373 		}
374 
375 		if (error == 0) {
376 			error = flow_divert_pcb_insert(fd_cb, group);
377 		}
378 	} while (error == EEXIST && try_count++ < 3);
379 
380 	if (error == EEXIST) {
381 		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
382 		fd_cb->hash = 0;
383 	}
384 
385 	FDGRP_RELEASE(group);
386 	return error;
387 }
388 
389 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)390 flow_divert_pcb_create(socket_t so)
391 {
392 	struct flow_divert_pcb  *new_pcb = NULL;
393 
394 	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
395 	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
396 	new_pcb->so = so;
397 	new_pcb->log_level = nil_pcb.log_level;
398 
399 	FDRETAIN(new_pcb);      /* Represents the socket's reference */
400 
401 	return new_pcb;
402 }
403 
404 static void
flow_divert_pcb_destroy(struct flow_divert_pcb * fd_cb)405 flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
406 {
407 	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %llu, tunnel tx %llu, tunnel rx %llu",
408 	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);
409 
410 	if (fd_cb->connect_token != NULL) {
411 		mbuf_freem(fd_cb->connect_token);
412 	}
413 	if (fd_cb->connect_packet != NULL) {
414 		mbuf_freem(fd_cb->connect_packet);
415 	}
416 	if (fd_cb->app_data != NULL) {
417 		kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
418 	}
419 	if (fd_cb->original_remote_endpoint != NULL) {
420 		free_sockaddr(fd_cb->original_remote_endpoint);
421 	}
422 	zfree(flow_divert_pcb_zone, fd_cb);
423 }
424 
425 static void
flow_divert_pcb_remove(struct flow_divert_pcb * fd_cb)426 flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
427 {
428 	if (fd_cb->group != NULL) {
429 		struct flow_divert_group *group = fd_cb->group;
430 		lck_rw_lock_exclusive(&group->lck);
431 		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
432 		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
433 		fd_cb->group = NULL;
434 		FDRELEASE(fd_cb);                               /* Release the group's reference */
435 		lck_rw_done(&group->lck);
436 	}
437 }
438 
439 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_ref_t * packet)440 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_ref_t *packet)
441 {
442 	struct flow_divert_packet_header        hdr;
443 	int                                     error           = 0;
444 
445 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
446 	if (error) {
447 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
448 		return error;
449 	}
450 
451 	hdr.packet_type = packet_type;
452 	hdr.conn_id = htonl(fd_cb->hash);
453 
454 	/* Lay down the header */
455 	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
456 	if (error) {
457 		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
458 		mbuf_freem(*packet);
459 		*packet = NULL;
460 		return error;
461 	}
462 
463 	return 0;
464 }
465 
466 static int
flow_divert_packet_append_tlv(mbuf_ref_t packet,uint8_t type,uint32_t length,const void __sized_by (length)* value)467 flow_divert_packet_append_tlv(mbuf_ref_t packet, uint8_t type, uint32_t length, const void __sized_by(length) *value)
468 {
469 	uint32_t        net_length      = htonl(length);
470 	int                     error           = 0;
471 
472 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
473 	if (error) {
474 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
475 		return error;
476 	}
477 
478 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
479 	if (error) {
480 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
481 		return error;
482 	}
483 
484 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
485 	if (error) {
486 		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
487 		return error;
488 	}
489 
490 	return error;
491 }
492 
493 static int
flow_divert_packet_find_tlv(mbuf_ref_t packet,int offset,uint8_t type,int * err,int next)494 flow_divert_packet_find_tlv(mbuf_ref_t packet, int offset, uint8_t type, int *err, int next)
495 {
496 	size_t      cursor      = offset;
497 	int         error       = 0;
498 	uint32_t    curr_length = 0;
499 	uint8_t     curr_type   = 0;
500 
501 	*err = 0;
502 
503 	do {
504 		if (!next) {
505 			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
506 			if (error) {
507 				*err = ENOENT;
508 				return -1;
509 			}
510 		} else {
511 			next = 0;
512 			curr_type = FLOW_DIVERT_TLV_NIL;
513 		}
514 
515 		if (curr_type != type) {
516 			cursor += sizeof(curr_type);
517 			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
518 			if (error) {
519 				*err = error;
520 				return -1;
521 			}
522 
523 			cursor += (sizeof(curr_length) + ntohl(curr_length));
524 		}
525 	} while (curr_type != type);
526 
527 	return (int)cursor;
528 }
529 
530 static int
flow_divert_packet_get_tlv(mbuf_ref_t packet,int offset,uint8_t type,size_t buff_len,void * buff __sized_by (buff_len),uint32_t * val_size)531 flow_divert_packet_get_tlv(mbuf_ref_t packet, int offset, uint8_t type, size_t buff_len, void *buff __sized_by(buff_len), uint32_t *val_size)
532 {
533 	int         error      = 0;
534 	uint32_t    length     = 0;
535 	int         tlv_offset = 0;
536 
537 	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
538 	if (tlv_offset < 0) {
539 		return error;
540 	}
541 
542 	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
543 	if (error) {
544 		return error;
545 	}
546 
547 	length = ntohl(length);
548 
549 	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);
550 
551 	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
552 		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
553 		return EINVAL;
554 	}
555 
556 	if (val_size != NULL) {
557 		*val_size = length;
558 	}
559 
560 	if (buff != NULL && buff_len > 0) {
561 		memset(buff, 0, buff_len);
562 		size_t to_copy = (length < buff_len) ? length : buff_len;
563 		error = mbuf_copydata(packet, data_offset, to_copy, buff);
564 		if (error) {
565 			return error;
566 		}
567 	}
568 
569 	return 0;
570 }
571 
572 static int
flow_divert_packet_compute_hmac(mbuf_ref_t packet,struct flow_divert_group * group,uint8_t * hmac)573 flow_divert_packet_compute_hmac(mbuf_ref_t packet, struct flow_divert_group *group, uint8_t *hmac)
574 {
575 	mbuf_ref_t  curr_mbuf       = packet;
576 
577 	if (g_crypto_funcs == NULL || group->token_key == NULL) {
578 		return ENOPROTOOPT;
579 	}
580 
581 	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
582 	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);
583 
584 	while (curr_mbuf != NULL) {
585 		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mtod(curr_mbuf, void *));
586 		curr_mbuf = mbuf_next(curr_mbuf);
587 	}
588 
589 	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);
590 
591 	return 0;
592 }
593 
594 static int
flow_divert_packet_verify_hmac(mbuf_ref_t packet,uint32_t ctl_unit)595 flow_divert_packet_verify_hmac(mbuf_ref_t packet, uint32_t ctl_unit)
596 {
597 	int error = 0;
598 	struct flow_divert_group *group = NULL;
599 	int hmac_offset;
600 	uint8_t packet_hmac[SHA_DIGEST_LENGTH];
601 	uint8_t computed_hmac[SHA_DIGEST_LENGTH];
602 	mbuf_ref_t tail;
603 
604 	group = flow_divert_group_lookup(ctl_unit, NULL);
605 	if (group == NULL) {
606 		FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
607 		return ENOPROTOOPT;
608 	}
609 
610 	lck_rw_lock_shared(&group->lck);
611 
612 	if (group->token_key == NULL) {
613 		error = ENOPROTOOPT;
614 		goto done;
615 	}
616 
617 	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
618 	if (hmac_offset < 0) {
619 		goto done;
620 	}
621 
622 	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
623 	if (error) {
624 		goto done;
625 	}
626 
627 	/* Chop off the HMAC TLV */
628 	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
629 	if (error) {
630 		goto done;
631 	}
632 
633 	mbuf_free(tail);
634 
635 	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
636 	if (error) {
637 		goto done;
638 	}
639 
640 	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
641 		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
642 		error = EINVAL;
643 		goto done;
644 	}
645 
646 done:
647 	if (group != NULL) {
648 		lck_rw_done(&group->lck);
649 		FDGRP_RELEASE(group);
650 	}
651 	return error;
652 }
653 
654 static void
flow_divert_add_data_statistics(struct flow_divert_pcb * fd_cb,size_t data_len,Boolean send)655 flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
656 {
657 	struct inpcb *inp = NULL;
658 	struct ifnet *ifp = NULL;
659 	stats_functional_type ifnet_count_type = stats_functional_type_unclassified;
660 
661 	inp = sotoinpcb(fd_cb->so);
662 	if (inp == NULL) {
663 		return;
664 	}
665 
666 	if (inp->inp_vflag & INP_IPV4) {
667 		ifp = inp->inp_last_outifp;
668 	} else if (inp->inp_vflag & INP_IPV6) {
669 		ifp = inp->in6p_last_outifp;
670 	}
671 	if (ifp != NULL) {
672 		ifnet_count_type = IFNET_COUNT_TYPE(ifp);
673 	}
674 
675 	if (send) {
676 		INP_ADD_TXSTAT(inp, ifnet_count_type, 1, data_len);
677 	} else {
678 		INP_ADD_RXSTAT(inp, ifnet_count_type, 1, data_len);
679 	}
680 }
681 
682 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)683 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
684 {
685 	struct inpcb *inp = sotoinpcb(fd_cb->so);
686 	if (INP_NO_CELLULAR(inp)) {
687 		struct ifnet *ifp = NULL;
688 		if (inp->inp_vflag & INP_IPV4) {
689 			ifp = inp->inp_last_outifp;
690 		} else if (inp->inp_vflag & INP_IPV6) {
691 			ifp = inp->in6p_last_outifp;
692 		}
693 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
694 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
695 			return EHOSTUNREACH;
696 		}
697 	}
698 	return 0;
699 }
700 
701 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)702 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
703 {
704 	struct inpcb *inp = sotoinpcb(fd_cb->so);
705 	if (INP_NO_EXPENSIVE(inp)) {
706 		struct ifnet *ifp = NULL;
707 		if (inp->inp_vflag & INP_IPV4) {
708 			ifp = inp->inp_last_outifp;
709 		} else if (inp->inp_vflag & INP_IPV6) {
710 			ifp = inp->in6p_last_outifp;
711 		}
712 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
713 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
714 			return EHOSTUNREACH;
715 		}
716 	}
717 	return 0;
718 }
719 
720 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)721 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
722 {
723 	struct inpcb *inp = sotoinpcb(fd_cb->so);
724 	if (INP_NO_CONSTRAINED(inp)) {
725 		struct ifnet *ifp = NULL;
726 		if (inp->inp_vflag & INP_IPV4) {
727 			ifp = inp->inp_last_outifp;
728 		} else if (inp->inp_vflag & INP_IPV6) {
729 			ifp = inp->in6p_last_outifp;
730 		}
731 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
732 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
733 			return EHOSTUNREACH;
734 		}
735 	}
736 	return 0;
737 }
738 
739 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,bool tunnel,bool flush_snd)740 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
741 {
742 	if (how != SHUT_RD) {
743 		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
744 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
745 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
746 			if (flush_snd) {
747 				/* If the tunnel is not accepting writes any more, then flush the send buffer */
748 				sbflush(&fd_cb->so->so_snd);
749 			}
750 		}
751 	}
752 	if (how != SHUT_WR) {
753 		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
754 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
755 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
756 		}
757 	}
758 }
759 
760 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)761 trie_node_alloc(struct flow_divert_trie *trie)
762 {
763 	if (trie->nodes_free_next < trie->nodes_count) {
764 		uint16_t node_idx = trie->nodes_free_next++;
765 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
766 		return node_idx;
767 	} else {
768 		return NULL_TRIE_IDX;
769 	}
770 }
771 
772 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)773 trie_child_map_alloc(struct flow_divert_trie *trie)
774 {
775 	if (trie->child_maps_free_next < trie->child_maps_count) {
776 		return trie->child_maps_free_next++;
777 	} else {
778 		return NULL_TRIE_IDX;
779 	}
780 }
781 
782 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)783 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
784 {
785 	uint16_t start = trie->bytes_free_next;
786 	if (start + bytes_size <= trie->bytes_count) {
787 		if (start != bytes_idx) {
788 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
789 		}
790 		trie->bytes_free_next += bytes_size;
791 		return start;
792 	} else {
793 		return NULL_TRIE_IDX;
794 	}
795 }
796 
797 static uint16_t
flow_divert_trie_insert(struct flow_divert_trie * trie,uint16_t string_start,size_t string_len)798 flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
799 {
800 	uint16_t current = trie->root;
801 	uint16_t child = trie->root;
802 	uint16_t string_end = string_start + (uint16_t)string_len;
803 	uint16_t string_idx = string_start;
804 	uint16_t string_remainder = (uint16_t)string_len;
805 
806 	while (child != NULL_TRIE_IDX) {
807 		uint16_t parent = current;
808 		uint16_t node_idx;
809 		uint16_t current_end;
810 
811 		current = child;
812 		child = NULL_TRIE_IDX;
813 
814 		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
815 
816 		for (node_idx = TRIE_NODE(trie, current).start;
817 		    node_idx < current_end &&
818 		    string_idx < string_end &&
819 		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
820 		    node_idx++, string_idx++) {
821 			;
822 		}
823 
824 		string_remainder = string_end - string_idx;
825 
826 		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
827 			/*
828 			 * We did not reach the end of the current node's string.
829 			 * We need to split the current node into two:
830 			 *   1. A new node that contains the prefix of the node that matches
831 			 *      the prefix of the string being inserted.
832 			 *   2. The current node modified to point to the remainder
833 			 *      of the current node's string.
834 			 */
835 			uint16_t prefix = trie_node_alloc(trie);
836 			if (prefix == NULL_TRIE_IDX) {
837 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
838 				return NULL_TRIE_IDX;
839 			}
840 
841 			/*
842 			 * Prefix points to the portion of the current nodes's string that has matched
843 			 * the input string thus far.
844 			 */
845 			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
846 			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);
847 
848 			/*
849 			 * Prefix has the current node as the child corresponding to the first byte
850 			 * after the split.
851 			 */
852 			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
853 			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
854 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
855 				return NULL_TRIE_IDX;
856 			}
857 			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
858 
859 			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
860 			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
861 
862 			/* Current node is adjusted to point to the remainder */
863 			TRIE_NODE(trie, current).start = node_idx;
864 			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
865 
866 			/* We want to insert the new leaf (if any) as a child of the prefix */
867 			current = prefix;
868 		}
869 
870 		if (string_remainder > 0) {
871 			/*
872 			 * We still have bytes in the string that have not been matched yet.
873 			 * If the current node has children, iterate to the child corresponding
874 			 * to the next byte in the string.
875 			 */
876 			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
877 				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
878 			}
879 		}
880 	} /* while (child != NULL_TRIE_IDX) */
881 
882 	if (string_remainder > 0) {
883 		/* Add a new leaf containing the remainder of the string */
884 		uint16_t leaf = trie_node_alloc(trie);
885 		if (leaf == NULL_TRIE_IDX) {
886 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
887 			return NULL_TRIE_IDX;
888 		}
889 
890 		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
891 		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
892 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
893 			return NULL_TRIE_IDX;
894 		}
895 		TRIE_NODE(trie, leaf).length = string_remainder;
896 
897 		/* Set the new leaf as the child of the current node */
898 		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
899 			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
900 			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
901 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
902 				return NULL_TRIE_IDX;
903 			}
904 		}
905 		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
906 		current = leaf;
907 	} /* else duplicate or this string is a prefix of one of the existing strings */
908 
909 	return current;
910 }
911 
912 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
913 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes __sized_by (string_bytes_count),__unused size_t string_bytes_count)914 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes __sized_by(string_bytes_count), __unused size_t string_bytes_count)
915 {
916 	uint16_t current = trie->root;
917 	uint16_t string_idx = 0;
918 
919 	while (current != NULL_TRIE_IDX) {
920 		uint16_t next = NULL_TRIE_IDX;
921 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
922 		uint16_t node_idx;
923 
924 		for (node_idx = TRIE_NODE(trie, current).start;
925 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
926 		    node_idx++, string_idx++) {
927 			;
928 		}
929 
930 		if (node_idx == node_end) {
931 			if (string_bytes[string_idx] == '\0') {
932 				return current; /* Got an exact match */
933 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
934 			    0 == strlcmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
935 				return current; /* Got an apple webclip id prefix match */
936 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
937 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
938 			}
939 		}
940 		current = next;
941 	}
942 
943 	return NULL_TRIE_IDX;
944 }
945 
946 struct uuid_search_info {
947 	uuid_t      target_uuid;
948 	char        *found_signing_id __sized_by(found_signing_id_size);
949 	boolean_t   found_multiple_signing_ids;
950 	proc_t      found_proc;
951 	size_t      found_signing_id_size;
952 };
953 
954 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)955 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
956 {
957 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
958 	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
959 
960 	if (info->found_signing_id != NULL) {
961 		if (!info->found_multiple_signing_ids) {
962 			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
963 			info->found_proc = p;
964 			result = PROC_CLAIMED_DONE;
965 		} else {
966 			uuid_string_t uuid_str;
967 			uuid_unparse(info->target_uuid, uuid_str);
968 			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
969 		}
970 		kfree_data_sized_by(info->found_signing_id, info->found_signing_id_size);
971 	}
972 
973 	if (result == PROC_RETURNED_DONE) {
974 		uuid_string_t uuid_str;
975 		uuid_unparse(info->target_uuid, uuid_str);
976 		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
977 	}
978 
979 	return result;
980 }
981 
982 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)983 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
984 {
985 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
986 	int include = 0;
987 
988 	if (info->found_multiple_signing_ids) {
989 		return include;
990 	}
991 
992 	const unsigned char * p_uuid = proc_executableuuid_addr(p);
993 	include = (uuid_compare(p_uuid, info->target_uuid) == 0);
994 	if (include) {
995 		const char *signing_id __null_terminated = cs_identity_get(p);
996 		if (signing_id != NULL) {
997 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
998 			size_t signing_id_size = strlen(signing_id) + 1;
999 			if (info->found_signing_id == NULL) {
1000 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
1001 				info->found_signing_id_size = signing_id_size;
1002 				strlcpy(info->found_signing_id, signing_id, signing_id_size);
1003 			} else if (strlcmp(info->found_signing_id, signing_id, info->found_signing_id_size)) {
1004 				info->found_multiple_signing_ids = TRUE;
1005 			}
1006 		} else {
1007 			info->found_multiple_signing_ids = TRUE;
1008 		}
1009 		include = !info->found_multiple_signing_ids;
1010 	}
1011 
1012 	return include;
1013 }
1014 
1015 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)1016 flow_divert_find_proc_by_uuid(uuid_t uuid)
1017 {
1018 	struct uuid_search_info info;
1019 
1020 	if (LOG_INFO <= nil_pcb.log_level) {
1021 		uuid_string_t uuid_str;
1022 		uuid_unparse(uuid, uuid_str);
1023 		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
1024 	}
1025 
1026 	memset(&info, 0, sizeof(info));
1027 	info.found_proc = PROC_NULL;
1028 	uuid_copy(info.target_uuid, uuid);
1029 
1030 	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
1031 
1032 	return info.found_proc;
1033 }
1034 
1035 static int
flow_divert_add_proc_info(struct flow_divert_pcb * fd_cb,proc_t proc,const char * signing_id __null_terminated,mbuf_ref_t connect_packet,bool is_effective)1036 flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet, bool is_effective)
1037 {
1038 	int error = 0;
1039 	uint8_t *cdhash = NULL;
1040 	audit_token_t audit_token = {};
1041 	const char *proc_cs_id __null_terminated = signing_id;
1042 
1043 	proc_lock(proc);
1044 
1045 	if (proc_cs_id == NULL) {
1046 		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
1047 			proc_cs_id = cs_identity_get(proc);
1048 		} else {
1049 			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
1050 		}
1051 	}
1052 
1053 	if (is_effective) {
1054 		lck_rw_lock_shared(&fd_cb->group->lck);
1055 		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1056 			if (proc_cs_id != NULL) {
1057 				size_t proc_cs_id_size = strlen(proc_cs_id) + 1;
1058 				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)__unsafe_null_terminated_to_indexable(proc_cs_id), proc_cs_id_size);
1059 				if (result == NULL_TRIE_IDX) {
1060 					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
1061 					error = EPERM;
1062 				} else {
1063 					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
1064 				}
1065 			} else {
1066 				error = EPERM;
1067 			}
1068 		}
1069 		lck_rw_done(&fd_cb->group->lck);
1070 	}
1071 
1072 	if (error != 0) {
1073 		goto done;
1074 	}
1075 
1076 	/*
1077 	 * If signing_id is not NULL then it came from the flow divert token and will be added
1078 	 * as part of the token, so there is no need to add it here.
1079 	 */
1080 	if (signing_id == NULL && proc_cs_id != NULL) {
1081 		error = flow_divert_packet_append_tlv(connect_packet,
1082 		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
1083 		    (uint32_t)strlen(proc_cs_id),
1084 		    __terminated_by_to_indexable(proc_cs_id));
1085 		if (error != 0) {
1086 			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
1087 			goto done;
1088 		}
1089 	}
1090 
1091 	cdhash = cs_get_cdhash(proc);
1092 	if (cdhash != NULL) {
1093 		error = flow_divert_packet_append_tlv(connect_packet,
1094 		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
1095 		    SHA1_RESULTLEN,
1096 		    cdhash);
1097 		if (error) {
1098 			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
1099 			goto done;
1100 		}
1101 	} else {
1102 		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
1103 	}
1104 
1105 	task_t task __single = proc_task(proc);
1106 	if (task != TASK_NULL) {
1107 		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
1108 		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
1109 		if (rc == KERN_SUCCESS) {
1110 			int append_error = flow_divert_packet_append_tlv(connect_packet,
1111 			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
1112 			    sizeof(audit_token_t),
1113 			    &audit_token);
1114 			if (append_error) {
1115 				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
1116 			}
1117 		}
1118 	}
1119 
1120 done:
1121 	proc_unlock(proc);
1122 
1123 	return error;
1124 }
1125 
1126 static int
flow_divert_add_all_proc_info(struct flow_divert_pcb * fd_cb,struct socket * so,proc_t proc,const char * signing_id __null_terminated,mbuf_ref_t connect_packet)1127 flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet)
1128 {
1129 	int error = 0;
1130 	proc_t effective_proc = PROC_NULL;
1131 	proc_t responsible_proc = PROC_NULL;
1132 	proc_t real_proc = proc_find(so->last_pid);
1133 	bool release_real_proc = true;
1134 
1135 	proc_t src_proc = PROC_NULL;
1136 	proc_t real_src_proc = PROC_NULL;
1137 
1138 	if (real_proc == PROC_NULL) {
1139 		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
1140 		release_real_proc = false;
1141 		real_proc = proc;
1142 		if (real_proc == PROC_NULL) {
1143 			real_proc = current_proc();
1144 		}
1145 	}
1146 
1147 	if (so->so_flags & SOF_DELEGATED) {
1148 		if (proc_getpid(real_proc) != so->e_pid) {
1149 			effective_proc = proc_find(so->e_pid);
1150 		} else {
1151 			const unsigned char * real_proc_uuid = proc_executableuuid_addr(real_proc);
1152 			if (uuid_compare(real_proc_uuid, so->e_uuid)) {
1153 				effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
1154 			}
1155 		}
1156 	}
1157 
1158 #if defined(XNU_TARGET_OS_OSX)
1159 	lck_rw_lock_shared(&fd_cb->group->lck);
1160 	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1161 		if (so->so_rpid > 0) {
1162 			responsible_proc = proc_find(so->so_rpid);
1163 		}
1164 	}
1165 	lck_rw_done(&fd_cb->group->lck);
1166 #endif
1167 
1168 	real_src_proc = real_proc;
1169 
1170 	if (responsible_proc != PROC_NULL) {
1171 		src_proc = responsible_proc;
1172 		if (effective_proc != NULL) {
1173 			real_src_proc = effective_proc;
1174 		}
1175 	} else if (effective_proc != PROC_NULL) {
1176 		src_proc = effective_proc;
1177 	} else {
1178 		src_proc = real_proc;
1179 	}
1180 
1181 	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
1182 	if (error != 0) {
1183 		goto done;
1184 	}
1185 
1186 	if (real_src_proc != NULL && real_src_proc != src_proc) {
1187 		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
1188 		if (error != 0) {
1189 			goto done;
1190 		}
1191 	}
1192 
1193 done:
1194 	if (responsible_proc != PROC_NULL) {
1195 		proc_rele(responsible_proc);
1196 	}
1197 
1198 	if (effective_proc != PROC_NULL) {
1199 		proc_rele(effective_proc);
1200 	}
1201 
1202 	if (real_proc != PROC_NULL && release_real_proc) {
1203 		proc_rele(real_proc);
1204 	}
1205 
1206 	return error;
1207 }
1208 
1209 static int
flow_divert_send_packet(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet)1210 flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet)
1211 {
1212 	int             error;
1213 
1214 	if (fd_cb->group == NULL) {
1215 		FDLOG0(LOG_ERR, fd_cb, "no provider, cannot send packet");
1216 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1217 			error = ECONNABORTED;
1218 		} else {
1219 			error = EHOSTUNREACH;
1220 		}
1221 		return error;
1222 	}
1223 
1224 	lck_rw_lock_shared(&fd_cb->group->lck);
1225 
1226 	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
1227 		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
1228 		if (error) {
1229 			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
1230 		}
1231 	} else {
1232 		error = ENOBUFS;
1233 	}
1234 
1235 	if (error == ENOBUFS) {
1236 		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
1237 			lck_rw_lock_exclusive(&fd_cb->group->lck);
1238 		}
1239 		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
1240 		error = 0;
1241 		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
1242 	}
1243 
1244 	lck_rw_done(&fd_cb->group->lck);
1245 
1246 	return error;
1247 }
1248 
1249 static void
flow_divert_append_domain_name(char * domain_name __null_terminated,void * ctx)1250 flow_divert_append_domain_name(char *domain_name __null_terminated, void *ctx)
1251 {
1252 	mbuf_ref_t packet = (mbuf_ref_t)ctx;
1253 	size_t domain_name_length = 0;
1254 
1255 	if (packet == NULL || domain_name == NULL) {
1256 		return;
1257 	}
1258 
1259 	domain_name_length = strlen(domain_name);
1260 	if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1261 		int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, __terminated_by_to_indexable(domain_name));
1262 		if (error) {
1263 			FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1264 		}
1265 	}
1266 }
1267 
1268 static int
flow_divert_create_connect_packet(struct flow_divert_pcb * fd_cb,struct sockaddr * to,struct socket * so,proc_t p,mbuf_ref_t * out_connect_packet)1269 flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_ref_t *out_connect_packet)
1270 {
1271 	int                     error           = 0;
1272 	int                     flow_type       = 0;
1273 	char *                  signing_id __indexable = NULL;
1274 	uint32_t                sid_size        = 0;
1275 	mbuf_ref_t              connect_packet  = NULL;
1276 	cfil_sock_id_t          cfil_sock_id    = CFIL_SOCK_ID_NONE;
1277 	const void              *cfil_id        = NULL;
1278 	size_t                  cfil_id_size    = 0;
1279 	struct inpcb            *inp            = sotoinpcb(so);
1280 	struct ifnet            *ifp            = NULL;
1281 	uint32_t                flags           = 0;
1282 
1283 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
1284 	if (error) {
1285 		goto done;
1286 	}
1287 
1288 	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
1289 		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1290 		if (find_error == 0 && sid_size > 0) {
1291 			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
1292 			if (signing_id != NULL) {
1293 				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
1294 				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
1295 			}
1296 		}
1297 	}
1298 
1299 	// TODO: remove ternary operator after rdar://121487109 is fixed
1300 	error = flow_divert_add_all_proc_info(fd_cb, so, p, NULL == signing_id ? NULL : __unsafe_null_terminated_from_indexable(signing_id), connect_packet);
1301 
1302 	if (signing_id != NULL) {
1303 		kfree_data(signing_id, sid_size + 1);
1304 	}
1305 
1306 	if (error) {
1307 		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
1308 		goto done;
1309 	}
1310 
1311 	error = flow_divert_packet_append_tlv(connect_packet,
1312 	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
1313 	    sizeof(fd_cb->so->so_traffic_class),
1314 	    &fd_cb->so->so_traffic_class);
1315 	if (error) {
1316 		goto done;
1317 	}
1318 
1319 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1320 		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
1321 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1322 		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
1323 	} else {
1324 		error = EINVAL;
1325 		goto done;
1326 	}
1327 	error = flow_divert_packet_append_tlv(connect_packet,
1328 	    FLOW_DIVERT_TLV_FLOW_TYPE,
1329 	    sizeof(flow_type),
1330 	    &flow_type);
1331 
1332 	if (error) {
1333 		goto done;
1334 	}
1335 
1336 	if (fd_cb->connect_token != NULL) {
1337 		unsigned int token_len = m_length(fd_cb->connect_token);
1338 		mbuf_concatenate(connect_packet, fd_cb->connect_token);
1339 		mbuf_pkthdr_adjustlen(connect_packet, token_len);
1340 		fd_cb->connect_token = NULL;
1341 	} else {
1342 		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
1343 		if (error) {
1344 			goto done;
1345 		}
1346 
1347 		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
1348 	}
1349 
1350 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1351 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
1352 		if (error) {
1353 			goto done;
1354 		}
1355 	}
1356 
1357 	if (inp->inp_vflag & INP_IPV4) {
1358 		ifp = inp->inp_last_outifp;
1359 	} else if (inp->inp_vflag & INP_IPV6) {
1360 		ifp = inp->in6p_last_outifp;
1361 	}
1362 	if ((inp->inp_flags & INP_BOUND_IF) ||
1363 	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
1364 	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
1365 		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
1366 		if (ifp == NULL) {
1367 			ifp = inp->inp_boundifp;
1368 		}
1369 	}
1370 	if (ifp != NULL) {
1371 		uint32_t flow_if_index = ifp->if_index;
1372 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
1373 		    sizeof(flow_if_index), &flow_if_index);
1374 		if (error) {
1375 			goto done;
1376 		}
1377 	}
1378 
1379 	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
1380 		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
1381 	}
1382 
1383 	if (flags != 0) {
1384 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
1385 		if (error) {
1386 			goto done;
1387 		}
1388 	}
1389 
1390 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
1391 		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
1392 	} else {
1393 		cfil_sock_id = cfil_sock_id_from_socket(so);
1394 	}
1395 
1396 	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
1397 		cfil_id = &cfil_sock_id;
1398 		cfil_id_size = sizeof(cfil_sock_id);
1399 	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
1400 		cfil_id = &inp->necp_client_uuid;
1401 		cfil_id_size = sizeof(inp->necp_client_uuid);
1402 	}
1403 
1404 	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
1405 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
1406 		if (error) {
1407 			goto done;
1408 		}
1409 	}
1410 
1411 done:
1412 	if (!error) {
1413 		*out_connect_packet = connect_packet;
1414 	} else if (connect_packet != NULL) {
1415 		mbuf_freem(connect_packet);
1416 	}
1417 
1418 	return error;
1419 }
1420 
1421 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1422 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1423 {
1424 	int             error                   = 0;
1425 	mbuf_ref_t      connect_packet          = fd_cb->connect_packet;
1426 	mbuf_ref_t      saved_connect_packet    = NULL;
1427 
1428 	if (connect_packet != NULL) {
1429 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1430 		if (error) {
1431 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1432 			goto done;
1433 		}
1434 
1435 		error = flow_divert_send_packet(fd_cb, connect_packet);
1436 		if (error) {
1437 			goto done;
1438 		}
1439 
1440 		fd_cb->connect_packet = saved_connect_packet;
1441 		saved_connect_packet = NULL;
1442 	} else {
1443 		error = ENOENT;
1444 	}
1445 done:
1446 	if (saved_connect_packet != NULL) {
1447 		mbuf_freem(saved_connect_packet);
1448 	}
1449 
1450 	return error;
1451 }
1452 
1453 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1454 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1455 {
1456 	int             error       = 0;
1457 	mbuf_ref_t      packet      = NULL;
1458 	int             rbuff_space = 0;
1459 
1460 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1461 	if (error) {
1462 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1463 		goto done;
1464 	}
1465 
1466 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1467 	if (rbuff_space < 0) {
1468 		rbuff_space = 0;
1469 	}
1470 	rbuff_space = htonl(rbuff_space);
1471 	error = flow_divert_packet_append_tlv(packet,
1472 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1473 	    sizeof(rbuff_space),
1474 	    &rbuff_space);
1475 	if (error) {
1476 		goto done;
1477 	}
1478 
1479 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1480 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
1481 		if (error) {
1482 			goto done;
1483 		}
1484 	}
1485 
1486 	error = flow_divert_send_packet(fd_cb, packet);
1487 	if (error) {
1488 		goto done;
1489 	}
1490 
1491 done:
1492 	if (error && packet != NULL) {
1493 		mbuf_freem(packet);
1494 	}
1495 
1496 	return error;
1497 }
1498 
1499 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1500 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1501 {
1502 	int         error   = 0;
1503 	mbuf_ref_t  packet  = NULL;
1504 	uint32_t    zero    = 0;
1505 
1506 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1507 	if (error) {
1508 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1509 		goto done;
1510 	}
1511 
1512 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1513 	if (error) {
1514 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1515 		goto done;
1516 	}
1517 
1518 	how = htonl(how);
1519 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1520 	if (error) {
1521 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1522 		goto done;
1523 	}
1524 
1525 	error = flow_divert_send_packet(fd_cb, packet);
1526 	if (error) {
1527 		goto done;
1528 	}
1529 
1530 done:
1531 	if (error && packet != NULL) {
1532 		mbuf_freem(packet);
1533 	}
1534 
1535 	return error;
1536 }
1537 
1538 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1539 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1540 {
1541 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1542 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1543 		return SHUT_RDWR;
1544 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1545 		return SHUT_RD;
1546 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1547 		return SHUT_WR;
1548 	}
1549 
1550 	return -1;
1551 }
1552 
1553 /*
1554  * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1555  * writes. Returns FALSE otherwise.
1556  */
1557 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1558 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1559 {
1560 	int             how             = -1;
1561 
1562 	/* Do not send any close messages if there is still data in the send buffer */
1563 	if (fd_cb->so->so_snd.sb_cc == 0) {
1564 		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1565 			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1566 			how = SHUT_RD;
1567 		}
1568 		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1569 			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1570 			if (how == SHUT_RD) {
1571 				how = SHUT_RDWR;
1572 			} else {
1573 				how = SHUT_WR;
1574 			}
1575 		}
1576 	}
1577 
1578 	if (how != -1) {
1579 		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1580 		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1581 			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1582 			if (how != SHUT_RD) {
1583 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1584 			}
1585 			if (how != SHUT_WR) {
1586 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1587 			}
1588 		}
1589 	}
1590 }
1591 
1592 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_ref_t data,size_t data_len)1593 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len)
1594 {
1595 	mbuf_ref_t  packet = NULL;
1596 	mbuf_ref_t  last   = NULL;
1597 	int         error  = 0;
1598 
1599 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1600 	if (error || packet == NULL) {
1601 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1602 		goto done;
1603 	}
1604 
1605 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1606 		last = m_last(packet);
1607 		mbuf_setnext(last, data);
1608 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1609 	} else {
1610 		data_len = 0;
1611 	}
1612 	error = flow_divert_send_packet(fd_cb, packet);
1613 	if (error == 0 && data_len > 0) {
1614 		fd_cb->bytes_sent += data_len;
1615 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1616 	}
1617 
1618 done:
1619 	if (error) {
1620 		if (last != NULL) {
1621 			mbuf_setnext(last, NULL);
1622 		}
1623 		if (packet != NULL) {
1624 			mbuf_freem(packet);
1625 		}
1626 	}
1627 
1628 	return error;
1629 }
1630 
1631 static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb * fd_cb,mbuf_ref_t data,size_t data_len,struct sockaddr * toaddr,Boolean is_fragment,size_t datagram_size)1632 flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
1633 {
1634 	mbuf_ref_t  packet = NULL;
1635 	mbuf_ref_t  last   = NULL;
1636 	int         error  = 0;
1637 
1638 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1639 	if (error || packet == NULL) {
1640 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1641 		goto done;
1642 	}
1643 
1644 	if (toaddr != NULL) {
1645 		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
1646 		if (error) {
1647 			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
1648 			goto done;
1649 		}
1650 	}
1651 	if (is_fragment) {
1652 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
1653 		if (error) {
1654 			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
1655 			goto done;
1656 		}
1657 	}
1658 
1659 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
1660 	if (error) {
1661 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
1662 		goto done;
1663 	}
1664 
1665 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1666 		last = m_last(packet);
1667 		mbuf_setnext(last, data);
1668 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1669 	} else {
1670 		data_len = 0;
1671 	}
1672 	error = flow_divert_send_packet(fd_cb, packet);
1673 	if (error == 0 && data_len > 0) {
1674 		fd_cb->bytes_sent += data_len;
1675 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1676 	}
1677 
1678 done:
1679 	if (error) {
1680 		if (last != NULL) {
1681 			mbuf_setnext(last, NULL);
1682 		}
1683 		if (packet != NULL) {
1684 			mbuf_freem(packet);
1685 		}
1686 	}
1687 
1688 	return error;
1689 }
1690 
1691 static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb * fd_cb,mbuf_ref_t datagram,size_t datagram_len,struct sockaddr * toaddr)1692 flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_ref_t datagram, size_t datagram_len, struct sockaddr *toaddr)
1693 {
1694 	mbuf_ref_t  next_data       = datagram;
1695 	size_t      remaining_len   = datagram_len;
1696 	mbuf_ref_t  remaining_data  = NULL;
1697 	int         error           = 0;
1698 	bool        first           = true;
1699 
1700 	while (remaining_len > 0 && next_data != NULL) {
1701 		size_t to_send = remaining_len;
1702 		remaining_data = NULL;
1703 
1704 		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
1705 			to_send = FLOW_DIVERT_CHUNK_SIZE;
1706 			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
1707 			if (error) {
1708 				break;
1709 			}
1710 		}
1711 
1712 		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
1713 		if (error) {
1714 			break;
1715 		}
1716 
1717 		first = false;
1718 		remaining_len -= to_send;
1719 		next_data = remaining_data;
1720 	}
1721 
1722 	if (error) {
1723 		if (next_data != NULL) {
1724 			mbuf_freem(next_data);
1725 		}
1726 		if (remaining_data != NULL) {
1727 			mbuf_freem(remaining_data);
1728 		}
1729 	}
1730 	return error;
1731 }
1732 
1733 static void
flow_divert_send_buffered_data(struct flow_divert_pcb * fd_cb,Boolean force)1734 flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
1735 {
1736 	size_t      to_send;
1737 	size_t      sent    = 0;
1738 	int         error   = 0;
1739 	mbuf_ref_t  buffer;
1740 
1741 	to_send = fd_cb->so->so_snd.sb_cc;
1742 	buffer = fd_cb->so->so_snd.sb_mb;
1743 
1744 	if (buffer == NULL && to_send > 0) {
1745 		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
1746 		return;
1747 	}
1748 
1749 	/* Ignore the send window if force is enabled */
1750 	if (!force && (to_send > fd_cb->send_window)) {
1751 		to_send = fd_cb->send_window;
1752 	}
1753 
1754 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1755 		while (sent < to_send) {
1756 			mbuf_ref_t  data;
1757 			size_t      data_len;
1758 
1759 			data_len = to_send - sent;
1760 			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
1761 				data_len = FLOW_DIVERT_CHUNK_SIZE;
1762 			}
1763 
1764 			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
1765 			if (error) {
1766 				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1767 				break;
1768 			}
1769 
1770 			error = flow_divert_send_data_packet(fd_cb, data, data_len);
1771 			if (error) {
1772 				if (data != NULL) {
1773 					mbuf_freem(data);
1774 				}
1775 				break;
1776 			}
1777 
1778 			sent += data_len;
1779 		}
1780 		sbdrop(&fd_cb->so->so_snd, (int)sent);
1781 		sowwakeup(fd_cb->so);
1782 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1783 		mbuf_ref_t  data;
1784 		mbuf_ref_t  m;
1785 		size_t      data_len;
1786 
1787 		while (buffer) {
1788 			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);
1789 
1790 			m = buffer;
1791 			if (toaddr != NULL) {
1792 				/* look for data in the chain */
1793 				do {
1794 					m = m->m_next;
1795 					if (m != NULL && m->m_type == MT_DATA) {
1796 						break;
1797 					}
1798 				} while (m);
1799 				if (m == NULL) {
1800 					/* unexpected */
1801 					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
1802 					goto move_on;
1803 				}
1804 			}
1805 			data_len = mbuf_pkthdr_len(m);
1806 			if (data_len > 0) {
1807 				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
1808 				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
1809 				if (error) {
1810 					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1811 					break;
1812 				}
1813 			} else {
1814 				data = NULL;
1815 			}
1816 			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
1817 				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
1818 			} else {
1819 				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
1820 				data = NULL;
1821 			}
1822 			if (error) {
1823 				if (data != NULL) {
1824 					mbuf_freem(data);
1825 				}
1826 				break;
1827 			}
1828 			sent += data_len;
1829 move_on:
1830 			buffer = buffer->m_nextpkt;
1831 			(void) sbdroprecord(&(fd_cb->so->so_snd));
1832 		}
1833 	}
1834 
1835 	if (sent > 0) {
1836 		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
1837 		if (fd_cb->send_window >= sent) {
1838 			fd_cb->send_window -= sent;
1839 		} else {
1840 			fd_cb->send_window = 0;
1841 		}
1842 	}
1843 }
1844 
1845 static int
flow_divert_send_app_data(struct flow_divert_pcb * fd_cb,mbuf_ref_t data,size_t data_size,struct sockaddr * toaddr)1846 flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_size, struct sockaddr *toaddr)
1847 {
1848 	size_t to_send = data_size;
1849 	int error = 0;
1850 
1851 	if (to_send > fd_cb->send_window) {
1852 		to_send = fd_cb->send_window;
1853 	}
1854 
1855 	if (fd_cb->so->so_snd.sb_cc > 0) {
1856 		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
1857 	}
1858 
1859 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1860 		size_t sent = 0;
1861 		mbuf_ref_t remaining_data = data;
1862 		size_t remaining_size = data_size;
1863 		mbuf_ref_t pkt_data = NULL;
1864 		while (sent < to_send && remaining_data != NULL && remaining_size > 0) {
1865 			size_t  pkt_data_len;
1866 
1867 			pkt_data = remaining_data;
1868 
1869 			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
1870 				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
1871 			} else {
1872 				pkt_data_len = to_send - sent;
1873 			}
1874 
1875 			if (pkt_data_len < remaining_size) {
1876 				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
1877 				if (error) {
1878 					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
1879 					pkt_data = NULL;
1880 					break;
1881 				}
1882 				remaining_size -= pkt_data_len;
1883 			} else {
1884 				remaining_data = NULL;
1885 				remaining_size = 0;
1886 			}
1887 
1888 			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
1889 			if (error) {
1890 				break;
1891 			}
1892 
1893 			pkt_data = NULL;
1894 			sent += pkt_data_len;
1895 		}
1896 
1897 		if (fd_cb->send_window >= sent) {
1898 			fd_cb->send_window -= sent;
1899 		} else {
1900 			fd_cb->send_window = 0;
1901 		}
1902 
1903 		error = 0;
1904 
1905 		if (pkt_data != NULL) {
1906 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1907 				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
1908 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
1909 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1910 				}
1911 			} else {
1912 				mbuf_freem(pkt_data);
1913 				error = ENOBUFS;
1914 			}
1915 		}
1916 
1917 		if (remaining_data != NULL) {
1918 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1919 				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
1920 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
1921 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1922 				}
1923 			} else {
1924 				mbuf_freem(remaining_data);
1925 				error = ENOBUFS;
1926 			}
1927 		}
1928 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1929 		int send_dgram_error = 0;
1930 		if (to_send || data_size == 0) {
1931 			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
1932 				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
1933 			} else {
1934 				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
1935 				data = NULL;
1936 			}
1937 			if (send_dgram_error) {
1938 				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
1939 			} else {
1940 				if (data_size >= fd_cb->send_window) {
1941 					fd_cb->send_window = 0;
1942 				} else {
1943 					fd_cb->send_window -= data_size;
1944 				}
1945 				data = NULL;
1946 			}
1947 		}
1948 
1949 		if (data != NULL) {
1950 			/* buffer it */
1951 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1952 				if (toaddr != NULL) {
1953 					int append_error = 0;
1954 					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
1955 						FDLOG(LOG_ERR, fd_cb,
1956 						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
1957 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
1958 					}
1959 				} else {
1960 					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
1961 						FDLOG(LOG_ERR, fd_cb,
1962 						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
1963 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1964 					}
1965 				}
1966 			} else {
1967 				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
1968 				mbuf_freem(data);
1969 			}
1970 		}
1971 	}
1972 
1973 	return error;
1974 }
1975 
1976 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1977 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1978 {
1979 	int         error  = 0;
1980 	mbuf_ref_t  packet = NULL;
1981 
1982 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1983 	if (error) {
1984 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1985 		goto done;
1986 	}
1987 
1988 	error = flow_divert_send_packet(fd_cb, packet);
1989 	if (error) {
1990 		goto done;
1991 	}
1992 
1993 done:
1994 	if (error && packet != NULL) {
1995 		mbuf_freem(packet);
1996 	}
1997 
1998 	return error;
1999 }
2000 
2001 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)2002 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
2003 {
2004 	int         error  = 0;
2005 	mbuf_ref_t  packet = NULL;
2006 
2007 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
2008 	if (error) {
2009 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
2010 		goto done;
2011 	}
2012 
2013 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2014 	if (error) {
2015 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2016 		goto done;
2017 	}
2018 
2019 	error = flow_divert_send_packet(fd_cb, packet);
2020 	if (error) {
2021 		goto done;
2022 	}
2023 
2024 done:
2025 	if (error && packet != NULL) {
2026 		mbuf_freem(packet);
2027 	}
2028 
2029 	return error;
2030 }
2031 
2032 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2033 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2034 {
2035 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2036 
2037 	if (local_endpoint->sa_family == AF_INET6) {
2038 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2039 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2040 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2041 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2042 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2043 		}
2044 		if (inp->inp_lport == 0) {
2045 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2046 		}
2047 	} else if (local_endpoint->sa_family == AF_INET) {
2048 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2049 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2050 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2051 		}
2052 		if (inp->inp_lport == 0) {
2053 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
2054 		}
2055 	}
2056 }
2057 
2058 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2059 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2060 {
2061 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2062 
2063 	if (remote_endpoint->sa_family == AF_INET6) {
2064 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2065 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2066 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2067 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2068 		}
2069 		if (inp->inp_fport == 0) {
2070 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2071 		}
2072 	} else if (remote_endpoint->sa_family == AF_INET) {
2073 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
2074 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2075 		}
2076 		if (inp->inp_fport == 0) {
2077 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2078 		}
2079 	}
2080 }
2081 
2082 static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid,uint32_t * ctl_unit,uint32_t * aggregate_unit,bool * is_aggregate)2083 flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
2084 {
2085 	uint32_t result = *ctl_unit;
2086 
2087 	// There are two models supported for deriving control units:
2088 	// 1. A series of flow divert units that allow "transparently" failing
2089 	//    over to the next unit. For this model, the aggregate_unit contains list
2090 	//    of all control units (between 1 and 30) masked over each other.
2091 	// 2. An indication that in-process flow divert should be preferred, with
2092 	//    an out of process flow divert to fail over to. For this model, the
2093 	//    ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
2094 	//    is returned first, with the unpacked aggregate unit returned as a
2095 	//    fallback.
2096 	*is_aggregate = false;
2097 	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
2098 		bool found_unit = false;
2099 		if (pid != 0) {
2100 			// Look for an in-process group that is already open, and use that unit
2101 			struct flow_divert_group *group = NULL;
2102 			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
2103 				if (group->in_process_pid == pid) {
2104 					// Found an in-process group for our same PID, use it
2105 					found_unit = true;
2106 					result = group->ctl_unit;
2107 					break;
2108 				}
2109 			}
2110 
2111 			// If an in-process group isn't open yet, send a signal up through NECP to request one
2112 			if (!found_unit) {
2113 				necp_client_request_in_process_flow_divert(pid);
2114 			}
2115 		}
2116 
2117 		// If a unit was found, return it
2118 		if (found_unit) {
2119 			if (aggregate_unit != NULL && *aggregate_unit != 0) {
2120 				*is_aggregate = true;
2121 			}
2122 			// The next time around, the aggregate unit values will be picked up
2123 			*ctl_unit = 0;
2124 			return result;
2125 		}
2126 
2127 		// If no unit was found, fall through and clear out the ctl_unit
2128 		result = 0;
2129 		*ctl_unit = 0;
2130 	}
2131 
2132 	if (aggregate_unit != NULL && *aggregate_unit != 0) {
2133 		uint32_t counter;
2134 		struct flow_divert_group *lower_order_group = NULL;
2135 
2136 		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
2137 			if ((*aggregate_unit) & (1 << counter)) {
2138 				struct flow_divert_group *group = NULL;
2139 				group = flow_divert_group_lookup(counter + 1, NULL);
2140 
2141 				if (group != NULL) {
2142 					if (lower_order_group == NULL) {
2143 						lower_order_group = group;
2144 					} else if ((group->order < lower_order_group->order)) {
2145 						lower_order_group = group;
2146 					}
2147 				}
2148 			}
2149 		}
2150 
2151 		if (lower_order_group != NULL) {
2152 			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
2153 			*is_aggregate = true;
2154 			return lower_order_group->ctl_unit;
2155 		} else {
2156 			*ctl_unit = 0;
2157 			return result;
2158 		}
2159 	} else {
2160 		*ctl_unit = 0;
2161 		return result;
2162 	}
2163 }
2164 
2165 static int
flow_divert_try_next_group(struct flow_divert_pcb * fd_cb)2166 flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
2167 {
2168 	int error = 0;
2169 	uint32_t policy_control_unit = fd_cb->policy_control_unit;
2170 
2171 	flow_divert_pcb_remove(fd_cb);
2172 
2173 	do {
2174 		struct flow_divert_group *next_group = NULL;
2175 		bool is_aggregate = false;
2176 		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);
2177 
2178 		if (fd_cb->control_group_unit == next_ctl_unit) {
2179 			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
2180 			error = EALREADY;
2181 			break;
2182 		}
2183 
2184 		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
2185 			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
2186 			error = ENOENT;
2187 			break;
2188 		}
2189 
2190 		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
2191 		if (next_group == NULL) {
2192 			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
2193 			continue;
2194 		}
2195 
2196 		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);
2197 
2198 		error = flow_divert_pcb_insert(fd_cb, next_group);
2199 		if (error == 0) {
2200 			if (is_aggregate) {
2201 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
2202 			} else {
2203 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
2204 			}
2205 		}
2206 		FDGRP_RELEASE(next_group);
2207 	} while (fd_cb->group == NULL);
2208 
2209 	if (fd_cb->group == NULL) {
2210 		return error ? error : ENOENT;
2211 	}
2212 
2213 	error = flow_divert_send_connect_packet(fd_cb);
2214 	if (error) {
2215 		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
2216 		flow_divert_pcb_remove(fd_cb);
2217 		error = ENOENT;
2218 	}
2219 
2220 	return error;
2221 }
2222 
2223 static inline bool
flow_divert_address_needs_mapping(struct socket * so,struct sockaddr * addr)2224 flow_divert_address_needs_mapping(struct socket *so, struct sockaddr *addr)
2225 {
2226 	return so != NULL && SOCK_CHECK_DOM(so, PF_INET6) && addr != NULL && addr->sa_family == AF_INET && addr->sa_len >= sizeof(struct sockaddr_in);
2227 }
2228 
2229 static struct sockaddr *
flow_divert_map_v4_to_v6(struct flow_divert_pcb * fd_cb,struct sockaddr * addr_v4,struct sockaddr_in6 * addr_v4inv6)2230 flow_divert_map_v4_to_v6(struct flow_divert_pcb *fd_cb, struct sockaddr *addr_v4, struct sockaddr_in6 *addr_v4inv6)
2231 {
2232 	FDLOG0(LOG_NOTICE, fd_cb, "Mapping v4 remote endpoint to a v6 endpoint");
2233 	union {
2234 		struct in_addr_4in6 addr46;
2235 		struct in6_addr addr6;
2236 	} mapped_addr = {
2237 		.addr6 = IN6ADDR_V4MAPPED_INIT,
2238 	};
2239 	struct sockaddr_in *sin = satosin(addr_v4);
2240 
2241 	memset(addr_v4inv6, 0, sizeof(*addr_v4inv6));
2242 
2243 	mapped_addr.addr46.ia46_addr4 = sin->sin_addr;
2244 
2245 	addr_v4inv6->sin6_family = AF_INET6;
2246 	addr_v4inv6->sin6_len = sizeof(*addr_v4inv6);
2247 	addr_v4inv6->sin6_addr = mapped_addr.addr6;
2248 	addr_v4inv6->sin6_port = sin->sin_port;
2249 
2250 	return (struct sockaddr *)addr_v4inv6;
2251 }
2252 
2253 static int
flow_divert_disable(struct flow_divert_pcb * fd_cb)2254 flow_divert_disable(struct flow_divert_pcb *fd_cb)
2255 {
2256 	struct socket *so = NULL;
2257 	mbuf_ref_t buffer;
2258 	int error = 0;
2259 	proc_t last_proc = NULL;
2260 	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
2261 	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
2262 	struct inpcb *inp = NULL;
2263 	struct sockaddr_in6 sin6 = {};
2264 
2265 	so = fd_cb->so;
2266 	if (so == NULL) {
2267 		goto done;
2268 	}
2269 
2270 	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");
2271 
2272 	/* Restore the IP state */
2273 	inp = sotoinpcb(so);
2274 	inp->inp_vflag = fd_cb->original_vflag;
2275 	inp->inp_faddr.s_addr = INADDR_ANY;
2276 	inp->inp_fport = 0;
2277 	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
2278 	inp->inp_fifscope = IFSCOPE_NONE;
2279 	inp->in6p_fport = 0;
2280 	/* If flow divert set the local address, clear it out */
2281 	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
2282 		inp->inp_laddr.s_addr = INADDR_ANY;
2283 		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
2284 		inp->inp_lifscope = IFSCOPE_NONE;
2285 	}
2286 	inp->inp_last_outifp = fd_cb->original_last_outifp;
2287 	inp->in6p_last_outifp = fd_cb->original_last_outifp6;
2288 
2289 	/* Dis-associate the socket */
2290 	so->so_flags &= ~SOF_FLOW_DIVERT;
2291 	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
2292 	so->so_fd_pcb = NULL;
2293 	fd_cb->so = NULL;
2294 
2295 	FDRELEASE(fd_cb); /* Release the socket's reference */
2296 
2297 	/* Revert back to the original protocol */
2298 	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));
2299 
2300 	/* Reset the socket state to avoid confusing NECP */
2301 	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);
2302 
2303 	if (flow_divert_address_needs_mapping(so, remote_endpoint)) {
2304 		remote_endpoint = flow_divert_map_v4_to_v6(fd_cb, remote_endpoint, &sin6);
2305 	}
2306 
2307 	last_proc = proc_find(so->last_pid);
2308 
2309 	if (do_connect) {
2310 		/* Connect using the original protocol */
2311 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
2312 		if (error) {
2313 			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
2314 			goto done;
2315 		}
2316 	}
2317 
2318 	buffer = so->so_snd.sb_mb;
2319 	if (buffer == NULL) {
2320 		/* No buffered data, done */
2321 		goto done;
2322 	}
2323 
2324 	/* Send any buffered data using the original protocol */
2325 	if (SOCK_TYPE(so) == SOCK_STREAM) {
2326 		mbuf_ref_t  data_to_send = NULL;
2327 		size_t      data_len     = so->so_snd.sb_cc;
2328 
2329 		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
2330 		if (error) {
2331 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
2332 			goto done;
2333 		}
2334 
2335 		sbflush(&so->so_snd);
2336 
2337 		if (data_to_send->m_flags & M_PKTHDR) {
2338 			mbuf_pkthdr_setlen(data_to_send, data_len);
2339 		}
2340 
2341 		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2342 		    0,
2343 		    data_to_send,
2344 		    NULL,
2345 		    NULL,
2346 		    (last_proc != NULL ? last_proc : current_proc()));
2347 
2348 		if (error && error != EWOULDBLOCK) {
2349 			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
2350 		} else {
2351 			error = 0;
2352 		}
2353 	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
2354 		struct sockbuf *sb = &so->so_snd;
2355 		MBUFQ_HEAD(send_queue_head) send_queue;
2356 		MBUFQ_INIT(&send_queue);
2357 
2358 		/* Flush the send buffer, moving all records to a temporary queue */
2359 		while (sb->sb_mb != NULL) {
2360 			mbuf_ref_t record = sb->sb_mb;
2361 			mbuf_ref_t m = record;
2362 			sb->sb_mb = sb->sb_mb->m_nextpkt;
2363 			while (m != NULL) {
2364 				sbfree(sb, m);
2365 				m = m->m_next;
2366 			}
2367 			record->m_nextpkt = NULL;
2368 			MBUFQ_ENQUEUE(&send_queue, record);
2369 		}
2370 		SB_EMPTY_FIXUP(sb);
2371 
2372 		while (!MBUFQ_EMPTY(&send_queue)) {
2373 			mbuf_ref_t next_record = MBUFQ_FIRST(&send_queue);
2374 			mbuf_ref_t addr = NULL;
2375 			mbuf_ref_t control = NULL;
2376 			mbuf_ref_t last_control = NULL;
2377 			mbuf_ref_t data = NULL;
2378 			mbuf_ref_t m = next_record;
2379 			struct sockaddr *to_endpoint = NULL;
2380 
2381 			MBUFQ_DEQUEUE(&send_queue, next_record);
2382 
2383 			while (m != NULL) {
2384 				if (m->m_type == MT_SONAME) {
2385 					addr = m;
2386 				} else if (m->m_type == MT_CONTROL) {
2387 					if (control == NULL) {
2388 						control = m;
2389 					}
2390 					last_control = m;
2391 				} else if (m->m_type == MT_DATA) {
2392 					data = m;
2393 					break;
2394 				}
2395 				m = m->m_next;
2396 			}
2397 
2398 			if (addr != NULL && !do_connect) {
2399 				to_endpoint = flow_divert_get_buffered_target_address(addr);
2400 				if (to_endpoint == NULL) {
2401 					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
2402 				} else if (flow_divert_address_needs_mapping(so, to_endpoint)) {
2403 					to_endpoint = flow_divert_map_v4_to_v6(fd_cb, to_endpoint, &sin6);
2404 				}
2405 			}
2406 
2407 			if (data == NULL) {
2408 				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
2409 				mbuf_freem(next_record);
2410 				continue;
2411 			}
2412 
2413 			if (!(data->m_flags & M_PKTHDR)) {
2414 				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
2415 				mbuf_freem(next_record);
2416 				continue;
2417 			}
2418 
2419 			if (addr != NULL) {
2420 				addr->m_next = NULL;
2421 			}
2422 
2423 			if (last_control != NULL) {
2424 				last_control->m_next = NULL;
2425 			}
2426 
2427 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2428 			    0,
2429 			    data,
2430 			    to_endpoint,
2431 			    control,
2432 			    (last_proc != NULL ? last_proc : current_proc()));
2433 
2434 			if (addr != NULL) {
2435 				mbuf_freem(addr);
2436 			}
2437 
2438 			if (error) {
2439 				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
2440 			}
2441 		}
2442 	}
2443 done:
2444 	if (last_proc != NULL) {
2445 		proc_rele(last_proc);
2446 	}
2447 
2448 	return error;
2449 }
2450 
2451 static void
flow_divert_scope(struct flow_divert_pcb * fd_cb,int out_if_index,bool derive_new_address)2452 flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
2453 {
2454 	struct socket           *so             = NULL;
2455 	struct inpcb            *inp            = NULL;
2456 	struct ifnet            *current_ifp    = NULL;
2457 	struct ifnet * __single new_ifp         = NULL;
2458 	int                     error           = 0;
2459 
2460 	so = fd_cb->so;
2461 	if (so == NULL) {
2462 		return;
2463 	}
2464 
2465 	inp = sotoinpcb(so);
2466 
2467 	if (out_if_index <= 0) {
2468 		return;
2469 	}
2470 
2471 	if (inp->inp_vflag & INP_IPV6) {
2472 		current_ifp = inp->in6p_last_outifp;
2473 	} else {
2474 		current_ifp = inp->inp_last_outifp;
2475 	}
2476 
2477 	if (current_ifp != NULL) {
2478 		if (current_ifp->if_index == out_if_index) {
2479 			/* No change */
2480 			return;
2481 		}
2482 
2483 		/* Scope the socket to the given interface */
2484 		error = inp_bindif(inp, out_if_index, &new_ifp);
2485 		if (error != 0) {
2486 			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
2487 			return;
2488 		}
2489 
2490 		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
2491 			/* Get the appropriate address for the given interface */
2492 			if (inp->inp_vflag & INP_IPV6) {
2493 				inp->in6p_laddr = sa6_any.sin6_addr;
2494 				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
2495 			} else {
2496 				inp->inp_laddr.s_addr = INADDR_ANY;
2497 				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
2498 			}
2499 
2500 			if (error != 0) {
2501 				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
2502 			}
2503 		}
2504 	} else {
2505 		ifnet_head_lock_shared();
2506 		if (IF_INDEX_IN_RANGE(out_if_index)) {
2507 			new_ifp = ifindex2ifnet[out_if_index];
2508 		}
2509 		ifnet_head_done();
2510 	}
2511 
2512 	/* Update the "last interface" of the socket */
2513 	if (new_ifp != NULL) {
2514 		if (inp->inp_vflag & INP_IPV6) {
2515 			inp->in6p_last_outifp = new_ifp;
2516 		} else {
2517 			inp->inp_last_outifp = new_ifp;
2518 		}
2519 
2520 #if SKYWALK
2521 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2522 			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
2523 		}
2524 #endif /* SKYWALK */
2525 	}
2526 }
2527 
2528 static void
flow_divert_handle_connect_result(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2529 flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2530 {
2531 	uint32_t connect_error = 0;
2532 	uint32_t ctl_unit = 0;
2533 	int error = 0;
2534 	union sockaddr_in_4_6 local_endpoint = {};
2535 	union sockaddr_in_4_6 remote_endpoint = {};
2536 	int out_if_index = 0;
2537 	uint32_t send_window = 0;
2538 	uint32_t app_data_length = 0;
2539 	struct inpcb *inp = NULL;
2540 	struct socket *so = fd_cb->so;
2541 	bool local_address_is_valid = false;
2542 
2543 	memset(&local_endpoint, 0, sizeof(local_endpoint));
2544 	memset(&remote_endpoint, 0, sizeof(remote_endpoint));
2545 
2546 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
2547 	if (error) {
2548 		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
2549 		return;
2550 	}
2551 
2552 	connect_error = ntohl(connect_error);
2553 	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);
2554 
2555 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
2556 	if (error) {
2557 		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
2558 		return;
2559 	}
2560 
2561 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
2562 	if (error) {
2563 		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
2564 	}
2565 
2566 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sin6), NULL);
2567 	if (error) {
2568 		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
2569 	}
2570 
2571 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sin6), NULL);
2572 	if (error) {
2573 		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2574 	}
2575 
2576 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2577 	if (error) {
2578 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
2579 	}
2580 
2581 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2582 	if (error) {
2583 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
2584 	}
2585 
2586 	error = 0;
2587 
2588 	if (!SO_IS_DIVERTED(so)) {
2589 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
2590 		return;
2591 	}
2592 
2593 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
2594 		FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
2595 		return;
2596 	}
2597 
2598 	inp = sotoinpcb(so);
2599 
2600 	if (connect_error || error) {
2601 		goto set_socket_state;
2602 	}
2603 
2604 	if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
2605 		if (local_endpoint.sa.sa_family == AF_INET) {
2606 			local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2607 			if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
2608 				local_address_is_valid = true;
2609 				fd_cb->local_endpoint = local_endpoint;
2610 				inp->inp_laddr.s_addr = INADDR_ANY;
2611 			} else {
2612 				fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
2613 			}
2614 		} else if (local_endpoint.sa.sa_family == AF_INET6) {
2615 			local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2616 			if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
2617 				local_address_is_valid = true;
2618 				fd_cb->local_endpoint = local_endpoint;
2619 				inp->in6p_laddr = sa6_any.sin6_addr;
2620 			} else {
2621 				fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
2622 			}
2623 		}
2624 	}
2625 
2626 	flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
2627 	flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
2628 
2629 	if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
2630 		if (remote_endpoint.sa.sa_family == AF_INET) {
2631 			remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2632 		} else if (remote_endpoint.sa.sa_family == AF_INET6) {
2633 			remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2634 		}
2635 		flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
2636 	}
2637 
2638 	if (app_data_length > 0) {
2639 		uint8_t * app_data = NULL;
2640 		app_data = kalloc_data(app_data_length, Z_WAITOK);
2641 		if (app_data != NULL) {
2642 			error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2643 			if (error == 0) {
2644 				FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
2645 				if (fd_cb->app_data != NULL) {
2646 					kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
2647 				}
2648 				fd_cb->app_data = app_data;
2649 				fd_cb->app_data_length = app_data_length;
2650 			} else {
2651 				FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
2652 				kfree_data(app_data, app_data_length);
2653 			}
2654 		} else {
2655 			FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
2656 		}
2657 	}
2658 
2659 	if (error) {
2660 		goto set_socket_state;
2661 	}
2662 
2663 	if (fd_cb->group == NULL) {
2664 		error = EINVAL;
2665 		goto set_socket_state;
2666 	}
2667 
2668 	ctl_unit = ntohl(ctl_unit);
2669 	if (ctl_unit > 0) {
2670 		int insert_error = 0;
2671 		struct flow_divert_group *grp = NULL;
2672 
2673 		if (ctl_unit >= GROUP_COUNT_MAX) {
2674 			FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
2675 			error = EINVAL;
2676 			goto set_socket_state;
2677 		}
2678 
2679 		grp = flow_divert_group_lookup(ctl_unit, fd_cb);
2680 		if (grp == NULL) {
2681 			error = ECONNRESET;
2682 			goto set_socket_state;
2683 		}
2684 
2685 		flow_divert_pcb_remove(fd_cb);
2686 		insert_error = flow_divert_pcb_insert(fd_cb, grp);
2687 		FDGRP_RELEASE(grp);
2688 
2689 		if (insert_error != 0) {
2690 			error = ECONNRESET;
2691 			goto set_socket_state;
2692 		}
2693 	}
2694 
2695 	fd_cb->send_window = ntohl(send_window);
2696 
2697 set_socket_state:
2698 	if (!connect_error && !error) {
2699 		FDLOG0(LOG_INFO, fd_cb, "sending connect result");
2700 		error = flow_divert_send_connect_result(fd_cb);
2701 	}
2702 
2703 	if (connect_error || error) {
2704 		if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
2705 			/* The plugin rejected the flow and the control unit is an aggregation of multiple plugins, try to move to the next one */
2706 			error = flow_divert_try_next_group(fd_cb);
2707 			if (error && fd_cb->policy_control_unit == 0) {
2708 				/* No more plugins available, disable flow divert */
2709 				error = flow_divert_disable(fd_cb);
2710 			}
2711 
2712 			if (error == 0) {
2713 				return;
2714 			}
2715 			so->so_error = (uint16_t)error;
2716 		} else if (!connect_error) {
2717 			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
2718 			so->so_error = (uint16_t)error;
2719 			/* The plugin did not close the flow, so notify the plugin */
2720 			flow_divert_send_close_if_needed(fd_cb);
2721 		} else {
2722 			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
2723 			so->so_error = (uint16_t)connect_error;
2724 		}
2725 		flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
2726 	} else {
2727 #if NECP
2728 		/* Update NECP client with connected five-tuple */
2729 		if (!uuid_is_null(inp->necp_client_uuid)) {
2730 			socket_unlock(so, 0);
2731 			necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
2732 			socket_lock(so, 0);
2733 			if (!SO_IS_DIVERTED(so)) {
2734 				/* The socket was closed while it was unlocked */
2735 				return;
2736 			}
2737 		}
2738 #endif /* NECP */
2739 
2740 		flow_divert_send_buffered_data(fd_cb, FALSE);
2741 		soisconnected(so);
2742 	}
2743 
2744 	/* We don't need the connect packet any more */
2745 	if (fd_cb->connect_packet != NULL) {
2746 		mbuf_freem(fd_cb->connect_packet);
2747 		fd_cb->connect_packet = NULL;
2748 	}
2749 
2750 	/* We don't need the original remote endpoint any more */
2751 	free_sockaddr(fd_cb->original_remote_endpoint);
2752 }
2753 
2754 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2755 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2756 {
2757 	uint32_t close_error = 0;
2758 	int error = 0;
2759 	int how = 0;
2760 	struct socket *so = fd_cb->so;
2761 	bool is_connected = (SOCK_TYPE(so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2762 
2763 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2764 	if (error) {
2765 		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2766 		return;
2767 	}
2768 
2769 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2770 	if (error) {
2771 		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2772 		return;
2773 	}
2774 
2775 	how = ntohl(how);
2776 
2777 	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2778 
2779 	if (!SO_IS_DIVERTED(so)) {
2780 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2781 		return;
2782 	}
2783 
2784 	so->so_error = (uint16_t)ntohl(close_error);
2785 
2786 	flow_divert_update_closed_state(fd_cb, how, true, true);
2787 
2788 	/* Only do this for connected flows because "shutdown by peer" doesn't make sense for unconnected datagram flows */
2789 	how = flow_divert_tunnel_how_closed(fd_cb);
2790 	if (how == SHUT_RDWR) {
2791 		flow_divert_disconnect_socket(so, is_connected, true);
2792 	} else if (how == SHUT_RD && is_connected) {
2793 		socantrcvmore(so);
2794 	} else if (how == SHUT_WR && is_connected) {
2795 		socantsendmore(so);
2796 	}
2797 }
2798 
2799 static mbuf_ref_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2800 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2801 {
2802 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2803 	bool need_recvdstaddr = false;
2804 	/* Socket flow tracking needs to see the local address */
2805 	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2806 	if ((inp->inp_vflag & INP_IPV4) && fd_cb->local_endpoint.sa.sa_family == AF_INET) {
2807 		mbuf_ref_t control = NULL;
2808 		struct mbuf **control_handle = &control;
2809 		if ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr) {
2810 			control_handle = sbcreatecontrol_mbuf((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP, control_handle);
2811 			if (*control_handle == NULL) {
2812 				FDLOG0(LOG_ERR, fd_cb, "failed to create a IP_RECVDSTADDR control mbuf");
2813 				return NULL;
2814 			}
2815 		}
2816 		if (inp->inp_flags & INP_RECVIF) {
2817 			ifnet_ref_t ifp = inp->inp_last_outifp;
2818 			uint8_t sdlbuf[SOCK_MAXADDRLEN + 1] = {};
2819 			struct sockaddr_dl *sdl2 = SDL(sdlbuf);
2820 
2821 			/*
2822 			 * Make sure to accomodate the largest possible
2823 			 * size of SA(if_lladdr)->sa_len.
2824 			 */
2825 			static_assert(sizeof(sdlbuf) == (SOCK_MAXADDRLEN + 1));
2826 
2827 			/* Initialize to a "dummy" address */
2828 			sdl2->sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
2829 			sdl2->sdl_family = AF_LINK;
2830 			sdl2->sdl_index = 0;
2831 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
2832 
2833 			ifnet_head_lock_shared();
2834 			if (ifp != NULL && ifp->if_index && IF_INDEX_IN_RANGE(ifp->if_index)) {
2835 				struct ifaddr *__single ifa = ifnet_addrs[ifp->if_index - 1];
2836 				if (ifa != NULL && ifa->ifa_addr != NULL) {
2837 					struct sockaddr_dl *sdp = NULL;
2838 					IFA_LOCK_SPIN(ifa);
2839 					sdp = SDL(ifa->ifa_addr);
2840 					if (sdp->sdl_family == AF_LINK) {
2841 						/* the above static_assert() ensures sdl_len fits in sdlbuf */
2842 						SOCKADDR_COPY(sdp, sdl2, sdp->sdl_len);
2843 					}
2844 					IFA_UNLOCK(ifa);
2845 				}
2846 			}
2847 			ifnet_head_done();
2848 
2849 			control_handle = sbcreatecontrol_mbuf((caddr_t)SA_BYTES(sdl2), sdl2->sdl_len, IP_RECVIF, IPPROTO_IP, control_handle);
2850 			if (*control_handle == NULL) {
2851 				FDLOG0(LOG_ERR, fd_cb, "failed to create a IP_RECVIF control mbuf");
2852 				return NULL;
2853 			}
2854 		}
2855 		return control;
2856 	} else if ((inp->inp_vflag & INP_IPV6) &&
2857 	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2858 	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2859 		struct in6_pktinfo pi6;
2860 		memset(&pi6, 0, sizeof(pi6));
2861 		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2862 		if (inp->in6p_last_outifp != NULL) {
2863 			pi6.ipi6_ifindex = inp->in6p_last_outifp->if_index;
2864 		} else if (inp->inp_last_outifp != NULL) {
2865 			pi6.ipi6_ifindex = inp->inp_last_outifp->if_index;
2866 		}
2867 
2868 		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2869 	}
2870 	return NULL;
2871 }
2872 
2873 static int
flow_divert_handle_data(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,size_t offset)2874 flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, size_t offset)
2875 {
2876 	int error = 0;
2877 	struct socket *so = fd_cb->so;
2878 	mbuf_ref_t data = NULL;
2879 	size_t  data_size;
2880 	struct sockaddr_storage remote_address;
2881 	boolean_t got_remote_sa = FALSE;
2882 	boolean_t appended = FALSE;
2883 	boolean_t append_success = FALSE;
2884 
2885 	if (!SO_IS_DIVERTED(so)) {
2886 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
2887 		return error;
2888 	}
2889 
2890 	if (sbspace(&so->so_rcv) == 0) {
2891 		error = ENOBUFS;
2892 		fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
2893 		FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
2894 		return error;
2895 	}
2896 
2897 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
2898 		uint32_t val_size = 0;
2899 
2900 		/* check if we got remote address with data */
2901 		memset(&remote_address, 0, sizeof(remote_address));
2902 		error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
2903 		if (error || val_size > sizeof(remote_address)) {
2904 			FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2905 			error = 0;
2906 		} else {
2907 			if (remote_address.ss_len > sizeof(remote_address)) {
2908 				remote_address.ss_len = sizeof(remote_address);
2909 			}
2910 			/* validate the address */
2911 			if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
2912 				got_remote_sa = TRUE;
2913 			} else {
2914 				FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
2915 			}
2916 			offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
2917 		}
2918 	}
2919 
2920 	data_size = (mbuf_pkthdr_len(packet) - offset);
2921 
2922 	if (so->so_state & SS_CANTRCVMORE) {
2923 		FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
2924 		return error;
2925 	}
2926 
2927 	if (SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) {
2928 		FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(so));
2929 		return error;
2930 	}
2931 
2932 	FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
2933 
2934 	error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
2935 	if (error || data == NULL) {
2936 		FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
2937 		return error;
2938 	}
2939 
2940 	if (SOCK_TYPE(so) == SOCK_STREAM) {
2941 		appended = (sbappendstream(&so->so_rcv, data) != 0);
2942 		append_success = TRUE;
2943 	} else {
2944 		struct sockaddr * __single append_sa = NULL;
2945 		mbuf_ref_t mctl;
2946 
2947 		if (got_remote_sa == TRUE) {
2948 			error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
2949 		} else {
2950 			if (SOCK_CHECK_DOM(so, AF_INET6)) {
2951 				error = in6_mapped_peeraddr(so, &append_sa);
2952 			} else {
2953 				error = in_getpeeraddr(so, &append_sa);
2954 			}
2955 		}
2956 		if (error) {
2957 			FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
2958 		}
2959 
2960 		mctl = flow_divert_create_control_mbuf(fd_cb);
2961 		int append_error = 0;
2962 		appended = sbappendaddr(&so->so_rcv, append_sa, data, mctl, &append_error);
2963 		if (appended || append_error == 0) {
2964 			append_success = TRUE;
2965 		} else {
2966 			FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
2967 		}
2968 
2969 		free_sockaddr(append_sa);
2970 	}
2971 
2972 	if (append_success) {
2973 		fd_cb->bytes_received += data_size;
2974 		flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
2975 	}
2976 
2977 	if (appended) {
2978 		sorwakeup(so);
2979 	}
2980 
2981 	return error;
2982 }
2983 
2984 static void
flow_divert_handle_read_notification(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2985 flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2986 {
2987 	uint32_t        read_count              = 0;
2988 	int             error                   = 0;
2989 
2990 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
2991 	if (error) {
2992 		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
2993 		return;
2994 	}
2995 
2996 	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
2997 
2998 	if (!SO_IS_DIVERTED(fd_cb->so)) {
2999 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
3000 		return;
3001 	}
3002 
3003 	fd_cb->send_window += ntohl(read_count);
3004 	flow_divert_send_buffered_data(fd_cb, FALSE);
3005 }
3006 
3007 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_ref_t packet,int offset)3008 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
3009 {
3010 	int error         = 0;
3011 	uint32_t key_size = 0;
3012 	int log_level     = 0;
3013 	uint32_t flags    = 0;
3014 	int32_t order     = FLOW_DIVERT_ORDER_LAST;
3015 
3016 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
3017 	if (error) {
3018 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
3019 		return;
3020 	}
3021 
3022 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
3023 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
3024 		return;
3025 	}
3026 
3027 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
3028 	if (!error) {
3029 		nil_pcb.log_level = (uint8_t)log_level;
3030 	}
3031 
3032 	lck_rw_lock_exclusive(&group->lck);
3033 
3034 	if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
3035 		FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
3036 		lck_rw_done(&group->lck);
3037 		return;
3038 	}
3039 
3040 	if (group->token_key != NULL) {
3041 		kfree_data_sized_by(group->token_key, group->token_key_size);
3042 	}
3043 
3044 	group->token_key = kalloc_data(key_size, Z_WAITOK);
3045 	group->token_key_size = key_size;
3046 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
3047 	if (error) {
3048 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
3049 		kfree_data_sized_by(group->token_key, group->token_key_size);
3050 		lck_rw_done(&group->lck);
3051 		return;
3052 	}
3053 
3054 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
3055 	if (!error) {
3056 		group->flags = flags;
3057 	}
3058 
3059 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
3060 	if (!error) {
3061 		FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
3062 		group->order = order;
3063 	}
3064 
3065 	lck_rw_done(&group->lck);
3066 }
3067 
3068 static void
flow_divert_handle_properties_update(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)3069 flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
3070 {
3071 	int error = 0;
3072 	int out_if_index = 0;
3073 	uint32_t app_data_length = 0;
3074 	struct socket *so = fd_cb->so;
3075 
3076 	FDLOG0(LOG_INFO, fd_cb, "received a properties update");
3077 
3078 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
3079 	if (error) {
3080 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
3081 	}
3082 
3083 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
3084 	if (error) {
3085 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
3086 	}
3087 
3088 	if (!SO_IS_DIVERTED(so)) {
3089 		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
3090 		return;
3091 	}
3092 
3093 	if (out_if_index > 0) {
3094 		flow_divert_scope(fd_cb, out_if_index, true);
3095 		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
3096 	}
3097 
3098 	if (app_data_length > 0) {
3099 		uint8_t * app_data __indexable = NULL;
3100 		app_data = kalloc_data(app_data_length, Z_WAITOK);
3101 		if (app_data != NULL) {
3102 			error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
3103 			if (error == 0) {
3104 				if (fd_cb->app_data != NULL) {
3105 					kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
3106 				}
3107 				fd_cb->app_data = app_data;
3108 				fd_cb->app_data_length = app_data_length;
3109 			} else {
3110 				FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
3111 				kfree_data(app_data, app_data_length);
3112 			}
3113 		} else {
3114 			FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
3115 		}
3116 	}
3117 }
3118 
3119 static void
flow_divert_handle_app_map_create(struct flow_divert_group * group,mbuf_ref_t packet,int offset)3120 flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
3121 {
3122 	size_t                  bytes_mem_size      = 0;
3123 	size_t                  child_maps_mem_size = 0;
3124 	size_t                  nodes_mem_size      = 0;
3125 	size_t                  trie_memory_size    = 0;
3126 	int                     cursor              = 0;
3127 	int                     error               = 0;
3128 	struct flow_divert_trie new_trie;
3129 	int                     insert_error        = 0;
3130 	int                     prefix_count        = -1;
3131 	int                     signing_id_count    = 0;
3132 	size_t                  bytes_count         = 0;
3133 	size_t                  nodes_count         = 0;
3134 	size_t                  maps_count          = 0;
3135 
3136 	lck_rw_lock_exclusive(&group->lck);
3137 
3138 	/* Re-set the current trie */
3139 	if (group->signing_id_trie.memory != NULL) {
3140 		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
3141 	}
3142 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
3143 	group->signing_id_trie.root = NULL_TRIE_IDX;
3144 
3145 	memset(&new_trie, 0, sizeof(new_trie));
3146 
3147 	/* Get the number of shared prefixes in the new set of signing ID strings */
3148 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
3149 
3150 	if (prefix_count < 0 || error) {
3151 		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
3152 		lck_rw_done(&group->lck);
3153 		return;
3154 	}
3155 
3156 	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
3157 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
3158 	    cursor >= 0;
3159 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
3160 		uint32_t sid_size = 0;
3161 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
3162 		if (error || sid_size == 0) {
3163 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
3164 			signing_id_count = 0;
3165 			break;
3166 		}
3167 		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
3168 			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
3169 			signing_id_count = 0;
3170 			break;
3171 		}
3172 		signing_id_count++;
3173 	}
3174 
3175 	if (signing_id_count == 0) {
3176 		lck_rw_done(&group->lck);
3177 		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
3178 		return;
3179 	}
3180 
3181 	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
3182 		lck_rw_done(&group->lck);
3183 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
3184 		return;
3185 	}
3186 
3187 	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
3188 		lck_rw_done(&group->lck);
3189 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
3190 		return;
3191 	}
3192 
3193 	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
3194 		lck_rw_done(&group->lck);
3195 		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
3196 		return;
3197 	}
3198 
3199 	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
3200 	    nodes_count, maps_count, bytes_count);
3201 
3202 	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
3203 	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
3204 	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
3205 	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
3206 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
3207 		lck_rw_done(&group->lck);
3208 		return;
3209 	}
3210 
3211 	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
3212 		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
3213 		lck_rw_done(&group->lck);
3214 		return;
3215 	}
3216 
3217 	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
3218 	new_trie.memory_size = trie_memory_size;
3219 	if (new_trie.memory == NULL) {
3220 		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
3221 		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
3222 		lck_rw_done(&group->lck);
3223 		return;
3224 	}
3225 
3226 	/* Initialize the free lists */
3227 	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
3228 	new_trie.nodes_count = (uint16_t)nodes_count;
3229 
3230 	new_trie.nodes_free_next = 0;
3231 	memset(new_trie.nodes, 0, nodes_mem_size);
3232 
3233 	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
3234 	new_trie.child_maps_count = (uint16_t)maps_count;
3235 	new_trie.child_maps_size = child_maps_mem_size;
3236 
3237 	new_trie.child_maps_free_next = 0;
3238 	memset(new_trie.child_maps, 0xff, child_maps_mem_size);
3239 
3240 	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
3241 	new_trie.bytes_count = (uint16_t)bytes_count;
3242 
3243 	new_trie.bytes_free_next = 0;
3244 	memset(new_trie.bytes, 0, bytes_mem_size);
3245 
3246 	/* The root is an empty node */
3247 	new_trie.root = trie_node_alloc(&new_trie);
3248 
3249 	/* Add each signing ID to the trie */
3250 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
3251 	    cursor >= 0;
3252 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
3253 		uint32_t sid_size = 0;
3254 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
3255 		if (error || sid_size == 0) {
3256 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
3257 			insert_error = EINVAL;
3258 			break;
3259 		}
3260 		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
3261 			uint16_t new_node_idx;
3262 			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
3263 			if (error) {
3264 				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
3265 				insert_error = EINVAL;
3266 				break;
3267 			}
3268 			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
3269 			if (new_node_idx == NULL_TRIE_IDX) {
3270 				insert_error = EINVAL;
3271 				break;
3272 			}
3273 		} else {
3274 			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
3275 			insert_error = ENOBUFS;
3276 			break;
3277 		}
3278 	}
3279 
3280 	if (!insert_error) {
3281 		group->signing_id_trie = new_trie;
3282 	} else {
3283 		kfree_data_sized_by(new_trie.memory, new_trie.memory_size);
3284 	}
3285 
3286 	lck_rw_done(&group->lck);
3287 }
3288 
3289 static void
flow_divert_handle_flow_states_request(struct flow_divert_group * group)3290 flow_divert_handle_flow_states_request(struct flow_divert_group *group)
3291 {
3292 	struct flow_divert_pcb *fd_cb;
3293 	mbuf_ref_t packet = NULL;
3294 	SLIST_HEAD(, flow_divert_pcb) tmp_list;
3295 	int error = 0;
3296 	uint32_t ctl_unit = 0;
3297 
3298 	SLIST_INIT(&tmp_list);
3299 
3300 	error = flow_divert_packet_init(&nil_pcb, FLOW_DIVERT_PKT_FLOW_STATES, &packet);
3301 	if (error || packet == NULL) {
3302 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_packet_init failed: %d, cannot send flow states", error);
3303 		return;
3304 	}
3305 
3306 	lck_rw_lock_shared(&group->lck);
3307 
3308 	if (!MBUFQ_EMPTY(&group->send_queue)) {
3309 		FDLOG0(LOG_WARNING, &nil_pcb, "flow_divert_handle_flow_states_request: group send queue is not empty");
3310 	}
3311 
3312 	ctl_unit = group->ctl_unit;
3313 
3314 	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3315 		FDRETAIN(fd_cb);
3316 		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3317 	}
3318 
3319 	lck_rw_done(&group->lck);
3320 
3321 	SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
3322 		FDLOCK(fd_cb);
3323 		if (fd_cb->so != NULL) {
3324 			struct flow_divert_flow_state state = {};
3325 			struct socket *so = fd_cb->so;
3326 			flow_divert_lock_socket(so, fd_cb);
3327 
3328 			state.conn_id = fd_cb->hash;
3329 			state.bytes_written_by_app = fd_cb->bytes_written_by_app;
3330 			state.bytes_sent = fd_cb->bytes_sent;
3331 			state.bytes_received = fd_cb->bytes_received;
3332 			state.send_window = fd_cb->send_window;
3333 			state.send_buffer_bytes = so->so_snd.sb_cc;
3334 
3335 			error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_FLOW_STATE, sizeof(state), &state);
3336 			if (error) {
3337 				FDLOG(LOG_ERR, fd_cb, "Failed to add a flow state: %d", error);
3338 			}
3339 
3340 			flow_divert_unlock_socket(so, fd_cb);
3341 		}
3342 		FDUNLOCK(fd_cb);
3343 		FDRELEASE(fd_cb);
3344 	}
3345 
3346 	error = ctl_enqueuembuf(g_flow_divert_kctl_ref, ctl_unit, packet, CTL_DATA_EOR);
3347 	if (error) {
3348 		FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_handle_flow_states_request: ctl_enqueuembuf returned an error: %d", error);
3349 		mbuf_freem(packet);
3350 	}
3351 }
3352 
3353 static int
flow_divert_input(mbuf_ref_t packet,struct flow_divert_group * group)3354 flow_divert_input(mbuf_ref_t packet, struct flow_divert_group *group)
3355 {
3356 	struct flow_divert_packet_header    hdr;
3357 	int                                 error  = 0;
3358 	struct flow_divert_pcb              *fd_cb;
3359 
3360 	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
3361 		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
3362 		error = EINVAL;
3363 		goto done;
3364 	}
3365 
3366 	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
3367 	if (error) {
3368 		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
3369 		error = ENOBUFS;
3370 		goto done;
3371 	}
3372 
3373 	hdr.conn_id = ntohl(hdr.conn_id);
3374 
3375 	if (hdr.conn_id == 0) {
3376 		switch (hdr.packet_type) {
3377 		case FLOW_DIVERT_PKT_GROUP_INIT:
3378 			flow_divert_handle_group_init(group, packet, sizeof(hdr));
3379 			break;
3380 		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
3381 			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
3382 			break;
3383 		case FLOW_DIVERT_PKT_FLOW_STATES_REQUEST:
3384 			flow_divert_handle_flow_states_request(group);
3385 			break;
3386 		default:
3387 			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
3388 			break;
3389 		}
3390 		goto done;
3391 	}
3392 
3393 	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
3394 	if (fd_cb == NULL) {
3395 		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
3396 			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
3397 		}
3398 		goto done;
3399 	}
3400 
3401 	FDLOCK(fd_cb);
3402 	if (fd_cb->so != NULL) {
3403 		struct socket *so = fd_cb->so;
3404 		flow_divert_lock_socket(so, fd_cb);
3405 
3406 		switch (hdr.packet_type) {
3407 		case FLOW_DIVERT_PKT_CONNECT_RESULT:
3408 			flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
3409 			break;
3410 		case FLOW_DIVERT_PKT_CLOSE:
3411 			flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
3412 			break;
3413 		case FLOW_DIVERT_PKT_DATA:
3414 			error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
3415 			break;
3416 		case FLOW_DIVERT_PKT_READ_NOTIFY:
3417 			flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
3418 			break;
3419 		case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
3420 			flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
3421 			break;
3422 		default:
3423 			FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
3424 			break;
3425 		}
3426 
3427 		flow_divert_unlock_socket(so, fd_cb);
3428 	}
3429 	FDUNLOCK(fd_cb);
3430 
3431 	FDRELEASE(fd_cb);
3432 
3433 done:
3434 	mbuf_freem(packet);
3435 	return error;
3436 }
3437 
3438 static void
flow_divert_close_all(struct flow_divert_group * group)3439 flow_divert_close_all(struct flow_divert_group *group)
3440 {
3441 	struct flow_divert_pcb                  *fd_cb;
3442 	SLIST_HEAD(, flow_divert_pcb)   tmp_list;
3443 
3444 	SLIST_INIT(&tmp_list);
3445 
3446 	lck_rw_lock_exclusive(&group->lck);
3447 
3448 	MBUFQ_DRAIN(&group->send_queue);
3449 
3450 	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3451 		FDRETAIN(fd_cb);
3452 		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3453 	}
3454 
3455 	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;
3456 
3457 	lck_rw_done(&group->lck);
3458 
3459 	while (!SLIST_EMPTY(&tmp_list)) {
3460 		fd_cb = SLIST_FIRST(&tmp_list);
3461 		FDLOCK(fd_cb);
3462 		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
3463 		if (fd_cb->so != NULL) {
3464 			struct socket *so = fd_cb->so;
3465 			flow_divert_lock_socket(so, fd_cb);
3466 			flow_divert_pcb_remove(fd_cb);
3467 			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
3468 			so->so_error = ECONNABORTED;
3469 			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
3470 			flow_divert_unlock_socket(so, fd_cb);
3471 		}
3472 		FDUNLOCK(fd_cb);
3473 		FDRELEASE(fd_cb);
3474 	}
3475 }
3476 
3477 void
flow_divert_detach(struct socket * so)3478 flow_divert_detach(struct socket *so)
3479 {
3480 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3481 
3482 	if (!SO_IS_DIVERTED(so)) {
3483 		return;
3484 	}
3485 
3486 	so->so_flags &= ~SOF_FLOW_DIVERT;
3487 	so->so_fd_pcb = NULL;
3488 
3489 	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);
3490 
3491 	if (fd_cb->group != NULL) {
3492 		/* Last-ditch effort to send any buffered data */
3493 		flow_divert_send_buffered_data(fd_cb, TRUE);
3494 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
3495 		flow_divert_send_close_if_needed(fd_cb);
3496 		/* Remove from the group */
3497 		flow_divert_pcb_remove(fd_cb);
3498 	}
3499 
3500 	sbflush(&so->so_snd);
3501 	sbflush(&so->so_rcv);
3502 
3503 	flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
3504 
3505 	if (!fd_cb->plugin_locked) {
3506 		socket_unlock(so, 0);
3507 		FDLOCK(fd_cb);
3508 	}
3509 	fd_cb->so = NULL;
3510 	if (!fd_cb->plugin_locked) {
3511 		FDUNLOCK(fd_cb);
3512 		socket_lock(so, 0);
3513 	}
3514 
3515 	FDRELEASE(fd_cb);       /* Release the socket's reference */
3516 }
3517 
3518 static int
flow_divert_close(struct socket * so)3519 flow_divert_close(struct socket *so)
3520 {
3521 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3522 
3523 	if (!SO_IS_DIVERTED(so)) {
3524 		return EINVAL;
3525 	}
3526 
3527 	FDLOG0(LOG_INFO, fd_cb, "Closing");
3528 
3529 	if (SOCK_TYPE(so) == SOCK_STREAM) {
3530 		soisdisconnecting(so);
3531 		sbflush(&so->so_rcv);
3532 	}
3533 
3534 	FDRETAIN(fd_cb);
3535 
3536 	flow_divert_send_buffered_data(fd_cb, TRUE);
3537 	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
3538 	flow_divert_send_close_if_needed(fd_cb);
3539 
3540 	/* Remove from the group */
3541 	flow_divert_pcb_remove(fd_cb);
3542 
3543 	flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
3544 
3545 	FDRELEASE(fd_cb);
3546 
3547 	return 0;
3548 }
3549 
3550 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3551 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3552     sae_connid_t cid __unused)
3553 {
3554 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3555 		return EINVAL;
3556 	}
3557 
3558 	return flow_divert_close(so);
3559 }
3560 
3561 static int
flow_divert_shutdown(struct socket * so)3562 flow_divert_shutdown(struct socket *so)
3563 {
3564 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3565 
3566 	if (!SO_IS_DIVERTED(so)) {
3567 		return EINVAL;
3568 	}
3569 
3570 	FDLOG0(LOG_INFO, fd_cb, "Can't send more");
3571 
3572 	socantsendmore(so);
3573 
3574 	FDRETAIN(fd_cb);
3575 
3576 	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
3577 	flow_divert_send_close_if_needed(fd_cb);
3578 	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
3579 		flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
3580 	}
3581 
3582 	FDRELEASE(fd_cb);
3583 
3584 	return 0;
3585 }
3586 
3587 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3588 flow_divert_rcvd(struct socket *so, int flags __unused)
3589 {
3590 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3591 	int space = 0;
3592 
3593 	if (!SO_IS_DIVERTED(so)) {
3594 		return EINVAL;
3595 	}
3596 
3597 	space = sbspace(&so->so_rcv);
3598 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3599 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3600 	    (space > 0) &&
3601 	    flow_divert_send_read_notification(fd_cb) == 0) {
3602 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3603 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3604 	}
3605 
3606 	return 0;
3607 }
3608 
3609 static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet,struct sockaddr * toaddr)3610 flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr)
3611 {
3612 	int error = 0;
3613 	int port  = 0;
3614 
3615 	if (!flow_divert_is_sockaddr_valid(toaddr)) {
3616 		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3617 		error = EINVAL;
3618 		goto done;
3619 	}
3620 
3621 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, SA_BYTES(toaddr));
3622 	if (error) {
3623 		goto done;
3624 	}
3625 
3626 	if (toaddr->sa_family == AF_INET) {
3627 		port = ntohs((satosin(toaddr))->sin_port);
3628 	} else {
3629 		port = ntohs((satosin6(toaddr))->sin6_port);
3630 	}
3631 
3632 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3633 	if (error) {
3634 		goto done;
3635 	}
3636 
3637 done:
3638 	return error;
3639 }
3640 
3641 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer)3642 flow_divert_get_buffered_target_address(mbuf_ref_t buffer)
3643 {
3644 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3645 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3646 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3647 			return toaddr;
3648 		}
3649 	}
3650 	return NULL;
3651 }
3652 
3653 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3654 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3655 {
3656 	switch (addr->sa_family) {
3657 	case AF_INET:
3658 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3659 			return FALSE;
3660 		}
3661 		break;
3662 	case AF_INET6:
3663 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3664 			return FALSE;
3665 		}
3666 		break;
3667 	default:
3668 		return FALSE;
3669 	}
3670 	return TRUE;
3671 }
3672 
3673 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3674 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3675     struct sockaddr **dup)
3676 {
3677 	int                                             error           = 0;
3678 	struct sockaddr                 *result;
3679 	struct sockaddr_storage ss;
3680 
3681 	if (addr != NULL) {
3682 		result = addr;
3683 	} else {
3684 		memset(&ss, 0, sizeof(ss));
3685 		ss.ss_family = family;
3686 		if (ss.ss_family == AF_INET) {
3687 			ss.ss_len = sizeof(struct sockaddr_in);
3688 		} else if (ss.ss_family == AF_INET6) {
3689 			ss.ss_len = sizeof(struct sockaddr_in6);
3690 		} else {
3691 			error = EINVAL;
3692 		}
3693 		result = (struct sockaddr *)&ss;
3694 	}
3695 
3696 	if (!error) {
3697 		*dup = dup_sockaddr(result, 1);
3698 		if (*dup == NULL) {
3699 			error = ENOBUFS;
3700 		}
3701 	}
3702 
3703 	return error;
3704 }
3705 
3706 static void
flow_divert_disconnect_socket(struct socket * so,bool is_connected,bool delay_if_needed)3707 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
3708 {
3709 	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
3710 		soisdisconnected(so);
3711 	}
3712 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
3713 		if (delay_if_needed) {
3714 			cfil_sock_is_dead(so);
3715 		} else {
3716 			struct inpcb *inp = sotoinpcb(so);
3717 			if (SOCK_CHECK_DOM(so, PF_INET6)) {
3718 				in6_pcbdetach(inp);
3719 			} else {
3720 				in_pcbdetach(inp);
3721 			}
3722 		}
3723 	}
3724 }
3725 
3726 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3727 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3728 {
3729 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3730 
3731 	if (!SO_IS_DIVERTED(so)) {
3732 		return EINVAL;
3733 	}
3734 
3735 	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3736 		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3737 			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3738 		}
3739 	}
3740 
3741 	if (SOCK_DOM(so) == PF_INET) {
3742 		return g_tcp_protosw->pr_ctloutput(so, sopt);
3743 	} else if (SOCK_DOM(so) == PF_INET6) {
3744 		return g_tcp6_protosw->pr_ctloutput(so, sopt);
3745 	}
3746 	return 0;
3747 }
3748 
3749 static errno_t
flow_divert_connect_out_internal(struct socket * so,struct sockaddr * to,proc_t p,bool implicit)3750 flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
3751 {
3752 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3753 	int                     error           = 0;
3754 	struct inpcb            *inp            = sotoinpcb(so);
3755 	struct sockaddr_in      *sinp;
3756 	mbuf_ref_t              connect_packet  = NULL;
3757 	int                     do_send         = 1;
3758 
3759 	if (!SO_IS_DIVERTED(so)) {
3760 		return EINVAL;
3761 	}
3762 
3763 	if (fd_cb->group == NULL) {
3764 		error = ENETUNREACH;
3765 		goto done;
3766 	}
3767 
3768 	if (inp == NULL) {
3769 		error = EINVAL;
3770 		goto done;
3771 	} else if (inp->inp_state == INPCB_STATE_DEAD) {
3772 		if (so->so_error) {
3773 			error = so->so_error;
3774 			so->so_error = 0;
3775 		} else {
3776 			error = EINVAL;
3777 		}
3778 		goto done;
3779 	}
3780 
3781 	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3782 		error = EALREADY;
3783 		goto done;
3784 	}
3785 
3786 	FDLOG0(LOG_INFO, fd_cb, "Connecting");
3787 
3788 	if (fd_cb->connect_packet == NULL) {
3789 		struct sockaddr_in sin = {};
3790 		struct ifnet * __single ifp = NULL;
3791 
3792 		if (to == NULL) {
3793 			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
3794 			error = EINVAL;
3795 			goto done;
3796 		}
3797 
3798 		if (!flow_divert_is_sockaddr_valid(to)) {
3799 			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
3800 			error = EINVAL;
3801 			goto done;
3802 		}
3803 
3804 		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
3805 		if (fd_cb->original_remote_endpoint == NULL) {
3806 			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
3807 			error = ENOMEM;
3808 			goto done;
3809 		}
3810 		fd_cb->original_vflag = inp->inp_vflag;
3811 		fd_cb->original_last_outifp = inp->inp_last_outifp;
3812 		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;
3813 
3814 		sinp = (struct sockaddr_in *)(void *)to;
3815 		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
3816 			error = EAFNOSUPPORT;
3817 			goto done;
3818 		}
3819 
3820 		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
3821 			struct sockaddr_in6 sin6 = {};
3822 			sin6.sin6_family = AF_INET6;
3823 			sin6.sin6_len = sizeof(struct sockaddr_in6);
3824 			sin6.sin6_port = satosin6(to)->sin6_port;
3825 			sin6.sin6_addr = satosin6(to)->sin6_addr;
3826 			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
3827 				in6_sin6_2_sin(&sin, &sin6);
3828 				to = (struct sockaddr *)&sin;
3829 			}
3830 		}
3831 
3832 		if (to->sa_family == AF_INET6) {
3833 			struct sockaddr_in6 *to6 = satosin6(to);
3834 
3835 			inp->inp_vflag &= ~INP_IPV4;
3836 			inp->inp_vflag |= INP_IPV6;
3837 			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
3838 			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
3839 			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
3840 			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
3841 			if (error) {
3842 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
3843 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
3844 					error = 0;
3845 				} else {
3846 					goto done;
3847 				}
3848 			}
3849 			if (ifp != NULL) {
3850 				inp->in6p_last_outifp = ifp;
3851 				ifnet_release(ifp);
3852 			}
3853 
3854 			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
3855 			    in6_embedded_scope &&
3856 			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
3857 				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
3858 				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
3859 			}
3860 
3861 			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
3862 			    in6_embedded_scope &&
3863 			    to6->sin6_addr.s6_addr16[1] != 0) {
3864 				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
3865 				to6->sin6_addr.s6_addr16[1] = 0;
3866 			}
3867 		} else if (to->sa_family == AF_INET) {
3868 			inp->inp_vflag |= INP_IPV4;
3869 			inp->inp_vflag &= ~INP_IPV6;
3870 			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
3871 			fd_cb->local_endpoint.sin.sin_family = AF_INET;
3872 			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
3873 			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
3874 			if (error) {
3875 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
3876 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
3877 					error = 0;
3878 				} else {
3879 					goto done;
3880 				}
3881 			}
3882 			if (ifp != NULL) {
3883 				inp->inp_last_outifp = ifp;
3884 				ifnet_release(ifp);
3885 			}
3886 		} else {
3887 			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
3888 		}
3889 
3890 		error = flow_divert_check_no_cellular(fd_cb) ||
3891 		    flow_divert_check_no_expensive(fd_cb) ||
3892 		    flow_divert_check_no_constrained(fd_cb);
3893 		if (error) {
3894 			goto done;
3895 		}
3896 
3897 		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
3898 		    !implicit || /* connect() was called or */
3899 		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
3900 		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
3901 			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
3902 		}
3903 
3904 		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
3905 		if (error) {
3906 			goto done;
3907 		}
3908 
3909 		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
3910 			flow_divert_set_remote_endpoint(fd_cb, to);
3911 			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
3912 		}
3913 
3914 		if (implicit) {
3915 			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
3916 		}
3917 
3918 		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
3919 			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
3920 			do_send = 0;
3921 		}
3922 
3923 		fd_cb->connect_packet = connect_packet;
3924 		connect_packet = NULL;
3925 	} else {
3926 		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
3927 	}
3928 
3929 	if (do_send) {
3930 		error = flow_divert_send_connect_packet(fd_cb);
3931 		if (error) {
3932 			goto done;
3933 		}
3934 
3935 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3936 	}
3937 
3938 	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
3939 		soisconnected(so);
3940 	} else {
3941 		soisconnecting(so);
3942 	}
3943 
3944 done:
3945 	return error;
3946 }
3947 
3948 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3949 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3950 {
3951 #if CONTENT_FILTER
3952 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3953 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3954 		if (error != 0) {
3955 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3956 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3957 			return error;
3958 		}
3959 	}
3960 #endif /* CONTENT_FILTER */
3961 
3962 	return flow_divert_connect_out_internal(so, to, p, false);
3963 }
3964 
3965 static int
flow_divert_connectx_out_common(struct socket * so,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_connid_t * pcid,struct uio * auio,user_ssize_t * bytes_written)3966 flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
3967     struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
3968 {
3969 	struct inpcb *inp = sotoinpcb(so);
3970 	int error;
3971 
3972 	if (inp == NULL) {
3973 		return EINVAL;
3974 	}
3975 
3976 	VERIFY(dst != NULL);
3977 
3978 #if CONTENT_FILTER && NECP
3979 	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3980 	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
3981 	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3982 		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
3983 	}
3984 #endif /* CONTENT_FILTER */
3985 
3986 	/* bind socket to the specified interface, if requested */
3987 	if (ifscope != IFSCOPE_NONE &&
3988 	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
3989 		return error;
3990 	}
3991 
3992 	error = flow_divert_connect_out(so, dst, p);
3993 
3994 	if (error != 0) {
3995 		return error;
3996 	}
3997 
3998 	/* if there is data, send it */
3999 	if (auio != NULL) {
4000 		user_ssize_t datalen = 0;
4001 
4002 		socket_unlock(so, 0);
4003 
4004 		VERIFY(bytes_written != NULL);
4005 
4006 		datalen = uio_resid(auio);
4007 		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
4008 		socket_lock(so, 0);
4009 
4010 		if (error == 0 || error == EWOULDBLOCK) {
4011 			*bytes_written = datalen - uio_resid(auio);
4012 		}
4013 
4014 		/*
4015 		 * sosend returns EWOULDBLOCK if it's a non-blocking
4016 		 * socket or a timeout occured (this allows to return
4017 		 * the amount of queued data through sendit()).
4018 		 *
4019 		 * However, connectx() returns EINPROGRESS in case of a
4020 		 * blocking socket. So we change the return value here.
4021 		 */
4022 		if (error == EWOULDBLOCK) {
4023 			error = EINPROGRESS;
4024 		}
4025 	}
4026 
4027 	if (error == 0 && pcid != NULL) {
4028 		*pcid = 1;      /* there is only 1 connection for a TCP */
4029 	}
4030 
4031 	return error;
4032 }
4033 
4034 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)4035 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
4036     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
4037     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
4038     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
4039 {
4040 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
4041 }
4042 
4043 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)4044 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
4045     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
4046     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
4047     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
4048 {
4049 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
4050 }
4051 
4052 static errno_t
flow_divert_data_out(struct socket * so,int flags,mbuf_ref_t data,struct sockaddr * to,mbuf_ref_t control,struct proc * p)4053 flow_divert_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
4054 {
4055 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4056 	int                     error   = 0;
4057 	struct inpcb            *inp;
4058 #if CONTENT_FILTER
4059 	struct m_tag *cfil_tag = NULL;
4060 #endif
4061 
4062 	if (!SO_IS_DIVERTED(so)) {
4063 		return EINVAL;
4064 	}
4065 
4066 	inp = sotoinpcb(so);
4067 	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
4068 		error = ECONNRESET;
4069 		goto done;
4070 	}
4071 
4072 	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
4073 		/* The provider considers this datagram flow to be closed, so no data can be sent */
4074 		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
4075 		error = EHOSTUNREACH;
4076 		goto done;
4077 	}
4078 
4079 #if CONTENT_FILTER
4080 	/*
4081 	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
4082 	 * retrieve the CFIL saved remote address from the mbuf and use it.
4083 	 */
4084 	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
4085 		struct sockaddr * __single cfil_faddr = NULL;
4086 		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
4087 		if (cfil_tag) {
4088 			to = (struct sockaddr *)(void *)cfil_faddr;
4089 		}
4090 		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
4091 	}
4092 #endif
4093 
4094 	/* Implicit connect */
4095 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4096 		FDLOG0(LOG_INFO, fd_cb, "implicit connect");
4097 
4098 		error = flow_divert_connect_out_internal(so, to, p, true);
4099 		if (error) {
4100 			goto done;
4101 		}
4102 	} else {
4103 		error = flow_divert_check_no_cellular(fd_cb) ||
4104 		    flow_divert_check_no_expensive(fd_cb) ||
4105 		    flow_divert_check_no_constrained(fd_cb);
4106 		if (error) {
4107 			goto done;
4108 		}
4109 	}
4110 
4111 	if (data != NULL) {
4112 		size_t data_size = 0;
4113 		if (mbuf_flags(data) & M_PKTHDR) {
4114 			data_size = mbuf_pkthdr_len(data);
4115 		} else {
4116 			for (mbuf_t blob = data; blob != NULL; blob = mbuf_next(blob)) {
4117 				data_size += mbuf_len(blob);
4118 			}
4119 		}
4120 
4121 		FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", data_size);
4122 		fd_cb->bytes_written_by_app += data_size;
4123 
4124 		error = flow_divert_send_app_data(fd_cb, data, data_size, to);
4125 
4126 		data = NULL;
4127 
4128 		if (error) {
4129 			goto done;
4130 		}
4131 	}
4132 
4133 	if (flags & PRUS_EOF) {
4134 		flow_divert_shutdown(so);
4135 	}
4136 
4137 done:
4138 	if (data) {
4139 		mbuf_freem(data);
4140 	}
4141 	if (control) {
4142 		mbuf_freem(control);
4143 	}
4144 #if CONTENT_FILTER
4145 	if (cfil_tag) {
4146 		m_tag_free(cfil_tag);
4147 	}
4148 #endif
4149 
4150 	return error;
4151 }
4152 
4153 static int
flow_divert_preconnect(struct socket * so)4154 flow_divert_preconnect(struct socket *so)
4155 {
4156 	int error = 0;
4157 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4158 
4159 	if (!SO_IS_DIVERTED(so)) {
4160 		return EINVAL;
4161 	}
4162 
4163 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4164 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4165 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
4166 		if (error) {
4167 			return error;
4168 		}
4169 
4170 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4171 	}
4172 
4173 	soclearfastopen(so);
4174 
4175 	return error;
4176 }
4177 
4178 static void
flow_divert_set_protosw(struct socket * so)4179 flow_divert_set_protosw(struct socket *so)
4180 {
4181 	if (SOCK_DOM(so) == PF_INET) {
4182 		so->so_proto = &g_flow_divert_in_protosw;
4183 	} else {
4184 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4185 	}
4186 }
4187 
4188 static void
flow_divert_set_udp_protosw(struct socket * so)4189 flow_divert_set_udp_protosw(struct socket *so)
4190 {
4191 	if (SOCK_DOM(so) == PF_INET) {
4192 		so->so_proto = &g_flow_divert_in_udp_protosw;
4193 	} else {
4194 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4195 	}
4196 }
4197 
4198 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_ref_t data,struct sockaddr * to,mbuf_ref_t control,struct proc * p)4199 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
4200 {
4201 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4202 	struct inpcb *inp;
4203 	int error = 0;
4204 
4205 	inp = sotoinpcb(so);
4206 	if (inp == NULL) {
4207 		error = EINVAL;
4208 		goto done;
4209 	}
4210 
4211 	if (fd_cb == NULL) {
4212 		error = flow_divert_pcb_init(so);
4213 		fd_cb  = so->so_fd_pcb;
4214 		if (error != 0 || fd_cb == NULL) {
4215 			goto done;
4216 		}
4217 	}
4218 	return flow_divert_data_out(so, flags, data, to, control, p);
4219 
4220 done:
4221 	if (data) {
4222 		mbuf_freem(data);
4223 	}
4224 	if (control) {
4225 		mbuf_freem(control);
4226 	}
4227 
4228 	return error;
4229 }
4230 
4231 static errno_t
flow_divert_pcb_init_internal(struct socket * so,uint32_t ctl_unit,uint32_t aggregate_unit)4232 flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
4233 {
4234 	errno_t error = 0;
4235 	struct flow_divert_pcb *fd_cb = NULL;
4236 	uint32_t agg_unit = aggregate_unit;
4237 	uint32_t policy_control_unit = ctl_unit;
4238 	bool is_aggregate = false;
4239 
4240 	if (so->so_flags & SOF_FLOW_DIVERT) {
4241 		return EALREADY;
4242 	}
4243 
4244 	fd_cb = flow_divert_pcb_create(so);
4245 	if (fd_cb == NULL) {
4246 		return ENOMEM;
4247 	}
4248 
4249 	do {
4250 		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
4251 		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
4252 			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
4253 			error = EINVAL;
4254 			break;
4255 		}
4256 
4257 		error = flow_divert_add_to_group(fd_cb, group_unit);
4258 		if (error == 0) {
4259 			so->so_fd_pcb = fd_cb;
4260 			so->so_flags |= SOF_FLOW_DIVERT;
4261 			fd_cb->control_group_unit = group_unit;
4262 			fd_cb->policy_control_unit = ctl_unit;
4263 			fd_cb->aggregate_unit = agg_unit;
4264 			if (is_aggregate) {
4265 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
4266 			} else {
4267 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
4268 			}
4269 
4270 			if (SOCK_TYPE(so) == SOCK_STREAM) {
4271 				flow_divert_set_protosw(so);
4272 			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
4273 				flow_divert_set_udp_protosw(so);
4274 			}
4275 
4276 			FDLOG0(LOG_INFO, fd_cb, "Created");
4277 		} else if (error != ENOENT) {
4278 			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
4279 		}
4280 	} while (error == ENOENT);
4281 
4282 	if (error != 0) {
4283 		FDRELEASE(fd_cb);
4284 	}
4285 
4286 	return error;
4287 }
4288 
4289 errno_t
flow_divert_pcb_init(struct socket * so)4290 flow_divert_pcb_init(struct socket *so)
4291 {
4292 	struct inpcb *inp = sotoinpcb(so);
4293 	uint32_t aggregate_units = 0;
4294 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4295 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4296 }
4297 
4298 errno_t
flow_divert_token_set(struct socket * so,struct sockopt * sopt)4299 flow_divert_token_set(struct socket *so, struct sockopt *sopt)
4300 {
4301 	uint32_t        ctl_unit        = 0;
4302 	uint32_t        key_unit        = 0;
4303 	uint32_t        aggregate_unit  = 0;
4304 	int             error           = 0;
4305 	int             hmac_error      = 0;
4306 	mbuf_ref_t      token           = NULL;
4307 
4308 	if (so->so_flags & SOF_FLOW_DIVERT) {
4309 		error = EALREADY;
4310 		goto done;
4311 	}
4312 
4313 	if (g_init_result) {
4314 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
4315 		error = ENOPROTOOPT;
4316 		goto done;
4317 	}
4318 
4319 	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
4320 	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
4321 	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
4322 		error = EINVAL;
4323 		goto done;
4324 	} else {
4325 		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
4326 			struct tcpcb *tp = sototcpcb(so);
4327 			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
4328 				error = EINVAL;
4329 				goto done;
4330 			}
4331 		}
4332 	}
4333 
4334 	error = soopt_getm(sopt, &token);
4335 	if (error) {
4336 		token = NULL;
4337 		goto done;
4338 	}
4339 
4340 	error = soopt_mcopyin(sopt, token);
4341 	if (error) {
4342 		token = NULL;
4343 		goto done;
4344 	}
4345 
4346 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
4347 	if (!error) {
4348 		key_unit = ntohl(key_unit);
4349 		if (key_unit >= GROUP_COUNT_MAX) {
4350 			key_unit = 0;
4351 		}
4352 	} else if (error != ENOENT) {
4353 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
4354 		goto done;
4355 	} else {
4356 		key_unit = 0;
4357 	}
4358 
4359 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
4360 	if (error) {
4361 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
4362 		goto done;
4363 	}
4364 
4365 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
4366 	if (error && error != ENOENT) {
4367 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
4368 		goto done;
4369 	}
4370 
4371 	/* A valid kernel control unit is required */
4372 	ctl_unit = ntohl(ctl_unit);
4373 	aggregate_unit = ntohl(aggregate_unit);
4374 
4375 	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
4376 		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
4377 		if (hmac_error && hmac_error != ENOENT) {
4378 			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
4379 			error = hmac_error;
4380 			goto done;
4381 		}
4382 	}
4383 
4384 	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
4385 	if (error == 0) {
4386 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4387 		int log_level = LOG_NOTICE;
4388 
4389 		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
4390 		if (error == 0) {
4391 			fd_cb->log_level = (uint8_t)log_level;
4392 		}
4393 		error = 0;
4394 
4395 		fd_cb->connect_token = token;
4396 		token = NULL;
4397 
4398 		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
4399 	}
4400 
4401 	if (hmac_error == 0) {
4402 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4403 		if (fd_cb != NULL) {
4404 			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
4405 		}
4406 	}
4407 
4408 done:
4409 	if (token != NULL) {
4410 		mbuf_freem(token);
4411 	}
4412 
4413 	return error;
4414 }
4415 
4416 errno_t
flow_divert_token_get(struct socket * so,struct sockopt * sopt)4417 flow_divert_token_get(struct socket *so, struct sockopt *sopt)
4418 {
4419 	uint32_t                    ctl_unit;
4420 	int                         error                   = 0;
4421 	uint8_t                     hmac[SHA_DIGEST_LENGTH];
4422 	struct flow_divert_pcb      *fd_cb                  = so->so_fd_pcb;
4423 	mbuf_ref_t                  token                   = NULL;
4424 	struct flow_divert_group    *control_group          = NULL;
4425 
4426 	if (!SO_IS_DIVERTED(so)) {
4427 		error = EINVAL;
4428 		goto done;
4429 	}
4430 
4431 	if (fd_cb->group == NULL) {
4432 		error = EINVAL;
4433 		goto done;
4434 	}
4435 
4436 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
4437 	if (error) {
4438 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
4439 		goto done;
4440 	}
4441 
4442 	ctl_unit = htonl(fd_cb->group->ctl_unit);
4443 
4444 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
4445 	if (error) {
4446 		goto done;
4447 	}
4448 
4449 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
4450 	if (error) {
4451 		goto done;
4452 	}
4453 
4454 	if (fd_cb->app_data != NULL) {
4455 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
4456 		if (error) {
4457 			goto done;
4458 		}
4459 	}
4460 
4461 	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
4462 	if (control_group != NULL) {
4463 		lck_rw_lock_shared(&control_group->lck);
4464 		ctl_unit = htonl(control_group->ctl_unit);
4465 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
4466 		if (!error) {
4467 			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
4468 		}
4469 		lck_rw_done(&control_group->lck);
4470 		FDGRP_RELEASE(control_group);
4471 	} else {
4472 		error = ENOPROTOOPT;
4473 	}
4474 
4475 	if (error) {
4476 		goto done;
4477 	}
4478 
4479 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
4480 	if (error) {
4481 		goto done;
4482 	}
4483 
4484 	if (sopt->sopt_val == USER_ADDR_NULL) {
4485 		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
4486 		sopt->sopt_valsize = mbuf_pkthdr_len(token);
4487 		goto done;
4488 	}
4489 
4490 	error = soopt_mcopyout(sopt, token);
4491 	if (error) {
4492 		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
4493 		goto done;
4494 	}
4495 
4496 done:
4497 	if (token != NULL) {
4498 		mbuf_freem(token);
4499 	}
4500 
4501 	return error;
4502 }
4503 
4504 void
flow_divert_group_destroy(struct flow_divert_group * group)4505 flow_divert_group_destroy(struct flow_divert_group *group)
4506 {
4507 	lck_rw_lock_exclusive(&group->lck);
4508 
4509 	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);
4510 
4511 	if (group->token_key != NULL) {
4512 		memset(group->token_key, 0, group->token_key_size);
4513 		kfree_data_sized_by(group->token_key, group->token_key_size);
4514 	}
4515 
4516 	/* Re-set the current trie */
4517 	if (group->signing_id_trie.memory != NULL) {
4518 		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
4519 	}
4520 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
4521 	group->signing_id_trie.root = NULL_TRIE_IDX;
4522 
4523 	lck_rw_done(&group->lck);
4524 
4525 	zfree(flow_divert_group_zone, group);
4526 }
4527 
4528 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4529 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4530 {
4531 	struct flow_divert_group *new_group = NULL;
4532 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4533 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4534 	RB_INIT(&new_group->pcb_tree);
4535 	new_group->ctl_unit = unit;
4536 	new_group->in_process_pid = pid;
4537 	MBUFQ_INIT(&new_group->send_queue);
4538 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4539 	new_group->ref_count = 1;
4540 	new_group->order = FLOW_DIVERT_ORDER_LAST;
4541 	return new_group;
4542 }
4543 
4544 static errno_t
flow_divert_kctl_setup(u_int32_t * unit,void ** unitinfo)4545 flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
4546 {
4547 	if (unit == NULL || unitinfo == NULL) {
4548 		return EINVAL;
4549 	}
4550 
4551 	struct flow_divert_group *new_group = NULL;
4552 	errno_t error = 0;
4553 	lck_rw_lock_shared(&g_flow_divert_group_lck);
4554 	if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
4555 		// Return next unused in-process unit
4556 		u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
4557 		struct flow_divert_group *group_next = NULL;
4558 		TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
4559 			if (group_next->ctl_unit > unit_cursor) {
4560 				// Found a gap, lets fill it in
4561 				break;
4562 			}
4563 			unit_cursor = group_next->ctl_unit + 1;
4564 			if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4565 				break;
4566 			}
4567 		}
4568 		if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4569 			error = EBUSY;
4570 		} else {
4571 			*unit = unit_cursor;
4572 			new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
4573 			if (group_next != NULL) {
4574 				TAILQ_INSERT_BEFORE(group_next, new_group, chain);
4575 			} else {
4576 				TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
4577 			}
4578 			g_active_group_count++;
4579 		}
4580 	} else {
4581 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4582 			error = EPERM;
4583 		} else {
4584 			if (g_flow_divert_groups == NULL) {
4585 				g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4586 				    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4587 			}
4588 
4589 			// Return next unused group unit
4590 			bool found_unused_unit = false;
4591 			u_int32_t unit_cursor;
4592 			for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
4593 				struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
4594 				if (group == NULL) {
4595 					// Open slot, assign this one
4596 					*unit = unit_cursor;
4597 					new_group = flow_divert_allocate_group(*unit, 0);
4598 					g_flow_divert_groups[*unit] = new_group;
4599 					found_unused_unit = true;
4600 					g_active_group_count++;
4601 					break;
4602 				}
4603 			}
4604 			if (!found_unused_unit) {
4605 				error = EBUSY;
4606 			}
4607 		}
4608 	}
4609 	lck_rw_done(&g_flow_divert_group_lck);
4610 
4611 	*unitinfo = new_group;
4612 
4613 	return error;
4614 }
4615 
4616 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4617 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4618 {
4619 	if (unitinfo == NULL) {
4620 		return EINVAL;
4621 	}
4622 
4623 	// Just validate. The group will already have been allocated.
4624 	struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4625 	if (group == NULL || sac->sc_unit != group->ctl_unit) {
4626 		FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4627 		    sac->sc_unit, group ? group->ctl_unit : 0);
4628 		return EINVAL;
4629 	}
4630 
4631 	return 0;
4632 }
4633 
4634 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4635 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4636 {
4637 	struct flow_divert_group    *group  = NULL;
4638 	errno_t                     error   = 0;
4639 
4640 	if (unitinfo == NULL) {
4641 		return 0;
4642 	}
4643 
4644 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4645 
4646 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4647 
4648 	if (g_active_group_count == 0) {
4649 		panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4650 		    unit, g_active_group_count);
4651 	}
4652 
4653 	if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4654 		if (unit >= GROUP_COUNT_MAX) {
4655 			return EINVAL;
4656 		}
4657 
4658 		if (g_flow_divert_groups == NULL) {
4659 			panic("flow divert group %u is disconnecting, but groups array is NULL",
4660 			    unit);
4661 		}
4662 		group = g_flow_divert_groups[unit];
4663 
4664 		if (group != (struct flow_divert_group *)unitinfo) {
4665 			panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4666 		}
4667 
4668 		g_flow_divert_groups[unit] = NULL;
4669 	} else {
4670 		group = (struct flow_divert_group *)unitinfo;
4671 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4672 			panic("flow divert group %u is disconnecting, but in-process group list is empty",
4673 			    unit);
4674 		}
4675 
4676 		TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4677 	}
4678 
4679 	g_active_group_count--;
4680 
4681 	if (g_active_group_count == 0) {
4682 		kfree_type(struct flow_divert_group *,
4683 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4684 		g_flow_divert_groups = NULL;
4685 	}
4686 
4687 	lck_rw_done(&g_flow_divert_group_lck);
4688 
4689 	if (group != NULL) {
4690 		flow_divert_close_all(group);
4691 		FDGRP_RELEASE(group);
4692 	} else {
4693 		error = EINVAL;
4694 	}
4695 
4696 	return error;
4697 }
4698 
4699 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_ref_t m,__unused int flags)4700 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_ref_t m, __unused int flags)
4701 {
4702 	errno_t error = 0;
4703 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4704 	if (group != NULL) {
4705 		error = flow_divert_input(m, group);
4706 		FDGRP_RELEASE(group);
4707 	} else {
4708 		error = ENOENT;
4709 	}
4710 	return error;
4711 }
4712 
4713 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4714 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4715 {
4716 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4717 	if (group == NULL) {
4718 		return;
4719 	}
4720 
4721 	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4722 		struct flow_divert_pcb                  *fd_cb;
4723 		SLIST_HEAD(, flow_divert_pcb)   tmp_list;
4724 
4725 		lck_rw_lock_exclusive(&group->lck);
4726 
4727 		while (!MBUFQ_EMPTY(&group->send_queue)) {
4728 			mbuf_ref_t next_packet;
4729 			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4730 			next_packet = MBUFQ_FIRST(&group->send_queue);
4731 			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4732 			if (error) {
4733 				FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4734 				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4735 				lck_rw_done(&group->lck);
4736 				return;
4737 			}
4738 			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4739 		}
4740 
4741 		SLIST_INIT(&tmp_list);
4742 
4743 		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4744 			FDRETAIN(fd_cb);
4745 			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4746 		}
4747 
4748 		lck_rw_done(&group->lck);
4749 
4750 		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4751 			FDLOCK(fd_cb);
4752 			if (fd_cb->so != NULL) {
4753 				struct socket *so = fd_cb->so;
4754 				flow_divert_lock_socket(so, fd_cb);
4755 				if (fd_cb->group != NULL) {
4756 					flow_divert_send_buffered_data(fd_cb, FALSE);
4757 				}
4758 				flow_divert_unlock_socket(so, fd_cb);
4759 			}
4760 			FDUNLOCK(fd_cb);
4761 			FDRELEASE(fd_cb);
4762 		}
4763 	}
4764 
4765 	FDGRP_RELEASE(group);
4766 }
4767 
4768 static int
flow_divert_kctl_init(void)4769 flow_divert_kctl_init(void)
4770 {
4771 	struct kern_ctl_reg     ctl_reg;
4772 	int                     result;
4773 
4774 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4775 
4776 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4777 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4778 
4779 	// Do not restrict to privileged processes. flow_divert_kctl_setup checks
4780 	// permissions separately.
4781 	ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4782 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4783 
4784 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4785 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4786 	ctl_reg.ctl_send = flow_divert_kctl_send;
4787 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4788 	ctl_reg.ctl_setup = flow_divert_kctl_setup;
4789 
4790 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4791 
4792 	if (result) {
4793 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4794 		return result;
4795 	}
4796 
4797 	return 0;
4798 }
4799 
4800 void
flow_divert_init(void)4801 flow_divert_init(void)
4802 {
4803 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4804 	nil_pcb.log_level = LOG_NOTICE;
4805 
4806 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4807 
4808 	VERIFY(g_tcp_protosw != NULL);
4809 
4810 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4811 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4812 
4813 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4814 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4815 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4816 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4817 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4818 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4819 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4820 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4821 
4822 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4823 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4824 
4825 	/*
4826 	 * Socket filters shouldn't attach/detach to/from this protosw
4827 	 * since pr_protosw is to be used instead, which points to the
4828 	 * real protocol; if they do, it is a bug and we should panic.
4829 	 */
4830 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4831 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4832 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4833 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4834 
4835 	/* UDP */
4836 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4837 	VERIFY(g_udp_protosw != NULL);
4838 
4839 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4840 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4841 
4842 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4843 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4844 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4845 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4846 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4847 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4848 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4849 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4850 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4851 
4852 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4853 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4854 
4855 	/*
4856 	 * Socket filters shouldn't attach/detach to/from this protosw
4857 	 * since pr_protosw is to be used instead, which points to the
4858 	 * real protocol; if they do, it is a bug and we should panic.
4859 	 */
4860 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4861 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4862 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4863 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4864 
4865 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4866 
4867 	VERIFY(g_tcp6_protosw != NULL);
4868 
4869 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4870 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4871 
4872 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4873 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4874 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4875 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4876 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4877 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4878 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4879 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4880 
4881 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4882 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4883 	/*
4884 	 * Socket filters shouldn't attach/detach to/from this protosw
4885 	 * since pr_protosw is to be used instead, which points to the
4886 	 * real protocol; if they do, it is a bug and we should panic.
4887 	 */
4888 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4889 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4890 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4891 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4892 
4893 	/* UDP6 */
4894 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4895 
4896 	VERIFY(g_udp6_protosw != NULL);
4897 
4898 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4899 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4900 
4901 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4902 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4903 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4904 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4905 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4906 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4907 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4908 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4909 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4910 
4911 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4912 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4913 	/*
4914 	 * Socket filters shouldn't attach/detach to/from this protosw
4915 	 * since pr_protosw is to be used instead, which points to the
4916 	 * real protocol; if they do, it is a bug and we should panic.
4917 	 */
4918 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4919 	    __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4920 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4921 	    __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4922 
4923 	TAILQ_INIT(&g_flow_divert_in_process_group_list);
4924 
4925 	g_init_result = flow_divert_kctl_init();
4926 	if (g_init_result) {
4927 		goto done;
4928 	}
4929 
4930 done:
4931 	if (g_init_result != 0) {
4932 		if (g_flow_divert_kctl_ref != NULL) {
4933 			ctl_deregister(g_flow_divert_kctl_ref);
4934 			g_flow_divert_kctl_ref = NULL;
4935 		}
4936 	}
4937 }
4938