xref: /xnu-10002.61.3/bsd/netinet/flow_divert.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2012-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #if CONTENT_FILTER
73 #include <net/content_filter.h>
74 #endif /* CONTENT_FILTER */
75 
76 #define FLOW_DIVERT_CONNECT_STARTED             0x00000001
77 #define FLOW_DIVERT_READ_CLOSED                 0x00000002
78 #define FLOW_DIVERT_WRITE_CLOSED                0x00000004
79 #define FLOW_DIVERT_TUNNEL_RD_CLOSED    0x00000008
80 #define FLOW_DIVERT_TUNNEL_WR_CLOSED    0x00000010
81 #define FLOW_DIVERT_HAS_HMAC            0x00000040
82 #define FLOW_DIVERT_NOTIFY_ON_RECEIVED  0x00000080
83 #define FLOW_DIVERT_IMPLICIT_CONNECT    0x00000100
84 #define FLOW_DIVERT_DID_SET_LOCAL_ADDR  0x00000200
85 #define FLOW_DIVERT_HAS_TOKEN           0x00000400
86 #define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
87 #define FLOW_DIVERT_FLOW_IS_TRANSPARENT   0x00001000
88 
89 #define FDLOG(level, pcb, format, ...) \
90 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)
91 
92 #define FDLOG0(level, pcb, msg) \
93 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)
94 
95 #define FDRETAIN(pcb)                   if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count)
96 #define FDRELEASE(pcb)                                                                                                          \
97 	do {                                                                                                                                    \
98 	        if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) {       \
99 	                flow_divert_pcb_destroy(pcb);                                                                   \
100 	        }                                                                                                                                       \
101 	} while (0)
102 
103 #define FDGRP_RETAIN(grp)  if ((grp) != NULL) OSIncrementAtomic(&(grp)->ref_count)
104 #define FDGRP_RELEASE(grp) if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) flow_divert_group_destroy(grp)
105 
106 #define FDLOCK(pcb)                                             lck_mtx_lock(&(pcb)->mtx)
107 #define FDUNLOCK(pcb)                                   lck_mtx_unlock(&(pcb)->mtx)
108 
109 #define FD_CTL_SENDBUFF_SIZE                    (128 * 1024)
110 
111 #define GROUP_BIT_CTL_ENQUEUE_BLOCKED   0
112 
113 #define GROUP_COUNT_MAX                                 31
114 #define FLOW_DIVERT_MAX_NAME_SIZE               4096
115 #define FLOW_DIVERT_MAX_KEY_SIZE                1024
116 #define FLOW_DIVERT_MAX_TRIE_MEMORY             (1024 * 1024)
117 
118 struct flow_divert_trie_node {
119 	uint16_t start;
120 	uint16_t length;
121 	uint16_t child_map;
122 };
123 
124 #define CHILD_MAP_SIZE                  256
125 #define NULL_TRIE_IDX                   0xffff
126 #define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
127 #define TRIE_CHILD(t, i, b)             (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
128 #define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])
129 
130 #define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
131 
132 static struct flow_divert_pcb           nil_pcb;
133 
134 static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
135 static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
136 static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
137     &flow_divert_mtx_attr);
138 
139 static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;
140 
141 static struct flow_divert_group         **g_flow_divert_groups  = NULL;
142 static uint32_t                         g_active_group_count    = 0;
143 
144 static  errno_t                         g_init_result           = 0;
145 
146 static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;
147 
148 static struct protosw                   g_flow_divert_in_protosw;
149 static struct pr_usrreqs                g_flow_divert_in_usrreqs;
150 static struct protosw                   g_flow_divert_in_udp_protosw;
151 static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
152 static struct ip6protosw                g_flow_divert_in6_protosw;
153 static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
154 static struct ip6protosw                g_flow_divert_in6_udp_protosw;
155 static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;
156 
157 static struct protosw                   *g_tcp_protosw          = NULL;
158 static struct ip6protosw                *g_tcp6_protosw         = NULL;
159 static struct protosw                   *g_udp_protosw          = NULL;
160 static struct ip6protosw                *g_udp6_protosw         = NULL;
161 
162 static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
163     NET_KT_DEFAULT);
164 static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
165     NET_KT_DEFAULT);
166 
167 static errno_t
168 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
169 
170 static boolean_t
171 flow_divert_is_sockaddr_valid(struct sockaddr *addr);
172 
173 static int
174 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr);
175 
176 struct sockaddr *
177 flow_divert_get_buffered_target_address(mbuf_t buffer);
178 
179 static void
180 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);
181 
182 static void flow_divert_group_destroy(struct flow_divert_group *group);
183 
184 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)185 flow_divert_syslog_type_to_oslog_type(int syslog_type)
186 {
187 	switch (syslog_type) {
188 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
189 	case LOG_INFO: return OS_LOG_TYPE_INFO;
190 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
191 	default: return OS_LOG_TYPE_DEFAULT;
192 	}
193 }
194 
195 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)196 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
197 {
198 	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
199 }
200 
201 RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
202 RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
203 
204 static const char *
flow_divert_packet_type2str(uint8_t packet_type)205 flow_divert_packet_type2str(uint8_t packet_type)
206 {
207 	switch (packet_type) {
208 	case FLOW_DIVERT_PKT_CONNECT:
209 		return "connect";
210 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
211 		return "connect result";
212 	case FLOW_DIVERT_PKT_DATA:
213 		return "data";
214 	case FLOW_DIVERT_PKT_CLOSE:
215 		return "close";
216 	case FLOW_DIVERT_PKT_READ_NOTIFY:
217 		return "read notification";
218 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
219 		return "properties update";
220 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
221 		return "app map create";
222 	default:
223 		return "unknown";
224 	}
225 }
226 
227 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)228 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
229 {
230 	struct flow_divert_pcb  key_item;
231 	struct flow_divert_pcb  *fd_cb          = NULL;
232 
233 	key_item.hash = hash;
234 
235 	lck_rw_lock_shared(&group->lck);
236 	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
237 	FDRETAIN(fd_cb);
238 	lck_rw_done(&group->lck);
239 
240 	return fd_cb;
241 }
242 
243 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)244 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
245 {
246 	struct flow_divert_group *group = NULL;
247 	lck_rw_lock_shared(&g_flow_divert_group_lck);
248 	if (g_active_group_count == 0) {
249 		if (fd_cb != NULL) {
250 			FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
251 		}
252 	} else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
253 		FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
254 	} else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
255 		if (g_flow_divert_groups == NULL) {
256 			if (fd_cb != NULL) {
257 				FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
258 			}
259 		} else {
260 			group = g_flow_divert_groups[ctl_unit];
261 			if (group == NULL) {
262 				if (fd_cb != NULL) {
263 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
264 				}
265 			} else {
266 				FDGRP_RETAIN(group);
267 			}
268 		}
269 	} else {
270 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
271 			if (fd_cb != NULL) {
272 				FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
273 			}
274 		} else {
275 			struct flow_divert_group *group_cursor = NULL;
276 			TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
277 				if (group_cursor->ctl_unit == ctl_unit) {
278 					group = group_cursor;
279 					break;
280 				}
281 			}
282 			if (group == NULL) {
283 				if (fd_cb != NULL) {
284 					FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
285 				}
286 			} else if (fd_cb != NULL &&
287 			    (fd_cb->so == NULL ||
288 			    group_cursor->in_process_pid != fd_cb->so->last_pid)) {
289 				FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
290 				    ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
291 			} else {
292 				FDGRP_RETAIN(group);
293 			}
294 		}
295 	}
296 	lck_rw_done(&g_flow_divert_group_lck);
297 	return group;
298 }
299 
300 static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb * fd_cb,struct flow_divert_group * group)301 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
302 {
303 	int error = 0;
304 	lck_rw_lock_exclusive(&group->lck);
305 	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
306 		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
307 			fd_cb->group = group;
308 			fd_cb->control_group_unit = group->ctl_unit;
309 			FDRETAIN(fd_cb); /* The group now has a reference */
310 		} else {
311 			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
312 			error = EEXIST;
313 		}
314 	} else {
315 		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
316 		error = ENOENT;
317 	}
318 	lck_rw_done(&group->lck);
319 	return error;
320 }
321 
/*
 * Assign a unique flow hash to fd_cb and insert it into the group for
 * ctl_unit.  A candidate hash is generated from a monotonically
 * increasing key plus randomness, then checked for collisions against
 * every other non-in-process group (units 1..GROUP_COUNT_MAX-1) before
 * insertion.  Up to four attempts are made before giving up with EEXIST.
 */
static errno_t
flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
{
	errno_t error = 0;
	struct flow_divert_group *group = NULL;
	/* Shared across all calls; monotone key keeps hashes distinct over time */
	static uint32_t g_nextkey = 1;
	/* Lazily seeded once from the RNG on first use */
	static uint32_t g_hash_seed = 0;
	int try_count = 0;

	group = flow_divert_group_lookup(ctl_unit, fd_cb);
	if (group == NULL) {
		return ENOENT;
	}

	do {
		uint32_t key[2];
		uint32_t idx;

		key[0] = g_nextkey++;
		key[1] = RandomULong();

		if (g_hash_seed == 0) {
			g_hash_seed = RandomULong();
		}

		error = 0;
		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);

		/*
		 * Scan every other non-in-process group for a PCB that already
		 * uses this hash; the hash must be globally unique so that
		 * control messages can be routed by hash alone.
		 */
		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
			if (idx == ctl_unit) {
				continue;
			}
			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
			if (curr_group != NULL) {
				lck_rw_lock_shared(&curr_group->lck);
				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
					error = EEXIST;
				}
				lck_rw_done(&curr_group->lck);
				FDGRP_RELEASE(curr_group);
			}
		}

		if (error == 0) {
			/* May also fail with EEXIST on a collision within `group` itself */
			error = flow_divert_pcb_insert(fd_cb, group);
		}
	} while (error == EEXIST && try_count++ < 3);

	if (error == EEXIST) {
		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
		fd_cb->hash = 0;
	}

	FDGRP_RELEASE(group);
	return error;
}
378 
379 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)380 flow_divert_pcb_create(socket_t so)
381 {
382 	struct flow_divert_pcb  *new_pcb = NULL;
383 
384 	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
385 	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
386 	new_pcb->so = so;
387 	new_pcb->log_level = nil_pcb.log_level;
388 
389 	FDRETAIN(new_pcb);      /* Represents the socket's reference */
390 
391 	return new_pcb;
392 }
393 
394 static void
flow_divert_pcb_destroy(struct flow_divert_pcb * fd_cb)395 flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
396 {
397 	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %u, tunnel tx %u, tunnel rx %u",
398 	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);
399 
400 	if (fd_cb->connect_token != NULL) {
401 		mbuf_freem(fd_cb->connect_token);
402 	}
403 	if (fd_cb->connect_packet != NULL) {
404 		mbuf_freem(fd_cb->connect_packet);
405 	}
406 	if (fd_cb->app_data != NULL) {
407 		kfree_data(fd_cb->app_data, fd_cb->app_data_length);
408 	}
409 	if (fd_cb->original_remote_endpoint != NULL) {
410 		free_sockaddr(fd_cb->original_remote_endpoint);
411 	}
412 	zfree(flow_divert_pcb_zone, fd_cb);
413 }
414 
415 static void
flow_divert_pcb_remove(struct flow_divert_pcb * fd_cb)416 flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
417 {
418 	if (fd_cb->group != NULL) {
419 		struct flow_divert_group *group = fd_cb->group;
420 		lck_rw_lock_exclusive(&group->lck);
421 		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
422 		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
423 		fd_cb->group = NULL;
424 		FDRELEASE(fd_cb);                               /* Release the group's reference */
425 		lck_rw_done(&group->lck);
426 	}
427 }
428 
429 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_t * packet)430 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_t *packet)
431 {
432 	struct flow_divert_packet_header        hdr;
433 	int                                     error           = 0;
434 
435 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
436 	if (error) {
437 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
438 		return error;
439 	}
440 
441 	hdr.packet_type = packet_type;
442 	hdr.conn_id = htonl(fd_cb->hash);
443 
444 	/* Lay down the header */
445 	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
446 	if (error) {
447 		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
448 		mbuf_freem(*packet);
449 		*packet = NULL;
450 		return error;
451 	}
452 
453 	return 0;
454 }
455 
456 static int
flow_divert_packet_append_tlv(mbuf_t packet,uint8_t type,uint32_t length,const void * value)457 flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, uint32_t length, const void *value)
458 {
459 	uint32_t        net_length      = htonl(length);
460 	int                     error           = 0;
461 
462 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
463 	if (error) {
464 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
465 		return error;
466 	}
467 
468 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
469 	if (error) {
470 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
471 		return error;
472 	}
473 
474 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
475 	if (error) {
476 		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
477 		return error;
478 	}
479 
480 	return error;
481 }
482 
/*
 * Scan the packet's TLV region for a TLV of the given type, starting at
 * byte position `offset`.  When `next` is non-zero the TLV at `offset`
 * is skipped and the search continues with the one after it.  Returns
 * the byte offset of the matching TLV's type byte, or -1 with *err set
 * (ENOENT or an mbuf_copydata error) if no match is found before the
 * end of the packet.
 */
static int
flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next)
{
	size_t          cursor                  = offset;
	int                     error                   = 0;
	uint32_t        curr_length;
	uint8_t         curr_type;

	*err = 0;

	do {
		if (!next) {
			/* Read the type byte of the TLV at the cursor */
			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
			if (error) {
				/* Ran off the end of the packet without finding the type */
				*err = ENOENT;
				return -1;
			}
		} else {
			/*
			 * Skip the TLV at `offset`: force a mismatch on the first
			 * iteration so the cursor advances past it.
			 */
			next = 0;
			curr_type = FLOW_DIVERT_TLV_NIL;
		}

		if (curr_type != type) {
			/* Advance past this TLV: type byte + 4-byte length + value */
			cursor += sizeof(curr_type);
			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
			if (error) {
				*err = error;
				return -1;
			}

			cursor += (sizeof(curr_length) + ntohl(curr_length));
		}
	} while (curr_type != type);

	return (int)cursor;
}
519 
/*
 * Locate the TLV of the given type (starting the search at `offset`)
 * and copy its value out.  On success, *val_size (if non-NULL) receives
 * the TLV's full value length, and up to buff_len bytes of the value
 * are copied into buff (zero-filled first, so short values leave the
 * tail zeroed).  Returns 0 on success or an errno; EINVAL if the TLV's
 * declared length runs past the end of the packet.
 */
static int
flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, uint32_t *val_size)
{
	int                     error           = 0;
	uint32_t        length;
	int                     tlv_offset;

	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
	if (tlv_offset < 0) {
		return error;
	}

	/* Read the 4-byte length field that follows the type byte */
	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
	if (error) {
		return error;
	}

	length = ntohl(length);

	/* Offset of the value bytes, just past the type and length fields */
	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);

	/* Reject a declared length that extends beyond the packet */
	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
		return EINVAL;
	}

	if (val_size != NULL) {
		*val_size = length;
	}

	if (buff != NULL && buff_len > 0) {
		/* Copy at most buff_len bytes; any remainder of buff stays zeroed */
		memset(buff, 0, buff_len);
		size_t to_copy = (length < buff_len) ? length : buff_len;
		error = mbuf_copydata(packet, data_offset, to_copy, buff);
		if (error) {
			return error;
		}
	}

	return 0;
}
561 
562 static int
flow_divert_packet_compute_hmac(mbuf_t packet,struct flow_divert_group * group,uint8_t * hmac)563 flow_divert_packet_compute_hmac(mbuf_t packet, struct flow_divert_group *group, uint8_t *hmac)
564 {
565 	mbuf_t  curr_mbuf       = packet;
566 
567 	if (g_crypto_funcs == NULL || group->token_key == NULL) {
568 		return ENOPROTOOPT;
569 	}
570 
571 	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
572 	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);
573 
574 	while (curr_mbuf != NULL) {
575 		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
576 		curr_mbuf = mbuf_next(curr_mbuf);
577 	}
578 
579 	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);
580 
581 	return 0;
582 }
583 
/*
 * Verify the HMAC TLV on a token packet against an HMAC computed with
 * the key of the group identified by ctl_unit.  The HMAC TLV (and
 * anything after it) is chopped off the packet before the digest is
 * computed, since the signature covers only the preceding bytes.
 * Returns 0 when the HMACs match, EINVAL on mismatch, or another errno.
 */
static int
flow_divert_packet_verify_hmac(mbuf_t packet, uint32_t ctl_unit)
{
	int error = 0;
	struct flow_divert_group *group = NULL;
	int hmac_offset;
	uint8_t packet_hmac[SHA_DIGEST_LENGTH];
	uint8_t computed_hmac[SHA_DIGEST_LENGTH];
	mbuf_t tail;

	group = flow_divert_group_lookup(ctl_unit, NULL);
	if (group == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
		return ENOPROTOOPT;
	}

	/* Hold the group lock shared while reading its token key */
	lck_rw_lock_shared(&group->lck);

	if (group->token_key == NULL) {
		error = ENOPROTOOPT;
		goto done;
	}

	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
	if (hmac_offset < 0) {
		goto done;
	}

	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
	if (error) {
		goto done;
	}

	/* Chop off the HMAC TLV */
	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
	if (error) {
		goto done;
	}

	/*
	 * NOTE(review): mbuf_free releases a single mbuf; if the split-off
	 * tail could ever span multiple mbufs, mbuf_freem (chain free) would
	 * be the safer call — confirm the HMAC TLV always fits one mbuf.
	 */
	mbuf_free(tail);

	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
	if (error) {
		goto done;
	}

	/* Constant-time comparison to avoid leaking HMAC bytes via timing */
	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
		error = EINVAL;
		goto done;
	}

done:
	if (group != NULL) {
		lck_rw_done(&group->lck);
		FDGRP_RELEASE(group);
	}
	return error;
}
643 
644 static void
flow_divert_add_data_statistics(struct flow_divert_pcb * fd_cb,size_t data_len,Boolean send)645 flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
646 {
647 	struct inpcb *inp = NULL;
648 	struct ifnet *ifp = NULL;
649 	Boolean cell = FALSE;
650 	Boolean wifi = FALSE;
651 	Boolean wired = FALSE;
652 
653 	inp = sotoinpcb(fd_cb->so);
654 	if (inp == NULL) {
655 		return;
656 	}
657 
658 	if (inp->inp_vflag & INP_IPV4) {
659 		ifp = inp->inp_last_outifp;
660 	} else if (inp->inp_vflag & INP_IPV6) {
661 		ifp = inp->in6p_last_outifp;
662 	}
663 	if (ifp != NULL) {
664 		cell = IFNET_IS_CELLULAR(ifp);
665 		wifi = (!cell && IFNET_IS_WIFI(ifp));
666 		wired = (!wifi && IFNET_IS_WIRED(ifp));
667 	}
668 
669 	if (send) {
670 		INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1);
671 		INP_ADD_STAT(inp, cell, wifi, wired, txbytes, data_len);
672 	} else {
673 		INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1);
674 		INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, data_len);
675 	}
676 	inp_set_activity_bitmap(inp);
677 }
678 
679 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)680 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
681 {
682 	struct inpcb *inp = sotoinpcb(fd_cb->so);
683 	if (INP_NO_CELLULAR(inp)) {
684 		struct ifnet *ifp = NULL;
685 		if (inp->inp_vflag & INP_IPV4) {
686 			ifp = inp->inp_last_outifp;
687 		} else if (inp->inp_vflag & INP_IPV6) {
688 			ifp = inp->in6p_last_outifp;
689 		}
690 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
691 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
692 			return EHOSTUNREACH;
693 		}
694 	}
695 	return 0;
696 }
697 
698 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)699 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
700 {
701 	struct inpcb *inp = sotoinpcb(fd_cb->so);
702 	if (INP_NO_EXPENSIVE(inp)) {
703 		struct ifnet *ifp = NULL;
704 		if (inp->inp_vflag & INP_IPV4) {
705 			ifp = inp->inp_last_outifp;
706 		} else if (inp->inp_vflag & INP_IPV6) {
707 			ifp = inp->in6p_last_outifp;
708 		}
709 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
710 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
711 			return EHOSTUNREACH;
712 		}
713 	}
714 	return 0;
715 }
716 
717 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)718 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
719 {
720 	struct inpcb *inp = sotoinpcb(fd_cb->so);
721 	if (INP_NO_CONSTRAINED(inp)) {
722 		struct ifnet *ifp = NULL;
723 		if (inp->inp_vflag & INP_IPV4) {
724 			ifp = inp->inp_last_outifp;
725 		} else if (inp->inp_vflag & INP_IPV6) {
726 			ifp = inp->in6p_last_outifp;
727 		}
728 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
729 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
730 			return EHOSTUNREACH;
731 		}
732 	}
733 	return 0;
734 }
735 
736 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,bool tunnel,bool flush_snd)737 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
738 {
739 	if (how != SHUT_RD) {
740 		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
741 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
742 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
743 			if (flush_snd) {
744 				/* If the tunnel is not accepting writes any more, then flush the send buffer */
745 				sbflush(&fd_cb->so->so_snd);
746 			}
747 		}
748 	}
749 	if (how != SHUT_WR) {
750 		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
751 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
752 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
753 		}
754 	}
755 }
756 
757 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)758 trie_node_alloc(struct flow_divert_trie *trie)
759 {
760 	if (trie->nodes_free_next < trie->nodes_count) {
761 		uint16_t node_idx = trie->nodes_free_next++;
762 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
763 		return node_idx;
764 	} else {
765 		return NULL_TRIE_IDX;
766 	}
767 }
768 
769 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)770 trie_child_map_alloc(struct flow_divert_trie *trie)
771 {
772 	if (trie->child_maps_free_next < trie->child_maps_count) {
773 		return trie->child_maps_free_next++;
774 	} else {
775 		return NULL_TRIE_IDX;
776 	}
777 }
778 
779 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)780 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
781 {
782 	uint16_t start = trie->bytes_free_next;
783 	if (start + bytes_size <= trie->bytes_count) {
784 		if (start != bytes_idx) {
785 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
786 		}
787 		trie->bytes_free_next += bytes_size;
788 		return start;
789 	} else {
790 		return NULL_TRIE_IDX;
791 	}
792 }
793 
/*
 * Insert the string stored at trie bytes [string_start, string_start +
 * string_len) into the compressed prefix trie.  Walks down from the
 * root matching bytes; an existing node whose label only partially
 * matches is split into a shared prefix node plus the remainder, and
 * any unmatched tail of the input becomes a new leaf.  Returns the
 * index of the node that terminates the string, or NULL_TRIE_IDX when
 * the preallocated node/child-map/byte pools are exhausted.
 */
static uint16_t
flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
{
	uint16_t current = trie->root;
	uint16_t child = trie->root;
	uint16_t string_end = string_start + (uint16_t)string_len;
	uint16_t string_idx = string_start;
	uint16_t string_remainder = (uint16_t)string_len;

	while (child != NULL_TRIE_IDX) {
		uint16_t parent = current;
		uint16_t node_idx;
		uint16_t current_end;

		current = child;
		child = NULL_TRIE_IDX;

		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;

		/* Match the current node's label against the input bytes */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < current_end &&
		    string_idx < string_end &&
		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
		    node_idx++, string_idx++) {
			;
		}

		string_remainder = string_end - string_idx;

		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
			/*
			 * We did not reach the end of the current node's string.
			 * We need to split the current node into two:
			 *   1. A new node that contains the prefix of the node that matches
			 *      the prefix of the string being inserted.
			 *   2. The current node modified to point to the remainder
			 *      of the current node's string.
			 */
			uint16_t prefix = trie_node_alloc(trie);
			if (prefix == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
				return NULL_TRIE_IDX;
			}

			/*
			 * Prefix points to the portion of the current nodes's string that has matched
			 * the input string thus far.
			 */
			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);

			/*
			 * Prefix has the current node as the child corresponding to the first byte
			 * after the split.
			 */
			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
				return NULL_TRIE_IDX;
			}
			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;

			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;

			/* Current node is adjusted to point to the remainder */
			TRIE_NODE(trie, current).start = node_idx;
			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;

			/* We want to insert the new leaf (if any) as a child of the prefix */
			current = prefix;
		}

		if (string_remainder > 0) {
			/*
			 * We still have bytes in the string that have not been matched yet.
			 * If the current node has children, iterate to the child corresponding
			 * to the next byte in the string.
			 */
			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
			}
		}
	} /* while (child != NULL_TRIE_IDX) */

	if (string_remainder > 0) {
		/* Add a new leaf containing the remainder of the string */
		uint16_t leaf = trie_node_alloc(trie);
		if (leaf == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}

		/* Compact the leaf's bytes into the trie's byte pool */
		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}
		TRIE_NODE(trie, leaf).length = string_remainder;

		/* Set the new leaf as the child of the current node */
		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
				return NULL_TRIE_IDX;
			}
		}
		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
		current = leaf;
	} /* else duplicate or this string is a prefix of one of the existing strings */

	return current;
}
908 
909 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
910 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes)911 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
912 {
913 	uint16_t current = trie->root;
914 	uint16_t string_idx = 0;
915 
916 	while (current != NULL_TRIE_IDX) {
917 		uint16_t next = NULL_TRIE_IDX;
918 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
919 		uint16_t node_idx;
920 
921 		for (node_idx = TRIE_NODE(trie, current).start;
922 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
923 		    node_idx++, string_idx++) {
924 			;
925 		}
926 
927 		if (node_idx == node_end) {
928 			if (string_bytes[string_idx] == '\0') {
929 				return current; /* Got an exact match */
930 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
931 			    0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
932 				return current; /* Got an apple webclip id prefix match */
933 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
934 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
935 			}
936 		}
937 		current = next;
938 	}
939 
940 	return NULL_TRIE_IDX;
941 }
942 
/*
 * Context carried through proc_iterate() when resolving an executable UUID
 * to a process (see flow_divert_find_proc_by_uuid and its filter/callout).
 */
struct uuid_search_info {
	uuid_t target_uuid;                   /* executable UUID being searched for */
	char *found_signing_id;               /* heap copy of the first match's signing ID (kalloc_data) */
	boolean_t found_multiple_signing_ids; /* set when matching procs disagree on signing ID */
	proc_t found_proc;                    /* claimed process, or PROC_NULL if none */
};
949 
/*
 * proc_iterate() callout for flow_divert_find_proc_by_uuid(). Runs for each
 * process that passed the filter. If all matching processes shared a single
 * signing identifier, the first process handed here is claimed as the result;
 * otherwise the search fails with a warning.
 *
 * Returns PROC_CLAIMED_DONE when `p` is claimed (iteration keeps its
 * reference for the caller), PROC_RETURNED_DONE otherwise.
 */
static int
flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
{
	struct uuid_search_info *info = (struct uuid_search_info *)arg;
	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */

	if (info->found_signing_id != NULL) {
		if (!info->found_multiple_signing_ids) {
			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
			info->found_proc = p;
			result = PROC_CLAIMED_DONE;
		} else {
			uuid_string_t uuid_str;
			uuid_unparse(info->target_uuid, uuid_str);
			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
		}
		/* Whether claimed or ambiguous, the stored signing ID is no longer needed */
		kfree_data(info->found_signing_id, strlen(info->found_signing_id) + 1);
		info->found_signing_id = NULL;
	}

	if (result == PROC_RETURNED_DONE) {
		uuid_string_t uuid_str;
		uuid_unparse(info->target_uuid, uuid_str);
		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
	}

	return result;
}
978 
979 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)980 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
981 {
982 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
983 	int include = 0;
984 
985 	if (info->found_multiple_signing_ids) {
986 		return include;
987 	}
988 
989 	include = (uuid_compare(proc_executableuuid_addr(p), info->target_uuid) == 0);
990 	if (include) {
991 		const char *signing_id = cs_identity_get(p);
992 		if (signing_id != NULL) {
993 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
994 			size_t signing_id_size = strlen(signing_id) + 1;
995 			if (info->found_signing_id == NULL) {
996 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
997 				memcpy(info->found_signing_id, signing_id, signing_id_size);
998 			} else if (memcmp(signing_id, info->found_signing_id, signing_id_size)) {
999 				info->found_multiple_signing_ids = TRUE;
1000 			}
1001 		} else {
1002 			info->found_multiple_signing_ids = TRUE;
1003 		}
1004 		include = !info->found_multiple_signing_ids;
1005 	}
1006 
1007 	return include;
1008 }
1009 
1010 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)1011 flow_divert_find_proc_by_uuid(uuid_t uuid)
1012 {
1013 	struct uuid_search_info info;
1014 
1015 	if (LOG_INFO <= nil_pcb.log_level) {
1016 		uuid_string_t uuid_str;
1017 		uuid_unparse(uuid, uuid_str);
1018 		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
1019 	}
1020 
1021 	memset(&info, 0, sizeof(info));
1022 	info.found_proc = PROC_NULL;
1023 	uuid_copy(info.target_uuid, uuid);
1024 
1025 	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
1026 
1027 	return info.found_proc;
1028 }
1029 
/*
 * Append identifying information about `proc` -- signing ID, cdhash, and
 * audit token -- to the connect packet being built.
 *
 * fd_cb          - control block (logging and group signing-ID trie)
 * proc           - process whose identity is appended; locked for the duration
 * signing_id     - signing ID taken from the connect token, or NULL to use
 *                  the process's own code-signing identity
 * connect_packet - packet under construction
 * is_effective   - true for the effective process; selects the TLV types used
 *                  and enables the signing-ID trie check
 *
 * Returns 0 on success, EPERM when the effective signing ID is missing or not
 * present in the group's signing-ID trie, or an error from
 * flow_divert_packet_append_tlv().
 */
static int
flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id, mbuf_t connect_packet, bool is_effective)
{
	int error = 0;
	uint8_t *cdhash = NULL;
	audit_token_t audit_token = {};
	const char *proc_cs_id = signing_id;

	proc_lock(proc);

	if (proc_cs_id == NULL) {
		/* No token-supplied ID; use the proc's identity only if its signature is usable */
		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
			proc_cs_id = cs_identity_get(proc);
		} else {
			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
		}
	}

	if (is_effective) {
		/* Unless the group has no app map, the effective signing ID must be in the trie */
		lck_rw_lock_shared(&fd_cb->group->lck);
		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
			if (proc_cs_id != NULL) {
				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)proc_cs_id);
				if (result == NULL_TRIE_IDX) {
					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
					error = EPERM;
				} else {
					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
				}
			} else {
				error = EPERM;
			}
		}
		lck_rw_done(&fd_cb->group->lck);
	}

	if (error != 0) {
		goto done;
	}

	/*
	 * If signing_id is not NULL then it came from the flow divert token and will be added
	 * as part of the token, so there is no need to add it here.
	 */
	if (signing_id == NULL && proc_cs_id != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
		    (uint32_t)strlen(proc_cs_id),
		    proc_cs_id);
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
			goto done;
		}
	}

	/* A missing cdhash is logged but is not fatal */
	cdhash = cs_get_cdhash(proc);
	if (cdhash != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
		    SHA1_RESULTLEN,
		    cdhash);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
			goto done;
		}
	} else {
		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
	}

	/* Append the task's audit token; failure to append is logged but not fatal */
	task_t task = proc_task(proc);
	if (task != TASK_NULL) {
		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
		if (rc == KERN_SUCCESS) {
			int append_error = flow_divert_packet_append_tlv(connect_packet,
			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
			    sizeof(audit_token_t),
			    &audit_token);
			if (append_error) {
				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
			}
		}
	}

done:
	proc_unlock(proc);

	return error;
}
1119 
1120 static int
flow_divert_add_all_proc_info(struct flow_divert_pcb * fd_cb,struct socket * so,proc_t proc,const char * signing_id,mbuf_t connect_packet)1121 flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id, mbuf_t connect_packet)
1122 {
1123 	int error = 0;
1124 	proc_t effective_proc = PROC_NULL;
1125 	proc_t responsible_proc = PROC_NULL;
1126 	proc_t real_proc = proc_find(so->last_pid);
1127 	bool release_real_proc = true;
1128 
1129 	proc_t src_proc = PROC_NULL;
1130 	proc_t real_src_proc = PROC_NULL;
1131 
1132 	if (real_proc == PROC_NULL) {
1133 		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
1134 		release_real_proc = false;
1135 		real_proc = proc;
1136 		if (real_proc == PROC_NULL) {
1137 			real_proc = current_proc();
1138 		}
1139 	}
1140 
1141 	if (so->so_flags & SOF_DELEGATED) {
1142 		if (proc_getpid(real_proc) != so->e_pid) {
1143 			effective_proc = proc_find(so->e_pid);
1144 		} else if (uuid_compare(proc_executableuuid_addr(real_proc), so->e_uuid)) {
1145 			effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
1146 		}
1147 	}
1148 
1149 #if defined(XNU_TARGET_OS_OSX)
1150 	lck_rw_lock_shared(&fd_cb->group->lck);
1151 	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1152 		if (so->so_rpid > 0) {
1153 			responsible_proc = proc_find(so->so_rpid);
1154 		}
1155 	}
1156 	lck_rw_done(&fd_cb->group->lck);
1157 #endif
1158 
1159 	real_src_proc = real_proc;
1160 
1161 	if (responsible_proc != PROC_NULL) {
1162 		src_proc = responsible_proc;
1163 		if (effective_proc != NULL) {
1164 			real_src_proc = effective_proc;
1165 		}
1166 	} else if (effective_proc != PROC_NULL) {
1167 		src_proc = effective_proc;
1168 	} else {
1169 		src_proc = real_proc;
1170 	}
1171 
1172 	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
1173 	if (error != 0) {
1174 		goto done;
1175 	}
1176 
1177 	if (real_src_proc != NULL && real_src_proc != src_proc) {
1178 		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
1179 		if (error != 0) {
1180 			goto done;
1181 		}
1182 	}
1183 
1184 done:
1185 	if (responsible_proc != PROC_NULL) {
1186 		proc_rele(responsible_proc);
1187 	}
1188 
1189 	if (effective_proc != PROC_NULL) {
1190 		proc_rele(effective_proc);
1191 	}
1192 
1193 	if (real_proc != PROC_NULL && release_real_proc) {
1194 		proc_rele(real_proc);
1195 	}
1196 
1197 	return error;
1198 }
1199 
/*
 * Hand a control packet to the flow divert provider for this control block's
 * group via the kernel control socket. If the control socket cannot take the
 * packet right now, it is queued on the group's send queue instead and 0 is
 * returned.
 *
 * On success (0), ownership of `packet` passes to this function (sent or
 * queued). On a non-zero return the caller still owns `packet` and must free
 * it. If the flow has no group, the socket is torn down and ECONNABORTED
 * (stream) or EHOSTUNREACH (datagram) is returned.
 */
static int
flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet)
{
	int             error;

	if (fd_cb->group == NULL) {
		/* No provider is attached; abort the flow */
		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, false);
		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			error = ECONNABORTED;
		} else {
			error = EHOSTUNREACH;
		}
		fd_cb->so->so_error = (uint16_t)error;
		return error;
	}

	lck_rw_lock_shared(&fd_cb->group->lck);

	/* Only try the control socket directly if nothing is already queued ahead of us */
	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
		if (error) {
			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
		}
	} else {
		error = ENOBUFS;
	}

	if (error == ENOBUFS) {
		/*
		 * The control socket is full (or others are queued); upgrade to the
		 * exclusive lock and queue the packet for later. If the upgrade
		 * fails the shared lock was dropped, so re-acquire exclusively.
		 */
		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
			lck_rw_lock_exclusive(&fd_cb->group->lck);
		}
		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
		error = 0;
		/* Remember that the control socket pushed back so it can be drained later */
		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
	}

	lck_rw_done(&fd_cb->group->lck);

	return error;
}
1242 
1243 static void
flow_divert_append_domain_name(char * domain_name,void * ctx)1244 flow_divert_append_domain_name(char *domain_name, void *ctx)
1245 {
1246 	mbuf_t packet = (mbuf_t)ctx;
1247 	size_t domain_name_length = 0;
1248 
1249 	if (packet == NULL || domain_name == NULL) {
1250 		return;
1251 	}
1252 
1253 	domain_name_length = strlen(domain_name);
1254 	if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1255 		int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, domain_name);
1256 		if (error) {
1257 			FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1258 		}
1259 	}
1260 }
1261 
/*
 * Build the CONNECT packet for a flow: process identity TLVs, traffic class,
 * flow type, the target endpoint (either from the connect token or from
 * `to` plus the NECP domain name), the local endpoint, outbound interface,
 * flags, and a content-filter identifier.
 *
 * On success, *out_connect_packet receives the packet (caller owns it); on
 * failure the partially built packet is freed. Note that on the token path
 * ownership of fd_cb->connect_token is transferred into the packet and the
 * field is cleared.
 */
static int
flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_t *out_connect_packet)
{
	int                     error                   = 0;
	int                     flow_type               = 0;
	char                    *signing_id = NULL;
	uint32_t                sid_size = 0;
	mbuf_t                  connect_packet = NULL;
	cfil_sock_id_t          cfil_sock_id            = CFIL_SOCK_ID_NONE;
	const void              *cfil_id                = NULL;
	size_t                  cfil_id_size            = 0;
	struct inpcb            *inp = sotoinpcb(so);
	struct ifnet *ifp = NULL;
	uint32_t flags = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
	if (error) {
		goto done;
	}

	/* If an HMAC-validated token carries a signing ID, extract a NUL-terminated copy of it */
	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (find_error == 0 && sid_size > 0) {
			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
			if (signing_id != NULL) {
				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
			}
		}
	}

	error = flow_divert_add_all_proc_info(fd_cb, so, p, signing_id, connect_packet);

	if (signing_id != NULL) {
		kfree_data(signing_id, sid_size + 1);
	}

	if (error) {
		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
	    sizeof(fd_cb->so->so_traffic_class),
	    &fd_cb->so->so_traffic_class);
	if (error) {
		goto done;
	}

	/* Only TCP-like (stream) and UDP-like (datagram) sockets can be diverted */
	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
	} else {
		error = EINVAL;
		goto done;
	}
	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_FLOW_TYPE,
	    sizeof(flow_type),
	    &flow_type);

	if (error) {
		goto done;
	}

	if (fd_cb->connect_token != NULL) {
		/* The token (which contains the target endpoint) is appended verbatim; ownership moves to the packet */
		unsigned int token_len = m_length(fd_cb->connect_token);
		mbuf_concatenate(connect_packet, fd_cb->connect_token);
		mbuf_pkthdr_adjustlen(connect_packet, token_len);
		fd_cb->connect_token = NULL;
	} else {
		/* No token: append the explicit target endpoint and any NECP-known domain name */
		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
		if (error) {
			goto done;
		}

		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
	}

	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
		if (error) {
			goto done;
		}
	}

	/* Tell the provider which interface the flow last used, if known */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if (ifp != NULL) {
		uint32_t flow_if_index = ifp->if_index;
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
		    sizeof(flow_if_index), &flow_if_index);
		if (error) {
			goto done;
		}
	}

	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
	}

	/* The flow counts as "bound" if it is tied to an interface or a local address */
	if ((inp->inp_flags & INP_BOUND_IF) ||
	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
	}

	if (flags != 0) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
		if (error) {
			goto done;
		}
	}

	/* Identify the flow to content filters: prefer the cfil sock ID, else the NECP client UUID */
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
	} else {
		cfil_sock_id = cfil_sock_id_from_socket(so);
	}

	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
		cfil_id = &cfil_sock_id;
		cfil_id_size = sizeof(cfil_sock_id);
	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
		cfil_id = &inp->necp_client_uuid;
		cfil_id_size = sizeof(inp->necp_client_uuid);
	}

	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
		if (error) {
			goto done;
		}
	}

done:
	if (!error) {
		*out_connect_packet = connect_packet;
	} else if (connect_packet != NULL) {
		mbuf_freem(connect_packet);
	}

	return error;
}
1411 
1412 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1413 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1414 {
1415 	int error = 0;
1416 	mbuf_t connect_packet = fd_cb->connect_packet;
1417 	mbuf_t saved_connect_packet = NULL;
1418 
1419 	if (connect_packet != NULL) {
1420 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1421 		if (error) {
1422 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1423 			goto done;
1424 		}
1425 
1426 		error = flow_divert_send_packet(fd_cb, connect_packet);
1427 		if (error) {
1428 			goto done;
1429 		}
1430 
1431 		fd_cb->connect_packet = saved_connect_packet;
1432 		saved_connect_packet = NULL;
1433 	} else {
1434 		error = ENOENT;
1435 	}
1436 done:
1437 	if (saved_connect_packet != NULL) {
1438 		mbuf_freem(saved_connect_packet);
1439 	}
1440 
1441 	return error;
1442 }
1443 
1444 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1445 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1446 {
1447 	int             error                   = 0;
1448 	mbuf_t  packet                  = NULL;
1449 	int             rbuff_space             = 0;
1450 
1451 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1452 	if (error) {
1453 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1454 		goto done;
1455 	}
1456 
1457 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1458 	if (rbuff_space < 0) {
1459 		rbuff_space = 0;
1460 	}
1461 	rbuff_space = htonl(rbuff_space);
1462 	error = flow_divert_packet_append_tlv(packet,
1463 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1464 	    sizeof(rbuff_space),
1465 	    &rbuff_space);
1466 	if (error) {
1467 		goto done;
1468 	}
1469 
1470 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1471 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1472 		if (error) {
1473 			goto done;
1474 		}
1475 	}
1476 
1477 	error = flow_divert_send_packet(fd_cb, packet);
1478 	if (error) {
1479 		goto done;
1480 	}
1481 
1482 done:
1483 	if (error && packet != NULL) {
1484 		mbuf_freem(packet);
1485 	}
1486 
1487 	return error;
1488 }
1489 
1490 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1491 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1492 {
1493 	int             error   = 0;
1494 	mbuf_t  packet  = NULL;
1495 	uint32_t        zero    = 0;
1496 
1497 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1498 	if (error) {
1499 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1500 		goto done;
1501 	}
1502 
1503 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1504 	if (error) {
1505 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1506 		goto done;
1507 	}
1508 
1509 	how = htonl(how);
1510 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1511 	if (error) {
1512 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1513 		goto done;
1514 	}
1515 
1516 	error = flow_divert_send_packet(fd_cb, packet);
1517 	if (error) {
1518 		goto done;
1519 	}
1520 
1521 done:
1522 	if (error && packet != NULL) {
1523 		mbuf_free(packet);
1524 	}
1525 
1526 	return error;
1527 }
1528 
1529 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1530 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1531 {
1532 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1533 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1534 		return SHUT_RDWR;
1535 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1536 		return SHUT_RD;
1537 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1538 		return SHUT_WR;
1539 	}
1540 
1541 	return -1;
1542 }
1543 
/*
 * Determine what close messages, if any, need to be sent to the tunnel, and
 * disconnect the socket once the tunnel has been closed for both reads and
 * writes.
 */
static void
flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
{
	int             how             = -1;

	/* Do not send any close messages if there is still data in the send buffer */
	if (fd_cb->so->so_snd.sb_cc == 0) {
		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
			how = SHUT_RD;
		}
		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
			if (how == SHUT_RD) {
				how = SHUT_RDWR;
			} else {
				how = SHUT_WR;
			}
		}
	}

	if (how != -1) {
		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
		/* ENOBUFS means the packet was neither sent nor queued; retry later */
		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
			if (how != SHUT_RD) {
				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
			}
			if (how != SHUT_WR) {
				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
			}
		}
	}

	/* Once the tunnel is closed in both directions, tear down the socket */
	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
	}
}
1586 
/*
 * Wrap `data_len` bytes of stream data (`data`, an mbuf chain) in a DATA
 * packet header and send it to the provider.
 *
 * On success, ownership of `data` passes to the packet and the sent-bytes
 * counters are updated. On failure, `data` is detached from the packet before
 * the packet is freed, so the caller retains ownership of `data`.
 */
static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len)
{
	mbuf_t packet = NULL;
	mbuf_t last = NULL;
	int error = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	/* Attach the payload to the end of the header chain and fix up the pkthdr length */
	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the caller's data before freeing the header chain */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1625 
/*
 * Wrap one datagram (or one fragment of one) in a DATA packet and send it to
 * the provider.
 *
 * toaddr        - optional destination; appended as a target-endpoint TLV
 * is_fragment   - set when this is one piece of a fragmented datagram
 * datagram_size - total size of the original datagram (callers pass the full
 *                 size on the first fragment and 0 afterwards)
 *
 * On success ownership of `data` passes to the packet; on failure `data` is
 * detached first so the caller retains ownership.
 */
static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
{
	mbuf_t packet = NULL;
	mbuf_t last = NULL;
	int error = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	if (toaddr != NULL) {
		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
			goto done;
		}
	}
	if (is_fragment) {
		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
			goto done;
		}
	}

	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
		goto done;
	}

	/* Attach the payload to the end of the header chain and fix up the pkthdr length */
	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the caller's data before freeing the header chain */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1685 
/*
 * Split a datagram larger than FLOW_DIVERT_CHUNK_SIZE into chunk-sized
 * fragments and send each as a fragment packet. Only the first fragment
 * carries the destination address and the total datagram size.
 *
 * Ownership of `datagram` passes to this function: each fragment is consumed
 * by flow_divert_send_datagram_packet() on success, and any unsent mbufs are
 * freed here on failure.
 */
static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_t datagram, size_t datagram_len, struct sockaddr *toaddr)
{
	mbuf_t next_data = datagram;
	size_t remaining_len = datagram_len;
	mbuf_t remaining_data = NULL;
	int error = 0;
	bool first = true;

	while (remaining_len > 0 && next_data != NULL) {
		size_t to_send = remaining_len;
		remaining_data = NULL;

		/* Carve off at most one chunk; the rest stays in remaining_data */
		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
			to_send = FLOW_DIVERT_CHUNK_SIZE;
			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
			if (error) {
				break;
			}
		}

		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
		if (error) {
			break;
		}

		first = false;
		remaining_len -= to_send;
		next_data = remaining_data;
	}

	if (error) {
		/* On failure the unsent fragment and the unsplit remainder are still ours to free */
		if (next_data != NULL) {
			mbuf_freem(next_data);
		}
		if (remaining_data != NULL) {
			mbuf_freem(remaining_data);
		}
	}
	return error;
}
1727 
/*
 * Drain the socket's send buffer to the provider. For stream sockets the
 * buffer is sent in FLOW_DIVERT_CHUNK_SIZE pieces up to the peer's send
 * window (unless `force` is set, which ignores the window). For datagram
 * sockets each buffered record is sent whole, fragmenting records larger
 * than one chunk. The send window is decremented by the bytes actually sent.
 */
static void
flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
{
	size_t  to_send;
	size_t  sent    = 0;
	int             error   = 0;
	mbuf_t  buffer;

	to_send = fd_cb->so->so_snd.sb_cc;
	buffer = fd_cb->so->so_snd.sb_mb;

	if (buffer == NULL && to_send > 0) {
		/* Inconsistent sockbuf state; bail rather than dereference NULL */
		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
		return;
	}

	/* Ignore the send window if force is enabled */
	if (!force && (to_send > fd_cb->send_window)) {
		to_send = fd_cb->send_window;
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		/* Stream: copy and send successive chunks, then drop what was sent */
		while (sent < to_send) {
			mbuf_t  data;
			size_t  data_len;

			data_len = to_send - sent;
			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
				data_len = FLOW_DIVERT_CHUNK_SIZE;
			}

			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
				break;
			}

			error = flow_divert_send_data_packet(fd_cb, data, data_len);
			if (error) {
				/* On failure the copy is still ours to free */
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}

			sent += data_len;
		}
		sbdrop(&fd_cb->so->so_snd, (int)sent);
		sowwakeup(fd_cb->so);
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		mbuf_t data;
		mbuf_t m;
		size_t data_len;

		/* Datagram: send one buffered record (with optional address) at a time */
		while (buffer) {
			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);

			m = buffer;
			if (toaddr != NULL) {
				/* look for data in the chain */
				do {
					m = m->m_next;
					if (m != NULL && m->m_type == MT_DATA) {
						break;
					}
				} while (m);
				if (m == NULL) {
					/* unexpected */
					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
					goto move_on;
				}
			}
			data_len = mbuf_pkthdr_len(m);
			if (data_len > 0) {
				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
					break;
				}
			} else {
				data = NULL;
			}
			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
			} else {
				/* Oversized record: the fragmenter takes ownership of data */
				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
				data = NULL;
			}
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}
			sent += data_len;
move_on:
			buffer = buffer->m_nextpkt;
			(void) sbdroprecord(&(fd_cb->so->so_snd));
		}
	}

	if (sent > 0) {
		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
		/* Shrink the send window by what went out, clamping at zero */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}
	}
}
1839 
static int
flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data, struct sockaddr *toaddr)
{
	/*
	 * Send application data ("data") to the flow divert provider, limited by
	 * the provider's advertised send window.  Data that cannot be sent now is
	 * appended to the socket's send buffer (to be flushed later by
	 * flow_divert_send_buffered_data) or, if there is no buffer space, freed.
	 * Ownership of "data" is always consumed by this function.
	 *
	 * toaddr is the destination address for datagram flows (may be NULL).
	 * Returns 0 on success, or ENOBUFS if data had to be dropped for lack of
	 * send-buffer space.
	 */
	size_t  to_send         = mbuf_pkthdr_len(data);
	int     error           = 0;

	/* Never send more than the tunnel's remaining send window */
	if (to_send > fd_cb->send_window) {
		to_send = fd_cb->send_window;
	}

	if (fd_cb->so->so_snd.sb_cc > 0) {
		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		size_t  sent            = 0;
		mbuf_t  remaining_data  = data;
		mbuf_t  pkt_data        = NULL;
		/* Carve the chain into FLOW_DIVERT_CHUNK_SIZE packets and send each one */
		while (sent < to_send && remaining_data != NULL) {
			size_t  pkt_data_len;

			pkt_data = remaining_data;

			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
			} else {
				pkt_data_len = to_send - sent;
			}

			if (pkt_data_len < mbuf_pkthdr_len(pkt_data)) {
				/* More data remains after this chunk; split it off */
				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
					pkt_data = NULL;
					break;
				}
			} else {
				remaining_data = NULL;
			}

			/* On success, flow_divert_send_data_packet consumes pkt_data */
			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
			if (error) {
				break;
			}

			pkt_data = NULL;
			sent += pkt_data_len;
		}

		fd_cb->send_window -= sent;

		/* A send failure is not fatal here; leftover data is buffered below */
		error = 0;

		/* Buffer the chunk that failed to send, if any */
		if (pkt_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(pkt_data);
				error = ENOBUFS;
			}
		}

		/* Buffer whatever was beyond the send window */
		if (remaining_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(remaining_data);
				error = ENOBUFS;
			}
		}
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		int send_dgram_error = 0;
		size_t data_size = mbuf_pkthdr_len(data);
		/* Datagrams are all-or-nothing: send if any window is open or the datagram is empty */
		if (to_send || data_size == 0) {
			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
			} else {
				/* flow_divert_send_fragmented_datagram always consumes data */
				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
				data = NULL;
			}
			if (send_dgram_error) {
				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
			} else {
				/* Charge the full datagram against the window, clamping at zero */
				if (data_size >= fd_cb->send_window) {
					fd_cb->send_window = 0;
				} else {
					fd_cb->send_window -= data_size;
				}
				data = NULL;
			}
		}

		if (data != NULL) {
			/* buffer it */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (toaddr != NULL) {
					int append_error = 0;
					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
					}
				} else {
					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
					}
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
				mbuf_freem(data);
			}
		}
	}

	return error;
}
1964 
1965 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1966 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1967 {
1968 	int error = 0;
1969 	mbuf_t packet = NULL;
1970 
1971 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1972 	if (error) {
1973 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1974 		goto done;
1975 	}
1976 
1977 	error = flow_divert_send_packet(fd_cb, packet);
1978 	if (error) {
1979 		goto done;
1980 	}
1981 
1982 done:
1983 	if (error && packet != NULL) {
1984 		mbuf_free(packet);
1985 	}
1986 
1987 	return error;
1988 }
1989 
1990 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1991 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1992 {
1993 	int             error           = 0;
1994 	mbuf_t  packet          = NULL;
1995 
1996 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
1997 	if (error) {
1998 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
1999 		goto done;
2000 	}
2001 
2002 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2003 	if (error) {
2004 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2005 		goto done;
2006 	}
2007 
2008 	error = flow_divert_send_packet(fd_cb, packet);
2009 	if (error) {
2010 		goto done;
2011 	}
2012 
2013 done:
2014 	if (error && packet != NULL) {
2015 		mbuf_free(packet);
2016 	}
2017 
2018 	return error;
2019 }
2020 
2021 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2022 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2023 {
2024 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2025 
2026 	if (local_endpoint->sa_family == AF_INET6) {
2027 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2028 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2029 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2030 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2031 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2032 		}
2033 		if (inp->inp_lport == 0) {
2034 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2035 		}
2036 	} else if (local_endpoint->sa_family == AF_INET) {
2037 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2038 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2039 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2040 		}
2041 		if (inp->inp_lport == 0) {
2042 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
2043 		}
2044 	}
2045 }
2046 
2047 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2048 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2049 {
2050 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2051 
2052 	if (remote_endpoint->sa_family == AF_INET6) {
2053 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2054 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2055 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2056 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2057 		}
2058 		if (inp->inp_fport == 0) {
2059 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2060 		}
2061 	} else if (remote_endpoint->sa_family == AF_INET) {
2062 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
2063 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2064 		}
2065 		if (inp->inp_fport == 0) {
2066 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2067 		}
2068 	}
2069 }
2070 
/*
 * Derive the next kernel control unit to use for a flow.
 *
 * pid            - PID used to look up an in-process flow divert group (only
 *                  consulted when *ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT).
 * ctl_unit       - in/out: the preferred control unit; cleared to 0 once
 *                  consumed so subsequent calls fall through to the aggregate.
 * aggregate_unit - in/out: bitmask of fallback control units; the chosen
 *                  unit's bit is cleared before returning it.
 * is_aggregate   - out: set to true when the returned unit came from (or has
 *                  remaining fallbacks in) the aggregate mask.
 *
 * Returns the control unit to try next, or 0 when none is available.
 */
static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
{
	uint32_t result = *ctl_unit;

	// There are two models supported for deriving control units:
	// 1. A series of flow divert units that allow "transparently" failing
	//    over to the next unit. For this model, the aggregate_unit contains list
	//    of all control units (between 1 and 30) masked over each other.
	// 2. An indication that in-process flow divert should be preferred, with
	//    an out of process flow divert to fail over to. For this model, the
	//    ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
	//    is returned first, with the unpacked aggregate unit returned as a
	//    fallback.
	*is_aggregate = false;
	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
		bool found_unit = false;
		if (pid != 0) {
			// Look for an in-process group that is already open, and use that unit
			struct flow_divert_group *group = NULL;
			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
				if (group->in_process_pid == pid) {
					// Found an in-process group for our same PID, use it
					found_unit = true;
					result = group->ctl_unit;
					break;
				}
			}

			// If an in-process group isn't open yet, send a signal up through NECP to request one
			if (!found_unit) {
				necp_client_request_in_process_flow_divert(pid);
			}
		}

		// If a unit was found, return it
		if (found_unit) {
			if (aggregate_unit != NULL && *aggregate_unit != 0) {
				*is_aggregate = true;
			}
			// The next time around, the aggregate unit values will be picked up
			*ctl_unit = 0;
			return result;
		}

		// If no unit was found, fall through and clear out the ctl_unit
		result = 0;
		*ctl_unit = 0;
	}

	if (aggregate_unit != NULL && *aggregate_unit != 0) {
		uint32_t counter;
		struct flow_divert_group *lower_order_group = NULL;

		// Pick the group with the lowest "order" value among the units
		// still set in the aggregate mask
		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
			if ((*aggregate_unit) & (1 << counter)) {
				struct flow_divert_group *group = NULL;
				group = flow_divert_group_lookup(counter + 1, NULL);

				if (group != NULL) {
					if (lower_order_group == NULL) {
						lower_order_group = group;
					} else if ((group->order < lower_order_group->order)) {
						lower_order_group = group;
					}
				}
			}
		}

		if (lower_order_group != NULL) {
			// Consume the chosen unit's bit so it is not returned again
			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
			*is_aggregate = true;
			return lower_order_group->ctl_unit;
		} else {
			*ctl_unit = 0;
			return result;
		}
	} else {
		*ctl_unit = 0;
		return result;
	}
}
2153 
/*
 * Fail the flow over to the next available flow divert group.
 *
 * Removes fd_cb from its current group, then repeatedly derives the next
 * control unit from the policy/aggregate units until an existing group accepts
 * the flow (or no candidates remain).  On success, a new connect packet is
 * sent to the new group.
 *
 * Returns 0 on success; EALREADY if the next unit equals the current one;
 * ENOENT when no usable group remains or the connect packet could not be sent.
 */
static int
flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
{
	int error = 0;
	uint32_t policy_control_unit = fd_cb->policy_control_unit;

	flow_divert_pcb_remove(fd_cb);

	do {
		struct flow_divert_group *next_group = NULL;
		bool is_aggregate = false;
		/* pid 0: do not consider in-process groups during failover */
		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);

		if (fd_cb->control_group_unit == next_ctl_unit) {
			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
			error = EALREADY;
			break;
		}

		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
			error = ENOENT;
			break;
		}

		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
		if (next_group == NULL) {
			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
			continue;
		}

		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);

		error = flow_divert_pcb_insert(fd_cb, next_group);
		if (error == 0) {
			/* Track whether this flow is using a transparent (aggregate) unit */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}
		}
		FDGRP_RELEASE(next_group);
	} while (fd_cb->group == NULL);

	if (fd_cb->group == NULL) {
		/* No group accepted the flow */
		return error ? error : ENOENT;
	}

	error = flow_divert_send_connect_packet(fd_cb);
	if (error) {
		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
		flow_divert_pcb_remove(fd_cb);
		error = ENOENT;
	}

	return error;
}
2211 
/*
 * Disable flow divert for a socket and hand it back to its original protocol.
 *
 * Called when all flow divert services have been skipped.  Restores the
 * inpcb's pre-divert IP state, detaches fd_cb from the socket, reverts the
 * socket to its original protocol, re-issues the connect (unless the connect
 * was implicit), and replays any data buffered in so_snd through the original
 * protocol's pru_send.  On error, the socket is disconnected with so_error
 * set.
 */
static void
flow_divert_disable(struct flow_divert_pcb *fd_cb)
{
	struct socket *so = NULL;
	mbuf_t  buffer;
	int error = 0;
	proc_t last_proc = NULL;
	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
	struct inpcb *inp = NULL;

	so = fd_cb->so;
	if (so == NULL) {
		goto done;
	}

	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");

	/* Restore the IP state */
	inp = sotoinpcb(so);
	inp->inp_vflag = fd_cb->original_vflag;
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;
	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
	inp->inp_fifscope = IFSCOPE_NONE;
	inp->in6p_fport = 0;
	/* If flow divert set the local address, clear it out */
	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
		inp->inp_lifscope = IFSCOPE_NONE;
	}
	inp->inp_last_outifp = fd_cb->original_last_outifp;
	inp->in6p_last_outifp = fd_cb->original_last_outifp6;

	/* Dis-associate the socket */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
	so->so_fd_pcb = NULL;
	fd_cb->so = NULL;

	FDRELEASE(fd_cb); /* Release the socket's reference */

	/* Revert back to the original protocol */
	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));

	/* Reset the socket state to avoid confusing NECP */
	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);

	/* Perform the connect/sends on behalf of the last process to use the socket */
	last_proc = proc_find(so->last_pid);

	if (do_connect) {
		/* Connect using the original protocol */
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
			goto done;
		}
	}

	buffer = so->so_snd.sb_mb;
	if (buffer == NULL) {
		/* No buffered data, done */
		goto done;
	}

	/* Send any buffered data using the original protocol */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		mbuf_t data_to_send = NULL;
		size_t data_len = so->so_snd.sb_cc;

		/* Copy the whole send buffer, then flush it and replay the copy */
		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
			goto done;
		}

		sbflush(&so->so_snd);

		if (data_to_send->m_flags & M_PKTHDR) {
			mbuf_pkthdr_setlen(data_to_send, data_len);
		}

		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
		    0,
		    data_to_send,
		    NULL,
		    NULL,
		    (last_proc != NULL ? last_proc : current_proc()));

		/* EWOULDBLOCK just means the protocol will send later; not an error */
		if (error && error != EWOULDBLOCK) {
			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
		} else {
			error = 0;
		}
	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct sockbuf *sb = &so->so_snd;
		MBUFQ_HEAD(send_queue_head) send_queue;
		MBUFQ_INIT(&send_queue);

		/* Flush the send buffer, moving all records to a temporary queue */
		while (sb->sb_mb != NULL) {
			mbuf_t record = sb->sb_mb;
			mbuf_t m = record;
			sb->sb_mb = sb->sb_mb->m_nextpkt;
			while (m != NULL) {
				sbfree(sb, m);
				m = m->m_next;
			}
			record->m_nextpkt = NULL;
			MBUFQ_ENQUEUE(&send_queue, record);
		}
		SB_EMPTY_FIXUP(sb);

		/* Replay each buffered record through the original protocol */
		while (!MBUFQ_EMPTY(&send_queue)) {
			mbuf_t next_record = MBUFQ_FIRST(&send_queue);
			mbuf_t addr = NULL;
			mbuf_t control = NULL;
			mbuf_t last_control = NULL;
			mbuf_t data = NULL;
			mbuf_t m = next_record;
			struct sockaddr *to_endpoint = NULL;

			MBUFQ_DEQUEUE(&send_queue, next_record);

			/* Pick apart the record: destination address, control mbufs, data */
			while (m != NULL) {
				if (m->m_type == MT_SONAME) {
					addr = m;
				} else if (m->m_type == MT_CONTROL) {
					if (control == NULL) {
						control = m;
					}
					last_control = m;
				} else if (m->m_type == MT_DATA) {
					data = m;
					break;
				}
				m = m->m_next;
			}

			/* Only pass an explicit destination when the socket is unconnected */
			if (addr != NULL && !do_connect) {
				to_endpoint = flow_divert_get_buffered_target_address(addr);
				if (to_endpoint == NULL) {
					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
				}
			}

			if (data == NULL) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
				mbuf_freem(next_record);
				continue;
			}

			if (!(data->m_flags & M_PKTHDR)) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
				mbuf_freem(next_record);
				continue;
			}

			/* Detach the addr/control mbufs from the data before sending */
			if (addr != NULL) {
				addr->m_next = NULL;
			}

			if (last_control != NULL) {
				last_control->m_next = NULL;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    0,
			    data,
			    to_endpoint,
			    control,
			    (last_proc != NULL ? last_proc : current_proc()));

			if (addr != NULL) {
				mbuf_freem(addr);
			}

			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
			}
		}
	}
done:
	if (last_proc != NULL) {
		proc_rele(last_proc);
	}

	if (error && so != NULL) {
		so->so_error = (uint16_t)error;
		flow_divert_disconnect_socket(so, do_connect, false);
	}
}
2405 
/*
 * Scope the flow's socket to the given output interface.
 *
 * out_if_index       - interface index to scope to; <= 0 is a no-op.
 * derive_new_address - when true (and the socket was already scoped to some
 *                      other interface), re-derive the local address that is
 *                      appropriate for the new interface from the flow's
 *                      original remote endpoint.
 *
 * Also updates the inpcb's "last outgoing interface" so subsequent routing
 * decisions use the new interface.
 */
static void
flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
{
	struct socket *so = NULL;
	struct inpcb *inp = NULL;
	struct ifnet *current_ifp = NULL;
	struct ifnet *new_ifp = NULL;
	int error = 0;

	so = fd_cb->so;
	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);

	if (out_if_index <= 0) {
		return;
	}

	if (inp->inp_vflag & INP_IPV6) {
		current_ifp = inp->in6p_last_outifp;
	} else {
		current_ifp = inp->inp_last_outifp;
	}

	if (current_ifp != NULL) {
		if (current_ifp->if_index == out_if_index) {
			/* No change */
			return;
		}

		/* Scope the socket to the given interface */
		error = inp_bindif(inp, out_if_index, &new_ifp);
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
			return;
		}

		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
			/* Get the appropriate address for the given interface */
			if (inp->inp_vflag & INP_IPV6) {
				inp->in6p_laddr = sa6_any.sin6_addr;
				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
			} else {
				inp->inp_laddr.s_addr = INADDR_ANY;
				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
			}

			if (error != 0) {
				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
			}
		}
	} else {
		/* Socket was not scoped yet; just look up the ifnet for the index */
		ifnet_head_lock_shared();
		if (out_if_index <= if_index) {
			new_ifp = ifindex2ifnet[out_if_index];
		}
		ifnet_head_done();
	}

	/* Update the "last interface" of the socket */
	if (new_ifp != NULL) {
		if (inp->inp_vflag & INP_IPV6) {
			inp->in6p_last_outifp = new_ifp;
		} else {
			inp->inp_last_outifp = new_ifp;
		}

#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
		}
#endif /* SKYWALK */
	}
}
2482 
/*
 * Handle a connect-result message from the flow divert provider.
 *
 * Parses the TLVs in "packet" starting at "offset": the (required) connect
 * error code and send window, plus optional control unit, local/remote
 * endpoints, output interface index, and application data.  With the PCB and
 * socket locked, the results are applied: endpoints are recorded in the
 * inpcb, the socket is scoped to the output interface, and the flow is
 * optionally transferred to another control group.  On success the socket is
 * marked connected and buffered data is flushed; on failure the flow either
 * fails over to the next group, falls back to the original protocol, or is
 * disconnected with so_error set.
 */
static void
flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
{
	uint32_t					connect_error = 0;
	uint32_t					ctl_unit			= 0;
	int						error				= 0;
	union sockaddr_in_4_6 local_endpoint = {};
	union sockaddr_in_4_6 remote_endpoint = {};
	int						out_if_index		= 0;
	uint32_t					send_window;
	uint32_t					app_data_length		= 0;

	memset(&local_endpoint, 0, sizeof(local_endpoint));
	memset(&remote_endpoint, 0, sizeof(remote_endpoint));

	/* The error code and send window TLVs are mandatory */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
		return;
	}

	connect_error = ntohl(connect_error);
	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
		return;
	}

	/* The remaining TLVs are optional; absence is logged at INFO only */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sa), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sa), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
	}

	/* First call only queries the length of the app data TLV */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
	}

	error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		struct inpcb *inp = NULL;
		struct socket *so = fd_cb->so;
		bool local_address_is_valid = false;

		socket_lock(so, 1);

		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
			goto done;
		}

		inp = sotoinpcb(so);

		if (connect_error || error) {
			goto set_socket_state;
		}

		/* Adopt the provider-supplied local endpoint, if it is usable for this socket */
		if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
			if (local_endpoint.sa.sa_family == AF_INET) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->inp_laddr.s_addr = INADDR_ANY;
				} else {
					/* Address unusable; keep only the port */
					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
				}
			} else if (local_endpoint.sa.sa_family == AF_INET6) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->in6p_laddr = sa6_any.sin6_addr;
				} else {
					/* Address unusable; keep only the port */
					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
				}
			}
		}

		/* Scope to the output interface; derive a local address only if none was provided */
		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));

		if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
			if (remote_endpoint.sa.sa_family == AF_INET) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
			}
			flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
		}

		/* Copy any application data out of the packet, replacing previous app data */
		if (app_data_length > 0) {
			uint8_t *app_data = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
					if (fd_cb->app_data != NULL) {
						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
			}
		}

		if (error) {
			goto set_socket_state;
		}

		if (fd_cb->group == NULL) {
			error = EINVAL;
			goto set_socket_state;
		}

		/* If the provider asked for it, move the flow to a different control group */
		ctl_unit = ntohl(ctl_unit);
		if (ctl_unit > 0) {
			int insert_error = 0;
			struct flow_divert_group *grp = NULL;

			if (ctl_unit >= GROUP_COUNT_MAX) {
				FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
				error = EINVAL;
				goto set_socket_state;
			}

			grp = flow_divert_group_lookup(ctl_unit, fd_cb);
			if (grp == NULL) {
				error = ECONNRESET;
				goto set_socket_state;
			}

			flow_divert_pcb_remove(fd_cb);
			insert_error = flow_divert_pcb_insert(fd_cb, grp);
			FDGRP_RELEASE(grp);

			if (insert_error != 0) {
				error = ECONNRESET;
				goto set_socket_state;
			}
		}

		fd_cb->send_window = ntohl(send_window);

set_socket_state:
		if (!connect_error && !error) {
			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
			error = flow_divert_send_connect_result(fd_cb);
		}

		if (connect_error || error) {
			/* On a provider-reported error, try the next group before giving up */
			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
				error = flow_divert_try_next_group(fd_cb);
				if (error && fd_cb->policy_control_unit == 0) {
					flow_divert_disable(fd_cb);
					goto done;
				} else if (error == 0) {
					goto done;
				}
			}

			if (!connect_error) {
				/* Local failure: tell the provider we are closing */
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
				so->so_error = (uint16_t)error;
				flow_divert_send_close_if_needed(fd_cb);
			} else {
				/* Provider failure: the tunnel is already closed */
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
				so->so_error = (uint16_t)connect_error;
			}
			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		} else {
#if NECP
			/* Update NECP client with connected five-tuple */
			if (!uuid_is_null(inp->necp_client_uuid)) {
				socket_unlock(so, 0);
				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
				socket_lock(so, 0);
				if (!(so->so_flags & SOF_FLOW_DIVERT)) {
					/* The socket was closed while it was unlocked */
					goto done;
				}
			}
#endif /* NECP */

			flow_divert_send_buffered_data(fd_cb, FALSE);
			soisconnected(so);
		}

		/* We don't need the connect packet any more */
		if (fd_cb->connect_packet != NULL) {
			mbuf_freem(fd_cb->connect_packet);
			fd_cb->connect_packet = NULL;
		}

		/* We don't need the original remote endpoint any more */
		free_sockaddr(fd_cb->original_remote_endpoint);
done:
		socket_unlock(so, 1);
	}
	FDUNLOCK(fd_cb);
}
2714 
/*
 * Handle a close message (FLOW_DIVERT_PKT_CLOSE) from the provider.
 * Records the provider-supplied error code on the socket, updates the
 * flow's closed state for the requested direction(s), and then shuts
 * down or disconnects the socket according to how much of the tunnel
 * is now closed.
 */
static void
flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
{
	uint32_t        close_error                     = 0;
	int                     error                   = 0;
	int                     how                     = 0;

	/* Both the error-code and the "how" TLVs are required; bail if either is missing */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
		return;
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
		return;
	}

	how = ntohl(how);

	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		/* Implicitly-connected datagram flows are not treated as connected */
		bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
			goto done;
		}

		fd_cb->so->so_error = (uint16_t)ntohl(close_error);

		flow_divert_update_closed_state(fd_cb, how, true, true);

		/* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
		how = flow_divert_tunnel_how_closed(fd_cb);
		if (how == SHUT_RDWR) {
			flow_divert_disconnect_socket(fd_cb->so, is_connected, true);
		} else if (how == SHUT_RD && is_connected) {
			socantrcvmore(fd_cb->so);
		} else if (how == SHUT_WR && is_connected) {
			socantsendmore(fd_cb->so);
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
2766 
/*
 * Build a control mbuf carrying the flow's local (destination) address
 * for delivery alongside an inbound datagram.  Control data is created
 * only when the socket asked for it (IP_RECVDSTADDR / IN6P_PKTINFO) or
 * when socket flow tracking is enabled, and only when the local endpoint
 * family matches the inpcb's version flag.  Returns NULL when no control
 * data is needed.
 */
static mbuf_t
flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
{
	struct inpcb *inp = sotoinpcb(fd_cb->so);
	bool need_recvdstaddr = false;
	/* Socket flow tracking needs to see the local address */
	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
	if ((inp->inp_vflag & INP_IPV4) &&
	    fd_cb->local_endpoint.sa.sa_family == AF_INET &&
	    ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
		return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
	} else if ((inp->inp_vflag & INP_IPV6) &&
	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
		struct in6_pktinfo pi6;
		memset(&pi6, 0, sizeof(pi6));
		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;

		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
	}
	return NULL;
}
2789 
/*
 * Handle a data message (FLOW_DIVERT_PKT_DATA) from the provider and
 * deliver the payload to the socket's receive buffer.  For datagram
 * sockets the packet may carry an optional remote-address TLV before
 * the payload; the payload is then appended as a single datagram with
 * the remote address and any requested control data.
 *
 * Returns 0 on success; ENOBUFS when the receive buffer is full (the
 * flow is flagged so a read notification is sent once the app reads);
 * or an mbuf_split error.
 */
static int
flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
{
	int error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		mbuf_t  data            = NULL;
		size_t  data_size;
		struct sockaddr_storage remote_address;
		boolean_t got_remote_sa = FALSE;
		boolean_t appended = FALSE;
		boolean_t append_success = FALSE;

		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
			goto done;
		}

		/*
		 * No room in the receive buffer: remember to send a read
		 * notification once the app drains it (see flow_divert_rcvd).
		 */
		if (sbspace(&fd_cb->so->so_rcv) == 0) {
			error = ENOBUFS;
			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
			uint32_t val_size = 0;

			/* check if we got remote address with data */
			memset(&remote_address, 0, sizeof(remote_address));
			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
			if (error || val_size > sizeof(remote_address)) {
				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
				error = 0;
			} else {
				/* Clamp ss_len so later copies cannot run past the storage */
				if (remote_address.ss_len > sizeof(remote_address)) {
					remote_address.ss_len = sizeof(remote_address);
				}
				/* validate the address */
				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
					got_remote_sa = TRUE;
				} else {
					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
				}
				/* Skip past the address TLV (type byte + length word + value) to the payload */
				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
			}
		}

		data_size = (mbuf_pkthdr_len(packet) - offset);

		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
			goto done;
		}

		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);

		/* Separate the payload from the message header/TLVs */
		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
		if (error || data == NULL) {
			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
			append_success = TRUE;
		} else {
			struct sockaddr *append_sa = NULL;
			mbuf_t mctl;

			/* Use the address from the packet if present, else the connected peer address */
			if (got_remote_sa == TRUE) {
				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
			} else {
				if (fd_cb->so->so_proto->pr_domain->dom_family == AF_INET6) {
					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
				} else {
					error = in_getpeeraddr(fd_cb->so, &append_sa);
				}
			}
			if (error) {
				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
			}

			mctl = flow_divert_create_control_mbuf(fd_cb);
			int append_error = 0;
			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
			if (appended || append_error == 0) {
				append_success = TRUE;
			} else {
				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
			}

			free_sockaddr(append_sa);
		}

		/* Only count bytes that actually made it into the receive buffer */
		if (append_success) {
			fd_cb->bytes_received += data_size;
			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
		}

		if (appended) {
			sorwakeup(fd_cb->so);
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);

	return error;
}
2908 
/*
 * Handle a read notification (FLOW_DIVERT_PKT_READ_NOTIFY) from the
 * provider: the peer consumed read_count bytes, so widen the flow's
 * send window accordingly and try to flush any buffered outbound data.
 */
static void
flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
{
	uint32_t        read_count              = 0;
	int             error                   = 0;

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
		return;
	}

	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
			goto done;
		}

		/* read_count arrives in network byte order */
		fd_cb->send_window += ntohl(read_count);
		flow_divert_send_buffered_data(fd_cb, FALSE);
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
2939 
2940 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_t packet,int offset)2941 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset)
2942 {
2943 	int error         = 0;
2944 	uint32_t key_size = 0;
2945 	int log_level     = 0;
2946 	uint32_t flags    = 0;
2947 	int32_t order     = FLOW_DIVERT_ORDER_LAST;
2948 
2949 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2950 	if (error) {
2951 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2952 		return;
2953 	}
2954 
2955 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2956 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2957 		return;
2958 	}
2959 
2960 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2961 	if (!error) {
2962 		nil_pcb.log_level = (uint8_t)log_level;
2963 	}
2964 
2965 	lck_rw_lock_exclusive(&group->lck);
2966 
2967 	if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2968 		FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2969 		lck_rw_done(&group->lck);
2970 		return;
2971 	}
2972 
2973 	if (group->token_key != NULL) {
2974 		kfree_data(group->token_key, group->token_key_size);
2975 		group->token_key = NULL;
2976 	}
2977 
2978 	group->token_key = kalloc_data(key_size, Z_WAITOK);
2979 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2980 	if (error) {
2981 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2982 		kfree_data(group->token_key, key_size);
2983 		group->token_key = NULL;
2984 		lck_rw_done(&group->lck);
2985 		return;
2986 	}
2987 
2988 	group->token_key_size = key_size;
2989 
2990 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2991 	if (!error) {
2992 		group->flags = flags;
2993 	}
2994 
2995 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
2996 	if (!error) {
2997 		FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
2998 		group->order = order;
2999 	}
3000 
3001 	lck_rw_done(&group->lck);
3002 }
3003 
/*
 * Handle a properties update message (FLOW_DIVERT_PKT_PROPERTIES_UPDATE)
 * from the provider.  May re-scope the flow to a new output interface
 * and/or replace the flow's application data blob.  Both TLVs are
 * optional; absence is logged but not treated as an error.
 */
static void
flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
{
	int                                                     error                           = 0;
	int                                                     out_if_index            = 0;
	uint32_t                                        app_data_length         = 0;

	FDLOG0(LOG_INFO, fd_cb, "received a properties update");

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
	}

	/* First call only queries the length; the data is copied below once a buffer exists */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
	}

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
			goto done;
		}

		if (out_if_index > 0) {
			/* Re-scope the flow to the new interface and refresh the local endpoint */
			flow_divert_scope(fd_cb, out_if_index, true);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (app_data_length > 0) {
			uint8_t *app_data = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					/* Replace any existing application data */
					if (fd_cb->app_data != NULL) {
						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
			}
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
3061 
/*
 * Handle an app map create message (FLOW_DIVERT_PKT_APP_MAP_CREATE).
 * Rebuilds the group's signing-identifier prefix trie from scratch:
 * the packet supplies a shared-prefix count plus one SIGNING_ID TLV per
 * identifier.  The trie's node, child-map and byte arrays are carved
 * out of a single allocation; on any insertion failure the new trie is
 * discarded and the group is left with an empty trie (the old one is
 * always freed up front).
 */
static void
flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_t packet, int offset)
{
	size_t bytes_mem_size;
	size_t child_maps_mem_size;
	size_t nodes_mem_size;
	size_t trie_memory_size = 0;
	int cursor;
	int error = 0;
	struct flow_divert_trie new_trie;
	int insert_error = 0;
	int prefix_count = -1;
	int signing_id_count = 0;
	size_t bytes_count = 0;
	size_t nodes_count = 0;
	size_t maps_count = 0;

	lck_rw_lock_exclusive(&group->lck);

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_addr(group->signing_id_trie.memory);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	memset(&new_trie, 0, sizeof(new_trie));

	/* Get the number of shared prefixes in the new set of signing ID strings */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);

	if (prefix_count < 0 || error) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
		lck_rw_done(&group->lck);
		return;
	}

	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
			signing_id_count = 0;
			break;
		}
		/* signing_id_count == 0 after the loop signals a malformed packet */
		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
			signing_id_count = 0;
			break;
		}
		signing_id_count++;
	}

	if (signing_id_count == 0) {
		lck_rw_done(&group->lck);
		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
		return;
	}

	/* All size computations use overflow-checked arithmetic on untrusted counts */
	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
		return;
	}

	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
		return;
	}

	/* Trie indices are 16-bit, so every count must fit in a uint16_t */
	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
		lck_rw_done(&group->lck);
		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
		return;
	}

	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
	    nodes_count, maps_count, bytes_count);

	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
		lck_rw_done(&group->lck);
		return;
	}

	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
		lck_rw_done(&group->lck);
		return;
	}

	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
	if (new_trie.memory == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
		lck_rw_done(&group->lck);
		return;
	}

	new_trie.bytes_count = (uint16_t)bytes_count;
	new_trie.nodes_count = (uint16_t)nodes_count;
	new_trie.child_maps_count = (uint16_t)maps_count;

	/* Initialize the free lists */
	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
	new_trie.nodes_free_next = 0;
	memset(new_trie.nodes, 0, nodes_mem_size);

	/* Child maps are filled with 0xff so every slot starts as NULL_TRIE_IDX */
	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
	new_trie.child_maps_free_next = 0;
	memset(new_trie.child_maps, 0xff, child_maps_mem_size);

	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
	new_trie.bytes_free_next = 0;
	memset(new_trie.bytes, 0, bytes_mem_size);

	/* The root is an empty node */
	new_trie.root = trie_node_alloc(&new_trie);

	/* Add each signing ID to the trie */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
			insert_error = EINVAL;
			break;
		}
		/* Re-check the size against remaining byte-pool space before copying */
		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
			uint16_t new_node_idx;
			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
			if (error) {
				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
				insert_error = EINVAL;
				break;
			}
			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
			if (new_node_idx == NULL_TRIE_IDX) {
				insert_error = EINVAL;
				break;
			}
		} else {
			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
			insert_error = ENOBUFS;
			break;
		}
	}

	/* Install the new trie only if every identifier was inserted */
	if (!insert_error) {
		group->signing_id_trie = new_trie;
	} else {
		kfree_data(new_trie.memory, trie_memory_size);
	}

	lck_rw_done(&group->lck);
}
3227 
/*
 * Demultiplex a packet received from the control channel.  Messages
 * with a connection ID of zero are group-level (group init, app map
 * create); all others are looked up by connection ID and dispatched to
 * the per-flow handlers.  The packet is always freed before returning.
 */
static int
flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
{
	struct flow_divert_packet_header        hdr;
	int                                                                     error           = 0;
	struct flow_divert_pcb                          *fd_cb;

	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
		error = EINVAL;
		goto done;
	}

	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
		error = ENOBUFS;
		goto done;
	}

	/* conn_id travels in network byte order */
	hdr.conn_id = ntohl(hdr.conn_id);

	if (hdr.conn_id == 0) {
		/* Group-level messages are not associated with any flow */
		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_GROUP_INIT:
			flow_divert_handle_group_init(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
			break;
		default:
			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}
		goto done;
	}

	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
	if (fd_cb == NULL) {
		/* Close/read-notify for an already-gone flow is routine; don't log those */
		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
		}
		goto done;
	}

	switch (hdr.packet_type) {
	case FLOW_DIVERT_PKT_CONNECT_RESULT:
		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_CLOSE:
		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_DATA:
		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_READ_NOTIFY:
		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
		break;
	default:
		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
		break;
	}

	FDRELEASE(fd_cb);       /* Drop the reference taken by flow_divert_pcb_lookup */

done:
	mbuf_freem(packet);
	return error;
}
3300 
/*
 * Abort every flow belonging to a group and mark the group defunct.
 * The PCBs are first collected (and retained) under the exclusive group
 * lock, then each flow is torn down with the group lock dropped, taking
 * the per-PCB lock before the socket lock.
 */
static void
flow_divert_close_all(struct flow_divert_group *group)
{
	struct flow_divert_pcb                  *fd_cb;
	SLIST_HEAD(, flow_divert_pcb)   tmp_list;

	SLIST_INIT(&tmp_list);

	lck_rw_lock_exclusive(&group->lck);

	/* Discard anything still queued for the provider */
	MBUFQ_DRAIN(&group->send_queue);

	/* Retain each PCB so it outlives its removal from the tree below */
	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;

	lck_rw_done(&group->lck);

	while (!SLIST_EMPTY(&tmp_list)) {
		fd_cb = SLIST_FIRST(&tmp_list);
		FDLOCK(fd_cb);
		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
		if (fd_cb->so != NULL) {
			socket_lock(fd_cb->so, 0);
			flow_divert_pcb_remove(fd_cb);
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			fd_cb->so->so_error = ECONNABORTED;
			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
			socket_unlock(fd_cb->so, 0);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);       /* Drop the reference taken while collecting */
	}
}
3338 
/*
 * Detach flow divert from a socket that is being torn down.  Flushes
 * any buffered data, sends a close to the provider, removes the PCB
 * from its group, severs the PCB<->socket link, and drops the socket's
 * reference on the PCB.  Called with the socket locked.
 */
void
flow_divert_detach(struct socket *so)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return;
	}

	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_fd_pcb = NULL;

	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);

	if (fd_cb->group != NULL) {
		/* Last-ditch effort to send any buffered data */
		flow_divert_send_buffered_data(fd_cb, TRUE);

		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
		flow_divert_send_close_if_needed(fd_cb);
		/* Remove from the group */
		flow_divert_pcb_remove(fd_cb);
	}

	/*
	 * Drop the socket lock before taking the PCB lock, matching the
	 * fd_cb-then-socket lock order used elsewhere in this file.
	 */
	socket_unlock(so, 0);
	FDLOCK(fd_cb);
	fd_cb->so = NULL;
	FDUNLOCK(fd_cb);
	socket_lock(so, 0);

	FDRELEASE(fd_cb);       /* Release the socket's reference */
}
3371 
/*
 * pru_close handler for a diverted socket: flush buffered outbound
 * data, mark the flow fully closed on our side, tell the provider, and
 * remove the PCB from its group.  Returns EINVAL if the socket is not
 * diverted, 0 otherwise.
 */
static int
flow_divert_close(struct socket *so)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Closing");

	/* Stream sockets get the normal disconnect dance; drop any unread data */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		soisdisconnecting(so);
		sbflush(&so->so_rcv);
	}

	flow_divert_send_buffered_data(fd_cb, TRUE);
	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	/* Remove from the group */
	flow_divert_pcb_remove(fd_cb);

	return 0;
}
3397 
3398 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3399 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3400     sae_connid_t cid __unused)
3401 {
3402 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3403 		return EINVAL;
3404 	}
3405 
3406 	return flow_divert_close(so);
3407 }
3408 
/*
 * pru_shutdown handler: mark the socket as unable to send more data,
 * record the write-side closure on the flow, and notify the provider
 * if needed.  Returns EINVAL if the socket is not diverted.
 */
static int
flow_divert_shutdown(struct socket *so)
{
	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Can't send more");

	socantsendmore(so);

	/* Only the write side closes on shutdown */
	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	return 0;
}
3427 
3428 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3429 flow_divert_rcvd(struct socket *so, int flags __unused)
3430 {
3431 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3432 	int space = 0;
3433 
3434 	if (!SO_IS_DIVERTED(so)) {
3435 		return EINVAL;
3436 	}
3437 
3438 	space = sbspace(&so->so_rcv);
3439 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3440 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3441 	    (space > 0) &&
3442 	    flow_divert_send_read_notification(fd_cb) == 0) {
3443 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3444 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3445 	}
3446 
3447 	return 0;
3448 }
3449 
/*
 * Append the target (remote) endpoint to a connect packet as two TLVs:
 * the full sockaddr (TARGET_ADDRESS) and the port in host byte order
 * (TARGET_PORT).  The address must pass flow_divert_is_sockaddr_valid,
 * which restricts it to AF_INET or AF_INET6.  Returns 0 on success or
 * an errno from validation/appending.
 */
static int
flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr)
{
	int error = 0;
	int port  = 0;

	if (!flow_divert_is_sockaddr_valid(toaddr)) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
		error = EINVAL;
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
	if (error) {
		goto done;
	}

	/* Validation above guarantees the family is AF_INET or AF_INET6 */
	if (toaddr->sa_family == AF_INET) {
		port = ntohs((satosin(toaddr))->sin_port);
	} else {
		port = ntohs((satosin6(toaddr))->sin6_port);
	}

	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
	if (error) {
		goto done;
	}

done:
	return error;
}
3481 
3482 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_t buffer)3483 flow_divert_get_buffered_target_address(mbuf_t buffer)
3484 {
3485 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3486 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3487 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3488 			return toaddr;
3489 		}
3490 	}
3491 	return NULL;
3492 }
3493 
3494 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3495 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3496 {
3497 	switch (addr->sa_family) {
3498 	case AF_INET:
3499 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3500 			return FALSE;
3501 		}
3502 		break;
3503 	case AF_INET6:
3504 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3505 			return FALSE;
3506 		}
3507 		break;
3508 	default:
3509 		return FALSE;
3510 	}
3511 	return TRUE;
3512 }
3513 
3514 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3515 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3516     struct sockaddr **dup)
3517 {
3518 	int                                             error           = 0;
3519 	struct sockaddr                 *result;
3520 	struct sockaddr_storage ss;
3521 
3522 	if (addr != NULL) {
3523 		result = addr;
3524 	} else {
3525 		memset(&ss, 0, sizeof(ss));
3526 		ss.ss_family = family;
3527 		if (ss.ss_family == AF_INET) {
3528 			ss.ss_len = sizeof(struct sockaddr_in);
3529 		} else if (ss.ss_family == AF_INET6) {
3530 			ss.ss_len = sizeof(struct sockaddr_in6);
3531 		} else {
3532 			error = EINVAL;
3533 		}
3534 		result = (struct sockaddr *)&ss;
3535 	}
3536 
3537 	if (!error) {
3538 		*dup = dup_sockaddr(result, 1);
3539 		if (*dup == NULL) {
3540 			error = ENOBUFS;
3541 		}
3542 	}
3543 
3544 	return error;
3545 }
3546 
/*
 * Disconnect a diverted socket.  Stream sockets (and connected
 * datagram sockets) are marked disconnected; datagram sockets also get
 * their inpcb torn down here, either immediately or deferred through
 * content filter when delay_if_needed is set.
 */
static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
{
	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
		soisdisconnected(so);
	}
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
			/*
			 * Let NetworkStatistics know this PCB is going away
			 * before we detach it.
			 */
			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
				nstat_pcb_detach(inp);
			}

			/* Drop any cached route before the PCB is marked dead */
			if (SOCK_DOM(so) == PF_INET6) {
				ROUTE_RELEASE(&inp->in6p_route);
			} else {
				ROUTE_RELEASE(&inp->inp_route);
			}
			if (delay_if_needed) {
				(void) cfil_sock_is_dead(so);
			} else {
				inp->inp_state = INPCB_STATE_DEAD;
				inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
			}
			/* makes sure we're not called twice from so_close */
			so->so_flags |= SOF_PCBCLEARING;
		}
	}
}
3580 
/*
 * pr_ctloutput handler for diverted sockets.  A SO_TRAFFIC_CLASS set
 * on a started flow is forwarded to the provider; the option itself is
 * then processed by the underlying TCP/TCP6 protosw so normal socket
 * option behavior is preserved.
 */
static errno_t
flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
		/* Only propagate to the provider once the connect has been started */
		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
		}
	}

	/* Delegate to the appropriate TCP protosw for the actual option handling */
	if (SOCK_DOM(so) == PF_INET) {
		return g_tcp_protosw->pr_ctloutput(so, sopt);
	} else if (SOCK_DOM(so) == PF_INET6) {
		return g_tcp6_protosw->pr_ctloutput(so, sopt);
	}
	return 0;
}
3603 
static errno_t
flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
{
	/*
	 * Common connect path shared by explicit connect()/connectx() and by
	 * implicit (first-send) connects on datagram sockets.  On the first
	 * call this builds and caches the connect packet; the packet is sent
	 * immediately unless preconnect (fast-open style) data is pending,
	 * in which case sending is deferred to the first send/receive.
	 */
	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
	int                                             error   = 0;
	struct inpcb                    *inp    = sotoinpcb(so);
	struct sockaddr_in              *sinp;
	mbuf_t                                  connect_packet = NULL;
	int                                             do_send = 1;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	if (fd_cb->group == NULL) {
		/* No control-channel group: the agent is not reachable. */
		error = ENETUNREACH;
		goto done;
	}

	if (inp == NULL) {
		error = EINVAL;
		goto done;
	} else if (inp->inp_state == INPCB_STATE_DEAD) {
		/* Prefer a previously-recorded socket error over plain EINVAL. */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
		} else {
			error = EINVAL;
		}
		goto done;
	}

	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
		error = EALREADY;
		goto done;
	}

	FDLOG0(LOG_INFO, fd_cb, "Connecting");

	/* First connect attempt: validate the destination and build the packet. */
	if (fd_cb->connect_packet == NULL) {
		struct sockaddr_in sin = {};
		struct ifnet *ifp = NULL;

		if (to == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
			error = EINVAL;
			goto done;
		}

		if (!flow_divert_is_sockaddr_valid(to)) {
			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
			error = EINVAL;
			goto done;
		}

		/*
		 * Save the original remote endpoint and PCB addressing state so
		 * they can be restored if the flow falls back to the regular stack.
		 */
		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
		if (fd_cb->original_remote_endpoint == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
			error = ENOMEM;
			goto done;
		}
		fd_cb->original_vflag = inp->inp_vflag;
		fd_cb->original_last_outifp = inp->inp_last_outifp;
		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;

		/* Multicast destinations are not supported by flow divert. */
		sinp = (struct sockaddr_in *)(void *)to;
		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto done;
		}

		/*
		 * On a dual-stack socket, convert a v4-mapped IPv6 destination
		 * into a plain IPv4 sockaddr.
		 */
		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
			struct sockaddr_in6 sin6 = {};
			sin6.sin6_family = AF_INET6;
			sin6.sin6_len = sizeof(struct sockaddr_in6);
			sin6.sin6_port = satosin6(to)->sin6_port;
			sin6.sin6_addr = satosin6(to)->sin6_addr;
			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
				in6_sin6_2_sin(&sin, &sin6);
				to = (struct sockaddr *)&sin;
			}
		}

		/* Pick a tentative local endpoint for the flow's address family. */
		if (to->sa_family == AF_INET6) {
			struct sockaddr_in6 *to6 = satosin6(to);

			inp->inp_vflag &= ~INP_IPV4;
			inp->inp_vflag |= INP_IPV6;
			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
				/*
				 * Only transparent flows with a specific destination
				 * require a local address; otherwise ignore the failure.
				 */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->in6p_last_outifp = ifp;
				ifnet_release(ifp);
			}

			/* Move any embedded scope ID into sin6_scope_id. */
			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
			    in6_embedded_scope &&
			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
			}

			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
			    in6_embedded_scope &&
			    to6->sin6_addr.s6_addr16[1] != 0) {
				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
				to6->sin6_addr.s6_addr16[1] = 0;
			}
		} else if (to->sa_family == AF_INET) {
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
			fd_cb->local_endpoint.sin.sin_family = AF_INET;
			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->inp_last_outifp = ifp;
				ifnet_release(ifp);
			}
		} else {
			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
		}

		/* Enforce cellular/expensive/constrained interface restrictions. */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}

		/*
		 * Decide whether the local address should be reported back to the
		 * socket layer once the agent confirms the connection.
		 */
		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
		    !implicit || /* connect() was called or */
		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
		}

		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
		if (error) {
			goto done;
		}

		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
			flow_divert_set_remote_endpoint(fd_cb, to);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (implicit) {
			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
		}

		/* With preconnect data pending, defer sending until first I/O. */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
			do_send = 0;
		}

		/* Ownership of the packet moves to the PCB. */
		fd_cb->connect_packet = connect_packet;
		connect_packet = NULL;
	} else {
		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
	}

	if (do_send) {
		error = flow_divert_send_connect_packet(fd_cb);
		if (error) {
			goto done;
		}

		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
	}

	/*
	 * Token-less datagram flows are usable immediately; everything else
	 * waits for the agent's connect result.
	 */
	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
		soisconnected(so);
	} else {
		soisconnecting(so);
	}

done:
	return error;
}
3802 
3803 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3804 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3805 {
3806 #if CONTENT_FILTER
3807 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3808 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3809 		if (error != 0) {
3810 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3811 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3812 			return error;
3813 		}
3814 	}
3815 #endif /* CONTENT_FILTER */
3816 
3817 	return flow_divert_connect_out_internal(so, to, p, false);
3818 }
3819 
static int
flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
    struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
{
	/*
	 * Shared implementation behind the v4 and v6 connectx handlers:
	 * optionally scope the socket to an interface, connect, and send any
	 * data supplied with the connectx() call.
	 */
	struct inpcb *inp = sotoinpcb(so);
	int error;

	if (inp == NULL) {
		return EINVAL;
	}

	VERIFY(dst != NULL);

#if CONTENT_FILTER && NECP
	/*
	 * For token-based stream flows with no content filter attached yet,
	 * re-evaluate the NECP policy now that the destination is known.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
	}
#endif /* CONTENT_FILTER */

	/* bind socket to the specified interface, if requested */
	if (ifscope != IFSCOPE_NONE &&
	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
		return error;
	}

	error = flow_divert_connect_out(so, dst, p);

	if (error != 0) {
		return error;
	}

	/* if there is data, send it */
	if (auio != NULL) {
		user_ssize_t datalen = 0;

		/* pru_sosend takes the socket lock itself; drop it around the call. */
		socket_unlock(so, 0);

		VERIFY(bytes_written != NULL);

		datalen = uio_resid(auio);
		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
		socket_lock(so, 0);

		/* Report partial progress even when the send would have blocked. */
		if (error == 0 || error == EWOULDBLOCK) {
			*bytes_written = datalen - uio_resid(auio);
		}

		/*
		 * sosend returns EWOULDBLOCK if it's a non-blocking
		 * socket or a timeout occured (this allows to return
		 * the amount of queued data through sendit()).
		 *
		 * However, connectx() returns EINPROGRESS in case of a
		 * blocking socket. So we change the return value here.
		 */
		if (error == EWOULDBLOCK) {
			error = EINPROGRESS;
		}
	}

	if (error == 0 && pcid != NULL) {
		*pcid = 1;      /* there is only 1 connection for a TCP */
	}

	return error;
}
3888 
3889 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3890 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3891     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3892     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3893     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3894 {
3895 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3896 }
3897 
3898 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3899 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3900     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3901     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3902     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3903 {
3904 	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3905 }
3906 
static errno_t
flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
{
	/*
	 * pru_send handler for diverted sockets.  Performs an implicit connect
	 * on first send if needed, then forwards the app's data to the agent.
	 * On all paths this function consumes (frees or forwards) both the
	 * data and control mbufs, per pru_send ownership rules.
	 */
	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
	int                                             error   = 0;
	struct inpcb *inp;
#if CONTENT_FILTER
	struct m_tag *cfil_tag = NULL;
#endif

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		error = ECONNRESET;
		goto done;
	}

	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
		/* The provider considers this datagram flow to be closed, so no data can be sent */
		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
		error = EHOSTUNREACH;
		goto done;
	}

#if CONTENT_FILTER
	/*
	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
	 * retrieve the CFIL saved remote address from the mbuf and use it.
	 */
	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
		struct sockaddr *cfil_faddr = NULL;
		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
		if (cfil_tag) {
			to = (struct sockaddr *)(void *)cfil_faddr;
		}
		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
	}
#endif

	/* Implicit connect */
	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
		FDLOG0(LOG_INFO, fd_cb, "implicit connect");

		error = flow_divert_connect_out_internal(so, to, p, true);
		if (error) {
			goto done;
		}
	} else {
		/* Re-check interface restrictions on every send after connect. */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}
	}

	FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data));

	fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data);
	error = flow_divert_send_app_data(fd_cb, data, to);

	/* flow_divert_send_app_data now owns the mbuf chain; don't free it below. */
	data = NULL;

	if (error) {
		goto done;
	}

	if (flags & PRUS_EOF) {
		flow_divert_shutdown(so);
	}

done:
	if (data) {
		mbuf_freem(data);
	}
	if (control) {
		mbuf_free(control);
	}
#if CONTENT_FILTER
	if (cfil_tag) {
		m_tag_free(cfil_tag);
	}
#endif

	return error;
}
3996 
3997 static int
flow_divert_preconnect(struct socket * so)3998 flow_divert_preconnect(struct socket *so)
3999 {
4000 	int error = 0;
4001 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4002 
4003 	if (!SO_IS_DIVERTED(so)) {
4004 		return EINVAL;
4005 	}
4006 
4007 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4008 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4009 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
4010 		if (error) {
4011 			return error;
4012 		}
4013 
4014 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4015 	}
4016 
4017 	soclearfastopen(so);
4018 
4019 	return error;
4020 }
4021 
4022 static void
flow_divert_set_protosw(struct socket * so)4023 flow_divert_set_protosw(struct socket *so)
4024 {
4025 	if (SOCK_DOM(so) == PF_INET) {
4026 		so->so_proto = &g_flow_divert_in_protosw;
4027 	} else {
4028 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4029 	}
4030 }
4031 
4032 static void
flow_divert_set_udp_protosw(struct socket * so)4033 flow_divert_set_udp_protosw(struct socket *so)
4034 {
4035 	if (SOCK_DOM(so) == PF_INET) {
4036 		so->so_proto = &g_flow_divert_in_udp_protosw;
4037 	} else {
4038 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4039 	}
4040 }
4041 
4042 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)4043 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
4044 {
4045 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
4046 	struct inpcb *inp;
4047 	int error = 0;
4048 
4049 	inp = sotoinpcb(so);
4050 	if (inp == NULL) {
4051 		return EINVAL;
4052 	}
4053 
4054 	if (fd_cb == NULL) {
4055 		error = flow_divert_pcb_init(so);
4056 		fd_cb  = so->so_fd_pcb;
4057 		if (error != 0 || fd_cb == NULL) {
4058 			goto done;
4059 		}
4060 	}
4061 	return flow_divert_data_out(so, flags, data, to, control, p);
4062 
4063 done:
4064 	if (data) {
4065 		mbuf_freem(data);
4066 	}
4067 	if (control) {
4068 		mbuf_free(control);
4069 	}
4070 
4071 	return error;
4072 }
4073 
static errno_t
flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
{
	/*
	 * Create a flow-divert PCB for the socket and attach it to a control
	 * group.  Group selection retries (the ENOENT loop) because the
	 * derived group unit may race with a group disconnecting.  On success
	 * the socket's protosw is swapped for the flow-divert variant.
	 */
	errno_t error = 0;
	struct flow_divert_pcb *fd_cb = NULL;
	uint32_t agg_unit = aggregate_unit;
	uint32_t policy_control_unit = ctl_unit;
	bool is_aggregate = false;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		return EALREADY;
	}

	fd_cb = flow_divert_pcb_create(so);
	if (fd_cb == NULL) {
		return ENOMEM;
	}

	do {
		/* Derive the next candidate group unit from the policy/aggregate units. */
		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
			error = EINVAL;
			break;
		}

		error = flow_divert_add_to_group(fd_cb, group_unit);
		if (error == 0) {
			so->so_fd_pcb = fd_cb;
			so->so_flags |= SOF_FLOW_DIVERT;
			fd_cb->control_group_unit = group_unit;
			fd_cb->policy_control_unit = ctl_unit;
			fd_cb->aggregate_unit = agg_unit;
			/* Aggregate units mark the flow as transparent to the app. */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}

			if (SOCK_TYPE(so) == SOCK_STREAM) {
				flow_divert_set_protosw(so);
			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
				flow_divert_set_udp_protosw(so);
			}

			FDLOG0(LOG_INFO, fd_cb, "Created");
		} else if (error != ENOENT) {
			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
		}
	} while (error == ENOENT); /* ENOENT: group vanished; try the next candidate */

	if (error != 0) {
		/* Drop the creation reference; the PCB was never published. */
		FDRELEASE(fd_cb);
	}

	return error;
}
4131 
4132 errno_t
flow_divert_pcb_init(struct socket * so)4133 flow_divert_pcb_init(struct socket *so)
4134 {
4135 	struct inpcb *inp = sotoinpcb(so);
4136 	uint32_t aggregate_units = 0;
4137 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4138 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4139 }
4140 
errno_t
flow_divert_token_set(struct socket *so, struct sockopt *sopt)
{
	/*
	 * Setsockopt handler for installing a flow-divert token on a socket.
	 * Parses the token TLVs (key unit, control unit, aggregate unit),
	 * verifies the token's HMAC against the keyed group, and initializes
	 * the flow-divert PCB.  The token mbuf is saved on the PCB for later
	 * delivery to the agent.
	 */
	uint32_t ctl_unit = 0;
	uint32_t key_unit = 0;
	uint32_t aggregate_unit = 0;
	int error = 0;
	int hmac_error = 0;
	mbuf_t token = NULL;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		error = EALREADY;
		goto done;
	}

	if (g_init_result) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
		error = ENOPROTOOPT;
		goto done;
	}

	/* Only unconnected TCP/UDP sockets over IPv4/IPv6 can be diverted. */
	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
		error = EINVAL;
		goto done;
	} else {
		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
			struct tcpcb *tp = sototcpcb(so);
			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
				error = EINVAL;
				goto done;
			}
		}
	}

	/* Copy the token option data from userspace into an mbuf chain. */
	error = soopt_getm(sopt, &token);
	if (error) {
		token = NULL;
		goto done;
	}

	error = soopt_mcopyin(sopt, token);
	if (error) {
		token = NULL;
		goto done;
	}

	/* The key unit identifies which group's key signs the token (optional TLV). */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
	if (!error) {
		key_unit = ntohl(key_unit);
		if (key_unit >= GROUP_COUNT_MAX) {
			key_unit = 0;
		}
	} else if (error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
		goto done;
	} else {
		key_unit = 0;
	}

	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
		goto done;
	}

	/* Aggregate unit is optional; ENOENT just means it's absent. */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
	if (error && error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
		goto done;
	}

	/* A valid kernel control unit is required */
	ctl_unit = ntohl(ctl_unit);
	aggregate_unit = ntohl(aggregate_unit);

	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
		/* Verify the token against the keyed group (key unit if present). */
		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
		if (hmac_error && hmac_error != ENOENT) {
			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
			error = hmac_error;
			goto done;
		}
	}

	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
	if (error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		int log_level = LOG_NOTICE;

		/* An optional TLV lets the token override the PCB's log level. */
		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
		if (error == 0) {
			fd_cb->log_level = (uint8_t)log_level;
		}
		error = 0;

		/* The PCB takes ownership of the token mbuf. */
		fd_cb->connect_token = token;
		token = NULL;

		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
	}

	if (hmac_error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		if (fd_cb != NULL) {
			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
		}
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4258 
errno_t
flow_divert_token_get(struct socket *so, struct sockopt *sopt)
{
	/*
	 * Getsockopt handler that builds a flow-divert token describing this
	 * flow (control unit, flow ID, optional app data) and signs it with
	 * the control group's key, so it can be handed to another process.
	 * Passing a NULL option buffer returns only the token's size.
	 */
	uint32_t                                        ctl_unit;
	int                                                     error                                           = 0;
	uint8_t                                         hmac[SHA_DIGEST_LENGTH];
	struct flow_divert_pcb          *fd_cb                                          = so->so_fd_pcb;
	mbuf_t                                          token                                           = NULL;
	struct flow_divert_group        *control_group                          = NULL;

	if (!SO_IS_DIVERTED(so)) {
		error = EINVAL;
		goto done;
	}

	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto done;
	}

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		goto done;
	}

	/* TLVs carry multi-byte values in network byte order. */
	ctl_unit = htonl(fd_cb->group->ctl_unit);

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
	if (error) {
		goto done;
	}

	if (fd_cb->app_data != NULL) {
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
		if (error) {
			goto done;
		}
	}

	/*
	 * Sign the token with the control group's key.  The group lock is
	 * held shared while reading its unit and computing the HMAC.
	 */
	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
	if (control_group != NULL) {
		lck_rw_lock_shared(&control_group->lck);
		ctl_unit = htonl(control_group->ctl_unit);
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
		if (!error) {
			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
		}
		lck_rw_done(&control_group->lck);
		FDGRP_RELEASE(control_group);
	} else {
		error = ENOPROTOOPT;
	}

	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
	if (error) {
		goto done;
	}

	if (sopt->sopt_val == USER_ADDR_NULL) {
		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
		sopt->sopt_valsize = mbuf_pkthdr_len(token);
		goto done;
	}

	error = soopt_mcopyout(sopt, token);
	if (error) {
		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
		goto done;
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4346 
void
flow_divert_group_destroy(struct flow_divert_group *group)
{
	/*
	 * Final teardown of a control group once its last reference is
	 * dropped: scrub the token key, free the signing-ID trie, and free
	 * the group itself.
	 */
	lck_rw_lock_exclusive(&group->lck);

	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);

	if (group->token_key != NULL) {
		/* Zeroize the key material before freeing it. */
		memset(group->token_key, 0, group->token_key_size);
		kfree_data(group->token_key, group->token_key_size);
		group->token_key = NULL;
		group->token_key_size = 0;
	}

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_addr(group->signing_id_trie.memory);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	lck_rw_done(&group->lck);

	zfree(flow_divert_group_zone, group);
}
4372 
4373 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4374 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4375 {
4376 	struct flow_divert_group *new_group = NULL;
4377 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4378 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4379 	RB_INIT(&new_group->pcb_tree);
4380 	new_group->ctl_unit = unit;
4381 	new_group->in_process_pid = pid;
4382 	MBUFQ_INIT(&new_group->send_queue);
4383 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4384 	new_group->ref_count = 1;
4385 	new_group->order = FLOW_DIVERT_ORDER_LAST;
4386 	return new_group;
4387 }
4388 
4389 static errno_t
flow_divert_kctl_setup(u_int32_t * unit,void ** unitinfo)4390 flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
4391 {
4392 	if (unit == NULL || unitinfo == NULL) {
4393 		return EINVAL;
4394 	}
4395 
4396 	struct flow_divert_group *new_group = NULL;
4397 	errno_t error = 0;
4398 	lck_rw_lock_shared(&g_flow_divert_group_lck);
4399 	if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
4400 		// Return next unused in-process unit
4401 		u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
4402 		struct flow_divert_group *group_next = NULL;
4403 		TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
4404 			if (group_next->ctl_unit > unit_cursor) {
4405 				// Found a gap, lets fill it in
4406 				break;
4407 			}
4408 			unit_cursor = group_next->ctl_unit + 1;
4409 			if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4410 				break;
4411 			}
4412 		}
4413 		if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4414 			error = EBUSY;
4415 		} else {
4416 			*unit = unit_cursor;
4417 			new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
4418 			if (group_next != NULL) {
4419 				TAILQ_INSERT_BEFORE(group_next, new_group, chain);
4420 			} else {
4421 				TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
4422 			}
4423 			g_active_group_count++;
4424 		}
4425 	} else {
4426 		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4427 			error = EPERM;
4428 		} else {
4429 			if (g_flow_divert_groups == NULL) {
4430 				g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4431 				    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4432 			}
4433 
4434 			// Return next unused group unit
4435 			bool found_unused_unit = false;
4436 			u_int32_t unit_cursor;
4437 			for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
4438 				struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
4439 				if (group == NULL) {
4440 					// Open slot, assign this one
4441 					*unit = unit_cursor;
4442 					new_group = flow_divert_allocate_group(*unit, 0);
4443 					g_flow_divert_groups[*unit] = new_group;
4444 					found_unused_unit = true;
4445 					g_active_group_count++;
4446 					break;
4447 				}
4448 			}
4449 			if (!found_unused_unit) {
4450 				error = EBUSY;
4451 			}
4452 		}
4453 	}
4454 	lck_rw_done(&g_flow_divert_group_lck);
4455 
4456 	*unitinfo = new_group;
4457 
4458 	return error;
4459 }
4460 
4461 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4462 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4463 {
4464 	if (unitinfo == NULL) {
4465 		return EINVAL;
4466 	}
4467 
4468 	// Just validate. The group will already have been allocated.
4469 	struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4470 	if (group == NULL || sac->sc_unit != group->ctl_unit) {
4471 		FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4472 		    sac->sc_unit, group ? group->ctl_unit : 0);
4473 		return EINVAL;
4474 	}
4475 
4476 	return 0;
4477 }
4478 
4479 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4480 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4481 {
4482 	struct flow_divert_group        *group  = NULL;
4483 	errno_t                                         error   = 0;
4484 
4485 	if (unitinfo == NULL) {
4486 		return 0;
4487 	}
4488 
4489 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4490 
4491 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4492 
4493 	if (g_active_group_count == 0) {
4494 		panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4495 		    unit, g_active_group_count);
4496 	}
4497 
4498 	if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4499 		if (unit >= GROUP_COUNT_MAX) {
4500 			return EINVAL;
4501 		}
4502 
4503 		if (g_flow_divert_groups == NULL) {
4504 			panic("flow divert group %u is disconnecting, but groups array is NULL",
4505 			    unit);
4506 		}
4507 		group = g_flow_divert_groups[unit];
4508 
4509 		if (group != (struct flow_divert_group *)unitinfo) {
4510 			panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4511 		}
4512 
4513 		g_flow_divert_groups[unit] = NULL;
4514 	} else {
4515 		group = (struct flow_divert_group *)unitinfo;
4516 		if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4517 			panic("flow divert group %u is disconnecting, but in-process group list is empty",
4518 			    unit);
4519 		}
4520 
4521 		TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4522 	}
4523 
4524 	g_active_group_count--;
4525 
4526 	if (g_active_group_count == 0) {
4527 		kfree_type(struct flow_divert_group *,
4528 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4529 		g_flow_divert_groups = NULL;
4530 	}
4531 
4532 	lck_rw_done(&g_flow_divert_group_lck);
4533 
4534 	if (group != NULL) {
4535 		flow_divert_close_all(group);
4536 		FDGRP_RELEASE(group);
4537 	} else {
4538 		error = EINVAL;
4539 	}
4540 
4541 	return error;
4542 }
4543 
4544 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)4545 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_t m, __unused int flags)
4546 {
4547 	errno_t error = 0;
4548 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4549 	if (group != NULL) {
4550 		error = flow_divert_input(m, group);
4551 		FDGRP_RELEASE(group);
4552 	} else {
4553 		error = ENOENT;
4554 	}
4555 	return error;
4556 }
4557 
4558 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4559 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4560 {
4561 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4562 	if (group == NULL) {
4563 		return;
4564 	}
4565 
4566 	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4567 		struct flow_divert_pcb                  *fd_cb;
4568 		SLIST_HEAD(, flow_divert_pcb)   tmp_list;
4569 
4570 		lck_rw_lock_exclusive(&group->lck);
4571 
4572 		while (!MBUFQ_EMPTY(&group->send_queue)) {
4573 			mbuf_t next_packet;
4574 			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4575 			next_packet = MBUFQ_FIRST(&group->send_queue);
4576 			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4577 			if (error) {
4578 				FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4579 				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4580 				lck_rw_done(&group->lck);
4581 				return;
4582 			}
4583 			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4584 		}
4585 
4586 		SLIST_INIT(&tmp_list);
4587 
4588 		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4589 			FDRETAIN(fd_cb);
4590 			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4591 		}
4592 
4593 		lck_rw_done(&group->lck);
4594 
4595 		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4596 			FDLOCK(fd_cb);
4597 			if (fd_cb->so != NULL) {
4598 				socket_lock(fd_cb->so, 0);
4599 				if (fd_cb->group != NULL) {
4600 					flow_divert_send_buffered_data(fd_cb, FALSE);
4601 				}
4602 				socket_unlock(fd_cb->so, 0);
4603 			}
4604 			FDUNLOCK(fd_cb);
4605 			FDRELEASE(fd_cb);
4606 		}
4607 	}
4608 
4609 	FDGRP_RELEASE(group);
4610 }
4611 
4612 static int
flow_divert_kctl_init(void)4613 flow_divert_kctl_init(void)
4614 {
4615 	struct kern_ctl_reg     ctl_reg;
4616 	int                     result;
4617 
4618 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4619 
4620 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4621 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4622 
4623 	// Do not restrict to privileged processes. flow_divert_kctl_setup checks
4624 	// permissions separately.
4625 	ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4626 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4627 
4628 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4629 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4630 	ctl_reg.ctl_send = flow_divert_kctl_send;
4631 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4632 	ctl_reg.ctl_setup = flow_divert_kctl_setup;
4633 
4634 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4635 
4636 	if (result) {
4637 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4638 		return result;
4639 	}
4640 
4641 	return 0;
4642 }
4643 
4644 void
flow_divert_init(void)4645 flow_divert_init(void)
4646 {
4647 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4648 	nil_pcb.log_level = LOG_NOTICE;
4649 
4650 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4651 
4652 	VERIFY(g_tcp_protosw != NULL);
4653 
4654 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4655 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4656 
4657 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4658 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4659 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4660 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4661 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4662 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4663 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4664 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4665 
4666 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4667 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4668 
4669 	/*
4670 	 * Socket filters shouldn't attach/detach to/from this protosw
4671 	 * since pr_protosw is to be used instead, which points to the
4672 	 * real protocol; if they do, it is a bug and we should panic.
4673 	 */
4674 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4675 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4676 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4677 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4678 
4679 	/* UDP */
4680 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4681 	VERIFY(g_udp_protosw != NULL);
4682 
4683 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4684 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4685 
4686 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4687 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4688 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4689 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4690 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4691 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4692 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4693 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4694 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4695 
4696 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4697 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4698 
4699 	/*
4700 	 * Socket filters shouldn't attach/detach to/from this protosw
4701 	 * since pr_protosw is to be used instead, which points to the
4702 	 * real protocol; if they do, it is a bug and we should panic.
4703 	 */
4704 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4705 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4706 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4707 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4708 
4709 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4710 
4711 	VERIFY(g_tcp6_protosw != NULL);
4712 
4713 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4714 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4715 
4716 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4717 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4718 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4719 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4720 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4721 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4722 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4723 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4724 
4725 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4726 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4727 	/*
4728 	 * Socket filters shouldn't attach/detach to/from this protosw
4729 	 * since pr_protosw is to be used instead, which points to the
4730 	 * real protocol; if they do, it is a bug and we should panic.
4731 	 */
4732 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4733 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4734 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4735 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4736 
4737 	/* UDP6 */
4738 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4739 
4740 	VERIFY(g_udp6_protosw != NULL);
4741 
4742 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4743 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4744 
4745 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4746 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4747 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4748 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4749 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4750 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4751 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4752 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4753 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4754 
4755 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4756 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4757 	/*
4758 	 * Socket filters shouldn't attach/detach to/from this protosw
4759 	 * since pr_protosw is to be used instead, which points to the
4760 	 * real protocol; if they do, it is a bug and we should panic.
4761 	 */
4762 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4763 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4764 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4765 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4766 
4767 	TAILQ_INIT(&g_flow_divert_in_process_group_list);
4768 
4769 	g_init_result = flow_divert_kctl_init();
4770 	if (g_init_result) {
4771 		goto done;
4772 	}
4773 
4774 done:
4775 	if (g_init_result != 0) {
4776 		if (g_flow_divert_kctl_ref != NULL) {
4777 			ctl_deregister(g_flow_divert_kctl_ref);
4778 			g_flow_divert_kctl_ref = NULL;
4779 		}
4780 	}
4781 }
4782