xref: /xnu-8796.141.3/bsd/netinet/flow_divert.c (revision 1b191cb58250d0705d8a51287127505aa4bc0789)
1 /*
2  * Copyright (c) 2012-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <libkern/tree.h>
46 #include <kern/locks.h>
47 #include <kern/debug.h>
48 #include <kern/task.h>
49 #include <mach/task_info.h>
50 #include <net/if_var.h>
51 #include <net/route.h>
52 #include <net/flowhash.h>
53 #include <net/ntstat.h>
54 #include <net/content_filter.h>
55 #include <net/necp.h>
56 #include <netinet/in.h>
57 #include <netinet/in_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_fsm.h>
61 #include <netinet/flow_divert.h>
62 #include <netinet/flow_divert_proto.h>
63 #include <netinet6/in6_pcb.h>
64 #include <netinet6/ip6protosw.h>
65 #include <dev/random/randomdev.h>
66 #include <libkern/crypto/sha1.h>
67 #include <libkern/crypto/crypto_internal.h>
68 #include <os/log.h>
69 #include <corecrypto/cc.h>
70 #if CONTENT_FILTER
71 #include <net/content_filter.h>
72 #endif /* CONTENT_FILTER */
73 
/*
 * State bits kept in the `flags` field of a flow divert PCB.
 * Bit 0x00000020 is not assigned to any flag in this list.
 */
#define FLOW_DIVERT_CONNECT_STARTED         0x00000001
#define FLOW_DIVERT_READ_CLOSED             0x00000002
#define FLOW_DIVERT_WRITE_CLOSED            0x00000004
#define FLOW_DIVERT_TUNNEL_RD_CLOSED        0x00000008
#define FLOW_DIVERT_TUNNEL_WR_CLOSED        0x00000010
#define FLOW_DIVERT_HAS_HMAC                0x00000040
#define FLOW_DIVERT_NOTIFY_ON_RECEIVED      0x00000080
#define FLOW_DIVERT_IMPLICIT_CONNECT        0x00000100
#define FLOW_DIVERT_DID_SET_LOCAL_ADDR      0x00000200
#define FLOW_DIVERT_HAS_TOKEN               0x00000400
#define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR   0x00000800
#define FLOW_DIVERT_FLOW_IS_TRANSPARENT     0x00001000
86 
/*
 * Logging helpers. Both emit through os_log_with_type() and prefix the message
 * with the hash of `pcb`; `level` is a syslog priority that is translated by
 * flow_divert_syslog_type_to_oslog_type(). `pcb` is dereferenced
 * unconditionally, so it must never be NULL.
 *
 * FDLOG  - format string plus at least one argument.
 * FDLOG0 - fixed message with no arguments.
 */
#define FDLOG(level, pcb, format, ...)                                      \
	os_log_with_type(OS_LOG_DEFAULT,                                    \
	    flow_divert_syslog_type_to_oslog_type(level),                   \
	    "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)

#define FDLOG0(level, pcb, msg)                                             \
	os_log_with_type(OS_LOG_DEFAULT,                                    \
	    flow_divert_syslog_type_to_oslog_type(level),                   \
	    "(%u): " msg "\n", (pcb)->hash)
92 
/*
 * Reference counting helpers for PCBs (FDRETAIN/FDRELEASE) and for groups
 * (FDGRP_RETAIN/FDGRP_RELEASE). All of them accept a NULL object and do
 * nothing in that case. The release macros destroy the object when the
 * reference being dropped was the last one (OSDecrementAtomic returns the
 * value held before the decrement).
 *
 * Every macro is wrapped in do { } while (0) so that it behaves as a single
 * statement: a bare `if` body would silently capture an `else` that follows
 * the macro invocation.
 */
#define FDRETAIN(pcb)                                                       \
	do {                                                                \
	        if ((pcb) != NULL) {                                        \
	                OSIncrementAtomic(&(pcb)->ref_count);               \
	        }                                                           \
	} while (0)

#define FDRELEASE(pcb)                                                      \
	do {                                                                \
	        if ((pcb) != NULL &&                                        \
	            1 == OSDecrementAtomic(&(pcb)->ref_count)) {            \
	                flow_divert_pcb_destroy(pcb);                       \
	        }                                                           \
	} while (0)

#define FDGRP_RETAIN(grp)                                                   \
	do {                                                                \
	        if ((grp) != NULL) {                                        \
	                OSIncrementAtomic(&(grp)->ref_count);               \
	        }                                                           \
	} while (0)

#define FDGRP_RELEASE(grp)                                                  \
	do {                                                                \
	        if ((grp) != NULL &&                                        \
	            1 == OSDecrementAtomic(&(grp)->ref_count)) {            \
	                flow_divert_group_destroy(grp);                     \
	        }                                                           \
	} while (0)
103 
/* Take / drop the per-PCB mutex */
#define FDLOCK(pcb)                     lck_mtx_lock(&(pcb)->mtx)
#define FDUNLOCK(pcb)                   lck_mtx_unlock(&(pcb)->mtx)

/* Size, in bytes, used for the control socket send buffer */
#define FD_CTL_SENDBUFF_SIZE            (128 * 1024)

/* Bit index within a group's atomic flag word */
#define GROUP_BIT_CTL_ENQUEUE_BLOCKED   0

/* Upper bounds: control units are valid in the range 1..GROUP_COUNT_MAX-1 */
#define GROUP_COUNT_MAX                 31
#define FLOW_DIVERT_MAX_NAME_SIZE       4096
#define FLOW_DIVERT_MAX_KEY_SIZE        1024
#define FLOW_DIVERT_MAX_TRIE_MEMORY     (1024 * 1024)
115 
/*
 * One node of the signing-identifier trie. A node owns a run of bytes inside
 * the trie's shared byte array rather than storing the string itself.
 */
struct flow_divert_trie_node {
	uint16_t start;         /* index of the node's first byte in the trie byte array */
	uint16_t length;        /* number of bytes belonging to the node */
	uint16_t child_map;     /* index of the node's child map, or NULL_TRIE_IDX if it has none */
};
121 
/* Each child map has one slot per possible byte value */
#define CHILD_MAP_SIZE                  256
/* Sentinel meaning "no node" / "no child map" / "allocation failed" */
#define NULL_TRIE_IDX                   0xffff

/* Accessors (usable as lvalues) for trie `t`: node `i`, child of node `i` for byte `b`, byte `i` */
#define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
#define TRIE_CHILD(t, i, b)             ((t)->child_maps[(CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map) + (b)])
#define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])

/* True when socket `s` is flagged for flow divert and has a flow divert PCB attached */
#define SO_IS_DIVERTED(s)               (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
128 #define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
129 
130 static struct flow_divert_pcb           nil_pcb;
131 
132 static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
133 static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
134 static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
135     &flow_divert_mtx_attr);
136 
137 static struct flow_divert_group         **g_flow_divert_groups  = NULL;
138 static uint32_t                         g_active_group_count    = 0;
139 
140 static  errno_t                         g_init_result           = 0;
141 
142 static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;
143 
144 static struct protosw                   g_flow_divert_in_protosw;
145 static struct pr_usrreqs                g_flow_divert_in_usrreqs;
146 static struct protosw                   g_flow_divert_in_udp_protosw;
147 static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
148 static struct ip6protosw                g_flow_divert_in6_protosw;
149 static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
150 static struct ip6protosw                g_flow_divert_in6_udp_protosw;
151 static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;
152 
153 static struct protosw                   *g_tcp_protosw          = NULL;
154 static struct ip6protosw                *g_tcp6_protosw         = NULL;
155 static struct protosw                   *g_udp_protosw          = NULL;
156 static struct ip6protosw                *g_udp6_protosw         = NULL;
157 
158 static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
159     NET_KT_DEFAULT);
160 static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
161     NET_KT_DEFAULT);
162 
163 static errno_t
164 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
165 
166 static boolean_t
167 flow_divert_is_sockaddr_valid(struct sockaddr *addr);
168 
169 static int
170 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr);
171 
172 struct sockaddr *
173 flow_divert_get_buffered_target_address(mbuf_t buffer);
174 
175 static void
176 flow_divert_disconnect_socket(struct socket *so, bool is_connected);
177 
178 static void flow_divert_group_destroy(struct flow_divert_group *group);
179 
180 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)181 flow_divert_syslog_type_to_oslog_type(int syslog_type)
182 {
183 	switch (syslog_type) {
184 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
185 	case LOG_INFO: return OS_LOG_TYPE_INFO;
186 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
187 	default: return OS_LOG_TYPE_DEFAULT;
188 	}
189 }
190 
191 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)192 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
193 {
194 	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
195 }
196 
197 RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
198 RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
199 
200 static const char *
flow_divert_packet_type2str(uint8_t packet_type)201 flow_divert_packet_type2str(uint8_t packet_type)
202 {
203 	switch (packet_type) {
204 	case FLOW_DIVERT_PKT_CONNECT:
205 		return "connect";
206 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
207 		return "connect result";
208 	case FLOW_DIVERT_PKT_DATA:
209 		return "data";
210 	case FLOW_DIVERT_PKT_CLOSE:
211 		return "close";
212 	case FLOW_DIVERT_PKT_READ_NOTIFY:
213 		return "read notification";
214 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
215 		return "properties update";
216 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
217 		return "app map create";
218 	default:
219 		return "unknown";
220 	}
221 }
222 
223 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)224 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
225 {
226 	struct flow_divert_pcb  key_item;
227 	struct flow_divert_pcb  *fd_cb          = NULL;
228 
229 	key_item.hash = hash;
230 
231 	lck_rw_lock_shared(&group->lck);
232 	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
233 	FDRETAIN(fd_cb);
234 	lck_rw_done(&group->lck);
235 
236 	return fd_cb;
237 }
238 
239 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)240 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
241 {
242 	struct flow_divert_group *group = NULL;
243 	lck_rw_lock_shared(&g_flow_divert_group_lck);
244 	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
245 		if (fd_cb != NULL) {
246 			FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
247 		}
248 	} else if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) {
249 		FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
250 	} else {
251 		group = g_flow_divert_groups[ctl_unit];
252 		if (group == NULL) {
253 			if (fd_cb != NULL) {
254 				FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
255 			}
256 		} else {
257 			FDGRP_RETAIN(group);
258 		}
259 	}
260 	lck_rw_done(&g_flow_divert_group_lck);
261 	return group;
262 }
263 
264 static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb * fd_cb,struct flow_divert_group * group)265 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
266 {
267 	int error = 0;
268 	lck_rw_lock_exclusive(&group->lck);
269 	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
270 		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
271 			fd_cb->group = group;
272 			fd_cb->control_group_unit = group->ctl_unit;
273 			FDRETAIN(fd_cb); /* The group now has a reference */
274 		} else {
275 			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
276 			error = EEXIST;
277 		}
278 	} else {
279 		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
280 		error = ENOENT;
281 	}
282 	lck_rw_done(&group->lck);
283 	return error;
284 }
285 
286 static errno_t
flow_divert_add_to_group(struct flow_divert_pcb * fd_cb,uint32_t ctl_unit)287 flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
288 {
289 	errno_t error = 0;
290 	struct flow_divert_group *group = NULL;
291 	static uint32_t g_nextkey = 1;
292 	static uint32_t g_hash_seed = 0;
293 	int try_count = 0;
294 
295 	group = flow_divert_group_lookup(ctl_unit, fd_cb);
296 	if (group == NULL) {
297 		return ENOENT;
298 	}
299 
300 	do {
301 		uint32_t key[2];
302 		uint32_t idx;
303 
304 		key[0] = g_nextkey++;
305 		key[1] = RandomULong();
306 
307 		if (g_hash_seed == 0) {
308 			g_hash_seed = RandomULong();
309 		}
310 
311 		error = 0;
312 		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);
313 
314 		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
315 			if (idx == ctl_unit) {
316 				continue;
317 			}
318 			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
319 			if (curr_group != NULL) {
320 				lck_rw_lock_shared(&curr_group->lck);
321 				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
322 					error = EEXIST;
323 				}
324 				lck_rw_done(&curr_group->lck);
325 				FDGRP_RELEASE(curr_group);
326 			}
327 		}
328 
329 		if (error == 0) {
330 			error = flow_divert_pcb_insert(fd_cb, group);
331 		}
332 	} while (error == EEXIST && try_count++ < 3);
333 
334 	if (error == EEXIST) {
335 		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
336 		fd_cb->hash = 0;
337 	}
338 
339 	FDGRP_RELEASE(group);
340 	return error;
341 }
342 
343 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)344 flow_divert_pcb_create(socket_t so)
345 {
346 	struct flow_divert_pcb  *new_pcb = NULL;
347 
348 	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
349 	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
350 	new_pcb->so = so;
351 	new_pcb->log_level = nil_pcb.log_level;
352 
353 	FDRETAIN(new_pcb);      /* Represents the socket's reference */
354 
355 	return new_pcb;
356 }
357 
358 static void
flow_divert_pcb_destroy(struct flow_divert_pcb * fd_cb)359 flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
360 {
361 	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %u, tunnel tx %u, tunnel rx %u",
362 	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);
363 
364 	if (fd_cb->connect_token != NULL) {
365 		mbuf_freem(fd_cb->connect_token);
366 	}
367 	if (fd_cb->connect_packet != NULL) {
368 		mbuf_freem(fd_cb->connect_packet);
369 	}
370 	if (fd_cb->app_data != NULL) {
371 		kfree_data(fd_cb->app_data, fd_cb->app_data_length);
372 	}
373 	if (fd_cb->original_remote_endpoint != NULL) {
374 		free_sockaddr(fd_cb->original_remote_endpoint);
375 	}
376 	zfree(flow_divert_pcb_zone, fd_cb);
377 }
378 
379 static void
flow_divert_pcb_remove(struct flow_divert_pcb * fd_cb)380 flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
381 {
382 	if (fd_cb->group != NULL) {
383 		struct flow_divert_group *group = fd_cb->group;
384 		lck_rw_lock_exclusive(&group->lck);
385 		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
386 		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
387 		fd_cb->group = NULL;
388 		FDRELEASE(fd_cb);                               /* Release the group's reference */
389 		lck_rw_done(&group->lck);
390 	}
391 }
392 
393 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_t * packet)394 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_t *packet)
395 {
396 	struct flow_divert_packet_header        hdr;
397 	int                                     error           = 0;
398 
399 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
400 	if (error) {
401 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
402 		return error;
403 	}
404 
405 	hdr.packet_type = packet_type;
406 	hdr.conn_id = htonl(fd_cb->hash);
407 
408 	/* Lay down the header */
409 	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
410 	if (error) {
411 		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
412 		mbuf_freem(*packet);
413 		*packet = NULL;
414 		return error;
415 	}
416 
417 	return 0;
418 }
419 
420 static int
flow_divert_packet_append_tlv(mbuf_t packet,uint8_t type,uint32_t length,const void * value)421 flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, uint32_t length, const void *value)
422 {
423 	uint32_t        net_length      = htonl(length);
424 	int                     error           = 0;
425 
426 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
427 	if (error) {
428 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
429 		return error;
430 	}
431 
432 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
433 	if (error) {
434 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
435 		return error;
436 	}
437 
438 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
439 	if (error) {
440 		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
441 		return error;
442 	}
443 
444 	return error;
445 }
446 
447 static int
flow_divert_packet_find_tlv(mbuf_t packet,int offset,uint8_t type,int * err,int next)448 flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next)
449 {
450 	size_t          cursor                  = offset;
451 	int                     error                   = 0;
452 	uint32_t        curr_length;
453 	uint8_t         curr_type;
454 
455 	*err = 0;
456 
457 	do {
458 		if (!next) {
459 			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
460 			if (error) {
461 				*err = ENOENT;
462 				return -1;
463 			}
464 		} else {
465 			next = 0;
466 			curr_type = FLOW_DIVERT_TLV_NIL;
467 		}
468 
469 		if (curr_type != type) {
470 			cursor += sizeof(curr_type);
471 			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
472 			if (error) {
473 				*err = error;
474 				return -1;
475 			}
476 
477 			cursor += (sizeof(curr_length) + ntohl(curr_length));
478 		}
479 	} while (curr_type != type);
480 
481 	return (int)cursor;
482 }
483 
484 static int
flow_divert_packet_get_tlv(mbuf_t packet,int offset,uint8_t type,size_t buff_len,void * buff,uint32_t * val_size)485 flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, uint32_t *val_size)
486 {
487 	int                     error           = 0;
488 	uint32_t        length;
489 	int                     tlv_offset;
490 
491 	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
492 	if (tlv_offset < 0) {
493 		return error;
494 	}
495 
496 	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
497 	if (error) {
498 		return error;
499 	}
500 
501 	length = ntohl(length);
502 
503 	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);
504 
505 	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
506 		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
507 		return EINVAL;
508 	}
509 
510 	if (val_size != NULL) {
511 		*val_size = length;
512 	}
513 
514 	if (buff != NULL && buff_len > 0) {
515 		memset(buff, 0, buff_len);
516 		size_t to_copy = (length < buff_len) ? length : buff_len;
517 		error = mbuf_copydata(packet, data_offset, to_copy, buff);
518 		if (error) {
519 			return error;
520 		}
521 	}
522 
523 	return 0;
524 }
525 
526 static int
flow_divert_packet_compute_hmac(mbuf_t packet,struct flow_divert_group * group,uint8_t * hmac)527 flow_divert_packet_compute_hmac(mbuf_t packet, struct flow_divert_group *group, uint8_t *hmac)
528 {
529 	mbuf_t  curr_mbuf       = packet;
530 
531 	if (g_crypto_funcs == NULL || group->token_key == NULL) {
532 		return ENOPROTOOPT;
533 	}
534 
535 	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
536 	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);
537 
538 	while (curr_mbuf != NULL) {
539 		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
540 		curr_mbuf = mbuf_next(curr_mbuf);
541 	}
542 
543 	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);
544 
545 	return 0;
546 }
547 
548 static int
flow_divert_packet_verify_hmac(mbuf_t packet,uint32_t ctl_unit)549 flow_divert_packet_verify_hmac(mbuf_t packet, uint32_t ctl_unit)
550 {
551 	int error = 0;
552 	struct flow_divert_group *group = NULL;
553 	int hmac_offset;
554 	uint8_t packet_hmac[SHA_DIGEST_LENGTH];
555 	uint8_t computed_hmac[SHA_DIGEST_LENGTH];
556 	mbuf_t tail;
557 
558 	group = flow_divert_group_lookup(ctl_unit, NULL);
559 	if (group == NULL) {
560 		FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
561 		return ENOPROTOOPT;
562 	}
563 
564 	lck_rw_lock_shared(&group->lck);
565 
566 	if (group->token_key == NULL) {
567 		error = ENOPROTOOPT;
568 		goto done;
569 	}
570 
571 	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
572 	if (hmac_offset < 0) {
573 		goto done;
574 	}
575 
576 	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
577 	if (error) {
578 		goto done;
579 	}
580 
581 	/* Chop off the HMAC TLV */
582 	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
583 	if (error) {
584 		goto done;
585 	}
586 
587 	mbuf_free(tail);
588 
589 	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
590 	if (error) {
591 		goto done;
592 	}
593 
594 	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
595 		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
596 		error = EINVAL;
597 		goto done;
598 	}
599 
600 done:
601 	if (group != NULL) {
602 		lck_rw_done(&group->lck);
603 		FDGRP_RELEASE(group);
604 	}
605 	return error;
606 }
607 
608 static void
flow_divert_add_data_statistics(struct flow_divert_pcb * fd_cb,size_t data_len,Boolean send)609 flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
610 {
611 	struct inpcb *inp = NULL;
612 	struct ifnet *ifp = NULL;
613 	Boolean cell = FALSE;
614 	Boolean wifi = FALSE;
615 	Boolean wired = FALSE;
616 
617 	inp = sotoinpcb(fd_cb->so);
618 	if (inp == NULL) {
619 		return;
620 	}
621 
622 	if (inp->inp_vflag & INP_IPV4) {
623 		ifp = inp->inp_last_outifp;
624 	} else if (inp->inp_vflag & INP_IPV6) {
625 		ifp = inp->in6p_last_outifp;
626 	}
627 	if (ifp != NULL) {
628 		cell = IFNET_IS_CELLULAR(ifp);
629 		wifi = (!cell && IFNET_IS_WIFI(ifp));
630 		wired = (!wifi && IFNET_IS_WIRED(ifp));
631 	}
632 
633 	if (send) {
634 		INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1);
635 		INP_ADD_STAT(inp, cell, wifi, wired, txbytes, data_len);
636 	} else {
637 		INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1);
638 		INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, data_len);
639 	}
640 	inp_set_activity_bitmap(inp);
641 }
642 
643 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)644 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
645 {
646 	struct inpcb *inp = sotoinpcb(fd_cb->so);
647 	if (INP_NO_CELLULAR(inp)) {
648 		struct ifnet *ifp = NULL;
649 		if (inp->inp_vflag & INP_IPV4) {
650 			ifp = inp->inp_last_outifp;
651 		} else if (inp->inp_vflag & INP_IPV6) {
652 			ifp = inp->in6p_last_outifp;
653 		}
654 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
655 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
656 			return EHOSTUNREACH;
657 		}
658 	}
659 	return 0;
660 }
661 
662 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)663 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
664 {
665 	struct inpcb *inp = sotoinpcb(fd_cb->so);
666 	if (INP_NO_EXPENSIVE(inp)) {
667 		struct ifnet *ifp = NULL;
668 		if (inp->inp_vflag & INP_IPV4) {
669 			ifp = inp->inp_last_outifp;
670 		} else if (inp->inp_vflag & INP_IPV6) {
671 			ifp = inp->in6p_last_outifp;
672 		}
673 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
674 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
675 			return EHOSTUNREACH;
676 		}
677 	}
678 	return 0;
679 }
680 
681 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)682 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
683 {
684 	struct inpcb *inp = sotoinpcb(fd_cb->so);
685 	if (INP_NO_CONSTRAINED(inp)) {
686 		struct ifnet *ifp = NULL;
687 		if (inp->inp_vflag & INP_IPV4) {
688 			ifp = inp->inp_last_outifp;
689 		} else if (inp->inp_vflag & INP_IPV6) {
690 			ifp = inp->in6p_last_outifp;
691 		}
692 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
693 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
694 			return EHOSTUNREACH;
695 		}
696 	}
697 	return 0;
698 }
699 
700 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,bool tunnel,bool flush_snd)701 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
702 {
703 	if (how != SHUT_RD) {
704 		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
705 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
706 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
707 			if (flush_snd) {
708 				/* If the tunnel is not accepting writes any more, then flush the send buffer */
709 				sbflush(&fd_cb->so->so_snd);
710 			}
711 		}
712 	}
713 	if (how != SHUT_WR) {
714 		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
715 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
716 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
717 		}
718 	}
719 }
720 
721 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)722 trie_node_alloc(struct flow_divert_trie *trie)
723 {
724 	if (trie->nodes_free_next < trie->nodes_count) {
725 		uint16_t node_idx = trie->nodes_free_next++;
726 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
727 		return node_idx;
728 	} else {
729 		return NULL_TRIE_IDX;
730 	}
731 }
732 
733 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)734 trie_child_map_alloc(struct flow_divert_trie *trie)
735 {
736 	if (trie->child_maps_free_next < trie->child_maps_count) {
737 		return trie->child_maps_free_next++;
738 	} else {
739 		return NULL_TRIE_IDX;
740 	}
741 }
742 
743 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)744 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
745 {
746 	uint16_t start = trie->bytes_free_next;
747 	if (start + bytes_size <= trie->bytes_count) {
748 		if (start != bytes_idx) {
749 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
750 		}
751 		trie->bytes_free_next += bytes_size;
752 		return start;
753 	} else {
754 		return NULL_TRIE_IDX;
755 	}
756 }
757 
758 static uint16_t
flow_divert_trie_insert(struct flow_divert_trie * trie,uint16_t string_start,size_t string_len)759 flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
760 {
761 	uint16_t current = trie->root;
762 	uint16_t child = trie->root;
763 	uint16_t string_end = string_start + (uint16_t)string_len;
764 	uint16_t string_idx = string_start;
765 	uint16_t string_remainder = (uint16_t)string_len;
766 
767 	while (child != NULL_TRIE_IDX) {
768 		uint16_t parent = current;
769 		uint16_t node_idx;
770 		uint16_t current_end;
771 
772 		current = child;
773 		child = NULL_TRIE_IDX;
774 
775 		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
776 
777 		for (node_idx = TRIE_NODE(trie, current).start;
778 		    node_idx < current_end &&
779 		    string_idx < string_end &&
780 		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
781 		    node_idx++, string_idx++) {
782 			;
783 		}
784 
785 		string_remainder = string_end - string_idx;
786 
787 		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
788 			/*
789 			 * We did not reach the end of the current node's string.
790 			 * We need to split the current node into two:
791 			 *   1. A new node that contains the prefix of the node that matches
792 			 *      the prefix of the string being inserted.
793 			 *   2. The current node modified to point to the remainder
794 			 *      of the current node's string.
795 			 */
796 			uint16_t prefix = trie_node_alloc(trie);
797 			if (prefix == NULL_TRIE_IDX) {
798 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
799 				return NULL_TRIE_IDX;
800 			}
801 
802 			/*
803 			 * Prefix points to the portion of the current nodes's string that has matched
804 			 * the input string thus far.
805 			 */
806 			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
807 			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);
808 
809 			/*
810 			 * Prefix has the current node as the child corresponding to the first byte
811 			 * after the split.
812 			 */
813 			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
814 			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
815 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
816 				return NULL_TRIE_IDX;
817 			}
818 			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
819 
820 			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
821 			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
822 
823 			/* Current node is adjusted to point to the remainder */
824 			TRIE_NODE(trie, current).start = node_idx;
825 			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
826 
827 			/* We want to insert the new leaf (if any) as a child of the prefix */
828 			current = prefix;
829 		}
830 
831 		if (string_remainder > 0) {
832 			/*
833 			 * We still have bytes in the string that have not been matched yet.
834 			 * If the current node has children, iterate to the child corresponding
835 			 * to the next byte in the string.
836 			 */
837 			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
838 				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
839 			}
840 		}
841 	} /* while (child != NULL_TRIE_IDX) */
842 
843 	if (string_remainder > 0) {
844 		/* Add a new leaf containing the remainder of the string */
845 		uint16_t leaf = trie_node_alloc(trie);
846 		if (leaf == NULL_TRIE_IDX) {
847 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
848 			return NULL_TRIE_IDX;
849 		}
850 
851 		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
852 		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
853 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
854 			return NULL_TRIE_IDX;
855 		}
856 		TRIE_NODE(trie, leaf).length = string_remainder;
857 
858 		/* Set the new leaf as the child of the current node */
859 		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
860 			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
861 			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
862 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
863 				return NULL_TRIE_IDX;
864 			}
865 		}
866 		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
867 		current = leaf;
868 	} /* else duplicate or this string is a prefix of one of the existing strings */
869 
870 	return current;
871 }
872 
873 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
874 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes)875 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
876 {
877 	uint16_t current = trie->root;
878 	uint16_t string_idx = 0;
879 
880 	while (current != NULL_TRIE_IDX) {
881 		uint16_t next = NULL_TRIE_IDX;
882 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
883 		uint16_t node_idx;
884 
885 		for (node_idx = TRIE_NODE(trie, current).start;
886 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
887 		    node_idx++, string_idx++) {
888 			;
889 		}
890 
891 		if (node_idx == node_end) {
892 			if (string_bytes[string_idx] == '\0') {
893 				return current; /* Got an exact match */
894 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
895 			    0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
896 				return current; /* Got an apple webclip id prefix match */
897 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
898 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
899 			}
900 		}
901 		current = next;
902 	}
903 
904 	return NULL_TRIE_IDX;
905 }
906 
907 struct uuid_search_info {
908 	uuid_t target_uuid;
909 	char *found_signing_id;
910 	boolean_t found_multiple_signing_ids;
911 	proc_t found_proc;
912 };
913 
914 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)915 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
916 {
917 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
918 	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
919 
920 	if (info->found_signing_id != NULL) {
921 		if (!info->found_multiple_signing_ids) {
922 			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
923 			info->found_proc = p;
924 			result = PROC_CLAIMED_DONE;
925 		} else {
926 			uuid_string_t uuid_str;
927 			uuid_unparse(info->target_uuid, uuid_str);
928 			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
929 		}
930 		kfree_data(info->found_signing_id, strlen(info->found_signing_id) + 1);
931 		info->found_signing_id = NULL;
932 	}
933 
934 	if (result == PROC_RETURNED_DONE) {
935 		uuid_string_t uuid_str;
936 		uuid_unparse(info->target_uuid, uuid_str);
937 		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
938 	}
939 
940 	return result;
941 }
942 
943 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)944 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
945 {
946 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
947 	int include = 0;
948 
949 	if (info->found_multiple_signing_ids) {
950 		return include;
951 	}
952 
953 	include = (uuid_compare(proc_executableuuid_addr(p), info->target_uuid) == 0);
954 	if (include) {
955 		const char *signing_id = cs_identity_get(p);
956 		if (signing_id != NULL) {
957 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
958 			size_t signing_id_size = strlen(signing_id) + 1;
959 			if (info->found_signing_id == NULL) {
960 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
961 				memcpy(info->found_signing_id, signing_id, signing_id_size);
962 			} else if (memcmp(signing_id, info->found_signing_id, signing_id_size)) {
963 				info->found_multiple_signing_ids = TRUE;
964 			}
965 		} else {
966 			info->found_multiple_signing_ids = TRUE;
967 		}
968 		include = !info->found_multiple_signing_ids;
969 	}
970 
971 	return include;
972 }
973 
974 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)975 flow_divert_find_proc_by_uuid(uuid_t uuid)
976 {
977 	struct uuid_search_info info;
978 
979 	if (LOG_INFO <= nil_pcb.log_level) {
980 		uuid_string_t uuid_str;
981 		uuid_unparse(uuid, uuid_str);
982 		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
983 	}
984 
985 	memset(&info, 0, sizeof(info));
986 	info.found_proc = PROC_NULL;
987 	uuid_copy(info.target_uuid, uuid);
988 
989 	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
990 
991 	return info.found_proc;
992 }
993 
994 static int
flow_divert_add_proc_info(struct flow_divert_pcb * fd_cb,proc_t proc,const char * signing_id,mbuf_t connect_packet,bool is_effective)995 flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id, mbuf_t connect_packet, bool is_effective)
996 {
997 	int error = 0;
998 	uint8_t *cdhash = NULL;
999 	audit_token_t audit_token = {};
1000 	const char *proc_cs_id = signing_id;
1001 
1002 	proc_lock(proc);
1003 
1004 	if (proc_cs_id == NULL) {
1005 		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
1006 			proc_cs_id = cs_identity_get(proc);
1007 		} else {
1008 			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
1009 		}
1010 	}
1011 
1012 	if (is_effective) {
1013 		lck_rw_lock_shared(&fd_cb->group->lck);
1014 		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1015 			if (proc_cs_id != NULL) {
1016 				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)proc_cs_id);
1017 				if (result == NULL_TRIE_IDX) {
1018 					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
1019 					error = EPERM;
1020 				} else {
1021 					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
1022 				}
1023 			} else {
1024 				error = EPERM;
1025 			}
1026 		}
1027 		lck_rw_done(&fd_cb->group->lck);
1028 	}
1029 
1030 	if (error != 0) {
1031 		goto done;
1032 	}
1033 
1034 	/*
1035 	 * If signing_id is not NULL then it came from the flow divert token and will be added
1036 	 * as part of the token, so there is no need to add it here.
1037 	 */
1038 	if (signing_id == NULL && proc_cs_id != NULL) {
1039 		error = flow_divert_packet_append_tlv(connect_packet,
1040 		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
1041 		    (uint32_t)strlen(proc_cs_id),
1042 		    proc_cs_id);
1043 		if (error != 0) {
1044 			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
1045 			goto done;
1046 		}
1047 	}
1048 
1049 	cdhash = cs_get_cdhash(proc);
1050 	if (cdhash != NULL) {
1051 		error = flow_divert_packet_append_tlv(connect_packet,
1052 		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
1053 		    SHA1_RESULTLEN,
1054 		    cdhash);
1055 		if (error) {
1056 			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
1057 			goto done;
1058 		}
1059 	} else {
1060 		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
1061 	}
1062 
1063 	task_t task = proc_task(proc);
1064 	if (task != TASK_NULL) {
1065 		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
1066 		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
1067 		if (rc == KERN_SUCCESS) {
1068 			int append_error = flow_divert_packet_append_tlv(connect_packet,
1069 			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
1070 			    sizeof(audit_token_t),
1071 			    &audit_token);
1072 			if (append_error) {
1073 				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
1074 			}
1075 		}
1076 	}
1077 
1078 done:
1079 	proc_unlock(proc);
1080 
1081 	return error;
1082 }
1083 
1084 static int
flow_divert_add_all_proc_info(struct flow_divert_pcb * fd_cb,struct socket * so,proc_t proc,const char * signing_id,mbuf_t connect_packet)1085 flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id, mbuf_t connect_packet)
1086 {
1087 	int error = 0;
1088 	proc_t effective_proc = PROC_NULL;
1089 	proc_t responsible_proc = PROC_NULL;
1090 	proc_t real_proc = proc_find(so->last_pid);
1091 	bool release_real_proc = true;
1092 
1093 	proc_t src_proc = PROC_NULL;
1094 	proc_t real_src_proc = PROC_NULL;
1095 
1096 	if (real_proc == PROC_NULL) {
1097 		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
1098 		release_real_proc = false;
1099 		real_proc = proc;
1100 		if (real_proc == PROC_NULL) {
1101 			real_proc = current_proc();
1102 		}
1103 	}
1104 
1105 	if (so->so_flags & SOF_DELEGATED) {
1106 		if (proc_getpid(real_proc) != so->e_pid) {
1107 			effective_proc = proc_find(so->e_pid);
1108 		} else if (uuid_compare(proc_executableuuid_addr(real_proc), so->e_uuid)) {
1109 			effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
1110 		}
1111 	}
1112 
1113 #if defined(XNU_TARGET_OS_OSX)
1114 	lck_rw_lock_shared(&fd_cb->group->lck);
1115 	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1116 		if (so->so_rpid > 0) {
1117 			responsible_proc = proc_find(so->so_rpid);
1118 		}
1119 	}
1120 	lck_rw_done(&fd_cb->group->lck);
1121 #endif
1122 
1123 	real_src_proc = real_proc;
1124 
1125 	if (responsible_proc != PROC_NULL) {
1126 		src_proc = responsible_proc;
1127 		if (effective_proc != NULL) {
1128 			real_src_proc = effective_proc;
1129 		}
1130 	} else if (effective_proc != PROC_NULL) {
1131 		src_proc = effective_proc;
1132 	} else {
1133 		src_proc = real_proc;
1134 	}
1135 
1136 	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
1137 	if (error != 0) {
1138 		goto done;
1139 	}
1140 
1141 	if (real_src_proc != NULL && real_src_proc != src_proc) {
1142 		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
1143 		if (error != 0) {
1144 			goto done;
1145 		}
1146 	}
1147 
1148 done:
1149 	if (responsible_proc != PROC_NULL) {
1150 		proc_rele(responsible_proc);
1151 	}
1152 
1153 	if (effective_proc != PROC_NULL) {
1154 		proc_rele(effective_proc);
1155 	}
1156 
1157 	if (real_proc != PROC_NULL && release_real_proc) {
1158 		proc_rele(real_proc);
1159 	}
1160 
1161 	return error;
1162 }
1163 
1164 static int
flow_divert_send_packet(struct flow_divert_pcb * fd_cb,mbuf_t packet)1165 flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet)
1166 {
1167 	int             error;
1168 
1169 	if (fd_cb->group == NULL) {
1170 		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
1171 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, false);
1172 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
1173 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1174 			error = ECONNABORTED;
1175 		} else {
1176 			error = EHOSTUNREACH;
1177 		}
1178 		fd_cb->so->so_error = (uint16_t)error;
1179 		return error;
1180 	}
1181 
1182 	lck_rw_lock_shared(&fd_cb->group->lck);
1183 
1184 	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
1185 		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
1186 		if (error) {
1187 			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
1188 		}
1189 	} else {
1190 		error = ENOBUFS;
1191 	}
1192 
1193 	if (error == ENOBUFS) {
1194 		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
1195 			lck_rw_lock_exclusive(&fd_cb->group->lck);
1196 		}
1197 		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
1198 		error = 0;
1199 		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
1200 	}
1201 
1202 	lck_rw_done(&fd_cb->group->lck);
1203 
1204 	return error;
1205 }
1206 
1207 static void
flow_divert_append_domain_name(char * domain_name,void * ctx)1208 flow_divert_append_domain_name(char *domain_name, void *ctx)
1209 {
1210 	mbuf_t packet = (mbuf_t)ctx;
1211 	size_t domain_name_length = 0;
1212 
1213 	if (packet == NULL || domain_name == NULL) {
1214 		return;
1215 	}
1216 
1217 	domain_name_length = strlen(domain_name);
1218 	if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1219 		int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, domain_name);
1220 		if (error) {
1221 			FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1222 		}
1223 	}
1224 }
1225 
1226 static int
flow_divert_create_connect_packet(struct flow_divert_pcb * fd_cb,struct sockaddr * to,struct socket * so,proc_t p,mbuf_t * out_connect_packet)1227 flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_t *out_connect_packet)
1228 {
1229 	int                     error                   = 0;
1230 	int                     flow_type               = 0;
1231 	char                    *signing_id = NULL;
1232 	uint32_t                sid_size = 0;
1233 	mbuf_t                  connect_packet = NULL;
1234 	cfil_sock_id_t          cfil_sock_id            = CFIL_SOCK_ID_NONE;
1235 	const void              *cfil_id                = NULL;
1236 	size_t                  cfil_id_size            = 0;
1237 	struct inpcb            *inp = sotoinpcb(so);
1238 	struct ifnet *ifp = NULL;
1239 	uint32_t flags = 0;
1240 
1241 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
1242 	if (error) {
1243 		goto done;
1244 	}
1245 
1246 	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
1247 		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1248 		if (find_error == 0 && sid_size > 0) {
1249 			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
1250 			if (signing_id != NULL) {
1251 				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
1252 				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
1253 			}
1254 		}
1255 	}
1256 
1257 	error = flow_divert_add_all_proc_info(fd_cb, so, p, signing_id, connect_packet);
1258 
1259 	if (signing_id != NULL) {
1260 		kfree_data(signing_id, sid_size + 1);
1261 	}
1262 
1263 	if (error) {
1264 		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
1265 		goto done;
1266 	}
1267 
1268 	error = flow_divert_packet_append_tlv(connect_packet,
1269 	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
1270 	    sizeof(fd_cb->so->so_traffic_class),
1271 	    &fd_cb->so->so_traffic_class);
1272 	if (error) {
1273 		goto done;
1274 	}
1275 
1276 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1277 		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
1278 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1279 		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
1280 	} else {
1281 		error = EINVAL;
1282 		goto done;
1283 	}
1284 	error = flow_divert_packet_append_tlv(connect_packet,
1285 	    FLOW_DIVERT_TLV_FLOW_TYPE,
1286 	    sizeof(flow_type),
1287 	    &flow_type);
1288 
1289 	if (error) {
1290 		goto done;
1291 	}
1292 
1293 	if (fd_cb->connect_token != NULL) {
1294 		unsigned int token_len = m_length(fd_cb->connect_token);
1295 		mbuf_concatenate(connect_packet, fd_cb->connect_token);
1296 		mbuf_pkthdr_adjustlen(connect_packet, token_len);
1297 		fd_cb->connect_token = NULL;
1298 	} else {
1299 		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
1300 		if (error) {
1301 			goto done;
1302 		}
1303 
1304 		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
1305 	}
1306 
1307 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1308 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1309 		if (error) {
1310 			goto done;
1311 		}
1312 	}
1313 
1314 	if (inp->inp_vflag & INP_IPV4) {
1315 		ifp = inp->inp_last_outifp;
1316 	} else if (inp->inp_vflag & INP_IPV6) {
1317 		ifp = inp->in6p_last_outifp;
1318 	}
1319 	if (ifp != NULL) {
1320 		uint32_t flow_if_index = ifp->if_index;
1321 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
1322 		    sizeof(flow_if_index), &flow_if_index);
1323 		if (error) {
1324 			goto done;
1325 		}
1326 	}
1327 
1328 	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
1329 		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
1330 	}
1331 
1332 	if ((inp->inp_flags & INP_BOUND_IF) ||
1333 	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
1334 	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
1335 		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
1336 	}
1337 
1338 	if (flags != 0) {
1339 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
1340 		if (error) {
1341 			goto done;
1342 		}
1343 	}
1344 
1345 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
1346 		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
1347 	} else {
1348 		cfil_sock_id = cfil_sock_id_from_socket(so);
1349 	}
1350 
1351 	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
1352 		cfil_id = &cfil_sock_id;
1353 		cfil_id_size = sizeof(cfil_sock_id);
1354 	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
1355 		cfil_id = &inp->necp_client_uuid;
1356 		cfil_id_size = sizeof(inp->necp_client_uuid);
1357 	}
1358 
1359 	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
1360 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
1361 		if (error) {
1362 			goto done;
1363 		}
1364 	}
1365 
1366 done:
1367 	if (!error) {
1368 		*out_connect_packet = connect_packet;
1369 	} else if (connect_packet != NULL) {
1370 		mbuf_freem(connect_packet);
1371 	}
1372 
1373 	return error;
1374 }
1375 
1376 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1377 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1378 {
1379 	int error = 0;
1380 	mbuf_t connect_packet = fd_cb->connect_packet;
1381 	mbuf_t saved_connect_packet = NULL;
1382 
1383 	if (connect_packet != NULL) {
1384 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1385 		if (error) {
1386 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1387 			goto done;
1388 		}
1389 
1390 		error = flow_divert_send_packet(fd_cb, connect_packet);
1391 		if (error) {
1392 			goto done;
1393 		}
1394 
1395 		fd_cb->connect_packet = saved_connect_packet;
1396 		saved_connect_packet = NULL;
1397 	} else {
1398 		error = ENOENT;
1399 	}
1400 done:
1401 	if (saved_connect_packet != NULL) {
1402 		mbuf_freem(saved_connect_packet);
1403 	}
1404 
1405 	return error;
1406 }
1407 
1408 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1409 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1410 {
1411 	int             error                   = 0;
1412 	mbuf_t  packet                  = NULL;
1413 	int             rbuff_space             = 0;
1414 
1415 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1416 	if (error) {
1417 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1418 		goto done;
1419 	}
1420 
1421 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1422 	if (rbuff_space < 0) {
1423 		rbuff_space = 0;
1424 	}
1425 	rbuff_space = htonl(rbuff_space);
1426 	error = flow_divert_packet_append_tlv(packet,
1427 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1428 	    sizeof(rbuff_space),
1429 	    &rbuff_space);
1430 	if (error) {
1431 		goto done;
1432 	}
1433 
1434 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1435 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1436 		if (error) {
1437 			goto done;
1438 		}
1439 	}
1440 
1441 	error = flow_divert_send_packet(fd_cb, packet);
1442 	if (error) {
1443 		goto done;
1444 	}
1445 
1446 done:
1447 	if (error && packet != NULL) {
1448 		mbuf_freem(packet);
1449 	}
1450 
1451 	return error;
1452 }
1453 
1454 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1455 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1456 {
1457 	int             error   = 0;
1458 	mbuf_t  packet  = NULL;
1459 	uint32_t        zero    = 0;
1460 
1461 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1462 	if (error) {
1463 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1464 		goto done;
1465 	}
1466 
1467 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1468 	if (error) {
1469 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1470 		goto done;
1471 	}
1472 
1473 	how = htonl(how);
1474 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1475 	if (error) {
1476 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1477 		goto done;
1478 	}
1479 
1480 	error = flow_divert_send_packet(fd_cb, packet);
1481 	if (error) {
1482 		goto done;
1483 	}
1484 
1485 done:
1486 	if (error && packet != NULL) {
1487 		mbuf_free(packet);
1488 	}
1489 
1490 	return error;
1491 }
1492 
1493 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1494 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1495 {
1496 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1497 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1498 		return SHUT_RDWR;
1499 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1500 		return SHUT_RD;
1501 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1502 		return SHUT_WR;
1503 	}
1504 
1505 	return -1;
1506 }
1507 
1508 /*
1509  * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1510  * writes. Returns FALSE otherwise.
1511  */
1512 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1513 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1514 {
1515 	int             how             = -1;
1516 
1517 	/* Do not send any close messages if there is still data in the send buffer */
1518 	if (fd_cb->so->so_snd.sb_cc == 0) {
1519 		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1520 			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1521 			how = SHUT_RD;
1522 		}
1523 		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1524 			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1525 			if (how == SHUT_RD) {
1526 				how = SHUT_RDWR;
1527 			} else {
1528 				how = SHUT_WR;
1529 			}
1530 		}
1531 	}
1532 
1533 	if (how != -1) {
1534 		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1535 		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1536 			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1537 			if (how != SHUT_RD) {
1538 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1539 			}
1540 			if (how != SHUT_WR) {
1541 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1542 			}
1543 		}
1544 	}
1545 
1546 	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
1547 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
1548 	}
1549 }
1550 
1551 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_t data,size_t data_len)1552 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len)
1553 {
1554 	mbuf_t packet = NULL;
1555 	mbuf_t last = NULL;
1556 	int error = 0;
1557 
1558 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1559 	if (error || packet == NULL) {
1560 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1561 		goto done;
1562 	}
1563 
1564 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1565 		last = m_last(packet);
1566 		mbuf_setnext(last, data);
1567 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1568 	} else {
1569 		data_len = 0;
1570 	}
1571 	error = flow_divert_send_packet(fd_cb, packet);
1572 	if (error == 0 && data_len > 0) {
1573 		fd_cb->bytes_sent += data_len;
1574 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1575 	}
1576 
1577 done:
1578 	if (error) {
1579 		if (last != NULL) {
1580 			mbuf_setnext(last, NULL);
1581 		}
1582 		if (packet != NULL) {
1583 			mbuf_freem(packet);
1584 		}
1585 	}
1586 
1587 	return error;
1588 }
1589 
1590 static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb * fd_cb,mbuf_t data,size_t data_len,struct sockaddr * toaddr,Boolean is_fragment,size_t datagram_size)1591 flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
1592 {
1593 	mbuf_t packet = NULL;
1594 	mbuf_t last = NULL;
1595 	int error = 0;
1596 
1597 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1598 	if (error || packet == NULL) {
1599 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1600 		goto done;
1601 	}
1602 
1603 	if (toaddr != NULL) {
1604 		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
1605 		if (error) {
1606 			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
1607 			goto done;
1608 		}
1609 	}
1610 	if (is_fragment) {
1611 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
1612 		if (error) {
1613 			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
1614 			goto done;
1615 		}
1616 	}
1617 
1618 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
1619 	if (error) {
1620 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
1621 		goto done;
1622 	}
1623 
1624 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1625 		last = m_last(packet);
1626 		mbuf_setnext(last, data);
1627 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1628 	} else {
1629 		data_len = 0;
1630 	}
1631 	error = flow_divert_send_packet(fd_cb, packet);
1632 	if (error == 0 && data_len > 0) {
1633 		fd_cb->bytes_sent += data_len;
1634 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1635 	}
1636 
1637 done:
1638 	if (error) {
1639 		if (last != NULL) {
1640 			mbuf_setnext(last, NULL);
1641 		}
1642 		if (packet != NULL) {
1643 			mbuf_freem(packet);
1644 		}
1645 	}
1646 
1647 	return error;
1648 }
1649 
1650 static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb * fd_cb,mbuf_t datagram,size_t datagram_len,struct sockaddr * toaddr)1651 flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_t datagram, size_t datagram_len, struct sockaddr *toaddr)
1652 {
1653 	mbuf_t next_data = datagram;
1654 	size_t remaining_len = datagram_len;
1655 	mbuf_t remaining_data = NULL;
1656 	int error = 0;
1657 	bool first = true;
1658 
1659 	while (remaining_len > 0 && next_data != NULL) {
1660 		size_t to_send = remaining_len;
1661 		remaining_data = NULL;
1662 
1663 		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
1664 			to_send = FLOW_DIVERT_CHUNK_SIZE;
1665 			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
1666 			if (error) {
1667 				break;
1668 			}
1669 		}
1670 
1671 		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
1672 		if (error) {
1673 			break;
1674 		}
1675 
1676 		first = false;
1677 		remaining_len -= to_send;
1678 		next_data = remaining_data;
1679 	}
1680 
1681 	if (error) {
1682 		if (next_data != NULL) {
1683 			mbuf_freem(next_data);
1684 		}
1685 		if (remaining_data != NULL) {
1686 			mbuf_freem(remaining_data);
1687 		}
1688 	}
1689 	return error;
1690 }
1691 
1692 static void
flow_divert_send_buffered_data(struct flow_divert_pcb * fd_cb,Boolean force)1693 flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
1694 {
1695 	size_t  to_send;
1696 	size_t  sent    = 0;
1697 	int             error   = 0;
1698 	mbuf_t  buffer;
1699 
1700 	to_send = fd_cb->so->so_snd.sb_cc;
1701 	buffer = fd_cb->so->so_snd.sb_mb;
1702 
1703 	if (buffer == NULL && to_send > 0) {
1704 		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
1705 		return;
1706 	}
1707 
1708 	/* Ignore the send window if force is enabled */
1709 	if (!force && (to_send > fd_cb->send_window)) {
1710 		to_send = fd_cb->send_window;
1711 	}
1712 
1713 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1714 		while (sent < to_send) {
1715 			mbuf_t  data;
1716 			size_t  data_len;
1717 
1718 			data_len = to_send - sent;
1719 			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
1720 				data_len = FLOW_DIVERT_CHUNK_SIZE;
1721 			}
1722 
1723 			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
1724 			if (error) {
1725 				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1726 				break;
1727 			}
1728 
1729 			error = flow_divert_send_data_packet(fd_cb, data, data_len);
1730 			if (error) {
1731 				if (data != NULL) {
1732 					mbuf_freem(data);
1733 				}
1734 				break;
1735 			}
1736 
1737 			sent += data_len;
1738 		}
1739 		sbdrop(&fd_cb->so->so_snd, (int)sent);
1740 		sowwakeup(fd_cb->so);
1741 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1742 		mbuf_t data;
1743 		mbuf_t m;
1744 		size_t data_len;
1745 
1746 		while (buffer) {
1747 			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);
1748 
1749 			m = buffer;
1750 			if (toaddr != NULL) {
1751 				/* look for data in the chain */
1752 				do {
1753 					m = m->m_next;
1754 					if (m != NULL && m->m_type == MT_DATA) {
1755 						break;
1756 					}
1757 				} while (m);
1758 				if (m == NULL) {
1759 					/* unexpected */
1760 					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
1761 					goto move_on;
1762 				}
1763 			}
1764 			data_len = mbuf_pkthdr_len(m);
1765 			if (data_len > 0) {
1766 				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
1767 				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
1768 				if (error) {
1769 					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1770 					break;
1771 				}
1772 			} else {
1773 				data = NULL;
1774 			}
1775 			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
1776 				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
1777 			} else {
1778 				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
1779 				data = NULL;
1780 			}
1781 			if (error) {
1782 				if (data != NULL) {
1783 					mbuf_freem(data);
1784 				}
1785 				break;
1786 			}
1787 			sent += data_len;
1788 move_on:
1789 			buffer = buffer->m_nextpkt;
1790 			(void) sbdroprecord(&(fd_cb->so->so_snd));
1791 		}
1792 	}
1793 
1794 	if (sent > 0) {
1795 		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
1796 		if (fd_cb->send_window >= sent) {
1797 			fd_cb->send_window -= sent;
1798 		} else {
1799 			fd_cb->send_window = 0;
1800 		}
1801 	}
1802 }
1803 
1804 static int
flow_divert_send_app_data(struct flow_divert_pcb * fd_cb,mbuf_t data,struct sockaddr * toaddr)1805 flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data, struct sockaddr *toaddr)
1806 {
1807 	size_t  to_send         = mbuf_pkthdr_len(data);
1808 	int     error           = 0;
1809 
1810 	if (to_send > fd_cb->send_window) {
1811 		to_send = fd_cb->send_window;
1812 	}
1813 
1814 	if (fd_cb->so->so_snd.sb_cc > 0) {
1815 		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
1816 	}
1817 
1818 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1819 		size_t  sent            = 0;
1820 		mbuf_t  remaining_data  = data;
1821 		mbuf_t  pkt_data        = NULL;
1822 		while (sent < to_send && remaining_data != NULL) {
1823 			size_t  pkt_data_len;
1824 
1825 			pkt_data = remaining_data;
1826 
1827 			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
1828 				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
1829 			} else {
1830 				pkt_data_len = to_send - sent;
1831 			}
1832 
1833 			if (pkt_data_len < mbuf_pkthdr_len(pkt_data)) {
1834 				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
1835 				if (error) {
1836 					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
1837 					pkt_data = NULL;
1838 					break;
1839 				}
1840 			} else {
1841 				remaining_data = NULL;
1842 			}
1843 
1844 			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
1845 			if (error) {
1846 				break;
1847 			}
1848 
1849 			pkt_data = NULL;
1850 			sent += pkt_data_len;
1851 		}
1852 
1853 		fd_cb->send_window -= sent;
1854 
1855 		error = 0;
1856 
1857 		if (pkt_data != NULL) {
1858 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1859 				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
1860 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
1861 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1862 				}
1863 			} else {
1864 				mbuf_freem(pkt_data);
1865 				error = ENOBUFS;
1866 			}
1867 		}
1868 
1869 		if (remaining_data != NULL) {
1870 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1871 				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
1872 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
1873 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1874 				}
1875 			} else {
1876 				mbuf_freem(remaining_data);
1877 				error = ENOBUFS;
1878 			}
1879 		}
1880 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1881 		int send_dgram_error = 0;
1882 		size_t data_size = mbuf_pkthdr_len(data);
1883 		if (to_send || data_size == 0) {
1884 			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
1885 				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
1886 			} else {
1887 				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
1888 				data = NULL;
1889 			}
1890 			if (send_dgram_error) {
1891 				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
1892 			} else {
1893 				if (data_size >= fd_cb->send_window) {
1894 					fd_cb->send_window = 0;
1895 				} else {
1896 					fd_cb->send_window -= data_size;
1897 				}
1898 				data = NULL;
1899 			}
1900 		}
1901 
1902 		if (data != NULL) {
1903 			/* buffer it */
1904 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1905 				if (toaddr != NULL) {
1906 					int append_error = 0;
1907 					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
1908 						FDLOG(LOG_ERR, fd_cb,
1909 						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
1910 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
1911 					}
1912 				} else {
1913 					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
1914 						FDLOG(LOG_ERR, fd_cb,
1915 						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
1916 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1917 					}
1918 				}
1919 			} else {
1920 				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
1921 				mbuf_freem(data);
1922 			}
1923 		}
1924 	}
1925 
1926 	return error;
1927 }
1928 
1929 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1930 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1931 {
1932 	int error = 0;
1933 	mbuf_t packet = NULL;
1934 
1935 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1936 	if (error) {
1937 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1938 		goto done;
1939 	}
1940 
1941 	error = flow_divert_send_packet(fd_cb, packet);
1942 	if (error) {
1943 		goto done;
1944 	}
1945 
1946 done:
1947 	if (error && packet != NULL) {
1948 		mbuf_free(packet);
1949 	}
1950 
1951 	return error;
1952 }
1953 
1954 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1955 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1956 {
1957 	int             error           = 0;
1958 	mbuf_t  packet          = NULL;
1959 
1960 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
1961 	if (error) {
1962 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
1963 		goto done;
1964 	}
1965 
1966 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
1967 	if (error) {
1968 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
1969 		goto done;
1970 	}
1971 
1972 	error = flow_divert_send_packet(fd_cb, packet);
1973 	if (error) {
1974 		goto done;
1975 	}
1976 
1977 done:
1978 	if (error && packet != NULL) {
1979 		mbuf_free(packet);
1980 	}
1981 
1982 	return error;
1983 }
1984 
1985 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)1986 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
1987 {
1988 	struct inpcb *inp = sotoinpcb(fd_cb->so);
1989 
1990 	if (local_endpoint->sa_family == AF_INET6) {
1991 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
1992 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
1993 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
1994 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
1995 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
1996 		}
1997 		if (inp->inp_lport == 0) {
1998 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
1999 		}
2000 	} else if (local_endpoint->sa_family == AF_INET) {
2001 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2002 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2003 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2004 		}
2005 		if (inp->inp_lport == 0) {
2006 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
2007 		}
2008 	}
2009 }
2010 
2011 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2012 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2013 {
2014 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2015 
2016 	if (remote_endpoint->sa_family == AF_INET6) {
2017 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2018 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2019 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2020 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2021 		}
2022 		if (inp->inp_fport == 0) {
2023 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2024 		}
2025 	} else if (remote_endpoint->sa_family == AF_INET) {
2026 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
2027 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2028 		}
2029 		if (inp->inp_fport == 0) {
2030 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2031 		}
2032 	}
2033 }
2034 
2035 static uint32_t
flow_divert_derive_kernel_control_unit(uint32_t * ctl_unit,uint32_t * aggregate_unit,bool * is_aggregate)2036 flow_divert_derive_kernel_control_unit(uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
2037 {
2038 	uint32_t result = *ctl_unit;
2039 
2040 	*is_aggregate = false;
2041 	if (aggregate_unit != NULL && *aggregate_unit != 0) {
2042 		uint32_t counter;
2043 		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
2044 			if ((*aggregate_unit) & (1 << counter)) {
2045 				break;
2046 			}
2047 		}
2048 		if (counter < (GROUP_COUNT_MAX - 1)) {
2049 			*aggregate_unit &= ~(1 << counter);
2050 			*is_aggregate = true;
2051 			return counter + 1;
2052 		} else {
2053 			*ctl_unit = 0;
2054 			return result;
2055 		}
2056 	} else {
2057 		*ctl_unit = 0;
2058 		return result;
2059 	}
2060 }
2061 
2062 static int
flow_divert_try_next_group(struct flow_divert_pcb * fd_cb)2063 flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
2064 {
2065 	int error = 0;
2066 	uint32_t policy_control_unit = fd_cb->policy_control_unit;
2067 
2068 	flow_divert_pcb_remove(fd_cb);
2069 
2070 	do {
2071 		struct flow_divert_group *next_group = NULL;
2072 		bool is_aggregate = false;
2073 		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(&policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);
2074 
2075 		if (fd_cb->control_group_unit == next_ctl_unit) {
2076 			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
2077 			error = EALREADY;
2078 			break;
2079 		}
2080 
2081 		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
2082 			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
2083 			error = ENOENT;
2084 			break;
2085 		}
2086 
2087 		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
2088 		if (next_group == NULL) {
2089 			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
2090 			continue;
2091 		}
2092 
2093 		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);
2094 
2095 		error = flow_divert_pcb_insert(fd_cb, next_group);
2096 		if (error == 0) {
2097 			if (is_aggregate) {
2098 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
2099 			} else {
2100 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
2101 			}
2102 		}
2103 		FDGRP_RELEASE(next_group);
2104 	} while (fd_cb->group == NULL);
2105 
2106 	if (fd_cb->group == NULL) {
2107 		return error ? error : ENOENT;
2108 	}
2109 
2110 	error = flow_divert_send_connect_packet(fd_cb);
2111 	if (error) {
2112 		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
2113 		flow_divert_pcb_remove(fd_cb);
2114 		error = ENOENT;
2115 	}
2116 
2117 	return error;
2118 }
2119 
2120 static void
flow_divert_disable(struct flow_divert_pcb * fd_cb)2121 flow_divert_disable(struct flow_divert_pcb *fd_cb)
2122 {
2123 	struct socket *so = NULL;
2124 	mbuf_t  buffer;
2125 	int error = 0;
2126 	proc_t last_proc = NULL;
2127 	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
2128 	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
2129 	struct inpcb *inp = NULL;
2130 
2131 	so = fd_cb->so;
2132 	if (so == NULL) {
2133 		goto done;
2134 	}
2135 
2136 	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");
2137 
2138 	/* Restore the IP state */
2139 	inp = sotoinpcb(so);
2140 	inp->inp_vflag = fd_cb->original_vflag;
2141 	inp->inp_faddr.s_addr = INADDR_ANY;
2142 	inp->inp_fport = 0;
2143 	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
2144 	inp->inp_fifscope = IFSCOPE_NONE;
2145 	inp->in6p_fport = 0;
2146 	/* If flow divert set the local address, clear it out */
2147 	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
2148 		inp->inp_laddr.s_addr = INADDR_ANY;
2149 		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
2150 		inp->inp_lifscope = IFSCOPE_NONE;
2151 	}
2152 	inp->inp_last_outifp = fd_cb->original_last_outifp;
2153 	inp->in6p_last_outifp = fd_cb->original_last_outifp6;
2154 
2155 	/* Dis-associate the socket */
2156 	so->so_flags &= ~SOF_FLOW_DIVERT;
2157 	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
2158 	so->so_fd_pcb = NULL;
2159 	fd_cb->so = NULL;
2160 
2161 	FDRELEASE(fd_cb); /* Release the socket's reference */
2162 
2163 	/* Revert back to the original protocol */
2164 	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));
2165 
2166 	/* Reset the socket state to avoid confusing NECP */
2167 	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);
2168 
2169 	last_proc = proc_find(so->last_pid);
2170 
2171 	if (do_connect) {
2172 		/* Connect using the original protocol */
2173 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
2174 		if (error) {
2175 			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
2176 			goto done;
2177 		}
2178 	}
2179 
2180 	buffer = so->so_snd.sb_mb;
2181 	if (buffer == NULL) {
2182 		/* No buffered data, done */
2183 		goto done;
2184 	}
2185 
2186 	/* Send any buffered data using the original protocol */
2187 	if (SOCK_TYPE(so) == SOCK_STREAM) {
2188 		mbuf_t data_to_send = NULL;
2189 		size_t data_len = so->so_snd.sb_cc;
2190 
2191 		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
2192 		if (error) {
2193 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
2194 			goto done;
2195 		}
2196 
2197 		sbflush(&so->so_snd);
2198 
2199 		if (data_to_send->m_flags & M_PKTHDR) {
2200 			mbuf_pkthdr_setlen(data_to_send, data_len);
2201 		}
2202 
2203 		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2204 		    0,
2205 		    data_to_send,
2206 		    NULL,
2207 		    NULL,
2208 		    (last_proc != NULL ? last_proc : current_proc()));
2209 
2210 		if (error && error != EWOULDBLOCK) {
2211 			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
2212 		} else {
2213 			error = 0;
2214 		}
2215 	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
2216 		struct sockbuf *sb = &so->so_snd;
2217 		MBUFQ_HEAD(send_queue_head) send_queue;
2218 		MBUFQ_INIT(&send_queue);
2219 
2220 		/* Flush the send buffer, moving all records to a temporary queue */
2221 		while (sb->sb_mb != NULL) {
2222 			mbuf_t record = sb->sb_mb;
2223 			mbuf_t m = record;
2224 			sb->sb_mb = sb->sb_mb->m_nextpkt;
2225 			while (m != NULL) {
2226 				sbfree(sb, m);
2227 				m = m->m_next;
2228 			}
2229 			record->m_nextpkt = NULL;
2230 			MBUFQ_ENQUEUE(&send_queue, record);
2231 		}
2232 		SB_EMPTY_FIXUP(sb);
2233 
2234 		while (!MBUFQ_EMPTY(&send_queue)) {
2235 			mbuf_t next_record = MBUFQ_FIRST(&send_queue);
2236 			mbuf_t addr = NULL;
2237 			mbuf_t control = NULL;
2238 			mbuf_t last_control = NULL;
2239 			mbuf_t data = NULL;
2240 			mbuf_t m = next_record;
2241 			struct sockaddr *to_endpoint = NULL;
2242 
2243 			MBUFQ_DEQUEUE(&send_queue, next_record);
2244 
2245 			while (m != NULL) {
2246 				if (m->m_type == MT_SONAME) {
2247 					addr = m;
2248 				} else if (m->m_type == MT_CONTROL) {
2249 					if (control == NULL) {
2250 						control = m;
2251 					}
2252 					last_control = m;
2253 				} else if (m->m_type == MT_DATA) {
2254 					data = m;
2255 					break;
2256 				}
2257 				m = m->m_next;
2258 			}
2259 
2260 			if (addr != NULL && !do_connect) {
2261 				to_endpoint = flow_divert_get_buffered_target_address(addr);
2262 				if (to_endpoint == NULL) {
2263 					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
2264 				}
2265 			}
2266 
2267 			if (data == NULL) {
2268 				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
2269 				mbuf_freem(next_record);
2270 				continue;
2271 			}
2272 
2273 			if (!(data->m_flags & M_PKTHDR)) {
2274 				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
2275 				mbuf_freem(next_record);
2276 				continue;
2277 			}
2278 
2279 			if (addr != NULL) {
2280 				addr->m_next = NULL;
2281 			}
2282 
2283 			if (last_control != NULL) {
2284 				last_control->m_next = NULL;
2285 			}
2286 
2287 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2288 			    0,
2289 			    data,
2290 			    to_endpoint,
2291 			    control,
2292 			    (last_proc != NULL ? last_proc : current_proc()));
2293 
2294 			if (addr != NULL) {
2295 				mbuf_freem(addr);
2296 			}
2297 
2298 			if (error) {
2299 				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
2300 			}
2301 		}
2302 	}
2303 done:
2304 	if (last_proc != NULL) {
2305 		proc_rele(last_proc);
2306 	}
2307 
2308 	if (error && so != NULL) {
2309 		so->so_error = (uint16_t)error;
2310 		flow_divert_disconnect_socket(so, do_connect);
2311 	}
2312 }
2313 
2314 static void
flow_divert_scope(struct flow_divert_pcb * fd_cb,int out_if_index,bool derive_new_address)2315 flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
2316 {
2317 	struct socket *so = NULL;
2318 	struct inpcb *inp = NULL;
2319 	struct ifnet *current_ifp = NULL;
2320 	struct ifnet *new_ifp = NULL;
2321 	int error = 0;
2322 
2323 	so = fd_cb->so;
2324 	if (so == NULL) {
2325 		return;
2326 	}
2327 
2328 	inp = sotoinpcb(so);
2329 
2330 	if (out_if_index <= 0) {
2331 		return;
2332 	}
2333 
2334 	if (inp->inp_vflag & INP_IPV6) {
2335 		current_ifp = inp->in6p_last_outifp;
2336 	} else {
2337 		current_ifp = inp->inp_last_outifp;
2338 	}
2339 
2340 	if (current_ifp != NULL) {
2341 		if (current_ifp->if_index == out_if_index) {
2342 			/* No change */
2343 			return;
2344 		}
2345 
2346 		/* Scope the socket to the given interface */
2347 		error = inp_bindif(inp, out_if_index, &new_ifp);
2348 		if (error != 0) {
2349 			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
2350 			return;
2351 		}
2352 
2353 		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
2354 			/* Get the appropriate address for the given interface */
2355 			if (inp->inp_vflag & INP_IPV6) {
2356 				inp->in6p_laddr = sa6_any.sin6_addr;
2357 				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
2358 			} else {
2359 				inp->inp_laddr.s_addr = INADDR_ANY;
2360 				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
2361 			}
2362 
2363 			if (error != 0) {
2364 				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
2365 			}
2366 		}
2367 	} else {
2368 		ifnet_head_lock_shared();
2369 		if (out_if_index <= if_index) {
2370 			new_ifp = ifindex2ifnet[out_if_index];
2371 		}
2372 		ifnet_head_done();
2373 	}
2374 
2375 	/* Update the "last interface" of the socket */
2376 	if (new_ifp != NULL) {
2377 		if (inp->inp_vflag & INP_IPV6) {
2378 			inp->in6p_last_outifp = new_ifp;
2379 		} else {
2380 			inp->inp_last_outifp = new_ifp;
2381 		}
2382 
2383 #if SKYWALK
2384 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2385 			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
2386 		}
2387 #endif /* SKYWALK */
2388 	}
2389 }
2390 
2391 static void
flow_divert_handle_connect_result(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2392 flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2393 {
2394 	uint32_t                                        connect_error = 0;
2395 	uint32_t                                        ctl_unit                        = 0;
2396 	int                                                     error                           = 0;
2397 	union sockaddr_in_4_6 local_endpoint = {};
2398 	union sockaddr_in_4_6 remote_endpoint = {};
2399 	int                                                     out_if_index            = 0;
2400 	uint32_t                                        send_window;
2401 	uint32_t                                        app_data_length         = 0;
2402 
2403 	memset(&local_endpoint, 0, sizeof(local_endpoint));
2404 	memset(&remote_endpoint, 0, sizeof(remote_endpoint));
2405 
2406 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
2407 	if (error) {
2408 		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
2409 		return;
2410 	}
2411 
2412 	connect_error = ntohl(connect_error);
2413 	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);
2414 
2415 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
2416 	if (error) {
2417 		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
2418 		return;
2419 	}
2420 
2421 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
2422 	if (error) {
2423 		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
2424 	}
2425 
2426 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sa), NULL);
2427 	if (error) {
2428 		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
2429 	}
2430 
2431 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sa), NULL);
2432 	if (error) {
2433 		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2434 	}
2435 
2436 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2437 	if (error) {
2438 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
2439 	}
2440 
2441 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2442 	if (error) {
2443 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
2444 	}
2445 
2446 	error = 0;
2447 
2448 	FDLOCK(fd_cb);
2449 	if (fd_cb->so != NULL) {
2450 		struct inpcb *inp = NULL;
2451 		struct socket *so = fd_cb->so;
2452 		bool local_address_is_valid = false;
2453 
2454 		socket_lock(so, 1);
2455 
2456 		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2457 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
2458 			goto done;
2459 		}
2460 
2461 		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
2462 			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
2463 			goto done;
2464 		}
2465 
2466 		inp = sotoinpcb(so);
2467 
2468 		if (connect_error || error) {
2469 			goto set_socket_state;
2470 		}
2471 
2472 		if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
2473 			if (local_endpoint.sa.sa_family == AF_INET) {
2474 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2475 				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
2476 					local_address_is_valid = true;
2477 					fd_cb->local_endpoint = local_endpoint;
2478 					inp->inp_laddr.s_addr = INADDR_ANY;
2479 				} else {
2480 					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
2481 				}
2482 			} else if (local_endpoint.sa.sa_family == AF_INET6) {
2483 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2484 				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
2485 					local_address_is_valid = true;
2486 					fd_cb->local_endpoint = local_endpoint;
2487 					inp->in6p_laddr = sa6_any.sin6_addr;
2488 				} else {
2489 					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
2490 				}
2491 			}
2492 		}
2493 
2494 		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
2495 		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
2496 
2497 		if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
2498 			if (remote_endpoint.sa.sa_family == AF_INET) {
2499 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2500 			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
2501 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2502 			}
2503 			flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
2504 		}
2505 
2506 		if (app_data_length > 0) {
2507 			uint8_t *app_data = NULL;
2508 			app_data = kalloc_data(app_data_length, Z_WAITOK);
2509 			if (app_data != NULL) {
2510 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2511 				if (error == 0) {
2512 					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
2513 					if (fd_cb->app_data != NULL) {
2514 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
2515 					}
2516 					fd_cb->app_data = app_data;
2517 					fd_cb->app_data_length = app_data_length;
2518 				} else {
2519 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
2520 					kfree_data(app_data, app_data_length);
2521 				}
2522 			} else {
2523 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
2524 			}
2525 		}
2526 
2527 		if (error) {
2528 			goto set_socket_state;
2529 		}
2530 
2531 		if (fd_cb->group == NULL) {
2532 			error = EINVAL;
2533 			goto set_socket_state;
2534 		}
2535 
2536 		ctl_unit = ntohl(ctl_unit);
2537 		if (ctl_unit > 0) {
2538 			int insert_error = 0;
2539 			struct flow_divert_group *grp = NULL;
2540 
2541 			if (ctl_unit >= GROUP_COUNT_MAX) {
2542 				FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
2543 				error = EINVAL;
2544 				goto set_socket_state;
2545 			}
2546 
2547 			grp = flow_divert_group_lookup(ctl_unit, fd_cb);
2548 			if (grp == NULL) {
2549 				error = ECONNRESET;
2550 				goto set_socket_state;
2551 			}
2552 
2553 			flow_divert_pcb_remove(fd_cb);
2554 			insert_error = flow_divert_pcb_insert(fd_cb, grp);
2555 			FDGRP_RELEASE(grp);
2556 
2557 			if (insert_error != 0) {
2558 				error = ECONNRESET;
2559 				goto set_socket_state;
2560 			}
2561 		}
2562 
2563 		fd_cb->send_window = ntohl(send_window);
2564 
2565 set_socket_state:
2566 		if (!connect_error && !error) {
2567 			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
2568 			error = flow_divert_send_connect_result(fd_cb);
2569 		}
2570 
2571 		if (connect_error || error) {
2572 			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
2573 				error = flow_divert_try_next_group(fd_cb);
2574 				if (error && fd_cb->policy_control_unit == 0) {
2575 					flow_divert_disable(fd_cb);
2576 					goto done;
2577 				} else if (error == 0) {
2578 					goto done;
2579 				}
2580 			}
2581 
2582 			if (!connect_error) {
2583 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
2584 				so->so_error = (uint16_t)error;
2585 				flow_divert_send_close_if_needed(fd_cb);
2586 			} else {
2587 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
2588 				so->so_error = (uint16_t)connect_error;
2589 			}
2590 			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2591 		} else {
2592 #if NECP
2593 			/* Update NECP client with connected five-tuple */
2594 			if (!uuid_is_null(inp->necp_client_uuid)) {
2595 				socket_unlock(so, 0);
2596 				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
2597 				socket_lock(so, 0);
2598 				if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2599 					/* The socket was closed while it was unlocked */
2600 					goto done;
2601 				}
2602 			}
2603 #endif /* NECP */
2604 
2605 			flow_divert_send_buffered_data(fd_cb, FALSE);
2606 			soisconnected(so);
2607 		}
2608 
2609 		/* We don't need the connect packet any more */
2610 		if (fd_cb->connect_packet != NULL) {
2611 			mbuf_freem(fd_cb->connect_packet);
2612 			fd_cb->connect_packet = NULL;
2613 		}
2614 
2615 		/* We don't need the original remote endpoint any more */
2616 		free_sockaddr(fd_cb->original_remote_endpoint);
2617 done:
2618 		socket_unlock(so, 1);
2619 	}
2620 	FDUNLOCK(fd_cb);
2621 }
2622 
2623 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2624 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2625 {
2626 	uint32_t        close_error                     = 0;
2627 	int                     error                   = 0;
2628 	int                     how                     = 0;
2629 
2630 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2631 	if (error) {
2632 		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2633 		return;
2634 	}
2635 
2636 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2637 	if (error) {
2638 		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2639 		return;
2640 	}
2641 
2642 	how = ntohl(how);
2643 
2644 	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2645 
2646 	FDLOCK(fd_cb);
2647 	if (fd_cb->so != NULL) {
2648 		bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2649 		socket_lock(fd_cb->so, 0);
2650 
2651 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2652 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2653 			goto done;
2654 		}
2655 
2656 		fd_cb->so->so_error = (uint16_t)ntohl(close_error);
2657 
2658 		flow_divert_update_closed_state(fd_cb, how, true, true);
2659 
2660 		/* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
2661 		how = flow_divert_tunnel_how_closed(fd_cb);
2662 		if (how == SHUT_RDWR) {
2663 			flow_divert_disconnect_socket(fd_cb->so, is_connected);
2664 		} else if (how == SHUT_RD && is_connected) {
2665 			socantrcvmore(fd_cb->so);
2666 		} else if (how == SHUT_WR && is_connected) {
2667 			socantsendmore(fd_cb->so);
2668 		}
2669 done:
2670 		socket_unlock(fd_cb->so, 0);
2671 	}
2672 	FDUNLOCK(fd_cb);
2673 }
2674 
2675 static mbuf_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2676 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2677 {
2678 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2679 	bool need_recvdstaddr = false;
2680 	/* Socket flow tracking needs to see the local address */
2681 	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2682 	if ((inp->inp_vflag & INP_IPV4) &&
2683 	    fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2684 	    ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2685 		return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2686 	} else if ((inp->inp_vflag & INP_IPV6) &&
2687 	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2688 	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2689 		struct in6_pktinfo pi6;
2690 		memset(&pi6, 0, sizeof(pi6));
2691 		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2692 
2693 		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2694 	}
2695 	return NULL;
2696 }
2697 
2698 static int
flow_divert_handle_data(struct flow_divert_pcb * fd_cb,mbuf_t packet,size_t offset)2699 flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
2700 {
2701 	int error = 0;
2702 
2703 	FDLOCK(fd_cb);
2704 	if (fd_cb->so != NULL) {
2705 		mbuf_t  data            = NULL;
2706 		size_t  data_size;
2707 		struct sockaddr_storage remote_address;
2708 		boolean_t got_remote_sa = FALSE;
2709 		boolean_t appended = FALSE;
2710 		boolean_t append_success = FALSE;
2711 
2712 		socket_lock(fd_cb->so, 0);
2713 
2714 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2715 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
2716 			goto done;
2717 		}
2718 
2719 		if (sbspace(&fd_cb->so->so_rcv) == 0) {
2720 			error = ENOBUFS;
2721 			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
2722 			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
2723 			goto done;
2724 		}
2725 
2726 		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
2727 			uint32_t val_size = 0;
2728 
2729 			/* check if we got remote address with data */
2730 			memset(&remote_address, 0, sizeof(remote_address));
2731 			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
2732 			if (error || val_size > sizeof(remote_address)) {
2733 				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2734 				error = 0;
2735 			} else {
2736 				if (remote_address.ss_len > sizeof(remote_address)) {
2737 					remote_address.ss_len = sizeof(remote_address);
2738 				}
2739 				/* validate the address */
2740 				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
2741 					got_remote_sa = TRUE;
2742 				} else {
2743 					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
2744 				}
2745 				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
2746 			}
2747 		}
2748 
2749 		data_size = (mbuf_pkthdr_len(packet) - offset);
2750 
2751 		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
2752 			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
2753 			goto done;
2754 		}
2755 
2756 		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
2757 			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
2758 			goto done;
2759 		}
2760 
2761 		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
2762 
2763 		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
2764 		if (error || data == NULL) {
2765 			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
2766 			goto done;
2767 		}
2768 
2769 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
2770 			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
2771 			append_success = TRUE;
2772 		} else {
2773 			struct sockaddr *append_sa = NULL;
2774 			mbuf_t mctl;
2775 
2776 			if (got_remote_sa == TRUE) {
2777 				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
2778 			} else {
2779 				if (fd_cb->so->so_proto->pr_domain->dom_family == AF_INET6) {
2780 					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
2781 				} else {
2782 					error = in_getpeeraddr(fd_cb->so, &append_sa);
2783 				}
2784 			}
2785 			if (error) {
2786 				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
2787 			}
2788 
2789 			mctl = flow_divert_create_control_mbuf(fd_cb);
2790 			int append_error = 0;
2791 			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
2792 			if (appended || append_error == 0) {
2793 				append_success = TRUE;
2794 			} else {
2795 				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
2796 			}
2797 
2798 			free_sockaddr(append_sa);
2799 		}
2800 
2801 		if (append_success) {
2802 			fd_cb->bytes_received += data_size;
2803 			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
2804 		}
2805 
2806 		if (appended) {
2807 			sorwakeup(fd_cb->so);
2808 		}
2809 done:
2810 		socket_unlock(fd_cb->so, 0);
2811 	}
2812 	FDUNLOCK(fd_cb);
2813 
2814 	return error;
2815 }
2816 
2817 static void
flow_divert_handle_read_notification(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2818 flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2819 {
2820 	uint32_t        read_count              = 0;
2821 	int             error                   = 0;
2822 
2823 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
2824 	if (error) {
2825 		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
2826 		return;
2827 	}
2828 
2829 	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
2830 
2831 	FDLOCK(fd_cb);
2832 	if (fd_cb->so != NULL) {
2833 		socket_lock(fd_cb->so, 0);
2834 
2835 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2836 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
2837 			goto done;
2838 		}
2839 
2840 		fd_cb->send_window += ntohl(read_count);
2841 		flow_divert_send_buffered_data(fd_cb, FALSE);
2842 done:
2843 		socket_unlock(fd_cb->so, 0);
2844 	}
2845 	FDUNLOCK(fd_cb);
2846 }
2847 
2848 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_t packet,int offset)2849 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset)
2850 {
2851 	int error         = 0;
2852 	uint32_t key_size = 0;
2853 	int log_level     = 0;
2854 	uint32_t flags    = 0;
2855 
2856 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2857 	if (error) {
2858 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2859 		return;
2860 	}
2861 
2862 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2863 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2864 		return;
2865 	}
2866 
2867 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2868 	if (!error) {
2869 		nil_pcb.log_level = (uint8_t)log_level;
2870 	}
2871 
2872 	lck_rw_lock_exclusive(&group->lck);
2873 
2874 	if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2875 		FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2876 		lck_rw_done(&group->lck);
2877 		return;
2878 	}
2879 
2880 	if (group->token_key != NULL) {
2881 		kfree_data(group->token_key, group->token_key_size);
2882 		group->token_key = NULL;
2883 	}
2884 
2885 	group->token_key = kalloc_data(key_size, Z_WAITOK);
2886 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2887 	if (error) {
2888 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2889 		kfree_data(group->token_key, key_size);
2890 		group->token_key = NULL;
2891 		lck_rw_done(&group->lck);
2892 		return;
2893 	}
2894 
2895 	group->token_key_size = key_size;
2896 
2897 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2898 	if (!error) {
2899 		group->flags = flags;
2900 	}
2901 
2902 	lck_rw_done(&group->lck);
2903 }
2904 
2905 static void
flow_divert_handle_properties_update(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2906 flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2907 {
2908 	int                                                     error                           = 0;
2909 	int                                                     out_if_index            = 0;
2910 	uint32_t                                        app_data_length         = 0;
2911 
2912 	FDLOG0(LOG_INFO, fd_cb, "received a properties update");
2913 
2914 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2915 	if (error) {
2916 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
2917 	}
2918 
2919 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2920 	if (error) {
2921 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
2922 	}
2923 
2924 	FDLOCK(fd_cb);
2925 	if (fd_cb->so != NULL) {
2926 		socket_lock(fd_cb->so, 0);
2927 
2928 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2929 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
2930 			goto done;
2931 		}
2932 
2933 		if (out_if_index > 0) {
2934 			flow_divert_scope(fd_cb, out_if_index, true);
2935 			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
2936 		}
2937 
2938 		if (app_data_length > 0) {
2939 			uint8_t *app_data = NULL;
2940 			app_data = kalloc_data(app_data_length, Z_WAITOK);
2941 			if (app_data != NULL) {
2942 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2943 				if (error == 0) {
2944 					if (fd_cb->app_data != NULL) {
2945 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
2946 					}
2947 					fd_cb->app_data = app_data;
2948 					fd_cb->app_data_length = app_data_length;
2949 				} else {
2950 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
2951 					kfree_data(app_data, app_data_length);
2952 				}
2953 			} else {
2954 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
2955 			}
2956 		}
2957 done:
2958 		socket_unlock(fd_cb->so, 0);
2959 	}
2960 	FDUNLOCK(fd_cb);
2961 }
2962 
2963 static void
flow_divert_handle_app_map_create(struct flow_divert_group * group,mbuf_t packet,int offset)2964 flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_t packet, int offset)
2965 {
2966 	size_t bytes_mem_size;
2967 	size_t child_maps_mem_size;
2968 	size_t nodes_mem_size;
2969 	size_t trie_memory_size = 0;
2970 	int cursor;
2971 	int error = 0;
2972 	struct flow_divert_trie new_trie;
2973 	int insert_error = 0;
2974 	int prefix_count = -1;
2975 	int signing_id_count = 0;
2976 	size_t bytes_count = 0;
2977 	size_t nodes_count = 0;
2978 	size_t maps_count = 0;
2979 
2980 	lck_rw_lock_exclusive(&group->lck);
2981 
2982 	/* Re-set the current trie */
2983 	if (group->signing_id_trie.memory != NULL) {
2984 		kfree_data_addr(group->signing_id_trie.memory);
2985 	}
2986 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
2987 	group->signing_id_trie.root = NULL_TRIE_IDX;
2988 
2989 	memset(&new_trie, 0, sizeof(new_trie));
2990 
2991 	/* Get the number of shared prefixes in the new set of signing ID strings */
2992 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
2993 
2994 	if (prefix_count < 0 || error) {
2995 		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
2996 		lck_rw_done(&group->lck);
2997 		return;
2998 	}
2999 
3000 	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
3001 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
3002 	    cursor >= 0;
3003 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
3004 		uint32_t sid_size = 0;
3005 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
3006 		if (error || sid_size == 0) {
3007 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
3008 			signing_id_count = 0;
3009 			break;
3010 		}
3011 		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
3012 			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
3013 			signing_id_count = 0;
3014 			break;
3015 		}
3016 		signing_id_count++;
3017 	}
3018 
3019 	if (signing_id_count == 0) {
3020 		lck_rw_done(&group->lck);
3021 		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
3022 		return;
3023 	}
3024 
3025 	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
3026 		lck_rw_done(&group->lck);
3027 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
3028 		return;
3029 	}
3030 
3031 	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
3032 		lck_rw_done(&group->lck);
3033 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
3034 		return;
3035 	}
3036 
3037 	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
3038 		lck_rw_done(&group->lck);
3039 		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
3040 		return;
3041 	}
3042 
3043 	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
3044 	    nodes_count, maps_count, bytes_count);
3045 
3046 	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
3047 	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
3048 	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
3049 	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
3050 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
3051 		lck_rw_done(&group->lck);
3052 		return;
3053 	}
3054 
3055 	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
3056 		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
3057 		lck_rw_done(&group->lck);
3058 		return;
3059 	}
3060 
3061 	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
3062 	if (new_trie.memory == NULL) {
3063 		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
3064 		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
3065 		lck_rw_done(&group->lck);
3066 		return;
3067 	}
3068 
3069 	new_trie.bytes_count = (uint16_t)bytes_count;
3070 	new_trie.nodes_count = (uint16_t)nodes_count;
3071 	new_trie.child_maps_count = (uint16_t)maps_count;
3072 
3073 	/* Initialize the free lists */
3074 	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
3075 	new_trie.nodes_free_next = 0;
3076 	memset(new_trie.nodes, 0, nodes_mem_size);
3077 
3078 	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
3079 	new_trie.child_maps_free_next = 0;
3080 	memset(new_trie.child_maps, 0xff, child_maps_mem_size);
3081 
3082 	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
3083 	new_trie.bytes_free_next = 0;
3084 	memset(new_trie.bytes, 0, bytes_mem_size);
3085 
3086 	/* The root is an empty node */
3087 	new_trie.root = trie_node_alloc(&new_trie);
3088 
3089 	/* Add each signing ID to the trie */
3090 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
3091 	    cursor >= 0;
3092 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
3093 		uint32_t sid_size = 0;
3094 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
3095 		if (error || sid_size == 0) {
3096 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
3097 			insert_error = EINVAL;
3098 			break;
3099 		}
3100 		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
3101 			uint16_t new_node_idx;
3102 			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
3103 			if (error) {
3104 				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
3105 				insert_error = EINVAL;
3106 				break;
3107 			}
3108 			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
3109 			if (new_node_idx == NULL_TRIE_IDX) {
3110 				insert_error = EINVAL;
3111 				break;
3112 			}
3113 		} else {
3114 			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
3115 			insert_error = ENOBUFS;
3116 			break;
3117 		}
3118 	}
3119 
3120 	if (!insert_error) {
3121 		group->signing_id_trie = new_trie;
3122 	} else {
3123 		kfree_data(new_trie.memory, trie_memory_size);
3124 	}
3125 
3126 	lck_rw_done(&group->lck);
3127 }
3128 
3129 static int
flow_divert_input(mbuf_t packet,struct flow_divert_group * group)3130 flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
3131 {
3132 	struct flow_divert_packet_header        hdr;
3133 	int                                                                     error           = 0;
3134 	struct flow_divert_pcb                          *fd_cb;
3135 
3136 	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
3137 		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
3138 		error = EINVAL;
3139 		goto done;
3140 	}
3141 
3142 	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
3143 	if (error) {
3144 		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
3145 		error = ENOBUFS;
3146 		goto done;
3147 	}
3148 
3149 	hdr.conn_id = ntohl(hdr.conn_id);
3150 
3151 	if (hdr.conn_id == 0) {
3152 		switch (hdr.packet_type) {
3153 		case FLOW_DIVERT_PKT_GROUP_INIT:
3154 			flow_divert_handle_group_init(group, packet, sizeof(hdr));
3155 			break;
3156 		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
3157 			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
3158 			break;
3159 		default:
3160 			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
3161 			break;
3162 		}
3163 		goto done;
3164 	}
3165 
3166 	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
3167 	if (fd_cb == NULL) {
3168 		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
3169 			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
3170 		}
3171 		goto done;
3172 	}
3173 
3174 	switch (hdr.packet_type) {
3175 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
3176 		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
3177 		break;
3178 	case FLOW_DIVERT_PKT_CLOSE:
3179 		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
3180 		break;
3181 	case FLOW_DIVERT_PKT_DATA:
3182 		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
3183 		break;
3184 	case FLOW_DIVERT_PKT_READ_NOTIFY:
3185 		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
3186 		break;
3187 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
3188 		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
3189 		break;
3190 	default:
3191 		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
3192 		break;
3193 	}
3194 
3195 	FDRELEASE(fd_cb);
3196 
3197 done:
3198 	mbuf_freem(packet);
3199 	return error;
3200 }
3201 
3202 static void
flow_divert_close_all(struct flow_divert_group * group)3203 flow_divert_close_all(struct flow_divert_group *group)
3204 {
3205 	struct flow_divert_pcb                  *fd_cb;
3206 	SLIST_HEAD(, flow_divert_pcb)   tmp_list;
3207 
3208 	SLIST_INIT(&tmp_list);
3209 
3210 	lck_rw_lock_exclusive(&group->lck);
3211 
3212 	MBUFQ_DRAIN(&group->send_queue);
3213 
3214 	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3215 		FDRETAIN(fd_cb);
3216 		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3217 	}
3218 
3219 	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;
3220 
3221 	lck_rw_done(&group->lck);
3222 
3223 	while (!SLIST_EMPTY(&tmp_list)) {
3224 		fd_cb = SLIST_FIRST(&tmp_list);
3225 		FDLOCK(fd_cb);
3226 		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
3227 		if (fd_cb->so != NULL) {
3228 			socket_lock(fd_cb->so, 0);
3229 			flow_divert_pcb_remove(fd_cb);
3230 			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
3231 			fd_cb->so->so_error = ECONNABORTED;
3232 			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
3233 			socket_unlock(fd_cb->so, 0);
3234 		}
3235 		FDUNLOCK(fd_cb);
3236 		FDRELEASE(fd_cb);
3237 	}
3238 }
3239 
3240 void
flow_divert_detach(struct socket * so)3241 flow_divert_detach(struct socket *so)
3242 {
3243 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3244 
3245 	if (!SO_IS_DIVERTED(so)) {
3246 		return;
3247 	}
3248 
3249 	so->so_flags &= ~SOF_FLOW_DIVERT;
3250 	so->so_fd_pcb = NULL;
3251 
3252 	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);
3253 
3254 	if (fd_cb->group != NULL) {
3255 		/* Last-ditch effort to send any buffered data */
3256 		flow_divert_send_buffered_data(fd_cb, TRUE);
3257 
3258 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
3259 		flow_divert_send_close_if_needed(fd_cb);
3260 		/* Remove from the group */
3261 		flow_divert_pcb_remove(fd_cb);
3262 	}
3263 
3264 	socket_unlock(so, 0);
3265 	FDLOCK(fd_cb);
3266 	fd_cb->so = NULL;
3267 	FDUNLOCK(fd_cb);
3268 	socket_lock(so, 0);
3269 
3270 	FDRELEASE(fd_cb);       /* Release the socket's reference */
3271 }
3272 
3273 static int
flow_divert_close(struct socket * so)3274 flow_divert_close(struct socket *so)
3275 {
3276 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3277 
3278 	if (!SO_IS_DIVERTED(so)) {
3279 		return EINVAL;
3280 	}
3281 
3282 	FDLOG0(LOG_INFO, fd_cb, "Closing");
3283 
3284 	if (SOCK_TYPE(so) == SOCK_STREAM) {
3285 		soisdisconnecting(so);
3286 		sbflush(&so->so_rcv);
3287 	}
3288 
3289 	flow_divert_send_buffered_data(fd_cb, TRUE);
3290 	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
3291 	flow_divert_send_close_if_needed(fd_cb);
3292 
3293 	/* Remove from the group */
3294 	flow_divert_pcb_remove(fd_cb);
3295 
3296 	return 0;
3297 }
3298 
3299 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3300 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3301     sae_connid_t cid __unused)
3302 {
3303 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3304 		return EINVAL;
3305 	}
3306 
3307 	return flow_divert_close(so);
3308 }
3309 
3310 static int
flow_divert_shutdown(struct socket * so)3311 flow_divert_shutdown(struct socket *so)
3312 {
3313 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3314 
3315 	if (!SO_IS_DIVERTED(so)) {
3316 		return EINVAL;
3317 	}
3318 
3319 	FDLOG0(LOG_INFO, fd_cb, "Can't send more");
3320 
3321 	socantsendmore(so);
3322 
3323 	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
3324 	flow_divert_send_close_if_needed(fd_cb);
3325 
3326 	return 0;
3327 }
3328 
3329 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3330 flow_divert_rcvd(struct socket *so, int flags __unused)
3331 {
3332 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3333 	int space = 0;
3334 
3335 	if (!SO_IS_DIVERTED(so)) {
3336 		return EINVAL;
3337 	}
3338 
3339 	space = sbspace(&so->so_rcv);
3340 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3341 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3342 	    (space > 0) &&
3343 	    flow_divert_send_read_notification(fd_cb) == 0) {
3344 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3345 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3346 	}
3347 
3348 	return 0;
3349 }
3350 
3351 static int
flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet,struct sockaddr * toaddr)3352 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr)
3353 {
3354 	int error = 0;
3355 	int port  = 0;
3356 
3357 	if (!flow_divert_is_sockaddr_valid(toaddr)) {
3358 		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3359 		error = EINVAL;
3360 		goto done;
3361 	}
3362 
3363 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
3364 	if (error) {
3365 		goto done;
3366 	}
3367 
3368 	if (toaddr->sa_family == AF_INET) {
3369 		port = ntohs((satosin(toaddr))->sin_port);
3370 	} else {
3371 		port = ntohs((satosin6(toaddr))->sin6_port);
3372 	}
3373 
3374 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3375 	if (error) {
3376 		goto done;
3377 	}
3378 
3379 done:
3380 	return error;
3381 }
3382 
3383 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_t buffer)3384 flow_divert_get_buffered_target_address(mbuf_t buffer)
3385 {
3386 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3387 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3388 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3389 			return toaddr;
3390 		}
3391 	}
3392 	return NULL;
3393 }
3394 
3395 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3396 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3397 {
3398 	switch (addr->sa_family) {
3399 	case AF_INET:
3400 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3401 			return FALSE;
3402 		}
3403 		break;
3404 	case AF_INET6:
3405 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3406 			return FALSE;
3407 		}
3408 		break;
3409 	default:
3410 		return FALSE;
3411 	}
3412 	return TRUE;
3413 }
3414 
3415 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3416 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3417     struct sockaddr **dup)
3418 {
3419 	int                                             error           = 0;
3420 	struct sockaddr                 *result;
3421 	struct sockaddr_storage ss;
3422 
3423 	if (addr != NULL) {
3424 		result = addr;
3425 	} else {
3426 		memset(&ss, 0, sizeof(ss));
3427 		ss.ss_family = family;
3428 		if (ss.ss_family == AF_INET) {
3429 			ss.ss_len = sizeof(struct sockaddr_in);
3430 		} else if (ss.ss_family == AF_INET6) {
3431 			ss.ss_len = sizeof(struct sockaddr_in6);
3432 		} else {
3433 			error = EINVAL;
3434 		}
3435 		result = (struct sockaddr *)&ss;
3436 	}
3437 
3438 	if (!error) {
3439 		*dup = dup_sockaddr(result, 1);
3440 		if (*dup == NULL) {
3441 			error = ENOBUFS;
3442 		}
3443 	}
3444 
3445 	return error;
3446 }
3447 
3448 static void
flow_divert_disconnect_socket(struct socket * so,bool is_connected)3449 flow_divert_disconnect_socket(struct socket *so, bool is_connected)
3450 {
3451 	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
3452 		soisdisconnected(so);
3453 	}
3454 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
3455 		struct inpcb *inp = sotoinpcb(so);
3456 		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
3457 			/*
3458 			 * Let NetworkStatistics know this PCB is going away
3459 			 * before we detach it.
3460 			 */
3461 			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
3462 				nstat_pcb_detach(inp);
3463 			}
3464 
3465 			if (SOCK_DOM(so) == PF_INET6) {
3466 				ROUTE_RELEASE(&inp->in6p_route);
3467 			} else {
3468 				ROUTE_RELEASE(&inp->inp_route);
3469 			}
3470 			inp->inp_state = INPCB_STATE_DEAD;
3471 			/* makes sure we're not called twice from so_close */
3472 			so->so_flags |= SOF_PCBCLEARING;
3473 			inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
3474 		}
3475 	}
3476 }
3477 
3478 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3479 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3480 {
3481 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3482 
3483 	if (!SO_IS_DIVERTED(so)) {
3484 		return EINVAL;
3485 	}
3486 
3487 	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3488 		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3489 			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3490 		}
3491 	}
3492 
3493 	if (SOCK_DOM(so) == PF_INET) {
3494 		return g_tcp_protosw->pr_ctloutput(so, sopt);
3495 	} else if (SOCK_DOM(so) == PF_INET6) {
3496 		return g_tcp6_protosw->pr_ctloutput(so, sopt);
3497 	}
3498 	return 0;
3499 }
3500 
3501 static errno_t
flow_divert_connect_out_internal(struct socket * so,struct sockaddr * to,proc_t p,bool implicit)3502 flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
3503 {
3504 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3505 	int                                             error   = 0;
3506 	struct inpcb                    *inp    = sotoinpcb(so);
3507 	struct sockaddr_in              *sinp;
3508 	mbuf_t                                  connect_packet = NULL;
3509 	int                                             do_send = 1;
3510 
3511 	if (!SO_IS_DIVERTED(so)) {
3512 		return EINVAL;
3513 	}
3514 
3515 	if (fd_cb->group == NULL) {
3516 		error = ENETUNREACH;
3517 		goto done;
3518 	}
3519 
3520 	if (inp == NULL) {
3521 		error = EINVAL;
3522 		goto done;
3523 	} else if (inp->inp_state == INPCB_STATE_DEAD) {
3524 		if (so->so_error) {
3525 			error = so->so_error;
3526 			so->so_error = 0;
3527 		} else {
3528 			error = EINVAL;
3529 		}
3530 		goto done;
3531 	}
3532 
3533 	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3534 		error = EALREADY;
3535 		goto done;
3536 	}
3537 
3538 	FDLOG0(LOG_INFO, fd_cb, "Connecting");
3539 
3540 	if (fd_cb->connect_packet == NULL) {
3541 		struct sockaddr_in sin = {};
3542 		struct ifnet *ifp = NULL;
3543 
3544 		if (to == NULL) {
3545 			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
3546 			error = EINVAL;
3547 			goto done;
3548 		}
3549 
3550 		if (!flow_divert_is_sockaddr_valid(to)) {
3551 			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
3552 			error = EINVAL;
3553 			goto done;
3554 		}
3555 
3556 		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
3557 		if (fd_cb->original_remote_endpoint == NULL) {
3558 			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
3559 			error = ENOMEM;
3560 			goto done;
3561 		}
3562 		fd_cb->original_vflag = inp->inp_vflag;
3563 		fd_cb->original_last_outifp = inp->inp_last_outifp;
3564 		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;
3565 
3566 		sinp = (struct sockaddr_in *)(void *)to;
3567 		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
3568 			error = EAFNOSUPPORT;
3569 			goto done;
3570 		}
3571 
3572 		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
3573 			struct sockaddr_in6 sin6 = {};
3574 			sin6.sin6_family = AF_INET6;
3575 			sin6.sin6_len = sizeof(struct sockaddr_in6);
3576 			sin6.sin6_port = satosin6(to)->sin6_port;
3577 			sin6.sin6_addr = satosin6(to)->sin6_addr;
3578 			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
3579 				in6_sin6_2_sin(&sin, &sin6);
3580 				to = (struct sockaddr *)&sin;
3581 			}
3582 		}
3583 
3584 		if (to->sa_family == AF_INET6) {
3585 			struct sockaddr_in6 *to6 = satosin6(to);
3586 
3587 			inp->inp_vflag &= ~INP_IPV4;
3588 			inp->inp_vflag |= INP_IPV6;
3589 			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
3590 			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
3591 			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
3592 			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
3593 			if (error) {
3594 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
3595 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
3596 					error = 0;
3597 				} else {
3598 					goto done;
3599 				}
3600 			}
3601 			if (ifp != NULL) {
3602 				inp->in6p_last_outifp = ifp;
3603 				ifnet_release(ifp);
3604 			}
3605 
3606 			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
3607 			    in6_embedded_scope &&
3608 			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
3609 				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
3610 				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
3611 			}
3612 
3613 			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
3614 			    in6_embedded_scope &&
3615 			    to6->sin6_addr.s6_addr16[1] != 0) {
3616 				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
3617 				to6->sin6_addr.s6_addr16[1] = 0;
3618 			}
3619 		} else if (to->sa_family == AF_INET) {
3620 			inp->inp_vflag |= INP_IPV4;
3621 			inp->inp_vflag &= ~INP_IPV6;
3622 			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
3623 			fd_cb->local_endpoint.sin.sin_family = AF_INET;
3624 			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
3625 			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
3626 			if (error) {
3627 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
3628 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
3629 					error = 0;
3630 				} else {
3631 					goto done;
3632 				}
3633 			}
3634 			if (ifp != NULL) {
3635 				inp->inp_last_outifp = ifp;
3636 				ifnet_release(ifp);
3637 			}
3638 		} else {
3639 			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
3640 		}
3641 
3642 		error = flow_divert_check_no_cellular(fd_cb) ||
3643 		    flow_divert_check_no_expensive(fd_cb) ||
3644 		    flow_divert_check_no_constrained(fd_cb);
3645 		if (error) {
3646 			goto done;
3647 		}
3648 
3649 		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
3650 		    !implicit || /* connect() was called or */
3651 		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
3652 		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
3653 			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
3654 		}
3655 
3656 		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
3657 		if (error) {
3658 			goto done;
3659 		}
3660 
3661 		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
3662 			flow_divert_set_remote_endpoint(fd_cb, to);
3663 			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
3664 		}
3665 
3666 		if (implicit) {
3667 			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
3668 		}
3669 
3670 		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
3671 			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
3672 			do_send = 0;
3673 		}
3674 
3675 		fd_cb->connect_packet = connect_packet;
3676 		connect_packet = NULL;
3677 	} else {
3678 		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
3679 	}
3680 
3681 	if (do_send) {
3682 		error = flow_divert_send_connect_packet(fd_cb);
3683 		if (error) {
3684 			goto done;
3685 		}
3686 
3687 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3688 	}
3689 
3690 	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
3691 		soisconnected(so);
3692 	} else {
3693 		soisconnecting(so);
3694 	}
3695 
3696 done:
3697 	return error;
3698 }
3699 
3700 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3701 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3702 {
3703 #if CONTENT_FILTER
3704 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3705 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3706 		if (error != 0) {
3707 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3708 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3709 			return error;
3710 		}
3711 	}
3712 #endif /* CONTENT_FILTER */
3713 
3714 	return flow_divert_connect_out_internal(so, to, p, false);
3715 }
3716 
3717 static int
flow_divert_connectx_out_common(struct socket * so,struct sockaddr * dst,struct proc * p,sae_connid_t * pcid,struct uio * auio,user_ssize_t * bytes_written)3718 flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
3719     struct proc *p, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
3720 {
3721 	struct inpcb *inp = sotoinpcb(so);
3722 	int error;
3723 
3724 	if (inp == NULL) {
3725 		return EINVAL;
3726 	}
3727 
3728 	VERIFY(dst != NULL);
3729 
3730 #if CONTENT_FILTER && NECP
3731 	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3732 	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
3733 	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3734 		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
3735 	}
3736 #endif /* CONTENT_FILTER */
3737 
3738 	error = flow_divert_connect_out(so, dst, p);
3739 
3740 	if (error != 0) {
3741 		return error;
3742 	}
3743 
3744 	/* if there is data, send it */
3745 	if (auio != NULL) {
3746 		user_ssize_t datalen = 0;
3747 
3748 		socket_unlock(so, 0);
3749 
3750 		VERIFY(bytes_written != NULL);
3751 
3752 		datalen = uio_resid(auio);
3753 		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
3754 		socket_lock(so, 0);
3755 
3756 		if (error == 0 || error == EWOULDBLOCK) {
3757 			*bytes_written = datalen - uio_resid(auio);
3758 		}
3759 
3760 		/*
3761 		 * sosend returns EWOULDBLOCK if it's a non-blocking
3762 		 * socket or a timeout occured (this allows to return
3763 		 * the amount of queued data through sendit()).
3764 		 *
3765 		 * However, connectx() returns EINPROGRESS in case of a
3766 		 * blocking socket. So we change the return value here.
3767 		 */
3768 		if (error == EWOULDBLOCK) {
3769 			error = EINPROGRESS;
3770 		}
3771 	}
3772 
3773 	if (error == 0 && pcid != NULL) {
3774 		*pcid = 1;      /* there is only 1 connection for a TCP */
3775 	}
3776 
3777 	return error;
3778 }
3779 
3780 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope __unused,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3781 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3782     struct sockaddr *dst, struct proc *p, uint32_t ifscope __unused,
3783     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3784     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3785 {
3786 	return flow_divert_connectx_out_common(so, dst, p, pcid, uio, bytes_written);
3787 }
3788 
3789 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope __unused,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3790 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3791     struct sockaddr *dst, struct proc *p, uint32_t ifscope __unused,
3792     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3793     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3794 {
3795 	return flow_divert_connectx_out_common(so, dst, p, pcid, uio, bytes_written);
3796 }
3797 
3798 static errno_t
flow_divert_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)3799 flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
3800 {
3801 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3802 	int                                             error   = 0;
3803 	struct inpcb *inp;
3804 #if CONTENT_FILTER
3805 	struct m_tag *cfil_tag = NULL;
3806 #endif
3807 
3808 	if (!SO_IS_DIVERTED(so)) {
3809 		return EINVAL;
3810 	}
3811 
3812 	inp = sotoinpcb(so);
3813 	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
3814 		error = ECONNRESET;
3815 		goto done;
3816 	}
3817 
3818 	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
3819 		/* The provider considers this datagram flow to be closed, so no data can be sent */
3820 		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
3821 		error = EHOSTUNREACH;
3822 		goto done;
3823 	}
3824 
3825 #if CONTENT_FILTER
3826 	/*
3827 	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
3828 	 * retrieve the CFIL saved remote address from the mbuf and use it.
3829 	 */
3830 	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
3831 		struct sockaddr *cfil_faddr = NULL;
3832 		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
3833 		if (cfil_tag) {
3834 			to = (struct sockaddr *)(void *)cfil_faddr;
3835 		}
3836 		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
3837 	}
3838 #endif
3839 
3840 	/* Implicit connect */
3841 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
3842 		FDLOG0(LOG_INFO, fd_cb, "implicit connect");
3843 
3844 		error = flow_divert_connect_out_internal(so, to, p, true);
3845 		if (error) {
3846 			goto done;
3847 		}
3848 	} else {
3849 		error = flow_divert_check_no_cellular(fd_cb) ||
3850 		    flow_divert_check_no_expensive(fd_cb) ||
3851 		    flow_divert_check_no_constrained(fd_cb);
3852 		if (error) {
3853 			goto done;
3854 		}
3855 	}
3856 
3857 	FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data));
3858 
3859 	fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data);
3860 	error = flow_divert_send_app_data(fd_cb, data, to);
3861 
3862 	data = NULL;
3863 
3864 	if (error) {
3865 		goto done;
3866 	}
3867 
3868 	if (flags & PRUS_EOF) {
3869 		flow_divert_shutdown(so);
3870 	}
3871 
3872 done:
3873 	if (data) {
3874 		mbuf_freem(data);
3875 	}
3876 	if (control) {
3877 		mbuf_free(control);
3878 	}
3879 #if CONTENT_FILTER
3880 	if (cfil_tag) {
3881 		m_tag_free(cfil_tag);
3882 	}
3883 #endif
3884 
3885 	return error;
3886 }
3887 
3888 static int
flow_divert_preconnect(struct socket * so)3889 flow_divert_preconnect(struct socket *so)
3890 {
3891 	int error = 0;
3892 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3893 
3894 	if (!SO_IS_DIVERTED(so)) {
3895 		return EINVAL;
3896 	}
3897 
3898 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
3899 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
3900 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
3901 		if (error) {
3902 			return error;
3903 		}
3904 
3905 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3906 	}
3907 
3908 	soclearfastopen(so);
3909 
3910 	return error;
3911 }
3912 
3913 static void
flow_divert_set_protosw(struct socket * so)3914 flow_divert_set_protosw(struct socket *so)
3915 {
3916 	if (SOCK_DOM(so) == PF_INET) {
3917 		so->so_proto = &g_flow_divert_in_protosw;
3918 	} else {
3919 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
3920 	}
3921 }
3922 
3923 static void
flow_divert_set_udp_protosw(struct socket * so)3924 flow_divert_set_udp_protosw(struct socket *so)
3925 {
3926 	if (SOCK_DOM(so) == PF_INET) {
3927 		so->so_proto = &g_flow_divert_in_udp_protosw;
3928 	} else {
3929 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
3930 	}
3931 }
3932 
3933 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)3934 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
3935 {
3936 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3937 	struct inpcb *inp;
3938 	int error = 0;
3939 
3940 	inp = sotoinpcb(so);
3941 	if (inp == NULL) {
3942 		return EINVAL;
3943 	}
3944 
3945 	if (fd_cb == NULL) {
3946 		error = flow_divert_pcb_init(so);
3947 		fd_cb  = so->so_fd_pcb;
3948 		if (error != 0 || fd_cb == NULL) {
3949 			goto done;
3950 		}
3951 	}
3952 	return flow_divert_data_out(so, flags, data, to, control, p);
3953 
3954 done:
3955 	if (data) {
3956 		mbuf_freem(data);
3957 	}
3958 	if (control) {
3959 		mbuf_free(control);
3960 	}
3961 
3962 	return error;
3963 }
3964 
3965 static errno_t
flow_divert_pcb_init_internal(struct socket * so,uint32_t ctl_unit,uint32_t aggregate_unit)3966 flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
3967 {
3968 	errno_t error = 0;
3969 	struct flow_divert_pcb *fd_cb = NULL;
3970 	uint32_t agg_unit = aggregate_unit;
3971 	uint32_t policy_control_unit = ctl_unit;
3972 	bool is_aggregate = false;
3973 
3974 	if (so->so_flags & SOF_FLOW_DIVERT) {
3975 		return EALREADY;
3976 	}
3977 
3978 	fd_cb = flow_divert_pcb_create(so);
3979 	if (fd_cb == NULL) {
3980 		return ENOMEM;
3981 	}
3982 
3983 	do {
3984 		uint32_t group_unit = flow_divert_derive_kernel_control_unit(&policy_control_unit, &agg_unit, &is_aggregate);
3985 		if (group_unit == 0 || group_unit >= GROUP_COUNT_MAX) {
3986 			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
3987 			error = EINVAL;
3988 			break;
3989 		}
3990 
3991 		error = flow_divert_add_to_group(fd_cb, group_unit);
3992 		if (error == 0) {
3993 			so->so_fd_pcb = fd_cb;
3994 			so->so_flags |= SOF_FLOW_DIVERT;
3995 			fd_cb->control_group_unit = group_unit;
3996 			fd_cb->policy_control_unit = ctl_unit;
3997 			fd_cb->aggregate_unit = agg_unit;
3998 			if (is_aggregate) {
3999 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
4000 			} else {
4001 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
4002 			}
4003 
4004 			if (SOCK_TYPE(so) == SOCK_STREAM) {
4005 				flow_divert_set_protosw(so);
4006 			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
4007 				flow_divert_set_udp_protosw(so);
4008 			}
4009 
4010 			FDLOG0(LOG_INFO, fd_cb, "Created");
4011 		} else if (error != ENOENT) {
4012 			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
4013 		}
4014 	} while (error == ENOENT);
4015 
4016 	if (error != 0) {
4017 		FDRELEASE(fd_cb);
4018 	}
4019 
4020 	return error;
4021 }
4022 
4023 errno_t
flow_divert_pcb_init(struct socket * so)4024 flow_divert_pcb_init(struct socket *so)
4025 {
4026 	struct inpcb *inp = sotoinpcb(so);
4027 	uint32_t aggregate_units = 0;
4028 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4029 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4030 }
4031 
4032 errno_t
flow_divert_token_set(struct socket * so,struct sockopt * sopt)4033 flow_divert_token_set(struct socket *so, struct sockopt *sopt)
4034 {
4035 	uint32_t ctl_unit = 0;
4036 	uint32_t key_unit = 0;
4037 	uint32_t aggregate_unit = 0;
4038 	int error = 0;
4039 	int hmac_error = 0;
4040 	mbuf_t token = NULL;
4041 
4042 	if (so->so_flags & SOF_FLOW_DIVERT) {
4043 		error = EALREADY;
4044 		goto done;
4045 	}
4046 
4047 	if (g_init_result) {
4048 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
4049 		error = ENOPROTOOPT;
4050 		goto done;
4051 	}
4052 
4053 	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
4054 	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
4055 	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
4056 		error = EINVAL;
4057 		goto done;
4058 	} else {
4059 		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
4060 			struct tcpcb *tp = sototcpcb(so);
4061 			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
4062 				error = EINVAL;
4063 				goto done;
4064 			}
4065 		}
4066 	}
4067 
4068 	error = soopt_getm(sopt, &token);
4069 	if (error) {
4070 		token = NULL;
4071 		goto done;
4072 	}
4073 
4074 	error = soopt_mcopyin(sopt, token);
4075 	if (error) {
4076 		token = NULL;
4077 		goto done;
4078 	}
4079 
4080 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
4081 	if (!error) {
4082 		key_unit = ntohl(key_unit);
4083 		if (key_unit >= GROUP_COUNT_MAX) {
4084 			key_unit = 0;
4085 		}
4086 	} else if (error != ENOENT) {
4087 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
4088 		goto done;
4089 	} else {
4090 		key_unit = 0;
4091 	}
4092 
4093 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
4094 	if (error) {
4095 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
4096 		goto done;
4097 	}
4098 
4099 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
4100 	if (error && error != ENOENT) {
4101 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
4102 		goto done;
4103 	}
4104 
4105 	/* A valid kernel control unit is required */
4106 	ctl_unit = ntohl(ctl_unit);
4107 	aggregate_unit = ntohl(aggregate_unit);
4108 
4109 	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
4110 		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
4111 		if (hmac_error && hmac_error != ENOENT) {
4112 			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
4113 			error = hmac_error;
4114 			goto done;
4115 		}
4116 	}
4117 
4118 	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
4119 	if (error == 0) {
4120 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4121 		int log_level = LOG_NOTICE;
4122 
4123 		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
4124 		if (error == 0) {
4125 			fd_cb->log_level = (uint8_t)log_level;
4126 		}
4127 		error = 0;
4128 
4129 		fd_cb->connect_token = token;
4130 		token = NULL;
4131 
4132 		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
4133 	}
4134 
4135 	if (hmac_error == 0) {
4136 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4137 		if (fd_cb != NULL) {
4138 			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
4139 		}
4140 	}
4141 
4142 done:
4143 	if (token != NULL) {
4144 		mbuf_freem(token);
4145 	}
4146 
4147 	return error;
4148 }
4149 
4150 errno_t
flow_divert_token_get(struct socket * so,struct sockopt * sopt)4151 flow_divert_token_get(struct socket *so, struct sockopt *sopt)
4152 {
4153 	uint32_t                                        ctl_unit;
4154 	int                                                     error                                           = 0;
4155 	uint8_t                                         hmac[SHA_DIGEST_LENGTH];
4156 	struct flow_divert_pcb          *fd_cb                                          = so->so_fd_pcb;
4157 	mbuf_t                                          token                                           = NULL;
4158 	struct flow_divert_group        *control_group                          = NULL;
4159 
4160 	if (!SO_IS_DIVERTED(so)) {
4161 		error = EINVAL;
4162 		goto done;
4163 	}
4164 
4165 	if (fd_cb->group == NULL) {
4166 		error = EINVAL;
4167 		goto done;
4168 	}
4169 
4170 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
4171 	if (error) {
4172 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
4173 		goto done;
4174 	}
4175 
4176 	ctl_unit = htonl(fd_cb->group->ctl_unit);
4177 
4178 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
4179 	if (error) {
4180 		goto done;
4181 	}
4182 
4183 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
4184 	if (error) {
4185 		goto done;
4186 	}
4187 
4188 	if (fd_cb->app_data != NULL) {
4189 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
4190 		if (error) {
4191 			goto done;
4192 		}
4193 	}
4194 
4195 	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
4196 	if (control_group != NULL) {
4197 		lck_rw_lock_shared(&control_group->lck);
4198 		ctl_unit = htonl(control_group->ctl_unit);
4199 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
4200 		if (!error) {
4201 			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
4202 		}
4203 		lck_rw_done(&control_group->lck);
4204 		FDGRP_RELEASE(control_group);
4205 	} else {
4206 		error = ENOPROTOOPT;
4207 	}
4208 
4209 	if (error) {
4210 		goto done;
4211 	}
4212 
4213 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
4214 	if (error) {
4215 		goto done;
4216 	}
4217 
4218 	if (sopt->sopt_val == USER_ADDR_NULL) {
4219 		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
4220 		sopt->sopt_valsize = mbuf_pkthdr_len(token);
4221 		goto done;
4222 	}
4223 
4224 	error = soopt_mcopyout(sopt, token);
4225 	if (error) {
4226 		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
4227 		goto done;
4228 	}
4229 
4230 done:
4231 	if (token != NULL) {
4232 		mbuf_freem(token);
4233 	}
4234 
4235 	return error;
4236 }
4237 
4238 void
flow_divert_group_destroy(struct flow_divert_group * group)4239 flow_divert_group_destroy(struct flow_divert_group *group)
4240 {
4241 	lck_rw_lock_exclusive(&group->lck);
4242 
4243 	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);
4244 
4245 	if (group->token_key != NULL) {
4246 		memset(group->token_key, 0, group->token_key_size);
4247 		kfree_data(group->token_key, group->token_key_size);
4248 		group->token_key = NULL;
4249 		group->token_key_size = 0;
4250 	}
4251 
4252 	/* Re-set the current trie */
4253 	if (group->signing_id_trie.memory != NULL) {
4254 		kfree_data_addr(group->signing_id_trie.memory);
4255 	}
4256 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
4257 	group->signing_id_trie.root = NULL_TRIE_IDX;
4258 
4259 	lck_rw_done(&group->lck);
4260 
4261 	zfree(flow_divert_group_zone, group);
4262 }
4263 
4264 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4265 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4266 {
4267 	struct flow_divert_group        *new_group      = NULL;
4268 	int                             error           = 0;
4269 
4270 	if (sac->sc_unit >= GROUP_COUNT_MAX) {
4271 		error = EINVAL;
4272 		goto done;
4273 	}
4274 
4275 	*unitinfo = NULL;
4276 
4277 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4278 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4279 	RB_INIT(&new_group->pcb_tree);
4280 	new_group->ctl_unit = sac->sc_unit;
4281 	MBUFQ_INIT(&new_group->send_queue);
4282 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4283 	new_group->ref_count = 1;
4284 
4285 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4286 
4287 	if (g_flow_divert_groups == NULL) {
4288 		g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4289 		    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4290 	}
4291 
4292 	if (g_flow_divert_groups[sac->sc_unit] != NULL) {
4293 		error = EALREADY;
4294 	} else {
4295 		g_flow_divert_groups[sac->sc_unit] = new_group;
4296 		g_active_group_count++;
4297 	}
4298 
4299 	lck_rw_done(&g_flow_divert_group_lck);
4300 
4301 done:
4302 	if (error == 0) {
4303 		*unitinfo = new_group;
4304 	} else if (new_group != NULL) {
4305 		zfree(flow_divert_group_zone, new_group);
4306 	}
4307 	return error;
4308 }
4309 
4310 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4311 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4312 {
4313 	struct flow_divert_group        *group  = NULL;
4314 	errno_t                                         error   = 0;
4315 
4316 	if (unit >= GROUP_COUNT_MAX) {
4317 		return EINVAL;
4318 	}
4319 
4320 	if (unitinfo == NULL) {
4321 		return 0;
4322 	}
4323 
4324 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4325 
4326 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4327 
4328 	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
4329 		panic("flow divert group %u is disconnecting, but no groups are active (groups = %p, active count = %u", unit,
4330 		    g_flow_divert_groups, g_active_group_count);
4331 	}
4332 
4333 	group = g_flow_divert_groups[unit];
4334 
4335 	if (group != (struct flow_divert_group *)unitinfo) {
4336 		panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4337 	}
4338 
4339 	g_flow_divert_groups[unit] = NULL;
4340 	g_active_group_count--;
4341 
4342 	if (g_active_group_count == 0) {
4343 		kfree_type(struct flow_divert_group *,
4344 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4345 		g_flow_divert_groups = NULL;
4346 	}
4347 
4348 	lck_rw_done(&g_flow_divert_group_lck);
4349 
4350 	if (group != NULL) {
4351 		flow_divert_close_all(group);
4352 		FDGRP_RELEASE(group);
4353 	} else {
4354 		error = EINVAL;
4355 	}
4356 
4357 	return error;
4358 }
4359 
4360 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)4361 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_t m, __unused int flags)
4362 {
4363 	errno_t error = 0;
4364 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4365 	if (group != NULL) {
4366 		error = flow_divert_input(m, group);
4367 		FDGRP_RELEASE(group);
4368 	} else {
4369 		error = ENOENT;
4370 	}
4371 	return error;
4372 }
4373 
4374 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4375 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4376 {
4377 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4378 	if (group == NULL) {
4379 		return;
4380 	}
4381 
4382 	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4383 		struct flow_divert_pcb                  *fd_cb;
4384 		SLIST_HEAD(, flow_divert_pcb)   tmp_list;
4385 
4386 		lck_rw_lock_exclusive(&group->lck);
4387 
4388 		while (!MBUFQ_EMPTY(&group->send_queue)) {
4389 			mbuf_t next_packet;
4390 			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4391 			next_packet = MBUFQ_FIRST(&group->send_queue);
4392 			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4393 			if (error) {
4394 				FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4395 				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4396 				lck_rw_done(&group->lck);
4397 				return;
4398 			}
4399 			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4400 		}
4401 
4402 		SLIST_INIT(&tmp_list);
4403 
4404 		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4405 			FDRETAIN(fd_cb);
4406 			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4407 		}
4408 
4409 		lck_rw_done(&group->lck);
4410 
4411 		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4412 			FDLOCK(fd_cb);
4413 			if (fd_cb->so != NULL) {
4414 				socket_lock(fd_cb->so, 0);
4415 				if (fd_cb->group != NULL) {
4416 					flow_divert_send_buffered_data(fd_cb, FALSE);
4417 				}
4418 				socket_unlock(fd_cb->so, 0);
4419 			}
4420 			FDUNLOCK(fd_cb);
4421 			FDRELEASE(fd_cb);
4422 		}
4423 	}
4424 
4425 	FDGRP_RELEASE(group);
4426 }
4427 
4428 static int
flow_divert_kctl_init(void)4429 flow_divert_kctl_init(void)
4430 {
4431 	struct kern_ctl_reg     ctl_reg;
4432 	int                     result;
4433 
4434 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4435 
4436 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4437 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4438 	ctl_reg.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
4439 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4440 
4441 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4442 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4443 	ctl_reg.ctl_send = flow_divert_kctl_send;
4444 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4445 
4446 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4447 
4448 	if (result) {
4449 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4450 		return result;
4451 	}
4452 
4453 	return 0;
4454 }
4455 
4456 void
flow_divert_init(void)4457 flow_divert_init(void)
4458 {
4459 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4460 	nil_pcb.log_level = LOG_NOTICE;
4461 
4462 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4463 
4464 	VERIFY(g_tcp_protosw != NULL);
4465 
4466 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4467 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4468 
4469 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4470 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4471 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4472 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4473 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4474 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4475 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4476 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4477 
4478 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4479 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4480 
4481 	/*
4482 	 * Socket filters shouldn't attach/detach to/from this protosw
4483 	 * since pr_protosw is to be used instead, which points to the
4484 	 * real protocol; if they do, it is a bug and we should panic.
4485 	 */
4486 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4487 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4488 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4489 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4490 
4491 	/* UDP */
4492 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4493 	VERIFY(g_udp_protosw != NULL);
4494 
4495 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4496 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4497 
4498 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4499 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4500 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4501 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4502 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4503 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4504 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4505 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4506 	g_flow_divert_in_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
4507 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4508 
4509 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4510 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4511 
4512 	/*
4513 	 * Socket filters shouldn't attach/detach to/from this protosw
4514 	 * since pr_protosw is to be used instead, which points to the
4515 	 * real protocol; if they do, it is a bug and we should panic.
4516 	 */
4517 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4518 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4519 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4520 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4521 
4522 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4523 
4524 	VERIFY(g_tcp6_protosw != NULL);
4525 
4526 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4527 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4528 
4529 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4530 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4531 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4532 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4533 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4534 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4535 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4536 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4537 
4538 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4539 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4540 	/*
4541 	 * Socket filters shouldn't attach/detach to/from this protosw
4542 	 * since pr_protosw is to be used instead, which points to the
4543 	 * real protocol; if they do, it is a bug and we should panic.
4544 	 */
4545 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4546 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4547 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4548 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4549 
4550 	/* UDP6 */
4551 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4552 
4553 	VERIFY(g_udp6_protosw != NULL);
4554 
4555 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4556 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4557 
4558 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4559 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4560 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4561 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4562 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4563 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4564 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4565 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4566 	g_flow_divert_in6_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
4567 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4568 
4569 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4570 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4571 	/*
4572 	 * Socket filters shouldn't attach/detach to/from this protosw
4573 	 * since pr_protosw is to be used instead, which points to the
4574 	 * real protocol; if they do, it is a bug and we should panic.
4575 	 */
4576 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4577 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4578 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4579 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4580 
4581 	g_init_result = flow_divert_kctl_init();
4582 	if (g_init_result) {
4583 		goto done;
4584 	}
4585 
4586 done:
4587 	if (g_init_result != 0) {
4588 		if (g_flow_divert_kctl_ref != NULL) {
4589 			ctl_deregister(g_flow_divert_kctl_ref);
4590 			g_flow_divert_kctl_ref = NULL;
4591 		}
4592 	}
4593 }
4594