xref: /xnu-8020.140.41/bsd/netinet/flow_divert.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2012-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <libkern/tree.h>
46 #include <kern/locks.h>
47 #include <kern/debug.h>
48 #include <kern/task.h>
49 #include <mach/task_info.h>
50 #include <net/if_var.h>
51 #include <net/route.h>
52 #include <net/flowhash.h>
53 #include <net/ntstat.h>
54 #include <net/content_filter.h>
55 #include <net/necp.h>
56 #include <netinet/in.h>
57 #include <netinet/in_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_fsm.h>
61 #include <netinet/flow_divert.h>
62 #include <netinet/flow_divert_proto.h>
63 #include <netinet6/in6_pcb.h>
64 #include <netinet6/ip6protosw.h>
65 #include <dev/random/randomdev.h>
66 #include <libkern/crypto/sha1.h>
67 #include <libkern/crypto/crypto_internal.h>
68 #include <os/log.h>
69 #include <corecrypto/cc.h>
70 #if CONTENT_FILTER
71 #include <net/content_filter.h>
72 #endif /* CONTENT_FILTER */
73 
/*
 * Bit flags that are OR-ed into the flags field of a flow_divert_pcb
 * (see flow_divert_update_closed_state for an example). Every value is a
 * distinct single bit; bit 0x00000020 is not assigned in this list.
 */
#define FLOW_DIVERT_CONNECT_STARTED             0x00000001
#define FLOW_DIVERT_READ_CLOSED                 0x00000002
#define FLOW_DIVERT_WRITE_CLOSED                0x00000004
#define FLOW_DIVERT_TUNNEL_RD_CLOSED    0x00000008
#define FLOW_DIVERT_TUNNEL_WR_CLOSED    0x00000010
#define FLOW_DIVERT_HAS_HMAC            0x00000040
#define FLOW_DIVERT_NOTIFY_ON_RECEIVED  0x00000080
#define FLOW_DIVERT_IMPLICIT_CONNECT    0x00000100
#define FLOW_DIVERT_DID_SET_LOCAL_ADDR  0x00000200
#define FLOW_DIVERT_HAS_TOKEN           0x00000400
#define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
#define FLOW_DIVERT_FLOW_IS_TRANSPARENT   0x00001000
86 
/*
 * Logging helpers. Both map a syslog level to an os_log type with
 * flow_divert_syslog_type_to_oslog_type() and prefix the message with the
 * hash of the given pcb. FDLOG takes a format string followed by its
 * arguments; FDLOG0 is the form for a message that has no format arguments.
 */
#define FDLOG(level, pcb, format, ...) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)

#define FDLOG0(level, pcb, msg) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)
92 
/*
 * Reference-count helpers for a flow_divert_pcb. Both tolerate a NULL pcb and
 * do nothing in that case.
 *
 * FDRETAIN atomically increments ref_count. FDRELEASE atomically decrements it
 * and calls flow_divert_pcb_destroy() when OSDecrementAtomic() returns 1.
 *
 * Each macro expands to one do { } while (0) statement so that it can be used
 * as the unbraced body of an if that is followed by an else without the else
 * binding to the macro's internal if.
 */
#define FDRETAIN(pcb)                                                   \
	do {                                                            \
	        if ((pcb) != NULL) {                                    \
	                OSIncrementAtomic(&(pcb)->ref_count);           \
	        }                                                       \
	} while (0)
#define FDRELEASE(pcb)                                                  \
	do {                                                            \
	        if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) { \
	                flow_divert_pcb_destroy(pcb);                   \
	        }                                                       \
	} while (0)
100 
/* Take / drop the per-pcb mutex */
#define FDLOCK(pcb)                                             lck_mtx_lock(&(pcb)->mtx)
#define FDUNLOCK(pcb)                                   lck_mtx_unlock(&(pcb)->mtx)

/* 128 KiB */
#define FD_CTL_SENDBUFF_SIZE                    (128 * 1024)

#define GROUP_BIT_CTL_ENQUEUE_BLOCKED   0

/*
 * Exclusive upper bound for a control unit: flow_divert_pcb_insert() rejects
 * a unit of 0 or one that is >= GROUP_COUNT_MAX, and scans group slots
 * 1 .. GROUP_COUNT_MAX - 1.
 */
#define GROUP_COUNT_MAX                                 31
#define FLOW_DIVERT_MAX_NAME_SIZE               4096
#define FLOW_DIVERT_MAX_KEY_SIZE                1024
/* 1 MiB */
#define FLOW_DIVERT_MAX_TRIE_MEMORY             (1024 * 1024)
112 
/*
 * One node of the signing-identifier trie. A node represents a run of bytes
 * stored in the trie's byte array rather than a single character.
 */
struct flow_divert_trie_node {
	uint16_t start;         /* Index in the trie's bytes[] of the first byte of this node's string */
	uint16_t length;        /* Number of bytes in this node's string */
	uint16_t child_map;     /* Index of this node's child map, or NULL_TRIE_IDX if it has none */
};
118 
/* Number of entries in one child map: one slot per possible byte value */
#define CHILD_MAP_SIZE                  256
/* Sentinel index meaning "no node" / "no child map" / "allocation failed" */
#define NULL_TRIE_IDX                   0xffff
/* Node i of trie t */
#define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
/* Child slot for byte b inside the child map owned by node i (node i must have a child map) */
#define TRIE_CHILD(t, i, b)             (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
/* Byte i of the trie's shared byte array */
#define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])
124 
/*
 * Placeholder pcb: handed to FDLOG/FDLOG0 for messages that are not tied to a
 * particular flow, and the source of the log_level copied into new pcbs.
 */
static struct flow_divert_pcb           nil_pcb;

static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
/* Reader/writer lock held while g_flow_divert_groups / g_active_group_count are examined */
static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
    &flow_divert_mtx_attr);

/* Group table indexed by control unit; starts out NULL */
static struct flow_divert_group         **g_flow_divert_groups  = NULL;
static uint32_t                         g_active_group_count    = 0;

static  errno_t                         g_init_result           = 0;

static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;

static struct protosw                   g_flow_divert_in_protosw;
static struct pr_usrreqs                g_flow_divert_in_usrreqs;
static struct protosw                   g_flow_divert_in_udp_protosw;
static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
static struct ip6protosw                g_flow_divert_in6_protosw;
static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
static struct ip6protosw                g_flow_divert_in6_udp_protosw;
static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;

static struct protosw                   *g_tcp_protosw          = NULL;
static struct ip6protosw                *g_tcp6_protosw         = NULL;
static struct protosw                   *g_udp_protosw          = NULL;
static struct ip6protosw                *g_udp6_protosw         = NULL;

/* Zones sized for struct flow_divert_group and struct flow_divert_pcb; freed elements are cleared */
ZONE_DEFINE(flow_divert_group_zone, "flow_divert_group",
    sizeof(struct flow_divert_group), ZC_ZFREE_CLEARMEM);
ZONE_DEFINE(flow_divert_pcb_zone, "flow_divert_pcb",
    sizeof(struct flow_divert_pcb), ZC_ZFREE_CLEARMEM);
157 
/*
 * Forward declarations. The matching definitions are not part of this section
 * of the file. Note that flow_divert_get_buffered_target_address is the only
 * one declared without static.
 */
static errno_t
flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);

static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr *addr);

static int
flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr);

struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_t buffer);

static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected);
172 
173 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)174 flow_divert_syslog_type_to_oslog_type(int syslog_type)
175 {
176 	switch (syslog_type) {
177 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
178 	case LOG_INFO: return OS_LOG_TYPE_INFO;
179 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
180 	default: return OS_LOG_TYPE_DEFAULT;
181 	}
182 }
183 
184 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)185 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
186 {
187 	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
188 }
189 
190 RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
191 RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
192 
193 static const char *
flow_divert_packet_type2str(uint8_t packet_type)194 flow_divert_packet_type2str(uint8_t packet_type)
195 {
196 	switch (packet_type) {
197 	case FLOW_DIVERT_PKT_CONNECT:
198 		return "connect";
199 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
200 		return "connect result";
201 	case FLOW_DIVERT_PKT_DATA:
202 		return "data";
203 	case FLOW_DIVERT_PKT_CLOSE:
204 		return "close";
205 	case FLOW_DIVERT_PKT_READ_NOTIFY:
206 		return "read notification";
207 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
208 		return "properties update";
209 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
210 		return "app map create";
211 	default:
212 		return "unknown";
213 	}
214 }
215 
216 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)217 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
218 {
219 	struct flow_divert_pcb  key_item;
220 	struct flow_divert_pcb  *fd_cb          = NULL;
221 
222 	key_item.hash = hash;
223 
224 	lck_rw_lock_shared(&group->lck);
225 	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
226 	FDRETAIN(fd_cb);
227 	lck_rw_done(&group->lck);
228 
229 	return fd_cb;
230 }
231 
232 static errno_t
flow_divert_pcb_insert(struct socket * so,struct flow_divert_pcb * fd_cb,uint32_t ctl_unit)233 flow_divert_pcb_insert(struct socket *so, struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
234 {
235 	errno_t                                                 error                                           = 0;
236 	struct                                          flow_divert_pcb *exist          = NULL;
237 	struct flow_divert_group        *group;
238 	static uint32_t                         g_nextkey                                       = 1;
239 	static uint32_t                         g_hash_seed                                     = 0;
240 	int                                                     try_count                                       = 0;
241 
242 	if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) {
243 		return EINVAL;
244 	}
245 
246 	socket_unlock(so, 0);
247 	lck_rw_lock_shared(&g_flow_divert_group_lck);
248 
249 	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
250 		FDLOG0(LOG_ERR, &nil_pcb, "No active groups, flow divert cannot be used for this socket");
251 		error = ENETUNREACH;
252 		goto done;
253 	}
254 
255 	group = g_flow_divert_groups[ctl_unit];
256 	if (group == NULL) {
257 		FDLOG(LOG_ERR, &nil_pcb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
258 		error = ENETUNREACH;
259 		goto done;
260 	}
261 
262 	socket_lock(so, 0);
263 	if (!(so->so_flags & SOF_FLOW_DIVERT)) {
264 		error = EINVAL;
265 		goto unlock;
266 	}
267 
268 	do {
269 		uint32_t        key[2];
270 		uint32_t        idx;
271 
272 		key[0] = g_nextkey++;
273 		key[1] = RandomULong();
274 
275 		if (g_hash_seed == 0) {
276 			g_hash_seed = RandomULong();
277 		}
278 
279 		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);
280 
281 		for (idx = 1; idx < GROUP_COUNT_MAX; idx++) {
282 			struct flow_divert_group *curr_group = g_flow_divert_groups[idx];
283 			if (curr_group != NULL && curr_group != group) {
284 				lck_rw_lock_shared(&curr_group->lck);
285 				exist = RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb);
286 				lck_rw_done(&curr_group->lck);
287 				if (exist != NULL) {
288 					break;
289 				}
290 			}
291 		}
292 
293 		if (exist == NULL) {
294 			lck_rw_lock_exclusive(&group->lck);
295 			exist = RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb);
296 			lck_rw_done(&group->lck);
297 		}
298 	} while (exist != NULL && try_count++ < 3);
299 
300 	if (exist == NULL) {
301 		fd_cb->group = group;
302 		FDRETAIN(fd_cb);                /* The group now has a reference */
303 	} else {
304 		fd_cb->hash = 0;
305 		error = EEXIST;
306 	}
307 
308 unlock:
309 	socket_unlock(so, 0);
310 
311 done:
312 	lck_rw_done(&g_flow_divert_group_lck);
313 	socket_lock(so, 0);
314 
315 	if (!(so->so_flags & SOF_FLOW_DIVERT)) {
316 		error = EINVAL;
317 	}
318 
319 	return error;
320 }
321 
322 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)323 flow_divert_pcb_create(socket_t so)
324 {
325 	struct flow_divert_pcb  *new_pcb = NULL;
326 
327 	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
328 	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
329 	new_pcb->so = so;
330 	new_pcb->log_level = nil_pcb.log_level;
331 
332 	FDRETAIN(new_pcb);      /* Represents the socket's reference */
333 
334 	return new_pcb;
335 }
336 
337 static void
flow_divert_pcb_destroy(struct flow_divert_pcb * fd_cb)338 flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
339 {
340 	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %u, tunnel tx %u, tunnel rx %u",
341 	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);
342 
343 	if (fd_cb->connect_token != NULL) {
344 		mbuf_freem(fd_cb->connect_token);
345 	}
346 	if (fd_cb->connect_packet != NULL) {
347 		mbuf_freem(fd_cb->connect_packet);
348 	}
349 	if (fd_cb->app_data != NULL) {
350 		kfree_data(fd_cb->app_data, fd_cb->app_data_length);
351 	}
352 	free_sockaddr(fd_cb->original_remote_endpoint);
353 	zfree(flow_divert_pcb_zone, fd_cb);
354 }
355 
356 static void
flow_divert_pcb_remove(struct flow_divert_pcb * fd_cb)357 flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
358 {
359 	if (fd_cb->group != NULL) {
360 		struct flow_divert_group *group = fd_cb->group;
361 		lck_rw_lock_exclusive(&group->lck);
362 		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
363 		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
364 		fd_cb->group = NULL;
365 		FDRELEASE(fd_cb);                               /* Release the group's reference */
366 		lck_rw_done(&group->lck);
367 	}
368 }
369 
370 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_t * packet)371 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_t *packet)
372 {
373 	struct flow_divert_packet_header        hdr;
374 	int                                     error           = 0;
375 
376 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
377 	if (error) {
378 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
379 		return error;
380 	}
381 
382 	hdr.packet_type = packet_type;
383 	hdr.conn_id = htonl(fd_cb->hash);
384 
385 	/* Lay down the header */
386 	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
387 	if (error) {
388 		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
389 		mbuf_freem(*packet);
390 		*packet = NULL;
391 		return error;
392 	}
393 
394 	return 0;
395 }
396 
397 static int
flow_divert_packet_append_tlv(mbuf_t packet,uint8_t type,uint32_t length,const void * value)398 flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, uint32_t length, const void *value)
399 {
400 	uint32_t        net_length      = htonl(length);
401 	int                     error           = 0;
402 
403 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
404 	if (error) {
405 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
406 		return error;
407 	}
408 
409 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
410 	if (error) {
411 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
412 		return error;
413 	}
414 
415 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
416 	if (error) {
417 		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
418 		return error;
419 	}
420 
421 	return error;
422 }
423 
424 static int
flow_divert_packet_find_tlv(mbuf_t packet,int offset,uint8_t type,int * err,int next)425 flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next)
426 {
427 	size_t          cursor                  = offset;
428 	int                     error                   = 0;
429 	uint32_t        curr_length;
430 	uint8_t         curr_type;
431 
432 	*err = 0;
433 
434 	do {
435 		if (!next) {
436 			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
437 			if (error) {
438 				*err = ENOENT;
439 				return -1;
440 			}
441 		} else {
442 			next = 0;
443 			curr_type = FLOW_DIVERT_TLV_NIL;
444 		}
445 
446 		if (curr_type != type) {
447 			cursor += sizeof(curr_type);
448 			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
449 			if (error) {
450 				*err = error;
451 				return -1;
452 			}
453 
454 			cursor += (sizeof(curr_length) + ntohl(curr_length));
455 		}
456 	} while (curr_type != type);
457 
458 	return (int)cursor;
459 }
460 
461 static int
flow_divert_packet_get_tlv(mbuf_t packet,int offset,uint8_t type,size_t buff_len,void * buff,uint32_t * val_size)462 flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, uint32_t *val_size)
463 {
464 	int                     error           = 0;
465 	uint32_t        length;
466 	int                     tlv_offset;
467 
468 	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
469 	if (tlv_offset < 0) {
470 		return error;
471 	}
472 
473 	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
474 	if (error) {
475 		return error;
476 	}
477 
478 	length = ntohl(length);
479 
480 	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);
481 
482 	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
483 		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
484 		return EINVAL;
485 	}
486 
487 	if (val_size != NULL) {
488 		*val_size = length;
489 	}
490 
491 	if (buff != NULL && buff_len > 0) {
492 		memset(buff, 0, buff_len);
493 		size_t to_copy = (length < buff_len) ? length : buff_len;
494 		error = mbuf_copydata(packet, data_offset, to_copy, buff);
495 		if (error) {
496 			return error;
497 		}
498 	}
499 
500 	return 0;
501 }
502 
503 static int
flow_divert_packet_compute_hmac(mbuf_t packet,struct flow_divert_group * group,uint8_t * hmac)504 flow_divert_packet_compute_hmac(mbuf_t packet, struct flow_divert_group *group, uint8_t *hmac)
505 {
506 	mbuf_t  curr_mbuf       = packet;
507 
508 	if (g_crypto_funcs == NULL || group->token_key == NULL) {
509 		return ENOPROTOOPT;
510 	}
511 
512 	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
513 	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);
514 
515 	while (curr_mbuf != NULL) {
516 		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
517 		curr_mbuf = mbuf_next(curr_mbuf);
518 	}
519 
520 	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);
521 
522 	return 0;
523 }
524 
525 static int
flow_divert_packet_verify_hmac(mbuf_t packet,uint32_t ctl_unit)526 flow_divert_packet_verify_hmac(mbuf_t packet, uint32_t ctl_unit)
527 {
528 	int                                                     error = 0;
529 	struct flow_divert_group        *group = NULL;
530 	int                                                     hmac_offset;
531 	uint8_t                                         packet_hmac[SHA_DIGEST_LENGTH];
532 	uint8_t                                         computed_hmac[SHA_DIGEST_LENGTH];
533 	mbuf_t                                          tail;
534 
535 	lck_rw_lock_shared(&g_flow_divert_group_lck);
536 
537 	if (g_flow_divert_groups != NULL && g_active_group_count > 0) {
538 		group = g_flow_divert_groups[ctl_unit];
539 	}
540 
541 	if (group == NULL) {
542 		lck_rw_done(&g_flow_divert_group_lck);
543 		return ENOPROTOOPT;
544 	}
545 
546 	lck_rw_lock_shared(&group->lck);
547 
548 	if (group->token_key == NULL) {
549 		error = ENOPROTOOPT;
550 		goto done;
551 	}
552 
553 	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
554 	if (hmac_offset < 0) {
555 		goto done;
556 	}
557 
558 	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
559 	if (error) {
560 		goto done;
561 	}
562 
563 	/* Chop off the HMAC TLV */
564 	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
565 	if (error) {
566 		goto done;
567 	}
568 
569 	mbuf_free(tail);
570 
571 	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
572 	if (error) {
573 		goto done;
574 	}
575 
576 	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
577 		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
578 		error = EINVAL;
579 		goto done;
580 	}
581 
582 done:
583 	lck_rw_done(&group->lck);
584 	lck_rw_done(&g_flow_divert_group_lck);
585 	return error;
586 }
587 
588 static void
flow_divert_add_data_statistics(struct flow_divert_pcb * fd_cb,size_t data_len,Boolean send)589 flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
590 {
591 	struct inpcb *inp = NULL;
592 	struct ifnet *ifp = NULL;
593 	Boolean cell = FALSE;
594 	Boolean wifi = FALSE;
595 	Boolean wired = FALSE;
596 
597 	inp = sotoinpcb(fd_cb->so);
598 	if (inp == NULL) {
599 		return;
600 	}
601 
602 	if (inp->inp_vflag & INP_IPV4) {
603 		ifp = inp->inp_last_outifp;
604 	} else if (inp->inp_vflag & INP_IPV6) {
605 		ifp = inp->in6p_last_outifp;
606 	}
607 	if (ifp != NULL) {
608 		cell = IFNET_IS_CELLULAR(ifp);
609 		wifi = (!cell && IFNET_IS_WIFI(ifp));
610 		wired = (!wifi && IFNET_IS_WIRED(ifp));
611 	}
612 
613 	if (send) {
614 		INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1);
615 		INP_ADD_STAT(inp, cell, wifi, wired, txbytes, data_len);
616 	} else {
617 		INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1);
618 		INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, data_len);
619 	}
620 	inp_set_activity_bitmap(inp);
621 }
622 
623 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)624 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
625 {
626 	struct inpcb *inp = sotoinpcb(fd_cb->so);
627 	if (INP_NO_CELLULAR(inp)) {
628 		struct ifnet *ifp = NULL;
629 		if (inp->inp_vflag & INP_IPV4) {
630 			ifp = inp->inp_last_outifp;
631 		} else if (inp->inp_vflag & INP_IPV6) {
632 			ifp = inp->in6p_last_outifp;
633 		}
634 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
635 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
636 			return EHOSTUNREACH;
637 		}
638 	}
639 	return 0;
640 }
641 
642 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)643 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
644 {
645 	struct inpcb *inp = sotoinpcb(fd_cb->so);
646 	if (INP_NO_EXPENSIVE(inp)) {
647 		struct ifnet *ifp = NULL;
648 		if (inp->inp_vflag & INP_IPV4) {
649 			ifp = inp->inp_last_outifp;
650 		} else if (inp->inp_vflag & INP_IPV6) {
651 			ifp = inp->in6p_last_outifp;
652 		}
653 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
654 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
655 			return EHOSTUNREACH;
656 		}
657 	}
658 	return 0;
659 }
660 
661 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)662 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
663 {
664 	struct inpcb *inp = sotoinpcb(fd_cb->so);
665 	if (INP_NO_CONSTRAINED(inp)) {
666 		struct ifnet *ifp = NULL;
667 		if (inp->inp_vflag & INP_IPV4) {
668 			ifp = inp->inp_last_outifp;
669 		} else if (inp->inp_vflag & INP_IPV6) {
670 			ifp = inp->in6p_last_outifp;
671 		}
672 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
673 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
674 			return EHOSTUNREACH;
675 		}
676 	}
677 	return 0;
678 }
679 
680 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,Boolean tunnel)681 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, Boolean tunnel)
682 {
683 	if (how != SHUT_RD) {
684 		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
685 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
686 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
687 			/* If the tunnel is not accepting writes any more, then flush the send buffer */
688 			sbflush(&fd_cb->so->so_snd);
689 		}
690 	}
691 	if (how != SHUT_WR) {
692 		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
693 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
694 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
695 		}
696 	}
697 }
698 
699 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)700 trie_node_alloc(struct flow_divert_trie *trie)
701 {
702 	if (trie->nodes_free_next < trie->nodes_count) {
703 		uint16_t node_idx = trie->nodes_free_next++;
704 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
705 		return node_idx;
706 	} else {
707 		return NULL_TRIE_IDX;
708 	}
709 }
710 
711 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)712 trie_child_map_alloc(struct flow_divert_trie *trie)
713 {
714 	if (trie->child_maps_free_next < trie->child_maps_count) {
715 		return trie->child_maps_free_next++;
716 	} else {
717 		return NULL_TRIE_IDX;
718 	}
719 }
720 
721 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)722 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
723 {
724 	uint16_t start = trie->bytes_free_next;
725 	if (start + bytes_size <= trie->bytes_count) {
726 		if (start != bytes_idx) {
727 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
728 		}
729 		trie->bytes_free_next += bytes_size;
730 		return start;
731 	} else {
732 		return NULL_TRIE_IDX;
733 	}
734 }
735 
736 static uint16_t
flow_divert_trie_insert(struct flow_divert_trie * trie,uint16_t string_start,size_t string_len)737 flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
738 {
739 	uint16_t current = trie->root;
740 	uint16_t child = trie->root;
741 	uint16_t string_end = string_start + (uint16_t)string_len;
742 	uint16_t string_idx = string_start;
743 	uint16_t string_remainder = (uint16_t)string_len;
744 
745 	while (child != NULL_TRIE_IDX) {
746 		uint16_t parent = current;
747 		uint16_t node_idx;
748 		uint16_t current_end;
749 
750 		current = child;
751 		child = NULL_TRIE_IDX;
752 
753 		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
754 
755 		for (node_idx = TRIE_NODE(trie, current).start;
756 		    node_idx < current_end &&
757 		    string_idx < string_end &&
758 		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
759 		    node_idx++, string_idx++) {
760 			;
761 		}
762 
763 		string_remainder = string_end - string_idx;
764 
765 		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
766 			/*
767 			 * We did not reach the end of the current node's string.
768 			 * We need to split the current node into two:
769 			 *   1. A new node that contains the prefix of the node that matches
770 			 *      the prefix of the string being inserted.
771 			 *   2. The current node modified to point to the remainder
772 			 *      of the current node's string.
773 			 */
774 			uint16_t prefix = trie_node_alloc(trie);
775 			if (prefix == NULL_TRIE_IDX) {
776 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
777 				return NULL_TRIE_IDX;
778 			}
779 
780 			/*
781 			 * Prefix points to the portion of the current nodes's string that has matched
782 			 * the input string thus far.
783 			 */
784 			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
785 			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);
786 
787 			/*
788 			 * Prefix has the current node as the child corresponding to the first byte
789 			 * after the split.
790 			 */
791 			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
792 			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
793 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
794 				return NULL_TRIE_IDX;
795 			}
796 			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
797 
798 			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
799 			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
800 
801 			/* Current node is adjusted to point to the remainder */
802 			TRIE_NODE(trie, current).start = node_idx;
803 			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
804 
805 			/* We want to insert the new leaf (if any) as a child of the prefix */
806 			current = prefix;
807 		}
808 
809 		if (string_remainder > 0) {
810 			/*
811 			 * We still have bytes in the string that have not been matched yet.
812 			 * If the current node has children, iterate to the child corresponding
813 			 * to the next byte in the string.
814 			 */
815 			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
816 				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
817 			}
818 		}
819 	} /* while (child != NULL_TRIE_IDX) */
820 
821 	if (string_remainder > 0) {
822 		/* Add a new leaf containing the remainder of the string */
823 		uint16_t leaf = trie_node_alloc(trie);
824 		if (leaf == NULL_TRIE_IDX) {
825 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
826 			return NULL_TRIE_IDX;
827 		}
828 
829 		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
830 		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
831 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
832 			return NULL_TRIE_IDX;
833 		}
834 		TRIE_NODE(trie, leaf).length = string_remainder;
835 
836 		/* Set the new leaf as the child of the current node */
837 		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
838 			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
839 			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
840 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
841 				return NULL_TRIE_IDX;
842 			}
843 		}
844 		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
845 		current = leaf;
846 	} /* else duplicate or this string is a prefix of one of the existing strings */
847 
848 	return current;
849 }
850 
851 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
852 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes)853 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
854 {
855 	uint16_t current = trie->root;
856 	uint16_t string_idx = 0;
857 
858 	while (current != NULL_TRIE_IDX) {
859 		uint16_t next = NULL_TRIE_IDX;
860 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
861 		uint16_t node_idx;
862 
863 		for (node_idx = TRIE_NODE(trie, current).start;
864 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
865 		    node_idx++, string_idx++) {
866 			;
867 		}
868 
869 		if (node_idx == node_end) {
870 			if (string_bytes[string_idx] == '\0') {
871 				return current; /* Got an exact match */
872 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
873 			    0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
874 				return current; /* Got an apple webclip id prefix match */
875 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
876 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
877 			}
878 		}
879 		current = next;
880 	}
881 
882 	return NULL_TRIE_IDX;
883 }
884 
885 struct uuid_search_info {
886 	uuid_t target_uuid;
887 	char *found_signing_id;
888 	boolean_t found_multiple_signing_ids;
889 	proc_t found_proc;
890 };
891 
892 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)893 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
894 {
895 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
896 	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
897 
898 	if (info->found_signing_id != NULL) {
899 		if (!info->found_multiple_signing_ids) {
900 			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
901 			info->found_proc = p;
902 			result = PROC_CLAIMED_DONE;
903 		} else {
904 			uuid_string_t uuid_str;
905 			uuid_unparse(info->target_uuid, uuid_str);
906 			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
907 		}
908 		kfree_data(info->found_signing_id, strlen(info->found_signing_id) + 1);
909 		info->found_signing_id = NULL;
910 	}
911 
912 	if (result == PROC_RETURNED_DONE) {
913 		uuid_string_t uuid_str;
914 		uuid_unparse(info->target_uuid, uuid_str);
915 		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
916 	}
917 
918 	return result;
919 }
920 
921 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)922 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
923 {
924 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
925 	int include = 0;
926 
927 	if (info->found_multiple_signing_ids) {
928 		return include;
929 	}
930 
931 	include = (uuid_compare(proc_executableuuid_addr(p), info->target_uuid) == 0);
932 	if (include) {
933 		const char *signing_id = cs_identity_get(p);
934 		if (signing_id != NULL) {
935 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
936 			size_t signing_id_size = strlen(signing_id) + 1;
937 			if (info->found_signing_id == NULL) {
938 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
939 				memcpy(info->found_signing_id, signing_id, signing_id_size);
940 			} else if (memcmp(signing_id, info->found_signing_id, signing_id_size)) {
941 				info->found_multiple_signing_ids = TRUE;
942 			}
943 		} else {
944 			info->found_multiple_signing_ids = TRUE;
945 		}
946 		include = !info->found_multiple_signing_ids;
947 	}
948 
949 	return include;
950 }
951 
952 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)953 flow_divert_find_proc_by_uuid(uuid_t uuid)
954 {
955 	struct uuid_search_info info;
956 
957 	if (LOG_INFO <= nil_pcb.log_level) {
958 		uuid_string_t uuid_str;
959 		uuid_unparse(uuid, uuid_str);
960 		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
961 	}
962 
963 	memset(&info, 0, sizeof(info));
964 	info.found_proc = PROC_NULL;
965 	uuid_copy(info.target_uuid, uuid);
966 
967 	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
968 
969 	return info.found_proc;
970 }
971 
972 static int
flow_divert_add_proc_info(struct flow_divert_pcb * fd_cb,proc_t proc,const char * signing_id,mbuf_t connect_packet,bool is_effective)973 flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id, mbuf_t connect_packet, bool is_effective)
974 {
975 	int error = 0;
976 	uint8_t *cdhash = NULL;
977 	audit_token_t audit_token = {};
978 	const char *proc_cs_id = signing_id;
979 
980 	proc_lock(proc);
981 
982 	if (proc_cs_id == NULL) {
983 		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
984 			proc_cs_id = cs_identity_get(proc);
985 		} else {
986 			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
987 		}
988 	}
989 
990 	if (is_effective) {
991 		lck_rw_lock_shared(&fd_cb->group->lck);
992 		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
993 			if (proc_cs_id != NULL) {
994 				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)proc_cs_id);
995 				if (result == NULL_TRIE_IDX) {
996 					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
997 					error = EPERM;
998 				} else {
999 					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
1000 				}
1001 			} else {
1002 				error = EPERM;
1003 			}
1004 		}
1005 		lck_rw_done(&fd_cb->group->lck);
1006 	}
1007 
1008 	if (error != 0) {
1009 		goto done;
1010 	}
1011 
1012 	/*
1013 	 * If signing_id is not NULL then it came from the flow divert token and will be added
1014 	 * as part of the token, so there is no need to add it here.
1015 	 */
1016 	if (signing_id == NULL && proc_cs_id != NULL) {
1017 		error = flow_divert_packet_append_tlv(connect_packet,
1018 		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
1019 		    (uint32_t)strlen(proc_cs_id),
1020 		    proc_cs_id);
1021 		if (error != 0) {
1022 			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
1023 			goto done;
1024 		}
1025 	}
1026 
1027 	cdhash = cs_get_cdhash(proc);
1028 	if (cdhash != NULL) {
1029 		error = flow_divert_packet_append_tlv(connect_packet,
1030 		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
1031 		    SHA1_RESULTLEN,
1032 		    cdhash);
1033 		if (error) {
1034 			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
1035 			goto done;
1036 		}
1037 	} else {
1038 		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
1039 	}
1040 
1041 	task_t task = proc_task(proc);
1042 	if (task != TASK_NULL) {
1043 		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
1044 		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
1045 		if (rc == KERN_SUCCESS) {
1046 			int append_error = flow_divert_packet_append_tlv(connect_packet,
1047 			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
1048 			    sizeof(audit_token_t),
1049 			    &audit_token);
1050 			if (append_error) {
1051 				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
1052 			}
1053 		}
1054 	}
1055 
1056 done:
1057 	proc_unlock(proc);
1058 
1059 	return error;
1060 }
1061 
1062 static int
flow_divert_add_all_proc_info(struct flow_divert_pcb * fd_cb,struct socket * so,proc_t proc,const char * signing_id,mbuf_t connect_packet)1063 flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id, mbuf_t connect_packet)
1064 {
1065 	int error = 0;
1066 	proc_t effective_proc = PROC_NULL;
1067 	proc_t responsible_proc = PROC_NULL;
1068 	proc_t real_proc = proc_find(so->last_pid);
1069 	bool release_real_proc = true;
1070 
1071 	proc_t src_proc = PROC_NULL;
1072 	proc_t real_src_proc = PROC_NULL;
1073 
1074 	if (real_proc == PROC_NULL) {
1075 		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
1076 		release_real_proc = false;
1077 		real_proc = proc;
1078 		if (real_proc == PROC_NULL) {
1079 			real_proc = current_proc();
1080 		}
1081 	}
1082 
1083 	if (so->so_flags & SOF_DELEGATED) {
1084 		if (proc_getpid(real_proc) != so->e_pid) {
1085 			effective_proc = proc_find(so->e_pid);
1086 		} else if (uuid_compare(proc_executableuuid_addr(real_proc), so->e_uuid)) {
1087 			effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
1088 		}
1089 	}
1090 
1091 #if defined(XNU_TARGET_OS_OSX)
1092 	lck_rw_lock_shared(&fd_cb->group->lck);
1093 	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1094 		if (so->so_rpid > 0) {
1095 			responsible_proc = proc_find(so->so_rpid);
1096 		}
1097 	}
1098 	lck_rw_done(&fd_cb->group->lck);
1099 #endif
1100 
1101 	real_src_proc = real_proc;
1102 
1103 	if (responsible_proc != PROC_NULL) {
1104 		src_proc = responsible_proc;
1105 		if (effective_proc != NULL) {
1106 			real_src_proc = effective_proc;
1107 		}
1108 	} else if (effective_proc != PROC_NULL) {
1109 		src_proc = effective_proc;
1110 	} else {
1111 		src_proc = real_proc;
1112 	}
1113 
1114 	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
1115 	if (error != 0) {
1116 		goto done;
1117 	}
1118 
1119 	if (real_src_proc != NULL && real_src_proc != src_proc) {
1120 		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
1121 		if (error != 0) {
1122 			goto done;
1123 		}
1124 	}
1125 
1126 done:
1127 	if (responsible_proc != PROC_NULL) {
1128 		proc_rele(responsible_proc);
1129 	}
1130 
1131 	if (effective_proc != PROC_NULL) {
1132 		proc_rele(effective_proc);
1133 	}
1134 
1135 	if (real_proc != PROC_NULL && release_real_proc) {
1136 		proc_rele(real_proc);
1137 	}
1138 
1139 	return error;
1140 }
1141 
1142 static int
flow_divert_send_packet(struct flow_divert_pcb * fd_cb,mbuf_t packet,Boolean enqueue)1143 flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean enqueue)
1144 {
1145 	int             error;
1146 
1147 	if (fd_cb->group == NULL) {
1148 		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
1149 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
1150 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
1151 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1152 			error = ECONNABORTED;
1153 		} else {
1154 			error = EHOSTUNREACH;
1155 		}
1156 		fd_cb->so->so_error = (uint16_t)error;
1157 		return error;
1158 	}
1159 
1160 	lck_rw_lock_shared(&fd_cb->group->lck);
1161 
1162 	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
1163 		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
1164 	} else {
1165 		error = ENOBUFS;
1166 	}
1167 
1168 	if (error == ENOBUFS) {
1169 		if (enqueue) {
1170 			if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
1171 				lck_rw_lock_exclusive(&fd_cb->group->lck);
1172 			}
1173 			MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
1174 			error = 0;
1175 		}
1176 		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
1177 	}
1178 
1179 	lck_rw_done(&fd_cb->group->lck);
1180 
1181 	return error;
1182 }
1183 
1184 static int
flow_divert_create_connect_packet(struct flow_divert_pcb * fd_cb,struct sockaddr * to,struct socket * so,proc_t p,mbuf_t * out_connect_packet)1185 flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_t *out_connect_packet)
1186 {
1187 	int                     error                   = 0;
1188 	int                     flow_type               = 0;
1189 	char                    *signing_id = NULL;
1190 	uint32_t                sid_size = 0;
1191 	mbuf_t                  connect_packet = NULL;
1192 	cfil_sock_id_t          cfil_sock_id            = CFIL_SOCK_ID_NONE;
1193 	const void              *cfil_id                = NULL;
1194 	size_t                  cfil_id_size            = 0;
1195 	struct inpcb            *inp = sotoinpcb(so);
1196 	struct ifnet *ifp = NULL;
1197 	uint32_t flags = 0;
1198 
1199 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
1200 	if (error) {
1201 		goto done;
1202 	}
1203 
1204 	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
1205 		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1206 		if (find_error == 0 && sid_size > 0) {
1207 			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
1208 			if (signing_id != NULL) {
1209 				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
1210 				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
1211 			}
1212 		}
1213 	}
1214 
1215 	error = flow_divert_add_all_proc_info(fd_cb, so, p, signing_id, connect_packet);
1216 
1217 	if (signing_id != NULL) {
1218 		kfree_data(signing_id, sid_size + 1);
1219 	}
1220 
1221 	if (error) {
1222 		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
1223 		goto done;
1224 	}
1225 
1226 	error = flow_divert_packet_append_tlv(connect_packet,
1227 	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
1228 	    sizeof(fd_cb->so->so_traffic_class),
1229 	    &fd_cb->so->so_traffic_class);
1230 	if (error) {
1231 		goto done;
1232 	}
1233 
1234 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1235 		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
1236 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1237 		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
1238 	} else {
1239 		error = EINVAL;
1240 		goto done;
1241 	}
1242 	error = flow_divert_packet_append_tlv(connect_packet,
1243 	    FLOW_DIVERT_TLV_FLOW_TYPE,
1244 	    sizeof(flow_type),
1245 	    &flow_type);
1246 
1247 	if (error) {
1248 		goto done;
1249 	}
1250 
1251 	if (fd_cb->connect_token != NULL) {
1252 		unsigned int token_len = m_length(fd_cb->connect_token);
1253 		mbuf_concatenate(connect_packet, fd_cb->connect_token);
1254 		mbuf_pkthdr_adjustlen(connect_packet, token_len);
1255 		fd_cb->connect_token = NULL;
1256 	} else {
1257 		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
1258 		if (error) {
1259 			goto done;
1260 		}
1261 
1262 		if (inp->inp_necp_attributes.inp_domain != NULL) {
1263 			size_t domain_length = strlen(inp->inp_necp_attributes.inp_domain);
1264 			if (domain_length > 0 && domain_length <= FLOW_DIVERT_MAX_NAME_SIZE) {
1265 				error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_length, inp->inp_necp_attributes.inp_domain);
1266 			}
1267 		}
1268 	}
1269 
1270 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1271 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1272 		if (error) {
1273 			goto done;
1274 		}
1275 	}
1276 
1277 	if (inp->inp_vflag & INP_IPV4) {
1278 		ifp = inp->inp_last_outifp;
1279 	} else if (inp->inp_vflag & INP_IPV6) {
1280 		ifp = inp->in6p_last_outifp;
1281 	}
1282 	if (ifp != NULL) {
1283 		uint32_t flow_if_index = ifp->if_index;
1284 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
1285 		    sizeof(flow_if_index), &flow_if_index);
1286 		if (error) {
1287 			goto done;
1288 		}
1289 	}
1290 
1291 	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
1292 		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
1293 	}
1294 
1295 	if ((inp->inp_flags & INP_BOUND_IF) ||
1296 	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
1297 	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
1298 		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
1299 	}
1300 
1301 	if (flags != 0) {
1302 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
1303 		if (error) {
1304 			goto done;
1305 		}
1306 	}
1307 
1308 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
1309 		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
1310 	} else {
1311 		cfil_sock_id = cfil_sock_id_from_socket(so);
1312 	}
1313 
1314 	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
1315 		cfil_id = &cfil_sock_id;
1316 		cfil_id_size = sizeof(cfil_sock_id);
1317 	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
1318 		cfil_id = &inp->necp_client_uuid;
1319 		cfil_id_size = sizeof(inp->necp_client_uuid);
1320 	}
1321 
1322 	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
1323 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
1324 		if (error) {
1325 			goto done;
1326 		}
1327 	}
1328 
1329 done:
1330 	if (!error) {
1331 		*out_connect_packet = connect_packet;
1332 	} else if (connect_packet != NULL) {
1333 		mbuf_freem(connect_packet);
1334 	}
1335 
1336 	return error;
1337 }
1338 
1339 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1340 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1341 {
1342 	int error = 0;
1343 	mbuf_t connect_packet = fd_cb->connect_packet;
1344 	mbuf_t saved_connect_packet = NULL;
1345 
1346 	if (connect_packet != NULL) {
1347 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1348 		if (error) {
1349 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1350 			goto done;
1351 		}
1352 
1353 		error = flow_divert_send_packet(fd_cb, connect_packet, TRUE);
1354 		if (error) {
1355 			goto done;
1356 		}
1357 
1358 		fd_cb->connect_packet = saved_connect_packet;
1359 		saved_connect_packet = NULL;
1360 	} else {
1361 		error = ENOENT;
1362 	}
1363 done:
1364 	if (saved_connect_packet != NULL) {
1365 		mbuf_freem(saved_connect_packet);
1366 	}
1367 
1368 	return error;
1369 }
1370 
1371 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1372 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1373 {
1374 	int             error                   = 0;
1375 	mbuf_t  packet                  = NULL;
1376 	int             rbuff_space             = 0;
1377 
1378 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1379 	if (error) {
1380 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1381 		goto done;
1382 	}
1383 
1384 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1385 	if (rbuff_space < 0) {
1386 		rbuff_space = 0;
1387 	}
1388 	rbuff_space = htonl(rbuff_space);
1389 	error = flow_divert_packet_append_tlv(packet,
1390 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1391 	    sizeof(rbuff_space),
1392 	    &rbuff_space);
1393 	if (error) {
1394 		goto done;
1395 	}
1396 
1397 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1398 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1399 		if (error) {
1400 			goto done;
1401 		}
1402 	}
1403 
1404 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1405 	if (error) {
1406 		goto done;
1407 	}
1408 
1409 done:
1410 	if (error && packet != NULL) {
1411 		mbuf_freem(packet);
1412 	}
1413 
1414 	return error;
1415 }
1416 
1417 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1418 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1419 {
1420 	int             error   = 0;
1421 	mbuf_t  packet  = NULL;
1422 	uint32_t        zero    = 0;
1423 
1424 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1425 	if (error) {
1426 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1427 		goto done;
1428 	}
1429 
1430 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1431 	if (error) {
1432 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1433 		goto done;
1434 	}
1435 
1436 	how = htonl(how);
1437 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1438 	if (error) {
1439 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1440 		goto done;
1441 	}
1442 
1443 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1444 	if (error) {
1445 		goto done;
1446 	}
1447 
1448 done:
1449 	if (error && packet != NULL) {
1450 		mbuf_free(packet);
1451 	}
1452 
1453 	return error;
1454 }
1455 
1456 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1457 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1458 {
1459 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1460 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1461 		return SHUT_RDWR;
1462 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1463 		return SHUT_RD;
1464 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1465 		return SHUT_WR;
1466 	}
1467 
1468 	return -1;
1469 }
1470 
1471 /*
1472  * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1473  * writes. Returns FALSE otherwise.
1474  */
1475 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1476 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1477 {
1478 	int             how             = -1;
1479 
1480 	/* Do not send any close messages if there is still data in the send buffer */
1481 	if (fd_cb->so->so_snd.sb_cc == 0) {
1482 		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1483 			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1484 			how = SHUT_RD;
1485 		}
1486 		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1487 			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1488 			if (how == SHUT_RD) {
1489 				how = SHUT_RDWR;
1490 			} else {
1491 				how = SHUT_WR;
1492 			}
1493 		}
1494 	}
1495 
1496 	if (how != -1) {
1497 		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1498 		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1499 			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1500 			if (how != SHUT_RD) {
1501 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1502 			}
1503 			if (how != SHUT_WR) {
1504 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1505 			}
1506 		}
1507 	}
1508 
1509 	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
1510 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
1511 	}
1512 }
1513 
1514 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_t data,size_t data_len,struct sockaddr * toaddr,Boolean force)1515 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, struct sockaddr *toaddr, Boolean force)
1516 {
1517 	mbuf_t  packet = NULL;
1518 	mbuf_t  last = NULL;
1519 	int             error   = 0;
1520 
1521 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1522 	if (error || packet == NULL) {
1523 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1524 		goto done;
1525 	}
1526 
1527 	if (toaddr != NULL) {
1528 		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
1529 		if (error) {
1530 			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
1531 			goto done;
1532 		}
1533 	}
1534 
1535 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1536 		last = m_last(packet);
1537 		mbuf_setnext(last, data);
1538 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1539 	} else {
1540 		data_len = 0;
1541 	}
1542 	error = flow_divert_send_packet(fd_cb, packet, force);
1543 	if (error == 0 && data_len > 0) {
1544 		fd_cb->bytes_sent += data_len;
1545 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1546 	}
1547 
1548 done:
1549 	if (error) {
1550 		if (last != NULL) {
1551 			mbuf_setnext(last, NULL);
1552 		}
1553 		if (packet != NULL) {
1554 			mbuf_freem(packet);
1555 		}
1556 	}
1557 
1558 	return error;
1559 }
1560 
1561 static void
flow_divert_send_buffered_data(struct flow_divert_pcb * fd_cb,Boolean force)1562 flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
1563 {
1564 	size_t  to_send;
1565 	size_t  sent    = 0;
1566 	int             error   = 0;
1567 	mbuf_t  buffer;
1568 
1569 	to_send = fd_cb->so->so_snd.sb_cc;
1570 	buffer = fd_cb->so->so_snd.sb_mb;
1571 
1572 	if (buffer == NULL && to_send > 0) {
1573 		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
1574 		return;
1575 	}
1576 
1577 	/* Ignore the send window if force is enabled */
1578 	if (!force && (to_send > fd_cb->send_window)) {
1579 		to_send = fd_cb->send_window;
1580 	}
1581 
1582 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1583 		while (sent < to_send) {
1584 			mbuf_t  data;
1585 			size_t  data_len;
1586 
1587 			data_len = to_send - sent;
1588 			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
1589 				data_len = FLOW_DIVERT_CHUNK_SIZE;
1590 			}
1591 
1592 			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
1593 			if (error) {
1594 				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1595 				break;
1596 			}
1597 
1598 			error = flow_divert_send_data_packet(fd_cb, data, data_len, NULL, force);
1599 			if (error) {
1600 				if (data != NULL) {
1601 					mbuf_freem(data);
1602 				}
1603 				break;
1604 			}
1605 
1606 			sent += data_len;
1607 		}
1608 		sbdrop(&fd_cb->so->so_snd, (int)sent);
1609 		sowwakeup(fd_cb->so);
1610 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1611 		mbuf_t data;
1612 		mbuf_t m;
1613 		size_t data_len;
1614 
1615 		while (buffer) {
1616 			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);
1617 
1618 			m = buffer;
1619 			if (toaddr != NULL) {
1620 				/* look for data in the chain */
1621 				do {
1622 					m = m->m_next;
1623 					if (m != NULL && m->m_type == MT_DATA) {
1624 						break;
1625 					}
1626 				} while (m);
1627 				if (m == NULL) {
1628 					/* unexpected */
1629 					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
1630 					goto move_on;
1631 				}
1632 			}
1633 			data_len = mbuf_pkthdr_len(m);
1634 			if (data_len > 0) {
1635 				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
1636 				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
1637 				if (error) {
1638 					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1639 					break;
1640 				}
1641 			} else {
1642 				data = NULL;
1643 			}
1644 			error = flow_divert_send_data_packet(fd_cb, data, data_len, toaddr, force);
1645 			if (error) {
1646 				if (data != NULL) {
1647 					mbuf_freem(data);
1648 				}
1649 				break;
1650 			}
1651 			sent += data_len;
1652 move_on:
1653 			buffer = buffer->m_nextpkt;
1654 			(void) sbdroprecord(&(fd_cb->so->so_snd));
1655 		}
1656 	}
1657 
1658 	if (sent > 0) {
1659 		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
1660 		if (fd_cb->send_window >= sent) {
1661 			fd_cb->send_window -= sent;
1662 		} else {
1663 			fd_cb->send_window = 0;
1664 		}
1665 	}
1666 }
1667 
1668 static int
flow_divert_send_app_data(struct flow_divert_pcb * fd_cb,mbuf_t data,struct sockaddr * toaddr)1669 flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data, struct sockaddr *toaddr)
1670 {
1671 	size_t  to_send         = mbuf_pkthdr_len(data);
1672 	int     error           = 0;
1673 
1674 	if (to_send > fd_cb->send_window) {
1675 		to_send = fd_cb->send_window;
1676 	}
1677 
1678 	if (fd_cb->so->so_snd.sb_cc > 0) {
1679 		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
1680 	}
1681 
1682 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1683 		size_t  sent            = 0;
1684 		mbuf_t  remaining_data  = data;
1685 		mbuf_t  pkt_data        = NULL;
1686 		while (sent < to_send && remaining_data != NULL) {
1687 			size_t  pkt_data_len;
1688 
1689 			pkt_data = remaining_data;
1690 
1691 			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
1692 				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
1693 			} else {
1694 				pkt_data_len = to_send - sent;
1695 			}
1696 
1697 			if (pkt_data_len < mbuf_pkthdr_len(pkt_data)) {
1698 				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
1699 				if (error) {
1700 					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
1701 					pkt_data = NULL;
1702 					break;
1703 				}
1704 			} else {
1705 				remaining_data = NULL;
1706 			}
1707 
1708 			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len, NULL, FALSE);
1709 
1710 			if (error) {
1711 				break;
1712 			}
1713 
1714 			pkt_data = NULL;
1715 			sent += pkt_data_len;
1716 		}
1717 
1718 		fd_cb->send_window -= sent;
1719 
1720 		error = 0;
1721 
1722 		if (pkt_data != NULL) {
1723 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1724 				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
1725 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
1726 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1727 				}
1728 			} else {
1729 				mbuf_freem(pkt_data);
1730 				error = ENOBUFS;
1731 			}
1732 		}
1733 
1734 		if (remaining_data != NULL) {
1735 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1736 				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
1737 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
1738 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1739 				}
1740 			} else {
1741 				mbuf_freem(remaining_data);
1742 				error = ENOBUFS;
1743 			}
1744 		}
1745 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1746 		if (to_send || mbuf_pkthdr_len(data) == 0) {
1747 			error = flow_divert_send_data_packet(fd_cb, data, to_send, toaddr, FALSE);
1748 			if (error) {
1749 				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_data_packet failed. send data size = %lu", to_send);
1750 				if (data != NULL) {
1751 					mbuf_freem(data);
1752 				}
1753 			} else {
1754 				fd_cb->send_window -= to_send;
1755 			}
1756 		} else {
1757 			/* buffer it */
1758 			if (sbspace(&fd_cb->so->so_snd) >= (int)mbuf_pkthdr_len(data)) {
1759 				if (toaddr != NULL) {
1760 					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &error)) {
1761 						FDLOG(LOG_ERR, fd_cb,
1762 						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d\n",
1763 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error);
1764 					}
1765 					error = 0;
1766 				} else {
1767 					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
1768 						FDLOG(LOG_ERR, fd_cb,
1769 						    "sbappendrecord failed. send buffer size = %u, send_window = %u, error = %d\n",
1770 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error);
1771 					}
1772 				}
1773 			} else {
1774 				if (data != NULL) {
1775 					mbuf_freem(data);
1776 				}
1777 				error = ENOBUFS;
1778 			}
1779 		}
1780 	}
1781 
1782 	return error;
1783 }
1784 
1785 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1786 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1787 {
1788 	int error = 0;
1789 	mbuf_t packet = NULL;
1790 
1791 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1792 	if (error) {
1793 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1794 		goto done;
1795 	}
1796 
1797 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1798 	if (error) {
1799 		goto done;
1800 	}
1801 
1802 done:
1803 	if (error && packet != NULL) {
1804 		mbuf_free(packet);
1805 	}
1806 
1807 	return error;
1808 }
1809 
1810 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1811 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1812 {
1813 	int             error           = 0;
1814 	mbuf_t  packet          = NULL;
1815 
1816 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
1817 	if (error) {
1818 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
1819 		goto done;
1820 	}
1821 
1822 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
1823 	if (error) {
1824 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
1825 		goto done;
1826 	}
1827 
1828 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1829 	if (error) {
1830 		goto done;
1831 	}
1832 
1833 done:
1834 	if (error && packet != NULL) {
1835 		mbuf_free(packet);
1836 	}
1837 
1838 	return error;
1839 }
1840 
1841 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)1842 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
1843 {
1844 	struct inpcb *inp = sotoinpcb(fd_cb->so);
1845 
1846 	if (local_endpoint->sa_family == AF_INET6) {
1847 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
1848 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
1849 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
1850 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
1851 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
1852 		}
1853 		if (inp->inp_lport == 0) {
1854 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
1855 		}
1856 	} else if (local_endpoint->sa_family == AF_INET) {
1857 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
1858 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
1859 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
1860 		}
1861 		if (inp->inp_lport == 0) {
1862 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
1863 		}
1864 	}
1865 }
1866 
1867 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)1868 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
1869 {
1870 	struct inpcb *inp = sotoinpcb(fd_cb->so);
1871 
1872 	if (remote_endpoint->sa_family == AF_INET6) {
1873 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
1874 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
1875 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
1876 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
1877 		}
1878 		if (inp->inp_fport == 0) {
1879 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
1880 		}
1881 	} else if (remote_endpoint->sa_family == AF_INET) {
1882 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
1883 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
1884 		}
1885 		if (inp->inp_fport == 0) {
1886 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
1887 		}
1888 	}
1889 }
1890 
1891 static uint32_t
flow_divert_derive_kernel_control_unit(uint32_t ctl_unit,uint32_t * aggregate_unit,bool * is_aggregate)1892 flow_divert_derive_kernel_control_unit(uint32_t ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
1893 {
1894 	*is_aggregate = false;
1895 	if (aggregate_unit != NULL && *aggregate_unit != 0) {
1896 		uint32_t counter;
1897 		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
1898 			if ((*aggregate_unit) & (1 << counter)) {
1899 				break;
1900 			}
1901 		}
1902 		if (counter < (GROUP_COUNT_MAX - 1)) {
1903 			*aggregate_unit &= ~(1 << counter);
1904 			*is_aggregate = true;
1905 			return counter + 1;
1906 		} else {
1907 			return ctl_unit;
1908 		}
1909 	} else {
1910 		return ctl_unit;
1911 	}
1912 }
1913 
1914 static int
flow_divert_try_next(struct flow_divert_pcb * fd_cb)1915 flow_divert_try_next(struct flow_divert_pcb *fd_cb)
1916 {
1917 	uint32_t current_ctl_unit = 0;
1918 	uint32_t next_ctl_unit = 0;
1919 	struct flow_divert_group *current_group = NULL;
1920 	struct flow_divert_group *next_group = NULL;
1921 	int error = 0;
1922 	bool is_aggregate = false;
1923 
1924 	next_ctl_unit = flow_divert_derive_kernel_control_unit(fd_cb->policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);
1925 	current_ctl_unit = fd_cb->control_group_unit;
1926 
1927 	if (current_ctl_unit == next_ctl_unit) {
1928 		FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
1929 		error = EALREADY;
1930 		goto done;
1931 	}
1932 
1933 	if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
1934 		FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
1935 		error = ENOENT;
1936 		goto done;
1937 	}
1938 
1939 	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
1940 		FDLOG0(LOG_NOTICE, fd_cb, "No active groups, disabling flow divert");
1941 		error = ENOENT;
1942 		goto done;
1943 	}
1944 
1945 	next_group = g_flow_divert_groups[next_ctl_unit];
1946 	if (next_group == NULL) {
1947 		FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
1948 		error = ENOENT;
1949 		goto done;
1950 	}
1951 
1952 	current_group = fd_cb->group;
1953 
1954 	lck_rw_lock_exclusive(&(current_group->lck));
1955 	lck_rw_lock_exclusive(&(next_group->lck));
1956 
1957 	FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", current_ctl_unit, next_ctl_unit);
1958 
1959 	RB_REMOVE(fd_pcb_tree, &(current_group->pcb_tree), fd_cb);
1960 	if (RB_INSERT(fd_pcb_tree, &(next_group->pcb_tree), fd_cb) != NULL) {
1961 		panic("group with unit %u already contains a connection with hash %u", next_ctl_unit, fd_cb->hash);
1962 	}
1963 
1964 	fd_cb->group = next_group;
1965 	fd_cb->control_group_unit = next_ctl_unit;
1966 	if (is_aggregate) {
1967 		fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
1968 	} else {
1969 		fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
1970 	}
1971 
1972 	lck_rw_done(&(next_group->lck));
1973 	lck_rw_done(&(current_group->lck));
1974 
1975 	error = flow_divert_send_connect_packet(fd_cb);
1976 	if (error) {
1977 		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", next_ctl_unit);
1978 		error = ENOENT;
1979 		goto done;
1980 	}
1981 
1982 done:
1983 	return error;
1984 }
1985 
/*
 * Give up on flow divert for this flow and fall back to the socket's
 * original protocol.
 *
 * Steps, in order: restore the inpcb state saved in fd_cb, detach fd_cb from
 * the socket, remove it from its group and drop the socket's reference,
 * switch so_proto back to the protocol found by pffindproto(), re-issue the
 * connect through that protocol (skipped when FLOW_DIVERT_IMPLICIT_CONNECT
 * is set), and finally re-send whatever is sitting in the socket's send
 * buffer through the protocol's pru_send.
 *
 * If an error is left in `error` at the end, it is stored in so->so_error
 * and the socket is disconnected via flow_divert_disconnect_socket().
 */
1986 static void
flow_divert_disable(struct flow_divert_pcb * fd_cb)1987 flow_divert_disable(struct flow_divert_pcb *fd_cb)
1988 {
1989 	struct socket *so = NULL;
1990 	mbuf_t  buffer;
1991 	int error = 0;
1992 	proc_t last_proc = NULL;
1993 	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
	/* An explicit connect is only replayed when the flow was not implicitly connected */
1994 	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
1995 	struct inpcb *inp = NULL;
1996 
1997 	so = fd_cb->so;
1998 	if (so == NULL) {
1999 		goto done;
2000 	}
2001 
2002 	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");
2003 
2004 	/* Restore the IP state */
2005 	inp = sotoinpcb(so);
2006 	inp->inp_vflag = fd_cb->original_vflag;
2007 	inp->inp_faddr.s_addr = INADDR_ANY;
2008 	inp->inp_fport = 0;
2009 	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
2010 	inp->inp_fifscope = IFSCOPE_NONE;
2011 	inp->in6p_fport = 0;
2012 	/* If flow divert set the local address, clear it out */
2013 	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
2014 		inp->inp_laddr.s_addr = INADDR_ANY;
2015 		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
2016 		inp->inp_lifscope = IFSCOPE_NONE;
2017 	}
2018 	inp->inp_last_outifp = fd_cb->original_last_outifp;
2019 	inp->in6p_last_outifp = fd_cb->original_last_outifp6;
2020 
2021 	/* Dis-associate the socket */
2022 	so->so_flags &= ~SOF_FLOW_DIVERT;
2023 	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
2024 	so->so_fd_pcb = NULL;
2025 	fd_cb->so = NULL;
2026 
2027 	/* Remove from the group */
2028 	flow_divert_pcb_remove(fd_cb);
2029 
	/* NOTE(review): fd_cb is still used for logging below; presumably the caller holds its own reference - confirm */
2030 	FDRELEASE(fd_cb); /* Release the socket's reference */
2031 
2032 	/* Revert back to the original protocol */
2033 	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));
2034 
2035 	/* Reset the socket state to avoid confusing NECP */
2036 	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);
2037 
	/* Prefer the process recorded in so->last_pid; current_proc() is the fallback where a proc is needed below */
2038 	last_proc = proc_find(so->last_pid);
2039 
2040 	if (do_connect) {
2041 		/* Connect using the original protocol */
2042 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
2043 		if (error) {
2044 			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
2045 			goto done;
2046 		}
2047 	}
2048 
2049 	buffer = so->so_snd.sb_mb;
2050 	if (buffer == NULL) {
2051 		/* No buffered data, done */
2052 		goto done;
2053 	}
2054 
2055 	/* Send any buffered data using the original protocol */
2056 	if (SOCK_TYPE(so) == SOCK_STREAM) {
2057 		mbuf_t data_to_send = NULL;
2058 		size_t data_len = so->so_snd.sb_cc;
2059 
		/* Copy out everything currently buffered (sb_cc bytes) before the send buffer is flushed */
2060 		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
2061 		if (error) {
2062 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
2063 			goto done;
2064 		}
2065 
2066 		sbflush(&so->so_snd);
2067 
2068 		if (data_to_send->m_flags & M_PKTHDR) {
2069 			mbuf_pkthdr_setlen(data_to_send, data_len);
2070 		}
2071 
2072 		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2073 		    0,
2074 		    data_to_send,
2075 		    NULL,
2076 		    NULL,
2077 		    (last_proc != NULL ? last_proc : current_proc()));
2078 
		/* EWOULDBLOCK is not treated as a failure here */
2079 		if (error && error != EWOULDBLOCK) {
2080 			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
2081 		} else {
2082 			error = 0;
2083 		}
2084 	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
2085 		struct sockbuf *sb = &so->so_snd;
2086 		MBUFQ_HEAD(send_queue_head) send_queue;
2087 		MBUFQ_INIT(&send_queue);
2088 
2089 		/* Flush the send buffer, moving all records to a temporary queue */
2090 		while (sb->sb_mb != NULL) {
2091 			mbuf_t record = sb->sb_mb;
2092 			mbuf_t m = record;
2093 			sb->sb_mb = sb->sb_mb->m_nextpkt;
			/* Un-account every mbuf of this record from the socket buffer */
2094 			while (m != NULL) {
2095 				sbfree(sb, m);
2096 				m = m->m_next;
2097 			}
2098 			record->m_nextpkt = NULL;
2099 			MBUFQ_ENQUEUE(&send_queue, record);
2100 		}
2101 		SB_EMPTY_FIXUP(sb);
2102 
		/* Re-send the records one at a time through the original protocol */
2103 		while (!MBUFQ_EMPTY(&send_queue)) {
2104 			mbuf_t next_record = MBUFQ_FIRST(&send_queue);
2105 			mbuf_t addr = NULL;
2106 			mbuf_t control = NULL;
2107 			mbuf_t last_control = NULL;
2108 			mbuf_t data = NULL;
2109 			mbuf_t m = next_record;
2110 			struct sockaddr *to_endpoint = NULL;
2111 
2112 			MBUFQ_DEQUEUE(&send_queue, next_record);
2113 
			/*
			 * Walk the record and note its parts: an MT_SONAME mbuf, the first
			 * and last MT_CONTROL mbufs, and the first MT_DATA mbuf (where the
			 * walk stops).
			 */
2114 			while (m != NULL) {
2115 				if (m->m_type == MT_SONAME) {
2116 					addr = m;
2117 				} else if (m->m_type == MT_CONTROL) {
2118 					if (control == NULL) {
2119 						control = m;
2120 					}
2121 					last_control = m;
2122 				} else if (m->m_type == MT_DATA) {
2123 					data = m;
2124 					break;
2125 				}
2126 				m = m->m_next;
2127 			}
2128 
			/* The buffered destination address is only used when no explicit connect was replayed */
2129 			if (addr != NULL && !do_connect) {
2130 				to_endpoint = flow_divert_get_buffered_target_address(addr);
2131 				if (to_endpoint == NULL) {
2132 					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
2133 				}
2134 			}
2135 
2136 			if (data == NULL) {
2137 				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
2138 				mbuf_freem(next_record);
2139 				continue;
2140 			}
2141 
2142 			if (!(data->m_flags & M_PKTHDR)) {
2143 				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
2144 				mbuf_freem(next_record);
2145 				continue;
2146 			}
2147 
			/* Unlink the address mbuf from the rest of the record; it is freed separately after the send */
2148 			if (addr != NULL) {
2149 				addr->m_next = NULL;
2150 			}
2151 
			/* Terminate the control chain so it no longer links to the data */
2152 			if (last_control != NULL) {
2153 				last_control->m_next = NULL;
2154 			}
2155 
2156 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2157 			    0,
2158 			    data,
2159 			    to_endpoint,
2160 			    control,
2161 			    (last_proc != NULL ? last_proc : current_proc()));
2162 
2163 			if (addr != NULL) {
2164 				mbuf_freem(addr);
2165 			}
2166 
			/* A failed send is logged and the loop moves on to the next record */
2167 			if (error) {
2168 				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
2169 			}
2170 		}
2171 	}
2172 done:
2173 	if (last_proc != NULL) {
2174 		proc_rele(last_proc);
2175 	}
2176 
	/* For datagrams, `error` here is the result of the last record sent */
2177 	if (error && so != NULL) {
2178 		so->so_error = (uint16_t)error;
2179 		flow_divert_disconnect_socket(so, do_connect);
2180 	}
2181 }
2182 
2183 static void
flow_divert_scope(struct flow_divert_pcb * fd_cb,int out_if_index,bool derive_new_address)2184 flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
2185 {
2186 	struct socket *so = NULL;
2187 	struct inpcb *inp = NULL;
2188 	struct ifnet *current_ifp = NULL;
2189 	struct ifnet *new_ifp = NULL;
2190 	int error = 0;
2191 
2192 	so = fd_cb->so;
2193 	if (so == NULL) {
2194 		return;
2195 	}
2196 
2197 	inp = sotoinpcb(so);
2198 
2199 	if (out_if_index <= 0) {
2200 		return;
2201 	}
2202 
2203 	if (inp->inp_vflag & INP_IPV6) {
2204 		current_ifp = inp->in6p_last_outifp;
2205 	} else {
2206 		current_ifp = inp->inp_last_outifp;
2207 	}
2208 
2209 	if (current_ifp != NULL) {
2210 		if (current_ifp->if_index == out_if_index) {
2211 			/* No change */
2212 			return;
2213 		}
2214 
2215 		/* Scope the socket to the given interface */
2216 		error = inp_bindif(inp, out_if_index, &new_ifp);
2217 		if (error != 0) {
2218 			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
2219 			return;
2220 		}
2221 
2222 		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
2223 			/* Get the appropriate address for the given interface */
2224 			if (inp->inp_vflag & INP_IPV6) {
2225 				inp->in6p_laddr = sa6_any.sin6_addr;
2226 				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
2227 			} else {
2228 				inp->inp_laddr.s_addr = INADDR_ANY;
2229 				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
2230 			}
2231 
2232 			if (error != 0) {
2233 				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
2234 			}
2235 		}
2236 	} else {
2237 		ifnet_head_lock_shared();
2238 		if (out_if_index <= if_index) {
2239 			new_ifp = ifindex2ifnet[out_if_index];
2240 		}
2241 		ifnet_head_done();
2242 	}
2243 
2244 	/* Update the "last interface" of the socket */
2245 	if (new_ifp != NULL) {
2246 		if (inp->inp_vflag & INP_IPV6) {
2247 			inp->in6p_last_outifp = new_ifp;
2248 		} else {
2249 			inp->inp_last_outifp = new_ifp;
2250 		}
2251 
2252 #if SKYWALK
2253 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2254 			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
2255 		}
2256 #endif /* SKYWALK */
2257 	}
2258 }
2259 
/*
 * Process a connect result packet received for fd_cb.
 *
 * Required TLVs: FLOW_DIVERT_TLV_ERROR_CODE and FLOW_DIVERT_TLV_SPACE_AVAILABLE
 * (the function returns without doing anything if either is missing).
 * Optional TLVs: CTL_UNIT, LOCAL_ADDR, REMOTE_ADDR, OUT_IF_INDEX, APP_DATA.
 *
 * When the result carries no error and local processing succeeds, the
 * endpoints, interface scope, app data, group and send window of fd_cb are
 * updated, a connect result is sent back, buffered data is sent and the
 * socket is marked connected.
 *
 * When the result carries an error and the current control unit differs from
 * the policy control unit, the next control unit is tried via
 * flow_divert_try_next(); if that fails, flow divert is disabled for the
 * flow. Otherwise the socket is moved to the closed state with so_error set
 * and is disconnected.
 *
 * Locks taken, in order: g_flow_divert_group_lck (shared), the pcb lock
 * (FDLOCK), then the socket lock.
 */
2260 static void
flow_divert_handle_connect_result(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2261 flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2262 {
2263 	uint32_t                                        connect_error = 0;
2264 	uint32_t                                        ctl_unit                        = 0;
2265 	int                                                     error                           = 0;
2266 	struct flow_divert_group        *grp                            = NULL;
2267 	union sockaddr_in_4_6 local_endpoint = {};
2268 	union sockaddr_in_4_6 remote_endpoint = {};
2269 	int                                                     out_if_index            = 0;
2270 	uint32_t                                        send_window;
2271 	uint32_t                                        app_data_length         = 0;
2272 
2273 	memset(&local_endpoint, 0, sizeof(local_endpoint));
2274 	memset(&remote_endpoint, 0, sizeof(remote_endpoint));
2275 
2276 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
2277 	if (error) {
2278 		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
2279 		return;
2280 	}
2281 
2282 	connect_error = ntohl(connect_error);
2283 	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);
2284 
	/* send_window stays in network byte order until it is stored in fd_cb below */
2285 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
2286 	if (error) {
2287 		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
2288 		return;
2289 	}
2290 
	/* The remaining TLVs are optional: a missing one is logged and processing continues */
2291 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
2292 	if (error) {
2293 		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
2294 	}
2295 
2296 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sa), NULL);
2297 	if (error) {
2298 		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
2299 	}
2300 
2301 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sa), NULL);
2302 	if (error) {
2303 		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2304 	}
2305 
2306 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2307 	if (error) {
2308 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
2309 	}
2310 
	/* Only the length of the app data is queried here; the value is copied out further down */
2311 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2312 	if (error) {
2313 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
2314 	}
2315 
	/* Errors from the optional lookups above are not carried forward */
2316 	error = 0;
2317 	ctl_unit                = ntohl(ctl_unit);
2318 
	/* Held shared until the end of the function */
2319 	lck_rw_lock_shared(&g_flow_divert_group_lck);
2320 
	/* The result named a control unit: look up the group the flow should move to */
2321 	if (connect_error == 0 && ctl_unit > 0) {
2322 		if (ctl_unit >= GROUP_COUNT_MAX) {
2323 			FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
2324 			error = EINVAL;
2325 		} else if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
2326 			FDLOG0(LOG_ERR, fd_cb, "No active groups, dropping connection");
2327 			error = EINVAL;
2328 		} else {
2329 			grp = g_flow_divert_groups[ctl_unit];
2330 			if (grp == NULL) {
2331 				error = ECONNRESET;
2332 			}
2333 		}
2334 	}
2335 
2336 	FDLOCK(fd_cb);
2337 	if (fd_cb->so != NULL) {
2338 		struct inpcb                            *inp = NULL;
2339 		struct flow_divert_group        *old_group;
		/* Cached because flow_divert_disable() below clears fd_cb->so, and the socket must still be unlocked at done */
2340 		struct socket *so = fd_cb->so;
2341 		bool local_address_is_valid = false;
2342 
2343 		socket_lock(so, 0);
2344 
2345 		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2346 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
2347 			goto done;
2348 		}
2349 
2350 		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
2351 			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
2352 			goto done;
2353 		}
2354 
2355 		inp = sotoinpcb(so);
2356 
		/* Any failure so far skips the state updates and goes straight to error handling */
2357 		if (connect_error || error) {
2358 			goto set_socket_state;
2359 		}
2360 
		/*
		 * Adopt the received local address only when its family matches the
		 * inpcb's IP version and the address is not the wildcard; otherwise
		 * only the port is taken from it.
		 */
2361 		if (flow_divert_is_sockaddr_valid(&(local_endpoint.sa))) {
2362 			if (local_endpoint.sa.sa_family == AF_INET) {
2363 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2364 				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
2365 					local_address_is_valid = true;
2366 					fd_cb->local_endpoint = local_endpoint;
2367 					inp->inp_laddr.s_addr = INADDR_ANY;
2368 				} else {
2369 					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
2370 				}
2371 			} else if (local_endpoint.sa.sa_family == AF_INET6) {
2372 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2373 				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
2374 					local_address_is_valid = true;
2375 					fd_cb->local_endpoint = local_endpoint;
2376 					inp->in6p_laddr = sa6_any.sin6_addr;
2377 				} else {
2378 					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
2379 				}
2380 			}
2381 		}
2382 
		/* A local address is derived from the interface only when no usable one was received */
2383 		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
2384 		flow_divert_set_local_endpoint(fd_cb, &(fd_cb->local_endpoint.sa));
2385 
		/* The received remote address is only applied to stream sockets */
2386 		if (flow_divert_is_sockaddr_valid(&(remote_endpoint.sa)) && SOCK_TYPE(so) == SOCK_STREAM) {
2387 			if (remote_endpoint.sa.sa_family == AF_INET) {
2388 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2389 			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
2390 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2391 			}
2392 			flow_divert_set_remote_endpoint(fd_cb, &(remote_endpoint.sa));
2393 		}
2394 
		/* Replace any app data already stored on the pcb with the data from this result */
2395 		if (app_data_length > 0) {
2396 			uint8_t *app_data = NULL;
2397 			app_data = kalloc_data(app_data_length, Z_WAITOK);
2398 			if (app_data != NULL) {
2399 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2400 				if (error == 0) {
2401 					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
2402 					if (fd_cb->app_data != NULL) {
2403 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
2404 					}
2405 					fd_cb->app_data = app_data;
2406 					fd_cb->app_data_length = app_data_length;
2407 				} else {
2408 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
2409 					kfree_data(app_data, app_data_length);
2410 				}
2411 			} else {
				/* An allocation failure is only logged; error is left untouched */
2412 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
2413 			}
2414 		}
2415 
2416 		if (error) {
2417 			goto set_socket_state;
2418 		}
2419 
2420 		if (fd_cb->group == NULL) {
2421 			error = EINVAL;
2422 			goto set_socket_state;
2423 		}
2424 
		/* Move the pcb from its current group's tree into the tree of the group named by the result */
2425 		if (grp != NULL) {
2426 			old_group = fd_cb->group;
2427 
2428 			lck_rw_lock_exclusive(&old_group->lck);
2429 			lck_rw_lock_exclusive(&grp->lck);
2430 
2431 			RB_REMOVE(fd_pcb_tree, &old_group->pcb_tree, fd_cb);
2432 			if (RB_INSERT(fd_pcb_tree, &grp->pcb_tree, fd_cb) != NULL) {
2433 				panic("group with unit %u already contains a connection with hash %u", grp->ctl_unit, fd_cb->hash);
2434 			}
2435 
2436 			fd_cb->group = grp;
2437 
2438 			lck_rw_done(&grp->lck);
2439 			lck_rw_done(&old_group->lck);
2440 		}
2441 
2442 		fd_cb->send_window = ntohl(send_window);
2443 
2444 set_socket_state:
2445 		if (!connect_error && !error) {
2446 			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
2447 			error = flow_divert_send_connect_result(fd_cb);
2448 		}
2449 
2450 		if (connect_error || error) {
			/*
			 * The result carried an error and the current control unit is not
			 * the policy control unit: try the next control unit, and disable
			 * flow divert for this flow if that fails. The connect packet and
			 * original remote endpoint are kept (the cleanup below is skipped).
			 */
2451 			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
2452 				error = flow_divert_try_next(fd_cb);
2453 				if (error) {
2454 					flow_divert_disable(fd_cb);
2455 				}
2456 				goto done;
2457 			}
2458 
2459 			if (!connect_error) {
				/* Local failure: report `error` on the socket and send a close if needed */
2460 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
2461 				so->so_error = (uint16_t)error;
2462 				flow_divert_send_close_if_needed(fd_cb);
2463 			} else {
				/* Failure reported in the connect result: report connect_error on the socket */
2464 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
2465 				so->so_error = (uint16_t)connect_error;
2466 			}
2467 			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2468 		} else {
2469 #if NECP
2470 			/* Update NECP client with connected five-tuple */
2471 			if (!uuid_is_null(inp->necp_client_uuid)) {
				/* The socket lock is dropped around the NECP call and re-taken afterwards */
2472 				socket_unlock(so, 0);
2473 				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
2474 				socket_lock(so, 0);
2475 			}
2476 #endif /* NECP */
2477 
2478 			flow_divert_send_buffered_data(fd_cb, FALSE);
2479 			soisconnected(so);
2480 		}
2481 
2482 		/* We don't need the connect packet any more */
2483 		if (fd_cb->connect_packet != NULL) {
2484 			mbuf_freem(fd_cb->connect_packet);
2485 			fd_cb->connect_packet = NULL;
2486 		}
2487 
2488 		/* We don't need the original remote endpoint any more */
2489 		free_sockaddr(fd_cb->original_remote_endpoint);
2490 done:
2491 		socket_unlock(so, 0);
2492 	}
2493 	FDUNLOCK(fd_cb);
2494 
2495 	lck_rw_done(&g_flow_divert_group_lck);
2496 }
2497 
2498 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2499 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2500 {
2501 	uint32_t        close_error                     = 0;
2502 	int                     error                   = 0;
2503 	int                     how                     = 0;
2504 
2505 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2506 	if (error) {
2507 		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2508 		return;
2509 	}
2510 
2511 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2512 	if (error) {
2513 		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2514 		return;
2515 	}
2516 
2517 	how = ntohl(how);
2518 
2519 	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2520 
2521 	FDLOCK(fd_cb);
2522 	if (fd_cb->so != NULL) {
2523 		bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2524 		socket_lock(fd_cb->so, 0);
2525 
2526 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2527 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2528 			goto done;
2529 		}
2530 
2531 		fd_cb->so->so_error = (uint16_t)ntohl(close_error);
2532 
2533 		flow_divert_update_closed_state(fd_cb, how, TRUE);
2534 
2535 		/* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
2536 		how = flow_divert_tunnel_how_closed(fd_cb);
2537 		if (how == SHUT_RDWR) {
2538 			flow_divert_disconnect_socket(fd_cb->so, is_connected);
2539 		} else if (how == SHUT_RD && is_connected) {
2540 			socantrcvmore(fd_cb->so);
2541 		} else if (how == SHUT_WR && is_connected) {
2542 			socantsendmore(fd_cb->so);
2543 		}
2544 done:
2545 		socket_unlock(fd_cb->so, 0);
2546 	}
2547 	FDUNLOCK(fd_cb);
2548 }
2549 
2550 static mbuf_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2551 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2552 {
2553 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2554 	bool need_recvdstaddr = false;
2555 	/* Socket flow tracking needs to see the local address */
2556 	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2557 	if ((inp->inp_vflag & INP_IPV4) &&
2558 	    fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2559 	    ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2560 		return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2561 	} else if ((inp->inp_vflag & INP_IPV6) &&
2562 	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2563 	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2564 		struct in6_pktinfo pi6;
2565 		memset(&pi6, 0, sizeof(pi6));
2566 		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2567 
2568 		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2569 	}
2570 	return NULL;
2571 }
2572 
/*
 * Deliver the payload of an inbound data packet to the receive buffer of
 * fd_cb's socket.
 *
 * `offset` is where the payload (or, for datagram sockets, an optional
 * FLOW_DIVERT_TLV_REMOTE_ADDR TLV followed by the payload) starts in
 * `packet`. Stream data is appended with sbappendstream(); datagram data is
 * appended with sbappendaddr() together with a source address and an
 * optional control mbuf from flow_divert_create_control_mbuf().
 *
 * Returns 0, ENOBUFS when the receive buffer has no space (in which case
 * FLOW_DIVERT_NOTIFY_ON_RECEIVED is set on the pcb), or the error from
 * mbuf_split() or from obtaining the datagram source address.
 */
2573 static int
flow_divert_handle_data(struct flow_divert_pcb * fd_cb,mbuf_t packet,size_t offset)2574 flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
2575 {
2576 	int error = 0;
2577 
2578 	FDLOCK(fd_cb);
2579 	if (fd_cb->so != NULL) {
2580 		mbuf_t  data            = NULL;
2581 		size_t  data_size;
2582 		struct sockaddr_storage remote_address;
2583 		boolean_t got_remote_sa = FALSE;
		/* appended: sorwakeup() is needed; append_success: the bytes are counted as received */
2584 		boolean_t appended = FALSE;
2585 		boolean_t append_success = FALSE;
2586 
2587 		socket_lock(fd_cb->so, 0);
2588 
2589 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2590 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
2591 			goto done;
2592 		}
2593 
2594 		if (sbspace(&fd_cb->so->so_rcv) == 0) {
2595 			error = ENOBUFS;
2596 			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
2597 			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
2598 			goto done;
2599 		}
2600 
2601 		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
2602 			uint32_t val_size = 0;
2603 
2604 			/* check if we got remote address with data */
2605 			memset(&remote_address, 0, sizeof(remote_address));
2606 			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
2607 			if (error || val_size > sizeof(remote_address)) {
2608 				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
				/* A missing or oversized address TLV is not an error; offset is left unchanged */
2609 				error = 0;
2610 			} else {
2611 				if (remote_address.ss_len > sizeof(remote_address)) {
2612 					remote_address.ss_len = sizeof(remote_address);
2613 				}
2614 				/* validate the address */
2615 				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
2616 					got_remote_sa = TRUE;
2617 				} else {
2618 					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
2619 				}
				/* Skip past the TLV (one type byte, a 32-bit length, then the value) to reach the payload */
2620 				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
2621 			}
2622 		}
2623 
		/* Everything from offset to the end of the packet is payload */
2624 		data_size = (mbuf_pkthdr_len(packet) - offset);
2625 
2626 		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
2627 			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
2628 			goto done;
2629 		}
2630 
2631 		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
2632 			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
2633 			goto done;
2634 		}
2635 
2636 		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
2637 
		/* Split the payload off into its own chain (`data`) */
2638 		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
2639 		if (error || data == NULL) {
2640 			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
2641 			goto done;
2642 		}
2643 
2644 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
2645 			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
			/* For streams the bytes are counted as received regardless of the sbappendstream() result */
2646 			append_success = TRUE;
2647 		} else {
2648 			struct sockaddr *append_sa = NULL;
2649 			mbuf_t mctl;
2650 
			/* Source address: the one carried in the packet if valid, otherwise the socket's peer address */
2651 			if (got_remote_sa == TRUE) {
2652 				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
2653 			} else {
2654 				if (fd_cb->so->so_proto->pr_domain->dom_family == AF_INET6) {
2655 					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
2656 				} else {
2657 					error = in_getpeeraddr(fd_cb->so, &append_sa);
2658 				}
2659 			}
			/* NOTE(review): on failure this is only logged; the append below still runs and `error` is returned to the caller - confirm intended */
2660 			if (error) {
2661 				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
2662 			}
2663 
2664 			mctl = flow_divert_create_control_mbuf(fd_cb);
2665 			int append_error = 0;
2666 			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
			/* Not appended with no error reported still counts as success for accounting */
2667 			if (appended || append_error == 0) {
2668 				append_success = TRUE;
2669 			} else {
2670 				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
2671 			}
2672 
2673 			free_sockaddr(append_sa);
2674 		}
2675 
2676 		if (append_success) {
2677 			fd_cb->bytes_received += data_size;
2678 			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
2679 		}
2680 
2681 		if (appended) {
2682 			sorwakeup(fd_cb->so);
2683 		}
2684 done:
2685 		socket_unlock(fd_cb->so, 0);
2686 	}
2687 	FDUNLOCK(fd_cb);
2688 
2689 	return error;
2690 }
2691 
2692 static void
flow_divert_handle_read_notification(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2693 flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2694 {
2695 	uint32_t        read_count              = 0;
2696 	int             error                   = 0;
2697 
2698 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
2699 	if (error) {
2700 		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
2701 		return;
2702 	}
2703 
2704 	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
2705 
2706 	FDLOCK(fd_cb);
2707 	if (fd_cb->so != NULL) {
2708 		socket_lock(fd_cb->so, 0);
2709 
2710 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2711 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
2712 			goto done;
2713 		}
2714 
2715 		fd_cb->send_window += ntohl(read_count);
2716 		flow_divert_send_buffered_data(fd_cb, FALSE);
2717 done:
2718 		socket_unlock(fd_cb->so, 0);
2719 	}
2720 	FDUNLOCK(fd_cb);
2721 }
2722 
2723 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_t packet,int offset)2724 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset)
2725 {
2726 	int error         = 0;
2727 	uint32_t key_size = 0;
2728 	int log_level     = 0;
2729 	uint32_t flags    = 0;
2730 
2731 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2732 	if (error) {
2733 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2734 		return;
2735 	}
2736 
2737 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2738 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2739 		return;
2740 	}
2741 
2742 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2743 	if (!error) {
2744 		nil_pcb.log_level = (uint8_t)log_level;
2745 	}
2746 
2747 	lck_rw_lock_exclusive(&group->lck);
2748 
2749 	if (group->token_key != NULL) {
2750 		kfree_data(group->token_key, group->token_key_size);
2751 		group->token_key = NULL;
2752 	}
2753 
2754 	group->token_key = kalloc_data(key_size, Z_WAITOK);
2755 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2756 	if (error) {
2757 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2758 		kfree_data(group->token_key, key_size);
2759 		group->token_key = NULL;
2760 		lck_rw_done(&group->lck);
2761 		return;
2762 	}
2763 
2764 	group->token_key_size = key_size;
2765 
2766 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2767 	if (!error) {
2768 		group->flags = flags;
2769 	}
2770 
2771 	lck_rw_done(&group->lck);
2772 }
2773 
2774 static void
flow_divert_handle_properties_update(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2775 flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2776 {
2777 	int                                                     error                           = 0;
2778 	int                                                     out_if_index            = 0;
2779 	uint32_t                                        app_data_length         = 0;
2780 
2781 	FDLOG0(LOG_INFO, fd_cb, "received a properties update");
2782 
2783 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2784 	if (error) {
2785 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
2786 	}
2787 
2788 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2789 	if (error) {
2790 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
2791 	}
2792 
2793 	FDLOCK(fd_cb);
2794 	if (fd_cb->so != NULL) {
2795 		socket_lock(fd_cb->so, 0);
2796 
2797 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2798 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
2799 			goto done;
2800 		}
2801 
2802 		if (out_if_index > 0) {
2803 			flow_divert_scope(fd_cb, out_if_index, true);
2804 			flow_divert_set_local_endpoint(fd_cb, &(fd_cb->local_endpoint.sa));
2805 		}
2806 
2807 		if (app_data_length > 0) {
2808 			uint8_t *app_data = NULL;
2809 			app_data = kalloc_data(app_data_length, Z_WAITOK);
2810 			if (app_data != NULL) {
2811 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2812 				if (error == 0) {
2813 					if (fd_cb->app_data != NULL) {
2814 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
2815 					}
2816 					fd_cb->app_data = app_data;
2817 					fd_cb->app_data_length = app_data_length;
2818 				} else {
2819 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
2820 					kfree_data(app_data, app_data_length);
2821 				}
2822 			} else {
2823 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
2824 			}
2825 		}
2826 done:
2827 		socket_unlock(fd_cb->so, 0);
2828 	}
2829 	FDUNLOCK(fd_cb);
2830 }
2831 
2832 static void
flow_divert_handle_app_map_create(struct flow_divert_group * group,mbuf_t packet,int offset)2833 flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_t packet, int offset)
2834 {
2835 	size_t bytes_mem_size;
2836 	size_t child_maps_mem_size;
2837 	size_t nodes_mem_size;
2838 	size_t trie_memory_size = 0;
2839 	int cursor;
2840 	int error = 0;
2841 	struct flow_divert_trie new_trie;
2842 	int insert_error = 0;
2843 	int prefix_count = -1;
2844 	int signing_id_count = 0;
2845 	size_t bytes_count = 0;
2846 	size_t nodes_count = 0;
2847 	size_t maps_count = 0;
2848 
2849 	lck_rw_lock_exclusive(&group->lck);
2850 
2851 	/* Re-set the current trie */
2852 	if (group->signing_id_trie.memory != NULL) {
2853 		kfree_data_addr(group->signing_id_trie.memory);
2854 	}
2855 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
2856 	group->signing_id_trie.root = NULL_TRIE_IDX;
2857 
2858 	memset(&new_trie, 0, sizeof(new_trie));
2859 
2860 	/* Get the number of shared prefixes in the new set of signing ID strings */
2861 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
2862 
2863 	if (prefix_count < 0 || error) {
2864 		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
2865 		lck_rw_done(&group->lck);
2866 		return;
2867 	}
2868 
2869 	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
2870 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
2871 	    cursor >= 0;
2872 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
2873 		uint32_t sid_size = 0;
2874 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
2875 		if (error || sid_size == 0) {
2876 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
2877 			signing_id_count = 0;
2878 			break;
2879 		}
2880 		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
2881 			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
2882 			signing_id_count = 0;
2883 			break;
2884 		}
2885 		signing_id_count++;
2886 	}
2887 
2888 	if (signing_id_count == 0) {
2889 		lck_rw_done(&group->lck);
2890 		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
2891 		return;
2892 	}
2893 
2894 	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
2895 		lck_rw_done(&group->lck);
2896 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
2897 		return;
2898 	}
2899 
2900 	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
2901 		lck_rw_done(&group->lck);
2902 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
2903 		return;
2904 	}
2905 
2906 	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
2907 		lck_rw_done(&group->lck);
2908 		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
2909 		return;
2910 	}
2911 
2912 	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
2913 	    nodes_count, maps_count, bytes_count);
2914 
2915 	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
2916 	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
2917 	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
2918 	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
2919 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
2920 		lck_rw_done(&group->lck);
2921 		return;
2922 	}
2923 
2924 	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
2925 		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
2926 		lck_rw_done(&group->lck);
2927 		return;
2928 	}
2929 
2930 	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
2931 	if (new_trie.memory == NULL) {
2932 		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
2933 		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
2934 		lck_rw_done(&group->lck);
2935 		return;
2936 	}
2937 
2938 	new_trie.bytes_count = (uint16_t)bytes_count;
2939 	new_trie.nodes_count = (uint16_t)nodes_count;
2940 	new_trie.child_maps_count = (uint16_t)maps_count;
2941 
2942 	/* Initialize the free lists */
2943 	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
2944 	new_trie.nodes_free_next = 0;
2945 	memset(new_trie.nodes, 0, nodes_mem_size);
2946 
2947 	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
2948 	new_trie.child_maps_free_next = 0;
2949 	memset(new_trie.child_maps, 0xff, child_maps_mem_size);
2950 
2951 	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
2952 	new_trie.bytes_free_next = 0;
2953 	memset(new_trie.bytes, 0, bytes_mem_size);
2954 
2955 	/* The root is an empty node */
2956 	new_trie.root = trie_node_alloc(&new_trie);
2957 
2958 	/* Add each signing ID to the trie */
2959 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
2960 	    cursor >= 0;
2961 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
2962 		uint32_t sid_size = 0;
2963 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
2964 		if (error || sid_size == 0) {
2965 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
2966 			insert_error = EINVAL;
2967 			break;
2968 		}
2969 		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
2970 			uint16_t new_node_idx;
2971 			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
2972 			if (error) {
2973 				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
2974 				insert_error = EINVAL;
2975 				break;
2976 			}
2977 			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
2978 			if (new_node_idx == NULL_TRIE_IDX) {
2979 				insert_error = EINVAL;
2980 				break;
2981 			}
2982 		} else {
2983 			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
2984 			insert_error = ENOBUFS;
2985 			break;
2986 		}
2987 	}
2988 
2989 	if (!insert_error) {
2990 		group->signing_id_trie = new_trie;
2991 	} else {
2992 		kfree_data(new_trie.memory, trie_memory_size);
2993 	}
2994 
2995 	lck_rw_done(&group->lck);
2996 }
2997 
2998 static int
flow_divert_input(mbuf_t packet,struct flow_divert_group * group)2999 flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
3000 {
3001 	struct flow_divert_packet_header        hdr;
3002 	int                                                                     error           = 0;
3003 	struct flow_divert_pcb                          *fd_cb;
3004 
3005 	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
3006 		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
3007 		error = EINVAL;
3008 		goto done;
3009 	}
3010 
3011 	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
3012 	if (error) {
3013 		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
3014 		error = ENOBUFS;
3015 		goto done;
3016 	}
3017 
3018 	hdr.conn_id = ntohl(hdr.conn_id);
3019 
3020 	if (hdr.conn_id == 0) {
3021 		switch (hdr.packet_type) {
3022 		case FLOW_DIVERT_PKT_GROUP_INIT:
3023 			flow_divert_handle_group_init(group, packet, sizeof(hdr));
3024 			break;
3025 		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
3026 			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
3027 			break;
3028 		default:
3029 			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
3030 			break;
3031 		}
3032 		goto done;
3033 	}
3034 
3035 	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
3036 	if (fd_cb == NULL) {
3037 		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
3038 			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
3039 		}
3040 		goto done;
3041 	}
3042 
3043 	switch (hdr.packet_type) {
3044 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
3045 		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
3046 		break;
3047 	case FLOW_DIVERT_PKT_CLOSE:
3048 		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
3049 		break;
3050 	case FLOW_DIVERT_PKT_DATA:
3051 		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
3052 		break;
3053 	case FLOW_DIVERT_PKT_READ_NOTIFY:
3054 		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
3055 		break;
3056 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
3057 		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
3058 		break;
3059 	default:
3060 		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
3061 		break;
3062 	}
3063 
3064 	FDRELEASE(fd_cb);
3065 
3066 done:
3067 	mbuf_freem(packet);
3068 	return error;
3069 }
3070 
3071 static void
flow_divert_close_all(struct flow_divert_group * group)3072 flow_divert_close_all(struct flow_divert_group *group)
3073 {
3074 	struct flow_divert_pcb                  *fd_cb;
3075 	SLIST_HEAD(, flow_divert_pcb)   tmp_list;
3076 
3077 	SLIST_INIT(&tmp_list);
3078 
3079 	lck_rw_lock_exclusive(&group->lck);
3080 
3081 	MBUFQ_DRAIN(&group->send_queue);
3082 
3083 	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3084 		FDRETAIN(fd_cb);
3085 		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3086 	}
3087 
3088 	lck_rw_done(&group->lck);
3089 
3090 	while (!SLIST_EMPTY(&tmp_list)) {
3091 		fd_cb = SLIST_FIRST(&tmp_list);
3092 		FDLOCK(fd_cb);
3093 		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
3094 		if (fd_cb->so != NULL) {
3095 			socket_lock(fd_cb->so, 0);
3096 			flow_divert_pcb_remove(fd_cb);
3097 			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
3098 			fd_cb->so->so_error = ECONNABORTED;
3099 			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
3100 			socket_unlock(fd_cb->so, 0);
3101 		}
3102 		FDUNLOCK(fd_cb);
3103 		FDRELEASE(fd_cb);
3104 	}
3105 }
3106 
3107 void
flow_divert_detach(struct socket * so)3108 flow_divert_detach(struct socket *so)
3109 {
3110 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3111 
3112 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3113 
3114 	so->so_flags &= ~SOF_FLOW_DIVERT;
3115 	so->so_fd_pcb = NULL;
3116 
3117 	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);
3118 
3119 	if (fd_cb->group != NULL) {
3120 		/* Last-ditch effort to send any buffered data */
3121 		flow_divert_send_buffered_data(fd_cb, TRUE);
3122 
3123 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
3124 		flow_divert_send_close_if_needed(fd_cb);
3125 		/* Remove from the group */
3126 		flow_divert_pcb_remove(fd_cb);
3127 	}
3128 
3129 	socket_unlock(so, 0);
3130 	FDLOCK(fd_cb);
3131 	fd_cb->so = NULL;
3132 	FDUNLOCK(fd_cb);
3133 	socket_lock(so, 0);
3134 
3135 	FDRELEASE(fd_cb);       /* Release the socket's reference */
3136 }
3137 
3138 static int
flow_divert_close(struct socket * so)3139 flow_divert_close(struct socket *so)
3140 {
3141 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3142 
3143 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3144 
3145 	FDLOG0(LOG_INFO, fd_cb, "Closing");
3146 
3147 	if (SOCK_TYPE(so) == SOCK_STREAM) {
3148 		soisdisconnecting(so);
3149 		sbflush(&so->so_rcv);
3150 	}
3151 
3152 	flow_divert_send_buffered_data(fd_cb, TRUE);
3153 	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
3154 	flow_divert_send_close_if_needed(fd_cb);
3155 
3156 	/* Remove from the group */
3157 	flow_divert_pcb_remove(fd_cb);
3158 
3159 	return 0;
3160 }
3161 
3162 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3163 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3164     sae_connid_t cid __unused)
3165 {
3166 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3167 		return EINVAL;
3168 	}
3169 
3170 	return flow_divert_close(so);
3171 }
3172 
3173 static int
flow_divert_shutdown(struct socket * so)3174 flow_divert_shutdown(struct socket *so)
3175 {
3176 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3177 
3178 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3179 
3180 	FDLOG0(LOG_INFO, fd_cb, "Can't send more");
3181 
3182 	socantsendmore(so);
3183 
3184 	flow_divert_update_closed_state(fd_cb, SHUT_WR, FALSE);
3185 	flow_divert_send_close_if_needed(fd_cb);
3186 
3187 	return 0;
3188 }
3189 
3190 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3191 flow_divert_rcvd(struct socket *so, int flags __unused)
3192 {
3193 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3194 	int space = sbspace(&so->so_rcv);
3195 
3196 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3197 
3198 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3199 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3200 	    (space > 0) &&
3201 	    flow_divert_send_read_notification(fd_cb) == 0) {
3202 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3203 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3204 	}
3205 
3206 	return 0;
3207 }
3208 
3209 static int
flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet,struct sockaddr * toaddr)3210 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr)
3211 {
3212 	int error = 0;
3213 	int port  = 0;
3214 
3215 	if (!flow_divert_is_sockaddr_valid(toaddr)) {
3216 		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3217 		error = EINVAL;
3218 		goto done;
3219 	}
3220 
3221 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
3222 	if (error) {
3223 		goto done;
3224 	}
3225 
3226 	if (toaddr->sa_family == AF_INET) {
3227 		port = ntohs((satosin(toaddr))->sin_port);
3228 	} else {
3229 		port = ntohs((satosin6(toaddr))->sin6_port);
3230 	}
3231 
3232 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3233 	if (error) {
3234 		goto done;
3235 	}
3236 
3237 done:
3238 	return error;
3239 }
3240 
3241 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_t buffer)3242 flow_divert_get_buffered_target_address(mbuf_t buffer)
3243 {
3244 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3245 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3246 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3247 			return toaddr;
3248 		}
3249 	}
3250 	return NULL;
3251 }
3252 
3253 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3254 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3255 {
3256 	switch (addr->sa_family) {
3257 	case AF_INET:
3258 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3259 			return FALSE;
3260 		}
3261 		break;
3262 	case AF_INET6:
3263 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3264 			return FALSE;
3265 		}
3266 		break;
3267 	default:
3268 		return FALSE;
3269 	}
3270 	return TRUE;
3271 }
3272 
3273 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3274 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3275     struct sockaddr **dup)
3276 {
3277 	int                                             error           = 0;
3278 	struct sockaddr                 *result;
3279 	struct sockaddr_storage ss;
3280 
3281 	if (addr != NULL) {
3282 		result = addr;
3283 	} else {
3284 		memset(&ss, 0, sizeof(ss));
3285 		ss.ss_family = family;
3286 		if (ss.ss_family == AF_INET) {
3287 			ss.ss_len = sizeof(struct sockaddr_in);
3288 		} else if (ss.ss_family == AF_INET6) {
3289 			ss.ss_len = sizeof(struct sockaddr_in6);
3290 		} else {
3291 			error = EINVAL;
3292 		}
3293 		result = (struct sockaddr *)&ss;
3294 	}
3295 
3296 	if (!error) {
3297 		*dup = dup_sockaddr(result, 1);
3298 		if (*dup == NULL) {
3299 			error = ENOBUFS;
3300 		}
3301 	}
3302 
3303 	return error;
3304 }
3305 
3306 static void
flow_divert_disconnect_socket(struct socket * so,bool is_connected)3307 flow_divert_disconnect_socket(struct socket *so, bool is_connected)
3308 {
3309 	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
3310 		soisdisconnected(so);
3311 	}
3312 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
3313 		struct inpcb *inp = sotoinpcb(so);
3314 		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
3315 			/*
3316 			 * Let NetworkStatistics know this PCB is going away
3317 			 * before we detach it.
3318 			 */
3319 			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
3320 				nstat_pcb_detach(inp);
3321 			}
3322 
3323 			if (SOCK_DOM(so) == PF_INET6) {
3324 				ROUTE_RELEASE(&inp->in6p_route);
3325 			} else {
3326 				ROUTE_RELEASE(&inp->inp_route);
3327 			}
3328 			inp->inp_state = INPCB_STATE_DEAD;
3329 			/* makes sure we're not called twice from so_close */
3330 			so->so_flags |= SOF_PCBCLEARING;
3331 			inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
3332 		}
3333 	}
3334 }
3335 
3336 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3337 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3338 {
3339 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3340 
3341 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3342 
3343 	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3344 		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3345 			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3346 		}
3347 	}
3348 
3349 	if (SOCK_DOM(so) == PF_INET) {
3350 		return g_tcp_protosw->pr_ctloutput(so, sopt);
3351 	} else if (SOCK_DOM(so) == PF_INET6) {
3352 		return g_tcp6_protosw->pr_ctloutput(so, sopt);
3353 	}
3354 	return 0;
3355 }
3356 
3357 static errno_t
flow_divert_connect_out_internal(struct socket * so,struct sockaddr * to,proc_t p,bool implicit)3358 flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
3359 {
3360 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3361 	int                                             error   = 0;
3362 	struct inpcb                    *inp    = sotoinpcb(so);
3363 	struct sockaddr_in              *sinp;
3364 	mbuf_t                                  connect_packet = NULL;
3365 	int                                             do_send = 1;
3366 
3367 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3368 
3369 	if (fd_cb->group == NULL) {
3370 		error = ENETUNREACH;
3371 		goto done;
3372 	}
3373 
3374 	if (inp == NULL) {
3375 		error = EINVAL;
3376 		goto done;
3377 	} else if (inp->inp_state == INPCB_STATE_DEAD) {
3378 		if (so->so_error) {
3379 			error = so->so_error;
3380 			so->so_error = 0;
3381 		} else {
3382 			error = EINVAL;
3383 		}
3384 		goto done;
3385 	}
3386 
3387 	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3388 		error = EALREADY;
3389 		goto done;
3390 	}
3391 
3392 	FDLOG0(LOG_INFO, fd_cb, "Connecting");
3393 
3394 	if (fd_cb->connect_packet == NULL) {
3395 		struct sockaddr_in sin = {};
3396 		struct ifnet *ifp = NULL;
3397 
3398 		if (to == NULL) {
3399 			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
3400 			error = EINVAL;
3401 			goto done;
3402 		}
3403 
3404 		if (!flow_divert_is_sockaddr_valid(to)) {
3405 			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
3406 			error = EINVAL;
3407 			goto done;
3408 		}
3409 
3410 		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
3411 		if (fd_cb->original_remote_endpoint == NULL) {
3412 			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
3413 			error = ENOMEM;
3414 			goto done;
3415 		}
3416 		fd_cb->original_vflag = inp->inp_vflag;
3417 		fd_cb->original_last_outifp = inp->inp_last_outifp;
3418 		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;
3419 
3420 		sinp = (struct sockaddr_in *)(void *)to;
3421 		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
3422 			error = EAFNOSUPPORT;
3423 			goto done;
3424 		}
3425 
3426 		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
3427 			struct sockaddr_in6 sin6 = {};
3428 			sin6.sin6_family = AF_INET6;
3429 			sin6.sin6_len = sizeof(struct sockaddr_in6);
3430 			sin6.sin6_port = satosin6(to)->sin6_port;
3431 			sin6.sin6_addr = satosin6(to)->sin6_addr;
3432 			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
3433 				in6_sin6_2_sin(&sin, &sin6);
3434 				to = (struct sockaddr *)&sin;
3435 			}
3436 		}
3437 
3438 		if (to->sa_family == AF_INET6) {
3439 			inp->inp_vflag &= ~INP_IPV4;
3440 			inp->inp_vflag |= INP_IPV6;
3441 			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
3442 			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
3443 			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
3444 			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
3445 			if (error) {
3446 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
3447 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
3448 					error = 0;
3449 				} else {
3450 					goto done;
3451 				}
3452 			}
3453 			if (ifp != NULL) {
3454 				inp->in6p_last_outifp = ifp;
3455 				ifnet_release(ifp);
3456 			}
3457 		} else if (to->sa_family == AF_INET) {
3458 			inp->inp_vflag |= INP_IPV4;
3459 			inp->inp_vflag &= ~INP_IPV6;
3460 			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
3461 			fd_cb->local_endpoint.sin.sin_family = AF_INET;
3462 			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
3463 			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
3464 			if (error) {
3465 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
3466 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
3467 					error = 0;
3468 				} else {
3469 					goto done;
3470 				}
3471 			}
3472 			if (ifp != NULL) {
3473 				inp->inp_last_outifp = ifp;
3474 				ifnet_release(ifp);
3475 			}
3476 		} else {
3477 			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
3478 		}
3479 
3480 		error = flow_divert_check_no_cellular(fd_cb) ||
3481 		    flow_divert_check_no_expensive(fd_cb) ||
3482 		    flow_divert_check_no_constrained(fd_cb);
3483 		if (error) {
3484 			goto done;
3485 		}
3486 
3487 		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
3488 		    !implicit || /* connect() was called or */
3489 		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
3490 		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
3491 			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
3492 		}
3493 
3494 		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
3495 		if (error) {
3496 			goto done;
3497 		}
3498 
3499 		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
3500 			flow_divert_set_remote_endpoint(fd_cb, to);
3501 			flow_divert_set_local_endpoint(fd_cb, &(fd_cb->local_endpoint.sa));
3502 		}
3503 
3504 		if (implicit) {
3505 			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
3506 		}
3507 
3508 		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
3509 			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
3510 			do_send = 0;
3511 		}
3512 
3513 		fd_cb->connect_packet = connect_packet;
3514 		connect_packet = NULL;
3515 	} else {
3516 		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
3517 	}
3518 
3519 	if (do_send) {
3520 		error = flow_divert_send_connect_packet(fd_cb);
3521 		if (error) {
3522 			goto done;
3523 		}
3524 
3525 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3526 	}
3527 
3528 	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
3529 		soisconnected(so);
3530 	} else {
3531 		soisconnecting(so);
3532 	}
3533 
3534 done:
3535 	return error;
3536 }
3537 
3538 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3539 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3540 {
3541 #if CONTENT_FILTER
3542 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3543 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3544 		if (error != 0) {
3545 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3546 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3547 			return error;
3548 		}
3549 	}
3550 #endif /* CONTENT_FILTER */
3551 
3552 	return flow_divert_connect_out_internal(so, to, p, false);
3553 }
3554 
3555 static int
flow_divert_connectx_out_common(struct socket * so,struct sockaddr * dst,struct proc * p,sae_connid_t * pcid,struct uio * auio,user_ssize_t * bytes_written)3556 flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
3557     struct proc *p, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
3558 {
3559 	struct inpcb *inp = sotoinpcb(so);
3560 	int error;
3561 
3562 	if (inp == NULL) {
3563 		return EINVAL;
3564 	}
3565 
3566 	VERIFY(dst != NULL);
3567 
3568 #if CONTENT_FILTER && NECP
3569 	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3570 	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
3571 	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3572 		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
3573 	}
3574 #endif /* CONTENT_FILTER */
3575 
3576 	error = flow_divert_connect_out(so, dst, p);
3577 
3578 	if (error != 0) {
3579 		return error;
3580 	}
3581 
3582 	/* if there is data, send it */
3583 	if (auio != NULL) {
3584 		user_ssize_t datalen = 0;
3585 
3586 		socket_unlock(so, 0);
3587 
3588 		VERIFY(bytes_written != NULL);
3589 
3590 		datalen = uio_resid(auio);
3591 		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
3592 		socket_lock(so, 0);
3593 
3594 		if (error == 0 || error == EWOULDBLOCK) {
3595 			*bytes_written = datalen - uio_resid(auio);
3596 		}
3597 
3598 		/*
3599 		 * sosend returns EWOULDBLOCK if it's a non-blocking
3600 		 * socket or a timeout occured (this allows to return
3601 		 * the amount of queued data through sendit()).
3602 		 *
3603 		 * However, connectx() returns EINPROGRESS in case of a
3604 		 * blocking socket. So we change the return value here.
3605 		 */
3606 		if (error == EWOULDBLOCK) {
3607 			error = EINPROGRESS;
3608 		}
3609 	}
3610 
3611 	if (error == 0 && pcid != NULL) {
3612 		*pcid = 1;      /* there is only 1 connection for a TCP */
3613 	}
3614 
3615 	return error;
3616 }
3617 
3618 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope __unused,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3619 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3620     struct sockaddr *dst, struct proc *p, uint32_t ifscope __unused,
3621     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3622     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3623 {
3624 	return flow_divert_connectx_out_common(so, dst, p, pcid, uio, bytes_written);
3625 }
3626 
3627 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope __unused,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3628 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3629     struct sockaddr *dst, struct proc *p, uint32_t ifscope __unused,
3630     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3631     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3632 {
3633 	return flow_divert_connectx_out_common(so, dst, p, pcid, uio, bytes_written);
3634 }
3635 
3636 static errno_t
flow_divert_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)3637 flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
3638 {
3639 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3640 	int                                             error   = 0;
3641 	struct inpcb *inp;
3642 #if CONTENT_FILTER
3643 	struct m_tag *cfil_tag = NULL;
3644 #endif
3645 
3646 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3647 
3648 	inp = sotoinpcb(so);
3649 	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
3650 		error = ECONNRESET;
3651 		goto done;
3652 	}
3653 
3654 	if (control && mbuf_len(control) > 0) {
3655 		error = EINVAL;
3656 		goto done;
3657 	}
3658 
3659 	if (flags & MSG_OOB) {
3660 		error = EINVAL;
3661 		goto done; /* We don't support OOB data */
3662 	}
3663 
3664 	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
3665 		/* The provider considers this datagram flow to be closed, so no data can be sent */
3666 		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
3667 		error = EHOSTUNREACH;
3668 		goto done;
3669 	}
3670 
3671 #if CONTENT_FILTER
3672 	/*
3673 	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
3674 	 * retrieve the CFIL saved remote address from the mbuf and use it.
3675 	 */
3676 	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
3677 		struct sockaddr *cfil_faddr = NULL;
3678 		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
3679 		if (cfil_tag) {
3680 			to = (struct sockaddr *)(void *)cfil_faddr;
3681 		}
3682 		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
3683 	}
3684 #endif
3685 
3686 	/* Implicit connect */
3687 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
3688 		FDLOG0(LOG_INFO, fd_cb, "implicit connect");
3689 
3690 		error = flow_divert_connect_out_internal(so, to, p, true);
3691 		if (error) {
3692 			goto done;
3693 		}
3694 	} else {
3695 		error = flow_divert_check_no_cellular(fd_cb) ||
3696 		    flow_divert_check_no_expensive(fd_cb) ||
3697 		    flow_divert_check_no_constrained(fd_cb);
3698 		if (error) {
3699 			goto done;
3700 		}
3701 	}
3702 
3703 	FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data));
3704 
3705 	fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data);
3706 	error = flow_divert_send_app_data(fd_cb, data, to);
3707 
3708 	data = NULL;
3709 
3710 	if (error) {
3711 		goto done;
3712 	}
3713 
3714 	if (flags & PRUS_EOF) {
3715 		flow_divert_shutdown(so);
3716 	}
3717 
3718 done:
3719 	if (data) {
3720 		mbuf_freem(data);
3721 	}
3722 	if (control) {
3723 		mbuf_free(control);
3724 	}
3725 #if CONTENT_FILTER
3726 	if (cfil_tag) {
3727 		m_tag_free(cfil_tag);
3728 	}
3729 #endif
3730 
3731 	return error;
3732 }
3733 
3734 static int
flow_divert_preconnect(struct socket * so)3735 flow_divert_preconnect(struct socket *so)
3736 {
3737 	int error = 0;
3738 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3739 
3740 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
3741 
3742 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
3743 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
3744 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
3745 		if (error) {
3746 			return error;
3747 		}
3748 
3749 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3750 	}
3751 
3752 	soclearfastopen(so);
3753 
3754 	return error;
3755 }
3756 
3757 static void
flow_divert_set_protosw(struct socket * so)3758 flow_divert_set_protosw(struct socket *so)
3759 {
3760 	if (SOCK_DOM(so) == PF_INET) {
3761 		so->so_proto = &g_flow_divert_in_protosw;
3762 	} else {
3763 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
3764 	}
3765 }
3766 
3767 static void
flow_divert_set_udp_protosw(struct socket * so)3768 flow_divert_set_udp_protosw(struct socket *so)
3769 {
3770 	if (SOCK_DOM(so) == PF_INET) {
3771 		so->so_proto = &g_flow_divert_in_udp_protosw;
3772 	} else {
3773 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
3774 	}
3775 }
3776 
3777 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)3778 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
3779 {
3780 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3781 	struct inpcb *inp;
3782 	int error = 0;
3783 
3784 	inp = sotoinpcb(so);
3785 	if (inp == NULL) {
3786 		return EINVAL;
3787 	}
3788 
3789 	if (fd_cb == NULL) {
3790 		error = flow_divert_pcb_init(so);
3791 		fd_cb  = so->so_fd_pcb;
3792 		if (error != 0 || fd_cb == NULL) {
3793 			goto done;
3794 		}
3795 	}
3796 	return flow_divert_data_out(so, flags, data, to, control, p);
3797 
3798 done:
3799 	if (data) {
3800 		mbuf_freem(data);
3801 	}
3802 	if (control) {
3803 		mbuf_free(control);
3804 	}
3805 
3806 	return error;
3807 }
3808 
3809 static errno_t
flow_divert_pcb_init_internal(struct socket * so,uint32_t ctl_unit,uint32_t aggregate_unit)3810 flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
3811 {
3812 	errno_t error = 0;
3813 	struct flow_divert_pcb *fd_cb;
3814 	uint32_t agg_unit = aggregate_unit;
3815 	bool is_aggregate = false;
3816 	uint32_t group_unit = flow_divert_derive_kernel_control_unit(ctl_unit, &agg_unit, &is_aggregate);
3817 
3818 	if (group_unit == 0) {
3819 		return EINVAL;
3820 	}
3821 
3822 	if (so->so_flags & SOF_FLOW_DIVERT) {
3823 		return EALREADY;
3824 	}
3825 
3826 	fd_cb = flow_divert_pcb_create(so);
3827 	if (fd_cb != NULL) {
3828 		so->so_fd_pcb = fd_cb;
3829 		so->so_flags |= SOF_FLOW_DIVERT;
3830 		fd_cb->control_group_unit = group_unit;
3831 		fd_cb->policy_control_unit = ctl_unit;
3832 		fd_cb->aggregate_unit = agg_unit;
3833 		if (is_aggregate) {
3834 			fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
3835 		} else {
3836 			fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
3837 		}
3838 
3839 		error = flow_divert_pcb_insert(so, fd_cb, group_unit);
3840 		if (error) {
3841 			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
3842 			if (so->so_flags & SOF_FLOW_DIVERT) {
3843 				so->so_fd_pcb = NULL;
3844 				so->so_flags &= ~SOF_FLOW_DIVERT;
3845 				FDRELEASE(fd_cb);
3846 			}
3847 		} else {
3848 			if (SOCK_TYPE(so) == SOCK_STREAM) {
3849 				flow_divert_set_protosw(so);
3850 			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
3851 				flow_divert_set_udp_protosw(so);
3852 			}
3853 
3854 			FDLOG0(LOG_INFO, fd_cb, "Created");
3855 		}
3856 	} else {
3857 		error = ENOMEM;
3858 	}
3859 
3860 	return error;
3861 }
3862 
3863 errno_t
flow_divert_pcb_init(struct socket * so)3864 flow_divert_pcb_init(struct socket *so)
3865 {
3866 	struct inpcb *inp = sotoinpcb(so);
3867 	uint32_t aggregate_units = 0;
3868 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
3869 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
3870 }
3871 
3872 errno_t
flow_divert_token_set(struct socket * so,struct sockopt * sopt)3873 flow_divert_token_set(struct socket *so, struct sockopt *sopt)
3874 {
3875 	uint32_t ctl_unit = 0;
3876 	uint32_t key_unit = 0;
3877 	uint32_t aggregate_unit = 0;
3878 	int error = 0;
3879 	int hmac_error = 0;
3880 	mbuf_t token = NULL;
3881 
3882 	if (so->so_flags & SOF_FLOW_DIVERT) {
3883 		error = EALREADY;
3884 		goto done;
3885 	}
3886 
3887 	if (g_init_result) {
3888 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
3889 		error = ENOPROTOOPT;
3890 		goto done;
3891 	}
3892 
3893 	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
3894 	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
3895 	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
3896 		error = EINVAL;
3897 		goto done;
3898 	} else {
3899 		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
3900 			struct tcpcb *tp = sototcpcb(so);
3901 			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
3902 				error = EINVAL;
3903 				goto done;
3904 			}
3905 		}
3906 	}
3907 
3908 	error = soopt_getm(sopt, &token);
3909 	if (error) {
3910 		token = NULL;
3911 		goto done;
3912 	}
3913 
3914 	error = soopt_mcopyin(sopt, token);
3915 	if (error) {
3916 		token = NULL;
3917 		goto done;
3918 	}
3919 
3920 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
3921 	if (!error) {
3922 		key_unit = ntohl(key_unit);
3923 		if (key_unit >= GROUP_COUNT_MAX) {
3924 			key_unit = 0;
3925 		}
3926 	} else if (error != ENOENT) {
3927 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
3928 		goto done;
3929 	} else {
3930 		key_unit = 0;
3931 	}
3932 
3933 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
3934 	if (error) {
3935 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
3936 		goto done;
3937 	}
3938 
3939 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
3940 	if (error && error != ENOENT) {
3941 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
3942 		goto done;
3943 	}
3944 
3945 	/* A valid kernel control unit is required */
3946 	ctl_unit = ntohl(ctl_unit);
3947 	aggregate_unit = ntohl(aggregate_unit);
3948 
3949 	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
3950 		socket_unlock(so, 0);
3951 		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
3952 		socket_lock(so, 0);
3953 
3954 		if (hmac_error && hmac_error != ENOENT) {
3955 			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
3956 			error = hmac_error;
3957 			goto done;
3958 		}
3959 	}
3960 
3961 	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
3962 	if (error == 0) {
3963 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3964 		int log_level = LOG_NOTICE;
3965 
3966 		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
3967 		if (error == 0) {
3968 			fd_cb->log_level = (uint8_t)log_level;
3969 		}
3970 		error = 0;
3971 
3972 		fd_cb->connect_token = token;
3973 		token = NULL;
3974 
3975 		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
3976 	}
3977 
3978 	if (hmac_error == 0) {
3979 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3980 		if (fd_cb != NULL) {
3981 			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
3982 		}
3983 	}
3984 
3985 done:
3986 	if (token != NULL) {
3987 		mbuf_freem(token);
3988 	}
3989 
3990 	return error;
3991 }
3992 
3993 errno_t
flow_divert_token_get(struct socket * so,struct sockopt * sopt)3994 flow_divert_token_get(struct socket *so, struct sockopt *sopt)
3995 {
3996 	uint32_t                                        ctl_unit;
3997 	int                                                     error                                           = 0;
3998 	uint8_t                                         hmac[SHA_DIGEST_LENGTH];
3999 	struct flow_divert_pcb          *fd_cb                                          = so->so_fd_pcb;
4000 	mbuf_t                                          token                                           = NULL;
4001 	struct flow_divert_group        *control_group                          = NULL;
4002 
4003 	if (!(so->so_flags & SOF_FLOW_DIVERT)) {
4004 		error = EINVAL;
4005 		goto done;
4006 	}
4007 
4008 	VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL);
4009 
4010 	if (fd_cb->group == NULL) {
4011 		error = EINVAL;
4012 		goto done;
4013 	}
4014 
4015 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
4016 	if (error) {
4017 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
4018 		goto done;
4019 	}
4020 
4021 	ctl_unit = htonl(fd_cb->group->ctl_unit);
4022 
4023 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
4024 	if (error) {
4025 		goto done;
4026 	}
4027 
4028 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
4029 	if (error) {
4030 		goto done;
4031 	}
4032 
4033 	if (fd_cb->app_data != NULL) {
4034 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
4035 		if (error) {
4036 			goto done;
4037 		}
4038 	}
4039 
4040 	socket_unlock(so, 0);
4041 	lck_rw_lock_shared(&g_flow_divert_group_lck);
4042 
4043 	if (g_flow_divert_groups != NULL && g_active_group_count > 0 &&
4044 	    fd_cb->control_group_unit > 0 && fd_cb->control_group_unit < GROUP_COUNT_MAX) {
4045 		control_group = g_flow_divert_groups[fd_cb->control_group_unit];
4046 	}
4047 
4048 	if (control_group != NULL) {
4049 		lck_rw_lock_shared(&control_group->lck);
4050 		ctl_unit = htonl(control_group->ctl_unit);
4051 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
4052 		if (!error) {
4053 			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
4054 		}
4055 		lck_rw_done(&control_group->lck);
4056 	} else {
4057 		error = ENOPROTOOPT;
4058 	}
4059 
4060 	lck_rw_done(&g_flow_divert_group_lck);
4061 	socket_lock(so, 0);
4062 
4063 	if (error) {
4064 		goto done;
4065 	}
4066 
4067 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
4068 	if (error) {
4069 		goto done;
4070 	}
4071 
4072 	if (sopt->sopt_val == USER_ADDR_NULL) {
4073 		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
4074 		sopt->sopt_valsize = mbuf_pkthdr_len(token);
4075 		goto done;
4076 	}
4077 
4078 	error = soopt_mcopyout(sopt, token);
4079 	if (error) {
4080 		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
4081 		goto done;
4082 	}
4083 
4084 done:
4085 	if (token != NULL) {
4086 		mbuf_freem(token);
4087 	}
4088 
4089 	return error;
4090 }
4091 
4092 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4093 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4094 {
4095 	struct flow_divert_group        *new_group      = NULL;
4096 	int                             error           = 0;
4097 
4098 	if (sac->sc_unit >= GROUP_COUNT_MAX) {
4099 		error = EINVAL;
4100 		goto done;
4101 	}
4102 
4103 	*unitinfo = NULL;
4104 
4105 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4106 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4107 	RB_INIT(&new_group->pcb_tree);
4108 	new_group->ctl_unit = sac->sc_unit;
4109 	MBUFQ_INIT(&new_group->send_queue);
4110 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4111 
4112 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4113 
4114 	if (g_flow_divert_groups == NULL) {
4115 		g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4116 		    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4117 	}
4118 
4119 	if (g_flow_divert_groups[sac->sc_unit] != NULL) {
4120 		error = EALREADY;
4121 	} else {
4122 		g_flow_divert_groups[sac->sc_unit] = new_group;
4123 		g_active_group_count++;
4124 	}
4125 
4126 	lck_rw_done(&g_flow_divert_group_lck);
4127 
4128 done:
4129 	if (error == 0) {
4130 		*unitinfo = new_group;
4131 	} else if (new_group != NULL) {
4132 		zfree(flow_divert_group_zone, new_group);
4133 	}
4134 	return error;
4135 }
4136 
4137 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4138 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4139 {
4140 	struct flow_divert_group        *group  = NULL;
4141 	errno_t                                         error   = 0;
4142 
4143 	if (unit >= GROUP_COUNT_MAX) {
4144 		return EINVAL;
4145 	}
4146 
4147 	if (unitinfo == NULL) {
4148 		return 0;
4149 	}
4150 
4151 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4152 
4153 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4154 
4155 	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
4156 		panic("flow divert group %u is disconnecting, but no groups are active (groups = %p, active count = %u", unit,
4157 		    g_flow_divert_groups, g_active_group_count);
4158 	}
4159 
4160 	group = g_flow_divert_groups[unit];
4161 
4162 	if (group != (struct flow_divert_group *)unitinfo) {
4163 		panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4164 	}
4165 
4166 	g_flow_divert_groups[unit] = NULL;
4167 	g_active_group_count--;
4168 
4169 	if (g_active_group_count == 0) {
4170 		kfree_type(struct flow_divert_group *,
4171 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4172 		g_flow_divert_groups = NULL;
4173 	}
4174 
4175 	lck_rw_done(&g_flow_divert_group_lck);
4176 
4177 	if (group != NULL) {
4178 		flow_divert_close_all(group);
4179 
4180 		lck_rw_lock_exclusive(&group->lck);
4181 
4182 		if (group->token_key != NULL) {
4183 			memset(group->token_key, 0, group->token_key_size);
4184 			kfree_data(group->token_key, group->token_key_size);
4185 			group->token_key = NULL;
4186 			group->token_key_size = 0;
4187 		}
4188 
4189 		/* Re-set the current trie */
4190 		if (group->signing_id_trie.memory != NULL) {
4191 			kfree_data_addr(group->signing_id_trie.memory);
4192 		}
4193 		memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
4194 		group->signing_id_trie.root = NULL_TRIE_IDX;
4195 
4196 		lck_rw_done(&group->lck);
4197 
4198 		zfree(flow_divert_group_zone, group);
4199 	} else {
4200 		error = EINVAL;
4201 	}
4202 
4203 	return error;
4204 }
4205 
4206 static errno_t
flow_divert_kctl_send(kern_ctl_ref kctlref __unused,uint32_t unit __unused,void * unitinfo,mbuf_t m,int flags __unused)4207 flow_divert_kctl_send(kern_ctl_ref kctlref __unused, uint32_t unit __unused, void *unitinfo, mbuf_t m, int flags __unused)
4208 {
4209 	return flow_divert_input(m, (struct flow_divert_group *)unitinfo);
4210 }
4211 
4212 static void
flow_divert_kctl_rcvd(kern_ctl_ref kctlref __unused,uint32_t unit __unused,void * unitinfo,int flags __unused)4213 flow_divert_kctl_rcvd(kern_ctl_ref kctlref __unused, uint32_t unit __unused, void *unitinfo, int flags __unused)
4214 {
4215 	struct flow_divert_group        *group  = (struct flow_divert_group *)unitinfo;
4216 
4217 	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4218 		struct flow_divert_pcb                  *fd_cb;
4219 		SLIST_HEAD(, flow_divert_pcb)   tmp_list;
4220 
4221 		lck_rw_lock_shared(&g_flow_divert_group_lck);
4222 		lck_rw_lock_exclusive(&group->lck);
4223 
4224 		while (!MBUFQ_EMPTY(&group->send_queue)) {
4225 			mbuf_t next_packet;
4226 			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4227 			next_packet = MBUFQ_FIRST(&group->send_queue);
4228 			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4229 			if (error) {
4230 				FDLOG(LOG_DEBUG, &nil_pcb, "ctl_enqueuembuf returned an error: %d", error);
4231 				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4232 				lck_rw_done(&group->lck);
4233 				lck_rw_done(&g_flow_divert_group_lck);
4234 				return;
4235 			}
4236 			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4237 		}
4238 
4239 		SLIST_INIT(&tmp_list);
4240 
4241 		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4242 			FDRETAIN(fd_cb);
4243 			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4244 		}
4245 
4246 		lck_rw_done(&group->lck);
4247 
4248 		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4249 			FDLOCK(fd_cb);
4250 			if (fd_cb->so != NULL) {
4251 				socket_lock(fd_cb->so, 0);
4252 				if (fd_cb->group != NULL) {
4253 					flow_divert_send_buffered_data(fd_cb, FALSE);
4254 				}
4255 				socket_unlock(fd_cb->so, 0);
4256 			}
4257 			FDUNLOCK(fd_cb);
4258 			FDRELEASE(fd_cb);
4259 		}
4260 
4261 		lck_rw_done(&g_flow_divert_group_lck);
4262 	}
4263 }
4264 
4265 static int
flow_divert_kctl_init(void)4266 flow_divert_kctl_init(void)
4267 {
4268 	struct kern_ctl_reg     ctl_reg;
4269 	int                     result;
4270 
4271 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4272 
4273 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4274 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4275 	ctl_reg.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
4276 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4277 
4278 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4279 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4280 	ctl_reg.ctl_send = flow_divert_kctl_send;
4281 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4282 
4283 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4284 
4285 	if (result) {
4286 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4287 		return result;
4288 	}
4289 
4290 	return 0;
4291 }
4292 
4293 void
flow_divert_init(void)4294 flow_divert_init(void)
4295 {
4296 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4297 	nil_pcb.log_level = LOG_NOTICE;
4298 
4299 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4300 
4301 	VERIFY(g_tcp_protosw != NULL);
4302 
4303 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4304 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4305 
4306 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4307 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4308 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4309 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4310 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4311 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4312 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4313 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4314 
4315 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4316 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4317 
4318 	/*
4319 	 * Socket filters shouldn't attach/detach to/from this protosw
4320 	 * since pr_protosw is to be used instead, which points to the
4321 	 * real protocol; if they do, it is a bug and we should panic.
4322 	 */
4323 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4324 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4325 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4326 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4327 
4328 	/* UDP */
4329 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4330 	VERIFY(g_udp_protosw != NULL);
4331 
4332 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4333 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4334 
4335 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4336 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4337 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4338 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4339 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4340 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4341 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4342 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4343 	g_flow_divert_in_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
4344 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4345 
4346 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4347 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4348 
4349 	/*
4350 	 * Socket filters shouldn't attach/detach to/from this protosw
4351 	 * since pr_protosw is to be used instead, which points to the
4352 	 * real protocol; if they do, it is a bug and we should panic.
4353 	 */
4354 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4355 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4356 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4357 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4358 
4359 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4360 
4361 	VERIFY(g_tcp6_protosw != NULL);
4362 
4363 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4364 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4365 
4366 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4367 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4368 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4369 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4370 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4371 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4372 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4373 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4374 
4375 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4376 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4377 	/*
4378 	 * Socket filters shouldn't attach/detach to/from this protosw
4379 	 * since pr_protosw is to be used instead, which points to the
4380 	 * real protocol; if they do, it is a bug and we should panic.
4381 	 */
4382 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4383 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4384 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4385 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4386 
4387 	/* UDP6 */
4388 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4389 
4390 	VERIFY(g_udp6_protosw != NULL);
4391 
4392 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4393 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4394 
4395 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4396 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4397 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4398 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4399 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4400 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4401 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4402 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4403 	g_flow_divert_in6_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
4404 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4405 
4406 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4407 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4408 	/*
4409 	 * Socket filters shouldn't attach/detach to/from this protosw
4410 	 * since pr_protosw is to be used instead, which points to the
4411 	 * real protocol; if they do, it is a bug and we should panic.
4412 	 */
4413 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4414 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4415 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4416 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4417 
4418 	g_init_result = flow_divert_kctl_init();
4419 	if (g_init_result) {
4420 		goto done;
4421 	}
4422 
4423 done:
4424 	if (g_init_result != 0) {
4425 		if (g_flow_divert_kctl_ref != NULL) {
4426 			ctl_deregister(g_flow_divert_kctl_ref);
4427 			g_flow_divert_kctl_ref = NULL;
4428 		}
4429 	}
4430 }
4431