xref: /xnu-8792.41.9/bsd/netinet/flow_divert.c (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2012-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <libkern/tree.h>
46 #include <kern/locks.h>
47 #include <kern/debug.h>
48 #include <kern/task.h>
49 #include <mach/task_info.h>
50 #include <net/if_var.h>
51 #include <net/route.h>
52 #include <net/flowhash.h>
53 #include <net/ntstat.h>
54 #include <net/content_filter.h>
55 #include <net/necp.h>
56 #include <netinet/in.h>
57 #include <netinet/in_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcp_fsm.h>
61 #include <netinet/flow_divert.h>
62 #include <netinet/flow_divert_proto.h>
63 #include <netinet6/in6_pcb.h>
64 #include <netinet6/ip6protosw.h>
65 #include <dev/random/randomdev.h>
66 #include <libkern/crypto/sha1.h>
67 #include <libkern/crypto/crypto_internal.h>
68 #include <os/log.h>
69 #include <corecrypto/cc.h>
70 #if CONTENT_FILTER
71 #include <net/content_filter.h>
72 #endif /* CONTENT_FILTER */
73 
/*
 * Bit flags kept in flow_divert_pcb.flags (see flow_divert_update_closed_state()).
 * Note that bit 0x00000020 is not assigned in this list.
 */
74 #define FLOW_DIVERT_CONNECT_STARTED             0x00000001
75 #define FLOW_DIVERT_READ_CLOSED                 0x00000002
76 #define FLOW_DIVERT_WRITE_CLOSED                0x00000004
77 #define FLOW_DIVERT_TUNNEL_RD_CLOSED    0x00000008
78 #define FLOW_DIVERT_TUNNEL_WR_CLOSED    0x00000010
79 #define FLOW_DIVERT_HAS_HMAC            0x00000040
80 #define FLOW_DIVERT_NOTIFY_ON_RECEIVED  0x00000080
81 #define FLOW_DIVERT_IMPLICIT_CONNECT    0x00000100
82 #define FLOW_DIVERT_DID_SET_LOCAL_ADDR  0x00000200
83 #define FLOW_DIVERT_HAS_TOKEN           0x00000400
84 #define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
85 #define FLOW_DIVERT_FLOW_IS_TRANSPARENT   0x00001000
86 
/*
 * Logging helpers. Both emit through os_log_with_type() and prefix the message
 * with the PCB's hash; the syslog-style `level` is translated by
 * flow_divert_syslog_type_to_oslog_type(). `pcb` is dereferenced and therefore
 * must not be NULL (pass &nil_pcb when no PCB is available).
 * FDLOG requires at least one format argument; FDLOG0 is for fixed messages.
 */
87 #define FDLOG(level, pcb, format, ...) \
88 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)
89 
90 #define FDLOG0(level, pcb, msg) \
91 	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)
92 
/*
 * Reference counting helpers for PCBs (FDRETAIN/FDRELEASE) and groups
 * (FDGRP_RETAIN/FDGRP_RELEASE). All of them tolerate a NULL argument.
 * The release variants destroy the object when the value returned by
 * OSDecrementAtomic() (the count before the decrement) is 1.
 *
 * Every macro is wrapped in do { } while (0) so that it behaves as a single
 * statement and cannot capture a following `else`.
 */
#define FDRETAIN(pcb)                                                   \
	do {                                                            \
	        if ((pcb) != NULL) {                                    \
	                OSIncrementAtomic(&(pcb)->ref_count);           \
	        }                                                       \
	} while (0)

#define FDRELEASE(pcb)                                                          \
	do {                                                                    \
	        if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) { \
	                flow_divert_pcb_destroy(pcb);                           \
	        }                                                               \
	} while (0)

#define FDGRP_RETAIN(grp)                                               \
	do {                                                            \
	        if ((grp) != NULL) {                                    \
	                OSIncrementAtomic(&(grp)->ref_count);           \
	        }                                                       \
	} while (0)

#define FDGRP_RELEASE(grp)                                                      \
	do {                                                                    \
	        if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) { \
	                flow_divert_group_destroy(grp);                         \
	        }                                                               \
	} while (0)
103 
/* Acquire / release the per-PCB mutex. */
104 #define FDLOCK(pcb)                                             lck_mtx_lock(&(pcb)->mtx)
105 #define FDUNLOCK(pcb)                                   lck_mtx_unlock(&(pcb)->mtx)
106 
/* NOTE(review): by its name, the control socket send buffer size in bytes; not used in the visible part of the file. */
107 #define FD_CTL_SENDBUFF_SIZE                    (128 * 1024)
108 
/* NOTE(review): presumably a bit index in a group-level bitfield; confirm against its users. */
109 #define GROUP_BIT_CTL_ENQUEUE_BLOCKED   0
110 
/*
 * GROUP_COUNT_MAX is the exclusive upper bound on control units accepted by
 * flow_divert_group_lookup(); unit 0 is rejected there as well.
 * The remaining limits are not referenced in the visible part of the file.
 */
111 #define GROUP_COUNT_MAX                                 31
112 #define FLOW_DIVERT_MAX_NAME_SIZE               4096
113 #define FLOW_DIVERT_MAX_KEY_SIZE                1024
114 #define FLOW_DIVERT_MAX_TRIE_MEMORY             (1024 * 1024)
115 
/*
 * One node of the flow divert trie. A node labels a run of bytes stored in the
 * trie's shared byte array and optionally owns a child map.
 */
116 struct flow_divert_trie_node {
	/* Index of the node's first byte in the trie's byte array (see TRIE_BYTE) */
117 	uint16_t start;
	/* Number of bytes belonging to this node, starting at `start` */
118 	uint16_t length;
	/* Index of this node's child map, or NULL_TRIE_IDX when it has none */
119 	uint16_t child_map;
120 };
121 
/* Number of slots in one child map; TRIE_CHILD indexes a map by byte value */
122 #define CHILD_MAP_SIZE                  256
/* Sentinel meaning "no node" / "no child map" / allocation failure */
123 #define NULL_TRIE_IDX                   0xffff
/* Node `i` of trie `t` */
124 #define TRIE_NODE(t, i)                 ((t)->nodes[(i)])
/* Child slot for byte `b` of node `i`; only valid when the node's child_map is not NULL_TRIE_IDX */
125 #define TRIE_CHILD(t, i, b)             (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
/* Byte `i` of the trie's shared byte array */
126 #define TRIE_BYTE(t, i)                 ((t)->bytes[(i)])
127 
/* True when socket `s` has the flow divert flag set and an attached flow divert PCB */
128 #define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
128 #define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
129 
/*
 * Placeholder PCB: passed to FDLOG/FDLOG0 when no real PCB is at hand, and the
 * source of the initial log_level for new PCBs (see flow_divert_pcb_create()).
 */
130 static struct flow_divert_pcb           nil_pcb;
131 
/* Lock attribute/group shared by the locks in this file, and the lock taken around g_flow_divert_groups accesses */
132 static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
133 static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
134 static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
135     &flow_divert_mtx_attr);
136 
/* Group table indexed by control unit, plus the number of active groups (see flow_divert_group_lookup()) */
137 static struct flow_divert_group         **g_flow_divert_groups  = NULL;
138 static uint32_t                         g_active_group_count    = 0;
139 
/* NOTE(review): presumably the result of module initialization; not used in the visible part of the file */
140 static  errno_t                         g_init_result           = 0;
141 
/* NOTE(review): presumably the kernel control registration handle; confirm where it is assigned */
142 static  kern_ctl_ref                    g_flow_divert_kctl_ref  = NULL;
143 
/* Flow divert protocol switch entries and user request tables for TCP/UDP over IPv4/IPv6 */
144 static struct protosw                   g_flow_divert_in_protosw;
145 static struct pr_usrreqs                g_flow_divert_in_usrreqs;
146 static struct protosw                   g_flow_divert_in_udp_protosw;
147 static struct pr_usrreqs                g_flow_divert_in_udp_usrreqs;
148 static struct ip6protosw                g_flow_divert_in6_protosw;
149 static struct pr_usrreqs                g_flow_divert_in6_usrreqs;
150 static struct ip6protosw                g_flow_divert_in6_udp_protosw;
151 static struct pr_usrreqs                g_flow_divert_in6_udp_usrreqs;
152 
/* NOTE(review): presumably pointers to the original (non-diverted) protocol switch entries; confirm where they are set */
153 static struct protosw                   *g_tcp_protosw          = NULL;
154 static struct ip6protosw                *g_tcp6_protosw         = NULL;
155 static struct protosw                   *g_udp_protosw          = NULL;
156 static struct ip6protosw                *g_udp6_protosw         = NULL;
157 
/* Allocation zones for groups and PCBs; ZC_ZFREE_CLEARMEM is requested for both */
158 ZONE_DEFINE(flow_divert_group_zone, "flow_divert_group",
159     sizeof(struct flow_divert_group), ZC_ZFREE_CLEARMEM);
160 ZONE_DEFINE(flow_divert_pcb_zone, "flow_divert_pcb",
161     sizeof(struct flow_divert_pcb), ZC_ZFREE_CLEARMEM);
162 
/*
 * Forward declarations. None of these is defined in the part of the file
 * above this point; presumably they are defined further down.
 */
163 static errno_t
164 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
165 
166 static boolean_t
167 flow_divert_is_sockaddr_valid(struct sockaddr *addr);
168 
169 static int
170 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr);
171 
/* Unlike its neighbours, this one is declared without `static` (external linkage) */
172 struct sockaddr *
173 flow_divert_get_buffered_target_address(mbuf_t buffer);
174 
175 static void
176 flow_divert_disconnect_socket(struct socket *so, bool is_connected);
177 
/* Needed early because FDGRP_RELEASE expands to a call to it */
178 static void flow_divert_group_destroy(struct flow_divert_group *group);
179 
180 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)181 flow_divert_syslog_type_to_oslog_type(int syslog_type)
182 {
183 	switch (syslog_type) {
184 	case LOG_ERR: return OS_LOG_TYPE_ERROR;
185 	case LOG_INFO: return OS_LOG_TYPE_INFO;
186 	case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
187 	default: return OS_LOG_TYPE_DEFAULT;
188 	}
189 }
190 
191 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)192 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
193 {
194 	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
195 }
196 
/*
 * Declare and instantiate the fd_pcb_tree red-black tree: its elements are
 * struct flow_divert_pcb linked through the rb_link field and ordered by
 * flow_divert_pcb_cmp().
 */
197 RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
198 RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
199 
200 static const char *
flow_divert_packet_type2str(uint8_t packet_type)201 flow_divert_packet_type2str(uint8_t packet_type)
202 {
203 	switch (packet_type) {
204 	case FLOW_DIVERT_PKT_CONNECT:
205 		return "connect";
206 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
207 		return "connect result";
208 	case FLOW_DIVERT_PKT_DATA:
209 		return "data";
210 	case FLOW_DIVERT_PKT_CLOSE:
211 		return "close";
212 	case FLOW_DIVERT_PKT_READ_NOTIFY:
213 		return "read notification";
214 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
215 		return "properties update";
216 	case FLOW_DIVERT_PKT_APP_MAP_CREATE:
217 		return "app map create";
218 	default:
219 		return "unknown";
220 	}
221 }
222 
223 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)224 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
225 {
226 	struct flow_divert_pcb  key_item;
227 	struct flow_divert_pcb  *fd_cb          = NULL;
228 
229 	key_item.hash = hash;
230 
231 	lck_rw_lock_shared(&group->lck);
232 	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
233 	FDRETAIN(fd_cb);
234 	lck_rw_done(&group->lck);
235 
236 	return fd_cb;
237 }
238 
239 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)240 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
241 {
242 	struct flow_divert_group *group = NULL;
243 	lck_rw_lock_shared(&g_flow_divert_group_lck);
244 	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
245 		if (fd_cb != NULL) {
246 			FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
247 		}
248 	} else if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) {
249 		FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
250 	} else {
251 		group = g_flow_divert_groups[ctl_unit];
252 		if (group == NULL) {
253 			if (fd_cb != NULL) {
254 				FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
255 			}
256 		} else {
257 			FDGRP_RETAIN(group);
258 		}
259 	}
260 	lck_rw_done(&g_flow_divert_group_lck);
261 	return group;
262 }
263 
264 static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb * fd_cb,struct flow_divert_group * group)265 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
266 {
267 	int error = 0;
268 	lck_rw_lock_exclusive(&group->lck);
269 	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
270 		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
271 			fd_cb->group = group;
272 			fd_cb->control_group_unit = group->ctl_unit;
273 			FDRETAIN(fd_cb); /* The group now has a reference */
274 		} else {
275 			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
276 			error = EEXIST;
277 		}
278 	} else {
279 		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
280 		error = ENOENT;
281 	}
282 	lck_rw_done(&group->lck);
283 	return error;
284 }
285 
286 static errno_t
flow_divert_add_to_group(struct flow_divert_pcb * fd_cb,uint32_t ctl_unit)287 flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
288 {
289 	errno_t error = 0;
290 	struct flow_divert_group *group = NULL;
291 	static uint32_t g_nextkey = 1;
292 	static uint32_t g_hash_seed = 0;
293 	int try_count = 0;
294 
295 	group = flow_divert_group_lookup(ctl_unit, fd_cb);
296 	if (group == NULL) {
297 		return ENOENT;
298 	}
299 
300 	do {
301 		uint32_t key[2];
302 		uint32_t idx;
303 
304 		key[0] = g_nextkey++;
305 		key[1] = RandomULong();
306 
307 		if (g_hash_seed == 0) {
308 			g_hash_seed = RandomULong();
309 		}
310 
311 		error = 0;
312 		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);
313 
314 		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
315 			if (idx == ctl_unit) {
316 				continue;
317 			}
318 			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
319 			if (curr_group != NULL) {
320 				lck_rw_lock_shared(&curr_group->lck);
321 				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
322 					error = EEXIST;
323 				}
324 				lck_rw_done(&curr_group->lck);
325 				FDGRP_RELEASE(curr_group);
326 			}
327 		}
328 
329 		if (error == 0) {
330 			error = flow_divert_pcb_insert(fd_cb, group);
331 		}
332 	} while (error == EEXIST && try_count++ < 3);
333 
334 	if (error == EEXIST) {
335 		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
336 		fd_cb->hash = 0;
337 	}
338 
339 	FDGRP_RELEASE(group);
340 	return error;
341 }
342 
343 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)344 flow_divert_pcb_create(socket_t so)
345 {
346 	struct flow_divert_pcb  *new_pcb = NULL;
347 
348 	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
349 	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
350 	new_pcb->so = so;
351 	new_pcb->log_level = nil_pcb.log_level;
352 
353 	FDRETAIN(new_pcb);      /* Represents the socket's reference */
354 
355 	return new_pcb;
356 }
357 
358 static void
flow_divert_pcb_destroy(struct flow_divert_pcb * fd_cb)359 flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
360 {
361 	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %u, tunnel tx %u, tunnel rx %u",
362 	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);
363 
364 	if (fd_cb->connect_token != NULL) {
365 		mbuf_freem(fd_cb->connect_token);
366 	}
367 	if (fd_cb->connect_packet != NULL) {
368 		mbuf_freem(fd_cb->connect_packet);
369 	}
370 	if (fd_cb->app_data != NULL) {
371 		kfree_data(fd_cb->app_data, fd_cb->app_data_length);
372 	}
373 	if (fd_cb->original_remote_endpoint != NULL) {
374 		free_sockaddr(fd_cb->original_remote_endpoint);
375 	}
376 	zfree(flow_divert_pcb_zone, fd_cb);
377 }
378 
379 static void
flow_divert_pcb_remove(struct flow_divert_pcb * fd_cb)380 flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
381 {
382 	if (fd_cb->group != NULL) {
383 		struct flow_divert_group *group = fd_cb->group;
384 		lck_rw_lock_exclusive(&group->lck);
385 		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
386 		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
387 		fd_cb->group = NULL;
388 		FDRELEASE(fd_cb);                               /* Release the group's reference */
389 		lck_rw_done(&group->lck);
390 	}
391 }
392 
393 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_t * packet)394 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_t *packet)
395 {
396 	struct flow_divert_packet_header        hdr;
397 	int                                     error           = 0;
398 
399 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
400 	if (error) {
401 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
402 		return error;
403 	}
404 
405 	hdr.packet_type = packet_type;
406 	hdr.conn_id = htonl(fd_cb->hash);
407 
408 	/* Lay down the header */
409 	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
410 	if (error) {
411 		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
412 		mbuf_freem(*packet);
413 		*packet = NULL;
414 		return error;
415 	}
416 
417 	return 0;
418 }
419 
420 static int
flow_divert_packet_append_tlv(mbuf_t packet,uint8_t type,uint32_t length,const void * value)421 flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, uint32_t length, const void *value)
422 {
423 	uint32_t        net_length      = htonl(length);
424 	int                     error           = 0;
425 
426 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
427 	if (error) {
428 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
429 		return error;
430 	}
431 
432 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
433 	if (error) {
434 		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
435 		return error;
436 	}
437 
438 	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
439 	if (error) {
440 		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
441 		return error;
442 	}
443 
444 	return error;
445 }
446 
447 static int
flow_divert_packet_find_tlv(mbuf_t packet,int offset,uint8_t type,int * err,int next)448 flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next)
449 {
450 	size_t          cursor                  = offset;
451 	int                     error                   = 0;
452 	uint32_t        curr_length;
453 	uint8_t         curr_type;
454 
455 	*err = 0;
456 
457 	do {
458 		if (!next) {
459 			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
460 			if (error) {
461 				*err = ENOENT;
462 				return -1;
463 			}
464 		} else {
465 			next = 0;
466 			curr_type = FLOW_DIVERT_TLV_NIL;
467 		}
468 
469 		if (curr_type != type) {
470 			cursor += sizeof(curr_type);
471 			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
472 			if (error) {
473 				*err = error;
474 				return -1;
475 			}
476 
477 			cursor += (sizeof(curr_length) + ntohl(curr_length));
478 		}
479 	} while (curr_type != type);
480 
481 	return (int)cursor;
482 }
483 
484 static int
flow_divert_packet_get_tlv(mbuf_t packet,int offset,uint8_t type,size_t buff_len,void * buff,uint32_t * val_size)485 flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, uint32_t *val_size)
486 {
487 	int                     error           = 0;
488 	uint32_t        length;
489 	int                     tlv_offset;
490 
491 	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
492 	if (tlv_offset < 0) {
493 		return error;
494 	}
495 
496 	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
497 	if (error) {
498 		return error;
499 	}
500 
501 	length = ntohl(length);
502 
503 	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);
504 
505 	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
506 		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
507 		return EINVAL;
508 	}
509 
510 	if (val_size != NULL) {
511 		*val_size = length;
512 	}
513 
514 	if (buff != NULL && buff_len > 0) {
515 		memset(buff, 0, buff_len);
516 		size_t to_copy = (length < buff_len) ? length : buff_len;
517 		error = mbuf_copydata(packet, data_offset, to_copy, buff);
518 		if (error) {
519 			return error;
520 		}
521 	}
522 
523 	return 0;
524 }
525 
526 static int
flow_divert_packet_compute_hmac(mbuf_t packet,struct flow_divert_group * group,uint8_t * hmac)527 flow_divert_packet_compute_hmac(mbuf_t packet, struct flow_divert_group *group, uint8_t *hmac)
528 {
529 	mbuf_t  curr_mbuf       = packet;
530 
531 	if (g_crypto_funcs == NULL || group->token_key == NULL) {
532 		return ENOPROTOOPT;
533 	}
534 
535 	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
536 	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);
537 
538 	while (curr_mbuf != NULL) {
539 		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
540 		curr_mbuf = mbuf_next(curr_mbuf);
541 	}
542 
543 	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);
544 
545 	return 0;
546 }
547 
548 static int
flow_divert_packet_verify_hmac(mbuf_t packet,uint32_t ctl_unit)549 flow_divert_packet_verify_hmac(mbuf_t packet, uint32_t ctl_unit)
550 {
551 	int error = 0;
552 	struct flow_divert_group *group = NULL;
553 	int hmac_offset;
554 	uint8_t packet_hmac[SHA_DIGEST_LENGTH];
555 	uint8_t computed_hmac[SHA_DIGEST_LENGTH];
556 	mbuf_t tail;
557 
558 	group = flow_divert_group_lookup(ctl_unit, NULL);
559 	if (group == NULL) {
560 		FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
561 		return ENOPROTOOPT;
562 	}
563 
564 	lck_rw_lock_shared(&group->lck);
565 
566 	if (group->token_key == NULL) {
567 		error = ENOPROTOOPT;
568 		goto done;
569 	}
570 
571 	hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
572 	if (hmac_offset < 0) {
573 		goto done;
574 	}
575 
576 	error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
577 	if (error) {
578 		goto done;
579 	}
580 
581 	/* Chop off the HMAC TLV */
582 	error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
583 	if (error) {
584 		goto done;
585 	}
586 
587 	mbuf_free(tail);
588 
589 	error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
590 	if (error) {
591 		goto done;
592 	}
593 
594 	if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
595 		FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
596 		error = EINVAL;
597 		goto done;
598 	}
599 
600 done:
601 	if (group != NULL) {
602 		lck_rw_done(&group->lck);
603 		FDGRP_RELEASE(group);
604 	}
605 	return error;
606 }
607 
608 static void
flow_divert_add_data_statistics(struct flow_divert_pcb * fd_cb,size_t data_len,Boolean send)609 flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
610 {
611 	struct inpcb *inp = NULL;
612 	struct ifnet *ifp = NULL;
613 	Boolean cell = FALSE;
614 	Boolean wifi = FALSE;
615 	Boolean wired = FALSE;
616 
617 	inp = sotoinpcb(fd_cb->so);
618 	if (inp == NULL) {
619 		return;
620 	}
621 
622 	if (inp->inp_vflag & INP_IPV4) {
623 		ifp = inp->inp_last_outifp;
624 	} else if (inp->inp_vflag & INP_IPV6) {
625 		ifp = inp->in6p_last_outifp;
626 	}
627 	if (ifp != NULL) {
628 		cell = IFNET_IS_CELLULAR(ifp);
629 		wifi = (!cell && IFNET_IS_WIFI(ifp));
630 		wired = (!wifi && IFNET_IS_WIRED(ifp));
631 	}
632 
633 	if (send) {
634 		INP_ADD_STAT(inp, cell, wifi, wired, txpackets, 1);
635 		INP_ADD_STAT(inp, cell, wifi, wired, txbytes, data_len);
636 	} else {
637 		INP_ADD_STAT(inp, cell, wifi, wired, rxpackets, 1);
638 		INP_ADD_STAT(inp, cell, wifi, wired, rxbytes, data_len);
639 	}
640 	inp_set_activity_bitmap(inp);
641 }
642 
643 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)644 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
645 {
646 	struct inpcb *inp = sotoinpcb(fd_cb->so);
647 	if (INP_NO_CELLULAR(inp)) {
648 		struct ifnet *ifp = NULL;
649 		if (inp->inp_vflag & INP_IPV4) {
650 			ifp = inp->inp_last_outifp;
651 		} else if (inp->inp_vflag & INP_IPV6) {
652 			ifp = inp->in6p_last_outifp;
653 		}
654 		if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
655 			FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
656 			return EHOSTUNREACH;
657 		}
658 	}
659 	return 0;
660 }
661 
662 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)663 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
664 {
665 	struct inpcb *inp = sotoinpcb(fd_cb->so);
666 	if (INP_NO_EXPENSIVE(inp)) {
667 		struct ifnet *ifp = NULL;
668 		if (inp->inp_vflag & INP_IPV4) {
669 			ifp = inp->inp_last_outifp;
670 		} else if (inp->inp_vflag & INP_IPV6) {
671 			ifp = inp->in6p_last_outifp;
672 		}
673 		if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
674 			FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
675 			return EHOSTUNREACH;
676 		}
677 	}
678 	return 0;
679 }
680 
681 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)682 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
683 {
684 	struct inpcb *inp = sotoinpcb(fd_cb->so);
685 	if (INP_NO_CONSTRAINED(inp)) {
686 		struct ifnet *ifp = NULL;
687 		if (inp->inp_vflag & INP_IPV4) {
688 			ifp = inp->inp_last_outifp;
689 		} else if (inp->inp_vflag & INP_IPV6) {
690 			ifp = inp->in6p_last_outifp;
691 		}
692 		if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
693 			FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
694 			return EHOSTUNREACH;
695 		}
696 	}
697 	return 0;
698 }
699 
700 static void
flow_divert_update_closed_state(struct flow_divert_pcb * fd_cb,int how,Boolean tunnel)701 flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, Boolean tunnel)
702 {
703 	if (how != SHUT_RD) {
704 		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
705 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
706 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
707 			/* If the tunnel is not accepting writes any more, then flush the send buffer */
708 			sbflush(&fd_cb->so->so_snd);
709 		}
710 	}
711 	if (how != SHUT_WR) {
712 		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
713 		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
714 			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
715 		}
716 	}
717 }
718 
719 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)720 trie_node_alloc(struct flow_divert_trie *trie)
721 {
722 	if (trie->nodes_free_next < trie->nodes_count) {
723 		uint16_t node_idx = trie->nodes_free_next++;
724 		TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
725 		return node_idx;
726 	} else {
727 		return NULL_TRIE_IDX;
728 	}
729 }
730 
731 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)732 trie_child_map_alloc(struct flow_divert_trie *trie)
733 {
734 	if (trie->child_maps_free_next < trie->child_maps_count) {
735 		return trie->child_maps_free_next++;
736 	} else {
737 		return NULL_TRIE_IDX;
738 	}
739 }
740 
741 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)742 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
743 {
744 	uint16_t start = trie->bytes_free_next;
745 	if (start + bytes_size <= trie->bytes_count) {
746 		if (start != bytes_idx) {
747 			memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
748 		}
749 		trie->bytes_free_next += bytes_size;
750 		return start;
751 	} else {
752 		return NULL_TRIE_IDX;
753 	}
754 }
755 
756 static uint16_t
flow_divert_trie_insert(struct flow_divert_trie * trie,uint16_t string_start,size_t string_len)757 flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
758 {
759 	uint16_t current = trie->root;
760 	uint16_t child = trie->root;
761 	uint16_t string_end = string_start + (uint16_t)string_len;
762 	uint16_t string_idx = string_start;
763 	uint16_t string_remainder = (uint16_t)string_len;
764 
765 	while (child != NULL_TRIE_IDX) {
766 		uint16_t parent = current;
767 		uint16_t node_idx;
768 		uint16_t current_end;
769 
770 		current = child;
771 		child = NULL_TRIE_IDX;
772 
773 		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
774 
775 		for (node_idx = TRIE_NODE(trie, current).start;
776 		    node_idx < current_end &&
777 		    string_idx < string_end &&
778 		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
779 		    node_idx++, string_idx++) {
780 			;
781 		}
782 
783 		string_remainder = string_end - string_idx;
784 
785 		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
786 			/*
787 			 * We did not reach the end of the current node's string.
788 			 * We need to split the current node into two:
789 			 *   1. A new node that contains the prefix of the node that matches
790 			 *      the prefix of the string being inserted.
791 			 *   2. The current node modified to point to the remainder
792 			 *      of the current node's string.
793 			 */
794 			uint16_t prefix = trie_node_alloc(trie);
795 			if (prefix == NULL_TRIE_IDX) {
796 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
797 				return NULL_TRIE_IDX;
798 			}
799 
800 			/*
801 			 * Prefix points to the portion of the current nodes's string that has matched
802 			 * the input string thus far.
803 			 */
804 			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
805 			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);
806 
807 			/*
808 			 * Prefix has the current node as the child corresponding to the first byte
809 			 * after the split.
810 			 */
811 			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
812 			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
813 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
814 				return NULL_TRIE_IDX;
815 			}
816 			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
817 
818 			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
819 			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
820 
821 			/* Current node is adjusted to point to the remainder */
822 			TRIE_NODE(trie, current).start = node_idx;
823 			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
824 
825 			/* We want to insert the new leaf (if any) as a child of the prefix */
826 			current = prefix;
827 		}
828 
829 		if (string_remainder > 0) {
830 			/*
831 			 * We still have bytes in the string that have not been matched yet.
832 			 * If the current node has children, iterate to the child corresponding
833 			 * to the next byte in the string.
834 			 */
835 			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
836 				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
837 			}
838 		}
839 	} /* while (child != NULL_TRIE_IDX) */
840 
841 	if (string_remainder > 0) {
842 		/* Add a new leaf containing the remainder of the string */
843 		uint16_t leaf = trie_node_alloc(trie);
844 		if (leaf == NULL_TRIE_IDX) {
845 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
846 			return NULL_TRIE_IDX;
847 		}
848 
849 		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
850 		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
851 			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
852 			return NULL_TRIE_IDX;
853 		}
854 		TRIE_NODE(trie, leaf).length = string_remainder;
855 
856 		/* Set the new leaf as the child of the current node */
857 		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
858 			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
859 			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
860 				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
861 				return NULL_TRIE_IDX;
862 			}
863 		}
864 		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
865 		current = leaf;
866 	} /* else duplicate or this string is a prefix of one of the existing strings */
867 
868 	return current;
869 }
870 
871 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
872 static uint16_t
flow_divert_trie_search(struct flow_divert_trie * trie,const uint8_t * string_bytes)873 flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
874 {
875 	uint16_t current = trie->root;
876 	uint16_t string_idx = 0;
877 
878 	while (current != NULL_TRIE_IDX) {
879 		uint16_t next = NULL_TRIE_IDX;
880 		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
881 		uint16_t node_idx;
882 
883 		for (node_idx = TRIE_NODE(trie, current).start;
884 		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
885 		    node_idx++, string_idx++) {
886 			;
887 		}
888 
889 		if (node_idx == node_end) {
890 			if (string_bytes[string_idx] == '\0') {
891 				return current; /* Got an exact match */
892 			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
893 			    0 == strncmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
894 				return current; /* Got an apple webclip id prefix match */
895 			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
896 				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
897 			}
898 		}
899 		current = next;
900 	}
901 
902 	return NULL_TRIE_IDX;
903 }
904 
905 struct uuid_search_info {
906 	uuid_t target_uuid;
907 	char *found_signing_id;
908 	boolean_t found_multiple_signing_ids;
909 	proc_t found_proc;
910 };
911 
912 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)913 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
914 {
915 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
916 	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
917 
918 	if (info->found_signing_id != NULL) {
919 		if (!info->found_multiple_signing_ids) {
920 			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
921 			info->found_proc = p;
922 			result = PROC_CLAIMED_DONE;
923 		} else {
924 			uuid_string_t uuid_str;
925 			uuid_unparse(info->target_uuid, uuid_str);
926 			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
927 		}
928 		kfree_data(info->found_signing_id, strlen(info->found_signing_id) + 1);
929 		info->found_signing_id = NULL;
930 	}
931 
932 	if (result == PROC_RETURNED_DONE) {
933 		uuid_string_t uuid_str;
934 		uuid_unparse(info->target_uuid, uuid_str);
935 		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
936 	}
937 
938 	return result;
939 }
940 
941 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)942 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
943 {
944 	struct uuid_search_info *info = (struct uuid_search_info *)arg;
945 	int include = 0;
946 
947 	if (info->found_multiple_signing_ids) {
948 		return include;
949 	}
950 
951 	include = (uuid_compare(proc_executableuuid_addr(p), info->target_uuid) == 0);
952 	if (include) {
953 		const char *signing_id = cs_identity_get(p);
954 		if (signing_id != NULL) {
955 			FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
956 			size_t signing_id_size = strlen(signing_id) + 1;
957 			if (info->found_signing_id == NULL) {
958 				info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
959 				memcpy(info->found_signing_id, signing_id, signing_id_size);
960 			} else if (memcmp(signing_id, info->found_signing_id, signing_id_size)) {
961 				info->found_multiple_signing_ids = TRUE;
962 			}
963 		} else {
964 			info->found_multiple_signing_ids = TRUE;
965 		}
966 		include = !info->found_multiple_signing_ids;
967 	}
968 
969 	return include;
970 }
971 
972 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)973 flow_divert_find_proc_by_uuid(uuid_t uuid)
974 {
975 	struct uuid_search_info info;
976 
977 	if (LOG_INFO <= nil_pcb.log_level) {
978 		uuid_string_t uuid_str;
979 		uuid_unparse(uuid, uuid_str);
980 		FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
981 	}
982 
983 	memset(&info, 0, sizeof(info));
984 	info.found_proc = PROC_NULL;
985 	uuid_copy(info.target_uuid, uuid);
986 
987 	proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
988 
989 	return info.found_proc;
990 }
991 
992 static int
flow_divert_add_proc_info(struct flow_divert_pcb * fd_cb,proc_t proc,const char * signing_id,mbuf_t connect_packet,bool is_effective)993 flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id, mbuf_t connect_packet, bool is_effective)
994 {
995 	int error = 0;
996 	uint8_t *cdhash = NULL;
997 	audit_token_t audit_token = {};
998 	const char *proc_cs_id = signing_id;
999 
1000 	proc_lock(proc);
1001 
1002 	if (proc_cs_id == NULL) {
1003 		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
1004 			proc_cs_id = cs_identity_get(proc);
1005 		} else {
1006 			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
1007 		}
1008 	}
1009 
1010 	if (is_effective) {
1011 		lck_rw_lock_shared(&fd_cb->group->lck);
1012 		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1013 			if (proc_cs_id != NULL) {
1014 				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)proc_cs_id);
1015 				if (result == NULL_TRIE_IDX) {
1016 					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
1017 					error = EPERM;
1018 				} else {
1019 					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
1020 				}
1021 			} else {
1022 				error = EPERM;
1023 			}
1024 		}
1025 		lck_rw_done(&fd_cb->group->lck);
1026 	}
1027 
1028 	if (error != 0) {
1029 		goto done;
1030 	}
1031 
1032 	/*
1033 	 * If signing_id is not NULL then it came from the flow divert token and will be added
1034 	 * as part of the token, so there is no need to add it here.
1035 	 */
1036 	if (signing_id == NULL && proc_cs_id != NULL) {
1037 		error = flow_divert_packet_append_tlv(connect_packet,
1038 		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
1039 		    (uint32_t)strlen(proc_cs_id),
1040 		    proc_cs_id);
1041 		if (error != 0) {
1042 			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
1043 			goto done;
1044 		}
1045 	}
1046 
1047 	cdhash = cs_get_cdhash(proc);
1048 	if (cdhash != NULL) {
1049 		error = flow_divert_packet_append_tlv(connect_packet,
1050 		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
1051 		    SHA1_RESULTLEN,
1052 		    cdhash);
1053 		if (error) {
1054 			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
1055 			goto done;
1056 		}
1057 	} else {
1058 		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
1059 	}
1060 
1061 	task_t task = proc_task(proc);
1062 	if (task != TASK_NULL) {
1063 		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
1064 		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
1065 		if (rc == KERN_SUCCESS) {
1066 			int append_error = flow_divert_packet_append_tlv(connect_packet,
1067 			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
1068 			    sizeof(audit_token_t),
1069 			    &audit_token);
1070 			if (append_error) {
1071 				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
1072 			}
1073 		}
1074 	}
1075 
1076 done:
1077 	proc_unlock(proc);
1078 
1079 	return error;
1080 }
1081 
1082 static int
flow_divert_add_all_proc_info(struct flow_divert_pcb * fd_cb,struct socket * so,proc_t proc,const char * signing_id,mbuf_t connect_packet)1083 flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id, mbuf_t connect_packet)
1084 {
1085 	int error = 0;
1086 	proc_t effective_proc = PROC_NULL;
1087 	proc_t responsible_proc = PROC_NULL;
1088 	proc_t real_proc = proc_find(so->last_pid);
1089 	bool release_real_proc = true;
1090 
1091 	proc_t src_proc = PROC_NULL;
1092 	proc_t real_src_proc = PROC_NULL;
1093 
1094 	if (real_proc == PROC_NULL) {
1095 		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
1096 		release_real_proc = false;
1097 		real_proc = proc;
1098 		if (real_proc == PROC_NULL) {
1099 			real_proc = current_proc();
1100 		}
1101 	}
1102 
1103 	if (so->so_flags & SOF_DELEGATED) {
1104 		if (proc_getpid(real_proc) != so->e_pid) {
1105 			effective_proc = proc_find(so->e_pid);
1106 		} else if (uuid_compare(proc_executableuuid_addr(real_proc), so->e_uuid)) {
1107 			effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
1108 		}
1109 	}
1110 
1111 #if defined(XNU_TARGET_OS_OSX)
1112 	lck_rw_lock_shared(&fd_cb->group->lck);
1113 	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
1114 		if (so->so_rpid > 0) {
1115 			responsible_proc = proc_find(so->so_rpid);
1116 		}
1117 	}
1118 	lck_rw_done(&fd_cb->group->lck);
1119 #endif
1120 
1121 	real_src_proc = real_proc;
1122 
1123 	if (responsible_proc != PROC_NULL) {
1124 		src_proc = responsible_proc;
1125 		if (effective_proc != NULL) {
1126 			real_src_proc = effective_proc;
1127 		}
1128 	} else if (effective_proc != PROC_NULL) {
1129 		src_proc = effective_proc;
1130 	} else {
1131 		src_proc = real_proc;
1132 	}
1133 
1134 	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
1135 	if (error != 0) {
1136 		goto done;
1137 	}
1138 
1139 	if (real_src_proc != NULL && real_src_proc != src_proc) {
1140 		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
1141 		if (error != 0) {
1142 			goto done;
1143 		}
1144 	}
1145 
1146 done:
1147 	if (responsible_proc != PROC_NULL) {
1148 		proc_rele(responsible_proc);
1149 	}
1150 
1151 	if (effective_proc != PROC_NULL) {
1152 		proc_rele(effective_proc);
1153 	}
1154 
1155 	if (real_proc != PROC_NULL && release_real_proc) {
1156 		proc_rele(real_proc);
1157 	}
1158 
1159 	return error;
1160 }
1161 
1162 static int
flow_divert_send_packet(struct flow_divert_pcb * fd_cb,mbuf_t packet,Boolean enqueue)1163 flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean enqueue)
1164 {
1165 	int             error;
1166 
1167 	if (fd_cb->group == NULL) {
1168 		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
1169 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
1170 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
1171 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1172 			error = ECONNABORTED;
1173 		} else {
1174 			error = EHOSTUNREACH;
1175 		}
1176 		fd_cb->so->so_error = (uint16_t)error;
1177 		return error;
1178 	}
1179 
1180 	lck_rw_lock_shared(&fd_cb->group->lck);
1181 
1182 	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
1183 		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
1184 	} else {
1185 		error = ENOBUFS;
1186 	}
1187 
1188 	if (error == ENOBUFS) {
1189 		if (enqueue) {
1190 			if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
1191 				lck_rw_lock_exclusive(&fd_cb->group->lck);
1192 			}
1193 			MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
1194 			error = 0;
1195 		}
1196 		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
1197 	}
1198 
1199 	lck_rw_done(&fd_cb->group->lck);
1200 
1201 	return error;
1202 }
1203 
1204 static void
flow_divert_append_domain_name(char * domain_name,void * ctx)1205 flow_divert_append_domain_name(char *domain_name, void *ctx)
1206 {
1207 	mbuf_t packet = (mbuf_t)ctx;
1208 	size_t domain_name_length = 0;
1209 
1210 	if (packet == NULL || domain_name == NULL) {
1211 		return;
1212 	}
1213 
1214 	domain_name_length = strlen(domain_name);
1215 	if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1216 		int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, domain_name);
1217 		if (error) {
1218 			FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1219 		}
1220 	}
1221 }
1222 
1223 static int
flow_divert_create_connect_packet(struct flow_divert_pcb * fd_cb,struct sockaddr * to,struct socket * so,proc_t p,mbuf_t * out_connect_packet)1224 flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_t *out_connect_packet)
1225 {
1226 	int                     error                   = 0;
1227 	int                     flow_type               = 0;
1228 	char                    *signing_id = NULL;
1229 	uint32_t                sid_size = 0;
1230 	mbuf_t                  connect_packet = NULL;
1231 	cfil_sock_id_t          cfil_sock_id            = CFIL_SOCK_ID_NONE;
1232 	const void              *cfil_id                = NULL;
1233 	size_t                  cfil_id_size            = 0;
1234 	struct inpcb            *inp = sotoinpcb(so);
1235 	struct ifnet *ifp = NULL;
1236 	uint32_t flags = 0;
1237 
1238 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
1239 	if (error) {
1240 		goto done;
1241 	}
1242 
1243 	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
1244 		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
1245 		if (find_error == 0 && sid_size > 0) {
1246 			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
1247 			if (signing_id != NULL) {
1248 				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
1249 				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
1250 			}
1251 		}
1252 	}
1253 
1254 	error = flow_divert_add_all_proc_info(fd_cb, so, p, signing_id, connect_packet);
1255 
1256 	if (signing_id != NULL) {
1257 		kfree_data(signing_id, sid_size + 1);
1258 	}
1259 
1260 	if (error) {
1261 		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
1262 		goto done;
1263 	}
1264 
1265 	error = flow_divert_packet_append_tlv(connect_packet,
1266 	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
1267 	    sizeof(fd_cb->so->so_traffic_class),
1268 	    &fd_cb->so->so_traffic_class);
1269 	if (error) {
1270 		goto done;
1271 	}
1272 
1273 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1274 		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
1275 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1276 		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
1277 	} else {
1278 		error = EINVAL;
1279 		goto done;
1280 	}
1281 	error = flow_divert_packet_append_tlv(connect_packet,
1282 	    FLOW_DIVERT_TLV_FLOW_TYPE,
1283 	    sizeof(flow_type),
1284 	    &flow_type);
1285 
1286 	if (error) {
1287 		goto done;
1288 	}
1289 
1290 	if (fd_cb->connect_token != NULL) {
1291 		unsigned int token_len = m_length(fd_cb->connect_token);
1292 		mbuf_concatenate(connect_packet, fd_cb->connect_token);
1293 		mbuf_pkthdr_adjustlen(connect_packet, token_len);
1294 		fd_cb->connect_token = NULL;
1295 	} else {
1296 		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
1297 		if (error) {
1298 			goto done;
1299 		}
1300 
1301 		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
1302 	}
1303 
1304 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1305 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1306 		if (error) {
1307 			goto done;
1308 		}
1309 	}
1310 
1311 	if (inp->inp_vflag & INP_IPV4) {
1312 		ifp = inp->inp_last_outifp;
1313 	} else if (inp->inp_vflag & INP_IPV6) {
1314 		ifp = inp->in6p_last_outifp;
1315 	}
1316 	if (ifp != NULL) {
1317 		uint32_t flow_if_index = ifp->if_index;
1318 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
1319 		    sizeof(flow_if_index), &flow_if_index);
1320 		if (error) {
1321 			goto done;
1322 		}
1323 	}
1324 
1325 	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
1326 		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
1327 	}
1328 
1329 	if ((inp->inp_flags & INP_BOUND_IF) ||
1330 	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
1331 	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
1332 		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
1333 	}
1334 
1335 	if (flags != 0) {
1336 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
1337 		if (error) {
1338 			goto done;
1339 		}
1340 	}
1341 
1342 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
1343 		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
1344 	} else {
1345 		cfil_sock_id = cfil_sock_id_from_socket(so);
1346 	}
1347 
1348 	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
1349 		cfil_id = &cfil_sock_id;
1350 		cfil_id_size = sizeof(cfil_sock_id);
1351 	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
1352 		cfil_id = &inp->necp_client_uuid;
1353 		cfil_id_size = sizeof(inp->necp_client_uuid);
1354 	}
1355 
1356 	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
1357 		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
1358 		if (error) {
1359 			goto done;
1360 		}
1361 	}
1362 
1363 done:
1364 	if (!error) {
1365 		*out_connect_packet = connect_packet;
1366 	} else if (connect_packet != NULL) {
1367 		mbuf_freem(connect_packet);
1368 	}
1369 
1370 	return error;
1371 }
1372 
1373 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1374 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1375 {
1376 	int error = 0;
1377 	mbuf_t connect_packet = fd_cb->connect_packet;
1378 	mbuf_t saved_connect_packet = NULL;
1379 
1380 	if (connect_packet != NULL) {
1381 		error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1382 		if (error) {
1383 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1384 			goto done;
1385 		}
1386 
1387 		error = flow_divert_send_packet(fd_cb, connect_packet, TRUE);
1388 		if (error) {
1389 			goto done;
1390 		}
1391 
1392 		fd_cb->connect_packet = saved_connect_packet;
1393 		saved_connect_packet = NULL;
1394 	} else {
1395 		error = ENOENT;
1396 	}
1397 done:
1398 	if (saved_connect_packet != NULL) {
1399 		mbuf_freem(saved_connect_packet);
1400 	}
1401 
1402 	return error;
1403 }
1404 
1405 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1406 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1407 {
1408 	int             error                   = 0;
1409 	mbuf_t  packet                  = NULL;
1410 	int             rbuff_space             = 0;
1411 
1412 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1413 	if (error) {
1414 		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1415 		goto done;
1416 	}
1417 
1418 	rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1419 	if (rbuff_space < 0) {
1420 		rbuff_space = 0;
1421 	}
1422 	rbuff_space = htonl(rbuff_space);
1423 	error = flow_divert_packet_append_tlv(packet,
1424 	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1425 	    sizeof(rbuff_space),
1426 	    &rbuff_space);
1427 	if (error) {
1428 		goto done;
1429 	}
1430 
1431 	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1432 		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1433 		if (error) {
1434 			goto done;
1435 		}
1436 	}
1437 
1438 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1439 	if (error) {
1440 		goto done;
1441 	}
1442 
1443 done:
1444 	if (error && packet != NULL) {
1445 		mbuf_freem(packet);
1446 	}
1447 
1448 	return error;
1449 }
1450 
1451 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1452 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1453 {
1454 	int             error   = 0;
1455 	mbuf_t  packet  = NULL;
1456 	uint32_t        zero    = 0;
1457 
1458 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1459 	if (error) {
1460 		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1461 		goto done;
1462 	}
1463 
1464 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1465 	if (error) {
1466 		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1467 		goto done;
1468 	}
1469 
1470 	how = htonl(how);
1471 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1472 	if (error) {
1473 		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1474 		goto done;
1475 	}
1476 
1477 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1478 	if (error) {
1479 		goto done;
1480 	}
1481 
1482 done:
1483 	if (error && packet != NULL) {
1484 		mbuf_free(packet);
1485 	}
1486 
1487 	return error;
1488 }
1489 
1490 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1491 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1492 {
1493 	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1494 	    (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1495 		return SHUT_RDWR;
1496 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1497 		return SHUT_RD;
1498 	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1499 		return SHUT_WR;
1500 	}
1501 
1502 	return -1;
1503 }
1504 
1505 /*
1506  * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1507  * writes. Returns FALSE otherwise.
1508  */
1509 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1510 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1511 {
1512 	int             how             = -1;
1513 
1514 	/* Do not send any close messages if there is still data in the send buffer */
1515 	if (fd_cb->so->so_snd.sb_cc == 0) {
1516 		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1517 			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1518 			how = SHUT_RD;
1519 		}
1520 		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1521 			/* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1522 			if (how == SHUT_RD) {
1523 				how = SHUT_RDWR;
1524 			} else {
1525 				how = SHUT_WR;
1526 			}
1527 		}
1528 	}
1529 
1530 	if (how != -1) {
1531 		FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1532 		if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1533 			/* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1534 			if (how != SHUT_RD) {
1535 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1536 			}
1537 			if (how != SHUT_WR) {
1538 				fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1539 			}
1540 		}
1541 	}
1542 
1543 	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
1544 		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
1545 	}
1546 }
1547 
1548 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_t data,size_t data_len,struct sockaddr * toaddr,Boolean force)1549 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, struct sockaddr *toaddr, Boolean force)
1550 {
1551 	mbuf_t  packet = NULL;
1552 	mbuf_t  last = NULL;
1553 	int             error   = 0;
1554 
1555 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1556 	if (error || packet == NULL) {
1557 		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1558 		goto done;
1559 	}
1560 
1561 	if (toaddr != NULL) {
1562 		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
1563 		if (error) {
1564 			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
1565 			goto done;
1566 		}
1567 	}
1568 
1569 	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1570 		last = m_last(packet);
1571 		mbuf_setnext(last, data);
1572 		mbuf_pkthdr_adjustlen(packet, (int)data_len);
1573 	} else {
1574 		data_len = 0;
1575 	}
1576 	error = flow_divert_send_packet(fd_cb, packet, force);
1577 	if (error == 0 && data_len > 0) {
1578 		fd_cb->bytes_sent += data_len;
1579 		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1580 	}
1581 
1582 done:
1583 	if (error) {
1584 		if (last != NULL) {
1585 			mbuf_setnext(last, NULL);
1586 		}
1587 		if (packet != NULL) {
1588 			mbuf_freem(packet);
1589 		}
1590 	}
1591 
1592 	return error;
1593 }
1594 
1595 static void
flow_divert_send_buffered_data(struct flow_divert_pcb * fd_cb,Boolean force)1596 flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
1597 {
1598 	size_t  to_send;
1599 	size_t  sent    = 0;
1600 	int             error   = 0;
1601 	mbuf_t  buffer;
1602 
1603 	to_send = fd_cb->so->so_snd.sb_cc;
1604 	buffer = fd_cb->so->so_snd.sb_mb;
1605 
1606 	if (buffer == NULL && to_send > 0) {
1607 		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
1608 		return;
1609 	}
1610 
1611 	/* Ignore the send window if force is enabled */
1612 	if (!force && (to_send > fd_cb->send_window)) {
1613 		to_send = fd_cb->send_window;
1614 	}
1615 
1616 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1617 		while (sent < to_send) {
1618 			mbuf_t  data;
1619 			size_t  data_len;
1620 
1621 			data_len = to_send - sent;
1622 			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
1623 				data_len = FLOW_DIVERT_CHUNK_SIZE;
1624 			}
1625 
1626 			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
1627 			if (error) {
1628 				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1629 				break;
1630 			}
1631 
1632 			error = flow_divert_send_data_packet(fd_cb, data, data_len, NULL, force);
1633 			if (error) {
1634 				if (data != NULL) {
1635 					mbuf_freem(data);
1636 				}
1637 				break;
1638 			}
1639 
1640 			sent += data_len;
1641 		}
1642 		sbdrop(&fd_cb->so->so_snd, (int)sent);
1643 		sowwakeup(fd_cb->so);
1644 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1645 		mbuf_t data;
1646 		mbuf_t m;
1647 		size_t data_len;
1648 
1649 		while (buffer) {
1650 			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);
1651 
1652 			m = buffer;
1653 			if (toaddr != NULL) {
1654 				/* look for data in the chain */
1655 				do {
1656 					m = m->m_next;
1657 					if (m != NULL && m->m_type == MT_DATA) {
1658 						break;
1659 					}
1660 				} while (m);
1661 				if (m == NULL) {
1662 					/* unexpected */
1663 					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
1664 					goto move_on;
1665 				}
1666 			}
1667 			data_len = mbuf_pkthdr_len(m);
1668 			if (data_len > 0) {
1669 				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
1670 				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
1671 				if (error) {
1672 					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
1673 					break;
1674 				}
1675 			} else {
1676 				data = NULL;
1677 			}
1678 			error = flow_divert_send_data_packet(fd_cb, data, data_len, toaddr, force);
1679 			if (error) {
1680 				if (data != NULL) {
1681 					mbuf_freem(data);
1682 				}
1683 				break;
1684 			}
1685 			sent += data_len;
1686 move_on:
1687 			buffer = buffer->m_nextpkt;
1688 			(void) sbdroprecord(&(fd_cb->so->so_snd));
1689 		}
1690 	}
1691 
1692 	if (sent > 0) {
1693 		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
1694 		if (fd_cb->send_window >= sent) {
1695 			fd_cb->send_window -= sent;
1696 		} else {
1697 			fd_cb->send_window = 0;
1698 		}
1699 	}
1700 }
1701 
1702 static int
flow_divert_send_app_data(struct flow_divert_pcb * fd_cb,mbuf_t data,struct sockaddr * toaddr)1703 flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data, struct sockaddr *toaddr)
1704 {
1705 	size_t  to_send         = mbuf_pkthdr_len(data);
1706 	int     error           = 0;
1707 
1708 	if (to_send > fd_cb->send_window) {
1709 		to_send = fd_cb->send_window;
1710 	}
1711 
1712 	if (fd_cb->so->so_snd.sb_cc > 0) {
1713 		to_send = 0;    /* If the send buffer is non-empty, then we can't send anything */
1714 	}
1715 
1716 	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
1717 		size_t  sent            = 0;
1718 		mbuf_t  remaining_data  = data;
1719 		mbuf_t  pkt_data        = NULL;
1720 		while (sent < to_send && remaining_data != NULL) {
1721 			size_t  pkt_data_len;
1722 
1723 			pkt_data = remaining_data;
1724 
1725 			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
1726 				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
1727 			} else {
1728 				pkt_data_len = to_send - sent;
1729 			}
1730 
1731 			if (pkt_data_len < mbuf_pkthdr_len(pkt_data)) {
1732 				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
1733 				if (error) {
1734 					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
1735 					pkt_data = NULL;
1736 					break;
1737 				}
1738 			} else {
1739 				remaining_data = NULL;
1740 			}
1741 
1742 			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len, NULL, FALSE);
1743 
1744 			if (error) {
1745 				break;
1746 			}
1747 
1748 			pkt_data = NULL;
1749 			sent += pkt_data_len;
1750 		}
1751 
1752 		fd_cb->send_window -= sent;
1753 
1754 		error = 0;
1755 
1756 		if (pkt_data != NULL) {
1757 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1758 				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
1759 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
1760 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1761 				}
1762 			} else {
1763 				mbuf_freem(pkt_data);
1764 				error = ENOBUFS;
1765 			}
1766 		}
1767 
1768 		if (remaining_data != NULL) {
1769 			if (sbspace(&fd_cb->so->so_snd) > 0) {
1770 				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
1771 					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
1772 					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
1773 				}
1774 			} else {
1775 				mbuf_freem(remaining_data);
1776 				error = ENOBUFS;
1777 			}
1778 		}
1779 	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
1780 		if (to_send || mbuf_pkthdr_len(data) == 0) {
1781 			error = flow_divert_send_data_packet(fd_cb, data, to_send, toaddr, FALSE);
1782 			if (error) {
1783 				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_data_packet failed. send data size = %lu", to_send);
1784 				if (data != NULL) {
1785 					mbuf_freem(data);
1786 				}
1787 			} else {
1788 				fd_cb->send_window -= to_send;
1789 			}
1790 		} else {
1791 			/* buffer it */
1792 			if (sbspace(&fd_cb->so->so_snd) >= (int)mbuf_pkthdr_len(data)) {
1793 				if (toaddr != NULL) {
1794 					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &error)) {
1795 						FDLOG(LOG_ERR, fd_cb,
1796 						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d\n",
1797 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error);
1798 					}
1799 					error = 0;
1800 				} else {
1801 					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
1802 						FDLOG(LOG_ERR, fd_cb,
1803 						    "sbappendrecord failed. send buffer size = %u, send_window = %u, error = %d\n",
1804 						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, error);
1805 					}
1806 				}
1807 			} else {
1808 				if (data != NULL) {
1809 					mbuf_freem(data);
1810 				}
1811 				error = ENOBUFS;
1812 			}
1813 		}
1814 	}
1815 
1816 	return error;
1817 }
1818 
1819 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1820 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1821 {
1822 	int error = 0;
1823 	mbuf_t packet = NULL;
1824 
1825 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1826 	if (error) {
1827 		FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1828 		goto done;
1829 	}
1830 
1831 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1832 	if (error) {
1833 		goto done;
1834 	}
1835 
1836 done:
1837 	if (error && packet != NULL) {
1838 		mbuf_free(packet);
1839 	}
1840 
1841 	return error;
1842 }
1843 
1844 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1845 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1846 {
1847 	int             error           = 0;
1848 	mbuf_t  packet          = NULL;
1849 
1850 	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
1851 	if (error) {
1852 		FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
1853 		goto done;
1854 	}
1855 
1856 	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
1857 	if (error) {
1858 		FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
1859 		goto done;
1860 	}
1861 
1862 	error = flow_divert_send_packet(fd_cb, packet, TRUE);
1863 	if (error) {
1864 		goto done;
1865 	}
1866 
1867 done:
1868 	if (error && packet != NULL) {
1869 		mbuf_free(packet);
1870 	}
1871 
1872 	return error;
1873 }
1874 
1875 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)1876 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
1877 {
1878 	struct inpcb *inp = sotoinpcb(fd_cb->so);
1879 
1880 	if (local_endpoint->sa_family == AF_INET6) {
1881 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
1882 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
1883 			inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
1884 			inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
1885 			in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
1886 		}
1887 		if (inp->inp_lport == 0) {
1888 			inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
1889 		}
1890 	} else if (local_endpoint->sa_family == AF_INET) {
1891 		if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
1892 			fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
1893 			inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
1894 		}
1895 		if (inp->inp_lport == 0) {
1896 			inp->inp_lport = (satosin(local_endpoint))->sin_port;
1897 		}
1898 	}
1899 }
1900 
1901 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)1902 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
1903 {
1904 	struct inpcb *inp = sotoinpcb(fd_cb->so);
1905 
1906 	if (remote_endpoint->sa_family == AF_INET6) {
1907 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
1908 			inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
1909 			inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
1910 			in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
1911 		}
1912 		if (inp->inp_fport == 0) {
1913 			inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
1914 		}
1915 	} else if (remote_endpoint->sa_family == AF_INET) {
1916 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
1917 			inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
1918 		}
1919 		if (inp->inp_fport == 0) {
1920 			inp->inp_fport = (satosin(remote_endpoint))->sin_port;
1921 		}
1922 	}
1923 }
1924 
1925 static uint32_t
flow_divert_derive_kernel_control_unit(uint32_t * ctl_unit,uint32_t * aggregate_unit,bool * is_aggregate)1926 flow_divert_derive_kernel_control_unit(uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
1927 {
1928 	uint32_t result = *ctl_unit;
1929 
1930 	*is_aggregate = false;
1931 	if (aggregate_unit != NULL && *aggregate_unit != 0) {
1932 		uint32_t counter;
1933 		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
1934 			if ((*aggregate_unit) & (1 << counter)) {
1935 				break;
1936 			}
1937 		}
1938 		if (counter < (GROUP_COUNT_MAX - 1)) {
1939 			*aggregate_unit &= ~(1 << counter);
1940 			*is_aggregate = true;
1941 			return counter + 1;
1942 		} else {
1943 			*ctl_unit = 0;
1944 			return result;
1945 		}
1946 	} else {
1947 		*ctl_unit = 0;
1948 		return result;
1949 	}
1950 }
1951 
1952 static int
flow_divert_try_next_group(struct flow_divert_pcb * fd_cb)1953 flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
1954 {
1955 	int error = 0;
1956 	uint32_t policy_control_unit = fd_cb->policy_control_unit;
1957 
1958 	flow_divert_pcb_remove(fd_cb);
1959 
1960 	do {
1961 		struct flow_divert_group *next_group = NULL;
1962 		bool is_aggregate = false;
1963 		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(&policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);
1964 
1965 		if (fd_cb->control_group_unit == next_ctl_unit) {
1966 			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
1967 			error = EALREADY;
1968 			break;
1969 		}
1970 
1971 		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
1972 			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
1973 			error = ENOENT;
1974 			break;
1975 		}
1976 
1977 		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
1978 		if (next_group == NULL) {
1979 			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
1980 			continue;
1981 		}
1982 
1983 		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);
1984 
1985 		error = flow_divert_pcb_insert(fd_cb, next_group);
1986 		if (error == 0) {
1987 			if (is_aggregate) {
1988 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
1989 			} else {
1990 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
1991 			}
1992 		}
1993 		FDGRP_RELEASE(next_group);
1994 	} while (fd_cb->group == NULL);
1995 
1996 	if (fd_cb->group == NULL) {
1997 		return error ? error : ENOENT;
1998 	}
1999 
2000 	error = flow_divert_send_connect_packet(fd_cb);
2001 	if (error) {
2002 		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
2003 		flow_divert_pcb_remove(fd_cb);
2004 		error = ENOENT;
2005 	}
2006 
2007 	return error;
2008 }
2009 
2010 static void
flow_divert_disable(struct flow_divert_pcb * fd_cb)2011 flow_divert_disable(struct flow_divert_pcb *fd_cb)
2012 {
2013 	struct socket *so = NULL;
2014 	mbuf_t  buffer;
2015 	int error = 0;
2016 	proc_t last_proc = NULL;
2017 	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
2018 	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
2019 	struct inpcb *inp = NULL;
2020 
2021 	so = fd_cb->so;
2022 	if (so == NULL) {
2023 		goto done;
2024 	}
2025 
2026 	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");
2027 
2028 	/* Restore the IP state */
2029 	inp = sotoinpcb(so);
2030 	inp->inp_vflag = fd_cb->original_vflag;
2031 	inp->inp_faddr.s_addr = INADDR_ANY;
2032 	inp->inp_fport = 0;
2033 	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
2034 	inp->inp_fifscope = IFSCOPE_NONE;
2035 	inp->in6p_fport = 0;
2036 	/* If flow divert set the local address, clear it out */
2037 	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
2038 		inp->inp_laddr.s_addr = INADDR_ANY;
2039 		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
2040 		inp->inp_lifscope = IFSCOPE_NONE;
2041 	}
2042 	inp->inp_last_outifp = fd_cb->original_last_outifp;
2043 	inp->in6p_last_outifp = fd_cb->original_last_outifp6;
2044 
2045 	/* Dis-associate the socket */
2046 	so->so_flags &= ~SOF_FLOW_DIVERT;
2047 	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
2048 	so->so_fd_pcb = NULL;
2049 	fd_cb->so = NULL;
2050 
2051 	FDRELEASE(fd_cb); /* Release the socket's reference */
2052 
2053 	/* Revert back to the original protocol */
2054 	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));
2055 
2056 	/* Reset the socket state to avoid confusing NECP */
2057 	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);
2058 
2059 	last_proc = proc_find(so->last_pid);
2060 
2061 	if (do_connect) {
2062 		/* Connect using the original protocol */
2063 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
2064 		if (error) {
2065 			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
2066 			goto done;
2067 		}
2068 	}
2069 
2070 	buffer = so->so_snd.sb_mb;
2071 	if (buffer == NULL) {
2072 		/* No buffered data, done */
2073 		goto done;
2074 	}
2075 
2076 	/* Send any buffered data using the original protocol */
2077 	if (SOCK_TYPE(so) == SOCK_STREAM) {
2078 		mbuf_t data_to_send = NULL;
2079 		size_t data_len = so->so_snd.sb_cc;
2080 
2081 		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
2082 		if (error) {
2083 			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
2084 			goto done;
2085 		}
2086 
2087 		sbflush(&so->so_snd);
2088 
2089 		if (data_to_send->m_flags & M_PKTHDR) {
2090 			mbuf_pkthdr_setlen(data_to_send, data_len);
2091 		}
2092 
2093 		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2094 		    0,
2095 		    data_to_send,
2096 		    NULL,
2097 		    NULL,
2098 		    (last_proc != NULL ? last_proc : current_proc()));
2099 
2100 		if (error && error != EWOULDBLOCK) {
2101 			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
2102 		} else {
2103 			error = 0;
2104 		}
2105 	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
2106 		struct sockbuf *sb = &so->so_snd;
2107 		MBUFQ_HEAD(send_queue_head) send_queue;
2108 		MBUFQ_INIT(&send_queue);
2109 
2110 		/* Flush the send buffer, moving all records to a temporary queue */
2111 		while (sb->sb_mb != NULL) {
2112 			mbuf_t record = sb->sb_mb;
2113 			mbuf_t m = record;
2114 			sb->sb_mb = sb->sb_mb->m_nextpkt;
2115 			while (m != NULL) {
2116 				sbfree(sb, m);
2117 				m = m->m_next;
2118 			}
2119 			record->m_nextpkt = NULL;
2120 			MBUFQ_ENQUEUE(&send_queue, record);
2121 		}
2122 		SB_EMPTY_FIXUP(sb);
2123 
2124 		while (!MBUFQ_EMPTY(&send_queue)) {
2125 			mbuf_t next_record = MBUFQ_FIRST(&send_queue);
2126 			mbuf_t addr = NULL;
2127 			mbuf_t control = NULL;
2128 			mbuf_t last_control = NULL;
2129 			mbuf_t data = NULL;
2130 			mbuf_t m = next_record;
2131 			struct sockaddr *to_endpoint = NULL;
2132 
2133 			MBUFQ_DEQUEUE(&send_queue, next_record);
2134 
2135 			while (m != NULL) {
2136 				if (m->m_type == MT_SONAME) {
2137 					addr = m;
2138 				} else if (m->m_type == MT_CONTROL) {
2139 					if (control == NULL) {
2140 						control = m;
2141 					}
2142 					last_control = m;
2143 				} else if (m->m_type == MT_DATA) {
2144 					data = m;
2145 					break;
2146 				}
2147 				m = m->m_next;
2148 			}
2149 
2150 			if (addr != NULL && !do_connect) {
2151 				to_endpoint = flow_divert_get_buffered_target_address(addr);
2152 				if (to_endpoint == NULL) {
2153 					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
2154 				}
2155 			}
2156 
2157 			if (data == NULL) {
2158 				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
2159 				mbuf_freem(next_record);
2160 				continue;
2161 			}
2162 
2163 			if (!(data->m_flags & M_PKTHDR)) {
2164 				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
2165 				mbuf_freem(next_record);
2166 				continue;
2167 			}
2168 
2169 			if (addr != NULL) {
2170 				addr->m_next = NULL;
2171 			}
2172 
2173 			if (last_control != NULL) {
2174 				last_control->m_next = NULL;
2175 			}
2176 
2177 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
2178 			    0,
2179 			    data,
2180 			    to_endpoint,
2181 			    control,
2182 			    (last_proc != NULL ? last_proc : current_proc()));
2183 
2184 			if (addr != NULL) {
2185 				mbuf_freem(addr);
2186 			}
2187 
2188 			if (error) {
2189 				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
2190 			}
2191 		}
2192 	}
2193 done:
2194 	if (last_proc != NULL) {
2195 		proc_rele(last_proc);
2196 	}
2197 
2198 	if (error && so != NULL) {
2199 		so->so_error = (uint16_t)error;
2200 		flow_divert_disconnect_socket(so, do_connect);
2201 	}
2202 }
2203 
2204 static void
flow_divert_scope(struct flow_divert_pcb * fd_cb,int out_if_index,bool derive_new_address)2205 flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
2206 {
2207 	struct socket *so = NULL;
2208 	struct inpcb *inp = NULL;
2209 	struct ifnet *current_ifp = NULL;
2210 	struct ifnet *new_ifp = NULL;
2211 	int error = 0;
2212 
2213 	so = fd_cb->so;
2214 	if (so == NULL) {
2215 		return;
2216 	}
2217 
2218 	inp = sotoinpcb(so);
2219 
2220 	if (out_if_index <= 0) {
2221 		return;
2222 	}
2223 
2224 	if (inp->inp_vflag & INP_IPV6) {
2225 		current_ifp = inp->in6p_last_outifp;
2226 	} else {
2227 		current_ifp = inp->inp_last_outifp;
2228 	}
2229 
2230 	if (current_ifp != NULL) {
2231 		if (current_ifp->if_index == out_if_index) {
2232 			/* No change */
2233 			return;
2234 		}
2235 
2236 		/* Scope the socket to the given interface */
2237 		error = inp_bindif(inp, out_if_index, &new_ifp);
2238 		if (error != 0) {
2239 			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
2240 			return;
2241 		}
2242 
2243 		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
2244 			/* Get the appropriate address for the given interface */
2245 			if (inp->inp_vflag & INP_IPV6) {
2246 				inp->in6p_laddr = sa6_any.sin6_addr;
2247 				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
2248 			} else {
2249 				inp->inp_laddr.s_addr = INADDR_ANY;
2250 				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
2251 			}
2252 
2253 			if (error != 0) {
2254 				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
2255 			}
2256 		}
2257 	} else {
2258 		ifnet_head_lock_shared();
2259 		if (out_if_index <= if_index) {
2260 			new_ifp = ifindex2ifnet[out_if_index];
2261 		}
2262 		ifnet_head_done();
2263 	}
2264 
2265 	/* Update the "last interface" of the socket */
2266 	if (new_ifp != NULL) {
2267 		if (inp->inp_vflag & INP_IPV6) {
2268 			inp->in6p_last_outifp = new_ifp;
2269 		} else {
2270 			inp->inp_last_outifp = new_ifp;
2271 		}
2272 
2273 #if SKYWALK
2274 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2275 			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
2276 		}
2277 #endif /* SKYWALK */
2278 	}
2279 }
2280 
2281 static void
flow_divert_handle_connect_result(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2282 flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2283 {
2284 	uint32_t                                        connect_error = 0;
2285 	uint32_t                                        ctl_unit                        = 0;
2286 	int                                                     error                           = 0;
2287 	union sockaddr_in_4_6 local_endpoint = {};
2288 	union sockaddr_in_4_6 remote_endpoint = {};
2289 	int                                                     out_if_index            = 0;
2290 	uint32_t                                        send_window;
2291 	uint32_t                                        app_data_length         = 0;
2292 
2293 	memset(&local_endpoint, 0, sizeof(local_endpoint));
2294 	memset(&remote_endpoint, 0, sizeof(remote_endpoint));
2295 
2296 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
2297 	if (error) {
2298 		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
2299 		return;
2300 	}
2301 
2302 	connect_error = ntohl(connect_error);
2303 	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);
2304 
2305 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
2306 	if (error) {
2307 		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
2308 		return;
2309 	}
2310 
2311 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
2312 	if (error) {
2313 		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
2314 	}
2315 
2316 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sa), NULL);
2317 	if (error) {
2318 		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
2319 	}
2320 
2321 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sa), NULL);
2322 	if (error) {
2323 		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2324 	}
2325 
2326 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2327 	if (error) {
2328 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
2329 	}
2330 
2331 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2332 	if (error) {
2333 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
2334 	}
2335 
2336 	error = 0;
2337 
2338 	FDLOCK(fd_cb);
2339 	if (fd_cb->so != NULL) {
2340 		struct inpcb *inp = NULL;
2341 		struct socket *so = fd_cb->so;
2342 		bool local_address_is_valid = false;
2343 
2344 		socket_lock(so, 0);
2345 
2346 		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2347 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
2348 			goto done;
2349 		}
2350 
2351 		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
2352 			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
2353 			goto done;
2354 		}
2355 
2356 		inp = sotoinpcb(so);
2357 
2358 		if (connect_error || error) {
2359 			goto set_socket_state;
2360 		}
2361 
2362 		if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
2363 			if (local_endpoint.sa.sa_family == AF_INET) {
2364 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2365 				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
2366 					local_address_is_valid = true;
2367 					fd_cb->local_endpoint = local_endpoint;
2368 					inp->inp_laddr.s_addr = INADDR_ANY;
2369 				} else {
2370 					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
2371 				}
2372 			} else if (local_endpoint.sa.sa_family == AF_INET6) {
2373 				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2374 				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
2375 					local_address_is_valid = true;
2376 					fd_cb->local_endpoint = local_endpoint;
2377 					inp->in6p_laddr = sa6_any.sin6_addr;
2378 				} else {
2379 					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
2380 				}
2381 			}
2382 		}
2383 
2384 		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
2385 		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
2386 
2387 		if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
2388 			if (remote_endpoint.sa.sa_family == AF_INET) {
2389 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
2390 			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
2391 				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
2392 			}
2393 			flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
2394 		}
2395 
2396 		if (app_data_length > 0) {
2397 			uint8_t *app_data = NULL;
2398 			app_data = kalloc_data(app_data_length, Z_WAITOK);
2399 			if (app_data != NULL) {
2400 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2401 				if (error == 0) {
2402 					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
2403 					if (fd_cb->app_data != NULL) {
2404 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
2405 					}
2406 					fd_cb->app_data = app_data;
2407 					fd_cb->app_data_length = app_data_length;
2408 				} else {
2409 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
2410 					kfree_data(app_data, app_data_length);
2411 				}
2412 			} else {
2413 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
2414 			}
2415 		}
2416 
2417 		if (error) {
2418 			goto set_socket_state;
2419 		}
2420 
2421 		if (fd_cb->group == NULL) {
2422 			error = EINVAL;
2423 			goto set_socket_state;
2424 		}
2425 
2426 		ctl_unit = ntohl(ctl_unit);
2427 		if (ctl_unit > 0) {
2428 			int insert_error = 0;
2429 			struct flow_divert_group *grp = NULL;
2430 
2431 			if (ctl_unit >= GROUP_COUNT_MAX) {
2432 				FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
2433 				error = EINVAL;
2434 				goto set_socket_state;
2435 			}
2436 
2437 			grp = flow_divert_group_lookup(ctl_unit, fd_cb);
2438 			if (grp == NULL) {
2439 				error = ECONNRESET;
2440 				goto set_socket_state;
2441 			}
2442 
2443 			flow_divert_pcb_remove(fd_cb);
2444 			insert_error = flow_divert_pcb_insert(fd_cb, grp);
2445 			FDGRP_RELEASE(grp);
2446 
2447 			if (insert_error != 0) {
2448 				error = ECONNRESET;
2449 				goto set_socket_state;
2450 			}
2451 		}
2452 
2453 		fd_cb->send_window = ntohl(send_window);
2454 
2455 set_socket_state:
2456 		if (!connect_error && !error) {
2457 			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
2458 			error = flow_divert_send_connect_result(fd_cb);
2459 		}
2460 
2461 		if (connect_error || error) {
2462 			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
2463 				error = flow_divert_try_next_group(fd_cb);
2464 				if (error && fd_cb->policy_control_unit == 0) {
2465 					flow_divert_disable(fd_cb);
2466 					goto done;
2467 				} else if (error == 0) {
2468 					goto done;
2469 				}
2470 			}
2471 
2472 			if (!connect_error) {
2473 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
2474 				so->so_error = (uint16_t)error;
2475 				flow_divert_send_close_if_needed(fd_cb);
2476 			} else {
2477 				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
2478 				so->so_error = (uint16_t)connect_error;
2479 			}
2480 			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2481 		} else {
2482 #if NECP
2483 			/* Update NECP client with connected five-tuple */
2484 			if (!uuid_is_null(inp->necp_client_uuid)) {
2485 				socket_unlock(so, 0);
2486 				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
2487 				socket_lock(so, 0);
2488 				if (!(so->so_flags & SOF_FLOW_DIVERT)) {
2489 					/* The socket was closed while it was unlocked */
2490 					goto done;
2491 				}
2492 			}
2493 #endif /* NECP */
2494 
2495 			flow_divert_send_buffered_data(fd_cb, FALSE);
2496 			soisconnected(so);
2497 		}
2498 
2499 		/* We don't need the connect packet any more */
2500 		if (fd_cb->connect_packet != NULL) {
2501 			mbuf_freem(fd_cb->connect_packet);
2502 			fd_cb->connect_packet = NULL;
2503 		}
2504 
2505 		/* We don't need the original remote endpoint any more */
2506 		free_sockaddr(fd_cb->original_remote_endpoint);
2507 done:
2508 		socket_unlock(so, 0);
2509 	}
2510 	FDUNLOCK(fd_cb);
2511 }
2512 
2513 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2514 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2515 {
2516 	uint32_t        close_error                     = 0;
2517 	int                     error                   = 0;
2518 	int                     how                     = 0;
2519 
2520 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2521 	if (error) {
2522 		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2523 		return;
2524 	}
2525 
2526 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2527 	if (error) {
2528 		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2529 		return;
2530 	}
2531 
2532 	how = ntohl(how);
2533 
2534 	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2535 
2536 	FDLOCK(fd_cb);
2537 	if (fd_cb->so != NULL) {
2538 		bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2539 		socket_lock(fd_cb->so, 0);
2540 
2541 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2542 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2543 			goto done;
2544 		}
2545 
2546 		fd_cb->so->so_error = (uint16_t)ntohl(close_error);
2547 
2548 		flow_divert_update_closed_state(fd_cb, how, TRUE);
2549 
2550 		/* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
2551 		how = flow_divert_tunnel_how_closed(fd_cb);
2552 		if (how == SHUT_RDWR) {
2553 			flow_divert_disconnect_socket(fd_cb->so, is_connected);
2554 		} else if (how == SHUT_RD && is_connected) {
2555 			socantrcvmore(fd_cb->so);
2556 		} else if (how == SHUT_WR && is_connected) {
2557 			socantsendmore(fd_cb->so);
2558 		}
2559 done:
2560 		socket_unlock(fd_cb->so, 0);
2561 	}
2562 	FDUNLOCK(fd_cb);
2563 }
2564 
2565 static mbuf_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2566 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2567 {
2568 	struct inpcb *inp = sotoinpcb(fd_cb->so);
2569 	bool need_recvdstaddr = false;
2570 	/* Socket flow tracking needs to see the local address */
2571 	need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2572 	if ((inp->inp_vflag & INP_IPV4) &&
2573 	    fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2574 	    ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2575 		return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2576 	} else if ((inp->inp_vflag & INP_IPV6) &&
2577 	    fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2578 	    ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2579 		struct in6_pktinfo pi6;
2580 		memset(&pi6, 0, sizeof(pi6));
2581 		pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2582 
2583 		return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2584 	}
2585 	return NULL;
2586 }
2587 
2588 static int
flow_divert_handle_data(struct flow_divert_pcb * fd_cb,mbuf_t packet,size_t offset)2589 flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset)
2590 {
2591 	int error = 0;
2592 
2593 	FDLOCK(fd_cb);
2594 	if (fd_cb->so != NULL) {
2595 		mbuf_t  data            = NULL;
2596 		size_t  data_size;
2597 		struct sockaddr_storage remote_address;
2598 		boolean_t got_remote_sa = FALSE;
2599 		boolean_t appended = FALSE;
2600 		boolean_t append_success = FALSE;
2601 
2602 		socket_lock(fd_cb->so, 0);
2603 
2604 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2605 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
2606 			goto done;
2607 		}
2608 
2609 		if (sbspace(&fd_cb->so->so_rcv) == 0) {
2610 			error = ENOBUFS;
2611 			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
2612 			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
2613 			goto done;
2614 		}
2615 
2616 		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
2617 			uint32_t val_size = 0;
2618 
2619 			/* check if we got remote address with data */
2620 			memset(&remote_address, 0, sizeof(remote_address));
2621 			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
2622 			if (error || val_size > sizeof(remote_address)) {
2623 				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
2624 				error = 0;
2625 			} else {
2626 				if (remote_address.ss_len > sizeof(remote_address)) {
2627 					remote_address.ss_len = sizeof(remote_address);
2628 				}
2629 				/* validate the address */
2630 				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
2631 					got_remote_sa = TRUE;
2632 				} else {
2633 					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
2634 				}
2635 				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
2636 			}
2637 		}
2638 
2639 		data_size = (mbuf_pkthdr_len(packet) - offset);
2640 
2641 		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
2642 			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
2643 			goto done;
2644 		}
2645 
2646 		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
2647 			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
2648 			goto done;
2649 		}
2650 
2651 		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);
2652 
2653 		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
2654 		if (error || data == NULL) {
2655 			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
2656 			goto done;
2657 		}
2658 
2659 		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
2660 			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
2661 			append_success = TRUE;
2662 		} else {
2663 			struct sockaddr *append_sa = NULL;
2664 			mbuf_t mctl;
2665 
2666 			if (got_remote_sa == TRUE) {
2667 				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
2668 			} else {
2669 				if (fd_cb->so->so_proto->pr_domain->dom_family == AF_INET6) {
2670 					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
2671 				} else {
2672 					error = in_getpeeraddr(fd_cb->so, &append_sa);
2673 				}
2674 			}
2675 			if (error) {
2676 				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
2677 			}
2678 
2679 			mctl = flow_divert_create_control_mbuf(fd_cb);
2680 			int append_error = 0;
2681 			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
2682 			if (appended || append_error == 0) {
2683 				append_success = TRUE;
2684 			} else {
2685 				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
2686 			}
2687 
2688 			free_sockaddr(append_sa);
2689 		}
2690 
2691 		if (append_success) {
2692 			fd_cb->bytes_received += data_size;
2693 			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
2694 		}
2695 
2696 		if (appended) {
2697 			sorwakeup(fd_cb->so);
2698 		}
2699 done:
2700 		socket_unlock(fd_cb->so, 0);
2701 	}
2702 	FDUNLOCK(fd_cb);
2703 
2704 	return error;
2705 }
2706 
2707 static void
flow_divert_handle_read_notification(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2708 flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2709 {
2710 	uint32_t        read_count              = 0;
2711 	int             error                   = 0;
2712 
2713 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
2714 	if (error) {
2715 		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
2716 		return;
2717 	}
2718 
2719 	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));
2720 
2721 	FDLOCK(fd_cb);
2722 	if (fd_cb->so != NULL) {
2723 		socket_lock(fd_cb->so, 0);
2724 
2725 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2726 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
2727 			goto done;
2728 		}
2729 
2730 		fd_cb->send_window += ntohl(read_count);
2731 		flow_divert_send_buffered_data(fd_cb, FALSE);
2732 done:
2733 		socket_unlock(fd_cb->so, 0);
2734 	}
2735 	FDUNLOCK(fd_cb);
2736 }
2737 
2738 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_t packet,int offset)2739 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset)
2740 {
2741 	int error         = 0;
2742 	uint32_t key_size = 0;
2743 	int log_level     = 0;
2744 	uint32_t flags    = 0;
2745 
2746 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2747 	if (error) {
2748 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2749 		return;
2750 	}
2751 
2752 	if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2753 		FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2754 		return;
2755 	}
2756 
2757 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2758 	if (!error) {
2759 		nil_pcb.log_level = (uint8_t)log_level;
2760 	}
2761 
2762 	lck_rw_lock_exclusive(&group->lck);
2763 
2764 	if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2765 		FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2766 		lck_rw_done(&group->lck);
2767 		return;
2768 	}
2769 
2770 	if (group->token_key != NULL) {
2771 		kfree_data(group->token_key, group->token_key_size);
2772 		group->token_key = NULL;
2773 	}
2774 
2775 	group->token_key = kalloc_data(key_size, Z_WAITOK);
2776 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2777 	if (error) {
2778 		FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2779 		kfree_data(group->token_key, key_size);
2780 		group->token_key = NULL;
2781 		lck_rw_done(&group->lck);
2782 		return;
2783 	}
2784 
2785 	group->token_key_size = key_size;
2786 
2787 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2788 	if (!error) {
2789 		group->flags = flags;
2790 	}
2791 
2792 	lck_rw_done(&group->lck);
2793 }
2794 
2795 static void
flow_divert_handle_properties_update(struct flow_divert_pcb * fd_cb,mbuf_t packet,int offset)2796 flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset)
2797 {
2798 	int                                                     error                           = 0;
2799 	int                                                     out_if_index            = 0;
2800 	uint32_t                                        app_data_length         = 0;
2801 
2802 	FDLOG0(LOG_INFO, fd_cb, "received a properties update");
2803 
2804 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
2805 	if (error) {
2806 		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
2807 	}
2808 
2809 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
2810 	if (error) {
2811 		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
2812 	}
2813 
2814 	FDLOCK(fd_cb);
2815 	if (fd_cb->so != NULL) {
2816 		socket_lock(fd_cb->so, 0);
2817 
2818 		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
2819 			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
2820 			goto done;
2821 		}
2822 
2823 		if (out_if_index > 0) {
2824 			flow_divert_scope(fd_cb, out_if_index, true);
2825 			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
2826 		}
2827 
2828 		if (app_data_length > 0) {
2829 			uint8_t *app_data = NULL;
2830 			app_data = kalloc_data(app_data_length, Z_WAITOK);
2831 			if (app_data != NULL) {
2832 				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
2833 				if (error == 0) {
2834 					if (fd_cb->app_data != NULL) {
2835 						kfree_data(fd_cb->app_data, fd_cb->app_data_length);
2836 					}
2837 					fd_cb->app_data = app_data;
2838 					fd_cb->app_data_length = app_data_length;
2839 				} else {
2840 					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
2841 					kfree_data(app_data, app_data_length);
2842 				}
2843 			} else {
2844 				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
2845 			}
2846 		}
2847 done:
2848 		socket_unlock(fd_cb->so, 0);
2849 	}
2850 	FDUNLOCK(fd_cb);
2851 }
2852 
2853 static void
flow_divert_handle_app_map_create(struct flow_divert_group * group,mbuf_t packet,int offset)2854 flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_t packet, int offset)
2855 {
2856 	size_t bytes_mem_size;
2857 	size_t child_maps_mem_size;
2858 	size_t nodes_mem_size;
2859 	size_t trie_memory_size = 0;
2860 	int cursor;
2861 	int error = 0;
2862 	struct flow_divert_trie new_trie;
2863 	int insert_error = 0;
2864 	int prefix_count = -1;
2865 	int signing_id_count = 0;
2866 	size_t bytes_count = 0;
2867 	size_t nodes_count = 0;
2868 	size_t maps_count = 0;
2869 
2870 	lck_rw_lock_exclusive(&group->lck);
2871 
2872 	/* Re-set the current trie */
2873 	if (group->signing_id_trie.memory != NULL) {
2874 		kfree_data_addr(group->signing_id_trie.memory);
2875 	}
2876 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
2877 	group->signing_id_trie.root = NULL_TRIE_IDX;
2878 
2879 	memset(&new_trie, 0, sizeof(new_trie));
2880 
2881 	/* Get the number of shared prefixes in the new set of signing ID strings */
2882 	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
2883 
2884 	if (prefix_count < 0 || error) {
2885 		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
2886 		lck_rw_done(&group->lck);
2887 		return;
2888 	}
2889 
2890 	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
2891 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
2892 	    cursor >= 0;
2893 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
2894 		uint32_t sid_size = 0;
2895 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
2896 		if (error || sid_size == 0) {
2897 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
2898 			signing_id_count = 0;
2899 			break;
2900 		}
2901 		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
2902 			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
2903 			signing_id_count = 0;
2904 			break;
2905 		}
2906 		signing_id_count++;
2907 	}
2908 
2909 	if (signing_id_count == 0) {
2910 		lck_rw_done(&group->lck);
2911 		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
2912 		return;
2913 	}
2914 
2915 	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
2916 		lck_rw_done(&group->lck);
2917 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
2918 		return;
2919 	}
2920 
2921 	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
2922 		lck_rw_done(&group->lck);
2923 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
2924 		return;
2925 	}
2926 
2927 	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
2928 		lck_rw_done(&group->lck);
2929 		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
2930 		return;
2931 	}
2932 
2933 	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
2934 	    nodes_count, maps_count, bytes_count);
2935 
2936 	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
2937 	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
2938 	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
2939 	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
2940 		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
2941 		lck_rw_done(&group->lck);
2942 		return;
2943 	}
2944 
2945 	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
2946 		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
2947 		lck_rw_done(&group->lck);
2948 		return;
2949 	}
2950 
2951 	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
2952 	if (new_trie.memory == NULL) {
2953 		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
2954 		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
2955 		lck_rw_done(&group->lck);
2956 		return;
2957 	}
2958 
2959 	new_trie.bytes_count = (uint16_t)bytes_count;
2960 	new_trie.nodes_count = (uint16_t)nodes_count;
2961 	new_trie.child_maps_count = (uint16_t)maps_count;
2962 
2963 	/* Initialize the free lists */
2964 	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
2965 	new_trie.nodes_free_next = 0;
2966 	memset(new_trie.nodes, 0, nodes_mem_size);
2967 
2968 	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
2969 	new_trie.child_maps_free_next = 0;
2970 	memset(new_trie.child_maps, 0xff, child_maps_mem_size);
2971 
2972 	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
2973 	new_trie.bytes_free_next = 0;
2974 	memset(new_trie.bytes, 0, bytes_mem_size);
2975 
2976 	/* The root is an empty node */
2977 	new_trie.root = trie_node_alloc(&new_trie);
2978 
2979 	/* Add each signing ID to the trie */
2980 	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
2981 	    cursor >= 0;
2982 	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
2983 		uint32_t sid_size = 0;
2984 		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
2985 		if (error || sid_size == 0) {
2986 			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
2987 			insert_error = EINVAL;
2988 			break;
2989 		}
2990 		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
2991 			uint16_t new_node_idx;
2992 			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
2993 			if (error) {
2994 				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
2995 				insert_error = EINVAL;
2996 				break;
2997 			}
2998 			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
2999 			if (new_node_idx == NULL_TRIE_IDX) {
3000 				insert_error = EINVAL;
3001 				break;
3002 			}
3003 		} else {
3004 			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
3005 			insert_error = ENOBUFS;
3006 			break;
3007 		}
3008 	}
3009 
3010 	if (!insert_error) {
3011 		group->signing_id_trie = new_trie;
3012 	} else {
3013 		kfree_data(new_trie.memory, trie_memory_size);
3014 	}
3015 
3016 	lck_rw_done(&group->lck);
3017 }
3018 
3019 static int
flow_divert_input(mbuf_t packet,struct flow_divert_group * group)3020 flow_divert_input(mbuf_t packet, struct flow_divert_group *group)
3021 {
3022 	struct flow_divert_packet_header        hdr;
3023 	int                                                                     error           = 0;
3024 	struct flow_divert_pcb                          *fd_cb;
3025 
3026 	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
3027 		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
3028 		error = EINVAL;
3029 		goto done;
3030 	}
3031 
3032 	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
3033 	if (error) {
3034 		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
3035 		error = ENOBUFS;
3036 		goto done;
3037 	}
3038 
3039 	hdr.conn_id = ntohl(hdr.conn_id);
3040 
3041 	if (hdr.conn_id == 0) {
3042 		switch (hdr.packet_type) {
3043 		case FLOW_DIVERT_PKT_GROUP_INIT:
3044 			flow_divert_handle_group_init(group, packet, sizeof(hdr));
3045 			break;
3046 		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
3047 			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
3048 			break;
3049 		default:
3050 			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
3051 			break;
3052 		}
3053 		goto done;
3054 	}
3055 
3056 	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
3057 	if (fd_cb == NULL) {
3058 		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
3059 			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
3060 		}
3061 		goto done;
3062 	}
3063 
3064 	switch (hdr.packet_type) {
3065 	case FLOW_DIVERT_PKT_CONNECT_RESULT:
3066 		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
3067 		break;
3068 	case FLOW_DIVERT_PKT_CLOSE:
3069 		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
3070 		break;
3071 	case FLOW_DIVERT_PKT_DATA:
3072 		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
3073 		break;
3074 	case FLOW_DIVERT_PKT_READ_NOTIFY:
3075 		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
3076 		break;
3077 	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
3078 		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
3079 		break;
3080 	default:
3081 		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
3082 		break;
3083 	}
3084 
3085 	FDRELEASE(fd_cb);
3086 
3087 done:
3088 	mbuf_freem(packet);
3089 	return error;
3090 }
3091 
3092 static void
flow_divert_close_all(struct flow_divert_group * group)3093 flow_divert_close_all(struct flow_divert_group *group)
3094 {
3095 	struct flow_divert_pcb                  *fd_cb;
3096 	SLIST_HEAD(, flow_divert_pcb)   tmp_list;
3097 
3098 	SLIST_INIT(&tmp_list);
3099 
3100 	lck_rw_lock_exclusive(&group->lck);
3101 
3102 	MBUFQ_DRAIN(&group->send_queue);
3103 
3104 	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
3105 		FDRETAIN(fd_cb);
3106 		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
3107 	}
3108 
3109 	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;
3110 
3111 	lck_rw_done(&group->lck);
3112 
3113 	while (!SLIST_EMPTY(&tmp_list)) {
3114 		fd_cb = SLIST_FIRST(&tmp_list);
3115 		FDLOCK(fd_cb);
3116 		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
3117 		if (fd_cb->so != NULL) {
3118 			socket_lock(fd_cb->so, 0);
3119 			flow_divert_pcb_remove(fd_cb);
3120 			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE);
3121 			fd_cb->so->so_error = ECONNABORTED;
3122 			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
3123 			socket_unlock(fd_cb->so, 0);
3124 		}
3125 		FDUNLOCK(fd_cb);
3126 		FDRELEASE(fd_cb);
3127 	}
3128 }
3129 
3130 void
flow_divert_detach(struct socket * so)3131 flow_divert_detach(struct socket *so)
3132 {
3133 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3134 
3135 	if (!SO_IS_DIVERTED(so)) {
3136 		return;
3137 	}
3138 
3139 	so->so_flags &= ~SOF_FLOW_DIVERT;
3140 	so->so_fd_pcb = NULL;
3141 
3142 	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);
3143 
3144 	if (fd_cb->group != NULL) {
3145 		/* Last-ditch effort to send any buffered data */
3146 		flow_divert_send_buffered_data(fd_cb, TRUE);
3147 
3148 		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
3149 		flow_divert_send_close_if_needed(fd_cb);
3150 		/* Remove from the group */
3151 		flow_divert_pcb_remove(fd_cb);
3152 	}
3153 
3154 	socket_unlock(so, 0);
3155 	FDLOCK(fd_cb);
3156 	fd_cb->so = NULL;
3157 	FDUNLOCK(fd_cb);
3158 	socket_lock(so, 0);
3159 
3160 	FDRELEASE(fd_cb);       /* Release the socket's reference */
3161 }
3162 
3163 static int
flow_divert_close(struct socket * so)3164 flow_divert_close(struct socket *so)
3165 {
3166 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3167 
3168 	if (!SO_IS_DIVERTED(so)) {
3169 		return EINVAL;
3170 	}
3171 
3172 	FDLOG0(LOG_INFO, fd_cb, "Closing");
3173 
3174 	if (SOCK_TYPE(so) == SOCK_STREAM) {
3175 		soisdisconnecting(so);
3176 		sbflush(&so->so_rcv);
3177 	}
3178 
3179 	flow_divert_send_buffered_data(fd_cb, TRUE);
3180 	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE);
3181 	flow_divert_send_close_if_needed(fd_cb);
3182 
3183 	/* Remove from the group */
3184 	flow_divert_pcb_remove(fd_cb);
3185 
3186 	return 0;
3187 }
3188 
3189 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3190 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3191     sae_connid_t cid __unused)
3192 {
3193 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3194 		return EINVAL;
3195 	}
3196 
3197 	return flow_divert_close(so);
3198 }
3199 
3200 static int
flow_divert_shutdown(struct socket * so)3201 flow_divert_shutdown(struct socket *so)
3202 {
3203 	struct flow_divert_pcb  *fd_cb          = so->so_fd_pcb;
3204 
3205 	if (!SO_IS_DIVERTED(so)) {
3206 		return EINVAL;
3207 	}
3208 
3209 	FDLOG0(LOG_INFO, fd_cb, "Can't send more");
3210 
3211 	socantsendmore(so);
3212 
3213 	flow_divert_update_closed_state(fd_cb, SHUT_WR, FALSE);
3214 	flow_divert_send_close_if_needed(fd_cb);
3215 
3216 	return 0;
3217 }
3218 
3219 static int
flow_divert_rcvd(struct socket * so,int flags __unused)3220 flow_divert_rcvd(struct socket *so, int flags __unused)
3221 {
3222 	struct flow_divert_pcb  *fd_cb = so->so_fd_pcb;
3223 	int space = 0;
3224 
3225 	if (!SO_IS_DIVERTED(so)) {
3226 		return EINVAL;
3227 	}
3228 
3229 	space = sbspace(&so->so_rcv);
3230 	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
3231 	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
3232 	    (space > 0) &&
3233 	    flow_divert_send_read_notification(fd_cb) == 0) {
3234 		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
3235 		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
3236 	}
3237 
3238 	return 0;
3239 }
3240 
3241 static int
flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet,struct sockaddr * toaddr)3242 flow_divert_append_target_endpoint_tlv(mbuf_t connect_packet, struct sockaddr *toaddr)
3243 {
3244 	int error = 0;
3245 	int port  = 0;
3246 
3247 	if (!flow_divert_is_sockaddr_valid(toaddr)) {
3248 		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
3249 		error = EINVAL;
3250 		goto done;
3251 	}
3252 
3253 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
3254 	if (error) {
3255 		goto done;
3256 	}
3257 
3258 	if (toaddr->sa_family == AF_INET) {
3259 		port = ntohs((satosin(toaddr))->sin_port);
3260 	} else {
3261 		port = ntohs((satosin6(toaddr))->sin6_port);
3262 	}
3263 
3264 	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
3265 	if (error) {
3266 		goto done;
3267 	}
3268 
3269 done:
3270 	return error;
3271 }
3272 
3273 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_t buffer)3274 flow_divert_get_buffered_target_address(mbuf_t buffer)
3275 {
3276 	if (buffer != NULL && buffer->m_type == MT_SONAME) {
3277 		struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3278 		if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3279 			return toaddr;
3280 		}
3281 	}
3282 	return NULL;
3283 }
3284 
3285 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3286 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3287 {
3288 	switch (addr->sa_family) {
3289 	case AF_INET:
3290 		if (addr->sa_len < sizeof(struct sockaddr_in)) {
3291 			return FALSE;
3292 		}
3293 		break;
3294 	case AF_INET6:
3295 		if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3296 			return FALSE;
3297 		}
3298 		break;
3299 	default:
3300 		return FALSE;
3301 	}
3302 	return TRUE;
3303 }
3304 
3305 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3306 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3307     struct sockaddr **dup)
3308 {
3309 	int                                             error           = 0;
3310 	struct sockaddr                 *result;
3311 	struct sockaddr_storage ss;
3312 
3313 	if (addr != NULL) {
3314 		result = addr;
3315 	} else {
3316 		memset(&ss, 0, sizeof(ss));
3317 		ss.ss_family = family;
3318 		if (ss.ss_family == AF_INET) {
3319 			ss.ss_len = sizeof(struct sockaddr_in);
3320 		} else if (ss.ss_family == AF_INET6) {
3321 			ss.ss_len = sizeof(struct sockaddr_in6);
3322 		} else {
3323 			error = EINVAL;
3324 		}
3325 		result = (struct sockaddr *)&ss;
3326 	}
3327 
3328 	if (!error) {
3329 		*dup = dup_sockaddr(result, 1);
3330 		if (*dup == NULL) {
3331 			error = ENOBUFS;
3332 		}
3333 	}
3334 
3335 	return error;
3336 }
3337 
3338 static void
flow_divert_disconnect_socket(struct socket * so,bool is_connected)3339 flow_divert_disconnect_socket(struct socket *so, bool is_connected)
3340 {
3341 	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
3342 		soisdisconnected(so);
3343 	}
3344 	if (SOCK_TYPE(so) == SOCK_DGRAM) {
3345 		struct inpcb *inp = sotoinpcb(so);
3346 		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
3347 			/*
3348 			 * Let NetworkStatistics know this PCB is going away
3349 			 * before we detach it.
3350 			 */
3351 			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
3352 				nstat_pcb_detach(inp);
3353 			}
3354 
3355 			if (SOCK_DOM(so) == PF_INET6) {
3356 				ROUTE_RELEASE(&inp->in6p_route);
3357 			} else {
3358 				ROUTE_RELEASE(&inp->inp_route);
3359 			}
3360 			inp->inp_state = INPCB_STATE_DEAD;
3361 			/* makes sure we're not called twice from so_close */
3362 			so->so_flags |= SOF_PCBCLEARING;
3363 			inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
3364 		}
3365 	}
3366 }
3367 
3368 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3369 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3370 {
3371 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3372 
3373 	if (!SO_IS_DIVERTED(so)) {
3374 		return EINVAL;
3375 	}
3376 
3377 	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3378 		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3379 			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3380 		}
3381 	}
3382 
3383 	if (SOCK_DOM(so) == PF_INET) {
3384 		return g_tcp_protosw->pr_ctloutput(so, sopt);
3385 	} else if (SOCK_DOM(so) == PF_INET6) {
3386 		return g_tcp6_protosw->pr_ctloutput(so, sopt);
3387 	}
3388 	return 0;
3389 }
3390 
3391 static errno_t
flow_divert_connect_out_internal(struct socket * so,struct sockaddr * to,proc_t p,bool implicit)3392 flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
3393 {
3394 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3395 	int                                             error   = 0;
3396 	struct inpcb                    *inp    = sotoinpcb(so);
3397 	struct sockaddr_in              *sinp;
3398 	mbuf_t                                  connect_packet = NULL;
3399 	int                                             do_send = 1;
3400 
3401 	if (!SO_IS_DIVERTED(so)) {
3402 		return EINVAL;
3403 	}
3404 
3405 	if (fd_cb->group == NULL) {
3406 		error = ENETUNREACH;
3407 		goto done;
3408 	}
3409 
3410 	if (inp == NULL) {
3411 		error = EINVAL;
3412 		goto done;
3413 	} else if (inp->inp_state == INPCB_STATE_DEAD) {
3414 		if (so->so_error) {
3415 			error = so->so_error;
3416 			so->so_error = 0;
3417 		} else {
3418 			error = EINVAL;
3419 		}
3420 		goto done;
3421 	}
3422 
3423 	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3424 		error = EALREADY;
3425 		goto done;
3426 	}
3427 
3428 	FDLOG0(LOG_INFO, fd_cb, "Connecting");
3429 
3430 	if (fd_cb->connect_packet == NULL) {
3431 		struct sockaddr_in sin = {};
3432 		struct ifnet *ifp = NULL;
3433 
3434 		if (to == NULL) {
3435 			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
3436 			error = EINVAL;
3437 			goto done;
3438 		}
3439 
3440 		if (!flow_divert_is_sockaddr_valid(to)) {
3441 			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
3442 			error = EINVAL;
3443 			goto done;
3444 		}
3445 
3446 		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
3447 		if (fd_cb->original_remote_endpoint == NULL) {
3448 			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
3449 			error = ENOMEM;
3450 			goto done;
3451 		}
3452 		fd_cb->original_vflag = inp->inp_vflag;
3453 		fd_cb->original_last_outifp = inp->inp_last_outifp;
3454 		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;
3455 
3456 		sinp = (struct sockaddr_in *)(void *)to;
3457 		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
3458 			error = EAFNOSUPPORT;
3459 			goto done;
3460 		}
3461 
3462 		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
3463 			struct sockaddr_in6 sin6 = {};
3464 			sin6.sin6_family = AF_INET6;
3465 			sin6.sin6_len = sizeof(struct sockaddr_in6);
3466 			sin6.sin6_port = satosin6(to)->sin6_port;
3467 			sin6.sin6_addr = satosin6(to)->sin6_addr;
3468 			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
3469 				in6_sin6_2_sin(&sin, &sin6);
3470 				to = (struct sockaddr *)&sin;
3471 			}
3472 		}
3473 
3474 		if (to->sa_family == AF_INET6) {
3475 			struct sockaddr_in6 *to6 = satosin6(to);
3476 
3477 			inp->inp_vflag &= ~INP_IPV4;
3478 			inp->inp_vflag |= INP_IPV6;
3479 			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
3480 			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
3481 			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
3482 			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
3483 			if (error) {
3484 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
3485 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
3486 					error = 0;
3487 				} else {
3488 					goto done;
3489 				}
3490 			}
3491 			if (ifp != NULL) {
3492 				inp->in6p_last_outifp = ifp;
3493 				ifnet_release(ifp);
3494 			}
3495 
3496 			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
3497 			    in6_embedded_scope &&
3498 			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
3499 				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
3500 				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
3501 			}
3502 
3503 			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
3504 			    in6_embedded_scope &&
3505 			    to6->sin6_addr.s6_addr16[1] != 0) {
3506 				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
3507 				to6->sin6_addr.s6_addr16[1] = 0;
3508 			}
3509 		} else if (to->sa_family == AF_INET) {
3510 			inp->inp_vflag |= INP_IPV4;
3511 			inp->inp_vflag &= ~INP_IPV6;
3512 			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
3513 			fd_cb->local_endpoint.sin.sin_family = AF_INET;
3514 			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
3515 			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
3516 			if (error) {
3517 				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
3518 				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
3519 					error = 0;
3520 				} else {
3521 					goto done;
3522 				}
3523 			}
3524 			if (ifp != NULL) {
3525 				inp->inp_last_outifp = ifp;
3526 				ifnet_release(ifp);
3527 			}
3528 		} else {
3529 			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
3530 		}
3531 
3532 		error = flow_divert_check_no_cellular(fd_cb) ||
3533 		    flow_divert_check_no_expensive(fd_cb) ||
3534 		    flow_divert_check_no_constrained(fd_cb);
3535 		if (error) {
3536 			goto done;
3537 		}
3538 
3539 		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
3540 		    !implicit || /* connect() was called or */
3541 		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
3542 		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
3543 			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
3544 		}
3545 
3546 		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
3547 		if (error) {
3548 			goto done;
3549 		}
3550 
3551 		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
3552 			flow_divert_set_remote_endpoint(fd_cb, to);
3553 			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
3554 		}
3555 
3556 		if (implicit) {
3557 			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
3558 		}
3559 
3560 		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
3561 			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
3562 			do_send = 0;
3563 		}
3564 
3565 		fd_cb->connect_packet = connect_packet;
3566 		connect_packet = NULL;
3567 	} else {
3568 		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
3569 	}
3570 
3571 	if (do_send) {
3572 		error = flow_divert_send_connect_packet(fd_cb);
3573 		if (error) {
3574 			goto done;
3575 		}
3576 
3577 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3578 	}
3579 
3580 	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
3581 		soisconnected(so);
3582 	} else {
3583 		soisconnecting(so);
3584 	}
3585 
3586 done:
3587 	return error;
3588 }
3589 
3590 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3591 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3592 {
3593 #if CONTENT_FILTER
3594 	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3595 		int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3596 		if (error != 0) {
3597 			struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3598 			FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3599 			return error;
3600 		}
3601 	}
3602 #endif /* CONTENT_FILTER */
3603 
3604 	return flow_divert_connect_out_internal(so, to, p, false);
3605 }
3606 
3607 static int
flow_divert_connectx_out_common(struct socket * so,struct sockaddr * dst,struct proc * p,sae_connid_t * pcid,struct uio * auio,user_ssize_t * bytes_written)3608 flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
3609     struct proc *p, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
3610 {
3611 	struct inpcb *inp = sotoinpcb(so);
3612 	int error;
3613 
3614 	if (inp == NULL) {
3615 		return EINVAL;
3616 	}
3617 
3618 	VERIFY(dst != NULL);
3619 
3620 #if CONTENT_FILTER && NECP
3621 	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3622 	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
3623 	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3624 		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
3625 	}
3626 #endif /* CONTENT_FILTER */
3627 
3628 	error = flow_divert_connect_out(so, dst, p);
3629 
3630 	if (error != 0) {
3631 		return error;
3632 	}
3633 
3634 	/* if there is data, send it */
3635 	if (auio != NULL) {
3636 		user_ssize_t datalen = 0;
3637 
3638 		socket_unlock(so, 0);
3639 
3640 		VERIFY(bytes_written != NULL);
3641 
3642 		datalen = uio_resid(auio);
3643 		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
3644 		socket_lock(so, 0);
3645 
3646 		if (error == 0 || error == EWOULDBLOCK) {
3647 			*bytes_written = datalen - uio_resid(auio);
3648 		}
3649 
3650 		/*
3651 		 * sosend returns EWOULDBLOCK if it's a non-blocking
3652 		 * socket or a timeout occured (this allows to return
3653 		 * the amount of queued data through sendit()).
3654 		 *
3655 		 * However, connectx() returns EINPROGRESS in case of a
3656 		 * blocking socket. So we change the return value here.
3657 		 */
3658 		if (error == EWOULDBLOCK) {
3659 			error = EINPROGRESS;
3660 		}
3661 	}
3662 
3663 	if (error == 0 && pcid != NULL) {
3664 		*pcid = 1;      /* there is only 1 connection for a TCP */
3665 	}
3666 
3667 	return error;
3668 }
3669 
3670 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope __unused,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3671 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3672     struct sockaddr *dst, struct proc *p, uint32_t ifscope __unused,
3673     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3674     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3675 {
3676 	return flow_divert_connectx_out_common(so, dst, p, pcid, uio, bytes_written);
3677 }
3678 
3679 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope __unused,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3680 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3681     struct sockaddr *dst, struct proc *p, uint32_t ifscope __unused,
3682     sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3683     uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3684 {
3685 	return flow_divert_connectx_out_common(so, dst, p, pcid, uio, bytes_written);
3686 }
3687 
3688 static errno_t
flow_divert_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)3689 flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
3690 {
3691 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3692 	int                                             error   = 0;
3693 	struct inpcb *inp;
3694 #if CONTENT_FILTER
3695 	struct m_tag *cfil_tag = NULL;
3696 #endif
3697 
3698 	if (!SO_IS_DIVERTED(so)) {
3699 		return EINVAL;
3700 	}
3701 
3702 	inp = sotoinpcb(so);
3703 	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
3704 		error = ECONNRESET;
3705 		goto done;
3706 	}
3707 
3708 	if (control && mbuf_len(control) > 0) {
3709 		error = EINVAL;
3710 		goto done;
3711 	}
3712 
3713 	if (flags & MSG_OOB) {
3714 		error = EINVAL;
3715 		goto done; /* We don't support OOB data */
3716 	}
3717 
3718 	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
3719 		/* The provider considers this datagram flow to be closed, so no data can be sent */
3720 		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
3721 		error = EHOSTUNREACH;
3722 		goto done;
3723 	}
3724 
3725 #if CONTENT_FILTER
3726 	/*
3727 	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
3728 	 * retrieve the CFIL saved remote address from the mbuf and use it.
3729 	 */
3730 	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
3731 		struct sockaddr *cfil_faddr = NULL;
3732 		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
3733 		if (cfil_tag) {
3734 			to = (struct sockaddr *)(void *)cfil_faddr;
3735 		}
3736 		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
3737 	}
3738 #endif
3739 
3740 	/* Implicit connect */
3741 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
3742 		FDLOG0(LOG_INFO, fd_cb, "implicit connect");
3743 
3744 		error = flow_divert_connect_out_internal(so, to, p, true);
3745 		if (error) {
3746 			goto done;
3747 		}
3748 	} else {
3749 		error = flow_divert_check_no_cellular(fd_cb) ||
3750 		    flow_divert_check_no_expensive(fd_cb) ||
3751 		    flow_divert_check_no_constrained(fd_cb);
3752 		if (error) {
3753 			goto done;
3754 		}
3755 	}
3756 
3757 	FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data));
3758 
3759 	fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data);
3760 	error = flow_divert_send_app_data(fd_cb, data, to);
3761 
3762 	data = NULL;
3763 
3764 	if (error) {
3765 		goto done;
3766 	}
3767 
3768 	if (flags & PRUS_EOF) {
3769 		flow_divert_shutdown(so);
3770 	}
3771 
3772 done:
3773 	if (data) {
3774 		mbuf_freem(data);
3775 	}
3776 	if (control) {
3777 		mbuf_free(control);
3778 	}
3779 #if CONTENT_FILTER
3780 	if (cfil_tag) {
3781 		m_tag_free(cfil_tag);
3782 	}
3783 #endif
3784 
3785 	return error;
3786 }
3787 
3788 static int
flow_divert_preconnect(struct socket * so)3789 flow_divert_preconnect(struct socket *so)
3790 {
3791 	int error = 0;
3792 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3793 
3794 	if (!SO_IS_DIVERTED(so)) {
3795 		return EINVAL;
3796 	}
3797 
3798 	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
3799 		FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
3800 		error = flow_divert_send_connect_packet(so->so_fd_pcb);
3801 		if (error) {
3802 			return error;
3803 		}
3804 
3805 		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
3806 	}
3807 
3808 	soclearfastopen(so);
3809 
3810 	return error;
3811 }
3812 
3813 static void
flow_divert_set_protosw(struct socket * so)3814 flow_divert_set_protosw(struct socket *so)
3815 {
3816 	if (SOCK_DOM(so) == PF_INET) {
3817 		so->so_proto = &g_flow_divert_in_protosw;
3818 	} else {
3819 		so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
3820 	}
3821 }
3822 
3823 static void
flow_divert_set_udp_protosw(struct socket * so)3824 flow_divert_set_udp_protosw(struct socket *so)
3825 {
3826 	if (SOCK_DOM(so) == PF_INET) {
3827 		so->so_proto = &g_flow_divert_in_udp_protosw;
3828 	} else {
3829 		so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
3830 	}
3831 }
3832 
3833 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_t data,struct sockaddr * to,mbuf_t control,struct proc * p)3834 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p)
3835 {
3836 	struct flow_divert_pcb  *fd_cb  = so->so_fd_pcb;
3837 	struct inpcb *inp;
3838 	int error = 0;
3839 
3840 	inp = sotoinpcb(so);
3841 	if (inp == NULL) {
3842 		return EINVAL;
3843 	}
3844 
3845 	if (fd_cb == NULL) {
3846 		error = flow_divert_pcb_init(so);
3847 		fd_cb  = so->so_fd_pcb;
3848 		if (error != 0 || fd_cb == NULL) {
3849 			goto done;
3850 		}
3851 	}
3852 	return flow_divert_data_out(so, flags, data, to, control, p);
3853 
3854 done:
3855 	if (data) {
3856 		mbuf_freem(data);
3857 	}
3858 	if (control) {
3859 		mbuf_free(control);
3860 	}
3861 
3862 	return error;
3863 }
3864 
3865 static errno_t
flow_divert_pcb_init_internal(struct socket * so,uint32_t ctl_unit,uint32_t aggregate_unit)3866 flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
3867 {
3868 	errno_t error = 0;
3869 	struct flow_divert_pcb *fd_cb = NULL;
3870 	uint32_t agg_unit = aggregate_unit;
3871 	uint32_t policy_control_unit = ctl_unit;
3872 	bool is_aggregate = false;
3873 
3874 	if (so->so_flags & SOF_FLOW_DIVERT) {
3875 		return EALREADY;
3876 	}
3877 
3878 	fd_cb = flow_divert_pcb_create(so);
3879 	if (fd_cb == NULL) {
3880 		return ENOMEM;
3881 	}
3882 
3883 	do {
3884 		uint32_t group_unit = flow_divert_derive_kernel_control_unit(&policy_control_unit, &agg_unit, &is_aggregate);
3885 		if (group_unit == 0 || group_unit >= GROUP_COUNT_MAX) {
3886 			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
3887 			error = EINVAL;
3888 			break;
3889 		}
3890 
3891 		error = flow_divert_add_to_group(fd_cb, group_unit);
3892 		if (error == 0) {
3893 			so->so_fd_pcb = fd_cb;
3894 			so->so_flags |= SOF_FLOW_DIVERT;
3895 			fd_cb->control_group_unit = group_unit;
3896 			fd_cb->policy_control_unit = ctl_unit;
3897 			fd_cb->aggregate_unit = agg_unit;
3898 			if (is_aggregate) {
3899 				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
3900 			} else {
3901 				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
3902 			}
3903 
3904 			if (SOCK_TYPE(so) == SOCK_STREAM) {
3905 				flow_divert_set_protosw(so);
3906 			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
3907 				flow_divert_set_udp_protosw(so);
3908 			}
3909 
3910 			FDLOG0(LOG_INFO, fd_cb, "Created");
3911 		} else if (error != ENOENT) {
3912 			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
3913 		}
3914 	} while (error == ENOENT);
3915 
3916 	if (error != 0) {
3917 		FDRELEASE(fd_cb);
3918 	}
3919 
3920 	return error;
3921 }
3922 
3923 errno_t
flow_divert_pcb_init(struct socket * so)3924 flow_divert_pcb_init(struct socket *so)
3925 {
3926 	struct inpcb *inp = sotoinpcb(so);
3927 	uint32_t aggregate_units = 0;
3928 	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
3929 	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
3930 }
3931 
3932 errno_t
flow_divert_token_set(struct socket * so,struct sockopt * sopt)3933 flow_divert_token_set(struct socket *so, struct sockopt *sopt)
3934 {
3935 	uint32_t ctl_unit = 0;
3936 	uint32_t key_unit = 0;
3937 	uint32_t aggregate_unit = 0;
3938 	int error = 0;
3939 	int hmac_error = 0;
3940 	mbuf_t token = NULL;
3941 
3942 	if (so->so_flags & SOF_FLOW_DIVERT) {
3943 		error = EALREADY;
3944 		goto done;
3945 	}
3946 
3947 	if (g_init_result) {
3948 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
3949 		error = ENOPROTOOPT;
3950 		goto done;
3951 	}
3952 
3953 	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
3954 	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
3955 	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
3956 		error = EINVAL;
3957 		goto done;
3958 	} else {
3959 		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
3960 			struct tcpcb *tp = sototcpcb(so);
3961 			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
3962 				error = EINVAL;
3963 				goto done;
3964 			}
3965 		}
3966 	}
3967 
3968 	error = soopt_getm(sopt, &token);
3969 	if (error) {
3970 		token = NULL;
3971 		goto done;
3972 	}
3973 
3974 	error = soopt_mcopyin(sopt, token);
3975 	if (error) {
3976 		token = NULL;
3977 		goto done;
3978 	}
3979 
3980 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
3981 	if (!error) {
3982 		key_unit = ntohl(key_unit);
3983 		if (key_unit >= GROUP_COUNT_MAX) {
3984 			key_unit = 0;
3985 		}
3986 	} else if (error != ENOENT) {
3987 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
3988 		goto done;
3989 	} else {
3990 		key_unit = 0;
3991 	}
3992 
3993 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
3994 	if (error) {
3995 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
3996 		goto done;
3997 	}
3998 
3999 	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
4000 	if (error && error != ENOENT) {
4001 		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
4002 		goto done;
4003 	}
4004 
4005 	/* A valid kernel control unit is required */
4006 	ctl_unit = ntohl(ctl_unit);
4007 	aggregate_unit = ntohl(aggregate_unit);
4008 
4009 	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
4010 		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
4011 		if (hmac_error && hmac_error != ENOENT) {
4012 			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
4013 			error = hmac_error;
4014 			goto done;
4015 		}
4016 	}
4017 
4018 	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
4019 	if (error == 0) {
4020 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4021 		int log_level = LOG_NOTICE;
4022 
4023 		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
4024 		if (error == 0) {
4025 			fd_cb->log_level = (uint8_t)log_level;
4026 		}
4027 		error = 0;
4028 
4029 		fd_cb->connect_token = token;
4030 		token = NULL;
4031 
4032 		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
4033 	}
4034 
4035 	if (hmac_error == 0) {
4036 		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4037 		if (fd_cb != NULL) {
4038 			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
4039 		}
4040 	}
4041 
4042 done:
4043 	if (token != NULL) {
4044 		mbuf_freem(token);
4045 	}
4046 
4047 	return error;
4048 }
4049 
4050 errno_t
flow_divert_token_get(struct socket * so,struct sockopt * sopt)4051 flow_divert_token_get(struct socket *so, struct sockopt *sopt)
4052 {
4053 	uint32_t                                        ctl_unit;
4054 	int                                                     error                                           = 0;
4055 	uint8_t                                         hmac[SHA_DIGEST_LENGTH];
4056 	struct flow_divert_pcb          *fd_cb                                          = so->so_fd_pcb;
4057 	mbuf_t                                          token                                           = NULL;
4058 	struct flow_divert_group        *control_group                          = NULL;
4059 
4060 	if (!SO_IS_DIVERTED(so)) {
4061 		error = EINVAL;
4062 		goto done;
4063 	}
4064 
4065 	if (fd_cb->group == NULL) {
4066 		error = EINVAL;
4067 		goto done;
4068 	}
4069 
4070 	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
4071 	if (error) {
4072 		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
4073 		goto done;
4074 	}
4075 
4076 	ctl_unit = htonl(fd_cb->group->ctl_unit);
4077 
4078 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
4079 	if (error) {
4080 		goto done;
4081 	}
4082 
4083 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
4084 	if (error) {
4085 		goto done;
4086 	}
4087 
4088 	if (fd_cb->app_data != NULL) {
4089 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
4090 		if (error) {
4091 			goto done;
4092 		}
4093 	}
4094 
4095 	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
4096 	if (control_group != NULL) {
4097 		lck_rw_lock_shared(&control_group->lck);
4098 		ctl_unit = htonl(control_group->ctl_unit);
4099 		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
4100 		if (!error) {
4101 			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
4102 		}
4103 		lck_rw_done(&control_group->lck);
4104 		FDGRP_RELEASE(control_group);
4105 	} else {
4106 		error = ENOPROTOOPT;
4107 	}
4108 
4109 	if (error) {
4110 		goto done;
4111 	}
4112 
4113 	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
4114 	if (error) {
4115 		goto done;
4116 	}
4117 
4118 	if (sopt->sopt_val == USER_ADDR_NULL) {
4119 		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
4120 		sopt->sopt_valsize = mbuf_pkthdr_len(token);
4121 		goto done;
4122 	}
4123 
4124 	error = soopt_mcopyout(sopt, token);
4125 	if (error) {
4126 		token = NULL;   /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
4127 		goto done;
4128 	}
4129 
4130 done:
4131 	if (token != NULL) {
4132 		mbuf_freem(token);
4133 	}
4134 
4135 	return error;
4136 }
4137 
4138 void
flow_divert_group_destroy(struct flow_divert_group * group)4139 flow_divert_group_destroy(struct flow_divert_group *group)
4140 {
4141 	lck_rw_lock_exclusive(&group->lck);
4142 
4143 	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);
4144 
4145 	if (group->token_key != NULL) {
4146 		memset(group->token_key, 0, group->token_key_size);
4147 		kfree_data(group->token_key, group->token_key_size);
4148 		group->token_key = NULL;
4149 		group->token_key_size = 0;
4150 	}
4151 
4152 	/* Re-set the current trie */
4153 	if (group->signing_id_trie.memory != NULL) {
4154 		kfree_data_addr(group->signing_id_trie.memory);
4155 	}
4156 	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
4157 	group->signing_id_trie.root = NULL_TRIE_IDX;
4158 
4159 	lck_rw_done(&group->lck);
4160 
4161 	zfree(flow_divert_group_zone, group);
4162 }
4163 
4164 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4165 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4166 {
4167 	struct flow_divert_group        *new_group      = NULL;
4168 	int                             error           = 0;
4169 
4170 	if (sac->sc_unit >= GROUP_COUNT_MAX) {
4171 		error = EINVAL;
4172 		goto done;
4173 	}
4174 
4175 	*unitinfo = NULL;
4176 
4177 	new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4178 	lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4179 	RB_INIT(&new_group->pcb_tree);
4180 	new_group->ctl_unit = sac->sc_unit;
4181 	MBUFQ_INIT(&new_group->send_queue);
4182 	new_group->signing_id_trie.root = NULL_TRIE_IDX;
4183 	new_group->ref_count = 1;
4184 
4185 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4186 
4187 	if (g_flow_divert_groups == NULL) {
4188 		g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4189 		    GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4190 	}
4191 
4192 	if (g_flow_divert_groups[sac->sc_unit] != NULL) {
4193 		error = EALREADY;
4194 	} else {
4195 		g_flow_divert_groups[sac->sc_unit] = new_group;
4196 		g_active_group_count++;
4197 	}
4198 
4199 	lck_rw_done(&g_flow_divert_group_lck);
4200 
4201 done:
4202 	if (error == 0) {
4203 		*unitinfo = new_group;
4204 	} else if (new_group != NULL) {
4205 		zfree(flow_divert_group_zone, new_group);
4206 	}
4207 	return error;
4208 }
4209 
4210 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4211 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4212 {
4213 	struct flow_divert_group        *group  = NULL;
4214 	errno_t                                         error   = 0;
4215 
4216 	if (unit >= GROUP_COUNT_MAX) {
4217 		return EINVAL;
4218 	}
4219 
4220 	if (unitinfo == NULL) {
4221 		return 0;
4222 	}
4223 
4224 	FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4225 
4226 	lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4227 
4228 	if (g_flow_divert_groups == NULL || g_active_group_count == 0) {
4229 		panic("flow divert group %u is disconnecting, but no groups are active (groups = %p, active count = %u", unit,
4230 		    g_flow_divert_groups, g_active_group_count);
4231 	}
4232 
4233 	group = g_flow_divert_groups[unit];
4234 
4235 	if (group != (struct flow_divert_group *)unitinfo) {
4236 		panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4237 	}
4238 
4239 	g_flow_divert_groups[unit] = NULL;
4240 	g_active_group_count--;
4241 
4242 	if (g_active_group_count == 0) {
4243 		kfree_type(struct flow_divert_group *,
4244 		    GROUP_COUNT_MAX, g_flow_divert_groups);
4245 		g_flow_divert_groups = NULL;
4246 	}
4247 
4248 	lck_rw_done(&g_flow_divert_group_lck);
4249 
4250 	if (group != NULL) {
4251 		flow_divert_close_all(group);
4252 		FDGRP_RELEASE(group);
4253 	} else {
4254 		error = EINVAL;
4255 	}
4256 
4257 	return error;
4258 }
4259 
4260 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_t m,__unused int flags)4261 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_t m, __unused int flags)
4262 {
4263 	errno_t error = 0;
4264 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4265 	if (group != NULL) {
4266 		error = flow_divert_input(m, group);
4267 		FDGRP_RELEASE(group);
4268 	} else {
4269 		error = ENOENT;
4270 	}
4271 	return error;
4272 }
4273 
4274 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4275 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4276 {
4277 	struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4278 	if (group == NULL) {
4279 		return;
4280 	}
4281 
4282 	if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4283 		struct flow_divert_pcb                  *fd_cb;
4284 		SLIST_HEAD(, flow_divert_pcb)   tmp_list;
4285 
4286 		lck_rw_lock_exclusive(&group->lck);
4287 
4288 		while (!MBUFQ_EMPTY(&group->send_queue)) {
4289 			mbuf_t next_packet;
4290 			FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4291 			next_packet = MBUFQ_FIRST(&group->send_queue);
4292 			int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4293 			if (error) {
4294 				FDLOG(LOG_DEBUG, &nil_pcb, "ctl_enqueuembuf returned an error: %d", error);
4295 				OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4296 				lck_rw_done(&group->lck);
4297 				return;
4298 			}
4299 			MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4300 		}
4301 
4302 		SLIST_INIT(&tmp_list);
4303 
4304 		RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4305 			FDRETAIN(fd_cb);
4306 			SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4307 		}
4308 
4309 		lck_rw_done(&group->lck);
4310 
4311 		SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4312 			FDLOCK(fd_cb);
4313 			if (fd_cb->so != NULL) {
4314 				socket_lock(fd_cb->so, 0);
4315 				if (fd_cb->group != NULL) {
4316 					flow_divert_send_buffered_data(fd_cb, FALSE);
4317 				}
4318 				socket_unlock(fd_cb->so, 0);
4319 			}
4320 			FDUNLOCK(fd_cb);
4321 			FDRELEASE(fd_cb);
4322 		}
4323 	}
4324 
4325 	FDGRP_RELEASE(group);
4326 }
4327 
4328 static int
flow_divert_kctl_init(void)4329 flow_divert_kctl_init(void)
4330 {
4331 	struct kern_ctl_reg     ctl_reg;
4332 	int                     result;
4333 
4334 	memset(&ctl_reg, 0, sizeof(ctl_reg));
4335 
4336 	strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4337 	ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4338 	ctl_reg.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
4339 	ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4340 
4341 	ctl_reg.ctl_connect = flow_divert_kctl_connect;
4342 	ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4343 	ctl_reg.ctl_send = flow_divert_kctl_send;
4344 	ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4345 
4346 	result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4347 
4348 	if (result) {
4349 		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4350 		return result;
4351 	}
4352 
4353 	return 0;
4354 }
4355 
4356 void
flow_divert_init(void)4357 flow_divert_init(void)
4358 {
4359 	memset(&nil_pcb, 0, sizeof(nil_pcb));
4360 	nil_pcb.log_level = LOG_NOTICE;
4361 
4362 	g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4363 
4364 	VERIFY(g_tcp_protosw != NULL);
4365 
4366 	memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4367 	memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4368 
4369 	g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4370 	g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4371 	g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4372 	g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4373 	g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4374 	g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4375 	g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4376 	g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4377 
4378 	g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4379 	g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4380 
4381 	/*
4382 	 * Socket filters shouldn't attach/detach to/from this protosw
4383 	 * since pr_protosw is to be used instead, which points to the
4384 	 * real protocol; if they do, it is a bug and we should panic.
4385 	 */
4386 	g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4387 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4388 	g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4389 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4390 
4391 	/* UDP */
4392 	g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4393 	VERIFY(g_udp_protosw != NULL);
4394 
4395 	memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4396 	memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4397 
4398 	g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4399 	g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4400 	g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4401 	g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4402 	g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4403 	g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4404 	g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4405 	g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4406 	g_flow_divert_in_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
4407 	g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4408 
4409 	g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4410 	g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4411 
4412 	/*
4413 	 * Socket filters shouldn't attach/detach to/from this protosw
4414 	 * since pr_protosw is to be used instead, which points to the
4415 	 * real protocol; if they do, it is a bug and we should panic.
4416 	 */
4417 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4418 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4419 	g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4420 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4421 
4422 	g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4423 
4424 	VERIFY(g_tcp6_protosw != NULL);
4425 
4426 	memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4427 	memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4428 
4429 	g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4430 	g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4431 	g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4432 	g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4433 	g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4434 	g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4435 	g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4436 	g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4437 
4438 	g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4439 	g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4440 	/*
4441 	 * Socket filters shouldn't attach/detach to/from this protosw
4442 	 * since pr_protosw is to be used instead, which points to the
4443 	 * real protocol; if they do, it is a bug and we should panic.
4444 	 */
4445 	g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4446 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4447 	g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4448 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4449 
4450 	/* UDP6 */
4451 	g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4452 
4453 	VERIFY(g_udp6_protosw != NULL);
4454 
4455 	memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4456 	memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4457 
4458 	g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4459 	g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4460 	g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4461 	g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4462 	g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4463 	g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4464 	g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4465 	g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4466 	g_flow_divert_in6_udp_usrreqs.pru_soreceive_list = pru_soreceive_list_notsupp;
4467 	g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4468 
4469 	g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4470 	g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4471 	/*
4472 	 * Socket filters shouldn't attach/detach to/from this protosw
4473 	 * since pr_protosw is to be used instead, which points to the
4474 	 * real protocol; if they do, it is a bug and we should panic.
4475 	 */
4476 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4477 	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
4478 	g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4479 	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
4480 
4481 	g_init_result = flow_divert_kctl_init();
4482 	if (g_init_result) {
4483 		goto done;
4484 	}
4485 
4486 done:
4487 	if (g_init_result != 0) {
4488 		if (g_flow_divert_kctl_ref != NULL) {
4489 			ctl_deregister(g_flow_divert_kctl_ref);
4490 			g_flow_divert_kctl_ref = NULL;
4491 		}
4492 	}
4493 }
4494