1 /*
2 * Copyright (c) 2012-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #include <net/sockaddr_utils.h>
73 #if CONTENT_FILTER
74 #include <net/content_filter.h>
75 #endif /* CONTENT_FILTER */
76
77 #define FLOW_DIVERT_CONNECT_STARTED 0x00000001
78 #define FLOW_DIVERT_READ_CLOSED 0x00000002
79 #define FLOW_DIVERT_WRITE_CLOSED 0x00000004
80 #define FLOW_DIVERT_TUNNEL_RD_CLOSED 0x00000008
81 #define FLOW_DIVERT_TUNNEL_WR_CLOSED 0x00000010
82 #define FLOW_DIVERT_HAS_HMAC 0x00000040
83 #define FLOW_DIVERT_NOTIFY_ON_RECEIVED 0x00000080
84 #define FLOW_DIVERT_IMPLICIT_CONNECT 0x00000100
85 #define FLOW_DIVERT_DID_SET_LOCAL_ADDR 0x00000200
86 #define FLOW_DIVERT_HAS_TOKEN 0x00000400
87 #define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
88 #define FLOW_DIVERT_FLOW_IS_TRANSPARENT 0x00001000
89
90 #define FDLOG(level, pcb, format, ...) \
91 os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)
92
93 #define FDLOG0(level, pcb, msg) \
94 os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)
95
96 #define FDRETAIN(pcb) if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count)
97 #define FDRELEASE(pcb) \
98 do { \
99 if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) { \
100 flow_divert_pcb_destroy(pcb); \
101 } \
102 } while (0)
103
104 #define FDGRP_RETAIN(grp) if ((grp) != NULL) OSIncrementAtomic(&(grp)->ref_count)
105 #define FDGRP_RELEASE(grp) if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) flow_divert_group_destroy(grp)
106
107 #define FDLOCK(pcb) lck_mtx_lock(&(pcb)->mtx)
108 #define FDUNLOCK(pcb) lck_mtx_unlock(&(pcb)->mtx)
109
110 #define FD_CTL_SENDBUFF_SIZE (128 * 1024)
111
112 #define GROUP_BIT_CTL_ENQUEUE_BLOCKED 0
113
114 #define GROUP_COUNT_MAX 31
115 #define FLOW_DIVERT_MAX_NAME_SIZE 4096
116 #define FLOW_DIVERT_MAX_KEY_SIZE 1024
117 #define FLOW_DIVERT_MAX_TRIE_MEMORY (1024 * 1024)
118
119 #define CHILD_MAP_SIZE 256
120 #define NULL_TRIE_IDX 0xffff
121 #define TRIE_NODE(t, i) ((t)->nodes[(i)])
122 #define TRIE_CHILD(t, i, b) (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
123 #define TRIE_BYTE(t, i) ((t)->bytes[(i)])
124
125 #define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
126
127 static struct flow_divert_pcb nil_pcb;
128
129 static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
130 static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
131 static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
132 &flow_divert_mtx_attr);
133
134 static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;
135
136 static struct flow_divert_group **g_flow_divert_groups __indexable = NULL;
137 static uint32_t g_active_group_count = 0;
138
139 static errno_t g_init_result = 0;
140
141 static kern_ctl_ref g_flow_divert_kctl_ref = NULL;
142
143 static struct protosw g_flow_divert_in_protosw;
144 static struct pr_usrreqs g_flow_divert_in_usrreqs;
145 static struct protosw g_flow_divert_in_udp_protosw;
146 static struct pr_usrreqs g_flow_divert_in_udp_usrreqs;
147 static struct ip6protosw g_flow_divert_in6_protosw;
148 static struct pr_usrreqs g_flow_divert_in6_usrreqs;
149 static struct ip6protosw g_flow_divert_in6_udp_protosw;
150 static struct pr_usrreqs g_flow_divert_in6_udp_usrreqs;
151
152 static struct protosw *g_tcp_protosw = NULL;
153 static struct ip6protosw *g_tcp6_protosw = NULL;
154 static struct protosw *g_udp_protosw = NULL;
155 static struct ip6protosw *g_udp6_protosw = NULL;
156
157 static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
158 NET_KT_DEFAULT);
159 static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
160 NET_KT_DEFAULT);
161
162 static errno_t
163 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
164
165 static boolean_t
166 flow_divert_is_sockaddr_valid(struct sockaddr *addr);
167
168 static int
169 flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr);
170
171 struct sockaddr *
172 flow_divert_get_buffered_target_address(mbuf_ref_t buffer);
173
174 static void
175 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);
176
177 static void flow_divert_group_destroy(struct flow_divert_group *group);
178
179 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)180 flow_divert_syslog_type_to_oslog_type(int syslog_type)
181 {
182 switch (syslog_type) {
183 case LOG_ERR: return OS_LOG_TYPE_ERROR;
184 case LOG_INFO: return OS_LOG_TYPE_INFO;
185 case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
186 default: return OS_LOG_TYPE_DEFAULT;
187 }
188 }
189
190 static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb * pcb_a,const struct flow_divert_pcb * pcb_b)191 flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
192 {
193 return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
194 }
195
196 RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
197 RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
198
199 static const char *
flow_divert_packet_type2str(uint8_t packet_type)200 flow_divert_packet_type2str(uint8_t packet_type)
201 {
202 switch (packet_type) {
203 case FLOW_DIVERT_PKT_CONNECT:
204 return "connect";
205 case FLOW_DIVERT_PKT_CONNECT_RESULT:
206 return "connect result";
207 case FLOW_DIVERT_PKT_DATA:
208 return "data";
209 case FLOW_DIVERT_PKT_CLOSE:
210 return "close";
211 case FLOW_DIVERT_PKT_READ_NOTIFY:
212 return "read notification";
213 case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
214 return "properties update";
215 case FLOW_DIVERT_PKT_APP_MAP_CREATE:
216 return "app map create";
217 default:
218 return "unknown";
219 }
220 }
221
/*
 * Acquire the socket lock on behalf of the flow divert plugin and
 * record that fact in the PCB.  The flag is set only after the lock
 * is held, so the ordering of these two statements matters.
 */
static inline void
flow_divert_lock_socket(struct socket *so, struct flow_divert_pcb *fd_cb)
{
	socket_lock(so, 0);
	fd_cb->plugin_locked = true;
}
228
/*
 * Clear the plugin-locked marker and release the socket lock.
 * Mirror of flow_divert_lock_socket(): the flag is cleared while the
 * lock is still held.
 */
static inline void
flow_divert_unlock_socket(struct socket *so, struct flow_divert_pcb *fd_cb)
{
	fd_cb->plugin_locked = false;
	socket_unlock(so, 0);
}
235
/*
 * Find the PCB with the given flow hash in a group's PCB tree.
 *
 * Takes the group lock shared for the duration of the lookup.
 * Returns the PCB with a reference held (caller must FDRELEASE), or
 * NULL if no PCB with that hash exists in the group.
 */
static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
{
	struct flow_divert_pcb key_item;
	struct flow_divert_pcb *fd_cb = NULL;

	key_item.hash = hash;

	lck_rw_lock_shared(&group->lck);
	fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
	/* FDRETAIN is NULL-safe; the reference is transferred to the caller */
	FDRETAIN(fd_cb);
	lck_rw_done(&group->lck);

	return fd_cb;
}
251
252 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)253 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
254 {
255 struct flow_divert_group *group = NULL;
256 lck_rw_lock_shared(&g_flow_divert_group_lck);
257 if (g_active_group_count == 0) {
258 if (fd_cb != NULL) {
259 FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
260 }
261 } else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
262 FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
263 } else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
264 if (g_flow_divert_groups == NULL) {
265 if (fd_cb != NULL) {
266 FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
267 }
268 } else {
269 group = g_flow_divert_groups[ctl_unit];
270 if (group == NULL) {
271 if (fd_cb != NULL) {
272 FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
273 }
274 } else {
275 FDGRP_RETAIN(group);
276 }
277 }
278 } else {
279 if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
280 if (fd_cb != NULL) {
281 FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
282 }
283 } else {
284 struct flow_divert_group *group_cursor = NULL;
285 TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
286 if (group_cursor->ctl_unit == ctl_unit) {
287 group = group_cursor;
288 break;
289 }
290 }
291 if (group == NULL) {
292 if (fd_cb != NULL) {
293 FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
294 }
295 } else if (fd_cb != NULL &&
296 (fd_cb->so == NULL ||
297 group_cursor->in_process_pid != fd_cb->so->last_pid)) {
298 FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
299 ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
300 group = NULL;
301 } else {
302 FDGRP_RETAIN(group);
303 }
304 }
305 }
306 lck_rw_done(&g_flow_divert_group_lck);
307 return group;
308 }
309
/*
 * Insert a PCB into a group's PCB tree (keyed by the flow hash).
 *
 * Takes the group lock exclusive.  On success the group holds a new
 * reference on the PCB and fd_cb->group / control_group_unit are set.
 * Fails with ENOENT if the group is defunct, or EEXIST if a PCB with
 * the same hash is already present (the caller retries with a fresh
 * hash; see flow_divert_add_to_group).
 */
static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
{
	int error = 0;
	lck_rw_lock_exclusive(&group->lck);
	if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
		if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
			fd_cb->group = group;
			fd_cb->control_group_unit = group->ctl_unit;
			FDRETAIN(fd_cb); /* The group now has a reference */
		} else {
			FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
			error = EEXIST;
		}
	} else {
		FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
		error = ENOENT;
	}
	lck_rw_done(&group->lck);
	return error;
}
331
/*
 * Assign a flow hash to the PCB and insert it into the group for the
 * given control unit.
 *
 * A candidate hash is derived from a monotonically increasing key and
 * a random value.  Before inserting, all other non-in-process groups
 * are scanned so the hash is unique across groups; on a collision
 * (EEXIST) a new hash is generated, up to 3 retries.
 *
 * NOTE(review): g_nextkey and g_hash_seed are function-local statics
 * updated without an explicit lock here -- presumably serialized by
 * the callers; confirm.
 *
 * Returns 0 on success, ENOENT if no group was found for ctl_unit, or
 * EEXIST if a unique hash could not be generated.
 */
static errno_t
flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
{
	errno_t error = 0;
	struct flow_divert_group *group = NULL;
	static uint32_t g_nextkey = 1;
	static uint32_t g_hash_seed = 0;
	int try_count = 0;

	group = flow_divert_group_lookup(ctl_unit, fd_cb);
	if (group == NULL) {
		return ENOENT;
	}

	do {
		uint32_t key[2];
		uint32_t idx;

		key[0] = g_nextkey++;
		key[1] = RandomULong();

		/* Lazily seed the flow hash on first use */
		if (g_hash_seed == 0) {
			g_hash_seed = RandomULong();
		}

		error = 0;
		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);

		/* Make sure no other group already has a PCB with this hash */
		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
			if (idx == ctl_unit) {
				continue;
			}
			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
			if (curr_group != NULL) {
				lck_rw_lock_shared(&curr_group->lck);
				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
					error = EEXIST;
				}
				lck_rw_done(&curr_group->lck);
				FDGRP_RELEASE(curr_group);
			}
		}

		if (error == 0) {
			error = flow_divert_pcb_insert(fd_cb, group);
		}
	} while (error == EEXIST && try_count++ < 3);

	if (error == EEXIST) {
		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
		fd_cb->hash = 0;
	}

	FDGRP_RELEASE(group);
	return error;
}
388
/*
 * Allocate and initialize a new flow divert PCB for the given socket.
 *
 * The returned PCB carries one reference representing the socket's
 * ownership (released when the socket detaches).  The log level is
 * inherited from the global nil_pcb.
 */
static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)
{
	struct flow_divert_pcb *new_pcb = NULL;

	new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
	lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
	new_pcb->so = so;
	new_pcb->log_level = nil_pcb.log_level;

	FDRETAIN(new_pcb); /* Represents the socket's reference */

	return new_pcb;
}
403
/*
 * Free a PCB and everything it owns (connect token/packet, app data,
 * saved remote endpoint).  Invoked from FDRELEASE when the last
 * reference is dropped; must not be called directly.
 */
static void
flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
{
	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %llu, tunnel tx %llu, tunnel rx %llu",
	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);

	if (fd_cb->connect_token != NULL) {
		mbuf_freem(fd_cb->connect_token);
	}
	if (fd_cb->connect_packet != NULL) {
		mbuf_freem(fd_cb->connect_packet);
	}
	if (fd_cb->app_data != NULL) {
		kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
	}
	if (fd_cb->original_remote_endpoint != NULL) {
		free_sockaddr(fd_cb->original_remote_endpoint);
	}
	zfree(flow_divert_pcb_zone, fd_cb);
}
424
/*
 * Remove a PCB from its group's tree (if it is in one) and drop the
 * group's reference to it.  Takes the group lock exclusive.
 */
static void
flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
{
	if (fd_cb->group != NULL) {
		struct flow_divert_group *group = fd_cb->group;
		lck_rw_lock_exclusive(&group->lck);
		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
		fd_cb->group = NULL;
		FDRELEASE(fd_cb); /* Release the group's reference */
		lck_rw_done(&group->lck);
	}
}
438
/*
 * Allocate a packet header mbuf and lay down the flow divert packet
 * header (packet type plus the connection ID in network byte order).
 *
 * On success *packet holds the new mbuf; on failure *packet is NULL
 * and an errno is returned.
 */
static int
flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_ref_t *packet)
{
	struct flow_divert_packet_header hdr;
	int error = 0;

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		return error;
	}

	hdr.packet_type = packet_type;
	hdr.conn_id = htonl(fd_cb->hash);

	/* Lay down the header */
	error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
		mbuf_freem(*packet);
		*packet = NULL;
		return error;
	}

	return 0;
}
465
/*
 * Append a TLV (1-byte type, 4-byte length in network byte order,
 * then `length` bytes of value) to the end of the packet.
 * Returns 0 on success or the mbuf_copyback() error.
 */
static int
flow_divert_packet_append_tlv(mbuf_ref_t packet, uint8_t type, uint32_t length, const void __sized_by(length) *value)
{
	uint32_t net_length = htonl(length);
	int error = 0;

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
		return error;
	}

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
		return error;
	}

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
	if (error) {
		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
		return error;
	}

	return error;
}
492
/*
 * Scan the packet for a TLV of the given type, starting at `offset`
 * (which must point at a TLV boundary).
 *
 * If `next` is non-zero, the TLV at `offset` itself is skipped and
 * the search effectively starts with the following TLV.
 *
 * Returns the offset of the matching TLV's type byte, or -1 with
 * *err set when the end of the packet is reached without a match.
 */
static int
flow_divert_packet_find_tlv(mbuf_ref_t packet, int offset, uint8_t type, int *err, int next)
{
	size_t cursor = offset;
	int error = 0;
	uint32_t curr_length = 0;
	uint8_t curr_type = 0;

	*err = 0;

	do {
		if (!next) {
			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
			if (error) {
				/* Ran off the end of the packet: report "not found" */
				*err = ENOENT;
				return -1;
			}
		} else {
			/* Force a mismatch so the first TLV is skipped */
			next = 0;
			curr_type = FLOW_DIVERT_TLV_NIL;
		}

		if (curr_type != type) {
			/* Advance past this TLV's type, length field, and value */
			cursor += sizeof(curr_type);
			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
			if (error) {
				*err = error;
				return -1;
			}

			cursor += (sizeof(curr_length) + ntohl(curr_length));
		}
	} while (curr_type != type);

	return (int)cursor;
}
529
/*
 * Locate a TLV of the given type (searching from `offset`) and copy
 * out its value.
 *
 * If val_size is non-NULL it receives the full value length, even
 * when that exceeds buff_len.  If buff is non-NULL, up to buff_len
 * bytes of the value are copied into buff; buff is zero-filled first,
 * so a short value leaves the remainder zeroed.
 *
 * Returns 0 on success, EINVAL if the TLV's declared length runs past
 * the end of the packet, or an mbuf error.
 */
static int
flow_divert_packet_get_tlv(mbuf_ref_t packet, int offset, uint8_t type, size_t buff_len, void *buff __sized_by(buff_len), uint32_t *val_size)
{
	int error = 0;
	uint32_t length = 0;
	int tlv_offset = 0;

	tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
	if (tlv_offset < 0) {
		return error;
	}

	error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
	if (error) {
		return error;
	}

	length = ntohl(length);

	uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);

	/* Reject a length that claims more data than the packet contains */
	if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
		FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
		return EINVAL;
	}

	if (val_size != NULL) {
		*val_size = length;
	}

	if (buff != NULL && buff_len > 0) {
		memset(buff, 0, buff_len);
		size_t to_copy = (length < buff_len) ? length : buff_len;
		error = mbuf_copydata(packet, data_offset, to_copy, buff);
		if (error) {
			return error;
		}
	}

	return 0;
}
571
/*
 * Compute the HMAC-SHA1 of the entire packet mbuf chain using the
 * group's token key.  `hmac` must point to at least SHA_DIGEST_LENGTH
 * bytes.  Returns ENOPROTOOPT if the crypto functions or the token
 * key are unavailable, 0 otherwise.
 */
static int
flow_divert_packet_compute_hmac(mbuf_ref_t packet, struct flow_divert_group *group, uint8_t *hmac)
{
	mbuf_ref_t curr_mbuf = packet;

	if (g_crypto_funcs == NULL || group->token_key == NULL) {
		return ENOPROTOOPT;
	}

	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);

	/* Feed every mbuf in the chain into the HMAC */
	while (curr_mbuf != NULL) {
		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mtod(curr_mbuf, void *));
		curr_mbuf = mbuf_next(curr_mbuf);
	}

	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);

	return 0;
}
593
594 static int
flow_divert_packet_verify_hmac(mbuf_ref_t packet,uint32_t ctl_unit)595 flow_divert_packet_verify_hmac(mbuf_ref_t packet, uint32_t ctl_unit)
596 {
597 int error = 0;
598 struct flow_divert_group *group = NULL;
599 int hmac_offset;
600 uint8_t packet_hmac[SHA_DIGEST_LENGTH];
601 uint8_t computed_hmac[SHA_DIGEST_LENGTH];
602 mbuf_ref_t tail;
603
604 group = flow_divert_group_lookup(ctl_unit, NULL);
605 if (group == NULL) {
606 FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
607 return ENOPROTOOPT;
608 }
609
610 lck_rw_lock_shared(&group->lck);
611
612 if (group->token_key == NULL) {
613 error = ENOPROTOOPT;
614 goto done;
615 }
616
617 hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
618 if (hmac_offset < 0) {
619 goto done;
620 }
621
622 error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
623 if (error) {
624 goto done;
625 }
626
627 /* Chop off the HMAC TLV */
628 error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
629 if (error) {
630 goto done;
631 }
632
633 mbuf_free(tail);
634
635 error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
636 if (error) {
637 goto done;
638 }
639
640 if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
641 FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
642 error = EINVAL;
643 goto done;
644 }
645
646 done:
647 if (group != NULL) {
648 lck_rw_done(&group->lck);
649 FDGRP_RELEASE(group);
650 }
651 return error;
652 }
653
/*
 * Account one packet of `data_len` bytes against the socket's inpcb
 * statistics (tx when `send` is true, rx otherwise), attributing the
 * counts to the functional type of the last output interface, and
 * mark the inpcb as active.
 */
static void
flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
{
	struct inpcb *inp = NULL;
	struct ifnet *ifp = NULL;
	stats_functional_type ifnet_count_type = stats_functional_type_none;

	inp = sotoinpcb(fd_cb->so);
	if (inp == NULL) {
		return;
	}

	/* Pick the last output interface for the socket's address family */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if (ifp != NULL) {
		ifnet_count_type = IFNET_COUNT_TYPE(ifp);
	}

	if (send) {
		INP_ADD_STAT(inp, ifnet_count_type, txpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, txbytes, data_len);
	} else {
		INP_ADD_STAT(inp, ifnet_count_type, rxpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, rxbytes, data_len);
	}
	inp_set_activity_bitmap(inp);
}
684
685 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)686 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
687 {
688 struct inpcb *inp = sotoinpcb(fd_cb->so);
689 if (INP_NO_CELLULAR(inp)) {
690 struct ifnet *ifp = NULL;
691 if (inp->inp_vflag & INP_IPV4) {
692 ifp = inp->inp_last_outifp;
693 } else if (inp->inp_vflag & INP_IPV6) {
694 ifp = inp->in6p_last_outifp;
695 }
696 if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
697 FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
698 return EHOSTUNREACH;
699 }
700 }
701 return 0;
702 }
703
704 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)705 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
706 {
707 struct inpcb *inp = sotoinpcb(fd_cb->so);
708 if (INP_NO_EXPENSIVE(inp)) {
709 struct ifnet *ifp = NULL;
710 if (inp->inp_vflag & INP_IPV4) {
711 ifp = inp->inp_last_outifp;
712 } else if (inp->inp_vflag & INP_IPV6) {
713 ifp = inp->in6p_last_outifp;
714 }
715 if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
716 FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
717 return EHOSTUNREACH;
718 }
719 }
720 return 0;
721 }
722
723 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)724 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
725 {
726 struct inpcb *inp = sotoinpcb(fd_cb->so);
727 if (INP_NO_CONSTRAINED(inp)) {
728 struct ifnet *ifp = NULL;
729 if (inp->inp_vflag & INP_IPV4) {
730 ifp = inp->inp_last_outifp;
731 } else if (inp->inp_vflag & INP_IPV6) {
732 ifp = inp->in6p_last_outifp;
733 }
734 if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
735 FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
736 return EHOSTUNREACH;
737 }
738 }
739 return 0;
740 }
741
/*
 * Record read/write closure in the PCB flags.
 *
 * how: shutdown(2) semantics -- any value other than SHUT_RD closes
 * the write side, any value other than SHUT_WR closes the read side
 * (so SHUT_RDWR closes both).
 *
 * tunnel: when true, the tunnel-side closed flags are set as well;
 * they are also set when no connect has been started yet, since in
 * that case there is no tunnel flow to keep open.
 *
 * flush_snd: when the tunnel write side closes, optionally flush the
 * socket send buffer because the buffered data can never be sent.
 */
static void
flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
{
	if (how != SHUT_RD) {
		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
			if (flush_snd) {
				/* If the tunnel is not accepting writes any more, then flush the send buffer */
				sbflush(&fd_cb->so->so_snd);
			}
		}
	}
	if (how != SHUT_WR) {
		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
		}
	}
}
762
763 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)764 trie_node_alloc(struct flow_divert_trie *trie)
765 {
766 if (trie->nodes_free_next < trie->nodes_count) {
767 uint16_t node_idx = trie->nodes_free_next++;
768 TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
769 return node_idx;
770 } else {
771 return NULL_TRIE_IDX;
772 }
773 }
774
775 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)776 trie_child_map_alloc(struct flow_divert_trie *trie)
777 {
778 if (trie->child_maps_free_next < trie->child_maps_count) {
779 return trie->child_maps_free_next++;
780 } else {
781 return NULL_TRIE_IDX;
782 }
783 }
784
785 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)786 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
787 {
788 uint16_t start = trie->bytes_free_next;
789 if (start + bytes_size <= trie->bytes_count) {
790 if (start != bytes_idx) {
791 memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
792 }
793 trie->bytes_free_next += bytes_size;
794 return start;
795 } else {
796 return NULL_TRIE_IDX;
797 }
798 }
799
/*
 * Insert the string occupying [string_start, string_start+string_len)
 * of the trie's byte pool into the compressed (radix-style) trie.
 *
 * Walks the existing nodes, splitting a node in two when the new
 * string diverges in the middle of that node's byte run, and finally
 * appends a leaf holding the unmatched remainder (the leaf's bytes
 * are compacted to the front of the free byte area by
 * trie_bytes_move).
 *
 * Returns the index of the node that terminates the inserted string,
 * or NULL_TRIE_IDX if the trie ran out of nodes, child maps or bytes.
 */
static uint16_t
flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
{
	uint16_t current = trie->root;
	uint16_t child = trie->root;
	uint16_t string_end = string_start + (uint16_t)string_len;
	uint16_t string_idx = string_start;
	uint16_t string_remainder = (uint16_t)string_len;

	while (child != NULL_TRIE_IDX) {
		uint16_t parent = current;
		uint16_t node_idx;
		uint16_t current_end;

		current = child;
		child = NULL_TRIE_IDX;

		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;

		/* Match as many bytes of the current node's run as possible */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < current_end &&
		    string_idx < string_end &&
		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
		    node_idx++, string_idx++) {
			;
		}

		string_remainder = string_end - string_idx;

		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
			/*
			 * We did not reach the end of the current node's string.
			 * We need to split the current node into two:
			 * 1. A new node that contains the prefix of the node that matches
			 *    the prefix of the string being inserted.
			 * 2. The current node modified to point to the remainder
			 *    of the current node's string.
			 */
			uint16_t prefix = trie_node_alloc(trie);
			if (prefix == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
				return NULL_TRIE_IDX;
			}

			/*
			 * Prefix points to the portion of the current nodes's string that has matched
			 * the input string thus far.
			 */
			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);

			/*
			 * Prefix has the current node as the child corresponding to the first byte
			 * after the split.
			 */
			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
				return NULL_TRIE_IDX;
			}
			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;

			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;

			/* Current node is adjusted to point to the remainder */
			TRIE_NODE(trie, current).start = node_idx;
			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;

			/* We want to insert the new leaf (if any) as a child of the prefix */
			current = prefix;
		}

		if (string_remainder > 0) {
			/*
			 * We still have bytes in the string that have not been matched yet.
			 * If the current node has children, iterate to the child corresponding
			 * to the next byte in the string.
			 */
			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
			}
		}
	} /* while (child != NULL_TRIE_IDX) */

	if (string_remainder > 0) {
		/* Add a new leaf containing the remainder of the string */
		uint16_t leaf = trie_node_alloc(trie);
		if (leaf == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}

		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}
		TRIE_NODE(trie, leaf).length = string_remainder;

		/* Set the new leaf as the child of the current node */
		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
				return NULL_TRIE_IDX;
			}
		}
		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
		current = leaf;
	} /* else duplicate or this string is a prefix of one of the existing strings */

	return current;
}
914
915 #define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
/*
 * Search the trie for the NUL-terminated signing identifier in
 * string_bytes.  Returns the index of the node that terminates the
 * match, or NULL_TRIE_IDX if there is no match.
 *
 * Special case: once exactly the Apple webclip prefix
 * ("com.apple.webapp") has been consumed at a node boundary, the
 * search is treated as a match, so any webclip signing ID matches a
 * stored entry that shares that prefix.
 */
static uint16_t
flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes __sized_by(string_bytes_count), __unused size_t string_bytes_count)
{
	uint16_t current = trie->root;
	uint16_t string_idx = 0;

	while (current != NULL_TRIE_IDX) {
		uint16_t next = NULL_TRIE_IDX;
		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
		uint16_t node_idx;

		/* Match this node's byte run against the search string */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
		    node_idx++, string_idx++) {
			;
		}

		if (node_idx == node_end) {
			if (string_bytes[string_idx] == '\0') {
				return current; /* Got an exact match */
			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
			    0 == strlcmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
				return current; /* Got an apple webclip id prefix match */
			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				/* Descend to the child keyed by the next unmatched byte */
				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
			}
		}
		current = next;
	}

	return NULL_TRIE_IDX;
}
948
/*
 * Context shared by the proc_iterate() filter/callout pair that locates a
 * process by executable UUID while verifying that every matching process
 * carries the same code-signing identity.
 */
struct uuid_search_info {
	uuid_t target_uuid;                   /* executable UUID being searched for */
	char *found_signing_id __sized_by(found_signing_id_size); /* signing ID of the first match (kalloc_data'd by the filter) */
	boolean_t found_multiple_signing_ids; /* TRUE once matches with differing signing IDs are seen */
	proc_t found_proc;                    /* the claimed process, or PROC_NULL */
	size_t found_signing_id_size;         /* allocation size of found_signing_id, including NUL */
};
956
/*
 * proc_iterate() callout, invoked for processes accepted by
 * flow_divert_find_proc_by_uuid_filter(). If every match so far shared one
 * signing identity, the first process to reach here is claimed and the
 * iteration stops; otherwise the search ends with no result. The signing-ID
 * buffer allocated by the filter is released here in either case.
 */
static int
flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
{
	struct uuid_search_info *info = (struct uuid_search_info *)arg;
	int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */

	if (info->found_signing_id != NULL) {
		if (!info->found_multiple_signing_ids) {
			/* All processes that were found had the same signing identifier, so just claim this first one and be done. */
			info->found_proc = p;
			result = PROC_CLAIMED_DONE;
		} else {
			uuid_string_t uuid_str;
			uuid_unparse(info->target_uuid, uuid_str);
			FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
		}
		/* Done with the signing ID captured by the filter. */
		kfree_data_sized_by(info->found_signing_id, info->found_signing_id_size);
	}

	if (result == PROC_RETURNED_DONE) {
		uuid_string_t uuid_str;
		uuid_unparse(info->target_uuid, uuid_str);
		FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
	}

	return result;
}
984
985 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)986 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
987 {
988 struct uuid_search_info *info = (struct uuid_search_info *)arg;
989 int include = 0;
990
991 if (info->found_multiple_signing_ids) {
992 return include;
993 }
994
995 const unsigned char * p_uuid = proc_executableuuid_addr(p);
996 include = (uuid_compare(p_uuid, info->target_uuid) == 0);
997 if (include) {
998 const char *signing_id __null_terminated = cs_identity_get(p);
999 if (signing_id != NULL) {
1000 FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
1001 size_t signing_id_size = strlen(signing_id) + 1;
1002 if (info->found_signing_id == NULL) {
1003 info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
1004 info->found_signing_id_size = signing_id_size;
1005 strlcpy(info->found_signing_id, signing_id, signing_id_size);
1006 } else if (strlcmp(info->found_signing_id, signing_id, info->found_signing_id_size)) {
1007 info->found_multiple_signing_ids = TRUE;
1008 }
1009 } else {
1010 info->found_multiple_signing_ids = TRUE;
1011 }
1012 include = !info->found_multiple_signing_ids;
1013 }
1014
1015 return include;
1016 }
1017
1018 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)1019 flow_divert_find_proc_by_uuid(uuid_t uuid)
1020 {
1021 struct uuid_search_info info;
1022
1023 if (LOG_INFO <= nil_pcb.log_level) {
1024 uuid_string_t uuid_str;
1025 uuid_unparse(uuid, uuid_str);
1026 FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
1027 }
1028
1029 memset(&info, 0, sizeof(info));
1030 info.found_proc = PROC_NULL;
1031 uuid_copy(info.target_uuid, uuid);
1032
1033 proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
1034
1035 return info.found_proc;
1036 }
1037
/*
 * Append identifying information about `proc` to the connect packet: its
 * code-signing identifier, cdhash, and task audit token. When is_effective
 * is true, the "effective" TLV types are used and the signing ID is first
 * validated against the group's signing-ID trie (unless the group has no
 * app map); otherwise the "real" TLV types are used. Returns 0 on success,
 * EPERM if the signing ID is missing or does not match the app map, or an
 * error from appending a TLV. The proc lock is held for the duration.
 */
static int
flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet, bool is_effective)
{
	int error = 0;
	uint8_t *cdhash = NULL;
	audit_token_t audit_token = {};
	const char *proc_cs_id __null_terminated = signing_id;

	proc_lock(proc);

	if (proc_cs_id == NULL) {
		/* No caller-supplied signing ID; take it from the process's code
		 * signature, but only if the signature is valid (or being debugged). */
		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
			proc_cs_id = cs_identity_get(proc);
		} else {
			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
		}
	}

	if (is_effective) {
		/* The effective signing ID must match the group's app map, if any. */
		lck_rw_lock_shared(&fd_cb->group->lck);
		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
			if (proc_cs_id != NULL) {
				size_t proc_cs_id_size = strlen(proc_cs_id) + 1;
				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)__unsafe_null_terminated_to_indexable(proc_cs_id), proc_cs_id_size);
				if (result == NULL_TRIE_IDX) {
					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
					error = EPERM;
				} else {
					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
				}
			} else {
				error = EPERM;
			}
		}
		lck_rw_done(&fd_cb->group->lck);
	}

	if (error != 0) {
		goto done;
	}

	/*
	 * If signing_id is not NULL then it came from the flow divert token and will be added
	 * as part of the token, so there is no need to add it here.
	 */
	if (signing_id == NULL && proc_cs_id != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
		    (uint32_t)strlen(proc_cs_id),
		    __terminated_by_to_indexable(proc_cs_id));
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
			goto done;
		}
	}

	/* A missing cdhash is logged but is not a fatal error. */
	cdhash = cs_get_cdhash(proc);
	if (cdhash != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
		    SHA1_RESULTLEN,
		    cdhash);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
			goto done;
		}
	} else {
		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
	}

	/* Append the task's audit token; failure to append is logged only. */
	task_t task __single = proc_task(proc);
	if (task != TASK_NULL) {
		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
		if (rc == KERN_SUCCESS) {
			int append_error = flow_divert_packet_append_tlv(connect_packet,
			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
			    sizeof(audit_token_t),
			    &audit_token);
			if (append_error) {
				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
			}
		}
	}

done:
	proc_unlock(proc);

	return error;
}
1128
/*
 * Determine which processes are associated with this socket (real,
 * delegated/effective, and — on macOS — responsible) and append their
 * identifying information to the connect packet. The "effective" record
 * comes from the responsible process if one exists, otherwise the delegated
 * process, otherwise the real process; a second "real" record is added when
 * it differs from the first. Returns 0 on success or an errno value.
 */
static int
flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet)
{
	int error = 0;
	proc_t effective_proc = PROC_NULL;
	proc_t responsible_proc = PROC_NULL;
	proc_t real_proc = proc_find(so->last_pid);
	bool release_real_proc = true; /* false when real_proc did not come from proc_find() */

	proc_t src_proc = PROC_NULL;
	proc_t real_src_proc = PROC_NULL;

	if (real_proc == PROC_NULL) {
		/* Fall back to the caller-supplied proc, then to the current proc.
		 * Neither holds a proc_find() reference, so don't release it later. */
		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
		release_real_proc = false;
		real_proc = proc;
		if (real_proc == PROC_NULL) {
			real_proc = current_proc();
		}
	}

	if (so->so_flags & SOF_DELEGATED) {
		/* Find the delegated (effective) process by PID, or by executable
		 * UUID when the PIDs coincide but the UUIDs differ. */
		if (proc_getpid(real_proc) != so->e_pid) {
			effective_proc = proc_find(so->e_pid);
		} else {
			const unsigned char * real_proc_uuid = proc_executableuuid_addr(real_proc);
			if (uuid_compare(real_proc_uuid, so->e_uuid)) {
				effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
			}
		}
	}

#if defined(XNU_TARGET_OS_OSX)
	/* The responsible process only matters when the group uses an app map. */
	lck_rw_lock_shared(&fd_cb->group->lck);
	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
		if (so->so_rpid > 0) {
			responsible_proc = proc_find(so->so_rpid);
		}
	}
	lck_rw_done(&fd_cb->group->lck);
#endif

	real_src_proc = real_proc;

	/* Pick the "effective" source and, when responsible is used, let the
	 * delegated process stand in as the "real" source. */
	if (responsible_proc != PROC_NULL) {
		src_proc = responsible_proc;
		if (effective_proc != NULL) {
			real_src_proc = effective_proc;
		}
	} else if (effective_proc != PROC_NULL) {
		src_proc = effective_proc;
	} else {
		src_proc = real_proc;
	}

	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
	if (error != 0) {
		goto done;
	}

	if (real_src_proc != NULL && real_src_proc != src_proc) {
		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
		if (error != 0) {
			goto done;
		}
	}

done:
	/* Drop the proc_find()/search references taken above. */
	if (responsible_proc != PROC_NULL) {
		proc_rele(responsible_proc);
	}

	if (effective_proc != PROC_NULL) {
		proc_rele(effective_proc);
	}

	if (real_proc != PROC_NULL && release_real_proc) {
		proc_rele(real_proc);
	}

	return error;
}
1211
/*
 * Hand `packet` to the control provider for fd_cb's group. If the group's
 * send queue is empty, the packet is enqueued directly on the kernel control
 * socket; otherwise, or when the control socket is full (ENOBUFS), the
 * packet is appended to the group's send queue and the
 * GROUP_BIT_CTL_ENQUEUE_BLOCKED bit is set so the queue is flushed later.
 * Returns 0 when the packet was accepted (sent or queued) — the packet is
 * then owned by this function — or an errno, in which case the caller
 * retains ownership of the packet.
 */
static int
flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet)
{
	int error;

	if (fd_cb->group == NULL) {
		FDLOG0(LOG_ERR, fd_cb, "no provider, cannot send packet");
		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			error = ECONNABORTED;
		} else {
			error = EHOSTUNREACH;
		}
		return error;
	}

	lck_rw_lock_shared(&fd_cb->group->lck);

	/* Preserve ordering: only bypass the send queue when it is empty. */
	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
		if (error) {
			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
		}
	} else {
		error = ENOBUFS;
	}

	if (error == ENOBUFS) {
		/* Need the lock exclusive to modify the send queue. If the upgrade
		 * fails, the shared lock was dropped, so take it exclusive. */
		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
			lck_rw_lock_exclusive(&fd_cb->group->lck);
		}
		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
		error = 0;
		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
	}

	lck_rw_done(&fd_cb->group->lck);

	return error;
}
1251
1252 static void
flow_divert_append_domain_name(char * domain_name __null_terminated,void * ctx)1253 flow_divert_append_domain_name(char *domain_name __null_terminated, void *ctx)
1254 {
1255 mbuf_ref_t packet = (mbuf_ref_t)ctx;
1256 size_t domain_name_length = 0;
1257
1258 if (packet == NULL || domain_name == NULL) {
1259 return;
1260 }
1261
1262 domain_name_length = strlen(domain_name);
1263 if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1264 int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, __terminated_by_to_indexable(domain_name));
1265 if (error) {
1266 FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1267 }
1268 }
1269 }
1270
/*
 * Build the CONNECT packet for this flow. The packet carries, in order:
 * process identity TLVs (signing IDs, cdhashes, audit tokens), the traffic
 * class, the flow type (TCP/UDP), either the caller-supplied connect token
 * or the target endpoint plus domain name, the local endpoint (if bound),
 * the outgoing interface index, flags (bound/TFO), and a content-filter ID.
 * On success *out_connect_packet receives the packet; on failure the packet
 * is freed. Consumes fd_cb->connect_token when one is present and HMAC'd.
 * Returns 0 or an errno value.
 */
static int
flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_ref_t *out_connect_packet)
{
	int error = 0;
	int flow_type = 0;
	char * signing_id __indexable = NULL;
	uint32_t sid_size = 0;
	mbuf_ref_t connect_packet = NULL;
	cfil_sock_id_t cfil_sock_id = CFIL_SOCK_ID_NONE;
	const void *cfil_id = NULL;
	size_t cfil_id_size = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct ifnet *ifp = NULL;
	uint32_t flags = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
	if (error) {
		goto done;
	}

	/* If an HMAC-validated token carries a signing ID, extract it so it can
	 * be verified against the group's app map. */
	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (find_error == 0 && sid_size > 0) {
			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
			if (signing_id != NULL) {
				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
			}
		}
	}

	// TODO: remove ternary operator after rdar://121487109 is fixed
	error = flow_divert_add_all_proc_info(fd_cb, so, p, NULL == signing_id ? NULL : __unsafe_null_terminated_from_indexable(signing_id), connect_packet);

	if (signing_id != NULL) {
		kfree_data(signing_id, sid_size + 1);
	}

	if (error) {
		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
	    sizeof(fd_cb->so->so_traffic_class),
	    &fd_cb->so->so_traffic_class);
	if (error) {
		goto done;
	}

	/* Only stream (TCP) and datagram (UDP) flows can be diverted. */
	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
	} else {
		error = EINVAL;
		goto done;
	}
	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_FLOW_TYPE,
	    sizeof(flow_type),
	    &flow_type);

	if (error) {
		goto done;
	}

	if (fd_cb->connect_token != NULL) {
		/* The token already contains the target endpoint TLVs; splice it
		 * onto the packet. The token mbuf is consumed here. */
		unsigned int token_len = m_length(fd_cb->connect_token);
		mbuf_concatenate(connect_packet, fd_cb->connect_token);
		mbuf_pkthdr_adjustlen(connect_packet, token_len);
		fd_cb->connect_token = NULL;
	} else {
		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
		if (error) {
			goto done;
		}

		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
	}

	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
		if (error) {
			goto done;
		}
	}

	/* Report the outgoing interface: the last output interface if known,
	 * falling back to the bound interface for explicitly bound sockets. */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if ((inp->inp_flags & INP_BOUND_IF) ||
	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
		if (ifp == NULL) {
			ifp = inp->inp_boundifp;
		}
	}
	if (ifp != NULL) {
		uint32_t flow_if_index = ifp->if_index;
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
		    sizeof(flow_if_index), &flow_if_index);
		if (error) {
			goto done;
		}
	}

	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
	}

	if (flags != 0) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
		if (error) {
			goto done;
		}
	}

	/* Identify the flow to content filter: by CFIL socket ID when one
	 * exists, else by the NECP client UUID when filtering is skipped. */
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
	} else {
		cfil_sock_id = cfil_sock_id_from_socket(so);
	}

	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
		cfil_id = &cfil_sock_id;
		cfil_id_size = sizeof(cfil_sock_id);
	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
		cfil_id = &inp->necp_client_uuid;
		cfil_id_size = sizeof(inp->necp_client_uuid);
	}

	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
		if (error) {
			goto done;
		}
	}

done:
	if (!error) {
		*out_connect_packet = connect_packet;
	} else if (connect_packet != NULL) {
		mbuf_freem(connect_packet);
	}

	return error;
}
1423
1424 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1425 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1426 {
1427 int error = 0;
1428 mbuf_ref_t connect_packet = fd_cb->connect_packet;
1429 mbuf_ref_t saved_connect_packet = NULL;
1430
1431 if (connect_packet != NULL) {
1432 error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1433 if (error) {
1434 FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1435 goto done;
1436 }
1437
1438 error = flow_divert_send_packet(fd_cb, connect_packet);
1439 if (error) {
1440 goto done;
1441 }
1442
1443 fd_cb->connect_packet = saved_connect_packet;
1444 saved_connect_packet = NULL;
1445 } else {
1446 error = ENOENT;
1447 }
1448 done:
1449 if (saved_connect_packet != NULL) {
1450 mbuf_freem(saved_connect_packet);
1451 }
1452
1453 return error;
1454 }
1455
1456 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1457 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1458 {
1459 int error = 0;
1460 mbuf_ref_t packet = NULL;
1461 int rbuff_space = 0;
1462
1463 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1464 if (error) {
1465 FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1466 goto done;
1467 }
1468
1469 rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1470 if (rbuff_space < 0) {
1471 rbuff_space = 0;
1472 }
1473 rbuff_space = htonl(rbuff_space);
1474 error = flow_divert_packet_append_tlv(packet,
1475 FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1476 sizeof(rbuff_space),
1477 &rbuff_space);
1478 if (error) {
1479 goto done;
1480 }
1481
1482 if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1483 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, SA_BYTES(&(fd_cb->local_endpoint.sa)));
1484 if (error) {
1485 goto done;
1486 }
1487 }
1488
1489 error = flow_divert_send_packet(fd_cb, packet);
1490 if (error) {
1491 goto done;
1492 }
1493
1494 done:
1495 if (error && packet != NULL) {
1496 mbuf_freem(packet);
1497 }
1498
1499 return error;
1500 }
1501
1502 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1503 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1504 {
1505 int error = 0;
1506 mbuf_ref_t packet = NULL;
1507 uint32_t zero = 0;
1508
1509 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1510 if (error) {
1511 FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1512 goto done;
1513 }
1514
1515 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1516 if (error) {
1517 FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1518 goto done;
1519 }
1520
1521 how = htonl(how);
1522 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1523 if (error) {
1524 FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1525 goto done;
1526 }
1527
1528 error = flow_divert_send_packet(fd_cb, packet);
1529 if (error) {
1530 goto done;
1531 }
1532
1533 done:
1534 if (error && packet != NULL) {
1535 mbuf_freem(packet);
1536 }
1537
1538 return error;
1539 }
1540
1541 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1542 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1543 {
1544 if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1545 (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1546 return SHUT_RDWR;
1547 } else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1548 return SHUT_RD;
1549 } else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1550 return SHUT_WR;
1551 }
1552
1553 return -1;
1554 }
1555
1556 /*
1557 * Determine what close messages if any need to be sent to the tunnel. Returns TRUE if the tunnel is closed for both reads and
1558 * writes. Returns FALSE otherwise.
1559 */
1560 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1561 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1562 {
1563 int how = -1;
1564
1565 /* Do not send any close messages if there is still data in the send buffer */
1566 if (fd_cb->so->so_snd.sb_cc == 0) {
1567 if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1568 /* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1569 how = SHUT_RD;
1570 }
1571 if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1572 /* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1573 if (how == SHUT_RD) {
1574 how = SHUT_RDWR;
1575 } else {
1576 how = SHUT_WR;
1577 }
1578 }
1579 }
1580
1581 if (how != -1) {
1582 FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1583 if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1584 /* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1585 if (how != SHUT_RD) {
1586 fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1587 }
1588 if (how != SHUT_WR) {
1589 fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1590 }
1591 }
1592 }
1593 }
1594
/*
 * Build and send a DATA packet for a stream flow, attaching the mbuf chain
 * `data` (data_len bytes) after the packet header. On success the data is
 * consumed by the packet and bytes_sent/statistics are updated. On failure
 * the data chain is detached before the packet header is freed, so the
 * caller still owns (and must free) `data`.
 */
static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len)
{
	mbuf_ref_t packet = NULL;
	mbuf_ref_t last = NULL;
	int error = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		/* Chain the payload onto the packet and account for its length. */
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the caller's data before freeing the packet header. */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1633
/*
 * Build and send a DATA packet for a datagram flow. Optional TLVs carry the
 * destination address (`toaddr`), a fragment marker, and the full datagram
 * size; the payload chain `data` (data_len bytes) is attached after them.
 * On success the data is consumed and bytes_sent/statistics are updated;
 * on failure the data chain is detached first, so the caller still owns
 * (and must free) `data`.
 */
static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
{
	mbuf_ref_t packet = NULL;
	mbuf_ref_t last = NULL;
	int error = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	if (toaddr != NULL) {
		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
			goto done;
		}
	}
	if (is_fragment) {
		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
			goto done;
		}
	}

	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
		goto done;
	}

	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		/* Chain the payload onto the packet and account for its length. */
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the caller's data before freeing the packet header. */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1693
/*
 * Split a datagram larger than FLOW_DIVERT_CHUNK_SIZE into chunk-sized
 * fragments and send each as a DATA packet. Only the first fragment carries
 * the destination address and the total datagram size; the rest carry
 * NULL/0. Consumes `datagram` on success; on error, frees any unsent
 * remainder before returning the error.
 */
static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_ref_t datagram, size_t datagram_len, struct sockaddr *toaddr)
{
	mbuf_ref_t next_data = datagram;
	size_t remaining_len = datagram_len;
	mbuf_ref_t remaining_data = NULL;
	int error = 0;
	bool first = true;

	while (remaining_len > 0 && next_data != NULL) {
		size_t to_send = remaining_len;
		remaining_data = NULL;

		if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
			/* Carve off one chunk; the rest goes into remaining_data. */
			to_send = FLOW_DIVERT_CHUNK_SIZE;
			error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
			if (error) {
				break;
			}
		}

		error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
		if (error) {
			break;
		}

		first = false;
		remaining_len -= to_send;
		next_data = remaining_data;
	}

	if (error) {
		/* On failure, flow_divert_send_datagram_packet did not consume
		 * next_data, so both it and any split-off remainder are freed here. */
		if (next_data != NULL) {
			mbuf_freem(next_data);
		}
		if (remaining_data != NULL) {
			mbuf_freem(remaining_data);
		}
	}
	return error;
}
1735
/*
 * Flush as much of the socket's send buffer to the provider as the flow's
 * send window allows (the window is ignored when `force` is set). Stream
 * sockets are sent in FLOW_DIVERT_CHUNK_SIZE pieces and the sent bytes are
 * dropped from the buffer; datagram sockets are sent record-by-record
 * (fragmenting oversized datagrams) and each record is dropped as it is
 * consumed. The send window is decremented by the amount sent.
 */
static void
flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
{
	size_t to_send;
	size_t sent = 0;
	int error = 0;
	mbuf_ref_t buffer;

	to_send = fd_cb->so->so_snd.sb_cc;
	buffer = fd_cb->so->so_snd.sb_mb;

	if (buffer == NULL && to_send > 0) {
		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
		return;
	}

	/* Ignore the send window if force is enabled */
	if (!force && (to_send > fd_cb->send_window)) {
		to_send = fd_cb->send_window;
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		/* Stream: copy and send successive chunks, then drop what was sent. */
		while (sent < to_send) {
			mbuf_ref_t data;
			size_t data_len;

			data_len = to_send - sent;
			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
				data_len = FLOW_DIVERT_CHUNK_SIZE;
			}

			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
				break;
			}

			error = flow_divert_send_data_packet(fd_cb, data, data_len);
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}

			sent += data_len;
		}
		sbdrop(&fd_cb->so->so_snd, (int)sent);
		sowwakeup(fd_cb->so);
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		mbuf_ref_t data;
		mbuf_ref_t m;
		size_t data_len;

		/* Datagram: walk the buffer record by record. */
		while (buffer) {
			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);

			m = buffer;
			if (toaddr != NULL) {
				/* look for data in the chain */
				do {
					m = m->m_next;
					if (m != NULL && m->m_type == MT_DATA) {
						break;
					}
				} while (m);
				if (m == NULL) {
					/* unexpected */
					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
					goto move_on;
				}
			}
			data_len = mbuf_pkthdr_len(m);
			if (data_len > 0) {
				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
					break;
				}
			} else {
				data = NULL;
			}
			/* Oversized datagrams are fragmented; the fragment path takes
			 * ownership of `data`, so clear it to avoid a double free. */
			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
			} else {
				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
				data = NULL;
			}
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}
			sent += data_len;
move_on:
			/* Advance before dropping the consumed record. */
			buffer = buffer->m_nextpkt;
			(void) sbdroprecord(&(fd_cb->so->so_snd));
		}
	}

	if (sent > 0) {
		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
		/* Shrink the send window by the amount sent, clamping at zero. */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}
	}
}
1847
/*
 * Send application data for this flow, respecting the send window. Whatever
 * cannot be sent immediately is buffered in the socket's send buffer for a
 * later flow_divert_send_buffered_data() pass. Stream data is carved into
 * FLOW_DIVERT_CHUNK_SIZE packets; datagrams are sent whole (fragmenting
 * when oversized) or buffered as one record. Returns 0 on success or
 * ENOBUFS when data had to be dropped because the send buffer was full.
 */
static int
flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_size, struct sockaddr *toaddr)
{
	size_t to_send = data_size;
	int error = 0;

	if (to_send > fd_cb->send_window) {
		to_send = fd_cb->send_window;
	}

	if (fd_cb->so->so_snd.sb_cc > 0) {
		to_send = 0; /* If the send buffer is non-empty, then we can't send anything */
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		size_t sent = 0;
		mbuf_ref_t remaining_data = data;
		size_t remaining_size = data_size;
		mbuf_ref_t pkt_data = NULL;
		/* Send chunk-sized packets until the window allowance is used up. */
		while (sent < to_send && remaining_data != NULL && remaining_size > 0) {
			size_t pkt_data_len;

			pkt_data = remaining_data;

			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
			} else {
				pkt_data_len = to_send - sent;
			}

			if (pkt_data_len < remaining_size) {
				/* Split off one packet's worth; the rest stays in remaining_data. */
				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
					pkt_data = NULL;
					break;
				}
				remaining_size -= pkt_data_len;
			} else {
				remaining_data = NULL;
				remaining_size = 0;
			}

			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
			if (error) {
				break;
			}

			pkt_data = NULL;
			sent += pkt_data_len;
		}

		/* Shrink the send window by the amount sent, clamping at zero. */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}

		error = 0;

		/* Buffer any chunk that failed to send, if there is room. */
		if (pkt_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(pkt_data);
				error = ENOBUFS;
			}
		}

		/* Buffer whatever was never attempted, if there is room. */
		if (remaining_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(remaining_data);
				error = ENOBUFS;
			}
		}
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		int send_dgram_error = 0;
		/* Datagrams are all-or-nothing: send now if the window allows
		 * (zero-length datagrams always go), otherwise buffer below. */
		if (to_send || data_size == 0) {
			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
			} else {
				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
				data = NULL;
			}
			if (send_dgram_error) {
				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
			} else {
				/* Sent: shrink the window (clamped at zero) and mark the
				 * data as consumed. */
				if (data_size >= fd_cb->send_window) {
					fd_cb->send_window = 0;
				} else {
					fd_cb->send_window -= data_size;
				}
				data = NULL;
			}
		}

		if (data != NULL) {
			/* buffer it */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (toaddr != NULL) {
					int append_error = 0;
					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
					}
				} else {
					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
					}
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
				mbuf_freem(data);
			}
		}
	}

	return error;
}
1978
1979 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1980 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1981 {
1982 int error = 0;
1983 mbuf_ref_t packet = NULL;
1984
1985 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1986 if (error) {
1987 FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1988 goto done;
1989 }
1990
1991 error = flow_divert_send_packet(fd_cb, packet);
1992 if (error) {
1993 goto done;
1994 }
1995
1996 done:
1997 if (error && packet != NULL) {
1998 mbuf_freem(packet);
1999 }
2000
2001 return error;
2002 }
2003
2004 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)2005 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
2006 {
2007 int error = 0;
2008 mbuf_ref_t packet = NULL;
2009
2010 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
2011 if (error) {
2012 FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
2013 goto done;
2014 }
2015
2016 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2017 if (error) {
2018 FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2019 goto done;
2020 }
2021
2022 error = flow_divert_send_packet(fd_cb, packet);
2023 if (error) {
2024 goto done;
2025 }
2026
2027 done:
2028 if (error && packet != NULL) {
2029 mbuf_freem(packet);
2030 }
2031
2032 return error;
2033 }
2034
2035 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2036 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2037 {
2038 struct inpcb *inp = sotoinpcb(fd_cb->so);
2039
2040 if (local_endpoint->sa_family == AF_INET6) {
2041 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2042 fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2043 inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2044 inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2045 in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2046 }
2047 if (inp->inp_lport == 0) {
2048 inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2049 }
2050 } else if (local_endpoint->sa_family == AF_INET) {
2051 if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2052 fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2053 inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2054 }
2055 if (inp->inp_lport == 0) {
2056 inp->inp_lport = (satosin(local_endpoint))->sin_port;
2057 }
2058 }
2059 }
2060
2061 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2062 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2063 {
2064 struct inpcb *inp = sotoinpcb(fd_cb->so);
2065
2066 if (remote_endpoint->sa_family == AF_INET6) {
2067 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2068 inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2069 inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2070 in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2071 }
2072 if (inp->inp_fport == 0) {
2073 inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2074 }
2075 } else if (remote_endpoint->sa_family == AF_INET) {
2076 if (inp->inp_faddr.s_addr == INADDR_ANY) {
2077 inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2078 }
2079 if (inp->inp_fport == 0) {
2080 inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2081 }
2082 }
2083 }
2084
/*
 * Derive the next kernel control unit to use for a flow.
 *
 * In/out protocol: *ctl_unit is the preferred unit and *aggregate_unit is a
 * bitmask of fallback units (bit N set means control unit N + 1). Both are
 * consumed/updated in place so that successive calls walk through the
 * candidate units. *is_aggregate is set to true when the returned unit has
 * (or came from) aggregate fallbacks. Returns the chosen control unit, or 0
 * when no unit is available.
 */
static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
{
	uint32_t result = *ctl_unit;

	// There are two models supported for deriving control units:
	// 1. A series of flow divert units that allow "transparently" failing
	// over to the next unit. For this model, the aggregate_unit contains list
	// of all control units (between 1 and 30) masked over each other.
	// 2. An indication that in-process flow divert should be preferred, with
	// an out of process flow divert to fail over to. For this model, the
	// ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
	// is returned first, with the unpacked aggregate unit returned as a
	// fallback.
	*is_aggregate = false;
	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
		bool found_unit = false;
		if (pid != 0) {
			// Look for an in-process group that is already open, and use that unit
			struct flow_divert_group *group = NULL;
			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
				if (group->in_process_pid == pid) {
					// Found an in-process group for our same PID, use it
					found_unit = true;
					result = group->ctl_unit;
					break;
				}
			}

			// If an in-process group isn't open yet, send a signal up through NECP to request one
			if (!found_unit) {
				necp_client_request_in_process_flow_divert(pid);
			}
		}

		// If a unit was found, return it
		if (found_unit) {
			if (aggregate_unit != NULL && *aggregate_unit != 0) {
				*is_aggregate = true;
			}
			// The next time around, the aggregate unit values will be picked up
			*ctl_unit = 0;
			return result;
		}

		// If no unit was found, fall through and clear out the ctl_unit
		result = 0;
		*ctl_unit = 0;
	}

	if (aggregate_unit != NULL && *aggregate_unit != 0) {
		uint32_t counter;
		struct flow_divert_group *lower_order_group = NULL;

		// Scan every unit in the aggregate mask and keep the open group
		// with the lowest order value (highest preference).
		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
			if ((*aggregate_unit) & (1 << counter)) {
				struct flow_divert_group *group = NULL;
				group = flow_divert_group_lookup(counter + 1, NULL);

				// NOTE(review): groups looked up here are not visibly
				// released in this function — confirm the reference
				// semantics of flow_divert_group_lookup(unit, NULL).
				if (group != NULL) {
					if (lower_order_group == NULL) {
						lower_order_group = group;
					} else if ((group->order < lower_order_group->order)) {
						lower_order_group = group;
					}
				}
			}
		}

		if (lower_order_group != NULL) {
			// Consume the chosen unit from the mask so the next call
			// moves on to the remaining fallbacks.
			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
			*is_aggregate = true;
			return lower_order_group->ctl_unit;
		} else {
			*ctl_unit = 0;
			return result;
		}
	} else {
		*ctl_unit = 0;
		return result;
	}
}
2167
/*
 * Move this flow to the next available flow divert control group.
 *
 * The flow is removed from its current group, then candidate control
 * units are derived from the flow's policy control unit and remaining
 * aggregate units until one with an open group accepts the insert. On
 * success a fresh connect packet is sent through the new group.
 *
 * Returns 0 on success; EALREADY when the next unit equals the current
 * one; ENOENT when no usable group remains or the connect packet could
 * not be sent (callers treat non-zero as "disable flow divert").
 */
static int
flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
{
	int error = 0;
	uint32_t policy_control_unit = fd_cb->policy_control_unit;

	flow_divert_pcb_remove(fd_cb);

	do {
		struct flow_divert_group *next_group = NULL;
		bool is_aggregate = false;
		/* pid 0: in-process flow divert groups are not considered here */
		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);

		if (fd_cb->control_group_unit == next_ctl_unit) {
			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
			error = EALREADY;
			break;
		}

		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
			error = ENOENT;
			break;
		}

		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
		if (next_group == NULL) {
			/* This unit has no open group; try the next candidate */
			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
			continue;
		}

		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);

		error = flow_divert_pcb_insert(fd_cb, next_group);
		if (error == 0) {
			/* Mark flows that still have aggregate fallbacks as transparent */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}
		}
		FDGRP_RELEASE(next_group); /* drop the lookup reference */
	} while (fd_cb->group == NULL);

	if (fd_cb->group == NULL) {
		/* Exited the loop without joining a group */
		return error ? error : ENOENT;
	}

	error = flow_divert_send_connect_packet(fd_cb);
	if (error) {
		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
		flow_divert_pcb_remove(fd_cb);
		error = ENOENT;
	}

	return error;
}
2225
/*
 * Detach a socket from flow divert and hand it back to its original
 * protocol, replaying any connect and buffered send data.
 *
 * Steps: restore the inpcb address/interface state saved when flow
 * divert attached, break the socket <-> fd_cb association, revert the
 * protocol switch entry, re-issue the connect (unless the flow used an
 * implicit connect), and finally flush the send buffer through the
 * original protocol's pru_send — as one stream chunk for TCP-style
 * sockets, or record-by-record (address + control + data mbufs) for
 * datagram sockets.
 *
 * Returns 0 on success or the first fatal errno encountered.
 */
static int
flow_divert_disable(struct flow_divert_pcb *fd_cb)
{
	struct socket *so = NULL;
	mbuf_ref_t buffer;
	int error = 0;
	proc_t last_proc = NULL;
	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
	struct inpcb *inp = NULL;

	so = fd_cb->so;
	if (so == NULL) {
		goto done;
	}

	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");

	/* Restore the IP state */
	inp = sotoinpcb(so);
	inp->inp_vflag = fd_cb->original_vflag;
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;
	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
	inp->inp_fifscope = IFSCOPE_NONE;
	inp->in6p_fport = 0;
	/* If flow divert set the local address, clear it out */
	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
		inp->inp_lifscope = IFSCOPE_NONE;
	}
	inp->inp_last_outifp = fd_cb->original_last_outifp;
	inp->in6p_last_outifp = fd_cb->original_last_outifp6;

	/* Dis-associate the socket */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP; /* prevent re-diversion of this socket */
	so->so_fd_pcb = NULL;
	fd_cb->so = NULL;

	FDRELEASE(fd_cb); /* Release the socket's reference */

	/* Revert back to the original protocol */
	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));

	/* Reset the socket state to avoid confusing NECP */
	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);

	last_proc = proc_find(so->last_pid);

	if (do_connect) {
		/* Connect using the original protocol */
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
			goto done;
		}
	}

	buffer = so->so_snd.sb_mb;
	if (buffer == NULL) {
		/* No buffered data, done */
		goto done;
	}

	/* Send any buffered data using the original protocol */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		mbuf_ref_t data_to_send = NULL;
		size_t data_len = so->so_snd.sb_cc;

		/* Copy first so the send buffer can be flushed before pru_send */
		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
			goto done;
		}

		sbflush(&so->so_snd);

		if (data_to_send->m_flags & M_PKTHDR) {
			mbuf_pkthdr_setlen(data_to_send, data_len);
		}

		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
		    0,
		    data_to_send,
		    NULL,
		    NULL,
		    (last_proc != NULL ? last_proc : current_proc()));

		/* EWOULDBLOCK just means the protocol buffered it; not fatal */
		if (error && error != EWOULDBLOCK) {
			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
		} else {
			error = 0;
		}
	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct sockbuf *sb = &so->so_snd;
		MBUFQ_HEAD(send_queue_head) send_queue;
		MBUFQ_INIT(&send_queue);

		/* Flush the send buffer, moving all records to a temporary queue */
		while (sb->sb_mb != NULL) {
			mbuf_ref_t record = sb->sb_mb;
			mbuf_ref_t m = record;
			sb->sb_mb = sb->sb_mb->m_nextpkt;
			while (m != NULL) {
				/* Adjust the sockbuf accounting for each mbuf removed */
				sbfree(sb, m);
				m = m->m_next;
			}
			record->m_nextpkt = NULL;
			MBUFQ_ENQUEUE(&send_queue, record);
		}
		SB_EMPTY_FIXUP(sb);

		while (!MBUFQ_EMPTY(&send_queue)) {
			mbuf_ref_t next_record = MBUFQ_FIRST(&send_queue);
			mbuf_ref_t addr = NULL;
			mbuf_ref_t control = NULL;
			mbuf_ref_t last_control = NULL;
			mbuf_ref_t data = NULL;
			mbuf_ref_t m = next_record;
			struct sockaddr *to_endpoint = NULL;

			MBUFQ_DEQUEUE(&send_queue, next_record);

			/* Each record is addr (MT_SONAME) + control(s) + data */
			while (m != NULL) {
				if (m->m_type == MT_SONAME) {
					addr = m;
				} else if (m->m_type == MT_CONTROL) {
					if (control == NULL) {
						control = m;
					}
					last_control = m;
				} else if (m->m_type == MT_DATA) {
					data = m;
					break;
				}
				m = m->m_next;
			}

			/* Only unconnected flows need an explicit destination */
			if (addr != NULL && !do_connect) {
				to_endpoint = flow_divert_get_buffered_target_address(addr);
				if (to_endpoint == NULL) {
					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
				}
			}

			if (data == NULL) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
				mbuf_freem(next_record);
				continue;
			}

			if (!(data->m_flags & M_PKTHDR)) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
				mbuf_freem(next_record);
				continue;
			}

			/* Detach addr/control chains from the data before pru_send */
			if (addr != NULL) {
				addr->m_next = NULL;
			}

			if (last_control != NULL) {
				last_control->m_next = NULL;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    0,
			    data,
			    to_endpoint,
			    control,
			    (last_proc != NULL ? last_proc : current_proc()));

			if (addr != NULL) {
				mbuf_freem(addr);
			}

			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
			}
		}
	}
done:
	if (last_proc != NULL) {
		proc_rele(last_proc);
	}

	return error;
}
2416
/*
 * Scope the flow's socket to the interface with index out_if_index.
 *
 * If the socket is already bound to a different interface, it is
 * re-bound via inp_bindif and, when derive_new_address is set, a new
 * local address appropriate for the new interface is derived from the
 * original remote endpoint. Finally the inpcb's "last outgoing
 * interface" is updated to the new ifnet. A non-positive out_if_index
 * is a no-op.
 */
static void
flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
{
	struct socket *so = NULL;
	struct inpcb *inp = NULL;
	struct ifnet *current_ifp = NULL;
	struct ifnet * __single new_ifp = NULL;
	int error = 0;

	so = fd_cb->so;
	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);

	if (out_if_index <= 0) {
		return;
	}

	if (inp->inp_vflag & INP_IPV6) {
		current_ifp = inp->in6p_last_outifp;
	} else {
		current_ifp = inp->inp_last_outifp;
	}

	if (current_ifp != NULL) {
		if (current_ifp->if_index == out_if_index) {
			/* No change */
			return;
		}

		/* Scope the socket to the given interface */
		error = inp_bindif(inp, out_if_index, &new_ifp);
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
			return;
		}

		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
			/* Get the appropriate address for the given interface */
			if (inp->inp_vflag & INP_IPV6) {
				/* Clear the old local address first so the lookup is not pinned to it */
				inp->in6p_laddr = sa6_any.sin6_addr;
				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
			} else {
				inp->inp_laddr.s_addr = INADDR_ANY;
				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
			}

			/* Address derivation failure is non-fatal; keep the new scope */
			if (error != 0) {
				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
			}
		}
	} else {
		/* Not yet bound to an interface; just resolve the index to an ifnet */
		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(out_if_index)) {
			new_ifp = ifindex2ifnet[out_if_index];
		}
		ifnet_head_done();
	}

	/* Update the "last interface" of the socket */
	if (new_ifp != NULL) {
		if (inp->inp_vflag & INP_IPV6) {
			inp->in6p_last_outifp = new_ifp;
		} else {
			inp->inp_last_outifp = new_ifp;
		}

#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
		}
#endif /* SKYWALK */
	}
}
2493
/*
 * Handle a CONNECT_RESULT packet from the flow divert provider.
 *
 * Parses the result TLVs (error code, send window, optional control
 * unit, local/remote addresses, output interface, app data), applies
 * them to the flow and its socket, and then either completes the
 * connection (soisconnected + flush buffered data) or handles failure:
 * a plugin rejection on an aggregate control unit triggers failover to
 * the next group (or disabling flow divert entirely); otherwise the
 * flow is closed and the socket disconnected with the reported error.
 */
static void
flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t connect_error = 0;
	uint32_t ctl_unit = 0;
	int error = 0;
	union sockaddr_in_4_6 local_endpoint = {};
	union sockaddr_in_4_6 remote_endpoint = {};
	int out_if_index = 0;
	uint32_t send_window = 0;
	uint32_t app_data_length = 0;
	struct inpcb *inp = NULL;
	struct socket *so = fd_cb->so;
	bool local_address_is_valid = false;

	/* NOTE(review): redundant with the "= {}" initializers above; kept as-is */
	memset(&local_endpoint, 0, sizeof(local_endpoint));
	memset(&remote_endpoint, 0, sizeof(remote_endpoint));

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
		return;
	}

	connect_error = ntohl(connect_error);
	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
		return;
	}

	/* The remaining TLVs are all optional */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
	}

	/* Only fetch the app data length here; the payload is copied below */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
	}

	/* Failures of the optional TLVs above are not errors */
	error = 0;

	if (!SO_IS_DIVERTED(so)) {
		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
		return;
	}

	if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
		FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
		return;
	}

	inp = sotoinpcb(so);

	if (connect_error || error) {
		goto set_socket_state;
	}

	/* Adopt the provider's local endpoint if it matches the socket's family */
	if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
		if (local_endpoint.sa.sa_family == AF_INET) {
			local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
			if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
				local_address_is_valid = true;
				fd_cb->local_endpoint = local_endpoint;
				/* Cleared so flow_divert_set_local_endpoint can install the new address */
				inp->inp_laddr.s_addr = INADDR_ANY;
			} else {
				/* Address unusable; take only the port */
				fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
			}
		} else if (local_endpoint.sa.sa_family == AF_INET6) {
			local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
			if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
				local_address_is_valid = true;
				fd_cb->local_endpoint = local_endpoint;
				inp->in6p_laddr = sa6_any.sin6_addr;
			} else {
				fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
			}
		}
	}

	/* Derive a local address only if the provider didn't give a usable one */
	flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
	flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));

	if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
		if (remote_endpoint.sa.sa_family == AF_INET) {
			remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
		} else if (remote_endpoint.sa.sa_family == AF_INET6) {
			remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
		}
		flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
	}

	if (app_data_length > 0) {
		uint8_t * app_data = NULL;
		app_data = kalloc_data(app_data_length, Z_WAITOK);
		if (app_data != NULL) {
			error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
			if (error == 0) {
				FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
				/* Replace any app data from an earlier result */
				if (fd_cb->app_data != NULL) {
					kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
				}
				fd_cb->app_data = app_data;
				fd_cb->app_data_length = app_data_length;
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
				kfree_data(app_data, app_data_length);
			}
		} else {
			FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
		}
	}

	if (error) {
		goto set_socket_state;
	}

	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto set_socket_state;
	}

	/* The provider may redirect the flow to a different control group */
	ctl_unit = ntohl(ctl_unit);
	if (ctl_unit > 0) {
		int insert_error = 0;
		struct flow_divert_group *grp = NULL;

		if (ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
			error = EINVAL;
			goto set_socket_state;
		}

		grp = flow_divert_group_lookup(ctl_unit, fd_cb);
		if (grp == NULL) {
			error = ECONNRESET;
			goto set_socket_state;
		}

		flow_divert_pcb_remove(fd_cb);
		insert_error = flow_divert_pcb_insert(fd_cb, grp);
		FDGRP_RELEASE(grp);

		if (insert_error != 0) {
			error = ECONNRESET;
			goto set_socket_state;
		}
	}

	fd_cb->send_window = ntohl(send_window);

set_socket_state:
	if (!connect_error && !error) {
		FDLOG0(LOG_INFO, fd_cb, "sending connect result");
		error = flow_divert_send_connect_result(fd_cb);
	}

	if (connect_error || error) {
		if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
			/* The plugin rejected the flow and the control unit is an aggregation of multiple plugins, try to move to the next one */
			error = flow_divert_try_next_group(fd_cb);
			if (error && fd_cb->policy_control_unit == 0) {
				/* No more plugins available, disable flow divert */
				error = flow_divert_disable(fd_cb);
			}

			if (error == 0) {
				/* Failover (or disable) succeeded; this flow carries on elsewhere */
				return;
			}
			so->so_error = (uint16_t)error;
		} else if (!connect_error) {
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
			so->so_error = (uint16_t)error;
			/* The plugin did not close the flow, so notify the plugin */
			flow_divert_send_close_if_needed(fd_cb);
		} else {
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			so->so_error = (uint16_t)connect_error;
		}
		flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
	} else {
#if NECP
		/* Update NECP client with connected five-tuple */
		if (!uuid_is_null(inp->necp_client_uuid)) {
			/* necp_client_assign_from_socket must run without the socket lock held */
			socket_unlock(so, 0);
			necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
			socket_lock(so, 0);
			if (!SO_IS_DIVERTED(so)) {
				/* The socket was closed while it was unlocked */
				return;
			}
		}
#endif /* NECP */

		flow_divert_send_buffered_data(fd_cb, FALSE);
		soisconnected(so);
	}

	/* We don't need the connect packet any more */
	if (fd_cb->connect_packet != NULL) {
		mbuf_freem(fd_cb->connect_packet);
		fd_cb->connect_packet = NULL;
	}

	/* We don't need the original remote endpoint any more */
	free_sockaddr(fd_cb->original_remote_endpoint);
}
2719
2720 static void
flow_divert_handle_close(struct flow_divert_pcb * fd_cb,mbuf_ref_t packet,int offset)2721 flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
2722 {
2723 uint32_t close_error = 0;
2724 int error = 0;
2725 int how = 0;
2726 struct socket *so = fd_cb->so;
2727 bool is_connected = (SOCK_TYPE(so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
2728
2729 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
2730 if (error) {
2731 FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
2732 return;
2733 }
2734
2735 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
2736 if (error) {
2737 FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
2738 return;
2739 }
2740
2741 how = ntohl(how);
2742
2743 FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);
2744
2745 if (!SO_IS_DIVERTED(so)) {
2746 FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
2747 return;
2748 }
2749
2750 so->so_error = (uint16_t)ntohl(close_error);
2751
2752 flow_divert_update_closed_state(fd_cb, how, true, true);
2753
2754 /* Only do this for connected flows because "shutdown by peer" doesn't make sense for unconnected datagram flows */
2755 how = flow_divert_tunnel_how_closed(fd_cb);
2756 if (how == SHUT_RDWR) {
2757 flow_divert_disconnect_socket(so, is_connected, true);
2758 } else if (how == SHUT_RD && is_connected) {
2759 socantrcvmore(so);
2760 } else if (how == SHUT_WR && is_connected) {
2761 socantsendmore(so);
2762 }
2763 }
2764
2765 static mbuf_ref_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2766 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2767 {
2768 struct inpcb *inp = sotoinpcb(fd_cb->so);
2769 bool need_recvdstaddr = false;
2770 /* Socket flow tracking needs to see the local address */
2771 need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2772 if ((inp->inp_vflag & INP_IPV4) &&
2773 fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2774 ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2775 return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2776 } else if ((inp->inp_vflag & INP_IPV6) &&
2777 fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2778 ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2779 struct in6_pktinfo pi6;
2780 memset(&pi6, 0, sizeof(pi6));
2781 pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2782
2783 return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2784 }
2785 return NULL;
2786 }
2787
/*
 * Handle a DATA packet from the flow divert provider: deliver its
 * payload to the socket's receive buffer.
 *
 * For datagram sockets, an optional REMOTE_ADDR TLV preceding the
 * payload identifies the sender; the payload is appended as a record
 * with that address (or the peer address) plus a control mbuf carrying
 * the local address when requested. For stream sockets the payload is
 * appended directly. Returns 0, or ENOBUFS when the receive buffer is
 * full (the flow is then flagged to send a read notification once the
 * app reads).
 */
static int
flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, size_t offset)
{
	int error = 0;
	struct socket *so = fd_cb->so;
	mbuf_ref_t data = NULL;
	size_t data_size;
	struct sockaddr_storage remote_address;
	boolean_t got_remote_sa = FALSE;
	boolean_t appended = FALSE;
	boolean_t append_success = FALSE;

	if (!SO_IS_DIVERTED(so)) {
		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
		return error;
	}

	if (sbspace(&so->so_rcv) == 0) {
		error = ENOBUFS;
		/* Ask for a read notification so the provider can be told when space opens up */
		fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
		FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
		return error;
	}

	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		uint32_t val_size = 0;

		/* check if we got remote address with data */
		memset(&remote_address, 0, sizeof(remote_address));
		error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
		if (error || val_size > sizeof(remote_address)) {
			FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
			error = 0;
		} else {
			/* Clamp ss_len; it came off the wire and is untrusted */
			if (remote_address.ss_len > sizeof(remote_address)) {
				remote_address.ss_len = sizeof(remote_address);
			}
			/* validate the address */
			if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
				got_remote_sa = TRUE;
			} else {
				FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
			}
			/* Skip past the address TLV (type byte + length + value) to the payload */
			offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
		}
	}

	data_size = (mbuf_pkthdr_len(packet) - offset);

	if (so->so_state & SS_CANTRCVMORE) {
		FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
		return error;
	}

	if (SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) {
		FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(so));
		return error;
	}

	FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);

	/* Split the payload off the packet header/TLVs; "data" is ours on success */
	error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
	if (error || data == NULL) {
		FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
		return error;
	}

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		appended = (sbappendstream(&so->so_rcv, data) != 0);
		append_success = TRUE;
	} else {
		struct sockaddr * __single append_sa = NULL;
		mbuf_ref_t mctl;

		/* Prefer the address from the packet; fall back to the connected peer */
		if (got_remote_sa == TRUE) {
			error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
		} else {
			if (SOCK_CHECK_DOM(so, AF_INET6)) {
				error = in6_mapped_peeraddr(so, &append_sa);
			} else {
				error = in_getpeeraddr(so, &append_sa);
			}
		}
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
		}

		mctl = flow_divert_create_control_mbuf(fd_cb);
		int append_error = 0;
		appended = sbappendaddr(&so->so_rcv, append_sa, data, mctl, &append_error);
		/* append_error == 0 with appended == 0 means sbappendaddr consumed the mbufs */
		if (appended || append_error == 0) {
			append_success = TRUE;
		} else {
			FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
		}

		free_sockaddr(append_sa);
	}

	if (append_success) {
		fd_cb->bytes_received += data_size;
		flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
	}

	if (appended) {
		sorwakeup(so);
	}

	return error;
}
2898
/*
 * Handle a READ_NOTIFY message from the user-space agent. The message
 * carries the number of bytes the agent has consumed; that count is
 * credited back to the flow's send window so buffered data can flow again.
 */
static void
flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t read_count = 0;
	int error = 0;

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
		return;
	}

	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));

	/* The socket may have been detached while this message was in flight */
	if (!SO_IS_DIVERTED(fd_cb->so)) {
		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
		return;
	}

	/* read_count is carried in network byte order */
	fd_cb->send_window += ntohl(read_count);
	/* Try to drain anything that was waiting on window space */
	flow_divert_send_buffered_data(fd_cb, FALSE);
}
2921
2922 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_ref_t packet,int offset)2923 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
2924 {
2925 int error = 0;
2926 uint32_t key_size = 0;
2927 int log_level = 0;
2928 uint32_t flags = 0;
2929 int32_t order = FLOW_DIVERT_ORDER_LAST;
2930
2931 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2932 if (error) {
2933 FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2934 return;
2935 }
2936
2937 if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2938 FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2939 return;
2940 }
2941
2942 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2943 if (!error) {
2944 nil_pcb.log_level = (uint8_t)log_level;
2945 }
2946
2947 lck_rw_lock_exclusive(&group->lck);
2948
2949 if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2950 FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2951 lck_rw_done(&group->lck);
2952 return;
2953 }
2954
2955 if (group->token_key != NULL) {
2956 kfree_data_sized_by(group->token_key, group->token_key_size);
2957 }
2958
2959 group->token_key = kalloc_data(key_size, Z_WAITOK);
2960 group->token_key_size = key_size;
2961 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2962 if (error) {
2963 FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2964 kfree_data_sized_by(group->token_key, group->token_key_size);
2965 lck_rw_done(&group->lck);
2966 return;
2967 }
2968
2969 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2970 if (!error) {
2971 group->flags = flags;
2972 }
2973
2974 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
2975 if (!error) {
2976 FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
2977 group->order = order;
2978 }
2979
2980 lck_rw_done(&group->lck);
2981 }
2982
/*
 * Handle a PROPERTIES_UPDATE message: optionally re-scope the flow to a new
 * output interface and/or replace the flow's opaque application data blob.
 * Both TLVs are optional; their absence is logged at INFO and ignored.
 */
static void
flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	int error = 0;
	int out_if_index = 0;
	uint32_t app_data_length = 0;
	struct socket *so = fd_cb->so;

	FDLOG0(LOG_INFO, fd_cb, "received a properties update");

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
	}

	/* Size query only here; the payload is copied further down if present */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
	}

	if (!SO_IS_DIVERTED(so)) {
		FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
		return;
	}

	if (out_if_index > 0) {
		/* Re-scope the flow and refresh the cached local endpoint */
		flow_divert_scope(fd_cb, out_if_index, true);
		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
	}

	if (app_data_length > 0) {
		uint8_t * app_data __indexable = NULL;
		app_data = kalloc_data(app_data_length, Z_WAITOK);
		if (app_data != NULL) {
			error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
			if (error == 0) {
				/* Swap in the new blob; free the old one only on success */
				if (fd_cb->app_data != NULL) {
					kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
				}
				fd_cb->app_data = app_data;
				fd_cb->app_data_length = app_data_length;
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
				kfree_data(app_data, app_data_length);
			}
		} else {
			FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
		}
	}
}
3033
/*
 * Handle an APP_MAP_CREATE message: (re)build the group's signing-ID prefix
 * trie from the SIGNING_ID TLVs in the packet.
 *
 * The build is two-pass: the first pass counts signing IDs and total bytes
 * (with overflow checks), the second pass copies each ID into the trie's
 * byte pool and inserts it. All sizing is bounded to UINT16_MAX because the
 * trie uses 16-bit indices, and total memory is capped at
 * FLOW_DIVERT_MAX_TRIE_MEMORY. The whole operation runs under the group's
 * exclusive lock, and the old trie is discarded up front regardless of
 * whether the rebuild succeeds.
 */
static void
flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
{
	size_t bytes_mem_size = 0;
	size_t child_maps_mem_size = 0;
	size_t nodes_mem_size = 0;
	size_t trie_memory_size = 0;
	int cursor = 0;
	int error = 0;
	struct flow_divert_trie new_trie;
	int insert_error = 0;
	int prefix_count = -1;
	int signing_id_count = 0;
	size_t bytes_count = 0;
	size_t nodes_count = 0;
	size_t maps_count = 0;

	lck_rw_lock_exclusive(&group->lck);

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	memset(&new_trie, 0, sizeof(new_trie));

	/* Get the number of shared prefixes in the new set of signing ID strings */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);

	if (prefix_count < 0 || error) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
		lck_rw_done(&group->lck);
		return;
	}

	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
			/* Zeroing the count below aborts the whole rebuild */
			signing_id_count = 0;
			break;
		}
		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
			signing_id_count = 0;
			break;
		}
		signing_id_count++;
	}

	if (signing_id_count == 0) {
		lck_rw_done(&group->lck);
		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
		return;
	}

	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
		return;
	}

	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
		return;
	}

	/* The trie uses 16-bit indices, so each pool must fit in a uint16_t */
	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
		lck_rw_done(&group->lck);
		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
		return;
	}

	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
	    nodes_count, maps_count, bytes_count);

	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
		lck_rw_done(&group->lck);
		return;
	}

	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
		lck_rw_done(&group->lck);
		return;
	}

	/* One allocation backs the node, child-map and byte pools */
	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
	new_trie.memory_size = trie_memory_size;
	if (new_trie.memory == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
		lck_rw_done(&group->lck);
		return;
	}

	/* Initialize the free lists */
	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
	new_trie.nodes_count = (uint16_t)nodes_count;

	new_trie.nodes_free_next = 0;
	memset(new_trie.nodes, 0, nodes_mem_size);

	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
	new_trie.child_maps_count = (uint16_t)maps_count;
	new_trie.child_maps_size = child_maps_mem_size;

	new_trie.child_maps_free_next = 0;
	/* 0xff fills child maps with NULL_TRIE_IDX entries */
	memset(new_trie.child_maps, 0xff, child_maps_mem_size);

	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
	new_trie.bytes_count = (uint16_t)bytes_count;

	new_trie.bytes_free_next = 0;
	memset(new_trie.bytes, 0, bytes_mem_size);

	/* The root is an empty node */
	new_trie.root = trie_node_alloc(&new_trie);

	/* Add each signing ID to the trie */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
			insert_error = EINVAL;
			break;
		}
		/* Re-check bounds: the packet could differ from the first pass */
		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
			uint16_t new_node_idx;
			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
			if (error) {
				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
				insert_error = EINVAL;
				break;
			}
			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
			if (new_node_idx == NULL_TRIE_IDX) {
				insert_error = EINVAL;
				break;
			}
		} else {
			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
			insert_error = ENOBUFS;
			break;
		}
	}

	/* Commit the new trie on success; otherwise release its backing memory */
	if (!insert_error) {
		group->signing_id_trie = new_trie;
	} else {
		kfree_data_sized_by(new_trie.memory, new_trie.memory_size);
	}

	lck_rw_done(&group->lck);
}
3203
/*
 * Handle a FLOW_STATES_REQUEST control message: build one FLOW_STATES reply
 * packet containing a FLOW_STATE TLV per flow in the group and enqueue it on
 * the kernel control socket.
 *
 * Locking: the group lock is only held (shared) long enough to snapshot and
 * retain the PCBs into a temporary list; each PCB and its socket are then
 * locked individually while their counters are read.
 */
static void
flow_divert_handle_flow_states_request(struct flow_divert_group *group)
{
	struct flow_divert_pcb *fd_cb;
	mbuf_ref_t packet = NULL;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;
	int error = 0;
	uint32_t ctl_unit = 0;

	SLIST_INIT(&tmp_list);

	error = flow_divert_packet_init(&nil_pcb, FLOW_DIVERT_PKT_FLOW_STATES, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_packet_init failed: %d, cannot send flow states", error);
		return;
	}

	lck_rw_lock_shared(&group->lck);

	if (!MBUFQ_EMPTY(&group->send_queue)) {
		FDLOG0(LOG_WARNING, &nil_pcb, "flow_divert_handle_flow_states_request: group send queue is not empty");
	}

	/* Capture the unit now; the group lock is dropped before enqueueing */
	ctl_unit = group->ctl_unit;

	/* Retain each PCB so it stays valid after the group lock is dropped */
	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	lck_rw_done(&group->lck);

	SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
		FDLOCK(fd_cb);
		if (fd_cb->so != NULL) {
			struct flow_divert_flow_state state = {};
			struct socket *so = fd_cb->so;
			flow_divert_lock_socket(so, fd_cb);

			state.conn_id = fd_cb->hash;
			state.bytes_written_by_app = fd_cb->bytes_written_by_app;
			state.bytes_sent = fd_cb->bytes_sent;
			state.bytes_received = fd_cb->bytes_received;
			state.send_window = fd_cb->send_window;
			state.send_buffer_bytes = so->so_snd.sb_cc;

			error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_FLOW_STATE, sizeof(state), &state);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to add a flow state: %d", error);
			}

			flow_divert_unlock_socket(so, fd_cb);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);
	}

	/* ctl_enqueuembuf consumes the packet on success; free it on failure */
	error = ctl_enqueuembuf(g_flow_divert_kctl_ref, ctl_unit, packet, CTL_DATA_EOR);
	if (error) {
		FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_handle_flow_states_request: ctl_enqueuembuf returned an error: %d", error);
		mbuf_freem(packet);
	}
}
3267
/*
 * Entry point for control messages arriving from the user-space agent.
 * Parses the packet header and dispatches by packet type: conn_id 0 means a
 * group-level message; otherwise the message targets a specific flow PCB.
 *
 * Ownership: this function always consumes `packet` (freed at `done`),
 * regardless of success or failure.
 */
static int
flow_divert_input(mbuf_ref_t packet, struct flow_divert_group *group)
{
	struct flow_divert_packet_header hdr;
	int error = 0;
	struct flow_divert_pcb *fd_cb;

	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
		error = EINVAL;
		goto done;
	}

	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
		error = ENOBUFS;
		goto done;
	}

	/* conn_id travels in network byte order */
	hdr.conn_id = ntohl(hdr.conn_id);

	if (hdr.conn_id == 0) {
		/* Group-level messages are not tied to a flow */
		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_GROUP_INIT:
			flow_divert_handle_group_init(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_FLOW_STATES_REQUEST:
			flow_divert_handle_flow_states_request(group);
			break;
		default:
			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}
		goto done;
	}

	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group); /* This retains the PCB */
	if (fd_cb == NULL) {
		/* Late CLOSE/READ_NOTIFY for an already-gone flow is expected; don't log it */
		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
		}
		goto done;
	}

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		struct socket *so = fd_cb->so;
		flow_divert_lock_socket(so, fd_cb);

		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_CONNECT_RESULT:
			flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_CLOSE:
			flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_DATA:
			/* The only handler whose error propagates to the caller */
			error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_READ_NOTIFY:
			flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
			flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
			break;
		default:
			FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}

		flow_divert_unlock_socket(so, fd_cb);
	}
	FDUNLOCK(fd_cb);

	FDRELEASE(fd_cb);

done:
	mbuf_freem(packet);
	return error;
}
3352
/*
 * Abort every flow in a group and mark the group defunct (called when the
 * control socket for the group goes away). PCBs are snapshotted and retained
 * under the group lock, then each is aborted with ECONNABORTED under its own
 * PCB/socket locks.
 */
static void
flow_divert_close_all(struct flow_divert_group *group)
{
	struct flow_divert_pcb *fd_cb;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;

	SLIST_INIT(&tmp_list);

	lck_rw_lock_exclusive(&group->lck);

	/* Drop any control messages still queued for the agent */
	MBUFQ_DRAIN(&group->send_queue);

	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	/* Prevent any later (re)initialization of this group */
	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;

	lck_rw_done(&group->lck);

	while (!SLIST_EMPTY(&tmp_list)) {
		fd_cb = SLIST_FIRST(&tmp_list);
		FDLOCK(fd_cb);
		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
		if (fd_cb->so != NULL) {
			struct socket *so = fd_cb->so;
			flow_divert_lock_socket(so, fd_cb);
			flow_divert_pcb_remove(fd_cb);
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			so->so_error = ECONNABORTED;
			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
			flow_divert_unlock_socket(so, fd_cb);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);
	}
}
3391
/*
 * Detach the flow divert PCB from its socket (socket teardown path).
 * Flushes buffers, sends a final close to the agent if the flow still
 * belongs to a group, severs the so <-> fd_cb link, and drops the socket's
 * reference on the PCB.
 */
void
flow_divert_detach(struct socket *so)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return;
	}

	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_fd_pcb = NULL;

	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);

	if (fd_cb->group != NULL) {
		/* Last-ditch effort to send any buffered data */
		flow_divert_send_buffered_data(fd_cb, TRUE);
		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
		flow_divert_send_close_if_needed(fd_cb);
		/* Remove from the group */
		flow_divert_pcb_remove(fd_cb);
	}

	sbflush(&so->so_snd);
	sbflush(&so->so_rcv);

	/*
	 * Clearing fd_cb->so requires the PCB lock, which must be taken
	 * without the socket lock held to respect lock ordering — unless the
	 * plugin already holds the PCB lock (plugin_locked), in which case
	 * taking it again would deadlock.
	 */
	if (!fd_cb->plugin_locked) {
		socket_unlock(so, 0);
		FDLOCK(fd_cb);
	}
	fd_cb->so = NULL;
	if (!fd_cb->plugin_locked) {
		FDUNLOCK(fd_cb);
		socket_lock(so, 0);
	}

	FDRELEASE(fd_cb); /* Release the socket's reference */
}
3430
/*
 * pr_usrreqs close handler: flush pending data to the agent, notify it that
 * the flow is closing in both directions, remove the flow from its group,
 * and disconnect the socket.
 */
static int
flow_divert_close(struct socket *so)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Closing");

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		soisdisconnecting(so);
		sbflush(&so->so_rcv);
	}

	/* Hold a reference across the teardown calls below */
	FDRETAIN(fd_cb);

	flow_divert_send_buffered_data(fd_cb, TRUE);
	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	/* Remove from the group */
	flow_divert_pcb_remove(fd_cb);

	flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);

	FDRELEASE(fd_cb);

	return 0;
}
3462
3463 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3464 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3465 sae_connid_t cid __unused)
3466 {
3467 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3468 return EINVAL;
3469 }
3470
3471 return flow_divert_close(so);
3472 }
3473
/*
 * pr_usrreqs shutdown handler (write-side only): mark the socket as unable
 * to send more, tell the agent the write side is closed, and fully
 * disconnect if the tunnel is now closed in both directions.
 */
static int
flow_divert_shutdown(struct socket *so)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Can't send more");

	socantsendmore(so);

	/* Hold a reference across the close/disconnect calls */
	FDRETAIN(fd_cb);

	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
	flow_divert_send_close_if_needed(fd_cb);
	/* If the read side was already closed, the flow is fully done */
	if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
		flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
	}

	FDRELEASE(fd_cb);

	return 0;
}
3499
/*
 * pr_usrreqs rcvd handler: called after the application reads from the
 * receive buffer. If the agent asked to be told when space frees up
 * (FLOW_DIVERT_NOTIFY_ON_RECEIVED set because the buffer was full), send a
 * read notification and clear the flag on success.
 */
static int
flow_divert_rcvd(struct socket *so, int flags __unused)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int space = 0;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	space = sbspace(&so->so_rcv);
	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
	/* Flag stays set if the notification could not be sent; we'll retry on the next read */
	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
	    (space > 0) &&
	    flow_divert_send_read_notification(fd_cb) == 0) {
		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
	}

	return 0;
}
3521
/*
 * Append the target (remote) endpoint to an outgoing connect packet as two
 * TLVs: the raw sockaddr bytes and the port.
 *
 * NOTE(review): the port is appended as a host-order int (4 bytes), not a
 * uint16_t — presumably the agent-side protocol expects exactly that;
 * confirm against the user-space consumer before changing.
 */
static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr)
{
	int error = 0;
	int port = 0;

	if (!flow_divert_is_sockaddr_valid(toaddr)) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
		error = EINVAL;
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, SA_BYTES(toaddr));
	if (error) {
		goto done;
	}

	/* Validity check above guarantees the family is AF_INET or AF_INET6 */
	if (toaddr->sa_family == AF_INET) {
		port = ntohs((satosin(toaddr))->sin_port);
	} else {
		port = ntohs((satosin6(toaddr))->sin6_port);
	}

	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
	if (error) {
		goto done;
	}

done:
	return error;
}
3553
3554 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer)3555 flow_divert_get_buffered_target_address(mbuf_ref_t buffer)
3556 {
3557 if (buffer != NULL && buffer->m_type == MT_SONAME) {
3558 struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3559 if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3560 return toaddr;
3561 }
3562 }
3563 return NULL;
3564 }
3565
3566 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3567 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3568 {
3569 switch (addr->sa_family) {
3570 case AF_INET:
3571 if (addr->sa_len < sizeof(struct sockaddr_in)) {
3572 return FALSE;
3573 }
3574 break;
3575 case AF_INET6:
3576 if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3577 return FALSE;
3578 }
3579 break;
3580 default:
3581 return FALSE;
3582 }
3583 return TRUE;
3584 }
3585
3586 static errno_t
flow_divert_dup_addr(sa_family_t family,struct sockaddr * addr,struct sockaddr ** dup)3587 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
3588 struct sockaddr **dup)
3589 {
3590 int error = 0;
3591 struct sockaddr *result;
3592 struct sockaddr_storage ss;
3593
3594 if (addr != NULL) {
3595 result = addr;
3596 } else {
3597 memset(&ss, 0, sizeof(ss));
3598 ss.ss_family = family;
3599 if (ss.ss_family == AF_INET) {
3600 ss.ss_len = sizeof(struct sockaddr_in);
3601 } else if (ss.ss_family == AF_INET6) {
3602 ss.ss_len = sizeof(struct sockaddr_in6);
3603 } else {
3604 error = EINVAL;
3605 }
3606 result = (struct sockaddr *)&ss;
3607 }
3608
3609 if (!error) {
3610 *dup = dup_sockaddr(result, 1);
3611 if (*dup == NULL) {
3612 error = ENOBUFS;
3613 }
3614 }
3615
3616 return error;
3617 }
3618
3619 static void
flow_divert_disconnect_socket(struct socket * so,bool is_connected,bool delay_if_needed)3620 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
3621 {
3622 if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
3623 soisdisconnected(so);
3624 }
3625 if (SOCK_TYPE(so) == SOCK_DGRAM) {
3626 if (delay_if_needed) {
3627 cfil_sock_is_dead(so);
3628 } else {
3629 struct inpcb *inp = sotoinpcb(so);
3630 if (SOCK_CHECK_DOM(so, PF_INET6)) {
3631 in6_pcbdetach(inp);
3632 } else {
3633 in_pcbdetach(inp);
3634 }
3635 }
3636 }
3637 }
3638
/*
 * pr_ctloutput handler: forward SO_TRAFFIC_CLASS changes to the agent once
 * the connect has started, then delegate everything else to the underlying
 * TCP protocol switch for the socket's domain.
 */
static errno_t
flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
		/* Keep the agent's view of the traffic class in sync */
		if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
			flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
		}
	}

	if (SOCK_DOM(so) == PF_INET) {
		return g_tcp_protosw->pr_ctloutput(so, sopt);
	} else if (SOCK_DOM(so) == PF_INET6) {
		return g_tcp6_protosw->pr_ctloutput(so, sopt);
	}
	return 0;
}
3661
/*
 * Core connect path for a diverted socket. On the first call it validates
 * the destination, captures original inpcb state (for possible restoration),
 * selects a local endpoint for the chosen family, builds the connect packet
 * and sends it to the agent — unless SOF1_PRECONNECT_DATA defers sending
 * until the first send/receive. Subsequent calls just (re)send the saved
 * connect packet.
 *
 * `implicit` is true when the connect is triggered by a send rather than an
 * explicit connect(2) call.
 */
static errno_t
flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *sinp;
	mbuf_ref_t connect_packet = NULL;
	int do_send = 1;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	if (fd_cb->group == NULL) {
		/* No agent to talk to */
		error = ENETUNREACH;
		goto done;
	}

	if (inp == NULL) {
		error = EINVAL;
		goto done;
	} else if (inp->inp_state == INPCB_STATE_DEAD) {
		/* Report a deferred socket error if one is pending */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
		} else {
			error = EINVAL;
		}
		goto done;
	}

	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
		error = EALREADY;
		goto done;
	}

	FDLOG0(LOG_INFO, fd_cb, "Connecting");

	if (fd_cb->connect_packet == NULL) {
		struct sockaddr_in sin = {};
		struct ifnet * __single ifp = NULL;

		if (to == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
			error = EINVAL;
			goto done;
		}

		if (!flow_divert_is_sockaddr_valid(to)) {
			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
			error = EINVAL;
			goto done;
		}

		/* Save state that may need restoring if the divert is undone */
		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
		if (fd_cb->original_remote_endpoint == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
			error = ENOMEM;
			goto done;
		}
		fd_cb->original_vflag = inp->inp_vflag;
		fd_cb->original_last_outifp = inp->inp_last_outifp;
		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;

		/* Multicast destinations cannot be diverted */
		sinp = (struct sockaddr_in *)(void *)to;
		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto done;
		}

		/* Convert a v4-mapped IPv6 destination to plain IPv4 when allowed */
		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
			struct sockaddr_in6 sin6 = {};
			sin6.sin6_family = AF_INET6;
			sin6.sin6_len = sizeof(struct sockaddr_in6);
			sin6.sin6_port = satosin6(to)->sin6_port;
			sin6.sin6_addr = satosin6(to)->sin6_addr;
			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
				in6_sin6_2_sin(&sin, &sin6);
				to = (struct sockaddr *)&sin;
			}
		}

		if (to->sa_family == AF_INET6) {
			struct sockaddr_in6 *to6 = satosin6(to);

			inp->inp_vflag &= ~INP_IPV4;
			inp->inp_vflag |= INP_IPV6;
			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
				/* Only transparent flows to a specific address treat this as fatal */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->in6p_last_outifp = ifp;
				ifnet_release(ifp);
			}

			/* Move any KAME-style embedded scope into sin6_scope_id */
			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
			    in6_embedded_scope &&
			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
			}

			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
			    in6_embedded_scope &&
			    to6->sin6_addr.s6_addr16[1] != 0) {
				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
				to6->sin6_addr.s6_addr16[1] = 0;
			}
		} else if (to->sa_family == AF_INET) {
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
			fd_cb->local_endpoint.sin.sin_family = AF_INET;
			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
				/* Only transparent flows to a specific address treat this as fatal */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->inp_last_outifp = ifp;
				ifnet_release(ifp);
			}
		} else {
			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
		}

		/*
		 * NOTE(review): the || chain collapses the checks' return values
		 * to 0 or 1, so a failure surfaces as error == 1 rather than the
		 * check's own errno — presumably acceptable here; confirm before
		 * relying on the specific error code.
		 */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
		    !implicit || /* connect() was called or */
		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
		}

		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
		if (error) {
			goto done;
		}

		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
			flow_divert_set_remote_endpoint(fd_cb, to);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (implicit) {
			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
		}

		/* Fast-open style sockets defer the connect until first data */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
			do_send = 0;
		}

		/* The PCB now owns the packet */
		fd_cb->connect_packet = connect_packet;
		connect_packet = NULL;
	} else {
		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
	}

	if (do_send) {
		error = flow_divert_send_connect_packet(fd_cb);
		if (error) {
			goto done;
		}

		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
	}

	/* Token-less datagram flows complete the connect immediately */
	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
		soisconnected(so);
	} else {
		soisconnecting(so);
	}

done:
	return error;
}
3860
3861 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3862 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3863 {
3864 #if CONTENT_FILTER
3865 if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3866 int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3867 if (error != 0) {
3868 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3869 FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3870 return error;
3871 }
3872 }
3873 #endif /* CONTENT_FILTER */
3874
3875 return flow_divert_connect_out_internal(so, to, p, false);
3876 }
3877
/*
 * Common implementation for the IPv4 and IPv6 connectx() handlers.
 * Optionally binds the socket to the requested interface, starts the flow
 * divert connection, and, if a uio was supplied, sends the data right away.
 *
 * Returns 0 or an errno.  When auio is non-NULL, *bytes_written is set to
 * the number of bytes accepted by the send path (bytes_written must then be
 * non-NULL).  On success with pcid non-NULL, *pcid is set to 1.
 */
static int
flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
    struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
{
	struct inpcb *inp = sotoinpcb(so);
	int error;

	if (inp == NULL) {
		return EINVAL;
	}

	VERIFY(dst != NULL);

#if CONTENT_FILTER && NECP
	/* Re-evaluate the NECP policy for token-based TCP flows that have no content filter attached */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
	}
#endif /* CONTENT_FILTER && NECP */

	/* bind socket to the specified interface, if requested */
	if (ifscope != IFSCOPE_NONE &&
	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
		return error;
	}

	error = flow_divert_connect_out(so, dst, p);

	if (error != 0) {
		return error;
	}

	/* if there is data, send it */
	if (auio != NULL) {
		user_ssize_t datalen = 0;

		/* The socket lock is dropped across the send, which may block */
		socket_unlock(so, 0);

		VERIFY(bytes_written != NULL);

		datalen = uio_resid(auio);
		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
		socket_lock(so, 0);

		/* Report how much of the uio was consumed, even on EWOULDBLOCK */
		if (error == 0 || error == EWOULDBLOCK) {
			*bytes_written = datalen - uio_resid(auio);
		}

		/*
		 * sosend returns EWOULDBLOCK if it's a non-blocking
		 * socket or a timeout occurred (this allows to return
		 * the amount of queued data through sendit()).
		 *
		 * However, connectx() returns EINPROGRESS in case of a
		 * blocking socket. So we change the return value here.
		 */
		if (error == EWOULDBLOCK) {
			error = EINPROGRESS;
		}
	}

	if (error == 0 && pcid != NULL) {
		*pcid = 1; /* there is only 1 connection for a TCP */
	}

	return error;
}
3946
/* connectx() entry point for IPv4 flow divert sockets; all work happens in the common helper. */
static int
flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
    uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
{
	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
}
3955
/* connectx() entry point for IPv6 flow divert sockets; all work happens in the common helper. */
static int
flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
    struct sockaddr *dst, struct proc *p, uint32_t ifscope,
    sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
    uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
{
	return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
}
3964
/*
 * pru_send handler for diverted sockets.  Performs an implicit connect if
 * the flow has not been started yet, then forwards the application's data
 * to the flow divert provider.
 *
 * Ownership: `data' is handed off to flow_divert_send_app_data() once the
 * checks pass, and freed here on earlier error paths; `control' is always
 * freed on the done path.  NOTE(review): the !SO_IS_DIVERTED early return
 * frees neither mbuf -- presumably the caller retains ownership in that
 * case; confirm against the pru_send contract.
 */
static errno_t
flow_divert_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int error = 0;
	struct inpcb *inp;
#if CONTENT_FILTER
	struct m_tag *cfil_tag = NULL;
#endif

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		error = ECONNRESET;
		goto done;
	}

	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
		/* The provider considers this datagram flow to be closed, so no data can be sent */
		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
		error = EHOSTUNREACH;
		goto done;
	}

#if CONTENT_FILTER
	/*
	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
	 * retrieve the CFIL saved remote address from the mbuf and use it.
	 */
	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
		struct sockaddr * __single cfil_faddr = NULL;
		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
		if (cfil_tag) {
			to = (struct sockaddr *)(void *)cfil_faddr;
		}
		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
	}
#endif

	/* Implicit connect */
	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
		FDLOG0(LOG_INFO, fd_cb, "implicit connect");

		error = flow_divert_connect_out_internal(so, to, p, true);
		if (error) {
			goto done;
		}
	} else {
		/* The flow is already connected; re-check interface constraints before sending */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}
	}

	if (data != NULL) {
		size_t data_size = 0;
		/* Without a packet header, total the chain lengths by hand */
		if (mbuf_flags(data) & M_PKTHDR) {
			data_size = mbuf_pkthdr_len(data);
		} else {
			for (mbuf_t blob = data; blob != NULL; blob = mbuf_next(blob)) {
				data_size += mbuf_len(blob);
			}
		}

		FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", data_size);
		fd_cb->bytes_written_by_app += data_size;

		error = flow_divert_send_app_data(fd_cb, data, data_size, to);

		/* flow_divert_send_app_data() owns the chain from here: it is
		 * not freed below, even if it returned an error */
		data = NULL;

		if (error) {
			goto done;
		}
	}

	if (flags & PRUS_EOF) {
		flow_divert_shutdown(so);
	}

done:
	if (data) {
		mbuf_freem(data);
	}
	if (control) {
		mbuf_freem(control);
	}
#if CONTENT_FILTER
	if (cfil_tag) {
		m_tag_free(cfil_tag);
	}
#endif

	return error;
}
4065
4066 static int
flow_divert_preconnect(struct socket * so)4067 flow_divert_preconnect(struct socket *so)
4068 {
4069 int error = 0;
4070 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4071
4072 if (!SO_IS_DIVERTED(so)) {
4073 return EINVAL;
4074 }
4075
4076 if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4077 FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4078 error = flow_divert_send_connect_packet(so->so_fd_pcb);
4079 if (error) {
4080 return error;
4081 }
4082
4083 fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4084 }
4085
4086 soclearfastopen(so);
4087
4088 return error;
4089 }
4090
4091 static void
flow_divert_set_protosw(struct socket * so)4092 flow_divert_set_protosw(struct socket *so)
4093 {
4094 if (SOCK_DOM(so) == PF_INET) {
4095 so->so_proto = &g_flow_divert_in_protosw;
4096 } else {
4097 so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4098 }
4099 }
4100
4101 static void
flow_divert_set_udp_protosw(struct socket * so)4102 flow_divert_set_udp_protosw(struct socket *so)
4103 {
4104 if (SOCK_DOM(so) == PF_INET) {
4105 so->so_proto = &g_flow_divert_in_udp_protosw;
4106 } else {
4107 so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4108 }
4109 }
4110
4111 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_ref_t data,struct sockaddr * to,mbuf_ref_t control,struct proc * p)4112 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
4113 {
4114 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4115 struct inpcb *inp;
4116 int error = 0;
4117
4118 inp = sotoinpcb(so);
4119 if (inp == NULL) {
4120 error = EINVAL;
4121 goto done;
4122 }
4123
4124 if (fd_cb == NULL) {
4125 error = flow_divert_pcb_init(so);
4126 fd_cb = so->so_fd_pcb;
4127 if (error != 0 || fd_cb == NULL) {
4128 goto done;
4129 }
4130 }
4131 return flow_divert_data_out(so, flags, data, to, control, p);
4132
4133 done:
4134 if (data) {
4135 mbuf_freem(data);
4136 }
4137 if (control) {
4138 mbuf_freem(control);
4139 }
4140
4141 return error;
4142 }
4143
/*
 * Creates a flow divert pcb for the socket and inserts it into a control
 * group.  flow_divert_derive_kernel_control_unit() yields candidate group
 * units; whenever flow_divert_add_to_group() reports ENOENT (no such group),
 * the next derived unit is tried.  On success the socket is marked diverted
 * and its protosw is swapped for the flow divert version.
 *
 * Returns EALREADY if the socket is already diverted, ENOMEM if the pcb
 * cannot be allocated, EINVAL if no valid group unit can be derived, or the
 * error from flow_divert_add_to_group().
 */
static errno_t
flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
{
	errno_t error = 0;
	struct flow_divert_pcb *fd_cb = NULL;
	uint32_t agg_unit = aggregate_unit;
	uint32_t policy_control_unit = ctl_unit;
	bool is_aggregate = false;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		return EALREADY;
	}

	fd_cb = flow_divert_pcb_create(so);
	if (fd_cb == NULL) {
		return ENOMEM;
	}

	do {
		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
		/* Valid units are 1..GROUP_COUNT_MAX-1, or in-process units >= FLOW_DIVERT_IN_PROCESS_UNIT_MIN */
		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
			error = EINVAL;
			break;
		}

		error = flow_divert_add_to_group(fd_cb, group_unit);
		if (error == 0) {
			so->so_fd_pcb = fd_cb;
			so->so_flags |= SOF_FLOW_DIVERT;
			fd_cb->control_group_unit = group_unit;
			fd_cb->policy_control_unit = ctl_unit;
			fd_cb->aggregate_unit = agg_unit;
			/* Flows derived from an aggregate unit are flagged as transparent */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}

			if (SOCK_TYPE(so) == SOCK_STREAM) {
				flow_divert_set_protosw(so);
			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
				flow_divert_set_udp_protosw(so);
			}

			FDLOG0(LOG_INFO, fd_cb, "Created");
		} else if (error != ENOENT) {
			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
		}
	} while (error == ENOENT); /* group missing: retry with the next derived unit */

	if (error != 0) {
		FDRELEASE(fd_cb);
	}

	return error;
}
4201
/*
 * Creates a flow divert pcb for the socket, using the control unit and
 * aggregate units assigned to the socket's inpcb by NECP policy.
 */
errno_t
flow_divert_pcb_init(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	uint32_t aggregate_units = 0;
	uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
	return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
}
4210
/*
 * Handles the flow divert token setsockopt.  Validates the socket (TCP/UDP
 * over IPv4/IPv6; TCP must not have started connecting), copies the token
 * in, extracts the key/control/aggregate unit TLVs, verifies the token's
 * HMAC, and attaches a flow divert pcb.  On success the token mbuf is saved
 * on the pcb as the connect token.
 */
errno_t
flow_divert_token_set(struct socket *so, struct sockopt *sopt)
{
	uint32_t ctl_unit = 0;
	uint32_t key_unit = 0;
	uint32_t aggregate_unit = 0;
	int error = 0;
	int hmac_error = 0;
	mbuf_ref_t token = NULL;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		error = EALREADY;
		goto done;
	}

	if (g_init_result) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
		error = ENOPROTOOPT;
		goto done;
	}

	/* Only TCP or UDP sockets over IPv4/IPv6 can be diverted */
	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
		error = EINVAL;
		goto done;
	} else {
		/* A TCP socket must still be in the CLOSED state */
		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
			struct tcpcb *tp = sototcpcb(so);
			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
				error = EINVAL;
				goto done;
			}
		}
	}

	error = soopt_getm(sopt, &token);
	if (error) {
		token = NULL;
		goto done;
	}

	error = soopt_mcopyin(sopt, token);
	if (error) {
		/* NOTE(review): token is not freed on this path, mirroring the
		 * soopt_mcopyout() behavior below -- assumes soopt_mcopyin()
		 * consumed the mbuf on failure; confirm */
		token = NULL;
		goto done;
	}

	/* The key unit TLV is optional; out-of-range values are treated as absent */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
	if (!error) {
		key_unit = ntohl(key_unit);
		if (key_unit >= GROUP_COUNT_MAX) {
			key_unit = 0;
		}
	} else if (error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
		goto done;
	} else {
		key_unit = 0;
	}

	/* The control unit TLV is required */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
		goto done;
	}

	/* The aggregate unit TLV is optional */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
	if (error && error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
		goto done;
	}

	/* A valid kernel control unit is required */
	ctl_unit = ntohl(ctl_unit);
	aggregate_unit = ntohl(aggregate_unit);

	/* Verify the HMAC using the key unit's key, falling back to the control unit's */
	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
		if (hmac_error && hmac_error != ENOENT) {
			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
			error = hmac_error;
			goto done;
		}
	}

	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
	if (error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		int log_level = LOG_NOTICE;

		/* An optional log level TLV overrides the pcb's default */
		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
		if (error == 0) {
			fd_cb->log_level = (uint8_t)log_level;
		}
		error = 0; /* a missing log level TLV is not an error */

		/* The pcb now owns the token mbuf */
		fd_cb->connect_token = token;
		token = NULL;

		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
	}

	if (hmac_error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		if (fd_cb != NULL) {
			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
		}
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4328
/*
 * Handles the flow divert token getsockopt: builds a token containing the
 * group's control unit, the flow ID, any app data, the key unit and an HMAC
 * computed with the control group's key, then copies it out to the caller.
 * If the caller passed a NULL option value, only the required buffer size is
 * reported via sopt_valsize.
 */
errno_t
flow_divert_token_get(struct socket *so, struct sockopt *sopt)
{
	uint32_t ctl_unit;
	int error = 0;
	uint8_t hmac[SHA_DIGEST_LENGTH];
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	mbuf_ref_t token = NULL;
	struct flow_divert_group *control_group = NULL;

	if (!SO_IS_DIVERTED(so)) {
		error = EINVAL;
		goto done;
	}

	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto done;
	}

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		goto done;
	}

	/* Unit numbers are stored in the token in network byte order */
	ctl_unit = htonl(fd_cb->group->ctl_unit);

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
	if (error) {
		goto done;
	}

	/* App data is optional */
	if (fd_cb->app_data != NULL) {
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
		if (error) {
			goto done;
		}
	}

	/* Sign the token using the control group's key, holding the group lock shared */
	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
	if (control_group != NULL) {
		lck_rw_lock_shared(&control_group->lck);
		ctl_unit = htonl(control_group->ctl_unit);
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
		if (!error) {
			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
		}
		lck_rw_done(&control_group->lck);
		FDGRP_RELEASE(control_group);
	} else {
		error = ENOPROTOOPT;
	}

	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
	if (error) {
		goto done;
	}

	if (sopt->sopt_val == USER_ADDR_NULL) {
		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
		sopt->sopt_valsize = mbuf_pkthdr_len(token);
		goto done;
	}

	error = soopt_mcopyout(sopt, token);
	if (error) {
		token = NULL; /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
		goto done;
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4416
/*
 * Tears down a flow divert group: scrubs and frees the token key, frees the
 * signing ID trie storage, and returns the group to its zone.
 */
void
flow_divert_group_destroy(struct flow_divert_group *group)
{
	lck_rw_lock_exclusive(&group->lck);

	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);

	/* Zero the key before freeing so its contents do not linger in freed memory */
	if (group->token_key != NULL) {
		memset(group->token_key, 0, group->token_key_size);
		kfree_data_sized_by(group->token_key, group->token_key_size);
	}

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	lck_rw_done(&group->lck);

	zfree(flow_divert_group_zone, group);
}
4440
4441 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4442 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4443 {
4444 struct flow_divert_group *new_group = NULL;
4445 new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4446 lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4447 RB_INIT(&new_group->pcb_tree);
4448 new_group->ctl_unit = unit;
4449 new_group->in_process_pid = pid;
4450 MBUFQ_INIT(&new_group->send_queue);
4451 new_group->signing_id_trie.root = NULL_TRIE_IDX;
4452 new_group->ref_count = 1;
4453 new_group->order = FLOW_DIVERT_ORDER_LAST;
4454 return new_group;
4455 }
4456
4457 static errno_t
flow_divert_kctl_setup(u_int32_t * unit,void ** unitinfo)4458 flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
4459 {
4460 if (unit == NULL || unitinfo == NULL) {
4461 return EINVAL;
4462 }
4463
4464 struct flow_divert_group *new_group = NULL;
4465 errno_t error = 0;
4466 lck_rw_lock_shared(&g_flow_divert_group_lck);
4467 if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
4468 // Return next unused in-process unit
4469 u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
4470 struct flow_divert_group *group_next = NULL;
4471 TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
4472 if (group_next->ctl_unit > unit_cursor) {
4473 // Found a gap, lets fill it in
4474 break;
4475 }
4476 unit_cursor = group_next->ctl_unit + 1;
4477 if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4478 break;
4479 }
4480 }
4481 if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4482 error = EBUSY;
4483 } else {
4484 *unit = unit_cursor;
4485 new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
4486 if (group_next != NULL) {
4487 TAILQ_INSERT_BEFORE(group_next, new_group, chain);
4488 } else {
4489 TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
4490 }
4491 g_active_group_count++;
4492 }
4493 } else {
4494 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4495 error = EPERM;
4496 } else {
4497 if (g_flow_divert_groups == NULL) {
4498 g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4499 GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4500 }
4501
4502 // Return next unused group unit
4503 bool found_unused_unit = false;
4504 u_int32_t unit_cursor;
4505 for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
4506 struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
4507 if (group == NULL) {
4508 // Open slot, assign this one
4509 *unit = unit_cursor;
4510 new_group = flow_divert_allocate_group(*unit, 0);
4511 g_flow_divert_groups[*unit] = new_group;
4512 found_unused_unit = true;
4513 g_active_group_count++;
4514 break;
4515 }
4516 }
4517 if (!found_unused_unit) {
4518 error = EBUSY;
4519 }
4520 }
4521 }
4522 lck_rw_done(&g_flow_divert_group_lck);
4523
4524 *unitinfo = new_group;
4525
4526 return error;
4527 }
4528
4529 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4530 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4531 {
4532 if (unitinfo == NULL) {
4533 return EINVAL;
4534 }
4535
4536 // Just validate. The group will already have been allocated.
4537 struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4538 if (group == NULL || sac->sc_unit != group->ctl_unit) {
4539 FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4540 sac->sc_unit, group ? group->ctl_unit : 0);
4541 return EINVAL;
4542 }
4543
4544 return 0;
4545 }
4546
4547 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4548 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4549 {
4550 struct flow_divert_group *group = NULL;
4551 errno_t error = 0;
4552
4553 if (unitinfo == NULL) {
4554 return 0;
4555 }
4556
4557 FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4558
4559 lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4560
4561 if (g_active_group_count == 0) {
4562 panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4563 unit, g_active_group_count);
4564 }
4565
4566 if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4567 if (unit >= GROUP_COUNT_MAX) {
4568 return EINVAL;
4569 }
4570
4571 if (g_flow_divert_groups == NULL) {
4572 panic("flow divert group %u is disconnecting, but groups array is NULL",
4573 unit);
4574 }
4575 group = g_flow_divert_groups[unit];
4576
4577 if (group != (struct flow_divert_group *)unitinfo) {
4578 panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4579 }
4580
4581 g_flow_divert_groups[unit] = NULL;
4582 } else {
4583 group = (struct flow_divert_group *)unitinfo;
4584 if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4585 panic("flow divert group %u is disconnecting, but in-process group list is empty",
4586 unit);
4587 }
4588
4589 TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4590 }
4591
4592 g_active_group_count--;
4593
4594 if (g_active_group_count == 0) {
4595 kfree_type(struct flow_divert_group *,
4596 GROUP_COUNT_MAX, g_flow_divert_groups);
4597 g_flow_divert_groups = NULL;
4598 }
4599
4600 lck_rw_done(&g_flow_divert_group_lck);
4601
4602 if (group != NULL) {
4603 flow_divert_close_all(group);
4604 FDGRP_RELEASE(group);
4605 } else {
4606 error = EINVAL;
4607 }
4608
4609 return error;
4610 }
4611
4612 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_ref_t m,__unused int flags)4613 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_ref_t m, __unused int flags)
4614 {
4615 errno_t error = 0;
4616 struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4617 if (group != NULL) {
4618 error = flow_divert_input(m, group);
4619 FDGRP_RELEASE(group);
4620 } else {
4621 error = ENOENT;
4622 }
4623 return error;
4624 }
4625
4626 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4627 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4628 {
4629 struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4630 if (group == NULL) {
4631 return;
4632 }
4633
4634 if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4635 struct flow_divert_pcb *fd_cb;
4636 SLIST_HEAD(, flow_divert_pcb) tmp_list;
4637
4638 lck_rw_lock_exclusive(&group->lck);
4639
4640 while (!MBUFQ_EMPTY(&group->send_queue)) {
4641 mbuf_ref_t next_packet;
4642 FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4643 next_packet = MBUFQ_FIRST(&group->send_queue);
4644 int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4645 if (error) {
4646 FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4647 OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4648 lck_rw_done(&group->lck);
4649 return;
4650 }
4651 MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4652 }
4653
4654 SLIST_INIT(&tmp_list);
4655
4656 RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4657 FDRETAIN(fd_cb);
4658 SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4659 }
4660
4661 lck_rw_done(&group->lck);
4662
4663 SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4664 FDLOCK(fd_cb);
4665 if (fd_cb->so != NULL) {
4666 struct socket *so = fd_cb->so;
4667 flow_divert_lock_socket(so, fd_cb);
4668 if (fd_cb->group != NULL) {
4669 flow_divert_send_buffered_data(fd_cb, FALSE);
4670 }
4671 flow_divert_unlock_socket(so, fd_cb);
4672 }
4673 FDUNLOCK(fd_cb);
4674 FDRELEASE(fd_cb);
4675 }
4676 }
4677
4678 FDGRP_RELEASE(group);
4679 }
4680
4681 static int
flow_divert_kctl_init(void)4682 flow_divert_kctl_init(void)
4683 {
4684 struct kern_ctl_reg ctl_reg;
4685 int result;
4686
4687 memset(&ctl_reg, 0, sizeof(ctl_reg));
4688
4689 strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4690 ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4691
4692 // Do not restrict to privileged processes. flow_divert_kctl_setup checks
4693 // permissions separately.
4694 ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4695 ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4696
4697 ctl_reg.ctl_connect = flow_divert_kctl_connect;
4698 ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4699 ctl_reg.ctl_send = flow_divert_kctl_send;
4700 ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4701 ctl_reg.ctl_setup = flow_divert_kctl_setup;
4702
4703 result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4704
4705 if (result) {
4706 FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4707 return result;
4708 }
4709
4710 return 0;
4711 }
4712
4713 void
flow_divert_init(void)4714 flow_divert_init(void)
4715 {
4716 memset(&nil_pcb, 0, sizeof(nil_pcb));
4717 nil_pcb.log_level = LOG_NOTICE;
4718
4719 g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4720
4721 VERIFY(g_tcp_protosw != NULL);
4722
4723 memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4724 memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4725
4726 g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4727 g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4728 g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4729 g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4730 g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4731 g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4732 g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4733 g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4734
4735 g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4736 g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4737
4738 /*
4739 * Socket filters shouldn't attach/detach to/from this protosw
4740 * since pr_protosw is to be used instead, which points to the
4741 * real protocol; if they do, it is a bug and we should panic.
4742 */
4743 g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4744 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4745 g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4746 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4747
4748 /* UDP */
4749 g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4750 VERIFY(g_udp_protosw != NULL);
4751
4752 memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4753 memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4754
4755 g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4756 g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4757 g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4758 g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4759 g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4760 g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4761 g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4762 g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4763 g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4764
4765 g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4766 g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4767
4768 /*
4769 * Socket filters shouldn't attach/detach to/from this protosw
4770 * since pr_protosw is to be used instead, which points to the
4771 * real protocol; if they do, it is a bug and we should panic.
4772 */
4773 g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4774 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4775 g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4776 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4777
/*
 * IPv6 TCP: look up the real TCP6 protosw, clone it and its user-request
 * vector, and interpose the flow divert entry points on the copy.
 */
g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);

VERIFY(g_tcp6_protosw != NULL);

memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));

/* Route user requests through flow divert instead of the real TCP6 stack. */
g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;

g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
/*
 * Socket filters shouldn't attach/detach to/from this protosw
 * since pr_protosw is to be used instead, which points to the
 * real protocol; if they do, it is a bug and we should panic.
 */
g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
__unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
__unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4805
/*
 * UDP6: same interposition pattern as above — clone the real UDP6
 * protosw/usrreqs and redirect the entry points through flow divert.
 */
g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);

VERIFY(g_udp6_protosw != NULL);

memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));

g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
/* Scatter/gather send lists are not supported on diverted UDP6 sockets. */
g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;

g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
/*
 * Socket filters shouldn't attach/detach to/from this protosw
 * since pr_protosw is to be used instead, which points to the
 * real protocol; if they do, it is a bug and we should panic.
 */
g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
__unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
__unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4835
/* Initialize the list of registered process groups before accepting control connections. */
TAILQ_INIT(&g_flow_divert_in_process_group_list);

/* Register the flow divert kernel control; a nonzero result aborts init. */
g_init_result = flow_divert_kctl_init();
if (g_init_result) {
	/* NOTE(review): this goto is currently redundant (done: follows
	 * immediately) but keeps the error-path idiom used elsewhere. */
	goto done;
}

done:
/* On any failure, tear down the kctl registration if it succeeded. */
if (g_init_result != 0) {
if (g_flow_divert_kctl_ref != NULL) {
ctl_deregister(g_flow_divert_kctl_ref);
g_flow_divert_kctl_ref = NULL;
}
}
}
4851