1 /*
2 * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/syslog.h>
32 #include <sys/queue.h>
33 #include <sys/malloc.h>
34 #include <sys/socket.h>
35 #include <sys/kpi_mbuf.h>
36 #include <sys/mbuf.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/systm.h>
42 #include <sys/kern_control.h>
43 #include <sys/ubc.h>
44 #include <sys/codesign.h>
45 #include <sys/file_internal.h>
46 #include <sys/kauth.h>
47 #include <libkern/tree.h>
48 #include <kern/locks.h>
49 #include <kern/debug.h>
50 #include <kern/task.h>
51 #include <mach/task_info.h>
52 #include <net/if_var.h>
53 #include <net/route.h>
54 #include <net/flowhash.h>
55 #include <net/ntstat.h>
56 #include <net/content_filter.h>
57 #include <net/necp.h>
58 #include <netinet/in.h>
59 #include <netinet/in_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_var.h>
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/flow_divert.h>
64 #include <netinet/flow_divert_proto.h>
65 #include <netinet6/in6_pcb.h>
66 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
68 #include <libkern/crypto/sha1.h>
69 #include <libkern/crypto/crypto_internal.h>
70 #include <os/log.h>
71 #include <corecrypto/cc.h>
72 #if CONTENT_FILTER
73 #include <net/content_filter.h>
74 #endif /* CONTENT_FILTER */
75
/* Flag bits stored in flow_divert_pcb.flags (socket-side and tunnel-side state) */
#define FLOW_DIVERT_CONNECT_STARTED 0x00000001
#define FLOW_DIVERT_READ_CLOSED 0x00000002
#define FLOW_DIVERT_WRITE_CLOSED 0x00000004
#define FLOW_DIVERT_TUNNEL_RD_CLOSED 0x00000008
#define FLOW_DIVERT_TUNNEL_WR_CLOSED 0x00000010
#define FLOW_DIVERT_HAS_HMAC 0x00000040
#define FLOW_DIVERT_NOTIFY_ON_RECEIVED 0x00000080
#define FLOW_DIVERT_IMPLICIT_CONNECT 0x00000100
#define FLOW_DIVERT_DID_SET_LOCAL_ADDR 0x00000200
#define FLOW_DIVERT_HAS_TOKEN 0x00000400
#define FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR 0x00000800
#define FLOW_DIVERT_FLOW_IS_TRANSPARENT 0x00001000

/* Logging helpers: every message is prefixed with the PCB's connection hash */
#define FDLOG(level, pcb, format, ...) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " format "\n", (pcb)->hash, __VA_ARGS__)

#define FDLOG0(level, pcb, msg) \
	os_log_with_type(OS_LOG_DEFAULT, flow_divert_syslog_type_to_oslog_type(level), "(%u): " msg "\n", (pcb)->hash)

/* PCB reference counting; the last FDRELEASE destroys the PCB */
#define FDRETAIN(pcb) if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count)
#define FDRELEASE(pcb) \
	do { \
	        if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) { \
	                flow_divert_pcb_destroy(pcb); \
	        } \
	} while (0)

/* Group reference counting; the last FDGRP_RELEASE destroys the group */
#define FDGRP_RETAIN(grp) if ((grp) != NULL) OSIncrementAtomic(&(grp)->ref_count)
#define FDGRP_RELEASE(grp) if ((grp) != NULL && 1 == OSDecrementAtomic(&(grp)->ref_count)) flow_divert_group_destroy(grp)

/* Per-PCB mutex */
#define FDLOCK(pcb) lck_mtx_lock(&(pcb)->mtx)
#define FDUNLOCK(pcb) lck_mtx_unlock(&(pcb)->mtx)

/* Send buffer size for the kernel control socket */
#define FD_CTL_SENDBUFF_SIZE (128 * 1024)

#define GROUP_BIT_CTL_ENQUEUE_BLOCKED 0

/* Limits on group count and sizes of agent-supplied data */
#define GROUP_COUNT_MAX 31
#define FLOW_DIVERT_MAX_NAME_SIZE 4096
#define FLOW_DIVERT_MAX_KEY_SIZE 1024
#define FLOW_DIVERT_MAX_TRIE_MEMORY (1024 * 1024)

/* Signing-identifier prefix trie: nodes/child maps/bytes are pool-allocated
 * and addressed by 16-bit indices; NULL_TRIE_IDX is the "no entry" sentinel. */
#define CHILD_MAP_SIZE 256
#define NULL_TRIE_IDX 0xffff
#define TRIE_NODE(t, i) ((t)->nodes[(i)])
#define TRIE_CHILD(t, i, b) (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)])
#define TRIE_BYTE(t, i) ((t)->bytes[(i)])

/* True when the socket has been diverted and still has a flow divert PCB */
#define SO_IS_DIVERTED(s) (((s)->so_flags & SOF_FLOW_DIVERT) && (s)->so_fd_pcb != NULL)
125
/* Placeholder PCB (hash 0) used for logging when no real PCB is available */
static struct flow_divert_pcb nil_pcb;

static LCK_ATTR_DECLARE(flow_divert_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(flow_divert_mtx_grp, FLOW_DIVERT_CONTROL_NAME);
/* Protects g_flow_divert_groups, g_active_group_count, and the in-process group list */
static LCK_RW_DECLARE_ATTR(g_flow_divert_group_lck, &flow_divert_mtx_grp,
    &flow_divert_mtx_attr);

/* Groups whose control units are in the in-process range (looked up by walking the list) */
static TAILQ_HEAD(_flow_divert_group_list, flow_divert_group) g_flow_divert_in_process_group_list;

/* Non-in-process groups, indexed directly by control unit (< GROUP_COUNT_MAX) */
static struct flow_divert_group **g_flow_divert_groups __indexable = NULL;
static uint32_t g_active_group_count = 0;

static errno_t g_init_result = 0;

static kern_ctl_ref g_flow_divert_kctl_ref = NULL;

/* Replacement protosw/usrreqs installed on diverted TCP/UDP sockets (v4 and v6) */
static struct protosw g_flow_divert_in_protosw;
static struct pr_usrreqs g_flow_divert_in_usrreqs;
static struct protosw g_flow_divert_in_udp_protosw;
static struct pr_usrreqs g_flow_divert_in_udp_usrreqs;
static struct ip6protosw g_flow_divert_in6_protosw;
static struct pr_usrreqs g_flow_divert_in6_usrreqs;
static struct ip6protosw g_flow_divert_in6_udp_protosw;
static struct pr_usrreqs g_flow_divert_in6_udp_usrreqs;

/* Original protosw entries, saved so diverted sockets can fall back to them */
static struct protosw *g_tcp_protosw = NULL;
static struct ip6protosw *g_tcp6_protosw = NULL;
static struct protosw *g_udp_protosw = NULL;
static struct ip6protosw *g_udp6_protosw = NULL;

static KALLOC_TYPE_DEFINE(flow_divert_group_zone, struct flow_divert_group,
    NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(flow_divert_pcb_zone, struct flow_divert_pcb,
    NET_KT_DEFAULT);
160
161 static errno_t
162 flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, struct sockaddr **dup);
163
164 static boolean_t
165 flow_divert_is_sockaddr_valid(struct sockaddr *addr);
166
167 static int
168 flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr);
169
170 struct sockaddr *
171 flow_divert_get_buffered_target_address(mbuf_ref_t buffer);
172
173 static void
174 flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed);
175
176 static void flow_divert_group_destroy(struct flow_divert_group *group);
177
178 static inline uint8_t
flow_divert_syslog_type_to_oslog_type(int syslog_type)179 flow_divert_syslog_type_to_oslog_type(int syslog_type)
180 {
181 switch (syslog_type) {
182 case LOG_ERR: return OS_LOG_TYPE_ERROR;
183 case LOG_INFO: return OS_LOG_TYPE_INFO;
184 case LOG_DEBUG: return OS_LOG_TYPE_DEBUG;
185 default: return OS_LOG_TYPE_DEFAULT;
186 }
187 }
188
/*
 * Ordering function for the per-group PCB red-black tree. PCBs are keyed
 * solely by their 32-bit connection hash; memcmp gives a stable (if
 * byte-order-dependent) total order, which is all the tree requires.
 */
static inline int
flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b)
{
	return memcmp(&pcb_a->hash, &pcb_b->hash, sizeof(pcb_a->hash));
}

RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp);
197
198 static const char *
flow_divert_packet_type2str(uint8_t packet_type)199 flow_divert_packet_type2str(uint8_t packet_type)
200 {
201 switch (packet_type) {
202 case FLOW_DIVERT_PKT_CONNECT:
203 return "connect";
204 case FLOW_DIVERT_PKT_CONNECT_RESULT:
205 return "connect result";
206 case FLOW_DIVERT_PKT_DATA:
207 return "data";
208 case FLOW_DIVERT_PKT_CLOSE:
209 return "close";
210 case FLOW_DIVERT_PKT_READ_NOTIFY:
211 return "read notification";
212 case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
213 return "properties update";
214 case FLOW_DIVERT_PKT_APP_MAP_CREATE:
215 return "app map create";
216 default:
217 return "unknown";
218 }
219 }
220
221 static struct flow_divert_pcb *
flow_divert_pcb_lookup(uint32_t hash,struct flow_divert_group * group)222 flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group)
223 {
224 struct flow_divert_pcb key_item;
225 struct flow_divert_pcb *fd_cb = NULL;
226
227 key_item.hash = hash;
228
229 lck_rw_lock_shared(&group->lck);
230 fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item);
231 FDRETAIN(fd_cb);
232 lck_rw_done(&group->lck);
233
234 return fd_cb;
235 }
236
237 static struct flow_divert_group *
flow_divert_group_lookup(uint32_t ctl_unit,struct flow_divert_pcb * fd_cb)238 flow_divert_group_lookup(uint32_t ctl_unit, struct flow_divert_pcb *fd_cb)
239 {
240 struct flow_divert_group *group = NULL;
241 lck_rw_lock_shared(&g_flow_divert_group_lck);
242 if (g_active_group_count == 0) {
243 if (fd_cb != NULL) {
244 FDLOG0(LOG_ERR, fd_cb, "No active groups, flow divert cannot be used for this socket");
245 }
246 } else if (ctl_unit == 0 || (ctl_unit >= GROUP_COUNT_MAX && ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
247 FDLOG(LOG_ERR, fd_cb, "Cannot lookup group with invalid control unit (%u)", ctl_unit);
248 } else if (ctl_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
249 if (g_flow_divert_groups == NULL) {
250 if (fd_cb != NULL) {
251 FDLOG0(LOG_ERR, fd_cb, "No active non-in-process groups, flow divert cannot be used for this socket");
252 }
253 } else {
254 group = g_flow_divert_groups[ctl_unit];
255 if (group == NULL) {
256 if (fd_cb != NULL) {
257 FDLOG(LOG_ERR, fd_cb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit);
258 }
259 } else {
260 FDGRP_RETAIN(group);
261 }
262 }
263 } else {
264 if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
265 if (fd_cb != NULL) {
266 FDLOG0(LOG_ERR, fd_cb, "No active in-process groups, flow divert cannot be used for this socket");
267 }
268 } else {
269 struct flow_divert_group *group_cursor = NULL;
270 TAILQ_FOREACH(group_cursor, &g_flow_divert_in_process_group_list, chain) {
271 if (group_cursor->ctl_unit == ctl_unit) {
272 group = group_cursor;
273 break;
274 }
275 }
276 if (group == NULL) {
277 if (fd_cb != NULL) {
278 FDLOG(LOG_ERR, fd_cb, "Group for control unit %u not found, flow divert cannot be used for this socket", ctl_unit);
279 }
280 } else if (fd_cb != NULL &&
281 (fd_cb->so == NULL ||
282 group_cursor->in_process_pid != fd_cb->so->last_pid)) {
283 FDLOG(LOG_ERR, fd_cb, "Cannot access group for control unit %u, mismatched PID (%u != %u)",
284 ctl_unit, group_cursor->in_process_pid, fd_cb->so ? fd_cb->so->last_pid : 0);
285 group = NULL;
286 } else {
287 FDGRP_RETAIN(group);
288 }
289 }
290 }
291 lck_rw_done(&g_flow_divert_group_lck);
292 return group;
293 }
294
295 static errno_t
flow_divert_pcb_insert(struct flow_divert_pcb * fd_cb,struct flow_divert_group * group)296 flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, struct flow_divert_group *group)
297 {
298 int error = 0;
299 lck_rw_lock_exclusive(&group->lck);
300 if (!(group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT)) {
301 if (NULL == RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb)) {
302 fd_cb->group = group;
303 fd_cb->control_group_unit = group->ctl_unit;
304 FDRETAIN(fd_cb); /* The group now has a reference */
305 } else {
306 FDLOG(LOG_ERR, fd_cb, "Group %u already contains a PCB with hash %u", group->ctl_unit, fd_cb->hash);
307 error = EEXIST;
308 }
309 } else {
310 FDLOG(LOG_ERR, fd_cb, "Group %u is defunct, cannot insert", group->ctl_unit);
311 error = ENOENT;
312 }
313 lck_rw_done(&group->lck);
314 return error;
315 }
316
/*
 * Assign a new connection hash to fd_cb and insert it into the group for
 * ctl_unit. The hash must be unique across all non-in-process groups, so
 * after generating a candidate hash every other group's tree is checked;
 * on a collision a new hash is generated, up to 4 attempts total.
 * Returns 0 on success, ENOENT if the group cannot be found (or is
 * defunct), or EEXIST if no unique hash could be created.
 */
static errno_t
flow_divert_add_to_group(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit)
{
	errno_t error = 0;
	struct flow_divert_group *group = NULL;
	/* Monotonic counter + random word feed the flow hash; seeded lazily. */
	static uint32_t g_nextkey = 1;
	static uint32_t g_hash_seed = 0;
	int try_count = 0;

	group = flow_divert_group_lookup(ctl_unit, fd_cb);
	if (group == NULL) {
		return ENOENT;
	}

	do {
		uint32_t key[2];
		uint32_t idx;

		key[0] = g_nextkey++;
		key[1] = RandomULong();

		if (g_hash_seed == 0) {
			g_hash_seed = RandomULong();
		}

		error = 0;
		fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed);

		/* Check every other non-in-process group for a hash collision. */
		for (idx = 1; idx < GROUP_COUNT_MAX && error == 0; idx++) {
			if (idx == ctl_unit) {
				continue;
			}
			/* fd_cb is NULL here: lookup failures are expected and not logged */
			struct flow_divert_group *curr_group = flow_divert_group_lookup(idx, NULL);
			if (curr_group != NULL) {
				lck_rw_lock_shared(&curr_group->lck);
				if (NULL != RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb)) {
					error = EEXIST;
				}
				lck_rw_done(&curr_group->lck);
				FDGRP_RELEASE(curr_group);
			}
		}

		if (error == 0) {
			/* May also return EEXIST on collision within the target group */
			error = flow_divert_pcb_insert(fd_cb, group);
		}
	} while (error == EEXIST && try_count++ < 3);

	if (error == EEXIST) {
		FDLOG0(LOG_ERR, fd_cb, "Failed to create a unique hash");
		fd_cb->hash = 0;
	}

	FDGRP_RELEASE(group);
	return error;
}
373
374 static struct flow_divert_pcb *
flow_divert_pcb_create(socket_t so)375 flow_divert_pcb_create(socket_t so)
376 {
377 struct flow_divert_pcb *new_pcb = NULL;
378
379 new_pcb = zalloc_flags(flow_divert_pcb_zone, Z_WAITOK | Z_ZERO);
380 lck_mtx_init(&new_pcb->mtx, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
381 new_pcb->so = so;
382 new_pcb->log_level = nil_pcb.log_level;
383
384 FDRETAIN(new_pcb); /* Represents the socket's reference */
385
386 return new_pcb;
387 }
388
/*
 * Free a PCB once its last reference is dropped (called from FDRELEASE).
 * Releases any buffered connect token/packet, app data, and the saved
 * original remote endpoint before returning the PCB to its zone.
 */
static void
flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb)
{
	FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %llu, tunnel tx %llu, tunnel rx %llu",
	    fd_cb->bytes_written_by_app, fd_cb->bytes_sent, fd_cb->bytes_received);

	if (fd_cb->connect_token != NULL) {
		mbuf_freem(fd_cb->connect_token);
	}
	if (fd_cb->connect_packet != NULL) {
		mbuf_freem(fd_cb->connect_packet);
	}
	if (fd_cb->app_data != NULL) {
		kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
	}
	if (fd_cb->original_remote_endpoint != NULL) {
		free_sockaddr(fd_cb->original_remote_endpoint);
	}
	zfree(flow_divert_pcb_zone, fd_cb);
}
409
/*
 * Detach a PCB from its group: remove it from the group's tree and drop
 * the reference the group held (taken in flow_divert_pcb_insert).
 * No-op if the PCB is not currently in a group.
 */
static void
flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb)
{
	if (fd_cb->group != NULL) {
		struct flow_divert_group *group = fd_cb->group;
		lck_rw_lock_exclusive(&group->lck);
		FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count);
		RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb);
		fd_cb->group = NULL;
		FDRELEASE(fd_cb); /* Release the group's reference */
		lck_rw_done(&group->lck);
	}
}
423
424 static int
flow_divert_packet_init(struct flow_divert_pcb * fd_cb,uint8_t packet_type,mbuf_ref_t * packet)425 flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_ref_t *packet)
426 {
427 struct flow_divert_packet_header hdr;
428 int error = 0;
429
430 error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet);
431 if (error) {
432 FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
433 return error;
434 }
435
436 hdr.packet_type = packet_type;
437 hdr.conn_id = htonl(fd_cb->hash);
438
439 /* Lay down the header */
440 error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT);
441 if (error) {
442 FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error);
443 mbuf_freem(*packet);
444 *packet = NULL;
445 return error;
446 }
447
448 return 0;
449 }
450
/*
 * Append one TLV (1-byte type, 4-byte big-endian length, then the value)
 * to the end of a control packet. Returns 0 on success or the errno from
 * the first failing mbuf_copyback.
 */
static int
flow_divert_packet_append_tlv(mbuf_ref_t packet, uint8_t type, uint32_t length, const void *value)
{
	uint32_t net_length = htonl(length);
	int error = 0;

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type);
		return error;
	}

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%u)", length);
		return error;
	}

	error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT);
	if (error) {
		FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value");
		return error;
	}

	return error;
}
477
/*
 * Scan the packet starting at `offset` for the first TLV with the given
 * type. If `next` is non-zero, the TLV at `offset` itself is skipped and
 * the search continues with the one after it. Returns the byte offset of
 * the TLV's type field, or -1 with *err set when the packet is exhausted.
 */
static int
flow_divert_packet_find_tlv(mbuf_ref_t packet, int offset, uint8_t type, int *err, int next)
{
	size_t cursor = offset;
	int error = 0;
	uint32_t curr_length = 0;
	uint8_t curr_type = 0;

	*err = 0;

	do {
		if (!next) {
			error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type);
			if (error) {
				/* Ran off the end of the packet without finding the type */
				*err = ENOENT;
				return -1;
			}
		} else {
			/* Skip the TLV at `offset`: force one advance past it */
			next = 0;
			curr_type = FLOW_DIVERT_TLV_NIL;
		}

		if (curr_type != type) {
			/* Not a match: hop over this TLV's length field and value */
			cursor += sizeof(curr_type);
			error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length);
			if (error) {
				*err = error;
				return -1;
			}

			cursor += (sizeof(curr_length) + ntohl(curr_length));
		}
	} while (curr_type != type);

	return (int)cursor;
}
514
515 static int
flow_divert_packet_get_tlv(mbuf_ref_t packet,int offset,uint8_t type,size_t buff_len,void * buff __sized_by (buff_len),uint32_t * val_size)516 flow_divert_packet_get_tlv(mbuf_ref_t packet, int offset, uint8_t type, size_t buff_len, void *buff __sized_by(buff_len), uint32_t *val_size)
517 {
518 int error = 0;
519 uint32_t length = 0;
520 int tlv_offset = 0;
521
522 tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0);
523 if (tlv_offset < 0) {
524 return error;
525 }
526
527 error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length);
528 if (error) {
529 return error;
530 }
531
532 length = ntohl(length);
533
534 uint32_t data_offset = tlv_offset + sizeof(type) + sizeof(length);
535
536 if (length > (mbuf_pkthdr_len(packet) - data_offset)) {
537 FDLOG(LOG_ERR, &nil_pcb, "Length of %u TLV (%u) is larger than remaining packet data (%lu)", type, length, (mbuf_pkthdr_len(packet) - data_offset));
538 return EINVAL;
539 }
540
541 if (val_size != NULL) {
542 *val_size = length;
543 }
544
545 if (buff != NULL && buff_len > 0) {
546 memset(buff, 0, buff_len);
547 size_t to_copy = (length < buff_len) ? length : buff_len;
548 error = mbuf_copydata(packet, data_offset, to_copy, buff);
549 if (error) {
550 return error;
551 }
552 }
553
554 return 0;
555 }
556
/*
 * Compute the SHA-1 HMAC of an entire mbuf chain using the group's token
 * key, writing the digest into `hmac` (caller provides SHA_DIGEST_LENGTH
 * bytes). Returns ENOPROTOOPT if the kernel crypto functions are not
 * registered or the group has no key.
 */
static int
flow_divert_packet_compute_hmac(mbuf_ref_t packet, struct flow_divert_group *group, uint8_t *hmac)
{
	mbuf_ref_t curr_mbuf = packet;

	if (g_crypto_funcs == NULL || group->token_key == NULL) {
		return ENOPROTOOPT;
	}

	cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx);
	g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key);

	/* Feed every mbuf in the chain through the HMAC */
	while (curr_mbuf != NULL) {
		g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf));
		curr_mbuf = mbuf_next(curr_mbuf);
	}

	g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac);

	return 0;
}
578
579 static int
flow_divert_packet_verify_hmac(mbuf_ref_t packet,uint32_t ctl_unit)580 flow_divert_packet_verify_hmac(mbuf_ref_t packet, uint32_t ctl_unit)
581 {
582 int error = 0;
583 struct flow_divert_group *group = NULL;
584 int hmac_offset;
585 uint8_t packet_hmac[SHA_DIGEST_LENGTH];
586 uint8_t computed_hmac[SHA_DIGEST_LENGTH];
587 mbuf_ref_t tail;
588
589 group = flow_divert_group_lookup(ctl_unit, NULL);
590 if (group == NULL) {
591 FDLOG(LOG_ERR, &nil_pcb, "Failed to lookup group for control unit %u", ctl_unit);
592 return ENOPROTOOPT;
593 }
594
595 lck_rw_lock_shared(&group->lck);
596
597 if (group->token_key == NULL) {
598 error = ENOPROTOOPT;
599 goto done;
600 }
601
602 hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0);
603 if (hmac_offset < 0) {
604 goto done;
605 }
606
607 error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL);
608 if (error) {
609 goto done;
610 }
611
612 /* Chop off the HMAC TLV */
613 error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail);
614 if (error) {
615 goto done;
616 }
617
618 mbuf_free(tail);
619
620 error = flow_divert_packet_compute_hmac(packet, group, computed_hmac);
621 if (error) {
622 goto done;
623 }
624
625 if (cc_cmp_safe(sizeof(packet_hmac), packet_hmac, computed_hmac)) {
626 FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC");
627 error = EINVAL;
628 goto done;
629 }
630
631 done:
632 if (group != NULL) {
633 lck_rw_done(&group->lck);
634 FDGRP_RELEASE(group);
635 }
636 return error;
637 }
638
/*
 * Account one diverted data packet (tx if `send`, rx otherwise) against
 * the socket's inpcb statistics, attributed to the last outbound
 * interface's counter type, and refresh the inpcb activity bitmap.
 */
static void
flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, size_t data_len, Boolean send)
{
	struct inpcb *inp = NULL;
	struct ifnet *ifp = NULL;
	stats_functional_type ifnet_count_type = stats_functional_type_none;

	inp = sotoinpcb(fd_cb->so);
	if (inp == NULL) {
		return;
	}

	/* Pick the last outbound interface for the socket's address family */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if (ifp != NULL) {
		ifnet_count_type = IFNET_COUNT_TYPE(ifp);
	}

	if (send) {
		INP_ADD_STAT(inp, ifnet_count_type, txpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, txbytes, data_len);
	} else {
		INP_ADD_STAT(inp, ifnet_count_type, rxpackets, 1);
		INP_ADD_STAT(inp, ifnet_count_type, rxbytes, data_len);
	}
	inp_set_activity_bitmap(inp);
}
669
670 static errno_t
flow_divert_check_no_cellular(struct flow_divert_pcb * fd_cb)671 flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb)
672 {
673 struct inpcb *inp = sotoinpcb(fd_cb->so);
674 if (INP_NO_CELLULAR(inp)) {
675 struct ifnet *ifp = NULL;
676 if (inp->inp_vflag & INP_IPV4) {
677 ifp = inp->inp_last_outifp;
678 } else if (inp->inp_vflag & INP_IPV6) {
679 ifp = inp->in6p_last_outifp;
680 }
681 if (ifp != NULL && IFNET_IS_CELLULAR(ifp)) {
682 FDLOG0(LOG_ERR, fd_cb, "Cellular is denied");
683 return EHOSTUNREACH;
684 }
685 }
686 return 0;
687 }
688
689 static errno_t
flow_divert_check_no_expensive(struct flow_divert_pcb * fd_cb)690 flow_divert_check_no_expensive(struct flow_divert_pcb *fd_cb)
691 {
692 struct inpcb *inp = sotoinpcb(fd_cb->so);
693 if (INP_NO_EXPENSIVE(inp)) {
694 struct ifnet *ifp = NULL;
695 if (inp->inp_vflag & INP_IPV4) {
696 ifp = inp->inp_last_outifp;
697 } else if (inp->inp_vflag & INP_IPV6) {
698 ifp = inp->in6p_last_outifp;
699 }
700 if (ifp != NULL && IFNET_IS_EXPENSIVE(ifp)) {
701 FDLOG0(LOG_ERR, fd_cb, "Expensive is denied");
702 return EHOSTUNREACH;
703 }
704 }
705 return 0;
706 }
707
708 static errno_t
flow_divert_check_no_constrained(struct flow_divert_pcb * fd_cb)709 flow_divert_check_no_constrained(struct flow_divert_pcb *fd_cb)
710 {
711 struct inpcb *inp = sotoinpcb(fd_cb->so);
712 if (INP_NO_CONSTRAINED(inp)) {
713 struct ifnet *ifp = NULL;
714 if (inp->inp_vflag & INP_IPV4) {
715 ifp = inp->inp_last_outifp;
716 } else if (inp->inp_vflag & INP_IPV6) {
717 ifp = inp->in6p_last_outifp;
718 }
719 if (ifp != NULL && IFNET_IS_CONSTRAINED(ifp)) {
720 FDLOG0(LOG_ERR, fd_cb, "Constrained is denied");
721 return EHOSTUNREACH;
722 }
723 }
724 return 0;
725 }
726
/*
 * Record read/write closure on the flow. `how` follows shutdown(2)
 * semantics (SHUT_RD / SHUT_WR / SHUT_RDWR). When `tunnel` is set, or the
 * connect has not started yet, the tunnel-side direction is marked closed
 * as well; `flush_snd` additionally discards any unsent data once the
 * tunnel stops accepting writes.
 */
static void
flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, bool tunnel, bool flush_snd)
{
	if (how != SHUT_RD) {
		fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED;
		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
			fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
			if (flush_snd) {
				/* If the tunnel is not accepting writes any more, then flush the send buffer */
				sbflush(&fd_cb->so->so_snd);
			}
		}
	}
	if (how != SHUT_WR) {
		fd_cb->flags |= FLOW_DIVERT_READ_CLOSED;
		if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
			fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
		}
	}
}
747
748 static uint16_t
trie_node_alloc(struct flow_divert_trie * trie)749 trie_node_alloc(struct flow_divert_trie *trie)
750 {
751 if (trie->nodes_free_next < trie->nodes_count) {
752 uint16_t node_idx = trie->nodes_free_next++;
753 TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX;
754 return node_idx;
755 } else {
756 return NULL_TRIE_IDX;
757 }
758 }
759
760 static uint16_t
trie_child_map_alloc(struct flow_divert_trie * trie)761 trie_child_map_alloc(struct flow_divert_trie *trie)
762 {
763 if (trie->child_maps_free_next < trie->child_maps_count) {
764 return trie->child_maps_free_next++;
765 } else {
766 return NULL_TRIE_IDX;
767 }
768 }
769
770 static uint16_t
trie_bytes_move(struct flow_divert_trie * trie,uint16_t bytes_idx,size_t bytes_size)771 trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size)
772 {
773 uint16_t start = trie->bytes_free_next;
774 if (start + bytes_size <= trie->bytes_count) {
775 if (start != bytes_idx) {
776 memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size);
777 }
778 trie->bytes_free_next += bytes_size;
779 return start;
780 } else {
781 return NULL_TRIE_IDX;
782 }
783 }
784
/*
 * Insert a string (already resident in the trie's byte pool at
 * [string_start, string_start + string_len)) into the compressed prefix
 * trie. Walks existing nodes, splitting a node when the new string
 * diverges mid-node, and appends a leaf for any unmatched remainder.
 * Returns the index of the node terminating the string, or NULL_TRIE_IDX
 * if the trie ran out of nodes, child maps, or byte space.
 */
static uint16_t
flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len)
{
	uint16_t current = trie->root;
	uint16_t child = trie->root;
	uint16_t string_end = string_start + (uint16_t)string_len;
	uint16_t string_idx = string_start;
	uint16_t string_remainder = (uint16_t)string_len;

	while (child != NULL_TRIE_IDX) {
		uint16_t parent = current;
		uint16_t node_idx;
		uint16_t current_end;

		current = child;
		child = NULL_TRIE_IDX;

		current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;

		/* Advance while the node's bytes and the string's bytes agree */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < current_end &&
		    string_idx < string_end &&
		    TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx);
		    node_idx++, string_idx++) {
			;
		}

		string_remainder = string_end - string_idx;

		if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) {
			/*
			 * We did not reach the end of the current node's string.
			 * We need to split the current node into two:
			 * 1. A new node that contains the prefix of the node that matches
			 * the prefix of the string being inserted.
			 * 2. The current node modified to point to the remainder
			 * of the current node's string.
			 */
			uint16_t prefix = trie_node_alloc(trie);
			if (prefix == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node");
				return NULL_TRIE_IDX;
			}

			/*
			 * Prefix points to the portion of the current nodes's string that has matched
			 * the input string thus far.
			 */
			TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start;
			TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start);

			/*
			 * Prefix has the current node as the child corresponding to the first byte
			 * after the split.
			 */
			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
				return NULL_TRIE_IDX;
			}
			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;

			/* Parent has the prefix as the child correspoding to the first byte in the prefix */
			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;

			/* Current node is adjusted to point to the remainder */
			TRIE_NODE(trie, current).start = node_idx;
			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;

			/* We want to insert the new leaf (if any) as a child of the prefix */
			current = prefix;
		}

		if (string_remainder > 0) {
			/*
			 * We still have bytes in the string that have not been matched yet.
			 * If the current node has children, iterate to the child corresponding
			 * to the next byte in the string.
			 */
			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
			}
		}
	} /* while (child != NULL_TRIE_IDX) */

	if (string_remainder > 0) {
		/* Add a new leaf containing the remainder of the string */
		uint16_t leaf = trie_node_alloc(trie);
		if (leaf == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}

		/* Compact the leaf's bytes to the front of the byte pool */
		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
			return NULL_TRIE_IDX;
		}
		TRIE_NODE(trie, leaf).length = string_remainder;

		/* Set the new leaf as the child of the current node */
		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
				return NULL_TRIE_IDX;
			}
		}
		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
		current = leaf;
	} /* else duplicate or this string is a prefix of one of the existing strings */

	return current;
}
899
#define APPLE_WEBCLIP_ID_PREFIX "com.apple.webapp"
/*
 * Search the signing-identifier trie for a NUL-terminated string.
 * Returns the index of the matching node on an exact match (or when the
 * string begins with the Apple webclip prefix and the trie contains that
 * prefix), or NULL_TRIE_IDX if the string is not present.
 * string_bytes_count is unused; matching stops at the NUL terminator.
 */
static uint16_t
flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes __sized_by(string_bytes_count), __unused size_t string_bytes_count)
{
	uint16_t current = trie->root;
	uint16_t string_idx = 0;

	while (current != NULL_TRIE_IDX) {
		uint16_t next = NULL_TRIE_IDX;
		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
		uint16_t node_idx;

		/* Consume bytes while the node's label matches the string */
		for (node_idx = TRIE_NODE(trie, current).start;
		    node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
		    node_idx++, string_idx++) {
			;
		}

		if (node_idx == node_end) {
			if (string_bytes[string_idx] == '\0') {
				return current; /* Got an exact match */
			} else if (string_idx == strlen(APPLE_WEBCLIP_ID_PREFIX) &&
			    0 == strlcmp((const char *)string_bytes, APPLE_WEBCLIP_ID_PREFIX, string_idx)) {
				return current; /* Got an apple webclip id prefix match */
			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
				/* Descend to the child indexed by the next unmatched byte */
				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
			}
		}
		current = next;
	}

	return NULL_TRIE_IDX;
}
933
/*
 * Shared state for the proc_iterate() filter/callout pair used by
 * flow_divert_find_proc_by_uuid().
 */
struct uuid_search_info {
	uuid_t target_uuid;                                   /* executable UUID being searched for */
	char *found_signing_id __sized_by(found_signing_id_size); /* signing ID of the first matching proc; freed by the callout */
	boolean_t found_multiple_signing_ids;                 /* TRUE if matches had differing signing IDs (search is ambiguous) */
	proc_t found_proc;                                    /* the claimed process, or PROC_NULL */
	size_t found_signing_id_size;                         /* allocation size of found_signing_id */
};
941
942 static int
flow_divert_find_proc_by_uuid_callout(proc_t p,void * arg)943 flow_divert_find_proc_by_uuid_callout(proc_t p, void *arg)
944 {
945 struct uuid_search_info *info = (struct uuid_search_info *)arg;
946 int result = PROC_RETURNED_DONE; /* By default, we didn't find the process */
947
948 if (info->found_signing_id != NULL) {
949 if (!info->found_multiple_signing_ids) {
950 /* All processes that were found had the same signing identifier, so just claim this first one and be done. */
951 info->found_proc = p;
952 result = PROC_CLAIMED_DONE;
953 } else {
954 uuid_string_t uuid_str;
955 uuid_unparse(info->target_uuid, uuid_str);
956 FDLOG(LOG_WARNING, &nil_pcb, "Found multiple processes with UUID %s with different signing identifiers", uuid_str);
957 }
958 kfree_data_sized_by(info->found_signing_id, info->found_signing_id_size);
959 }
960
961 if (result == PROC_RETURNED_DONE) {
962 uuid_string_t uuid_str;
963 uuid_unparse(info->target_uuid, uuid_str);
964 FDLOG(LOG_WARNING, &nil_pcb, "Failed to find a process with UUID %s", uuid_str);
965 }
966
967 return result;
968 }
969
970 static int
flow_divert_find_proc_by_uuid_filter(proc_t p,void * arg)971 flow_divert_find_proc_by_uuid_filter(proc_t p, void *arg)
972 {
973 struct uuid_search_info *info = (struct uuid_search_info *)arg;
974 int include = 0;
975
976 if (info->found_multiple_signing_ids) {
977 return include;
978 }
979
980 const unsigned char * p_uuid = proc_executableuuid_addr(p);
981 include = (uuid_compare(p_uuid, info->target_uuid) == 0);
982 if (include) {
983 const char *signing_id __null_terminated = cs_identity_get(p);
984 if (signing_id != NULL) {
985 FDLOG(LOG_INFO, &nil_pcb, "Found process %d with signing identifier %s", proc_getpid(p), signing_id);
986 size_t signing_id_size = strlen(signing_id) + 1;
987 if (info->found_signing_id == NULL) {
988 info->found_signing_id = kalloc_data(signing_id_size, Z_WAITOK);
989 info->found_signing_id_size = signing_id_size;
990 strlcpy(info->found_signing_id, signing_id, signing_id_size);
991 } else if (strlcmp(info->found_signing_id, signing_id, info->found_signing_id_size)) {
992 info->found_multiple_signing_ids = TRUE;
993 }
994 } else {
995 info->found_multiple_signing_ids = TRUE;
996 }
997 include = !info->found_multiple_signing_ids;
998 }
999
1000 return include;
1001 }
1002
1003 static proc_t
flow_divert_find_proc_by_uuid(uuid_t uuid)1004 flow_divert_find_proc_by_uuid(uuid_t uuid)
1005 {
1006 struct uuid_search_info info;
1007
1008 if (LOG_INFO <= nil_pcb.log_level) {
1009 uuid_string_t uuid_str;
1010 uuid_unparse(uuid, uuid_str);
1011 FDLOG(LOG_INFO, &nil_pcb, "Looking for process with UUID %s", uuid_str);
1012 }
1013
1014 memset(&info, 0, sizeof(info));
1015 info.found_proc = PROC_NULL;
1016 uuid_copy(info.target_uuid, uuid);
1017
1018 proc_iterate(PROC_ALLPROCLIST, flow_divert_find_proc_by_uuid_callout, &info, flow_divert_find_proc_by_uuid_filter, &info);
1019
1020 return info.found_proc;
1021 }
1022
/*
 * Append identifying information about `proc` to `connect_packet` as TLVs:
 * the signing identifier (when not already carried by a token), the code
 * directory hash, and the task's audit token. When `is_effective` is true,
 * the signing identifier is also validated against the group's signing-ID
 * trie (unless the group opted out of the app map); a failed validation
 * returns EPERM. Runs with `proc` locked for the duration.
 */
static int
flow_divert_add_proc_info(struct flow_divert_pcb *fd_cb, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet, bool is_effective)
{
	int error = 0;
	uint8_t *cdhash = NULL;
	audit_token_t audit_token = {};
	const char *proc_cs_id __null_terminated = signing_id;

	proc_lock(proc);

	if (proc_cs_id == NULL) {
		/* No identifier supplied by the caller; take it from the proc's code
		 * signature, but only if the signature is valid (or being debugged). */
		if (proc_getcsflags(proc) & (CS_VALID | CS_DEBUGGED)) {
			proc_cs_id = cs_identity_get(proc);
		} else {
			FDLOG0(LOG_ERR, fd_cb, "Signature of proc is invalid");
		}
	}

	if (is_effective) {
		/* Validate the effective proc's signing ID against the group's trie */
		lck_rw_lock_shared(&fd_cb->group->lck);
		if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
			if (proc_cs_id != NULL) {
				size_t proc_cs_id_size = strlen(proc_cs_id) + 1;
				uint16_t result = flow_divert_trie_search(&fd_cb->group->signing_id_trie, (const uint8_t *)__unsafe_null_terminated_to_indexable(proc_cs_id), proc_cs_id_size);
				if (result == NULL_TRIE_IDX) {
					FDLOG(LOG_WARNING, fd_cb, "%s did not match", proc_cs_id);
					error = EPERM;
				} else {
					FDLOG(LOG_INFO, fd_cb, "%s matched", proc_cs_id);
				}
			} else {
				/* No signing ID at all: cannot match the app map */
				error = EPERM;
			}
		}
		lck_rw_done(&fd_cb->group->lck);
	}

	if (error != 0) {
		goto done;
	}

	/*
	 * If signing_id is not NULL then it came from the flow divert token and will be added
	 * as part of the token, so there is no need to add it here.
	 */
	if (signing_id == NULL && proc_cs_id != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_SIGNING_ID : FLOW_DIVERT_TLV_APP_REAL_SIGNING_ID),
		    (uint32_t)strlen(proc_cs_id),
		    __terminated_by_to_indexable(proc_cs_id));
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the signing ID: %d", error);
			goto done;
		}
	}

	/* The cdhash TLV: a missing cdhash is logged but not treated as fatal */
	cdhash = cs_get_cdhash(proc);
	if (cdhash != NULL) {
		error = flow_divert_packet_append_tlv(connect_packet,
		    (is_effective ? FLOW_DIVERT_TLV_CDHASH : FLOW_DIVERT_TLV_APP_REAL_CDHASH),
		    SHA1_RESULTLEN,
		    cdhash);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "failed to append the cdhash: %d", error);
			goto done;
		}
	} else {
		FDLOG0(LOG_ERR, fd_cb, "failed to get the cdhash");
	}

	/* The audit token TLV: append failures are logged but do not fail the call */
	task_t task __single = proc_task(proc);
	if (task != TASK_NULL) {
		mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
		kern_return_t rc = task_info(task, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count);
		if (rc == KERN_SUCCESS) {
			int append_error = flow_divert_packet_append_tlv(connect_packet,
			    (is_effective ? FLOW_DIVERT_TLV_APP_AUDIT_TOKEN : FLOW_DIVERT_TLV_APP_REAL_AUDIT_TOKEN),
			    sizeof(audit_token_t),
			    &audit_token);
			if (append_error) {
				FDLOG(LOG_ERR, fd_cb, "failed to append app audit token: %d", append_error);
			}
		}
	}

done:
	proc_unlock(proc);

	return error;
}
1113
/*
 * Determine which processes are relevant to this socket (real, effective /
 * delegated, and — on macOS — responsible) and append their identifying
 * TLVs to `connect_packet` via flow_divert_add_proc_info(). The "effective"
 * proc info comes from the most specific process available (responsible >
 * effective > real); the "real" proc info is added separately when it differs.
 *
 * Reference counting: procs obtained through proc_find()/
 * flow_divert_find_proc_by_uuid() are released here; `real_proc` is only
 * released when it was found here (release_real_proc), not when it fell back
 * to the caller-supplied `proc` or current_proc().
 */
static int
flow_divert_add_all_proc_info(struct flow_divert_pcb *fd_cb, struct socket *so, proc_t proc, const char *signing_id __null_terminated, mbuf_ref_t connect_packet)
{
	int error = 0;
	proc_t effective_proc = PROC_NULL;
	proc_t responsible_proc = PROC_NULL;
	proc_t real_proc = proc_find(so->last_pid);
	bool release_real_proc = true;

	proc_t src_proc = PROC_NULL;
	proc_t real_src_proc = PROC_NULL;

	if (real_proc == PROC_NULL) {
		/* The socket's last_pid is gone; fall back to the caller's proc or
		 * the current proc. These are borrowed, so don't release them. */
		FDLOG(LOG_ERR, fd_cb, "failed to find the real proc record for %d", so->last_pid);
		release_real_proc = false;
		real_proc = proc;
		if (real_proc == PROC_NULL) {
			real_proc = current_proc();
		}
	}

	if (so->so_flags & SOF_DELEGATED) {
		/* The socket is delegated: locate the effective proc, by pid if it
		 * differs from the real proc, otherwise by executable UUID. */
		if (proc_getpid(real_proc) != so->e_pid) {
			effective_proc = proc_find(so->e_pid);
		} else {
			const unsigned char * real_proc_uuid = proc_executableuuid_addr(real_proc);
			if (uuid_compare(real_proc_uuid, so->e_uuid)) {
				effective_proc = flow_divert_find_proc_by_uuid(so->e_uuid);
			}
		}
	}

#if defined(XNU_TARGET_OS_OSX)
	/* On macOS, prefer the responsible process when the group uses the app map */
	lck_rw_lock_shared(&fd_cb->group->lck);
	if (!(fd_cb->group->flags & FLOW_DIVERT_GROUP_FLAG_NO_APP_MAP)) {
		if (so->so_rpid > 0) {
			responsible_proc = proc_find(so->so_rpid);
		}
	}
	lck_rw_done(&fd_cb->group->lck);
#endif

	real_src_proc = real_proc;

	/* Pick the "effective" source proc: responsible > effective > real */
	if (responsible_proc != PROC_NULL) {
		src_proc = responsible_proc;
		if (effective_proc != NULL) {
			real_src_proc = effective_proc;
		}
	} else if (effective_proc != PROC_NULL) {
		src_proc = effective_proc;
	} else {
		src_proc = real_proc;
	}

	error = flow_divert_add_proc_info(fd_cb, src_proc, signing_id, connect_packet, true);
	if (error != 0) {
		goto done;
	}

	/* Also add the real proc's info when it differs from the effective one */
	if (real_src_proc != NULL && real_src_proc != src_proc) {
		error = flow_divert_add_proc_info(fd_cb, real_src_proc, NULL, connect_packet, false);
		if (error != 0) {
			goto done;
		}
	}

done:
	if (responsible_proc != PROC_NULL) {
		proc_rele(responsible_proc);
	}

	if (effective_proc != PROC_NULL) {
		proc_rele(effective_proc);
	}

	if (real_proc != PROC_NULL && release_real_proc) {
		proc_rele(real_proc);
	}

	return error;
}
1196
/*
 * Hand `packet` to the flow divert provider via the kernel control socket.
 * If the group's send queue is non-empty (or the control socket is full),
 * the packet is queued instead and delivered later; in that case ownership
 * of `packet` transfers to the queue and 0 is returned. If the flow has no
 * provider group, the socket is torn down and an error is returned.
 * On a non-ENOBUFS enqueue error, the caller retains ownership of `packet`.
 */
static int
flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet)
{
	int error;

	if (fd_cb->group == NULL) {
		/* No provider to send to: close the flow and report a socket error */
		FDLOG0(LOG_INFO, fd_cb, "no provider, cannot send packet");
		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, false);
		flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			error = ECONNABORTED;
		} else {
			error = EHOSTUNREACH;
		}
		fd_cb->so->so_error = (uint16_t)error;
		return error;
	}

	lck_rw_lock_shared(&fd_cb->group->lck);

	if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) {
		error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR);
		if (error) {
			FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_send_packet: ctl_enqueuembuf returned an error: %d", error);
		}
	} else {
		/* Packets are already queued; preserve ordering by queueing this one too */
		error = ENOBUFS;
	}

	if (error == ENOBUFS) {
		/* Upgrade to the exclusive lock to append to the send queue. If the
		 * upgrade fails, the shared lock was dropped, so reacquire exclusively. */
		if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) {
			lck_rw_lock_exclusive(&fd_cb->group->lck);
		}
		MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet);
		error = 0;
		/* Note that the control socket is blocked so the queue gets drained later */
		OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits);
	}

	lck_rw_done(&fd_cb->group->lck);

	return error;
}
1239
1240 static void
flow_divert_append_domain_name(char * domain_name __null_terminated,void * ctx)1241 flow_divert_append_domain_name(char *domain_name __null_terminated, void *ctx)
1242 {
1243 mbuf_ref_t packet = (mbuf_ref_t)ctx;
1244 size_t domain_name_length = 0;
1245
1246 if (packet == NULL || domain_name == NULL) {
1247 return;
1248 }
1249
1250 domain_name_length = strlen(domain_name);
1251 if (domain_name_length > 0 && domain_name_length < FLOW_DIVERT_MAX_NAME_SIZE) {
1252 int error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TARGET_HOSTNAME, (uint32_t)domain_name_length, __terminated_by_to_indexable(domain_name));
1253 if (error) {
1254 FDLOG(LOG_ERR, &nil_pcb, "Failed to append %s: %d", domain_name, error);
1255 }
1256 }
1257 }
1258
/*
 * Build the CONNECT packet for a flow: process identity TLVs, traffic class,
 * flow type, target endpoint (either the connect token's contents or the
 * destination address plus hostname), local address, outgoing interface,
 * flags, and content-filter ID. On success *out_connect_packet owns the
 * packet; on failure the partially-built packet is freed here.
 * Note: a connect token, when present, is consumed (fd_cb->connect_token is
 * cleared) by being concatenated into the packet.
 */
static int
flow_divert_create_connect_packet(struct flow_divert_pcb *fd_cb, struct sockaddr *to, struct socket *so, proc_t p, mbuf_ref_t *out_connect_packet)
{
	int error = 0;
	int flow_type = 0;
	char * signing_id __indexable = NULL;
	uint32_t sid_size = 0;
	mbuf_ref_t connect_packet = NULL;
	cfil_sock_id_t cfil_sock_id = CFIL_SOCK_ID_NONE;
	const void *cfil_id = NULL;
	size_t cfil_id_size = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct ifnet *ifp = NULL;
	uint32_t flags = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet);
	if (error) {
		goto done;
	}

	/* If the connect token carries a (HMAC-protected) signing ID, extract it
	 * so it can be validated against the app map in add_all_proc_info. */
	if (fd_cb->connect_token != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_HMAC)) {
		int find_error = flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (find_error == 0 && sid_size > 0) {
			signing_id = kalloc_data(sid_size + 1, Z_WAITOK | Z_ZERO);
			if (signing_id != NULL) {
				flow_divert_packet_get_tlv(fd_cb->connect_token, 0, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, signing_id, NULL);
				FDLOG(LOG_INFO, fd_cb, "Got %s from token", signing_id);
			}
		}
	}

	// TODO: remove ternary operator after rdar://121487109 is fixed
	error = flow_divert_add_all_proc_info(fd_cb, so, p, NULL == signing_id ? NULL : __unsafe_null_terminated_from_indexable(signing_id), connect_packet);

	if (signing_id != NULL) {
		kfree_data(signing_id, sid_size + 1);
	}

	if (error) {
		FDLOG(LOG_ERR, fd_cb, "Failed to add source proc info: %d", error);
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_TRAFFIC_CLASS,
	    sizeof(fd_cb->so->so_traffic_class),
	    &fd_cb->so->so_traffic_class);
	if (error) {
		goto done;
	}

	/* Only TCP (stream) and UDP (datagram) flows are supported */
	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_TCP;
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		flow_type = FLOW_DIVERT_FLOW_TYPE_UDP;
	} else {
		error = EINVAL;
		goto done;
	}
	error = flow_divert_packet_append_tlv(connect_packet,
	    FLOW_DIVERT_TLV_FLOW_TYPE,
	    sizeof(flow_type),
	    &flow_type);

	if (error) {
		goto done;
	}

	if (fd_cb->connect_token != NULL) {
		/* The token already contains the endpoint TLVs; splice it into the
		 * packet. The token mbuf chain is consumed here. */
		unsigned int token_len = m_length(fd_cb->connect_token);
		mbuf_concatenate(connect_packet, fd_cb->connect_token);
		mbuf_pkthdr_adjustlen(connect_packet, token_len);
		fd_cb->connect_token = NULL;
	} else {
		/* No token: append the destination address and hostname directly */
		error = flow_divert_append_target_endpoint_tlv(connect_packet, to);
		if (error) {
			goto done;
		}

		necp_with_inp_domain_name(so, connect_packet, flow_divert_append_domain_name);
	}

	if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
		if (error) {
			goto done;
		}
	}

	/* Determine the outgoing interface: last-used, or the bound interface
	 * when the socket is bound to an interface or a local address. */
	if (inp->inp_vflag & INP_IPV4) {
		ifp = inp->inp_last_outifp;
	} else if (inp->inp_vflag & INP_IPV6) {
		ifp = inp->in6p_last_outifp;
	}
	if ((inp->inp_flags & INP_BOUND_IF) ||
	    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) ||
	    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_BOUND;
		if (ifp == NULL) {
			ifp = inp->inp_boundifp;
		}
	}
	if (ifp != NULL) {
		uint32_t flow_if_index = ifp->if_index;
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_OUT_IF_INDEX,
		    sizeof(flow_if_index), &flow_if_index);
		if (error) {
			goto done;
		}
	}

	/* Propagate TCP Fast Open eligibility */
	if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		flags |= FLOW_DIVERT_TOKEN_FLAG_TFO;
	}

	if (flags != 0) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags);
		if (error) {
			goto done;
		}
	}

	/* Attach a content-filter identifier: the cfil sock ID when one exists,
	 * otherwise the NECP client UUID when content filtering is skipped. */
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		cfil_sock_id = cfil_sock_id_from_datagram_socket(so, NULL, to);
	} else {
		cfil_sock_id = cfil_sock_id_from_socket(so);
	}

	if (cfil_sock_id != CFIL_SOCK_ID_NONE) {
		cfil_id = &cfil_sock_id;
		cfil_id_size = sizeof(cfil_sock_id);
	} else if (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) {
		cfil_id = &inp->necp_client_uuid;
		cfil_id_size = sizeof(inp->necp_client_uuid);
	}

	if (cfil_id != NULL && cfil_id_size > 0 && cfil_id_size <= sizeof(uuid_t)) {
		error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CFIL_ID, (uint32_t)cfil_id_size, cfil_id);
		if (error) {
			goto done;
		}
	}

done:
	if (!error) {
		*out_connect_packet = connect_packet;
	} else if (connect_packet != NULL) {
		mbuf_freem(connect_packet);
	}

	return error;
}
1411
1412 static int
flow_divert_send_connect_packet(struct flow_divert_pcb * fd_cb)1413 flow_divert_send_connect_packet(struct flow_divert_pcb *fd_cb)
1414 {
1415 int error = 0;
1416 mbuf_ref_t connect_packet = fd_cb->connect_packet;
1417 mbuf_ref_t saved_connect_packet = NULL;
1418
1419 if (connect_packet != NULL) {
1420 error = mbuf_copym(connect_packet, 0, mbuf_pkthdr_len(connect_packet), MBUF_DONTWAIT, &saved_connect_packet);
1421 if (error) {
1422 FDLOG0(LOG_ERR, fd_cb, "Failed to copy the connect packet");
1423 goto done;
1424 }
1425
1426 error = flow_divert_send_packet(fd_cb, connect_packet);
1427 if (error) {
1428 goto done;
1429 }
1430
1431 fd_cb->connect_packet = saved_connect_packet;
1432 saved_connect_packet = NULL;
1433 } else {
1434 error = ENOENT;
1435 }
1436 done:
1437 if (saved_connect_packet != NULL) {
1438 mbuf_freem(saved_connect_packet);
1439 }
1440
1441 return error;
1442 }
1443
1444 static int
flow_divert_send_connect_result(struct flow_divert_pcb * fd_cb)1445 flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
1446 {
1447 int error = 0;
1448 mbuf_ref_t packet = NULL;
1449 int rbuff_space = 0;
1450
1451 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
1452 if (error) {
1453 FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
1454 goto done;
1455 }
1456
1457 rbuff_space = fd_cb->so->so_rcv.sb_hiwat;
1458 if (rbuff_space < 0) {
1459 rbuff_space = 0;
1460 }
1461 rbuff_space = htonl(rbuff_space);
1462 error = flow_divert_packet_append_tlv(packet,
1463 FLOW_DIVERT_TLV_SPACE_AVAILABLE,
1464 sizeof(rbuff_space),
1465 &rbuff_space);
1466 if (error) {
1467 goto done;
1468 }
1469
1470 if (fd_cb->local_endpoint.sa.sa_family == AF_INET || fd_cb->local_endpoint.sa.sa_family == AF_INET6) {
1471 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_LOCAL_ADDR, fd_cb->local_endpoint.sa.sa_len, &(fd_cb->local_endpoint.sa));
1472 if (error) {
1473 goto done;
1474 }
1475 }
1476
1477 error = flow_divert_send_packet(fd_cb, packet);
1478 if (error) {
1479 goto done;
1480 }
1481
1482 done:
1483 if (error && packet != NULL) {
1484 mbuf_freem(packet);
1485 }
1486
1487 return error;
1488 }
1489
1490 static int
flow_divert_send_close(struct flow_divert_pcb * fd_cb,int how)1491 flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
1492 {
1493 int error = 0;
1494 mbuf_ref_t packet = NULL;
1495 uint32_t zero = 0;
1496
1497 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
1498 if (error) {
1499 FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
1500 goto done;
1501 }
1502
1503 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
1504 if (error) {
1505 FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
1506 goto done;
1507 }
1508
1509 how = htonl(how);
1510 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
1511 if (error) {
1512 FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
1513 goto done;
1514 }
1515
1516 error = flow_divert_send_packet(fd_cb, packet);
1517 if (error) {
1518 goto done;
1519 }
1520
1521 done:
1522 if (error && packet != NULL) {
1523 mbuf_free(packet);
1524 }
1525
1526 return error;
1527 }
1528
1529 static int
flow_divert_tunnel_how_closed(struct flow_divert_pcb * fd_cb)1530 flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
1531 {
1532 if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
1533 (FLOW_DIVERT_TUNNEL_RD_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) {
1534 return SHUT_RDWR;
1535 } else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
1536 return SHUT_RD;
1537 } else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
1538 return SHUT_WR;
1539 }
1540
1541 return -1;
1542 }
1543
/*
 * Determine which close messages, if any, need to be sent to the tunnel and
 * send them. Disconnects the socket once the tunnel is closed for both reads
 * and writes.
 */
1548 static void
flow_divert_send_close_if_needed(struct flow_divert_pcb * fd_cb)1549 flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
1550 {
1551 int how = -1;
1552
1553 /* Do not send any close messages if there is still data in the send buffer */
1554 if (fd_cb->so->so_snd.sb_cc == 0) {
1555 if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED | FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
1556 /* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
1557 how = SHUT_RD;
1558 }
1559 if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED | FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
1560 /* Socket closed writes, but tunnel did not. Tell tunnel to close writes */
1561 if (how == SHUT_RD) {
1562 how = SHUT_RDWR;
1563 } else {
1564 how = SHUT_WR;
1565 }
1566 }
1567 }
1568
1569 if (how != -1) {
1570 FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how);
1571 if (flow_divert_send_close(fd_cb, how) != ENOBUFS) {
1572 /* Successfully sent the close packet. Record the ways in which the tunnel has been closed */
1573 if (how != SHUT_RD) {
1574 fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED;
1575 }
1576 if (how != SHUT_WR) {
1577 fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED;
1578 }
1579 }
1580 }
1581
1582 if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) {
1583 flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
1584 }
1585 }
1586
1587 static errno_t
flow_divert_send_data_packet(struct flow_divert_pcb * fd_cb,mbuf_ref_t data,size_t data_len)1588 flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len)
1589 {
1590 mbuf_ref_t packet = NULL;
1591 mbuf_ref_t last = NULL;
1592 int error = 0;
1593
1594 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
1595 if (error || packet == NULL) {
1596 FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
1597 goto done;
1598 }
1599
1600 if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
1601 last = m_last(packet);
1602 mbuf_setnext(last, data);
1603 mbuf_pkthdr_adjustlen(packet, (int)data_len);
1604 } else {
1605 data_len = 0;
1606 }
1607 error = flow_divert_send_packet(fd_cb, packet);
1608 if (error == 0 && data_len > 0) {
1609 fd_cb->bytes_sent += data_len;
1610 flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
1611 }
1612
1613 done:
1614 if (error) {
1615 if (last != NULL) {
1616 mbuf_setnext(last, NULL);
1617 }
1618 if (packet != NULL) {
1619 mbuf_freem(packet);
1620 }
1621 }
1622
1623 return error;
1624 }
1625
/*
 * Wrap a datagram (or one fragment of one) in a DATA packet and send it to
 * the provider. Optional TLVs: the destination address (`toaddr`), an
 * is-fragment marker, and the total datagram size (used by the provider to
 * reassemble fragmented datagrams). On failure the data is detached from the
 * packet so the caller retains ownership of it.
 */
static errno_t
flow_divert_send_datagram_packet(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_len, struct sockaddr *toaddr, Boolean is_fragment, size_t datagram_size)
{
	mbuf_ref_t packet = NULL;
	mbuf_ref_t last = NULL;
	int error = 0;

	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error);
		goto done;
	}

	if (toaddr != NULL) {
		error = flow_divert_append_target_endpoint_tlv(packet, toaddr);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_append_target_endpoint_tlv() failed: %d", error);
			goto done;
		}
	}
	if (is_fragment) {
		error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_IS_FRAGMENT, sizeof(is_fragment), &is_fragment);
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_IS_FRAGMENT) failed: %d", error);
			goto done;
		}
	}

	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_DATAGRAM_SIZE, sizeof(datagram_size), &datagram_size);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_append_tlv(FLOW_DIVERT_TLV_DATAGRAM_SIZE) failed: %d", error);
		goto done;
	}

	if (data_len > 0 && data_len <= INT_MAX && data != NULL) {
		/* Chain the payload onto the header and fix up the packet length */
		last = m_last(packet);
		mbuf_setnext(last, data);
		mbuf_pkthdr_adjustlen(packet, (int)data_len);
	} else {
		data_len = 0;
	}
	error = flow_divert_send_packet(fd_cb, packet);
	if (error == 0 && data_len > 0) {
		fd_cb->bytes_sent += data_len;
		flow_divert_add_data_statistics(fd_cb, data_len, TRUE);
	}

done:
	if (error) {
		/* Detach the caller's data before freeing the header chain */
		if (last != NULL) {
			mbuf_setnext(last, NULL);
		}
		if (packet != NULL) {
			mbuf_freem(packet);
		}
	}

	return error;
}
1685
1686 static errno_t
flow_divert_send_fragmented_datagram(struct flow_divert_pcb * fd_cb,mbuf_ref_t datagram,size_t datagram_len,struct sockaddr * toaddr)1687 flow_divert_send_fragmented_datagram(struct flow_divert_pcb *fd_cb, mbuf_ref_t datagram, size_t datagram_len, struct sockaddr *toaddr)
1688 {
1689 mbuf_ref_t next_data = datagram;
1690 size_t remaining_len = datagram_len;
1691 mbuf_ref_t remaining_data = NULL;
1692 int error = 0;
1693 bool first = true;
1694
1695 while (remaining_len > 0 && next_data != NULL) {
1696 size_t to_send = remaining_len;
1697 remaining_data = NULL;
1698
1699 if (to_send > FLOW_DIVERT_CHUNK_SIZE) {
1700 to_send = FLOW_DIVERT_CHUNK_SIZE;
1701 error = mbuf_split(next_data, to_send, MBUF_DONTWAIT, &remaining_data);
1702 if (error) {
1703 break;
1704 }
1705 }
1706
1707 error = flow_divert_send_datagram_packet(fd_cb, next_data, to_send, (first ? toaddr : NULL), TRUE, (first ? datagram_len : 0));
1708 if (error) {
1709 break;
1710 }
1711
1712 first = false;
1713 remaining_len -= to_send;
1714 next_data = remaining_data;
1715 }
1716
1717 if (error) {
1718 if (next_data != NULL) {
1719 mbuf_freem(next_data);
1720 }
1721 if (remaining_data != NULL) {
1722 mbuf_freem(remaining_data);
1723 }
1724 }
1725 return error;
1726 }
1727
/*
 * Drain the socket's send buffer to the provider, up to the flow's send
 * window (unless `force` is set, which ignores the window). Stream sockets
 * are sent as chunk-sized DATA packets and drained with sbdrop(); datagram
 * sockets are sent one record at a time (fragmenting oversized datagrams)
 * and drained with sbdroprecord(). Decrements the send window by the number
 * of bytes actually sent.
 */
static void
flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force)
{
	size_t to_send;
	size_t sent = 0;
	int error = 0;
	mbuf_ref_t buffer;

	to_send = fd_cb->so->so_snd.sb_cc;
	buffer = fd_cb->so->so_snd.sb_mb;

	if (buffer == NULL && to_send > 0) {
		/* Inconsistent sockbuf state; bail rather than dereference NULL */
		FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send);
		return;
	}

	/* Ignore the send window if force is enabled */
	if (!force && (to_send > fd_cb->send_window)) {
		to_send = fd_cb->send_window;
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		/* Stream: copy and send chunk-sized slices of the buffered byte stream */
		while (sent < to_send) {
			mbuf_ref_t data;
			size_t data_len;

			data_len = to_send - sent;
			if (data_len > FLOW_DIVERT_CHUNK_SIZE) {
				data_len = FLOW_DIVERT_CHUNK_SIZE;
			}

			error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
				break;
			}

			error = flow_divert_send_data_packet(fd_cb, data, data_len);
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}

			sent += data_len;
		}
		/* Remove the bytes that were actually sent and wake up writers */
		sbdrop(&fd_cb->so->so_snd, (int)sent);
		sowwakeup(fd_cb->so);
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		mbuf_ref_t data;
		mbuf_ref_t m;
		size_t data_len;

		/* Datagram: each sockbuf record is one datagram, possibly with a
		 * leading address mbuf followed by the data mbufs */
		while (buffer) {
			struct sockaddr *toaddr = flow_divert_get_buffered_target_address(buffer);

			m = buffer;
			if (toaddr != NULL) {
				/* look for data in the chain */
				do {
					m = m->m_next;
					if (m != NULL && m->m_type == MT_DATA) {
						break;
					}
				} while (m);
				if (m == NULL) {
					/* unexpected */
					FDLOG0(LOG_ERR, fd_cb, "failed to find type MT_DATA in the mbuf chain.");
					goto move_on;
				}
			}
			data_len = mbuf_pkthdr_len(m);
			if (data_len > 0) {
				FDLOG(LOG_DEBUG, fd_cb, "mbuf_copym() data_len = %lu", data_len);
				error = mbuf_copym(m, 0, data_len, MBUF_DONTWAIT, &data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error);
					break;
				}
			} else {
				data = NULL;
			}
			if (data_len <= FLOW_DIVERT_CHUNK_SIZE) {
				error = flow_divert_send_datagram_packet(fd_cb, data, data_len, toaddr, FALSE, 0);
			} else {
				/* Oversized datagram: fragment it; ownership of data transfers */
				error = flow_divert_send_fragmented_datagram(fd_cb, data, data_len, toaddr);
				data = NULL;
			}
			if (error) {
				if (data != NULL) {
					mbuf_freem(data);
				}
				break;
			}
			sent += data_len;
move_on:
			/* Advance to the next record and drop the one just processed */
			buffer = buffer->m_nextpkt;
			(void) sbdroprecord(&(fd_cb->so->so_snd));
		}
	}

	if (sent > 0) {
		FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent);
		/* Shrink the send window by what was sent, clamping at zero */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}
	}
}
1839
/*
 * Send application data on a flow, respecting the send window. Whatever
 * cannot be sent immediately is appended to the socket's send buffer for
 * flow_divert_send_buffered_data() to flush later. Ownership of `data`
 * transfers to this function: it is either sent, buffered, or freed.
 * Returns ENOBUFS when data had to be dropped because the send buffer was
 * full (stream case) or buffering failed.
 */
static int
flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t data, size_t data_size, struct sockaddr *toaddr)
{
	size_t to_send = data_size;
	int error = 0;

	if (to_send > fd_cb->send_window) {
		to_send = fd_cb->send_window;
	}

	if (fd_cb->so->so_snd.sb_cc > 0) {
		to_send = 0; /* If the send buffer is non-empty, then we can't send anything */
	}

	if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
		/* Stream: send chunk-sized pieces up to the window, splitting the
		 * chain as needed; any leftover is buffered in the send buffer. */
		size_t sent = 0;
		mbuf_ref_t remaining_data = data;
		size_t remaining_size = data_size;
		mbuf_ref_t pkt_data = NULL;
		while (sent < to_send && remaining_data != NULL && remaining_size > 0) {
			size_t pkt_data_len;

			pkt_data = remaining_data;

			if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) {
				pkt_data_len = FLOW_DIVERT_CHUNK_SIZE;
			} else {
				pkt_data_len = to_send - sent;
			}

			if (pkt_data_len < remaining_size) {
				/* Split off exactly pkt_data_len bytes for this packet */
				error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data);
				if (error) {
					FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
					pkt_data = NULL;
					break;
				}
				remaining_size -= pkt_data_len;
			} else {
				remaining_data = NULL;
				remaining_size = 0;
			}

			error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len);
			if (error) {
				/* pkt_data is still owned here; it gets buffered or freed below */
				break;
			}

			pkt_data = NULL;
			sent += pkt_data_len;
		}

		/* Shrink the send window by what was sent, clamping at zero */
		if (fd_cb->send_window >= sent) {
			fd_cb->send_window -= sent;
		} else {
			fd_cb->send_window = 0;
		}

		error = 0;

		/* Buffer the unsent packet piece, or drop it if the buffer is full */
		if (pkt_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(pkt_data);
				error = ENOBUFS;
			}
		}

		/* Buffer the remaining unsent data, or drop it if the buffer is full */
		if (remaining_data != NULL) {
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) {
					FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n",
					    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
				}
			} else {
				mbuf_freem(remaining_data);
				error = ENOBUFS;
			}
		}
	} else if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
		/* Datagram: send whole (possibly fragmented) or buffer whole */
		int send_dgram_error = 0;
		if (to_send || data_size == 0) {
			if (data_size <= FLOW_DIVERT_CHUNK_SIZE) {
				send_dgram_error = flow_divert_send_datagram_packet(fd_cb, data, data_size, toaddr, FALSE, 0);
			} else {
				/* Fragmenting consumes data regardless of outcome */
				send_dgram_error = flow_divert_send_fragmented_datagram(fd_cb, data, data_size, toaddr);
				data = NULL;
			}
			if (send_dgram_error) {
				FDLOG(LOG_NOTICE, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu", send_dgram_error, data_size);
			} else {
				/* Sent: shrink the send window, clamping at zero */
				if (data_size >= fd_cb->send_window) {
					fd_cb->send_window = 0;
				} else {
					fd_cb->send_window -= data_size;
				}
				data = NULL;
			}
		}

		if (data != NULL) {
			/* buffer it */
			if (sbspace(&fd_cb->so->so_snd) > 0) {
				if (toaddr != NULL) {
					int append_error = 0;
					if (!sbappendaddr(&fd_cb->so->so_snd, toaddr, data, NULL, &append_error)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendaddr failed. send buffer size = %u, send_window = %u, error = %d",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window, append_error);
					}
				} else {
					if (!sbappendrecord(&fd_cb->so->so_snd, data)) {
						FDLOG(LOG_ERR, fd_cb,
						    "sbappendrecord failed. send buffer size = %u, send_window = %u",
						    fd_cb->so->so_snd.sb_cc, fd_cb->send_window);
					}
				}
			} else {
				/* No buffer space: the datagram is dropped */
				FDLOG(LOG_ERR, fd_cb, "flow_divert_send_datagram_packet failed with error %d, send data size = %lu, dropping the datagram", error, data_size);
				mbuf_freem(data);
			}
		}
	}

	return error;
}
1970
1971 static int
flow_divert_send_read_notification(struct flow_divert_pcb * fd_cb)1972 flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb)
1973 {
1974 int error = 0;
1975 mbuf_ref_t packet = NULL;
1976
1977 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet);
1978 if (error) {
1979 FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error);
1980 goto done;
1981 }
1982
1983 error = flow_divert_send_packet(fd_cb, packet);
1984 if (error) {
1985 goto done;
1986 }
1987
1988 done:
1989 if (error && packet != NULL) {
1990 mbuf_free(packet);
1991 }
1992
1993 return error;
1994 }
1995
1996 static int
flow_divert_send_traffic_class_update(struct flow_divert_pcb * fd_cb,int traffic_class)1997 flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class)
1998 {
1999 int error = 0;
2000 mbuf_ref_t packet = NULL;
2001
2002 error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet);
2003 if (error) {
2004 FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error);
2005 goto done;
2006 }
2007
2008 error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class);
2009 if (error) {
2010 FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error);
2011 goto done;
2012 }
2013
2014 error = flow_divert_send_packet(fd_cb, packet);
2015 if (error) {
2016 goto done;
2017 }
2018
2019 done:
2020 if (error && packet != NULL) {
2021 mbuf_free(packet);
2022 }
2023
2024 return error;
2025 }
2026
2027 static void
flow_divert_set_local_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * local_endpoint)2028 flow_divert_set_local_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *local_endpoint)
2029 {
2030 struct inpcb *inp = sotoinpcb(fd_cb->so);
2031
2032 if (local_endpoint->sa_family == AF_INET6) {
2033 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2034 fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2035 inp->in6p_laddr = (satosin6(local_endpoint))->sin6_addr;
2036 inp->inp_lifscope = (satosin6(local_endpoint))->sin6_scope_id;
2037 in6_verify_ifscope(&inp->in6p_laddr, inp->inp_lifscope);
2038 }
2039 if (inp->inp_lport == 0) {
2040 inp->inp_lport = (satosin6(local_endpoint))->sin6_port;
2041 }
2042 } else if (local_endpoint->sa_family == AF_INET) {
2043 if (inp->inp_laddr.s_addr == INADDR_ANY && (fd_cb->flags & FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR)) {
2044 fd_cb->flags |= FLOW_DIVERT_DID_SET_LOCAL_ADDR;
2045 inp->inp_laddr = (satosin(local_endpoint))->sin_addr;
2046 }
2047 if (inp->inp_lport == 0) {
2048 inp->inp_lport = (satosin(local_endpoint))->sin_port;
2049 }
2050 }
2051 }
2052
2053 static void
flow_divert_set_remote_endpoint(struct flow_divert_pcb * fd_cb,struct sockaddr * remote_endpoint)2054 flow_divert_set_remote_endpoint(struct flow_divert_pcb *fd_cb, struct sockaddr *remote_endpoint)
2055 {
2056 struct inpcb *inp = sotoinpcb(fd_cb->so);
2057
2058 if (remote_endpoint->sa_family == AF_INET6) {
2059 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
2060 inp->in6p_faddr = (satosin6(remote_endpoint))->sin6_addr;
2061 inp->inp_fifscope = (satosin6(remote_endpoint))->sin6_scope_id;
2062 in6_verify_ifscope(&inp->in6p_faddr, inp->inp_fifscope);
2063 }
2064 if (inp->inp_fport == 0) {
2065 inp->inp_fport = (satosin6(remote_endpoint))->sin6_port;
2066 }
2067 } else if (remote_endpoint->sa_family == AF_INET) {
2068 if (inp->inp_faddr.s_addr == INADDR_ANY) {
2069 inp->inp_faddr = (satosin(remote_endpoint))->sin_addr;
2070 }
2071 if (inp->inp_fport == 0) {
2072 inp->inp_fport = (satosin(remote_endpoint))->sin_port;
2073 }
2074 }
2075 }
2076
/*
 * Derive the next kernel control unit this flow should attach to.
 *
 * On input, *ctl_unit is the preferred unit (possibly the special
 * FLOW_DIVERT_IN_PROCESS_UNIT marker) and *aggregate_unit is a bitmask of
 * fallback units. Both are consumed/updated so that successive calls walk
 * through the remaining candidates. *is_aggregate is set to true when the
 * returned unit came from the aggregate mask, or when aggregate fallbacks
 * remain behind an in-process unit.
 * Returns the chosen control unit, or 0 when no candidate remains.
 */
static uint32_t
flow_divert_derive_kernel_control_unit(pid_t pid, uint32_t *ctl_unit, uint32_t *aggregate_unit, bool *is_aggregate)
{
	uint32_t result = *ctl_unit;

	// There are two models supported for deriving control units:
	// 1. A series of flow divert units that allow "transparently" failing
	// over to the next unit. For this model, the aggregate_unit contains list
	// of all control units (between 1 and 30) masked over each other.
	// 2. An indication that in-process flow divert should be preferred, with
	// an out of process flow divert to fail over to. For this model, the
	// ctl_unit is FLOW_DIVERT_IN_PROCESS_UNIT. In this case, that unit
	// is returned first, with the unpacked aggregate unit returned as a
	// fallback.
	*is_aggregate = false;
	if (*ctl_unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
		bool found_unit = false;
		if (pid != 0) {
			// Look for an in-process group that is already open, and use that unit
			struct flow_divert_group *group = NULL;
			TAILQ_FOREACH(group, &g_flow_divert_in_process_group_list, chain) {
				if (group->in_process_pid == pid) {
					// Found an in-process group for our same PID, use it
					found_unit = true;
					result = group->ctl_unit;
					break;
				}
			}

			// If an in-process group isn't open yet, send a signal up through NECP to request one
			if (!found_unit) {
				necp_client_request_in_process_flow_divert(pid);
			}
		}

		// If a unit was found, return it
		if (found_unit) {
			if (aggregate_unit != NULL && *aggregate_unit != 0) {
				*is_aggregate = true;
			}
			// The next time around, the aggregate unit values will be picked up
			*ctl_unit = 0;
			return result;
		}

		// If no unit was found, fall through and clear out the ctl_unit
		result = 0;
		*ctl_unit = 0;
	}

	if (aggregate_unit != NULL && *aggregate_unit != 0) {
		uint32_t counter;
		struct flow_divert_group *lower_order_group = NULL;

		// Among the units set in the aggregate mask that have an open
		// group, pick the group with the lowest "order" value
		for (counter = 0; counter < (GROUP_COUNT_MAX - 1); counter++) {
			if ((*aggregate_unit) & (1 << counter)) {
				struct flow_divert_group *group = NULL;
				group = flow_divert_group_lookup(counter + 1, NULL);

				if (group != NULL) {
					if (lower_order_group == NULL) {
						lower_order_group = group;
					} else if ((group->order < lower_order_group->order)) {
						lower_order_group = group;
					}
				}
			}
		}

		if (lower_order_group != NULL) {
			// Consume the chosen unit from the mask so it is not
			// returned again on a subsequent call
			*aggregate_unit &= ~(1 << (lower_order_group->ctl_unit - 1));
			*is_aggregate = true;
			return lower_order_group->ctl_unit;
		} else {
			*ctl_unit = 0;
			return result;
		}
	} else {
		*ctl_unit = 0;
		return result;
	}
}
2159
/*
 * Fail the flow over to the next eligible flow divert group.
 *
 * Removes the PCB from its current group, derives the next control unit
 * (consuming the policy unit and the aggregate fallback mask), inserts the
 * PCB into the corresponding group, and re-sends the connect packet.
 * Returns 0 on success; EALREADY when the next unit equals the current
 * one, and ENOENT when no usable group remains or the connect packet
 * could not be sent (callers treat both as "disable flow divert").
 */
static int
flow_divert_try_next_group(struct flow_divert_pcb *fd_cb)
{
	int error = 0;
	uint32_t policy_control_unit = fd_cb->policy_control_unit;

	flow_divert_pcb_remove(fd_cb);

	do {
		struct flow_divert_group *next_group = NULL;
		bool is_aggregate = false;
		uint32_t next_ctl_unit = flow_divert_derive_kernel_control_unit(0, &policy_control_unit, &(fd_cb->aggregate_unit), &is_aggregate);

		if (fd_cb->control_group_unit == next_ctl_unit) {
			FDLOG0(LOG_NOTICE, fd_cb, "Next control unit is the same as the current control unit, disabling flow divert");
			error = EALREADY;
			break;
		}

		if (next_ctl_unit == 0 || next_ctl_unit >= GROUP_COUNT_MAX) {
			FDLOG0(LOG_NOTICE, fd_cb, "No more valid control units, disabling flow divert");
			error = ENOENT;
			break;
		}

		next_group = flow_divert_group_lookup(next_ctl_unit, fd_cb);
		if (next_group == NULL) {
			/* This unit has no open group; try the next candidate */
			FDLOG(LOG_NOTICE, fd_cb, "Group for control unit %u does not exist", next_ctl_unit);
			continue;
		}

		FDLOG(LOG_NOTICE, fd_cb, "Moving from %u to %u", fd_cb->control_group_unit, next_ctl_unit);

		error = flow_divert_pcb_insert(fd_cb, next_group);
		if (error == 0) {
			/* Track whether this flow is on a transparent (aggregate) unit */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}
		}
		FDGRP_RELEASE(next_group);
	} while (fd_cb->group == NULL);

	if (fd_cb->group == NULL) {
		return error ? error : ENOENT;
	}

	error = flow_divert_send_connect_packet(fd_cb);
	if (error) {
		FDLOG(LOG_NOTICE, fd_cb, "Failed to send the connect packet to %u, disabling flow divert", fd_cb->control_group_unit);
		flow_divert_pcb_remove(fd_cb);
		error = ENOENT;
	}

	return error;
}
2217
/*
 * Permanently take flow divert out of the data path for this socket.
 *
 * Restores the inpcb addressing state that flow divert modified, detaches
 * the flow divert PCB from the socket, reverts the socket to its original
 * protocol, and then replays any pending connect and buffered send data
 * through that original protocol so queued application data is not lost.
 * NOTE(review): callers in this file invoke this with the socket locked —
 * confirm against any new call sites.
 */
static void
flow_divert_disable(struct flow_divert_pcb *fd_cb)
{
	struct socket *so = NULL;
	mbuf_ref_t buffer;
	int error = 0;
	proc_t last_proc = NULL;
	struct sockaddr *remote_endpoint = fd_cb->original_remote_endpoint;
	bool do_connect = !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT);
	struct inpcb *inp = NULL;

	so = fd_cb->so;
	if (so == NULL) {
		goto done;
	}

	FDLOG0(LOG_NOTICE, fd_cb, "Skipped all flow divert services, disabling flow divert");

	/* Restore the IP state */
	inp = sotoinpcb(so);
	inp->inp_vflag = fd_cb->original_vflag;
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;
	memset(&(inp->in6p_faddr), 0, sizeof(inp->in6p_faddr));
	inp->inp_fifscope = IFSCOPE_NONE;
	inp->in6p_fport = 0;
	/* If flow divert set the local address, clear it out */
	if (fd_cb->flags & FLOW_DIVERT_DID_SET_LOCAL_ADDR) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		memset(&(inp->in6p_laddr), 0, sizeof(inp->in6p_laddr));
		inp->inp_lifscope = IFSCOPE_NONE;
	}
	inp->inp_last_outifp = fd_cb->original_last_outifp;
	inp->in6p_last_outifp = fd_cb->original_last_outifp6;

	/* Dis-associate the socket */
	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
	so->so_fd_pcb = NULL;
	fd_cb->so = NULL;

	FDRELEASE(fd_cb); /* Release the socket's reference */

	/* Revert back to the original protocol */
	so->so_proto = pffindproto(SOCK_DOM(so), SOCK_PROTO(so), SOCK_TYPE(so));

	/* Reset the socket state to avoid confusing NECP */
	so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED);

	last_proc = proc_find(so->last_pid);

	if (do_connect) {
		/* Connect using the original protocol */
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, remote_endpoint, (last_proc != NULL ? last_proc : current_proc()));
		if (error) {
			FDLOG(LOG_ERR, fd_cb, "Failed to connect using the socket's original protocol: %d", error);
			goto done;
		}
	}

	buffer = so->so_snd.sb_mb;
	if (buffer == NULL) {
		/* No buffered data, done */
		goto done;
	}

	/* Send any buffered data using the original protocol */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		mbuf_ref_t data_to_send = NULL;
		size_t data_len = so->so_snd.sb_cc;

		/* Copy the whole chain first so the send buffer can be flushed */
		error = mbuf_copym(buffer, 0, data_len, MBUF_DONTWAIT, &data_to_send);
		if (error) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to copy the mbuf chain in the socket's send buffer");
			goto done;
		}

		sbflush(&so->so_snd);

		if (data_to_send->m_flags & M_PKTHDR) {
			mbuf_pkthdr_setlen(data_to_send, data_len);
		}

		error = (*so->so_proto->pr_usrreqs->pru_send)(so,
		    0,
		    data_to_send,
		    NULL,
		    NULL,
		    (last_proc != NULL ? last_proc : current_proc()));

		if (error && error != EWOULDBLOCK) {
			FDLOG(LOG_ERR, fd_cb, "Failed to send queued TCP data using the socket's original protocol: %d", error);
		} else {
			/* EWOULDBLOCK means the data is queued; not a failure */
			error = 0;
		}
	} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct sockbuf *sb = &so->so_snd;
		MBUFQ_HEAD(send_queue_head) send_queue;
		MBUFQ_INIT(&send_queue);

		/* Flush the send buffer, moving all records to a temporary queue */
		while (sb->sb_mb != NULL) {
			mbuf_ref_t record = sb->sb_mb;
			mbuf_ref_t m = record;
			sb->sb_mb = sb->sb_mb->m_nextpkt;
			while (m != NULL) {
				/* Adjust sockbuf accounting for each mbuf removed */
				sbfree(sb, m);
				m = m->m_next;
			}
			record->m_nextpkt = NULL;
			MBUFQ_ENQUEUE(&send_queue, record);
		}
		SB_EMPTY_FIXUP(sb);

		while (!MBUFQ_EMPTY(&send_queue)) {
			mbuf_ref_t next_record = MBUFQ_FIRST(&send_queue);
			mbuf_ref_t addr = NULL;
			mbuf_ref_t control = NULL;
			mbuf_ref_t last_control = NULL;
			mbuf_ref_t data = NULL;
			mbuf_ref_t m = next_record;
			struct sockaddr *to_endpoint = NULL;

			MBUFQ_DEQUEUE(&send_queue, next_record);

			/* Locate the address, control, and data mbufs in the record */
			while (m != NULL) {
				if (m->m_type == MT_SONAME) {
					addr = m;
				} else if (m->m_type == MT_CONTROL) {
					if (control == NULL) {
						control = m;
					}
					last_control = m;
				} else if (m->m_type == MT_DATA) {
					data = m;
					break;
				}
				m = m->m_next;
			}

			if (addr != NULL && !do_connect) {
				to_endpoint = flow_divert_get_buffered_target_address(addr);
				if (to_endpoint == NULL) {
					FDLOG0(LOG_NOTICE, fd_cb, "Failed to get the remote address from the buffer");
				}
			}

			if (data == NULL) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered record does not contain any data");
				mbuf_freem(next_record);
				continue;
			}

			if (!(data->m_flags & M_PKTHDR)) {
				FDLOG0(LOG_ERR, fd_cb, "Buffered data does not have a packet header");
				mbuf_freem(next_record);
				continue;
			}

			/* Detach the data portion; pru_send consumes data and control */
			if (addr != NULL) {
				addr->m_next = NULL;
			}

			if (last_control != NULL) {
				last_control->m_next = NULL;
			}

			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    0,
			    data,
			    to_endpoint,
			    control,
			    (last_proc != NULL ? last_proc : current_proc()));

			if (addr != NULL) {
				mbuf_freem(addr);
			}

			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to send queued UDP data using the socket's original protocol: %d", error);
			}
		}
	}
done:
	if (last_proc != NULL) {
		proc_rele(last_proc);
	}

	if (error && so != NULL) {
		so->so_error = (uint16_t)error;
		flow_divert_disconnect_socket(so, do_connect, false);
	}
}
2411
/*
 * Scope the flow's socket to the interface with index out_if_index.
 *
 * If the socket is already bound to a different interface it is re-bound
 * via inp_bindif() and, when derive_new_address is set, a new local
 * address appropriate for the interface is derived from the original
 * remote endpoint. The inpcb's "last output interface" is updated in
 * either case. Does nothing when the socket is gone or out_if_index is
 * not positive.
 */
static void
flow_divert_scope(struct flow_divert_pcb *fd_cb, int out_if_index, bool derive_new_address)
{
	struct socket *so = NULL;
	struct inpcb *inp = NULL;
	struct ifnet *current_ifp = NULL;
	struct ifnet * __single new_ifp = NULL;
	int error = 0;

	so = fd_cb->so;
	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);

	if (out_if_index <= 0) {
		return;
	}

	if (inp->inp_vflag & INP_IPV6) {
		current_ifp = inp->in6p_last_outifp;
	} else {
		current_ifp = inp->inp_last_outifp;
	}

	if (current_ifp != NULL) {
		if (current_ifp->if_index == out_if_index) {
			/* No change */
			return;
		}

		/* Scope the socket to the given interface */
		error = inp_bindif(inp, out_if_index, &new_ifp);
		if (error != 0) {
			FDLOG(LOG_ERR, fd_cb, "failed to scope to %d because inp_bindif returned %d", out_if_index, error);
			return;
		}

		if (derive_new_address && fd_cb->original_remote_endpoint != NULL) {
			/* Get the appropriate address for the given interface */
			if (inp->inp_vflag & INP_IPV6) {
				/* Clear the current local address before deriving */
				inp->in6p_laddr = sa6_any.sin6_addr;
				error = in6_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin6.sin6_addr), NULL);
			} else {
				inp->inp_laddr.s_addr = INADDR_ANY;
				error = in_pcbladdr(inp, fd_cb->original_remote_endpoint, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, NULL, 0);
			}

			if (error != 0) {
				FDLOG(LOG_WARNING, fd_cb, "failed to derive a new local address from %d because in_pcbladdr returned %d", out_if_index, error);
			}
		}
	} else {
		/* No current interface: just resolve the index to an ifnet */
		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(out_if_index)) {
			new_ifp = ifindex2ifnet[out_if_index];
		}
		ifnet_head_done();
	}

	/* Update the "last interface" of the socket */
	if (new_ifp != NULL) {
		if (inp->inp_vflag & INP_IPV6) {
			inp->in6p_last_outifp = new_ifp;
		} else {
			inp->inp_last_outifp = new_ifp;
		}

#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, new_ifp);
		}
#endif /* SKYWALK */
	}
}
2488
/*
 * Process a connect-result packet from the flow divert provider.
 *
 * Extracts the result code, send window, optional control unit, local and
 * remote endpoints, output interface index, and application data from the
 * packet's TLVs, applies them to the socket, and either completes the
 * connection (soisconnected) or fails over to the next group / disables
 * flow divert on error. 'offset' is the byte offset of the first TLV in
 * 'packet'.
 */
static void
flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t connect_error = 0;
	uint32_t ctl_unit = 0;
	int error = 0;
	union sockaddr_in_4_6 local_endpoint = {};
	union sockaddr_in_4_6 remote_endpoint = {};
	int out_if_index = 0;
	uint32_t send_window = 0;
	uint32_t app_data_length = 0;

	memset(&local_endpoint, 0, sizeof(local_endpoint));
	memset(&remote_endpoint, 0, sizeof(remote_endpoint));

	/* The error code and send window TLVs are mandatory */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error);
		return;
	}

	connect_error = ntohl(connect_error);
	FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error);

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error);
		return;
	}

	/* The remaining TLVs are optional */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No control unit provided in the connect result");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_endpoint), &(local_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No local address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_endpoint), &(remote_endpoint.sin6), NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided");
	}

	/* Length-only query; the data itself is copied below if present */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in connect result");
	}

	error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		struct inpcb *inp = NULL;
		struct socket *so = fd_cb->so;
		bool local_address_is_valid = false;

		socket_lock(so, 1);

		if (!(so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring connect result");
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_state & SS_ISCONNECTING)) {
			FDLOG0(LOG_ERR, fd_cb, "TCP socket is not in the connecting state, ignoring connect result");
			goto done;
		}

		inp = sotoinpcb(so);

		if (connect_error || error) {
			goto set_socket_state;
		}

		/*
		 * Adopt the provider's local endpoint. Only a usable address of
		 * the socket's own family marks the address valid; otherwise
		 * just the port is taken.
		 */
		if (flow_divert_is_sockaddr_valid(SA(&local_endpoint))) {
			if (local_endpoint.sa.sa_family == AF_INET) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
				if ((inp->inp_vflag & INP_IPV4) && local_endpoint.sin.sin_addr.s_addr != INADDR_ANY) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->inp_laddr.s_addr = INADDR_ANY;
				} else {
					fd_cb->local_endpoint.sin.sin_port = local_endpoint.sin.sin_port;
				}
			} else if (local_endpoint.sa.sa_family == AF_INET6) {
				local_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
				if ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&local_endpoint.sin6.sin6_addr)) {
					local_address_is_valid = true;
					fd_cb->local_endpoint = local_endpoint;
					inp->in6p_laddr = sa6_any.sin6_addr;
				} else {
					fd_cb->local_endpoint.sin6.sin6_port = local_endpoint.sin6.sin6_port;
				}
			}
		}

		flow_divert_scope(fd_cb, out_if_index, !local_address_is_valid);
		flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));

		/* Remote endpoint is only applied to stream sockets */
		if (flow_divert_is_sockaddr_valid(SA(&remote_endpoint)) && SOCK_TYPE(so) == SOCK_STREAM) {
			if (remote_endpoint.sa.sa_family == AF_INET) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in);
			} else if (remote_endpoint.sa.sa_family == AF_INET6) {
				remote_endpoint.sa.sa_len = sizeof(struct sockaddr_in6);
			}
			flow_divert_set_remote_endpoint(fd_cb, SA(&remote_endpoint));
		}

		/* Copy any application data, replacing what was stored before */
		if (app_data_length > 0) {
			uint8_t * app_data = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					FDLOG(LOG_INFO, fd_cb, "Got %u bytes of app data from the connect result", app_data_length);
					if (fd_cb->app_data != NULL) {
						kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the connect result packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the connect result", app_data_length);
			}
		}

		if (error) {
			goto set_socket_state;
		}

		if (fd_cb->group == NULL) {
			error = EINVAL;
			goto set_socket_state;
		}

		/* If the provider asked for a different control unit, move there */
		ctl_unit = ntohl(ctl_unit);
		if (ctl_unit > 0) {
			int insert_error = 0;
			struct flow_divert_group *grp = NULL;

			if (ctl_unit >= GROUP_COUNT_MAX) {
				FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit);
				error = EINVAL;
				goto set_socket_state;
			}

			grp = flow_divert_group_lookup(ctl_unit, fd_cb);
			if (grp == NULL) {
				error = ECONNRESET;
				goto set_socket_state;
			}

			flow_divert_pcb_remove(fd_cb);
			insert_error = flow_divert_pcb_insert(fd_cb, grp);
			FDGRP_RELEASE(grp);

			if (insert_error != 0) {
				error = ECONNRESET;
				goto set_socket_state;
			}
		}

		fd_cb->send_window = ntohl(send_window);

set_socket_state:
		if (!connect_error && !error) {
			FDLOG0(LOG_INFO, fd_cb, "sending connect result");
			error = flow_divert_send_connect_result(fd_cb);
		}

		if (connect_error || error) {
			/* On a provider-reported error, try failing over first */
			if (connect_error && fd_cb->control_group_unit != fd_cb->policy_control_unit) {
				error = flow_divert_try_next_group(fd_cb);
				if (error && fd_cb->policy_control_unit == 0) {
					flow_divert_disable(fd_cb);
					goto done;
				} else if (error == 0) {
					goto done;
				}
			}

			if (!connect_error) {
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
				so->so_error = (uint16_t)error;
				flow_divert_send_close_if_needed(fd_cb);
			} else {
				flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
				so->so_error = (uint16_t)connect_error;
			}
			flow_divert_disconnect_socket(so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
		} else {
#if NECP
			/* Update NECP client with connected five-tuple */
			if (!uuid_is_null(inp->necp_client_uuid)) {
				/* Must drop the socket lock to call into NECP */
				socket_unlock(so, 0);
				necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
				socket_lock(so, 0);
				if (!(so->so_flags & SOF_FLOW_DIVERT)) {
					/* The socket was closed while it was unlocked */
					goto done;
				}
			}
#endif /* NECP */

			flow_divert_send_buffered_data(fd_cb, FALSE);
			soisconnected(so);
		}

		/* We don't need the connect packet any more */
		if (fd_cb->connect_packet != NULL) {
			mbuf_freem(fd_cb->connect_packet);
			fd_cb->connect_packet = NULL;
		}

		/* We don't need the original remote endpoint any more */
		free_sockaddr(fd_cb->original_remote_endpoint);
done:
		socket_unlock(so, 1);
	}
	FDUNLOCK(fd_cb);
}
2720
/*
 * Process a close packet from the flow divert provider.
 *
 * Reads the error code and the "how" direction from the packet TLVs,
 * records the tunnel's closed state on the flow, and shuts the socket
 * down in the corresponding direction(s). 'offset' is the byte offset of
 * the first TLV in 'packet'.
 */
static void
flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	uint32_t close_error = 0;
	int error = 0;
	int how = 0;

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error);
		return;
	}

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error);
		return;
	}

	how = ntohl(how);

	FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how);

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		bool is_connected = (SOCK_TYPE(fd_cb->so) == SOCK_STREAM || !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT));
		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring close from provider");
			goto done;
		}

		fd_cb->so->so_error = (uint16_t)ntohl(close_error);

		flow_divert_update_closed_state(fd_cb, how, true, true);

		/* Only do this for stream flows because "shutdown by peer" doesn't make sense for datagram flows */
		/* 'how' is recomputed from the flow's accumulated closed state */
		how = flow_divert_tunnel_how_closed(fd_cb);
		if (how == SHUT_RDWR) {
			flow_divert_disconnect_socket(fd_cb->so, is_connected, true);
		} else if (how == SHUT_RD && is_connected) {
			socantrcvmore(fd_cb->so);
		} else if (how == SHUT_WR && is_connected) {
			socantsendmore(fd_cb->so);
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
2772
2773 static mbuf_ref_t
flow_divert_create_control_mbuf(struct flow_divert_pcb * fd_cb)2774 flow_divert_create_control_mbuf(struct flow_divert_pcb *fd_cb)
2775 {
2776 struct inpcb *inp = sotoinpcb(fd_cb->so);
2777 bool need_recvdstaddr = false;
2778 /* Socket flow tracking needs to see the local address */
2779 need_recvdstaddr = SOFLOW_ENABLED(inp->inp_socket);
2780 if ((inp->inp_vflag & INP_IPV4) &&
2781 fd_cb->local_endpoint.sa.sa_family == AF_INET &&
2782 ((inp->inp_flags & INP_RECVDSTADDR) || need_recvdstaddr)) {
2783 return sbcreatecontrol((caddr_t)&(fd_cb->local_endpoint.sin.sin_addr), sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2784 } else if ((inp->inp_vflag & INP_IPV6) &&
2785 fd_cb->local_endpoint.sa.sa_family == AF_INET6 &&
2786 ((inp->inp_flags & IN6P_PKTINFO) || need_recvdstaddr)) {
2787 struct in6_pktinfo pi6;
2788 memset(&pi6, 0, sizeof(pi6));
2789 pi6.ipi6_addr = fd_cb->local_endpoint.sin6.sin6_addr;
2790
2791 return sbcreatecontrol((caddr_t)&pi6, sizeof(pi6), IPV6_PKTINFO, IPPROTO_IPV6);
2792 }
2793 return NULL;
2794 }
2795
/*
 * Process an inbound data packet from the provider, appending the payload
 * to the socket's receive buffer.
 *
 * For datagram sockets an optional remote-address TLV may precede the
 * payload; the payload is everything in 'packet' past 'offset'. Returns 0
 * on success or an errno; ENOBUFS means the receive buffer was full, in
 * which case FLOW_DIVERT_NOTIFY_ON_RECEIVED is armed so a read
 * notification is sent once the application drains some data.
 */
static int
flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, size_t offset)
{
	int error = 0;

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		mbuf_ref_t data = NULL;
		size_t data_size;
		struct sockaddr_storage remote_address;
		boolean_t got_remote_sa = FALSE;
		boolean_t appended = FALSE;
		boolean_t append_success = FALSE;

		socket_lock(fd_cb->so, 0);

		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring inbound data");
			goto done;
		}

		if (sbspace(&fd_cb->so->so_rcv) == 0) {
			error = ENOBUFS;
			fd_cb->flags |= FLOW_DIVERT_NOTIFY_ON_RECEIVED;
			FDLOG0(LOG_INFO, fd_cb, "Receive buffer is full, will send read notification when app reads some data");
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_DGRAM) {
			uint32_t val_size = 0;

			/* check if we got remote address with data */
			memset(&remote_address, 0, sizeof(remote_address));
			error = flow_divert_packet_get_tlv(packet, (int)offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, &val_size);
			if (error || val_size > sizeof(remote_address)) {
				FDLOG0(LOG_INFO, fd_cb, "No remote address provided");
				error = 0;
			} else {
				if (remote_address.ss_len > sizeof(remote_address)) {
					remote_address.ss_len = sizeof(remote_address);
				}
				/* validate the address */
				if (flow_divert_is_sockaddr_valid((struct sockaddr *)&remote_address)) {
					got_remote_sa = TRUE;
				} else {
					FDLOG0(LOG_INFO, fd_cb, "Remote address is invalid");
				}
				/* Advance past the address TLV (type + length + value) */
				offset += (sizeof(uint8_t) + sizeof(uint32_t) + val_size);
			}
		}

		data_size = (mbuf_pkthdr_len(packet) - offset);

		if (fd_cb->so->so_state & SS_CANTRCVMORE) {
			FDLOG(LOG_NOTICE, fd_cb, "app cannot receive any more data, dropping %lu bytes of data", data_size);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) != SOCK_STREAM && SOCK_TYPE(fd_cb->so) != SOCK_DGRAM) {
			FDLOG(LOG_ERR, fd_cb, "socket has an unsupported type: %d", SOCK_TYPE(fd_cb->so));
			goto done;
		}

		FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size);

		/* Split off the payload; 'data' is owned by this function until appended */
		error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data);
		if (error || data == NULL) {
			FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error);
			goto done;
		}

		if (SOCK_TYPE(fd_cb->so) == SOCK_STREAM) {
			appended = (sbappendstream(&fd_cb->so->so_rcv, data) != 0);
			append_success = TRUE;
		} else {
			struct sockaddr * __single append_sa = NULL;
			mbuf_ref_t mctl;

			/* Use the provided remote address, or fall back to the peer address */
			if (got_remote_sa == TRUE) {
				error = flow_divert_dup_addr(remote_address.ss_family, (struct sockaddr *)&remote_address, &append_sa);
			} else {
				if (SOCK_CHECK_DOM(fd_cb->so, AF_INET6)) {
					error = in6_mapped_peeraddr(fd_cb->so, &append_sa);
				} else {
					error = in_getpeeraddr(fd_cb->so, &append_sa);
				}
			}
			if (error) {
				FDLOG0(LOG_ERR, fd_cb, "failed to dup the socket address.");
			}

			mctl = flow_divert_create_control_mbuf(fd_cb);
			int append_error = 0;
			appended = sbappendaddr(&fd_cb->so->so_rcv, append_sa, data, mctl, &append_error);
			if (appended || append_error == 0) {
				append_success = TRUE;
			} else {
				FDLOG(LOG_ERR, fd_cb, "failed to append %lu bytes of data: %d", data_size, append_error);
			}

			free_sockaddr(append_sa);
		}

		if (append_success) {
			fd_cb->bytes_received += data_size;
			flow_divert_add_data_statistics(fd_cb, data_size, FALSE);
		}

		if (appended) {
			sorwakeup(fd_cb->so);
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);

	return error;
}
2914
static void
flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	/*
	 * Handle a READ_NOTIFY message from the daemon. The daemon reports how
	 * many bytes it consumed, which re-opens the flow's send window and may
	 * allow buffered data to be flushed.
	 */
	uint32_t read_count = 0;
	int error = 0;

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error);
		return;
	}

	FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", ntohl(read_count));

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		socket_lock(fd_cb->so, 0);

		/* The socket may have detached from flow divert since this packet arrived */
		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring read notification");
			goto done;
		}

		/* The read count is carried in network byte order */
		fd_cb->send_window += ntohl(read_count);
		flow_divert_send_buffered_data(fd_cb, FALSE);
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
2945
2946 static void
flow_divert_handle_group_init(struct flow_divert_group * group,mbuf_ref_t packet,int offset)2947 flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
2948 {
2949 int error = 0;
2950 uint32_t key_size = 0;
2951 int log_level = 0;
2952 uint32_t flags = 0;
2953 int32_t order = FLOW_DIVERT_ORDER_LAST;
2954
2955 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size);
2956 if (error) {
2957 FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error);
2958 return;
2959 }
2960
2961 if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) {
2962 FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %u", key_size);
2963 return;
2964 }
2965
2966 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
2967 if (!error) {
2968 nil_pcb.log_level = (uint8_t)log_level;
2969 }
2970
2971 lck_rw_lock_exclusive(&group->lck);
2972
2973 if (group->flags & FLOW_DIVERT_GROUP_FLAG_DEFUNCT) {
2974 FDLOG(LOG_ERR, &nil_pcb, "Skipping (re)initialization of defunct group %u", group->ctl_unit);
2975 lck_rw_done(&group->lck);
2976 return;
2977 }
2978
2979 if (group->token_key != NULL) {
2980 kfree_data_sized_by(group->token_key, group->token_key_size);
2981 }
2982
2983 group->token_key = kalloc_data(key_size, Z_WAITOK);
2984 group->token_key_size = key_size;
2985 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL);
2986 if (error) {
2987 FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error);
2988 kfree_data_sized_by(group->token_key, group->token_key_size);
2989 lck_rw_done(&group->lck);
2990 return;
2991 }
2992
2993 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_FLAGS, sizeof(flags), &flags, NULL);
2994 if (!error) {
2995 group->flags = flags;
2996 }
2997
2998 error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ORDER, sizeof(order), &order, NULL);
2999 if (!error) {
3000 FDLOG(LOG_INFO, &nil_pcb, "group %u order is %u", group->ctl_unit, order);
3001 group->order = order;
3002 }
3003
3004 lck_rw_done(&group->lck);
3005 }
3006
static void
flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_ref_t packet, int offset)
{
	/*
	 * Handle a PROPERTIES_UPDATE message from the daemon. It may carry a new
	 * output interface index (used to re-scope the flow) and/or a new blob of
	 * opaque application data to store on the PCB. Both TLVs are optional.
	 */
	int error = 0;
	int out_if_index = 0;
	uint32_t app_data_length = 0;

	FDLOG0(LOG_INFO, fd_cb, "received a properties update");

	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No output if index provided in properties update");
	}

	/* Only the length is fetched here; the payload is copied under the locks below */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, 0, NULL, &app_data_length);
	if (error) {
		FDLOG0(LOG_INFO, fd_cb, "No application data provided in properties update");
	}

	FDLOCK(fd_cb);
	if (fd_cb->so != NULL) {
		socket_lock(fd_cb->so, 0);

		/* The socket may have detached from flow divert since this packet arrived */
		if (!(fd_cb->so->so_flags & SOF_FLOW_DIVERT)) {
			FDLOG0(LOG_NOTICE, fd_cb, "socket is not attached any more, ignoring properties update");
			goto done;
		}

		/* Re-scope the flow to the new output interface and refresh the local endpoint */
		if (out_if_index > 0) {
			flow_divert_scope(fd_cb, out_if_index, true);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (app_data_length > 0) {
			uint8_t * app_data __indexable = NULL;
			app_data = kalloc_data(app_data_length, Z_WAITOK);
			if (app_data != NULL) {
				error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_APP_DATA, app_data_length, app_data, NULL);
				if (error == 0) {
					/* Replace the old application data only after a successful copy */
					if (fd_cb->app_data != NULL) {
						kfree_data_sized_by(fd_cb->app_data, fd_cb->app_data_length);
					}
					fd_cb->app_data = app_data;
					fd_cb->app_data_length = app_data_length;
				} else {
					FDLOG(LOG_ERR, fd_cb, "Failed to copy %u bytes of application data from the properties update packet", app_data_length);
					kfree_data(app_data, app_data_length);
				}
			} else {
				FDLOG(LOG_ERR, fd_cb, "Failed to allocate a buffer of size %u to hold the application data from the properties update", app_data_length);
			}
		}
done:
		socket_unlock(fd_cb->so, 0);
	}
	FDUNLOCK(fd_cb);
}
3064
static void
flow_divert_handle_app_map_create(struct flow_divert_group *group, mbuf_ref_t packet, int offset)
{
	/*
	 * Handle an APP_MAP_CREATE message: rebuild the group's signing-identifier
	 * prefix trie from the set of SIGNING_ID TLVs in the packet. The trie's
	 * nodes, child maps and byte storage are carved out of a single allocation.
	 * The old trie is discarded unconditionally; the new one is installed only
	 * if every signing ID inserts cleanly.
	 */
	size_t bytes_mem_size = 0;
	size_t child_maps_mem_size = 0;
	size_t nodes_mem_size = 0;
	size_t trie_memory_size = 0;
	int cursor = 0;
	int error = 0;
	struct flow_divert_trie new_trie;
	int insert_error = 0;
	int prefix_count = -1;
	int signing_id_count = 0;
	size_t bytes_count = 0;
	size_t nodes_count = 0;
	size_t maps_count = 0;

	lck_rw_lock_exclusive(&group->lck);

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	memset(&new_trie, 0, sizeof(new_trie));

	/* Get the number of shared prefixes in the new set of signing ID strings */
	error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);

	if (prefix_count < 0 || error) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
		lck_rw_done(&group->lck);
		return;
	}

	/* Compute the number of signing IDs and the total amount of bytes needed to store them */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
			signing_id_count = 0;
			break;
		}
		if (os_add_overflow(bytes_count, sid_size, &bytes_count)) {
			FDLOG0(LOG_ERR, &nil_pcb, "Overflow while incrementing number of bytes");
			signing_id_count = 0;
			break;
		}
		signing_id_count++;
	}

	if (signing_id_count == 0) {
		lck_rw_done(&group->lck);
		FDLOG0(LOG_NOTICE, &nil_pcb, "No signing identifiers");
		return;
	}

	if (os_add3_overflow(prefix_count, signing_id_count, 1, &nodes_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of nodes");
		return;
	}

	if (os_add_overflow(prefix_count, 1, &maps_count)) { /* + 1 for the root node */
		lck_rw_done(&group->lck);
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing the number of maps");
		return;
	}

	/* Trie indices are 16-bit, so all counts must fit in a uint16_t */
	if (bytes_count > UINT16_MAX || nodes_count > UINT16_MAX || maps_count > UINT16_MAX) {
		lck_rw_done(&group->lck);
		FDLOG(LOG_NOTICE, &nil_pcb, "Invalid bytes count (%lu), nodes count (%lu) or maps count (%lu)", bytes_count, nodes_count, maps_count);
		return;
	}

	FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu",
	    nodes_count, maps_count, bytes_count);

	/* Compute the total allocation size with overflow checks on every step */
	if (os_mul_overflow(sizeof(*new_trie.nodes), (size_t)nodes_count, &nodes_mem_size) ||
	    os_mul3_overflow(sizeof(*new_trie.child_maps), CHILD_MAP_SIZE, (size_t)maps_count, &child_maps_mem_size) ||
	    os_mul_overflow(sizeof(*new_trie.bytes), (size_t)bytes_count, &bytes_mem_size) ||
	    os_add3_overflow(nodes_mem_size, child_maps_mem_size, bytes_mem_size, &trie_memory_size)) {
		FDLOG0(LOG_ERR, &nil_pcb, "Overflow while computing trie memory sizes");
		lck_rw_done(&group->lck);
		return;
	}

	if (trie_memory_size > FLOW_DIVERT_MAX_TRIE_MEMORY) {
		FDLOG(LOG_ERR, &nil_pcb, "Trie memory size (%lu) is too big (maximum is %u)", trie_memory_size, FLOW_DIVERT_MAX_TRIE_MEMORY);
		lck_rw_done(&group->lck);
		return;
	}

	new_trie.memory = kalloc_data(trie_memory_size, Z_WAITOK);
	new_trie.memory_size = trie_memory_size;
	if (new_trie.memory == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie",
		    nodes_mem_size + child_maps_mem_size + bytes_mem_size);
		lck_rw_done(&group->lck);
		return;
	}

	/* Initialize the free lists */
	new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory;
	new_trie.nodes_count = (uint16_t)nodes_count;

	new_trie.nodes_free_next = 0;
	memset(new_trie.nodes, 0, nodes_mem_size);

	new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size);
	new_trie.child_maps_count = (uint16_t)maps_count;
	new_trie.child_maps_size = child_maps_mem_size;

	/* Child maps are filled with 0xff == NULL_TRIE_IDX to mark empty slots */
	new_trie.child_maps_free_next = 0;
	memset(new_trie.child_maps, 0xff, child_maps_mem_size);

	new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
	new_trie.bytes_count = (uint16_t)bytes_count;

	new_trie.bytes_free_next = 0;
	memset(new_trie.bytes, 0, bytes_mem_size);

	/* The root is an empty node */
	new_trie.root = trie_node_alloc(&new_trie);

	/* Add each signing ID to the trie */
	for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0);
	    cursor >= 0;
	    cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
		uint32_t sid_size = 0;
		error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
		if (error || sid_size == 0) {
			FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
			insert_error = EINVAL;
			break;
		}
		if (sid_size <= UINT16_MAX && new_trie.bytes_free_next + (uint16_t)sid_size <= new_trie.bytes_count) {
			uint16_t new_node_idx;
			/* Copy the signing ID into the trie's byte storage, then link it in */
			error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
			if (error) {
				FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
				insert_error = EINVAL;
				break;
			}
			new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
			if (new_node_idx == NULL_TRIE_IDX) {
				insert_error = EINVAL;
				break;
			}
		} else {
			FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion");
			insert_error = ENOBUFS;
			break;
		}
	}

	/* Install the new trie only if every insertion succeeded */
	if (!insert_error) {
		group->signing_id_trie = new_trie;
	} else {
		kfree_data_sized_by(new_trie.memory, new_trie.memory_size);
	}

	lck_rw_done(&group->lck);
}
3234
static void
flow_divert_handle_flow_states_request(struct flow_divert_group *group)
{
	/*
	 * Handle a FLOW_STATES_REQUEST message: build a FLOW_STATES packet
	 * containing a FLOW_STATE TLV for every PCB in the group and enqueue it
	 * on the group's kernel control unit. PCBs are snapshotted (retained)
	 * under the group lock, then visited with only the per-PCB and socket
	 * locks held to respect lock ordering.
	 */
	struct flow_divert_pcb *fd_cb;
	mbuf_ref_t packet = NULL;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;
	int error = 0;
	uint32_t ctl_unit = 0;

	SLIST_INIT(&tmp_list);

	error = flow_divert_packet_init(&nil_pcb, FLOW_DIVERT_PKT_FLOW_STATES, &packet);
	if (error || packet == NULL) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_packet_init failed: %d, cannot send flow states", error);
		return;
	}

	lck_rw_lock_shared(&group->lck);

	if (!MBUFQ_EMPTY(&group->send_queue)) {
		FDLOG0(LOG_WARNING, &nil_pcb, "flow_divert_handle_flow_states_request: group send queue is not empty");
	}

	/* Remember the control unit so the enqueue can happen after dropping the group lock */
	ctl_unit = group->ctl_unit;

	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	lck_rw_done(&group->lck);

	SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
		FDLOCK(fd_cb);
		if (fd_cb->so != NULL) {
			struct flow_divert_flow_state state = {};
			socket_lock(fd_cb->so, 0);

			state.conn_id = fd_cb->hash;
			state.bytes_written_by_app = fd_cb->bytes_written_by_app;
			state.bytes_sent = fd_cb->bytes_sent;
			state.bytes_received = fd_cb->bytes_received;
			state.send_window = fd_cb->send_window;
			state.send_buffer_bytes = fd_cb->so->so_snd.sb_cc;

			error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_FLOW_STATE, sizeof(state), &state);
			if (error) {
				FDLOG(LOG_ERR, fd_cb, "Failed to add a flow state: %d", error);
			}

			socket_unlock(fd_cb->so, 0);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);
	}

	/* ctl_enqueuembuf consumes the packet on success; free it on failure */
	error = ctl_enqueuembuf(g_flow_divert_kctl_ref, ctl_unit, packet, CTL_DATA_EOR);
	if (error) {
		FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_handle_flow_states_request: ctl_enqueuembuf returned an error: %d", error);
		mbuf_freem(packet);
	}
}
3297
static int
flow_divert_input(mbuf_ref_t packet, struct flow_divert_group *group)
{
	/*
	 * Entry point for messages arriving from the daemon on a group's kernel
	 * control socket. Parses the packet header and dispatches to the proper
	 * handler: conn_id == 0 means a group-level message, otherwise the
	 * message targets a specific flow PCB. Always consumes (frees) `packet`.
	 */
	struct flow_divert_packet_header hdr;
	int error = 0;
	struct flow_divert_pcb *fd_cb;

	if (mbuf_pkthdr_len(packet) < sizeof(hdr)) {
		FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr));
		error = EINVAL;
		goto done;
	}

	error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error);
		error = ENOBUFS;
		goto done;
	}

	/* The connection ID is sent in network byte order */
	hdr.conn_id = ntohl(hdr.conn_id);

	if (hdr.conn_id == 0) {
		/* Group-level messages are not associated with any flow */
		switch (hdr.packet_type) {
		case FLOW_DIVERT_PKT_GROUP_INIT:
			flow_divert_handle_group_init(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_APP_MAP_CREATE:
			flow_divert_handle_app_map_create(group, packet, sizeof(hdr));
			break;
		case FLOW_DIVERT_PKT_FLOW_STATES_REQUEST:
			flow_divert_handle_flow_states_request(group);
			break;
		default:
			FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type);
			break;
		}
		goto done;
	}

	fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group);             /* This retains the PCB */
	if (fd_cb == NULL) {
		/* CLOSE/READ_NOTIFY for an already-gone PCB is normal; don't log those */
		if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) {
			FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id);
		}
		goto done;
	}

	switch (hdr.packet_type) {
	case FLOW_DIVERT_PKT_CONNECT_RESULT:
		flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_CLOSE:
		flow_divert_handle_close(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_DATA:
		error = flow_divert_handle_data(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_READ_NOTIFY:
		flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr));
		break;
	case FLOW_DIVERT_PKT_PROPERTIES_UPDATE:
		flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr));
		break;
	default:
		FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type);
		break;
	}

	FDRELEASE(fd_cb);               /* Balance the lookup's retain */

done:
	mbuf_freem(packet);
	return error;
}
3373
static void
flow_divert_close_all(struct flow_divert_group *group)
{
	/*
	 * Abort every flow in the group and mark the group defunct (typically
	 * because the daemon's control connection went away). PCBs are snapshotted
	 * under the group lock, then each is torn down with only the per-PCB and
	 * socket locks held, to respect lock ordering.
	 */
	struct flow_divert_pcb *fd_cb;
	SLIST_HEAD(, flow_divert_pcb) tmp_list;

	SLIST_INIT(&tmp_list);

	lck_rw_lock_exclusive(&group->lck);

	/* Drop any packets still queued for the daemon */
	MBUFQ_DRAIN(&group->send_queue);

	RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
		FDRETAIN(fd_cb);
		SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
	}

	/* Prevent any future (re)initialization of this group */
	group->flags |= FLOW_DIVERT_GROUP_FLAG_DEFUNCT;

	lck_rw_done(&group->lck);

	while (!SLIST_EMPTY(&tmp_list)) {
		fd_cb = SLIST_FIRST(&tmp_list);
		FDLOCK(fd_cb);
		SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry);
		if (fd_cb->so != NULL) {
			socket_lock(fd_cb->so, 0);
			flow_divert_pcb_remove(fd_cb);
			flow_divert_update_closed_state(fd_cb, SHUT_RDWR, true, true);
			fd_cb->so->so_error = ECONNABORTED;
			flow_divert_disconnect_socket(fd_cb->so, !(fd_cb->flags & FLOW_DIVERT_IMPLICIT_CONNECT), false);
			socket_unlock(fd_cb->so, 0);
		}
		FDUNLOCK(fd_cb);
		FDRELEASE(fd_cb);
	}
}
3411
void
flow_divert_detach(struct socket *so)
{
	/*
	 * Detach a socket from its flow divert PCB (called on socket teardown).
	 * Flushes buffered data, sends a close to the daemon if needed, removes
	 * the PCB from its group, severs the so <-> fd_cb linkage and releases
	 * the socket's reference on the PCB.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return;
	}

	so->so_flags &= ~SOF_FLOW_DIVERT;
	so->so_fd_pcb = NULL;

	FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count);

	if (fd_cb->group != NULL) {
		/* Last-ditch effort to send any buffered data */
		flow_divert_send_buffered_data(fd_cb, TRUE);

		flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
		flow_divert_send_close_if_needed(fd_cb);
		/* Remove from the group */
		flow_divert_pcb_remove(fd_cb);
	}

	/*
	 * FDLOCK must be taken before the socket lock, so temporarily drop the
	 * socket lock to clear fd_cb->so in the correct lock order.
	 */
	socket_unlock(so, 0);
	FDLOCK(fd_cb);
	fd_cb->so = NULL;
	FDUNLOCK(fd_cb);
	socket_lock(so, 0);

	FDRELEASE(fd_cb);               /* Release the socket's reference */
}
3444
static int
flow_divert_close(struct socket *so)
{
	/*
	 * pr_close handler for a diverted socket: flush buffered data to the
	 * daemon, mark the flow fully closed, notify the daemon, and remove the
	 * PCB from its group.
	 *
	 * Returns 0 on success or EINVAL if the socket is not diverted.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Closing");

	/* For TCP-style flows, start the disconnect and discard unread data */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		soisdisconnecting(so);
		sbflush(&so->so_rcv);
	}

	flow_divert_send_buffered_data(fd_cb, TRUE);
	flow_divert_update_closed_state(fd_cb, SHUT_RDWR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	/* Remove from the group */
	flow_divert_pcb_remove(fd_cb);

	return 0;
}
3470
3471 static int
flow_divert_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid __unused)3472 flow_divert_disconnectx(struct socket *so, sae_associd_t aid,
3473 sae_connid_t cid __unused)
3474 {
3475 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
3476 return EINVAL;
3477 }
3478
3479 return flow_divert_close(so);
3480 }
3481
static int
flow_divert_shutdown(struct socket *so)
{
	/*
	 * pr_shutdown handler for a diverted socket: close the write side only.
	 * Marks the socket as unable to send more, records the half-close, and
	 * notifies the daemon if needed.
	 *
	 * Returns 0 on success or EINVAL if the socket is not diverted.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	FDLOG0(LOG_INFO, fd_cb, "Can't send more");

	socantsendmore(so);

	flow_divert_update_closed_state(fd_cb, SHUT_WR, false, true);
	flow_divert_send_close_if_needed(fd_cb);

	return 0;
}
3500
static int
flow_divert_rcvd(struct socket *so, int flags __unused)
{
	/*
	 * pr_rcvd handler: called after the application reads from the receive
	 * buffer. If the daemon asked to be told when data is consumed and there
	 * is now room in the buffer, send it a read notification (once; the flag
	 * is cleared until the daemon re-arms it).
	 *
	 * Returns 0 on success or EINVAL if the socket is not diverted.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int space = 0;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	space = sbspace(&so->so_rcv);
	FDLOG(LOG_DEBUG, fd_cb, "app read bytes, space = %d", space);
	if ((fd_cb->flags & FLOW_DIVERT_NOTIFY_ON_RECEIVED) &&
	    (space > 0) &&
	    flow_divert_send_read_notification(fd_cb) == 0) {
		FDLOG0(LOG_INFO, fd_cb, "Sent a read notification");
		fd_cb->flags &= ~FLOW_DIVERT_NOTIFY_ON_RECEIVED;
	}

	return 0;
}
3522
static int
flow_divert_append_target_endpoint_tlv(mbuf_ref_t connect_packet, struct sockaddr *toaddr)
{
	/*
	 * Append the target endpoint to a connect packet as two TLVs: the full
	 * sockaddr (TARGET_ADDRESS) and the port in host byte order
	 * (TARGET_PORT). Only AF_INET and AF_INET6 addresses are accepted.
	 *
	 * Returns 0 on success or an errno (EINVAL for a bad address, or the
	 * TLV-append error).
	 */
	int error = 0;
	int port = 0;

	if (!flow_divert_is_sockaddr_valid(toaddr)) {
		FDLOG(LOG_ERR, &nil_pcb, "Invalid target address, family = %u, length = %u", toaddr->sa_family, toaddr->sa_len);
		error = EINVAL;
		goto done;
	}

	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, toaddr->sa_len, toaddr);
	if (error) {
		goto done;
	}

	if (toaddr->sa_family == AF_INET) {
		port = ntohs((satosin(toaddr))->sin_port);
	} else {
		port = ntohs((satosin6(toaddr))->sin6_port);
	}

	error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port);
	if (error) {
		goto done;
	}

done:
	return error;
}
3554
3555 struct sockaddr *
flow_divert_get_buffered_target_address(mbuf_ref_t buffer)3556 flow_divert_get_buffered_target_address(mbuf_ref_t buffer)
3557 {
3558 if (buffer != NULL && buffer->m_type == MT_SONAME) {
3559 struct sockaddr *toaddr = mtod(buffer, struct sockaddr *);
3560 if (toaddr != NULL && flow_divert_is_sockaddr_valid(toaddr)) {
3561 return toaddr;
3562 }
3563 }
3564 return NULL;
3565 }
3566
3567 static boolean_t
flow_divert_is_sockaddr_valid(struct sockaddr * addr)3568 flow_divert_is_sockaddr_valid(struct sockaddr *addr)
3569 {
3570 switch (addr->sa_family) {
3571 case AF_INET:
3572 if (addr->sa_len < sizeof(struct sockaddr_in)) {
3573 return FALSE;
3574 }
3575 break;
3576 case AF_INET6:
3577 if (addr->sa_len < sizeof(struct sockaddr_in6)) {
3578 return FALSE;
3579 }
3580 break;
3581 default:
3582 return FALSE;
3583 }
3584 return TRUE;
3585 }
3586
static errno_t
flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr,
    struct sockaddr **dup)
{
	/*
	 * Duplicate `addr` into a newly-allocated sockaddr stored in *dup. If
	 * `addr` is NULL, a zeroed wildcard address of the given family is
	 * duplicated instead. The caller owns the result and must free it.
	 *
	 * Returns 0 on success, EINVAL for an unsupported family, or ENOBUFS if
	 * the allocation fails.
	 */
	int error = 0;
	struct sockaddr *result;
	struct sockaddr_storage ss;

	if (addr != NULL) {
		result = addr;
	} else {
		/* Synthesize an all-zero address of the requested family */
		memset(&ss, 0, sizeof(ss));
		ss.ss_family = family;
		if (ss.ss_family == AF_INET) {
			ss.ss_len = sizeof(struct sockaddr_in);
		} else if (ss.ss_family == AF_INET6) {
			ss.ss_len = sizeof(struct sockaddr_in6);
		} else {
			error = EINVAL;
		}
		result = (struct sockaddr *)&ss;
	}

	if (!error) {
		*dup = dup_sockaddr(result, 1);
		if (*dup == NULL) {
			error = ENOBUFS;
		}
	}

	return error;
}
3619
static void
flow_divert_disconnect_socket(struct socket *so, bool is_connected, bool delay_if_needed)
{
	/*
	 * Disconnect a diverted socket. Stream sockets (and connected datagram
	 * sockets) are marked disconnected; datagram sockets additionally have
	 * their inpcb torn down (nstat notification, route release, and either
	 * deferred death via content filter or immediate GC scheduling).
	 */
	if (SOCK_TYPE(so) == SOCK_STREAM || is_connected) {
		soisdisconnected(so);
	}
	if (SOCK_TYPE(so) == SOCK_DGRAM) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
			/*
			 * Let NetworkStatistics know this PCB is going away
			 * before we detach it.
			 */
			if (nstat_collect && (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
				nstat_pcb_detach(inp);
			}

			if (SOCK_DOM(so) == PF_INET6) {
				ROUTE_RELEASE(&inp->in6p_route);
			} else {
				ROUTE_RELEASE(&inp->inp_route);
			}
			if (delay_if_needed) {
				/* Content filters may still hold data; let cfil decide when the PCB dies */
				(void) cfil_sock_is_dead(so);
			} else {
				inp->inp_state = INPCB_STATE_DEAD;
				inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
			}
			/* makes sure we're not called twice from so_close */
			so->so_flags |= SOF_PCBCLEARING;
		}
	}
}
3653
3654 static errno_t
flow_divert_ctloutput(struct socket * so,struct sockopt * sopt)3655 flow_divert_ctloutput(struct socket *so, struct sockopt *sopt)
3656 {
3657 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3658
3659 if (!SO_IS_DIVERTED(so)) {
3660 return EINVAL;
3661 }
3662
3663 if (sopt->sopt_name == SO_TRAFFIC_CLASS) {
3664 if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
3665 flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class);
3666 }
3667 }
3668
3669 if (SOCK_DOM(so) == PF_INET) {
3670 return g_tcp_protosw->pr_ctloutput(so, sopt);
3671 } else if (SOCK_DOM(so) == PF_INET6) {
3672 return g_tcp6_protosw->pr_ctloutput(so, sopt);
3673 }
3674 return 0;
3675 }
3676
static errno_t
flow_divert_connect_out_internal(struct socket *so, struct sockaddr *to, proc_t p, bool implicit)
{
	/*
	 * Core connect path for a diverted socket. On the first call it
	 * validates the destination, saves the original remote endpoint and
	 * inpcb state (so they can be restored on failure), selects a local
	 * address, builds the connect packet, and either sends it immediately
	 * or defers it (SOF1_PRECONNECT_DATA). `implicit` is true when the
	 * connect is triggered by a send/receive rather than connect(2).
	 *
	 * Returns 0 on success or an errno. Callers depend on EALREADY when a
	 * connect is already in progress.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct sockaddr_in *sinp;
	mbuf_ref_t connect_packet = NULL;
	int do_send = 1;

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	/* Without a group there is no daemon to talk to */
	if (fd_cb->group == NULL) {
		error = ENETUNREACH;
		goto done;
	}

	if (inp == NULL) {
		error = EINVAL;
		goto done;
	} else if (inp->inp_state == INPCB_STATE_DEAD) {
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
		} else {
			error = EINVAL;
		}
		goto done;
	}

	if (fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) {
		error = EALREADY;
		goto done;
	}

	FDLOG0(LOG_INFO, fd_cb, "Connecting");

	/* A saved connect packet means this setup work was already done */
	if (fd_cb->connect_packet == NULL) {
		struct sockaddr_in sin = {};
		struct ifnet * __single ifp = NULL;

		if (to == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "No destination address available when creating connect packet");
			error = EINVAL;
			goto done;
		}

		if (!flow_divert_is_sockaddr_valid(to)) {
			FDLOG0(LOG_ERR, fd_cb, "Destination address is not valid when creating connect packet");
			error = EINVAL;
			goto done;
		}

		/* Save original state so it can be restored if the flow fails over */
		fd_cb->original_remote_endpoint = dup_sockaddr(to, 0);
		if (fd_cb->original_remote_endpoint == NULL) {
			FDLOG0(LOG_ERR, fd_cb, "Failed to dup the remote endpoint");
			error = ENOMEM;
			goto done;
		}
		fd_cb->original_vflag = inp->inp_vflag;
		fd_cb->original_last_outifp = inp->inp_last_outifp;
		fd_cb->original_last_outifp6 = inp->in6p_last_outifp;

		sinp = (struct sockaddr_in *)(void *)to;
		if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto done;
		}

		/* On a dual-stack socket, convert a v4-mapped v6 destination to plain IPv4 */
		if (to->sa_family == AF_INET6 && !(inp->inp_flags & IN6P_IPV6_V6ONLY)) {
			struct sockaddr_in6 sin6 = {};
			sin6.sin6_family = AF_INET6;
			sin6.sin6_len = sizeof(struct sockaddr_in6);
			sin6.sin6_port = satosin6(to)->sin6_port;
			sin6.sin6_addr = satosin6(to)->sin6_addr;
			if (IN6_IS_ADDR_V4MAPPED(&(sin6.sin6_addr))) {
				in6_sin6_2_sin(&sin, &sin6);
				to = (struct sockaddr *)&sin;
			}
		}

		if (to->sa_family == AF_INET6) {
			struct sockaddr_in6 *to6 = satosin6(to);

			inp->inp_vflag &= ~INP_IPV4;
			inp->inp_vflag |= INP_IPV6;
			fd_cb->local_endpoint.sin6.sin6_len = sizeof(struct sockaddr_in6);
			fd_cb->local_endpoint.sin6.sin6_family = AF_INET6;
			fd_cb->local_endpoint.sin6.sin6_port = inp->inp_lport;
			error = in6_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin6.sin6_addr), &ifp);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv6 address: %d", error);
				/* Only transparent flows to a specific address require a local address now */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || IN6_IS_ADDR_UNSPECIFIED(&(satosin6(to)->sin6_addr))) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->in6p_last_outifp = ifp;
				ifnet_release(ifp);
			}

			/* Move any embedded scope ID out of the address into sin6_scope_id */
			if (IN6_IS_SCOPE_EMBED(&(fd_cb->local_endpoint.sin6.sin6_addr)) &&
			    in6_embedded_scope &&
			    fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] != 0) {
				fd_cb->local_endpoint.sin6.sin6_scope_id = ntohs(fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1]);
				fd_cb->local_endpoint.sin6.sin6_addr.s6_addr16[1] = 0;
			}

			if (IN6_IS_SCOPE_EMBED(&(to6->sin6_addr)) &&
			    in6_embedded_scope &&
			    to6->sin6_addr.s6_addr16[1] != 0) {
				to6->sin6_scope_id = ntohs(to6->sin6_addr.s6_addr16[1]);
				to6->sin6_addr.s6_addr16[1] = 0;
			}
		} else if (to->sa_family == AF_INET) {
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			fd_cb->local_endpoint.sin.sin_len = sizeof(struct sockaddr_in);
			fd_cb->local_endpoint.sin.sin_family = AF_INET;
			fd_cb->local_endpoint.sin.sin_port = inp->inp_lport;
			error = in_pcbladdr(inp, to, &(fd_cb->local_endpoint.sin.sin_addr), IFSCOPE_NONE, &ifp, 0);
			if (error) {
				FDLOG(LOG_WARNING, fd_cb, "failed to get a local IPv4 address: %d", error);
				/* Only transparent flows to a specific address require a local address now */
				if (!(fd_cb->flags & FLOW_DIVERT_FLOW_IS_TRANSPARENT) || satosin(to)->sin_addr.s_addr == INADDR_ANY) {
					error = 0;
				} else {
					goto done;
				}
			}
			if (ifp != NULL) {
				inp->inp_last_outifp = ifp;
				ifnet_release(ifp);
			}
		} else {
			FDLOG(LOG_WARNING, fd_cb, "target address has an unsupported family: %d", to->sa_family);
		}

		/* Enforce interface-type restrictions now that an interface is chosen */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}

		if (SOCK_TYPE(so) == SOCK_STREAM || /* TCP or */
		    !implicit || /* connect() was called or */
		    ((inp->inp_vflag & INP_IPV6) && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) || /* local address is not un-specified */
		    ((inp->inp_vflag & INP_IPV4) && inp->inp_laddr.s_addr != INADDR_ANY)) {
			fd_cb->flags |= FLOW_DIVERT_SHOULD_SET_LOCAL_ADDR;
		}

		error = flow_divert_create_connect_packet(fd_cb, to, so, p, &connect_packet);
		if (error) {
			goto done;
		}

		if (!implicit || SOCK_TYPE(so) == SOCK_STREAM) {
			flow_divert_set_remote_endpoint(fd_cb, to);
			flow_divert_set_local_endpoint(fd_cb, SA(&fd_cb->local_endpoint));
		}

		if (implicit) {
			fd_cb->flags |= FLOW_DIVERT_IMPLICIT_CONNECT;
		}

		/* With preconnect data, defer sending until the first send/receive */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			FDLOG0(LOG_INFO, fd_cb, "Delaying sending the connect packet until send or receive");
			do_send = 0;
		}

		/* Hand ownership of the packet to the PCB */
		fd_cb->connect_packet = connect_packet;
		connect_packet = NULL;
	} else {
		FDLOG0(LOG_INFO, fd_cb, "Sending saved connect packet");
	}

	if (do_send) {
		error = flow_divert_send_connect_packet(fd_cb);
		if (error) {
			goto done;
		}

		fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
	}

	/* Tokenless datagram flows are considered connected immediately */
	if (SOCK_TYPE(so) == SOCK_DGRAM && !(fd_cb->flags & FLOW_DIVERT_HAS_TOKEN)) {
		soisconnected(so);
	} else {
		soisconnecting(so);
	}

done:
	return error;
}
3875
3876 errno_t
flow_divert_connect_out(struct socket * so,struct sockaddr * to,proc_t p)3877 flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p)
3878 {
3879 #if CONTENT_FILTER
3880 if (SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
3881 int error = cfil_sock_attach(so, NULL, to, CFS_CONNECTION_DIR_OUT);
3882 if (error != 0) {
3883 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
3884 FDLOG(LOG_ERR, fd_cb, "Failed to attach cfil: %d", error);
3885 return error;
3886 }
3887 }
3888 #endif /* CONTENT_FILTER */
3889
3890 return flow_divert_connect_out_internal(so, to, p, false);
3891 }
3892
/*
 * Common implementation behind the IPv4 and IPv6 pru_connectx handlers for
 * flow-diverted sockets: optionally bind to an interface, start the connect,
 * and — when the caller supplied a uio — send that initial data immediately.
 *
 * On success *pcid is set to 1 (a flow divert socket carries exactly one
 * connection) and, when data was supplied, *bytes_written reports how much of
 * it the send path accepted.
 */
static int
flow_divert_connectx_out_common(struct socket *so, struct sockaddr *dst,
    struct proc *p, uint32_t ifscope, sae_connid_t *pcid, struct uio *auio, user_ssize_t *bytes_written)
{
	struct inpcb *inp = sotoinpcb(so);
	int error;

	if (inp == NULL) {
		return EINVAL;
	}

	VERIFY(dst != NULL);

#if CONTENT_FILTER && NECP
	/*
	 * Token-based TCP flows without a content filter re-evaluate the NECP
	 * policy now that the destination address is known.
	 */
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	if (fd_cb != NULL && (fd_cb->flags & FLOW_DIVERT_HAS_TOKEN) &&
	    SOCK_TYPE(so) == SOCK_STREAM && !(so->so_flags & SOF_CONTENT_FILTER)) {
		inp_update_necp_policy(sotoinpcb(so), NULL, dst, 0);
	}
#endif /* CONTENT_FILTER && NECP */

	/* bind socket to the specified interface, if requested */
	if (ifscope != IFSCOPE_NONE &&
	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
		return error;
	}

	error = flow_divert_connect_out(so, dst, p);

	if (error != 0) {
		return error;
	}

	/* if there is data, send it */
	if (auio != NULL) {
		user_ssize_t datalen = 0;

		/* pru_sosend takes the socket lock itself, so drop it here. */
		socket_unlock(so, 0);

		VERIFY(bytes_written != NULL);

		datalen = uio_resid(auio);
		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL, (uio_t)auio, NULL, NULL, 0);
		socket_lock(so, 0);

		if (error == 0 || error == EWOULDBLOCK) {
			*bytes_written = datalen - uio_resid(auio);
		}

		/*
		 * sosend returns EWOULDBLOCK if it's a non-blocking
		 * socket or a timeout occured (this allows to return
		 * the amount of queued data through sendit()).
		 *
		 * However, connectx() returns EINPROGRESS in case of a
		 * blocking socket. So we change the return value here.
		 */
		if (error == EWOULDBLOCK) {
			error = EINPROGRESS;
		}
	}

	if (error == 0 && pcid != NULL) {
		*pcid = 1; /* there is only 1 connection for a TCP */
	}

	return error;
}
3961
3962 static int
flow_divert_connectx_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3963 flow_divert_connectx_out(struct socket *so, struct sockaddr *src __unused,
3964 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3965 sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3966 uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3967 {
3968 return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3969 }
3970
3971 static int
flow_divert_connectx6_out(struct socket * so,struct sockaddr * src __unused,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid __unused,sae_connid_t * pcid,uint32_t flags __unused,void * arg __unused,uint32_t arglen __unused,struct uio * uio,user_ssize_t * bytes_written)3972 flow_divert_connectx6_out(struct socket *so, struct sockaddr *src __unused,
3973 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3974 sae_associd_t aid __unused, sae_connid_t *pcid, uint32_t flags __unused, void *arg __unused,
3975 uint32_t arglen __unused, struct uio *uio, user_ssize_t *bytes_written)
3976 {
3977 return flow_divert_connectx_out_common(so, dst, p, ifscope, pcid, uio, bytes_written);
3978 }
3979
/*
 * pru_send handler for flow-diverted sockets: forwards application data to
 * the flow divert provider, performing an implicit connect first if
 * connect(2) was never called.  As with other pru_send implementations, the
 * 'data' and 'control' mbufs are always consumed here, success or failure.
 */
static errno_t
flow_divert_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
{
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	int error = 0;
	struct inpcb *inp;
#if CONTENT_FILTER
	struct m_tag *cfil_tag = NULL;
#endif

	if (!SO_IS_DIVERTED(so)) {
		return EINVAL;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		error = ECONNRESET;
		goto done;
	}

	if ((fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) && SOCK_TYPE(so) == SOCK_DGRAM) {
		/* The provider considers this datagram flow to be closed, so no data can be sent */
		FDLOG0(LOG_INFO, fd_cb, "provider is no longer accepting writes, cannot send data");
		error = EHOSTUNREACH;
		goto done;
	}

#if CONTENT_FILTER
	/*
	 * If the socket is subject to a UDP Content Filter and no remote address is passed in,
	 * retrieve the CFIL saved remote address from the mbuf and use it.
	 */
	if (to == NULL && CFIL_DGRAM_FILTERED(so)) {
		struct sockaddr * __single cfil_faddr = NULL;
		cfil_tag = cfil_dgram_get_socket_state(data, NULL, NULL, &cfil_faddr, NULL);
		if (cfil_tag) {
			to = (struct sockaddr *)(void *)cfil_faddr;
		}
		FDLOG(LOG_INFO, fd_cb, "Using remote address from CFIL saved state: %p", to);
	}
#endif

	/* Implicit connect */
	if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
		FDLOG0(LOG_INFO, fd_cb, "implicit connect");

		error = flow_divert_connect_out_internal(so, to, p, true);
		if (error) {
			goto done;
		}
	} else {
		/* Already connected: re-check interface restrictions before sending. */
		error = flow_divert_check_no_cellular(fd_cb) ||
		    flow_divert_check_no_expensive(fd_cb) ||
		    flow_divert_check_no_constrained(fd_cb);
		if (error) {
			goto done;
		}
	}

	if (data != NULL) {
		size_t data_size = 0;
		/*
		 * Compute the payload length: use the packet header when
		 * present, otherwise walk the whole mbuf chain.
		 */
		if (mbuf_flags(data) & M_PKTHDR) {
			data_size = mbuf_pkthdr_len(data);
		} else {
			for (mbuf_t blob = data; blob != NULL; blob = mbuf_next(blob)) {
				data_size += mbuf_len(blob);
			}
		}

		FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", data_size);
		fd_cb->bytes_written_by_app += data_size;

		error = flow_divert_send_app_data(fd_cb, data, data_size, to);

		/* 'data' is owned by flow_divert_send_app_data() from here on;
		 * clear it so the cleanup path below does not double-free. */
		data = NULL;

		if (error) {
			goto done;
		}
	}

	if (flags & PRUS_EOF) {
		flow_divert_shutdown(so);
	}

done:
	if (data) {
		mbuf_freem(data);
	}
	if (control) {
		mbuf_free(control);
	}
#if CONTENT_FILTER
	if (cfil_tag) {
		m_tag_free(cfil_tag);
	}
#endif

	return error;
}
4080
4081 static int
flow_divert_preconnect(struct socket * so)4082 flow_divert_preconnect(struct socket *so)
4083 {
4084 int error = 0;
4085 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4086
4087 if (!SO_IS_DIVERTED(so)) {
4088 return EINVAL;
4089 }
4090
4091 if (!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) {
4092 FDLOG0(LOG_INFO, fd_cb, "Pre-connect read: sending saved connect packet");
4093 error = flow_divert_send_connect_packet(so->so_fd_pcb);
4094 if (error) {
4095 return error;
4096 }
4097
4098 fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED;
4099 }
4100
4101 soclearfastopen(so);
4102
4103 return error;
4104 }
4105
4106 static void
flow_divert_set_protosw(struct socket * so)4107 flow_divert_set_protosw(struct socket *so)
4108 {
4109 if (SOCK_DOM(so) == PF_INET) {
4110 so->so_proto = &g_flow_divert_in_protosw;
4111 } else {
4112 so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw;
4113 }
4114 }
4115
4116 static void
flow_divert_set_udp_protosw(struct socket * so)4117 flow_divert_set_udp_protosw(struct socket *so)
4118 {
4119 if (SOCK_DOM(so) == PF_INET) {
4120 so->so_proto = &g_flow_divert_in_udp_protosw;
4121 } else {
4122 so->so_proto = (struct protosw *)&g_flow_divert_in6_udp_protosw;
4123 }
4124 }
4125
4126 errno_t
flow_divert_implicit_data_out(struct socket * so,int flags,mbuf_ref_t data,struct sockaddr * to,mbuf_ref_t control,struct proc * p)4127 flow_divert_implicit_data_out(struct socket *so, int flags, mbuf_ref_t data, struct sockaddr *to, mbuf_ref_t control, struct proc *p)
4128 {
4129 struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
4130 struct inpcb *inp;
4131 int error = 0;
4132
4133 inp = sotoinpcb(so);
4134 if (inp == NULL) {
4135 return EINVAL;
4136 }
4137
4138 if (fd_cb == NULL) {
4139 error = flow_divert_pcb_init(so);
4140 fd_cb = so->so_fd_pcb;
4141 if (error != 0 || fd_cb == NULL) {
4142 goto done;
4143 }
4144 }
4145 return flow_divert_data_out(so, flags, data, to, control, p);
4146
4147 done:
4148 if (data) {
4149 mbuf_freem(data);
4150 }
4151 if (control) {
4152 mbuf_free(control);
4153 }
4154
4155 return error;
4156 }
4157
/*
 * Allocate a flow divert PCB for 'so' and attach it to a control group.
 * ctl_unit is the preferred control unit and aggregate_unit encodes any
 * additional (aggregate/transparent) units; the actual group unit is derived
 * by flow_divert_derive_kernel_control_unit(), and insertion is retried while
 * the chosen group disappears underneath us (ENOENT from the insert).
 *
 * On success the socket is marked SOF_FLOW_DIVERT and its protosw is switched
 * to the flow divert variant for its type.  Returns EALREADY if the socket is
 * already diverted, ENOMEM if the PCB cannot be allocated, EINVAL if no valid
 * group is available, or the insertion error.
 */
static errno_t
flow_divert_pcb_init_internal(struct socket *so, uint32_t ctl_unit, uint32_t aggregate_unit)
{
	errno_t error = 0;
	struct flow_divert_pcb *fd_cb = NULL;
	uint32_t agg_unit = aggregate_unit;
	uint32_t policy_control_unit = ctl_unit;
	bool is_aggregate = false;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		return EALREADY;
	}

	fd_cb = flow_divert_pcb_create(so);
	if (fd_cb == NULL) {
		return ENOMEM;
	}

	do {
		/* Valid units are below GROUP_COUNT_MAX or in the in-process range. */
		uint32_t group_unit = flow_divert_derive_kernel_control_unit(so->last_pid, &policy_control_unit, &agg_unit, &is_aggregate);
		if (group_unit == 0 || (group_unit >= GROUP_COUNT_MAX && group_unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN)) {
			FDLOG0(LOG_ERR, fd_cb, "No valid group is available, cannot init flow divert");
			error = EINVAL;
			break;
		}

		error = flow_divert_add_to_group(fd_cb, group_unit);
		if (error == 0) {
			so->so_fd_pcb = fd_cb;
			so->so_flags |= SOF_FLOW_DIVERT;
			fd_cb->control_group_unit = group_unit;
			fd_cb->policy_control_unit = ctl_unit;
			fd_cb->aggregate_unit = agg_unit;
			/* Aggregate units mark the flow as transparent. */
			if (is_aggregate) {
				fd_cb->flags |= FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			} else {
				fd_cb->flags &= ~FLOW_DIVERT_FLOW_IS_TRANSPARENT;
			}

			if (SOCK_TYPE(so) == SOCK_STREAM) {
				flow_divert_set_protosw(so);
			} else if (SOCK_TYPE(so) == SOCK_DGRAM) {
				flow_divert_set_udp_protosw(so);
			}

			FDLOG0(LOG_INFO, fd_cb, "Created");
		} else if (error != ENOENT) {
			FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error);
		}
	} while (error == ENOENT); /* group vanished between derive and insert: retry */

	if (error != 0) {
		/* Drop the creation reference; the PCB was never published. */
		FDRELEASE(fd_cb);
	}

	return error;
}
4215
4216 errno_t
flow_divert_pcb_init(struct socket * so)4217 flow_divert_pcb_init(struct socket *so)
4218 {
4219 struct inpcb *inp = sotoinpcb(so);
4220 uint32_t aggregate_units = 0;
4221 uint32_t ctl_unit = necp_socket_get_flow_divert_control_unit(inp, &aggregate_units);
4222 return flow_divert_pcb_init_internal(so, ctl_unit, aggregate_units);
4223 }
4224
/*
 * setsockopt handler for the flow divert token option.  Validates the socket
 * (an unconnected TCP or UDP socket over IPv4/IPv6), copies in the token
 * mbuf, extracts the key/control/aggregate unit TLVs, verifies the token's
 * HMAC when a valid control unit is present, and initializes the flow divert
 * PCB with those units.  On success the token mbuf is stashed on the PCB as
 * the connect token and FLOW_DIVERT_HAS_TOKEN is set.
 */
errno_t
flow_divert_token_set(struct socket *so, struct sockopt *sopt)
{
	uint32_t ctl_unit = 0;
	uint32_t key_unit = 0;
	uint32_t aggregate_unit = 0;
	int error = 0;
	int hmac_error = 0;
	mbuf_ref_t token = NULL;

	if (so->so_flags & SOF_FLOW_DIVERT) {
		error = EALREADY;
		goto done;
	}

	if (g_init_result) {
		FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result);
		error = ENOPROTOOPT;
		goto done;
	}

	/* Only unconnected TCP or UDP sockets over IPv4/IPv6 can take a token. */
	if ((SOCK_TYPE(so) != SOCK_STREAM && SOCK_TYPE(so) != SOCK_DGRAM) ||
	    (SOCK_PROTO(so) != IPPROTO_TCP && SOCK_PROTO(so) != IPPROTO_UDP) ||
	    (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6)) {
		error = EINVAL;
		goto done;
	} else {
		if (SOCK_TYPE(so) == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP) {
			struct tcpcb *tp = sototcpcb(so);
			if (tp == NULL || tp->t_state != TCPS_CLOSED) {
				error = EINVAL;
				goto done;
			}
		}
	}

	error = soopt_getm(sopt, &token);
	if (error) {
		/* NOTE(review): 'token' is not freed on this path — presumably
		 * soopt_getm() leaves nothing allocated on failure; verify. */
		token = NULL;
		goto done;
	}

	error = soopt_mcopyin(sopt, token);
	if (error) {
		/* NOTE(review): same as above for soopt_mcopyin() — verify it
		 * consumes the mbuf when it fails. */
		token = NULL;
		goto done;
	}

	/* The key unit TLV is optional; out-of-range values fall back to 0
	 * (meaning: use the control unit for HMAC verification below). */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL);
	if (!error) {
		key_unit = ntohl(key_unit);
		if (key_unit >= GROUP_COUNT_MAX) {
			key_unit = 0;
		}
	} else if (error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error);
		goto done;
	} else {
		key_unit = 0;
	}

	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL);
	if (error) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error);
		goto done;
	}

	/* The aggregate unit TLV is optional as well (ENOENT is tolerated). */
	error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_AGGREGATE_UNIT, sizeof(aggregate_unit), (void *)&aggregate_unit, NULL);
	if (error && error != ENOENT) {
		FDLOG(LOG_ERR, &nil_pcb, "Failed to get the aggregate unit from the token: %d", error);
		goto done;
	}

	/* A valid kernel control unit is required */
	ctl_unit = ntohl(ctl_unit);
	aggregate_unit = ntohl(aggregate_unit);

	/* Verify the token's HMAC with the key unit's key (or the control
	 * unit's key when no key unit was given). */
	if (ctl_unit > 0 && ctl_unit < GROUP_COUNT_MAX) {
		hmac_error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? key_unit : ctl_unit));
		if (hmac_error && hmac_error != ENOENT) {
			FDLOG(LOG_ERR, &nil_pcb, "HMAC verfication failed: %d", hmac_error);
			error = hmac_error;
			goto done;
		}
	}

	error = flow_divert_pcb_init_internal(so, ctl_unit, aggregate_unit);
	if (error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		int log_level = LOG_NOTICE;

		/* A missing log-level TLV is not an error. */
		error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL);
		if (error == 0) {
			fd_cb->log_level = (uint8_t)log_level;
		}
		error = 0;

		/* The PCB takes ownership of the token mbuf. */
		fd_cb->connect_token = token;
		token = NULL;

		fd_cb->flags |= FLOW_DIVERT_HAS_TOKEN;
	}

	if (hmac_error == 0) {
		struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
		if (fd_cb != NULL) {
			fd_cb->flags |= FLOW_DIVERT_HAS_HMAC;
		}
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4342
/*
 * getsockopt handler for the flow divert token option: builds a token mbuf
 * containing the group's control unit, the flow ID, optional app data, the
 * key unit, and an HMAC computed with the control group's key, then copies it
 * out to the caller.  Passing a NULL option value only reports the token's
 * size in sopt_valsize.
 */
errno_t
flow_divert_token_get(struct socket *so, struct sockopt *sopt)
{
	uint32_t ctl_unit;
	int error = 0;
	uint8_t hmac[SHA_DIGEST_LENGTH];
	struct flow_divert_pcb *fd_cb = so->so_fd_pcb;
	mbuf_ref_t token = NULL;
	struct flow_divert_group *control_group = NULL;

	if (!SO_IS_DIVERTED(so)) {
		error = EINVAL;
		goto done;
	}

	if (fd_cb->group == NULL) {
		error = EINVAL;
		goto done;
	}

	error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token);
	if (error) {
		FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error);
		goto done;
	}

	ctl_unit = htonl(fd_cb->group->ctl_unit);

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit);
	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash);
	if (error) {
		goto done;
	}

	if (fd_cb->app_data != NULL) {
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_APP_DATA, (uint32_t)fd_cb->app_data_length, fd_cb->app_data);
		if (error) {
			goto done;
		}
	}

	/* Sign the token with the control group's key (held shared while the
	 * key unit TLV is appended and the HMAC is computed). */
	control_group = flow_divert_group_lookup(fd_cb->control_group_unit, fd_cb);
	if (control_group != NULL) {
		lck_rw_lock_shared(&control_group->lck);
		ctl_unit = htonl(control_group->ctl_unit);
		error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit);
		if (!error) {
			error = flow_divert_packet_compute_hmac(token, control_group, hmac);
		}
		lck_rw_done(&control_group->lck);
		FDGRP_RELEASE(control_group);
	} else {
		error = ENOPROTOOPT;
	}

	if (error) {
		goto done;
	}

	error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac);
	if (error) {
		goto done;
	}

	if (sopt->sopt_val == USER_ADDR_NULL) {
		/* If the caller passed NULL to getsockopt, just set the size of the token and return */
		sopt->sopt_valsize = mbuf_pkthdr_len(token);
		goto done;
	}

	error = soopt_mcopyout(sopt, token);
	if (error) {
		token = NULL; /* For some reason, soopt_mcopyout() frees the mbuf if it fails */
		goto done;
	}

done:
	if (token != NULL) {
		mbuf_freem(token);
	}

	return error;
}
4430
/*
 * Tear down a flow divert control group: scrub and free the token key,
 * free the signing ID trie's backing memory, and release the group's zone
 * allocation.
 */
void
flow_divert_group_destroy(struct flow_divert_group *group)
{
	lck_rw_lock_exclusive(&group->lck);

	FDLOG(LOG_NOTICE, &nil_pcb, "Destroying group %u", group->ctl_unit);

	if (group->token_key != NULL) {
		/* Zeroize key material before freeing it. */
		memset(group->token_key, 0, group->token_key_size);
		kfree_data_sized_by(group->token_key, group->token_key_size);
	}

	/* Re-set the current trie */
	if (group->signing_id_trie.memory != NULL) {
		kfree_data_sized_by(group->signing_id_trie.memory, group->signing_id_trie.memory_size);
	}
	memset(&group->signing_id_trie, 0, sizeof(group->signing_id_trie));
	group->signing_id_trie.root = NULL_TRIE_IDX;

	lck_rw_done(&group->lck);

	zfree(flow_divert_group_zone, group);
}
4454
4455 static struct flow_divert_group *
flow_divert_allocate_group(u_int32_t unit,pid_t pid)4456 flow_divert_allocate_group(u_int32_t unit, pid_t pid)
4457 {
4458 struct flow_divert_group *new_group = NULL;
4459 new_group = zalloc_flags(flow_divert_group_zone, Z_WAITOK | Z_ZERO);
4460 lck_rw_init(&new_group->lck, &flow_divert_mtx_grp, &flow_divert_mtx_attr);
4461 RB_INIT(&new_group->pcb_tree);
4462 new_group->ctl_unit = unit;
4463 new_group->in_process_pid = pid;
4464 MBUFQ_INIT(&new_group->send_queue);
4465 new_group->signing_id_trie.root = NULL_TRIE_IDX;
4466 new_group->ref_count = 1;
4467 new_group->order = FLOW_DIVERT_ORDER_LAST;
4468 return new_group;
4469 }
4470
4471 static errno_t
flow_divert_kctl_setup(u_int32_t * unit,void ** unitinfo)4472 flow_divert_kctl_setup(u_int32_t *unit, void **unitinfo)
4473 {
4474 if (unit == NULL || unitinfo == NULL) {
4475 return EINVAL;
4476 }
4477
4478 struct flow_divert_group *new_group = NULL;
4479 errno_t error = 0;
4480 lck_rw_lock_shared(&g_flow_divert_group_lck);
4481 if (*unit == FLOW_DIVERT_IN_PROCESS_UNIT) {
4482 // Return next unused in-process unit
4483 u_int32_t unit_cursor = FLOW_DIVERT_IN_PROCESS_UNIT_MIN;
4484 struct flow_divert_group *group_next = NULL;
4485 TAILQ_FOREACH(group_next, &g_flow_divert_in_process_group_list, chain) {
4486 if (group_next->ctl_unit > unit_cursor) {
4487 // Found a gap, lets fill it in
4488 break;
4489 }
4490 unit_cursor = group_next->ctl_unit + 1;
4491 if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4492 break;
4493 }
4494 }
4495 if (unit_cursor == FLOW_DIVERT_IN_PROCESS_UNIT_MAX) {
4496 error = EBUSY;
4497 } else {
4498 *unit = unit_cursor;
4499 new_group = flow_divert_allocate_group(*unit, proc_pid(current_proc()));
4500 if (group_next != NULL) {
4501 TAILQ_INSERT_BEFORE(group_next, new_group, chain);
4502 } else {
4503 TAILQ_INSERT_TAIL(&g_flow_divert_in_process_group_list, new_group, chain);
4504 }
4505 g_active_group_count++;
4506 }
4507 } else {
4508 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
4509 error = EPERM;
4510 } else {
4511 if (g_flow_divert_groups == NULL) {
4512 g_flow_divert_groups = kalloc_type(struct flow_divert_group *,
4513 GROUP_COUNT_MAX, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4514 }
4515
4516 // Return next unused group unit
4517 bool found_unused_unit = false;
4518 u_int32_t unit_cursor;
4519 for (unit_cursor = 1; unit_cursor < GROUP_COUNT_MAX; unit_cursor++) {
4520 struct flow_divert_group *group = g_flow_divert_groups[unit_cursor];
4521 if (group == NULL) {
4522 // Open slot, assign this one
4523 *unit = unit_cursor;
4524 new_group = flow_divert_allocate_group(*unit, 0);
4525 g_flow_divert_groups[*unit] = new_group;
4526 found_unused_unit = true;
4527 g_active_group_count++;
4528 break;
4529 }
4530 }
4531 if (!found_unused_unit) {
4532 error = EBUSY;
4533 }
4534 }
4535 }
4536 lck_rw_done(&g_flow_divert_group_lck);
4537
4538 *unitinfo = new_group;
4539
4540 return error;
4541 }
4542
4543 static errno_t
flow_divert_kctl_connect(kern_ctl_ref kctlref __unused,struct sockaddr_ctl * sac,void ** unitinfo)4544 flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo)
4545 {
4546 if (unitinfo == NULL) {
4547 return EINVAL;
4548 }
4549
4550 // Just validate. The group will already have been allocated.
4551 struct flow_divert_group *group = (struct flow_divert_group *)*unitinfo;
4552 if (group == NULL || sac->sc_unit != group->ctl_unit) {
4553 FDLOG(LOG_ERR, &nil_pcb, "Flow divert connect fail, unit mismatch %u != %u",
4554 sac->sc_unit, group ? group->ctl_unit : 0);
4555 return EINVAL;
4556 }
4557
4558 return 0;
4559 }
4560
4561 static errno_t
flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused,uint32_t unit,void * unitinfo)4562 flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo)
4563 {
4564 struct flow_divert_group *group = NULL;
4565 errno_t error = 0;
4566
4567 if (unitinfo == NULL) {
4568 return 0;
4569 }
4570
4571 FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %d", unit);
4572
4573 lck_rw_lock_exclusive(&g_flow_divert_group_lck);
4574
4575 if (g_active_group_count == 0) {
4576 panic("flow divert group %u is disconnecting, but no groups are active (active count = %u)",
4577 unit, g_active_group_count);
4578 }
4579
4580 if (unit < FLOW_DIVERT_IN_PROCESS_UNIT_MIN) {
4581 if (unit >= GROUP_COUNT_MAX) {
4582 return EINVAL;
4583 }
4584
4585 if (g_flow_divert_groups == NULL) {
4586 panic("flow divert group %u is disconnecting, but groups array is NULL",
4587 unit);
4588 }
4589 group = g_flow_divert_groups[unit];
4590
4591 if (group != (struct flow_divert_group *)unitinfo) {
4592 panic("group with unit %d (%p) != unit info (%p)", unit, group, unitinfo);
4593 }
4594
4595 g_flow_divert_groups[unit] = NULL;
4596 } else {
4597 group = (struct flow_divert_group *)unitinfo;
4598 if (TAILQ_EMPTY(&g_flow_divert_in_process_group_list)) {
4599 panic("flow divert group %u is disconnecting, but in-process group list is empty",
4600 unit);
4601 }
4602
4603 TAILQ_REMOVE(&g_flow_divert_in_process_group_list, group, chain);
4604 }
4605
4606 g_active_group_count--;
4607
4608 if (g_active_group_count == 0) {
4609 kfree_type(struct flow_divert_group *,
4610 GROUP_COUNT_MAX, g_flow_divert_groups);
4611 g_flow_divert_groups = NULL;
4612 }
4613
4614 lck_rw_done(&g_flow_divert_group_lck);
4615
4616 if (group != NULL) {
4617 flow_divert_close_all(group);
4618 FDGRP_RELEASE(group);
4619 } else {
4620 error = EINVAL;
4621 }
4622
4623 return error;
4624 }
4625
4626 static errno_t
flow_divert_kctl_send(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,mbuf_ref_t m,__unused int flags)4627 flow_divert_kctl_send(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, mbuf_ref_t m, __unused int flags)
4628 {
4629 errno_t error = 0;
4630 struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4631 if (group != NULL) {
4632 error = flow_divert_input(m, group);
4633 FDGRP_RELEASE(group);
4634 } else {
4635 error = ENOENT;
4636 }
4637 return error;
4638 }
4639
4640 static void
flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref,uint32_t unit,__unused void * unitinfo,__unused int flags)4641 flow_divert_kctl_rcvd(__unused kern_ctl_ref kctlref, uint32_t unit, __unused void *unitinfo, __unused int flags)
4642 {
4643 struct flow_divert_group *group = flow_divert_group_lookup(unit, NULL);
4644 if (group == NULL) {
4645 return;
4646 }
4647
4648 if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) {
4649 struct flow_divert_pcb *fd_cb;
4650 SLIST_HEAD(, flow_divert_pcb) tmp_list;
4651
4652 lck_rw_lock_exclusive(&group->lck);
4653
4654 while (!MBUFQ_EMPTY(&group->send_queue)) {
4655 mbuf_ref_t next_packet;
4656 FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again");
4657 next_packet = MBUFQ_FIRST(&group->send_queue);
4658 int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR);
4659 if (error) {
4660 FDLOG(LOG_NOTICE, &nil_pcb, "flow_divert_kctl_rcvd: ctl_enqueuembuf returned an error: %d", error);
4661 OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits);
4662 lck_rw_done(&group->lck);
4663 return;
4664 }
4665 MBUFQ_DEQUEUE(&group->send_queue, next_packet);
4666 }
4667
4668 SLIST_INIT(&tmp_list);
4669
4670 RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) {
4671 FDRETAIN(fd_cb);
4672 SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry);
4673 }
4674
4675 lck_rw_done(&group->lck);
4676
4677 SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) {
4678 FDLOCK(fd_cb);
4679 if (fd_cb->so != NULL) {
4680 socket_lock(fd_cb->so, 0);
4681 if (fd_cb->group != NULL) {
4682 flow_divert_send_buffered_data(fd_cb, FALSE);
4683 }
4684 socket_unlock(fd_cb->so, 0);
4685 }
4686 FDUNLOCK(fd_cb);
4687 FDRELEASE(fd_cb);
4688 }
4689 }
4690
4691 FDGRP_RELEASE(group);
4692 }
4693
4694 static int
flow_divert_kctl_init(void)4695 flow_divert_kctl_init(void)
4696 {
4697 struct kern_ctl_reg ctl_reg;
4698 int result;
4699
4700 memset(&ctl_reg, 0, sizeof(ctl_reg));
4701
4702 strlcpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name));
4703 ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name) - 1] = '\0';
4704
4705 // Do not restrict to privileged processes. flow_divert_kctl_setup checks
4706 // permissions separately.
4707 ctl_reg.ctl_flags = CTL_FLAG_REG_EXTENDED | CTL_FLAG_REG_SETUP;
4708 ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE;
4709
4710 ctl_reg.ctl_connect = flow_divert_kctl_connect;
4711 ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect;
4712 ctl_reg.ctl_send = flow_divert_kctl_send;
4713 ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd;
4714 ctl_reg.ctl_setup = flow_divert_kctl_setup;
4715
4716 result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref);
4717
4718 if (result) {
4719 FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result);
4720 return result;
4721 }
4722
4723 return 0;
4724 }
4725
4726 void
flow_divert_init(void)4727 flow_divert_init(void)
4728 {
4729 memset(&nil_pcb, 0, sizeof(nil_pcb));
4730 nil_pcb.log_level = LOG_NOTICE;
4731
4732 g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM);
4733
4734 VERIFY(g_tcp_protosw != NULL);
4735
4736 memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw));
4737 memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs));
4738
4739 g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out;
4740 g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out;
4741 g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close;
4742 g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4743 g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd;
4744 g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out;
4745 g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown;
4746 g_flow_divert_in_usrreqs.pru_preconnect = flow_divert_preconnect;
4747
4748 g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4749 g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput;
4750
4751 /*
4752 * Socket filters shouldn't attach/detach to/from this protosw
4753 * since pr_protosw is to be used instead, which points to the
4754 * real protocol; if they do, it is a bug and we should panic.
4755 */
4756 g_flow_divert_in_protosw.pr_filter_head.tqh_first =
4757 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4758 g_flow_divert_in_protosw.pr_filter_head.tqh_last =
4759 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4760
4761 /* UDP */
4762 g_udp_protosw = pffindproto(AF_INET, IPPROTO_UDP, SOCK_DGRAM);
4763 VERIFY(g_udp_protosw != NULL);
4764
4765 memcpy(&g_flow_divert_in_udp_protosw, g_udp_protosw, sizeof(g_flow_divert_in_udp_protosw));
4766 memcpy(&g_flow_divert_in_udp_usrreqs, g_udp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_udp_usrreqs));
4767
4768 g_flow_divert_in_udp_usrreqs.pru_connect = flow_divert_connect_out;
4769 g_flow_divert_in_udp_usrreqs.pru_connectx = flow_divert_connectx_out;
4770 g_flow_divert_in_udp_usrreqs.pru_disconnect = flow_divert_close;
4771 g_flow_divert_in_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4772 g_flow_divert_in_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4773 g_flow_divert_in_udp_usrreqs.pru_send = flow_divert_data_out;
4774 g_flow_divert_in_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4775 g_flow_divert_in_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4776 g_flow_divert_in_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4777
4778 g_flow_divert_in_udp_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs;
4779 g_flow_divert_in_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4780
4781 /*
4782 * Socket filters shouldn't attach/detach to/from this protosw
4783 * since pr_protosw is to be used instead, which points to the
4784 * real protocol; if they do, it is a bug and we should panic.
4785 */
4786 g_flow_divert_in_udp_protosw.pr_filter_head.tqh_first =
4787 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4788 g_flow_divert_in_udp_protosw.pr_filter_head.tqh_last =
4789 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4790
4791 g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM);
4792
4793 VERIFY(g_tcp6_protosw != NULL);
4794
4795 memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw));
4796 memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs));
4797
4798 g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out;
4799 g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out;
4800 g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close;
4801 g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4802 g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd;
4803 g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out;
4804 g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown;
4805 g_flow_divert_in6_usrreqs.pru_preconnect = flow_divert_preconnect;
4806
4807 g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs;
4808 g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput;
4809 /*
4810 * Socket filters shouldn't attach/detach to/from this protosw
4811 * since pr_protosw is to be used instead, which points to the
4812 * real protocol; if they do, it is a bug and we should panic.
4813 */
4814 g_flow_divert_in6_protosw.pr_filter_head.tqh_first =
4815 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4816 g_flow_divert_in6_protosw.pr_filter_head.tqh_last =
4817 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4818
4819 /* UDP6 */
4820 g_udp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_UDP, SOCK_DGRAM);
4821
4822 VERIFY(g_udp6_protosw != NULL);
4823
4824 memcpy(&g_flow_divert_in6_udp_protosw, g_udp6_protosw, sizeof(g_flow_divert_in6_udp_protosw));
4825 memcpy(&g_flow_divert_in6_udp_usrreqs, g_udp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_udp_usrreqs));
4826
4827 g_flow_divert_in6_udp_usrreqs.pru_connect = flow_divert_connect_out;
4828 g_flow_divert_in6_udp_usrreqs.pru_connectx = flow_divert_connectx6_out;
4829 g_flow_divert_in6_udp_usrreqs.pru_disconnect = flow_divert_close;
4830 g_flow_divert_in6_udp_usrreqs.pru_disconnectx = flow_divert_disconnectx;
4831 g_flow_divert_in6_udp_usrreqs.pru_rcvd = flow_divert_rcvd;
4832 g_flow_divert_in6_udp_usrreqs.pru_send = flow_divert_data_out;
4833 g_flow_divert_in6_udp_usrreqs.pru_shutdown = flow_divert_shutdown;
4834 g_flow_divert_in6_udp_usrreqs.pru_sosend_list = pru_sosend_list_notsupp;
4835 g_flow_divert_in6_udp_usrreqs.pru_preconnect = flow_divert_preconnect;
4836
4837 g_flow_divert_in6_udp_protosw.pr_usrreqs = &g_flow_divert_in6_udp_usrreqs;
4838 g_flow_divert_in6_udp_protosw.pr_ctloutput = flow_divert_ctloutput;
4839 /*
4840 * Socket filters shouldn't attach/detach to/from this protosw
4841 * since pr_protosw is to be used instead, which points to the
4842 * real protocol; if they do, it is a bug and we should panic.
4843 */
4844 g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_first =
4845 __unsafe_forge_single(struct socket_filter *, (uintptr_t)0xdeadbeefdeadbeef);
4846 g_flow_divert_in6_udp_protosw.pr_filter_head.tqh_last =
4847 __unsafe_forge_single(struct socket_filter **, (uintptr_t)0xdeadbeefdeadbeef);
4848
4849 TAILQ_INIT(&g_flow_divert_in_process_group_list);
4850
4851 g_init_result = flow_divert_kctl_init();
4852 if (g_init_result) {
4853 goto done;
4854 }
4855
4856 done:
4857 if (g_init_result != 0) {
4858 if (g_flow_divert_kctl_ref != NULL) {
4859 ctl_deregister(g_flow_divert_kctl_ref);
4860 g_flow_divert_kctl_ref = NULL;
4861 }
4862 }
4863 }
4864