/*
 * Copyright (c) 1998-2022, 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/uio.h>
#include <sys/uio_internal.h>
#include <sys/ev.h>
#include <sys/kdebug.h>
#include <sys/un.h>
#include <sys/user.h>
#include <sys/priv.h>
#include <sys/kern_event.h>
#include <sys/persona.h>
#include <net/route.h>
#include <net/init.h>
#include <net/net_api_stats.h>
#include <net/ntstat.h>
#include <net/content_filter.h>
#include <net/sockaddr_utils.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_tclass.h>
#include <netinet/in_var.h>
#include <netinet/tcp_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/flow_divert.h>
#include <kern/zalloc.h>
#include <kern/locks.h>
#include <machine/limits.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <kern/policy_internal.h>

#include <sys/kpi_mbuf.h>
#include <sys/mcache.h>
#include <sys/unpcb.h>
#include <libkern/section_keywords.h>

#include <os/log.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif /* MAC */

#if MULTIPATH
#include <netinet/mp_pcb.h>
#include <netinet/mptcp_var.h>
#endif /* MULTIPATH */

#define ROUNDUP(a, b) (((a) + ((b) - 1)) & (~((b) - 1)))
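/*
 * For example, ROUNDUP(10, 8) == 16 and ROUNDUP(16, 8) == 16; note that
 * the mask trick assumes 'b' is a power of two.
 */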

#if DEBUG || DEVELOPMENT
#define DEBUG_KERNEL_ADDRPERM(_v) (_v)
#else
#define DEBUG_KERNEL_ADDRPERM(_v) VM_KERNEL_ADDRPERM(_v)
#endif

/* TODO: this should be in a header file somewhere */
extern char *proc_name_address(void *p);

static u_int32_t so_cache_hw;           /* High water mark for socache */
static u_int32_t so_cache_timeouts;     /* number of timeouts */
static u_int32_t so_cache_max_freed;    /* max freed per timeout */
static u_int32_t cached_sock_count = 0;
STAILQ_HEAD(, socket) so_cache_head;
int max_cached_sock_count = MAX_CACHED_SOCKETS;
static uint64_t so_cache_time;
static int socketinit_done;
static struct zone *so_cache_zone;

static LCK_GRP_DECLARE(so_cache_mtx_grp, "so_cache");
static LCK_MTX_DECLARE(so_cache_mtx, &so_cache_mtx_grp);

static int filt_sorattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static int filt_sortouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_sowattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev);

static int filt_sockattach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_sockdetach(struct knote *kn);
static int filt_sockev(struct knote *kn, long hint);
static int filt_socktouch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev);

static int sooptcopyin_timeval(struct sockopt *, struct timeval *);
static int sooptcopyout_timeval(struct sockopt *, const struct timeval *);

SECURITY_READ_ONLY_EARLY(struct filterops) soread_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sorattach,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
	.f_touch = filt_sortouch,
	.f_process = filt_sorprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sowrite_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sowattach,
	.f_detach = filt_sowdetach,
	.f_event = filt_sowrite,
	.f_touch = filt_sowtouch,
	.f_process = filt_sowprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) sock_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sockattach,
	.f_detach = filt_sockdetach,
	.f_event = filt_sockev,
	.f_touch = filt_socktouch,
	.f_process = filt_sockprocess,
};

SECURITY_READ_ONLY_EARLY(struct filterops) soexcept_filtops = {
	.f_isfd = 1,
	.f_attach = filt_sorattach,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
	.f_touch = filt_sortouch,
	.f_process = filt_sorprocess,
};

SYSCTL_DECL(_kern_ipc);

#define EVEN_MORE_LOCKING_DEBUG 0

int socket_debug = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, socket_debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &socket_debug, 0, "");

#if (DEBUG || DEVELOPMENT)
#define DEFAULT_SOSEND_ASSERT_PANIC 1
#else
#define DEFAULT_SOSEND_ASSERT_PANIC 0
#endif /* (DEBUG || DEVELOPMENT) */

int sosend_assert_panic = DEFAULT_SOSEND_ASSERT_PANIC;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosend_assert_panic,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosend_assert_panic, 0, "");

static unsigned long sodefunct_calls = 0;
SYSCTL_LONG(_kern_ipc, OID_AUTO, sodefunct_calls, CTLFLAG_LOCKED,
    &sodefunct_calls, "");

ZONE_DEFINE_TYPE(socket_zone, "socket", struct socket, ZC_ZFREE_CLEARMEM);
so_gen_t so_gencnt;     /* generation count for sockets */

MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETSOCK, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETSOCK, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETSOCK, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETSOCK, 3)
#define DBG_FNC_SOSEND          NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1)
#define DBG_FNC_SOSEND_LIST     NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 3)
#define DBG_FNC_SORECEIVE       NETDBG_CODE(DBG_NETSOCK, (8 << 8))
#define DBG_FNC_SORECEIVE_LIST  NETDBG_CODE(DBG_NETSOCK, (8 << 8) | 3)
#define DBG_FNC_SOSHUTDOWN      NETDBG_CODE(DBG_NETSOCK, (9 << 8))

#define MAX_SOOPTGETM_SIZE      (128 * MCLBYTES)
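/*
 * Sizing note (a sketch, assuming the usual 2 KB mbuf cluster size for
 * MCLBYTES): MAX_SOOPTGETM_SIZE works out to 256 KB.
 */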

int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
    CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");

/* Should we get a maximum also ??? */
static int sosendmaxchain = 65536;
static int sosendminchain = 16384;
static int sorecvmincopy = 16384;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, "");

/*
 * Set to enable jumbo clusters (if available) for large writes when
 * the socket is marked with SOF_MULTIPAGES; see below.
 */
int sosendjcl = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");

/*
 * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
 * writes on the socket for all protocols on any network interfaces,
 * depending upon sosendjcl above. Be extra careful when setting this
 * to 1, because sending down packets that cross physical pages down to
 * broken drivers (those that falsely assume that the physical pages
 * are contiguous) might lead to system panics or silent data corruption.
 * When set to 0, the system will respect SOF_MULTIPAGES, which is set
 * only for TCP sockets whose outgoing interface is IFNET_MULTIPAGES
 * capable. Set this to 1 only for testing/debugging purposes.
 */
int sosendjcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, "");
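/*
 * Example (sketch): these are plain read/write sysctls under kern.ipc,
 * so they can be toggled from userland for testing, e.g.:
 *
 *	sysctl -w kern.ipc.sosendjcl_ignore_capab=1
 *	sysctl -w kern.ipc.sosendjcl=0
 */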

/*
 * Set this to ignore SOF1_IF_2KCL and use big clusters for large
 * writes on the socket for all protocols on any network interfaces.
 * Be extra careful when setting this to 1, because sending down packets with
 * clusters larger than 2 KB might lead to system panics or data corruption.
 * When set to 0, the system will respect SOF1_IF_2KCL, which is set
 * on the outgoing interface.
 * Set this to 1 for testing/debugging purposes only.
 */
int sosendbigcl_ignore_capab = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sosendbigcl_ignore_capab,
    CTLFLAG_RW | CTLFLAG_LOCKED, &sosendbigcl_ignore_capab, 0, "");

int sodefunctlog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sodefunctlog, 0, "");

int sothrottlelog = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sothrottlelog, 0, "");

int sorestrictrecv = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictrecv, 0, "Enable inbound interface restrictions");

int sorestrictsend = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictsend, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sorestrictsend, 0, "Enable outbound interface restrictions");

int soreserveheadroom = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, soreserveheadroom, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soreserveheadroom, 0, "To allocate contiguous datagram buffers");

#if (DEBUG || DEVELOPMENT)
int so_notsent_lowat_check = 1;
SYSCTL_INT(_kern_ipc, OID_AUTO, notsent_lowat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_notsent_lowat_check, 0, "enable/disable notsent lowat check");
#endif /* DEBUG || DEVELOPMENT */

int so_accept_list_waits = 0;
#if (DEBUG || DEVELOPMENT)
SYSCTL_INT(_kern_ipc, OID_AUTO, accept_list_waits, CTLFLAG_RW | CTLFLAG_LOCKED,
    &so_accept_list_waits, 0, "number of waits for listener incomp list");
#endif /* DEBUG || DEVELOPMENT */

extern struct inpcbinfo tcbinfo;

/* TODO: these should be in a header file */
extern int get_inpcb_str_size(void);
extern int get_tcp_str_size(void);

vm_size_t so_cache_zone_element_size;

static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **,
    user_ssize_t *);
static void cached_sock_alloc(struct socket **, zalloc_flags_t);
static void cached_sock_free(struct socket *);

/*
 * Maximum number of extended background idle sockets per process.
 * Set to zero to disable further setting of the option.
 */

#define SO_IDLE_BK_IDLE_MAX_PER_PROC    1
#define SO_IDLE_BK_IDLE_TIME            600
#define SO_IDLE_BK_IDLE_RCV_HIWAT       131072
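/*
 * That is: at most one such socket per process, kept for 600 seconds
 * (10 minutes), with a 131072-byte (128 KB) receive high-water mark;
 * see the matching sysctl descriptions below.
 */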

struct soextbkidlestat soextbkidlestat;

SYSCTL_UINT(_kern_ipc, OID_AUTO, maxextbkidleperproc,
    CTLFLAG_RW | CTLFLAG_LOCKED, &soextbkidlestat.so_xbkidle_maxperproc, 0,
    "Maximum number of extended background idle sockets per process");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidletime, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_time, 0,
    "Time in seconds to keep extended background idle sockets");

SYSCTL_UINT(_kern_ipc, OID_AUTO, extbkidlercvhiwat, CTLFLAG_RW | CTLFLAG_LOCKED,
    &soextbkidlestat.so_xbkidle_rcvhiwat, 0,
    "High water mark for extended background idle sockets");

SYSCTL_STRUCT(_kern_ipc, OID_AUTO, extbkidlestat, CTLFLAG_RD | CTLFLAG_LOCKED,
    &soextbkidlestat, soextbkidlestat, "");

int so_set_extended_bk_idle(struct socket *, int);

#define SO_MAX_MSG_X 1024

/*
 * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
 * setting the DSCP code on the packet based on the service class; see
 * <rdar://problem/11277343> for details.
 */
__private_extern__ u_int32_t sotcdb = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
    &sotcdb, 0, "");

void
socketinit(void)
{
	_CASSERT(sizeof(so_gencnt) == sizeof(uint64_t));
	VERIFY(IS_P2ALIGNED(&so_gencnt, sizeof(uint32_t)));

#ifdef __LP64__
	_CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user64_sa_endpoints));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user64_sa_endpoints, sae_srcif));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user64_sa_endpoints, sae_srcaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user64_sa_endpoints, sae_srcaddrlen));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user64_sa_endpoints, sae_dstaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user64_sa_endpoints, sae_dstaddrlen));
#else
	_CASSERT(sizeof(struct sa_endpoints) == sizeof(struct user32_sa_endpoints));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcif) == offsetof(struct user32_sa_endpoints, sae_srcif));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddr) == offsetof(struct user32_sa_endpoints, sae_srcaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_srcaddrlen) == offsetof(struct user32_sa_endpoints, sae_srcaddrlen));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddr) == offsetof(struct user32_sa_endpoints, sae_dstaddr));
	_CASSERT(offsetof(struct sa_endpoints, sae_dstaddrlen) == offsetof(struct user32_sa_endpoints, sae_dstaddrlen));
#endif

	if (socketinit_done) {
		printf("socketinit: already called...\n");
		return;
	}
	socketinit_done = 1;

	PE_parse_boot_argn("socket_debug", &socket_debug,
	    sizeof(socket_debug));

	PE_parse_boot_argn("sosend_assert_panic", &sosend_assert_panic,
	    sizeof(sosend_assert_panic));

	STAILQ_INIT(&so_cache_head);

	so_cache_zone_element_size = (vm_size_t)(sizeof(struct socket) + 4
	    + get_inpcb_str_size() + 4 + get_tcp_str_size());

	so_cache_zone = zone_create("socache zone", so_cache_zone_element_size,
	    ZC_PGZ_USE_GUARDS | ZC_ZFREE_CLEARMEM);

	bzero(&soextbkidlestat, sizeof(struct soextbkidlestat));
	soextbkidlestat.so_xbkidle_maxperproc = SO_IDLE_BK_IDLE_MAX_PER_PROC;
	soextbkidlestat.so_xbkidle_time = SO_IDLE_BK_IDLE_TIME;
	soextbkidlestat.so_xbkidle_rcvhiwat = SO_IDLE_BK_IDLE_RCV_HIWAT;

	in_pcbinit();
}

static void
cached_sock_alloc(struct socket **so, zalloc_flags_t how)
{
	caddr_t temp;
	uintptr_t offset;

	lck_mtx_lock(&so_cache_mtx);

	if (!STAILQ_EMPTY(&so_cache_head)) {
		VERIFY(cached_sock_count > 0);

		*so = STAILQ_FIRST(&so_cache_head);
		STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
		STAILQ_NEXT((*so), so_cache_ent) = NULL;

		cached_sock_count--;
		lck_mtx_unlock(&so_cache_mtx);

		temp = (*so)->so_saved_pcb;
		bzero(*so, sizeof(struct socket));

		(*so)->so_saved_pcb = temp;
	} else {
		lck_mtx_unlock(&so_cache_mtx);

		uint8_t *so_mem = zalloc_flags_buf(so_cache_zone, how | Z_ZERO);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"
		*so = (struct socket *)so_mem;

		/*
		 * Define offsets for extra structures into our
		 * single block of memory. Align extra structures
		 * on longword boundaries.
		 */
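		/*
		 * Resulting layout of the cached block (a sketch; the exact
		 * padding depends on ALIGN(), per the element sizing done in
		 * socketinit()):
		 *
		 *	so_mem: [struct socket][pad][inpcb area][pad][tcpcb area]
		 */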

		offset = (uintptr_t)so_mem;
		offset += sizeof(struct socket);
		offset = ALIGN(offset);
		struct inpcb *pcb = (struct inpcb *)(so_mem + (offset - (uintptr_t)so_mem));
#pragma clang diagnostic pop
		(*so)->so_saved_pcb = (caddr_t)pcb;

		offset += get_inpcb_str_size();
		offset = ALIGN(offset);
		pcb->inp_saved_ppcb = (caddr_t)(so_mem + (offset - (uintptr_t)so_mem));
	}

	OSBitOrAtomic(SOF1_CACHED_IN_SOCK_LAYER, &(*so)->so_flags1);
}

static void
cached_sock_free(struct socket *so)
{
	lck_mtx_lock(&so_cache_mtx);

	so_cache_time = net_uptime();
	if (++cached_sock_count > max_cached_sock_count) {
		--cached_sock_count;
		lck_mtx_unlock(&so_cache_mtx);
		zfree(so_cache_zone, so);
	} else {
		if (so_cache_hw < cached_sock_count) {
			so_cache_hw = cached_sock_count;
		}

		STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent);

		so->cache_timestamp = so_cache_time;
		lck_mtx_unlock(&so_cache_mtx);
	}
}

void
so_update_last_owner_locked(struct socket *so, proc_t self)
{
	if (so->last_pid != 0) {
		/*
		 * last_pid and last_upid should remain zero for sockets
		 * created using sock_socket. The check above achieves that.
		 */
		if (self == PROC_NULL) {
			self = current_proc();
		}

		if (so->last_upid != proc_uniqueid(self) ||
		    so->last_pid != proc_pid(self)) {
			so->last_upid = proc_uniqueid(self);
			so->last_pid = proc_pid(self);
			proc_getexecutableuuid(self, so->last_uuid,
			    sizeof(so->last_uuid));
			if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
				(*so->so_proto->pr_update_last_owner)(so, self, NULL);
			}
		}
		proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));
	}
}

void
so_update_policy(struct socket *so)
{
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		(void) inp_update_policy(sotoinpcb(so));
	}
}

#if NECP
static void
so_update_necp_policy(struct socket *so, struct sockaddr *override_local_addr,
    struct sockaddr *override_remote_addr)
{
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		inp_update_necp_policy(sotoinpcb(so), override_local_addr,
		    override_remote_addr, 0);
	}
}
#endif /* NECP */

boolean_t
so_cache_timer(void)
{
	struct socket *p;
	int n_freed = 0;
	boolean_t rc = FALSE;

	lck_mtx_lock(&so_cache_mtx);
	so_cache_timeouts++;
	so_cache_time = net_uptime();

	while (!STAILQ_EMPTY(&so_cache_head)) {
		VERIFY(cached_sock_count > 0);
		p = STAILQ_FIRST(&so_cache_head);
		if ((so_cache_time - p->cache_timestamp) <
		    SO_CACHE_TIME_LIMIT) {
			break;
		}

		STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent);
		--cached_sock_count;

		zfree(so_cache_zone, p);

		if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) {
			so_cache_max_freed++;
			break;
		}
	}

	/* Schedule again if there is more to clean up */
	if (!STAILQ_EMPTY(&so_cache_head)) {
		rc = TRUE;
	}

	lck_mtx_unlock(&so_cache_mtx);
	return rc;
}

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(int waitok, int dom, int type)
{
	zalloc_flags_t how = waitok ? Z_WAITOK : Z_NOWAIT;
	struct socket *__single so;

	if ((dom == PF_INET) && (type == SOCK_STREAM)) {
		cached_sock_alloc(&so, how);
	} else {
		so = zalloc_flags(socket_zone, how | Z_ZERO);
	}
	if (so != NULL) {
		so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

		/*
		 * Increment the socket allocation statistics
		 */
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_alloc_total);
	}

	return so;
}

int
socreate_internal(int dom, struct socket **aso, int type, int proto,
    struct proc *p, uint32_t flags, struct proc *ep)
{
	struct protosw *prp;
	struct socket *so;
	int error = 0;
	pid_t rpid = -1;

	VERIFY(aso != NULL);
	*aso = NULL;

	if (proto != 0) {
		prp = pffindproto(dom, proto, type);
	} else {
		prp = pffindtype(dom, type);
	}

	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
		if (pffinddomain(dom) == NULL) {
			return EAFNOSUPPORT;
		}
		if (proto != 0) {
			if (pffindprotonotype(dom, proto) != NULL) {
				return EPROTOTYPE;
			}
		}
		return EPROTONOSUPPORT;
	}
	if (prp->pr_type != type) {
		return EPROTOTYPE;
	}
	so = soalloc(1, dom, type);
	if (so == NULL) {
		return ENOBUFS;
	}

	switch (dom) {
	case PF_LOCAL:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_local_total);
		break;
	case PF_INET:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet_total);
		if (type == SOCK_STREAM) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_stream_total);
		} else {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_total);
		}
		break;
	case PF_ROUTE:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_route_total);
		break;
	case PF_NDRV:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_ndrv_total);
		break;
	case PF_KEY:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_key_total);
		break;
	case PF_INET6:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_inet6_total);
		if (type == SOCK_STREAM) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_stream_total);
		} else {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet6_dgram_total);
		}
		break;
	case PF_SYSTEM:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_system_total);
		break;
	case PF_MULTIPATH:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_multipath_total);
		break;
	default:
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_domain_other_total);
		break;
	}

	if (flags & SOCF_MPTCP) {
		so->so_state |= SS_NBIO;
	}

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = (short)type;
	so->so_family = prp->pr_domain->dom_family;
	so->so_protocol = prp->pr_protocol;
	so->last_upid = proc_uniqueid(p);
	so->last_pid = proc_pid(p);
	proc_getexecutableuuid(p, so->last_uuid, sizeof(so->last_uuid));
	proc_pidoriginatoruuid(so->so_vuuid, sizeof(so->so_vuuid));

	so->so_rpid = -1;
	uuid_clear(so->so_ruuid);

	if (ep != PROC_NULL && ep != p) {
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));
		so->so_flags |= SOF_DELEGATED;
		if (ep->p_responsible_pid != so->e_pid) {
			rpid = ep->p_responsible_pid;
			so->so_rpid = rpid;
			proc_getresponsibleuuid(ep, so->so_ruuid, sizeof(so->so_ruuid));
		}
	}

	if (rpid < 0 && p->p_responsible_pid != so->last_pid) {
		rpid = p->p_responsible_pid;
		so->so_rpid = rpid;
		proc_getresponsibleuuid(p, so->so_ruuid, sizeof(so->so_ruuid));
	}

	so->so_cred = kauth_cred_proc_ref(p);
	if (!suser(kauth_cred_get(), NULL)) {
		so->so_state |= SS_PRIV;
	}

	so->so_persona_id = current_persona_get_id();
	so->so_proto = prp;
	so->so_rcv.sb_flags |= SB_RECV;
	so->so_rcv.sb_so = so->so_snd.sb_so = so;
	so->next_lock_lr = 0;
	so->next_unlock_lr = 0;

	/*
	 * Attachment will create the per pcb lock if necessary and
	 * increase refcount for creation, make sure it's done before
	 * socket is inserted in lists.
	 */
	so->so_usecount++;

	error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
	if (error != 0) {
		/*
		 * Warning:
		 * If so_pcb is not zero, the socket will be leaked,
		 * so the protocol attachment handler must be coded carefully.
		 */
		if (so->so_pcb != NULL) {
			os_log_error(OS_LOG_DEFAULT,
			    "so_pcb not NULL after pru_attach error %d for dom %d, proto %d, type %d",
			    error, dom, proto, type);
		}
		/*
		 * Both SS_NOFDREF and SOF_PCBCLEARING should be set to free the socket
		 */
		so->so_state |= SS_NOFDREF;
		so->so_flags |= SOF_PCBCLEARING;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
		sofreelastref(so, 1);   /* will deallocate the socket */
		return error;
	}

	/*
	 * Note: needs so_pcb to be set after pru_attach
	 */
	if (prp->pr_update_last_owner != NULL) {
		(*prp->pr_update_last_owner)(so, p, ep);
	}

	os_atomic_inc(&prp->pr_domain->dom_refs, relaxed);

	/* Attach socket filters for this protocol */
	sflt_initsock(so);
	so_set_default_traffic_class(so);

	/*
	 * If this thread or task is marked to create backgrounded sockets,
	 * mark the socket as background.
	 */
	if (!(flags & SOCF_MPTCP) &&
	    proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) {
		socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
		so->so_background_thread = current_thread();
	}

	switch (dom) {
	/*
	 * Don't mark Unix domain or system
	 * eligible for defunct by default.
	 */
	case PF_LOCAL:
	case PF_SYSTEM:
		so->so_flags |= SOF_NODEFUNCT;
		break;
	default:
		break;
	}

	/*
	 * Entitlements can't be checked at socket creation time except if the
	 * application requested a feature guarded by a privilege (c.f., socket
	 * delegation).
	 * The priv(9) and the Sandboxing APIs are designed with the idea that
	 * a privilege check should only be triggered by a userland request.
	 * A privilege check at socket creation time is time consuming and
	 * could trigger many authorisation error messages from the security
	 * APIs.
	 */

	*aso = so;

	return 0;
}

/*
 * Returns:	0			Success
 *		EAFNOSUPPORT
 *		EPROTOTYPE
 *		EPROTONOSUPPORT
 *		ENOBUFS
 *	<pru_attach>:ENOBUFS[AF_UNIX]
 *	<pru_attach>:ENOBUFS[TCP]
 *	<pru_attach>:ENOMEM[TCP]
 *	<pru_attach>:???		[other protocol families, IPSEC]
 */
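/*
 * In-kernel usage sketch (hypothetical caller, mirroring what the
 * sock_socket() KPI does with these routines):
 *
 *	struct socket *so = NULL;
 *	int error = socreate(PF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
 *	if (error == 0) {
 *		... use so, e.g. sobindlock()/soconnect() ...
 *		soclose(so);
 *	}
 */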
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	return socreate_internal(dom, aso, type, proto, current_proc(), 0,
	           PROC_NULL);
}

int
socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid)
{
	int error = 0;
	struct proc *ep = PROC_NULL;

	if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) {
		error = ESRCH;
		goto done;
	}

	error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep);

	/*
	 * It might not be wise to hold the proc reference when calling
	 * socreate_internal since it calls soalloc with M_WAITOK
	 */
done:
	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}

/*
 * Returns:	0			Success
 *	<pru_bind>:EINVAL		Invalid argument [COMMON_START]
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported
 *	<pru_bind>:EADDRNOTAVAIL	Address not available.
 *	<pru_bind>:EINVAL		Invalid argument
 *	<pru_bind>:EAFNOSUPPORT		Address family not supported [notdef]
 *	<pru_bind>:EACCES		Permission denied
 *	<pru_bind>:EADDRINUSE		Address in use
 *	<pru_bind>:EAGAIN		Resource unavailable, try again
 *	<pru_bind>:EPERM		Operation not permitted
 *	<pru_bind>:???
 *	<sf_bind>:???
 *
 * Notes:	It's not possible to fully enumerate the return codes above,
 *		since socket filter authors and protocol family authors may
 *		not choose to limit their error returns to those listed, even
 *		though this may result in some software operating incorrectly.
 *
 *		The error codes which are enumerated above are those known to
 *		be returned by the tcp_usr_bind function supplied.
 */
int
sobindlock(struct socket *so, struct sockaddr *nam, int dolock)
{
	struct proc *p = current_proc();
	int error = 0;

	if (dolock) {
		socket_lock(so, 1);
	}

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

#if NECP
	so_update_necp_policy(so, nam, NULL);
#endif /* NECP */

	/*
	 * If this is a bind request on a socket that has been marked
	 * as inactive, reject it now before we go any further.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		error = EINVAL;
		SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] (%d)\n",
		    __func__, proc_pid(p), proc_best_name(p),
		    so->so_gencnt,
		    SOCK_DOM(so), SOCK_TYPE(so), error);
		goto out;
	}

	/* Socket filter */
	error = sflt_bind(so, nam);

	if (error == 0) {
		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
	}
out:
	if (dolock) {
		socket_unlock(so, 1);
	}

	if (error == EJUSTRETURN) {
		error = 0;
	}

	return error;
}

void
sodealloc(struct socket *so)
{
	kauth_cred_unref(&so->so_cred);

	/* Remove any filters */
	sflt_termsock(so);

	so->so_gencnt = OSIncrementAtomic64((SInt64 *)&so_gencnt);

	if (so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) {
		cached_sock_free(so);
	} else {
		zfree(socket_zone, so);
	}
}

/*
 * Returns:	0			Success
 *		EINVAL
 *		EOPNOTSUPP
 *	<pru_listen>:EINVAL[AF_UNIX]
 *	<pru_listen>:EINVAL[TCP]
 *	<pru_listen>:EADDRNOTAVAIL[TCP]	Address not available.
 *	<pru_listen>:EINVAL[TCP]	Invalid argument
 *	<pru_listen>:EAFNOSUPPORT[TCP]	Address family not supported [notdef]
 *	<pru_listen>:EACCES[TCP]	Permission denied
 *	<pru_listen>:EADDRINUSE[TCP]	Address in use
 *	<pru_listen>:EAGAIN[TCP]	Resource unavailable, try again
 *	<pru_listen>:EPERM[TCP]		Operation not permitted
 *	<sf_listen>:???
 *
 * Notes:	Other <pru_listen> returns depend on the protocol family; all
 *		<sf_listen> returns depend on what the filter author causes
 *		their filter to return.
 */
int
solisten(struct socket *so, int backlog)
{
	struct proc *p = current_proc();
	int error = 0;

	socket_lock(so, 1);

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	if (TAILQ_EMPTY(&so->so_comp)) {
		so->so_options |= SO_ACCEPTCONN;
	}

#if NECP
	so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */

	if (so->so_proto == NULL) {
		error = EINVAL;
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}
	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
		error = EOPNOTSUPP;
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	/*
	 * If the listen request is made on a socket that is not fully
	 * disconnected, or on a socket that has been marked as inactive,
	 * reject the request now.
	 */
	if ((so->so_state &
	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) ||
	    (so->so_flags & SOF_DEFUNCT)) {
		error = EINVAL;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    proc_best_name(p),
			    so->so_gencnt,
			    SOCK_DOM(so), SOCK_TYPE(so), error);
		}
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) {
		error = EPERM;
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	error = sflt_listen(so);
	if (error == 0) {
		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
	}

	if (error) {
		if (error == EJUSTRETURN) {
			error = 0;
		}
		so->so_options &= ~SO_ACCEPTCONN;
		goto out;
	}

	/*
	 * POSIX: The implementation may have an upper limit on the length of
	 * the listen queue, either global or per accepting socket. If backlog
	 * exceeds this limit, the length of the listen queue is set to the
	 * limit.
	 *
	 * If listen() is called with a backlog argument value that is less
	 * than 0, the function behaves as if it had been called with a backlog
	 * argument value of 0.
	 *
	 * A backlog argument of 0 may allow the socket to accept connections,
	 * in which case the length of the listen queue may be set to an
	 * implementation-defined minimum value.
	 */
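	/*
	 * Concretely, given the clamp below and the default somaxconn of
	 * 128, both listen(fd, 0) and listen(fd, 500) leave so_qlimit
	 * at 128.
	 */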
	if (backlog <= 0 || backlog > somaxconn) {
		backlog = somaxconn;
	}

	so->so_qlimit = (short)backlog;
out:
	socket_unlock(so, 1);
	return error;
}

/*
 * The "accept list lock" protects the fields related to the listener queues
 * because we can unlock a socket to respect the lock ordering between
 * the listener socket and its client sockets. The lock ordering is first to
 * acquire the client socket before the listener socket.
 *
 * The accept list lock serializes access to the following fields:
 * - of the listener socket:
 *   - so_comp
 *   - so_incomp
 *   - so_qlen
 *   - so_incqlen
 * - of client sockets that are in so_comp or so_incomp:
 *   - so_head
 *   - so_list
 *
 * As one can see the accept list lock protects the consistency of the
 * linkage of the client sockets.
 *
 * Note that those fields may be read without holding the accept list lock
 * for a preflight provided the accept list lock is taken when committing
 * to take an action based on the result of the preflight. The preflight
 * saves the cost of doing the unlock/lock dance.
 */
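/*
 * Typical usage pattern (a sketch; see soclose_locked() and
 * sofreelastref() below for real callers):
 *
 *	socket_lock(head, 1);
 *	so_acquire_accept_list(head, so);
 *	... walk or edit head->so_comp / head->so_incomp ...
 *	so_release_accept_list(head);
 *	socket_unlock(head, 1);
 */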
void
so_acquire_accept_list(struct socket *head, struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (head->so_proto->pr_getlock == NULL) {
		return;
	}
	mutex_held = (*head->so_proto->pr_getlock)(head, PR_F_WILLUNLOCK);
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	if (!(head->so_flags1 & SOF1_ACCEPT_LIST_HELD)) {
		head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
		return;
	}
	if (so != NULL) {
		socket_unlock(so, 0);
	}
	while (head->so_flags1 & SOF1_ACCEPT_LIST_HELD) {
		so_accept_list_waits += 1;
		msleep((caddr_t)&head->so_incomp, mutex_held,
		    PSOCK | PCATCH, __func__, NULL);
	}
	head->so_flags1 |= SOF1_ACCEPT_LIST_HELD;
	if (so != NULL) {
		socket_unlock(head, 0);
		socket_lock(so, 0);
		socket_lock(head, 0);
	}
}

void
so_release_accept_list(struct socket *head)
{
	if (head->so_proto->pr_getlock != NULL) {
		lck_mtx_t *mutex_held;

		mutex_held = (*head->so_proto->pr_getlock)(head, 0);
		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

		head->so_flags1 &= ~SOF1_ACCEPT_LIST_HELD;
		wakeup((caddr_t)&head->so_incomp);
	}
}

void
sofreelastref(struct socket *so, int dealloc)
{
	struct socket *head = so->so_head;

	/* Assume socket is locked */

#if FLOW_DIVERT
	if (so->so_flags & SOF_FLOW_DIVERT) {
		flow_divert_detach(so);
	}
#endif /* FLOW_DIVERT */

#if CONTENT_FILTER
	if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
		cfil_sock_detach(so);
	}
#endif /* CONTENT_FILTER */

	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		soflow_detach(so);
	}

	if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) {
		selthreadclear(&so->so_snd.sb_sel);
		selthreadclear(&so->so_rcv.sb_sel);
		so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
		so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
		so->so_event = sonullevent;
		return;
	}
	if (head != NULL) {
		/*
		 * Need to lock the listener when the protocol has
		 * per socket locks
		 */
		if (head->so_proto->pr_getlock != NULL) {
			socket_lock(head, 1);
			so_acquire_accept_list(head, so);
		}
		if (so->so_state & SS_INCOMP) {
			so->so_state &= ~SS_INCOMP;
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
			head->so_qlen--;
			so->so_head = NULL;

			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			}
		} else if (so->so_state & SS_COMP) {
			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			}
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue. If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			selthreadclear(&so->so_snd.sb_sel);
			selthreadclear(&so->so_rcv.sb_sel);
			so->so_rcv.sb_flags &= ~(SB_SEL | SB_UPCALL);
			so->so_snd.sb_flags &= ~(SB_SEL | SB_UPCALL);
			so->so_event = sonullevent;
			return;
		} else {
			if (head->so_proto->pr_getlock != NULL) {
				so_release_accept_list(head);
				socket_unlock(head, 1);
			}
			printf("sofree: not queued\n");
		}
	}
	sowflush(so);
	sorflush(so);

	/* 3932268: disable upcall */
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~(SB_UPCALL | SB_SNDBYTE_CNT);
	so->so_event = sonullevent;

	if (dealloc) {
		sodealloc(so);
	}
}

void
soclose_wait_locked(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	/*
	 * Double check here and return if there's no outstanding upcall;
	 * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
	 */
	if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT)) {
		return;
	}
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	so->so_snd.sb_flags &= ~SB_UPCALL;
	so->so_flags |= SOF_CLOSEWAIT;

	(void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
	    "soclose_wait_locked", NULL);
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->so_flags &= ~SOF_CLOSEWAIT;
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose_locked(struct socket *so)
{
	int error = 0;
	struct timespec ts;

	if (so->so_usecount == 0) {
		panic("soclose: so=%p refcount=0", so);
		/* NOTREACHED */
	}

	sflt_notify(so, sock_evt_closing, NULL);

	if (so->so_upcallusecount) {
		soclose_wait_locked(so);
	}

#if CONTENT_FILTER
	/*
	 * We have to wait until the content filters are done
	 */
	if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
		cfil_sock_close_wait(so);
		cfil_sock_is_closed(so);
		cfil_sock_detach(so);
	}
#endif /* CONTENT_FILTER */

	if (NEED_DGRAM_FLOW_TRACKING(so)) {
		soflow_detach(so);
	}

	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
		soresume(current_proc(), so, 1);
		so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
	}

	if ((so->so_options & SO_ACCEPTCONN)) {
		struct socket *sp, *sonext;
		int persocklock = 0;
		int incomp_overflow_only;

		/*
		 * We do not want new connection to be added
		 * to the connection queues
		 */
		so->so_options &= ~SO_ACCEPTCONN;

		/*
		 * We can drop the lock on the listener once
		 * we've acquired the incoming list
		 */
		if (so->so_proto->pr_getlock != NULL) {
			persocklock = 1;
			so_acquire_accept_list(so, NULL);
			socket_unlock(so, 0);
		}
again:
		incomp_overflow_only = 1;

		TAILQ_FOREACH_SAFE(sp, &so->so_incomp, so_list, sonext) {
			/*
			 * Radar 5350314
			 * Skip sockets thrown away by tcpdropdropblreq;
			 * they will get cleaned up by the garbage collection.
			 * Otherwise, remove the incomp socket from the queue
			 * and let soabort trigger the appropriate cleanup.
			 */
			if (sp->so_flags & SOF_OVERFLOW) {
				continue;
			}

			if (persocklock != 0) {
				socket_lock(sp, 1);
			}

			/*
			 * Radar 27945981
			 * The extra reference for the list ensures the
			 * validity of the socket pointer when we perform the
			 * unlock of the head above
			 */
			if (sp->so_state & SS_INCOMP) {
				sp->so_state &= ~SS_INCOMP;
				sp->so_head = NULL;
				TAILQ_REMOVE(&so->so_incomp, sp, so_list);
				so->so_incqlen--;
				so->so_qlen--;

				(void) soabort(sp);
			} else {
				panic("%s sp %p in so_incomp but !SS_INCOMP",
				    __func__, sp);
			}

			if (persocklock != 0) {
				socket_unlock(sp, 1);
			}
		}

		TAILQ_FOREACH_SAFE(sp, &so->so_comp, so_list, sonext) {
			/* Dequeue from so_comp since sofree() won't do it */
			if (persocklock != 0) {
				socket_lock(sp, 1);
			}

			if (sp->so_state & SS_COMP) {
				sp->so_state &= ~SS_COMP;
				sp->so_head = NULL;
				TAILQ_REMOVE(&so->so_comp, sp, so_list);
				so->so_qlen--;

				(void) soabort(sp);
			} else {
				panic("%s sp %p in so_comp but !SS_COMP",
				    __func__, sp);
			}

			if (persocklock) {
				socket_unlock(sp, 1);
			}
		}

		if (incomp_overflow_only == 0 && !TAILQ_EMPTY(&so->so_incomp)) {
#if (DEBUG | DEVELOPMENT)
			panic("%s head %p so_incomp not empty", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

			goto again;
		}

		if (!TAILQ_EMPTY(&so->so_comp)) {
#if (DEBUG | DEVELOPMENT)
			panic("%s head %p so_comp not empty", __func__, so);
#endif /* (DEVELOPMENT || DEBUG) */

			goto again;
		}

		if (persocklock) {
			socket_lock(so, 0);
			so_release_accept_list(so);
		}
	}
	if (so->so_pcb == NULL) {
		/* 3915887: mark the socket as ready for dealloc */
		so->so_flags |= SOF_PCBCLEARING;
		goto discard;
	}

	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnectlocked(so);
			if (error) {
				goto drop;
			}
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO)) {
				goto drop;
			}
			while ((so->so_state & SS_ISCONNECTED) && so->so_linger > 0) {
				lck_mtx_t *mutex_held;

				if (so->so_proto->pr_getlock != NULL) {
					mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
				} else {
					mutex_held = so->so_proto->pr_domain->dom_mtx;
				}
				ts.tv_sec = (so->so_linger / 100);
				ts.tv_nsec = (so->so_linger % 100) *
				    NSEC_PER_USEC * 1000 * 10;
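				/*
				 * Note: so_linger is treated here as a count
				 * of 1/100ths of a second, so each remainder
				 * unit adds 10 ms (10^7 ns) to the timespec.
				 */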
				error = msleep((caddr_t)&so->so_timeo,
				    mutex_held, PSOCK | PCATCH, "soclose", &ts);
				if (error) {
					/*
					 * It's OK when the timer fires,
					 * don't report an error
					 */
					if (error == EWOULDBLOCK) {
						error = 0;
					}
					break;
				}
			}
		}
	}
drop:
	if (so->so_usecount == 0) {
		panic("soclose: usecount is zero so=%p", so);
		/* NOTREACHED */
	}
	if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0) {
			error = error2;
		}
	}
	if (so->so_usecount <= 0) {
		panic("soclose: usecount is zero so=%p", so);
		/* NOTREACHED */
	}
discard:
	if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
	    (so->so_state & SS_NOFDREF)) {
		panic("soclose: NOFDREF");
		/* NOTREACHED */
	}
	so->so_state |= SS_NOFDREF;

	if ((so->so_flags & SOF_KNOTE) != 0) {
		KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
	}

	os_atomic_dec(&so->so_proto->pr_domain->dom_refs, relaxed);

	VERIFY(so->so_usecount > 0);
	so->so_usecount--;
	sofree(so);
	return error;
}

int
soclose(struct socket *so)
{
	int error = 0;
	socket_lock(so, 1);

	if (so->so_retaincnt == 0) {
		error = soclose_locked(so);
	} else {
		/*
		 * if the FD is going away, but socket is
		 * retained in kernel remove its reference
		 */
		so->so_usecount--;
		if (so->so_usecount < 2) {
			panic("soclose: retaincnt non null and so=%p "
			    "usecount=%d\n", so, so->so_usecount);
		}
	}
	socket_unlock(so, 1);
	return error;
}

/*
 * Must be called at splnet...
 */
/* Should already be locked */
int
soabort(struct socket *so)
{
	int error;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	if ((so->so_flags & SOF_ABORTED) == 0) {
		so->so_flags |= SOF_ABORTED;
		error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
		if (error) {
			sofree(so);
			return error;
		}
	}
	return 0;
}

int
soacceptlock(struct socket *so, struct sockaddr **nam, int dolock)
{
	int error;

	if (dolock) {
		socket_lock(so, 1);
	}

	so_update_last_owner_locked(so, PROC_NULL);
	so_update_policy(so);
#if NECP
	so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */

	if ((so->so_state & SS_NOFDREF) == 0) {
		panic("soaccept: !NOFDREF");
	}
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);

	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
	return soacceptlock(so, nam, 1);
}

int
soacceptfilter(struct socket *so, struct socket *head)
{
	struct sockaddr *__single local = NULL, *__single remote = NULL;
	int error = 0;

	/*
	 * Hold the lock even if this socket has not been made visible
	 * to the filter(s). For sockets with global locks, this protects
	 * against the head or peer going away
	 */
	socket_lock(so, 1);
	if (sogetaddr_locked(so, &remote, 1) != 0 ||
	    sogetaddr_locked(so, &local, 0) != 0) {
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Out of resources; try it again next time */
		error = ECONNABORTED;
		goto done;
	}

	error = sflt_accept(head, so, local, remote);

	/*
	 * If we get EJUSTRETURN from one of the filters, mark this socket
	 * as inactive and return it anyway. This newly accepted socket
	 * will be disconnected later before we hand it off to the caller.
	 */
	if (error == EJUSTRETURN) {
		error = 0;
		(void) sosetdefunct(current_proc(), so,
		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
	}

	if (error != 0) {
		/*
		 * This may seem like a duplication to the above error
		 * handling part when we return ECONNABORTED, except
		 * the following is done while holding the lock since
		 * the socket has been exposed to the filter(s) earlier.
		 */
		so->so_state &= ~SS_NOFDREF;
		socket_unlock(so, 1);
		soclose(so);
		/* Propagate socket filter's error code to the caller */
	} else {
		socket_unlock(so, 1);
	}
done:
	/* Callee checks for NULL pointer */
	sock_freeaddr(remote);
	sock_freeaddr(local);
	return error;
}

/*
 * Returns:	0			Success
 *		EOPNOTSUPP		Operation not supported on socket
 *		EISCONN			Socket is connected
 *	<pru_connect>:EADDRNOTAVAIL	Address not available.
 *	<pru_connect>:EINVAL		Invalid argument
 *	<pru_connect>:EAFNOSUPPORT	Address family not supported [notdef]
 *	<pru_connect>:EACCES		Permission denied
 *	<pru_connect>:EADDRINUSE	Address in use
 *	<pru_connect>:EAGAIN		Resource unavailable, try again
 *	<pru_connect>:EPERM		Operation not permitted
 *	<sf_connect_out>:???		[anything a filter writer might set]
 */
int
soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
{
	int error;
	struct proc *p = current_proc();
	tracker_metadata_t metadata = { };

	if (dolock) {
		socket_lock(so, 1);
	}

	so_update_last_owner_locked(so, p);
	so_update_policy(so);

	/*
	 * If this is a listening socket or if this is a previously-accepted
	 * socket that has been marked as inactive, reject the connect request.
	 */
	if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
		error = EOPNOTSUPP;
		if (so->so_flags & SOF_DEFUNCT) {
			SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
			    "(%d)\n", __func__, proc_pid(p),
			    proc_best_name(p),
			    so->so_gencnt,
			    SOCK_DOM(so), SOCK_TYPE(so), error);
		}
		if (dolock) {
			socket_unlock(so, 1);
		}
		return error;
	}

	if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
		if (dolock) {
			socket_unlock(so, 1);
		}
		return EPERM;
	}

	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
#if NECP
	bool set_domain_from_tracker_lookup = false;
#endif /* NECP */
	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnectlocked(so)))) {
		error = EISCONN;
	} else {
		/*
		 * For connected v4/v6 sockets, check if the destination
		 * address associates with a domain name and if it is a
		 * tracker domain, and mark the socket accordingly. Skip
		 * the lookup if the socket has already been marked as a
		 * tracker.
		 */
		if (!(so->so_flags1 & SOF1_KNOWN_TRACKER) && IS_INET(so)) {
			if (tracker_lookup(so->so_flags & SOF_DELEGATED ? so->e_uuid : so->last_uuid, nam, &metadata) == 0) {
				if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_TRACKER) {
					so->so_flags1 |= SOF1_KNOWN_TRACKER;
				}
				if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_APP_APPROVED) {
					so->so_flags1 |= SOF1_APPROVED_APP_DOMAIN;
				}
#if NECP
				set_domain_from_tracker_lookup = (metadata.domain[0] != 0);
#endif /* NECP */
				necp_set_socket_domain_attributes(so,
				    __unsafe_null_terminated_from_indexable(metadata.domain),
				    __unsafe_null_terminated_from_indexable(metadata.domain_owner));
			}
		}

#if NECP
		/* Update NECP evaluation after setting any domain via the tracker checks */
		so_update_necp_policy(so, NULL, nam);
		if (set_domain_from_tracker_lookup && (so->so_flags1 & SOF1_DOMAIN_MATCHED_POLICY)) {
			// Mark extended timeout on tracker lookup to ensure that the entry stays around
			tracker_metadata_t update_metadata = { };
			update_metadata.flags = SO_TRACKER_ATTRIBUTE_FLAGS_EXTENDED_TIMEOUT;
			(void)tracker_lookup(so->so_flags & SOF_DELEGATED ? so->e_uuid : so->last_uuid, nam, &update_metadata);
		}
#endif /* NECP */

		/*
		 * Run connect filter before calling protocol:
		 * - non-blocking connect returns before completion;
		 */
		error = sflt_connectout(so, nam);
		if (error != 0) {
			if (error == EJUSTRETURN) {
				error = 0;
			}
		} else {
			error = (*so->so_proto->pr_usrreqs->pru_connect)
			    (so, nam, p);
			if (error != 0) {
				so->so_state &= ~SS_ISCONNECTING;
			}
		}
	}
	if (dolock) {
		socket_unlock(so, 1);
	}
	return error;
}

int
soconnect(struct socket *so, struct sockaddr *nam)
{
	return soconnectlock(so, nam, 1);
}

/*
 * Returns:	0			Success
 *	<pru_connect2>:EINVAL[AF_UNIX]
 *	<pru_connect2>:EPROTOTYPE[AF_UNIX]
 *	<pru_connect2>:???		[other protocol families]
 *
 * Notes:	<pru_connect2> is not supported by [TCP].
 */
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int error;

	socket_lock(so1, 1);
	if (so2->so_proto->pr_lock) {
		socket_lock(so2, 1);
	}

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);

	socket_unlock(so1, 1);
	if (so2->so_proto->pr_lock) {
		socket_unlock(so2, 1);
	}
	return error;
}
1793
1794 int
1795 soconnectxlocked(struct socket *so, struct sockaddr *src,
1796 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
1797 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
1798 uint32_t arglen, uio_t auio, user_ssize_t *bytes_written)
1799 {
1800 int error;
1801 tracker_metadata_t metadata = { };
1802
1803 so_update_last_owner_locked(so, p);
1804 so_update_policy(so);
1805
1806 /*
1807 * If this is a listening socket or if this is a previously-accepted
1808 * socket that has been marked as inactive, reject the connect request.
1809 */
1810 if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
1811 error = EOPNOTSUPP;
1812 if (so->so_flags & SOF_DEFUNCT) {
1813 			SODEFUNCTLOG("%s[%d, %s]: defunct so %llu [%d,%d] "
1814 "(%d)\n", __func__, proc_pid(p),
1815 proc_best_name(p),
1816 so->so_gencnt,
1817 SOCK_DOM(so), SOCK_TYPE(so), error);
1818 }
1819 return error;
1820 }
1821
1822 if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) {
1823 return EPERM;
1824 }
1825
1826 /*
1827 * If protocol is connection-based, can only connect once
1828 * unless PR_MULTICONN is set. Otherwise, if connected,
1829 * try to disconnect first. This allows user to disconnect
1830 * by connecting to, e.g., a null address.
1831 */
1832 #if NECP
1833 bool set_domain_from_tracker_lookup = false;
1834 #endif /* NECP */
1835 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) &&
1836 !(so->so_proto->pr_flags & PR_MULTICONN) &&
1837 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
1838 (error = sodisconnectlocked(so)) != 0)) {
1839 error = EISCONN;
1840 } else {
1841 /*
1842 * For TCP, check if destination address is a tracker and mark the socket accordingly
1843 * (only if it hasn't been marked yet).
1844 */
1845 if (SOCK_CHECK_TYPE(so, SOCK_STREAM) && SOCK_CHECK_PROTO(so, IPPROTO_TCP) &&
1846 !(so->so_flags1 & SOF1_KNOWN_TRACKER)) {
1847 if (tracker_lookup(so->so_flags & SOF_DELEGATED ? so->e_uuid : so->last_uuid, dst, &metadata) == 0) {
1848 if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_TRACKER) {
1849 so->so_flags1 |= SOF1_KNOWN_TRACKER;
1850 }
1851 if (metadata.flags & SO_TRACKER_ATTRIBUTE_FLAGS_APP_APPROVED) {
1852 so->so_flags1 |= SOF1_APPROVED_APP_DOMAIN;
1853 }
1854 #if NECP
1855 set_domain_from_tracker_lookup = (metadata.domain[0] != 0);
1856 #endif /* NECP */
1857 necp_set_socket_domain_attributes(so, __unsafe_null_terminated_from_indexable(metadata.domain),
1858 __unsafe_null_terminated_from_indexable(metadata.domain_owner));
1859 }
1860 }
1861
1862 if ((so->so_proto->pr_flags & PR_DATA_IDEMPOTENT) &&
1863 (flags & CONNECT_DATA_IDEMPOTENT)) {
1864 so->so_flags1 |= SOF1_DATA_IDEMPOTENT;
1865
1866 if (flags & CONNECT_DATA_AUTHENTICATED) {
1867 so->so_flags1 |= SOF1_DATA_AUTHENTICATED;
1868 }
1869 }
1870
1871 /*
1872 * Case 1: CONNECT_RESUME_ON_READ_WRITE set, no data.
1873 * Case 2: CONNECT_RESUME_ON_READ_WRITE set, with data (user error)
1874 * Case 3: CONNECT_RESUME_ON_READ_WRITE not set, with data
1875 * Case 3 allows user to combine write with connect even if they have
1876 		 * no use for TFO (such as regular TCP or UDP).
1877 * Case 4: CONNECT_RESUME_ON_READ_WRITE not set, no data (regular case)
1878 */
1879 if ((so->so_proto->pr_flags & PR_PRECONN_WRITE) &&
1880 ((flags & CONNECT_RESUME_ON_READ_WRITE) || auio)) {
1881 so->so_flags1 |= SOF1_PRECONNECT_DATA;
1882 }
1883
1884 /*
1885 * If a user sets data idempotent and does not pass an uio, or
1886 		 * If a user sets data idempotent but does not pass an uio, or
1887 		 * sets CONNECT_RESUME_ON_READ_WRITE, this is an error; reset
1888 */
1889 if (!(so->so_flags1 & SOF1_PRECONNECT_DATA) &&
1890 (so->so_flags1 & SOF1_DATA_IDEMPOTENT)) {
1891 /* We should return EINVAL instead perhaps. */
1892 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
1893 }
1894
1895 /*
1896 * Run connect filter before calling protocol:
1897 * - non-blocking connect returns before completion;
1898 */
1899 error = sflt_connectout(so, dst);
1900 if (error != 0) {
1901 /* Disable PRECONNECT_DATA, as we don't need to send a SYN anymore. */
1902 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1903 if (error == EJUSTRETURN) {
1904 error = 0;
1905 }
1906 } else {
1907 error = (*so->so_proto->pr_usrreqs->pru_connectx)
1908 (so, src, dst, p, ifscope, aid, pcid,
1909 flags, arg, arglen, auio, bytes_written);
1910 if (error != 0) {
1911 so->so_state &= ~SS_ISCONNECTING;
1912 if (error != EINPROGRESS) {
1913 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1914 }
1915 }
1916
1917 #if NECP
1918 if (set_domain_from_tracker_lookup && (so->so_flags1 & SOF1_DOMAIN_MATCHED_POLICY)) {
1919 // Mark extended timeout on tracker lookup to ensure that the entry stays around
1920 tracker_metadata_t update_metadata = { };
1921 update_metadata.flags = SO_TRACKER_ATTRIBUTE_FLAGS_EXTENDED_TIMEOUT;
1922 (void)tracker_lookup(so->so_flags & SOF_DELEGATED ? so->e_uuid : so->last_uuid, dst, &update_metadata);
1923 }
1924 #endif /* NECP */
1925 }
1926 }
1927
1928 return error;
1929 }
1930
1931 int
1932 sodisconnectlocked(struct socket *so)
1933 {
1934 int error;
1935
1936 if ((so->so_state & SS_ISCONNECTED) == 0) {
1937 error = ENOTCONN;
1938 goto bad;
1939 }
1940 if (so->so_state & SS_ISDISCONNECTING) {
1941 error = EALREADY;
1942 goto bad;
1943 }
1944
1945 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
1946 if (error == 0) {
1947 sflt_notify(so, sock_evt_disconnected, NULL);
1948 }
1949
1950 bad:
1951 return error;
1952 }
1953
1954 /* Locking version */
1955 int
1956 sodisconnect(struct socket *so)
1957 {
1958 int error;
1959
1960 socket_lock(so, 1);
1961 error = sodisconnectlocked(so);
1962 socket_unlock(so, 1);
1963 return error;
1964 }
1965
1966 int
1967 sodisconnectxlocked(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1968 {
1969 int error;
1970
1971 /*
1972 * Call the protocol disconnectx handler; let it handle all
1973 * matters related to the connection state of this session.
1974 */
1975 error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid);
1976 if (error == 0) {
1977 /*
1978 * The event applies only for the session, not for
1979 * the disconnection of individual subflows.
1980 */
1981 if (so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) {
1982 sflt_notify(so, sock_evt_disconnected, NULL);
1983 }
1984 }
1985 return error;
1986 }
1987
1988 int
1989 sodisconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
1990 {
1991 int error;
1992
1993 socket_lock(so, 1);
1994 error = sodisconnectxlocked(so, aid, cid);
1995 socket_unlock(so, 1);
1996 return error;
1997 }
1998
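/*
 * Map the caller's MSG_DONTWAIT onto the socket-buffer lock primitive:
 * with MSG_DONTWAIT the lock is attempted without sleeping; otherwise
 * SBL_WAIT allows sblock() to sleep until the lock is available.
 */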
1999 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
2000
2001 /*
2002 * sosendcheck will lock the socket buffer if it isn't locked and
2003 * verify that there is space for the data being inserted.
2004 *
2005 * Returns: 0 Success
2006 * EPIPE
2007 * sblock:EWOULDBLOCK
2008 * sblock:EINTR
2009 * sbwait:EBADF
2010 * sbwait:EINTR
2011 * [so_error]:???
2012 */
2013 int
2014 sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid,
2015 int32_t clen, int32_t atomic, int flags, int *sblocked)
2016 {
2017 int error = 0;
2018 int32_t space;
2019 int assumelock = 0;
2020
2021 restart:
2022 if (*sblocked == 0) {
2023 if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
2024 so->so_send_filt_thread != 0 &&
2025 so->so_send_filt_thread == current_thread()) {
2026 /*
2027 * We're being called recursively from a filter,
2028 * allow this to continue. Radar 4150520.
2029 * Don't set sblocked because we don't want
2030 * to perform an unlock later.
2031 */
2032 assumelock = 1;
2033 } else {
2034 error = sblock(&so->so_snd, SBLOCKWAIT(flags));
2035 if (error) {
2036 if (so->so_flags & SOF_DEFUNCT) {
2037 goto defunct;
2038 }
2039 return error;
2040 }
2041 *sblocked = 1;
2042 }
2043 }
2044
2045 /*
2046 * If a send attempt is made on a socket that has been marked
2047 * as inactive (disconnected), reject the request.
2048 */
2049 if (so->so_flags & SOF_DEFUNCT) {
2050 defunct:
2051 error = EPIPE;
2052 		SODEFUNCTLOG("%s[%d, %s]: defunct so %llu [%d,%d] (%d)\n",
2053 __func__, proc_selfpid(), proc_best_name(current_proc()),
2054 so->so_gencnt,
2055 SOCK_DOM(so), SOCK_TYPE(so), error);
2056 return error;
2057 }
2058
2059 if (so->so_state & SS_CANTSENDMORE) {
2060 #if CONTENT_FILTER
2061 /*
2062 		 * Can re-inject data of half-closed connections
2063 */
2064 if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
2065 so->so_snd.sb_cfil_thread == current_thread() &&
2066 cfil_sock_data_pending(&so->so_snd) != 0) {
2067 CFIL_LOG(LOG_INFO,
2068 "so %llx ignore SS_CANTSENDMORE",
2069 (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
2070 } else
2071 #endif /* CONTENT_FILTER */
2072 return EPIPE;
2073 }
2074 if (so->so_error) {
2075 error = so->so_error;
2076 so->so_error = 0;
2077 return error;
2078 }
2079
2080 if ((so->so_state & SS_ISCONNECTED) == 0) {
2081 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
2082 if (((so->so_state & SS_ISCONFIRMING) == 0) &&
2083 (resid != 0 || clen == 0) &&
2084 !(so->so_flags1 & SOF1_PRECONNECT_DATA)) {
2085 return ENOTCONN;
2086 }
2087 } else if (addr == 0) {
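			/*
			 * PR_CONNREQUIRED is known false on this path, so the
			 * conditional below always yields EDESTADDRREQ.
			 */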
2088 return (so->so_proto->pr_flags & PR_CONNREQUIRED) ?
2089 ENOTCONN : EDESTADDRREQ;
2090 }
2091 }
2092
2093 space = sbspace(&so->so_snd);
2094
2095 if (flags & MSG_OOB) {
2096 space += 1024;
2097 }
2098 if ((atomic && resid > so->so_snd.sb_hiwat) ||
2099 clen > so->so_snd.sb_hiwat) {
2100 return EMSGSIZE;
2101 }
2102
2103 if ((space < resid + clen &&
2104 (atomic || (space < (int32_t)so->so_snd.sb_lowat) ||
2105 space < clen)) ||
2106 (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
2107 /*
2108 * don't block the connectx call when there's more data
2109 * than can be copied.
2110 */
2111 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2112 if (space == 0) {
2113 return EWOULDBLOCK;
2114 }
2115 if (space < (int32_t)so->so_snd.sb_lowat) {
2116 return 0;
2117 }
2118 }
2119 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
2120 assumelock) {
2121 return EWOULDBLOCK;
2122 }
2123 sbunlock(&so->so_snd, TRUE); /* keep socket locked */
2124 *sblocked = 0;
2125 error = sbwait(&so->so_snd);
2126 if (error) {
2127 if (so->so_flags & SOF_DEFUNCT) {
2128 goto defunct;
2129 }
2130 return error;
2131 }
2132 goto restart;
2133 }
2134 return 0;
2135 }
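/*
 * Minimal sketch of the calling convention (it mirrors the loop in sosend()
 * below): the socket is locked on entry, and *sblocked records whether the
 * send buffer lock is held so it can be dropped exactly once on the way out:
 *
 *	int sblocked = 0;
 *	error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
 *	...
 *	if (sblocked)
 *		sbunlock(&so->so_snd, FALSE);	// will unlock socket
 *	else
 *		socket_unlock(so, 1);
 */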
2136
2137 /*
2138 * Send on a socket.
2139 * If send must go all at once and message is larger than
2140 * send buffering, then hard error.
2141 * Lock against other senders.
2142 * If must go all at once and not enough room now, then
2143 * inform user that this would block and do nothing.
2144 * Otherwise, if nonblocking, send as much as possible.
2145 * The data to be sent is described by "uio" if nonzero,
2146 * otherwise by the mbuf chain "top" (which must be null
2147 * if uio is not). Data provided in mbuf chain must be small
2148 * enough to send all at once.
2149 *
2150 * Returns nonzero on error, timeout or signal; callers
2151 * must check for short counts if EINTR/ERESTART are returned.
2152 * Data and control buffers are freed on return.
2153 *
2154 * Returns: 0 Success
2155 * EOPNOTSUPP
2156 * EINVAL
2157 * ENOBUFS
2158 * uiomove:EFAULT
2159 * sosendcheck:EPIPE
2160 * sosendcheck:EWOULDBLOCK
2161 * sosendcheck:EINTR
2162 * sosendcheck:EBADF
2163 * sosendcheck:EINTR
2164 * sosendcheck:??? [value from so_error]
2165 * <pru_send>:ECONNRESET[TCP]
2166 * <pru_send>:EINVAL[TCP]
2167 * <pru_send>:ENOBUFS[TCP]
2168 * <pru_send>:EADDRINUSE[TCP]
2169 * <pru_send>:EADDRNOTAVAIL[TCP]
2170 * <pru_send>:EAFNOSUPPORT[TCP]
2171 * <pru_send>:EACCES[TCP]
2172 * <pru_send>:EAGAIN[TCP]
2173 * <pru_send>:EPERM[TCP]
2174 * <pru_send>:EMSGSIZE[TCP]
2175 * <pru_send>:EHOSTUNREACH[TCP]
2176 * <pru_send>:ENETUNREACH[TCP]
2177 * <pru_send>:ENETDOWN[TCP]
2178 * <pru_send>:ENOMEM[TCP]
2179 * <pru_send>:ENOBUFS[TCP]
2180 * <pru_send>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
2181 * <pru_send>:EINVAL[AF_UNIX]
2182 * <pru_send>:EOPNOTSUPP[AF_UNIX]
2183 * <pru_send>:EPIPE[AF_UNIX]
2184 * <pru_send>:ENOTCONN[AF_UNIX]
2185 * <pru_send>:EISCONN[AF_UNIX]
2186 * <pru_send>:???[AF_UNIX] [whatever a filter author chooses]
2187 * <sf_data_out>:??? [whatever a filter author chooses]
2188 *
2189 * Notes: Other <pru_send> returns depend on the protocol family; all
2190 * <sf_data_out> returns depend on what the filter author causes
2191 * their filter to return.
2192 */
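/*
 * Illustrative call (a sketch, not a definitive usage): sending uio-described
 * data on a connected socket with no destination address and no control data:
 *
 *	error = sosend(so, NULL, uio, NULL, NULL, 0);
 *
 * For unconnected datagram sockets, addr supplies the destination, and
 * "top"/"control" may carry prepackaged data and ancillary data instead.
 */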
2193 int
2194 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
2195 struct mbuf *top, struct mbuf *control, int flags)
2196 {
2197 mbuf_ref_ref_t mp;
2198 mbuf_ref_t m, freelist = NULL;
2199 struct soflow_hash_entry *__single dgram_flow_entry = NULL;
2200 user_ssize_t space, len, resid, orig_resid;
2201 int clen = 0, error, dontroute, sendflags;
2202 int atomic = sosendallatonce(so) || top;
2203 int sblocked = 0;
2204 struct proc *p = current_proc();
2205 uint16_t headroom = 0;
2206 ssize_t mlen;
2207 boolean_t en_tracing = FALSE;
2208
2209 if (uio != NULL) {
2210 resid = uio_resid(uio);
2211 } else {
2212 resid = top->m_pkthdr.len;
2213 }
2214 orig_resid = resid;
2215
2216 KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid,
2217 so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2218
2219 socket_lock(so, 1);
2220
2221 if (NEED_DGRAM_FLOW_TRACKING(so)) {
2222 dgram_flow_entry = soflow_get_flow(so, NULL, addr, control, resid, SOFLOW_DIRECTION_OUTBOUND, 0);
2223 }
2224
2225 /*
2226 	 * Trace only when tracing is enabled, and only for network
2227 	 * (vs. unix) sockets over non-loopback interfaces.
2228 */
2229 if (ENTR_SHOULDTRACE &&
2230 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
2231 struct inpcb *inp = sotoinpcb(so);
2232 if (inp->inp_last_outifp != NULL &&
2233 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
2234 en_tracing = TRUE;
2235 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
2236 VM_KERNEL_ADDRPERM(so),
2237 ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
2238 (int64_t)resid);
2239 }
2240 }
2241
2242 /*
2243 * Re-injection should not affect process accounting
2244 */
2245 if ((flags & MSG_SKIPCFIL) == 0) {
2246 so_update_last_owner_locked(so, p);
2247 so_update_policy(so);
2248
2249 #if NECP
2250 so_update_necp_policy(so, NULL, addr);
2251 #endif /* NECP */
2252 }
2253
2254 if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
2255 error = EOPNOTSUPP;
2256 goto out_locked;
2257 }
2258
2259 /*
2260 * In theory resid should be unsigned.
2261 * However, space must be signed, as it might be less than 0
2262 * if we over-committed, and we must use a signed comparison
2263 * of space and resid. On the other hand, a negative resid
2264 * causes us to loop sending 0-length segments to the protocol.
2265 *
2266 * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets.
2267 *
2268 * Note: We limit resid to be a positive int value as we use
2269 * imin() to set bytes_to_copy -- radr://14558484
2270 */
2271 if (resid < 0 || resid > INT_MAX ||
2272 (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
2273 error = EINVAL;
2274 goto out_locked;
2275 }
2276
2277 dontroute = (flags & MSG_DONTROUTE) &&
2278 (so->so_options & SO_DONTROUTE) == 0 &&
2279 (so->so_proto->pr_flags & PR_ATOMIC);
2280 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2281
2282 if (control != NULL) {
2283 clen = control->m_len;
2284 }
2285
2286 if (soreserveheadroom != 0) {
2287 headroom = so->so_pktheadroom;
2288 }
2289
2290 do {
2291 error = sosendcheck(so, addr, resid, clen, atomic, flags,
2292 &sblocked);
2293 if (error) {
2294 goto out_locked;
2295 }
2296
2297 		mp = &top;
2298 space = sbspace(&so->so_snd) - clen;
2299 space += ((flags & MSG_OOB) ? 1024 : 0);
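		/*
		 * The extra 1024 bytes for MSG_OOB mirrors the slack granted
		 * in sosendcheck() above, keeping both space checks in sync.
		 */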
2300
2301 do {
2302 if (uio == NULL) {
2303 /*
2304 * Data is prepackaged in "top".
2305 */
2306 resid = 0;
2307 if (flags & MSG_EOR) {
2308 top->m_flags |= M_EOR;
2309 }
2310 } else {
2311 int chainlength;
2312 int bytes_to_copy;
2313 boolean_t jumbocl;
2314 boolean_t bigcl;
2315 int bytes_to_alloc;
2316
2317 bytes_to_copy = imin((int)resid, (int)space);
2318
2319 bytes_to_alloc = bytes_to_copy;
2320 if (top == NULL) {
2321 bytes_to_alloc += headroom;
2322 }
2323
2324 if (sosendminchain > 0) {
2325 chainlength = 0;
2326 } else {
2327 chainlength = sosendmaxchain;
2328 }
2329
2330 /*
2331 * Use big 4 KB cluster when the outgoing interface
2332 * does not prefer 2 KB clusters
2333 */
2334 bigcl = !(so->so_flags1 & SOF1_IF_2KCL) ||
2335 sosendbigcl_ignore_capab;
2336
2337 /*
2338 * Attempt to use larger than system page-size
2339 * clusters for large writes only if there is
2340 * a jumbo cluster pool and if the socket is
2341 * marked accordingly.
2342 */
2343 jumbocl = sosendjcl && njcl > 0 &&
2344 ((so->so_flags & SOF_MULTIPAGES) ||
2345 sosendjcl_ignore_capab) &&
2346 bigcl;
2347
2348 socket_unlock(so, 0);
2349
2350 do {
2351 int num_needed;
2352 int hdrs_needed = (top == NULL) ? 1 : 0;
2353
2354 /*
2355 					 * Try to maintain a local cache of mbuf
2356 					 * clusters needed to complete this
2357 					 * write; the list is further limited to
2358 					 * the number that are currently needed
2359 					 * to fill the socket. This mechanism
2360 					 * allows a large number of mbufs/
2361 					 * clusters to be grabbed under a single
2362 					 * mbuf lock... if we can't get any
2363 					 * clusters, then fall back to trying
2364 					 * for mbufs. If we fail early (or
2365 					 * miscalculate the number needed), make
2366 					 * sure to release any clusters we
2367 					 * haven't yet consumed.
2368 */
2369 if (freelist == NULL &&
2370 bytes_to_alloc > MBIGCLBYTES &&
2371 jumbocl) {
2372 num_needed =
2373 bytes_to_alloc / M16KCLBYTES;
2374
2375 if ((bytes_to_alloc -
2376 (num_needed * M16KCLBYTES))
2377 >= MINCLSIZE) {
2378 num_needed++;
2379 }
2380
2381 freelist =
2382 m_getpackets_internal(
2383 (unsigned int *)&num_needed,
2384 hdrs_needed, M_WAIT, 0,
2385 M16KCLBYTES);
2386 /*
2387 * Fall back to 4K cluster size
2388 * if allocation failed
2389 */
2390 }
2391
2392 if (freelist == NULL &&
2393 bytes_to_alloc > MCLBYTES &&
2394 bigcl) {
2395 num_needed =
2396 bytes_to_alloc / MBIGCLBYTES;
2397
2398 if ((bytes_to_alloc -
2399 (num_needed * MBIGCLBYTES)) >=
2400 MINCLSIZE) {
2401 num_needed++;
2402 }
2403
2404 freelist =
2405 m_getpackets_internal(
2406 (unsigned int *)&num_needed,
2407 hdrs_needed, M_WAIT, 0,
2408 MBIGCLBYTES);
2409 /*
2410 * Fall back to cluster size
2411 * if allocation failed
2412 */
2413 }
2414
2415 /*
2416 					 * Allocate a cluster, as we want to
2417 					 * avoid splitting the data into more
2418 					 * than one segment; using MINCLSIZE
2419 					 * would lead us to allocate two mbufs
2420 */
2421 if (soreserveheadroom != 0 &&
2422 freelist == NULL &&
2423 ((top == NULL &&
2424 bytes_to_alloc > _MHLEN) ||
2425 bytes_to_alloc > _MLEN)) {
2426 num_needed = ROUNDUP(bytes_to_alloc, MCLBYTES) /
2427 MCLBYTES;
2428 freelist =
2429 m_getpackets_internal(
2430 (unsigned int *)&num_needed,
2431 hdrs_needed, M_WAIT, 0,
2432 MCLBYTES);
2433 /*
2434 * Fall back to a single mbuf
2435 * if allocation failed
2436 */
2437 } else if (freelist == NULL &&
2438 bytes_to_alloc > MINCLSIZE) {
2439 num_needed =
2440 bytes_to_alloc / MCLBYTES;
2441
2442 if ((bytes_to_alloc -
2443 (num_needed * MCLBYTES)) >=
2444 MINCLSIZE) {
2445 num_needed++;
2446 }
2447
2448 freelist =
2449 m_getpackets_internal(
2450 (unsigned int *)&num_needed,
2451 hdrs_needed, M_WAIT, 0,
2452 MCLBYTES);
2453 /*
2454 * Fall back to a single mbuf
2455 * if allocation failed
2456 */
2457 }
2458 /*
2459 * For datagram protocols, leave
2460 * headroom for protocol headers
2461 * in the first cluster of the chain
2462 */
2463 if (freelist != NULL && atomic &&
2464 top == NULL && headroom > 0) {
2465 freelist->m_data += headroom;
2466 }
2467
2468 /*
2469 * Fall back to regular mbufs without
2470 * reserving the socket headroom
2471 */
2472 if (freelist == NULL) {
2473 if (SOCK_TYPE(so) != SOCK_STREAM || bytes_to_alloc <= MINCLSIZE) {
2474 if (top == NULL) {
2475 MGETHDR(freelist,
2476 M_WAIT, MT_DATA);
2477 } else {
2478 MGET(freelist,
2479 M_WAIT, MT_DATA);
2480 }
2481 }
2482
2483 if (freelist == NULL) {
2484 error = ENOBUFS;
2485 socket_lock(so, 0);
2486 goto out_locked;
2487 }
2488 /*
2489 * For datagram protocols,
2490 * leave room for protocol
2491 * headers in first mbuf.
2492 */
2493 if (atomic && top == NULL &&
2494 bytes_to_copy > 0 &&
2495 bytes_to_copy < MHLEN) {
2496 MH_ALIGN(freelist,
2497 bytes_to_copy);
2498 }
2499 }
2500 m = freelist;
2501 freelist = m->m_next;
2502 m->m_next = NULL;
2503
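				/*
				 * Compute the usable bytes in this mbuf: the
				 * cluster size for external storage, else
				 * (M)HLEN, minus any leading space already
				 * reserved (e.g., the socket headroom).
				 */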
2504 if ((m->m_flags & M_EXT)) {
2505 mlen = m->m_ext.ext_size -
2506 M_LEADINGSPACE(m);
2507 } else if ((m->m_flags & M_PKTHDR)) {
2508 mlen = MHLEN - M_LEADINGSPACE(m);
2509 m_add_crumb(m, PKT_CRUMB_SOSEND);
2510 } else {
2511 mlen = MLEN - M_LEADINGSPACE(m);
2512 }
2513 len = imin((int)mlen, bytes_to_copy);
2514
2515 chainlength += len;
2516
2517 space -= len;
2518
2519 error = uiomove(mtod(m, caddr_t),
2520 (int)len, uio);
2521
2522 resid = uio_resid(uio);
2523
2524 m->m_len = (int32_t)len;
2525 *mp = m;
2526 top->m_pkthdr.len += len;
2527 if (error) {
2528 break;
2529 }
2530 mp = &m->m_next;
2531 if (resid <= 0) {
2532 if (flags & MSG_EOR) {
2533 top->m_flags |= M_EOR;
2534 }
2535 break;
2536 }
2537 bytes_to_copy = imin((int)resid, (int)space);
2538 } while (space > 0 &&
2539 (chainlength < sosendmaxchain || atomic ||
2540 resid < MINCLSIZE));
2541
2542 socket_lock(so, 0);
2543
2544 if (error) {
2545 goto out_locked;
2546 }
2547 }
2548
2549 if (dontroute) {
2550 so->so_options |= SO_DONTROUTE;
2551 }
2552
2553 /*
2554 * Compute flags here, for pru_send and NKEs
2555 *
2556 * If the user set MSG_EOF, the protocol
2557 * understands this flag and nothing left to
2558 * send then use PRU_SEND_EOF instead of PRU_SEND.
2559 */
2560 sendflags = (flags & MSG_OOB) ? PRUS_OOB :
2561 ((flags & MSG_EOF) &&
2562 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
2563 (resid <= 0)) ? PRUS_EOF :
2564 /* If there is more to send set PRUS_MORETOCOME */
2565 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
2566
2567 if ((flags & MSG_SKIPCFIL) == 0) {
2568 /*
2569 * Socket filter processing
2570 */
2571 error = sflt_data_out(so, addr, &top,
2572 &control, (sendflags & MSG_OOB) ?
2573 sock_data_filt_flag_oob : 0);
2574 if (error) {
2575 if (error == EJUSTRETURN) {
2576 error = 0;
2577 goto packet_consumed;
2578 }
2579 goto out_locked;
2580 }
2581 #if CONTENT_FILTER
2582 /*
2583 * Content filter processing
2584 */
2585 error = cfil_sock_data_out(so, addr, top,
2586 control, sendflags, dgram_flow_entry);
2587 if (error) {
2588 if (error == EJUSTRETURN) {
2589 error = 0;
2590 goto packet_consumed;
2591 }
2592 goto out_locked;
2593 }
2594 #endif /* CONTENT_FILTER */
2595 }
2596 error = (*so->so_proto->pr_usrreqs->pru_send)
2597 (so, sendflags, top, addr, control, p);
2598
2599 packet_consumed:
2600 if (dontroute) {
2601 so->so_options &= ~SO_DONTROUTE;
2602 }
2603
2604 clen = 0;
2605 control = NULL;
2606 top = NULL;
2607 			mp = &top;
2608 if (error) {
2609 goto out_locked;
2610 }
2611 } while (resid && space > 0);
2612 } while (resid);
2613
2614
2615 out_locked:
2616 if (resid > orig_resid) {
2617 char pname[MAXCOMLEN] = {};
2618 pid_t current_pid = proc_pid(current_proc());
2619 proc_name(current_pid, pname, sizeof(pname));
2620
2621 if (sosend_assert_panic != 0) {
2622 panic("sosend so %p resid %lld > orig_resid %lld proc %s:%d",
2623 so, resid, orig_resid, pname, current_pid);
2624 } else {
2625 os_log_error(OS_LOG_DEFAULT, "sosend: so_gencnt %llu resid %lld > orig_resid %lld proc %s:%d",
2626 so->so_gencnt, resid, orig_resid, pname, current_pid);
2627 }
2628 }
2629
2630 if (sblocked) {
2631 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
2632 } else {
2633 socket_unlock(so, 1);
2634 }
2635 if (top != NULL) {
2636 m_freem(top);
2637 }
2638 if (control != NULL) {
2639 m_freem(control);
2640 }
2641 if (freelist != NULL) {
2642 m_freem_list(freelist);
2643 }
2644
2645 if (dgram_flow_entry != NULL) {
2646 soflow_free_flow(dgram_flow_entry);
2647 }
2648
2649 soclearfastopen(so);
2650
2651 if (en_tracing) {
2652 /* resid passed here is the bytes left in uio */
2653 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
2654 VM_KERNEL_ADDRPERM(so),
2655 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
2656 (int64_t)(orig_resid - resid));
2657 }
2658 KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid,
2659 so->so_snd.sb_cc, space, error);
2660
2661 return error;
2662 }
2663
2664 int
2665 sosend_reinject(struct socket *so, struct sockaddr *addr, struct mbuf *top, struct mbuf *control, uint32_t sendflags)
2666 {
2667 struct mbuf *m0 = NULL, *control_end = NULL;
2668
2669 socket_lock_assert_owned(so);
2670
2671 /*
2672 	 * top must point to the mbuf chain to be sent.
2673 	 * If control is not NULL, top must be a packet header.
2674 */
2675 VERIFY(top != NULL &&
2676 (control == NULL || top->m_flags & M_PKTHDR));
2677
2678 /*
2679 * If control is not passed in, see if we can get it
2680 * from top.
2681 */
2682 if (control == NULL && (top->m_flags & M_PKTHDR) == 0) {
2683 // Locate start of control if present and start of data
2684 for (m0 = top; m0 != NULL; m0 = m0->m_next) {
2685 if (m0->m_flags & M_PKTHDR) {
2686 top = m0;
2687 break;
2688 } else if (m0->m_type == MT_CONTROL) {
2689 if (control == NULL) {
2690 // Found start of control
2691 control = m0;
2692 }
2693 if (control != NULL && m0->m_next != NULL && m0->m_next->m_type != MT_CONTROL) {
2694 // Found end of control
2695 control_end = m0;
2696 }
2697 }
2698 }
2699 if (control_end != NULL) {
2700 control_end->m_next = NULL;
2701 }
2702 }
2703
2704 int error = (*so->so_proto->pr_usrreqs->pru_send)
2705 (so, sendflags, top, addr, control, current_proc());
2706
2707 return error;
2708 }
2709
2710 static struct mbuf *
2711 mbuf_detach_control_from_list(struct mbuf **mp, struct mbuf **last_control)
2712 {
2713 struct mbuf *control = NULL;
2714 struct mbuf *m = *mp;
2715
2716 if (m->m_type == MT_CONTROL) {
2717 struct mbuf *control_end;
2718 struct mbuf *n;
2719
2720 n = control_end = control = m;
2721
2722 /*
2723 * Break the chain per mbuf type
2724 */
2725 while (n != NULL && n->m_type == MT_CONTROL) {
2726 control_end = n;
2727 n = n->m_next;
2728 }
2729 control_end->m_next = NULL;
2730 *mp = n;
2731 if (last_control != NULL) {
2732 *last_control = control_end;
2733 }
2734 }
2735 VERIFY(*mp != NULL);
2736
2737 return control;
2738 }
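/*
 * Sketch of the transformation above: given a packet whose chain begins with
 * a run of control mbufs,
 *
 *	*mp:     [MT_CONTROL]->[MT_CONTROL]->[MT_DATA]->...
 *
 * on return the control run has been detached:
 *
 *	control: [MT_CONTROL]->[MT_CONTROL]	(*last_control -> the 2nd one)
 *	*mp:     [MT_DATA]->...
 */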
2739
2740 /*
2741  * Supported only for connected sockets (no address) without ancillary data
2742  * (control mbuf), for atomic protocols
2743 */
2744 int
2745 sosend_list(struct socket *so, struct mbuf *pktlist, size_t total_len, u_int *pktcnt, int flags)
2746 {
2747 mbuf_ref_t m, control = NULL;
2748 struct soflow_hash_entry *__single dgram_flow_entry = NULL;
2749 int error, dontroute;
2750 int atomic = sosendallatonce(so);
2751 int sblocked = 0;
2752 struct proc *p = current_proc();
2753 struct mbuf *top = pktlist;
2754 bool skip_filt = (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) || (flags & MSG_SKIPCFIL);
2755
2756 	KERNEL_DEBUG((DBG_FNC_SOSEND_LIST | DBG_FUNC_START), so, total_len,
2757 so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
2758
2759 if (so->so_type != SOCK_DGRAM) {
2760 error = EINVAL;
2761 os_log(OS_LOG_DEFAULT, "sosend_list: so->so_type != SOCK_DGRAM error %d",
2762 error);
2763 goto out;
2764 }
2765 if (atomic == 0) {
2766 error = EINVAL;
2767 os_log(OS_LOG_DEFAULT, "sosend_list: atomic == 0 error %d",
2768 error);
2769 goto out;
2770 }
2771 if ((so->so_state & SS_ISCONNECTED) == 0) {
2772 error = ENOTCONN;
2773 os_log(OS_LOG_DEFAULT, "sosend_list: SS_ISCONNECTED not set error: %d",
2774 error);
2775 goto out;
2776 }
2777 if (flags & ~(MSG_DONTWAIT | MSG_NBIO | MSG_SKIPCFIL)) {
2778 error = EINVAL;
2779 os_log(OS_LOG_DEFAULT, "sosend_list: flags 0x%x error %d",
2780 flags, error);
2781 goto out;
2782 }
2783
2784 socket_lock(so, 1);
2785 so_update_last_owner_locked(so, p);
2786 so_update_policy(so);
2787
2788 if (NEED_DGRAM_FLOW_TRACKING(so)) {
2789 dgram_flow_entry = soflow_get_flow(so, NULL, NULL, NULL, total_len, SOFLOW_DIRECTION_OUTBOUND, 0);
2790 }
2791
2792 #if NECP
2793 so_update_necp_policy(so, NULL, NULL);
2794 #endif /* NECP */
2795
2796 dontroute = (flags & MSG_DONTROUTE) &&
2797 (so->so_options & SO_DONTROUTE) == 0 &&
2798 (so->so_proto->pr_flags & PR_ATOMIC);
2799 if (dontroute) {
2800 so->so_options |= SO_DONTROUTE;
2801 }
2802
2803 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
2804
2805 error = sosendcheck(so, NULL, 0, 0, atomic, flags, &sblocked);
2806 if (error) {
2807 os_log(OS_LOG_DEFAULT, "sosend_list: sosendcheck error %d",
2808 error);
2809 goto release;
2810 }
2811
2812 if (!skip_filt) {
2813 mbuf_ref_ref_t prevnextp = NULL;
2814
2815 for (m = top; m != NULL; m = m->m_nextpkt) {
2816 mbuf_ref_t nextpkt, last_control;
2817
2818 /*
2819 * Remove packet from the list of packets
2820 */
2821 nextpkt = m->m_nextpkt;
2822 if (prevnextp != NULL) {
2823 *prevnextp = nextpkt;
2824 } else {
2825 top = nextpkt;
2826 }
2827 m->m_nextpkt = NULL;
2828
2829 /*
2830 * Break the chain per mbuf type
2831 */
2832 if (m->m_type == MT_CONTROL) {
2833 control = mbuf_detach_control_from_list(&m, &last_control);
2834 }
2835 /*
2836 * Socket filter processing
2837 */
2838 error = sflt_data_out(so, NULL, &m,
2839 &control, 0);
2840 if (error != 0 && error != EJUSTRETURN) {
2841 os_log(OS_LOG_DEFAULT, "sosend_list: sflt_data_out error %d",
2842 error);
2843 m_freem(m);
2844 goto release;
2845 }
2846
2847 #if CONTENT_FILTER
2848 if (error == 0) {
2849 /*
2850 * Content filter processing
2851 */
2852 error = cfil_sock_data_out(so, NULL, m,
2853 control, 0, dgram_flow_entry);
2854 if (error != 0 && error != EJUSTRETURN) {
2855 os_log(OS_LOG_DEFAULT, "sosend_list: cfil_sock_data_out error %d",
2856 error);
2857 m_freem(m);
2858 goto release;
2859 }
2860 }
2861 #endif /* CONTENT_FILTER */
2862 if (error == EJUSTRETURN) {
2863 /*
2864 * When swallowed by a filter, the packet is not
2865 * in the list anymore
2866 */
2867 error = 0;
2868 } else {
2869 /*
2870 * Rebuild the mbuf chain of the packet
2871 */
2872 if (control != NULL) {
2873 last_control->m_next = m;
2874 m = control;
2875 }
2876 /*
2877 * Reinsert the packet in the list of packets
2878 */
2879 m->m_nextpkt = nextpkt;
2880 if (prevnextp != NULL) {
2881 *prevnextp = m;
2882 } else {
2883 top = m;
2884 }
2885 prevnextp = &m->m_nextpkt;
2886 }
2887 control = NULL;
2888 }
2889 }
2890
2891 if (top != NULL) {
2892 if (so->so_proto->pr_usrreqs->pru_send_list != pru_send_list_notsupp) {
2893 error = (*so->so_proto->pr_usrreqs->pru_send_list)
2894 (so, top, pktcnt, flags);
2895 if (error != 0 && error != ENOBUFS) {
2896 os_log(OS_LOG_DEFAULT, "sosend_list: pru_send_list error %d",
2897 error);
2898 }
2899 top = NULL;
2900 } else {
2901 *pktcnt = 0;
2902 control = NULL;
2903 for (m = top; m != NULL; m = top) {
2904 top = m->m_nextpkt;
2905 m->m_nextpkt = NULL;
2906
2907 /*
2908 * Break the chain per mbuf type
2909 */
2910 if (m->m_type == MT_CONTROL) {
2911 control = mbuf_detach_control_from_list(&m, NULL);
2912 }
2913
2914 error = (*so->so_proto->pr_usrreqs->pru_send)
2915 (so, 0, m, NULL, control, current_proc());
2916 if (error != 0) {
2917 if (error != ENOBUFS) {
2918 os_log(OS_LOG_DEFAULT, "sosend_list: pru_send error %d",
2919 error);
2920 }
2921 control = NULL;
2922 goto release;
2923 }
2924 *pktcnt += 1;
2925 control = NULL;
2926 }
2927 }
2928 }
2929
2930 release:
2931 if (dontroute) {
2932 so->so_options &= ~SO_DONTROUTE;
2933 }
2934 if (sblocked) {
2935 sbunlock(&so->so_snd, FALSE); /* will unlock socket */
2936 } else {
2937 socket_unlock(so, 1);
2938 }
2939 out:
2940 if (control != NULL) {
2941 m_freem(control);
2942 }
2943 if (top != NULL) {
2944 if (error != ENOBUFS) {
2945 os_log(OS_LOG_DEFAULT, "sosend_list: m_freem_list(top) with error %d",
2946 error);
2947 }
2948 m_freem_list(top);
2949 }
2950
2951 if (dgram_flow_entry != NULL) {
2952 soflow_free_flow(dgram_flow_entry);
2953 }
2954
2955 	KERNEL_DEBUG(DBG_FNC_SOSEND_LIST | DBG_FUNC_END, so, total_len,
2956 so->so_snd.sb_cc, 0, error);
2957
2958 return error;
2959 }
2960
2961 /*
2962 * May return ERESTART when packet is dropped by MAC policy check
2963 */
2964 static int
2965 soreceive_addr(struct proc *p, struct socket *so, struct sockaddr **psa,
2966 struct mbuf **maddrp,
2967 int flags, struct mbuf **mp, struct mbuf **nextrecordp, int canwait)
2968 {
2969 int error = 0;
2970 struct mbuf *m = *mp;
2971 struct mbuf *nextrecord = *nextrecordp;
2972
2973 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
2974 #if CONFIG_MACF_SOCKET_SUBSET
2975 /*
2976 * Call the MAC framework for policy checking if we're in
2977 * the user process context and the socket isn't connected.
2978 */
2979 if (p != kernproc && !(so->so_state & SS_ISCONNECTED)) {
2980 struct mbuf *m0 = m;
2981 /*
2982 * Dequeue this record (temporarily) from the receive
2983 * list since we're about to drop the socket's lock
2984 * where a new record may arrive and be appended to
2985 * the list. Upon MAC policy failure, the record
2986 * will be freed. Otherwise, we'll add it back to
2987 * the head of the list. We cannot rely on SB_LOCK
2988 * because append operation uses the socket's lock.
2989 */
2990 do {
2991 m->m_nextpkt = NULL;
2992 sbfree(&so->so_rcv, m);
2993 m = m->m_next;
2994 } while (m != NULL);
2995 m = m0;
2996 so->so_rcv.sb_mb = nextrecord;
2997 SB_EMPTY_FIXUP(&so->so_rcv);
2998 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1a");
2999 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1a");
3000 socket_unlock(so, 0);
3001
3002 error = mac_socket_check_received(kauth_cred_get(), so,
3003 mtod(m, struct sockaddr *));
3004
3005 if (error != 0) {
3006 /*
3007 * MAC policy failure; free this record and
3008 * process the next record (or block until
3009 * one is available). We have adjusted sb_cc
3010 * and sb_mbcnt above so there is no need to
3011 * call sbfree() again.
3012 */
3013 m_freem(m);
3014 /*
3015 * Clear SB_LOCK but don't unlock the socket.
3016 * Process the next record or wait for one.
3017 */
3018 socket_lock(so, 0);
3019 sbunlock(&so->so_rcv, TRUE); /* stay locked */
3020 error = ERESTART;
3021 goto done;
3022 }
3023 socket_lock(so, 0);
3024 /*
3025 * If the socket has been defunct'd, drop it.
3026 */
3027 if (so->so_flags & SOF_DEFUNCT) {
3028 m_freem(m);
3029 error = ENOTCONN;
3030 goto done;
3031 }
3032 /*
3033 * Re-adjust the socket receive list and re-enqueue
3034 * the record in front of any packets which may have
3035 * been appended while we dropped the lock.
3036 */
3037 for (m = m0; m->m_next != NULL; m = m->m_next) {
3038 sballoc(&so->so_rcv, m);
3039 }
3040 sballoc(&so->so_rcv, m);
3041 if (so->so_rcv.sb_mb == NULL) {
3042 so->so_rcv.sb_lastrecord = m0;
3043 so->so_rcv.sb_mbtail = m;
3044 }
3045 m = m0;
3046 nextrecord = m->m_nextpkt = so->so_rcv.sb_mb;
3047 so->so_rcv.sb_mb = m;
3048 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1b");
3049 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1b");
3050 }
3051 #endif /* CONFIG_MACF_SOCKET_SUBSET */
3052 if (psa != NULL) {
3053 *psa = dup_sockaddr(mtod(m, struct sockaddr *), canwait);
3054 if ((*psa == NULL) && (flags & MSG_NEEDSA)) {
3055 error = EWOULDBLOCK;
3056 goto done;
3057 }
3058 } else if (maddrp != NULL) {
3059 *maddrp = m;
3060 }
3061 if (flags & MSG_PEEK) {
3062 m = m->m_next;
3063 } else {
3064 sbfree(&so->so_rcv, m);
3065 if (m->m_next == NULL && so->so_rcv.sb_cc != 0) {
3066 panic("%s: about to create invalid socketbuf",
3067 __func__);
3068 /* NOTREACHED */
3069 }
3070 if (maddrp == NULL) {
3071 MFREE(m, so->so_rcv.sb_mb);
3072 } else {
3073 so->so_rcv.sb_mb = m->m_next;
3074 m->m_next = NULL;
3075 }
3076 m = so->so_rcv.sb_mb;
3077 if (m != NULL) {
3078 m->m_nextpkt = nextrecord;
3079 } else {
3080 so->so_rcv.sb_mb = nextrecord;
3081 SB_EMPTY_FIXUP(&so->so_rcv);
3082 }
3083 }
3084 done:
3085 *mp = m;
3086 *nextrecordp = nextrecord;
3087
3088 return error;
3089 }
3090
3091 /*
3092 * When peeking SCM_RIGHTS, the actual file descriptors are not yet created
3093 * so clear the data portion in order not to leak the file pointers
3094 */
3095 static void
3096 sopeek_scm_rights(struct mbuf *rights)
3097 {
3098 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
3099
3100 if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) {
3101 VERIFY(cm->cmsg_len <= rights->m_len);
3102 memset(cm + 1, 0, cm->cmsg_len - sizeof(*cm));
3103 }
3104 }
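/*
 * Note: "cm + 1" coincides with CMSG_DATA(cm) here because the cmsghdr size
 * is already 32-bit aligned on this platform, so the memset() above clears
 * exactly the payload (the not-yet-externalized file pointers) while leaving
 * cmsg_len/cmsg_level/cmsg_type visible to the peeking caller.
 */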
3105
3106 /*
3107 * Process one or more MT_CONTROL mbufs present before any data mbufs
3108 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
3109 * just copy the data; if !MSG_PEEK, we call into the protocol to
3110 * perform externalization.
3111 */
3112 static int
3113 soreceive_ctl(struct socket *so, struct mbuf **controlp, int flags,
3114 struct mbuf **mp, struct mbuf **nextrecordp)
3115 {
3116 int error = 0;
3117 mbuf_ref_t cm = NULL, cmn;
3118 mbuf_ref_ref_t cme = &cm;
3119 struct sockbuf *sb_rcv = &so->so_rcv;
3120 mbuf_ref_ref_t msgpcm = NULL;
3121 mbuf_ref_t m = *mp;
3122 mbuf_ref_t nextrecord = *nextrecordp;
3123 struct protosw *pr = so->so_proto;
3124
3125 /*
3126 * Externalizing the control messages would require us to
3127 * drop the socket's lock below. Once we re-acquire the
3128 * lock, the mbuf chain might change. In order to preserve
3129 * consistency, we unlink all control messages from the
3130 * first mbuf chain in one shot and link them separately
3131 * onto a different chain.
3132 */
3133 do {
3134 if (flags & MSG_PEEK) {
3135 if (controlp != NULL) {
3136 if (*controlp == NULL) {
3137 msgpcm = controlp;
3138 }
3139 *controlp = m_copy(m, 0, m->m_len);
3140
3141 /*
3142 * If we failed to allocate an mbuf,
3143 * release any previously allocated
3144 * mbufs for control data. Return
3145 * an error. Keep the mbufs in the
3146 * socket as this is using
3147 * MSG_PEEK flag.
3148 */
3149 if (*controlp == NULL) {
3150 m_freem(*msgpcm);
3151 error = ENOBUFS;
3152 goto done;
3153 }
3154
3155 if (pr->pr_domain->dom_externalize != NULL) {
3156 sopeek_scm_rights(*controlp);
3157 }
3158
3159 controlp = &(*controlp)->m_next;
3160 }
3161 m = m->m_next;
3162 } else {
3163 m->m_nextpkt = NULL;
3164 sbfree(sb_rcv, m);
3165 sb_rcv->sb_mb = m->m_next;
3166 m->m_next = NULL;
3167 *cme = m;
3168 cme = &(*cme)->m_next;
3169 m = sb_rcv->sb_mb;
3170 }
3171 } while (m != NULL && m->m_type == MT_CONTROL);
3172
3173 if (!(flags & MSG_PEEK)) {
3174 if (sb_rcv->sb_mb != NULL) {
3175 sb_rcv->sb_mb->m_nextpkt = nextrecord;
3176 } else {
3177 sb_rcv->sb_mb = nextrecord;
3178 SB_EMPTY_FIXUP(sb_rcv);
3179 }
3180 if (nextrecord == NULL) {
3181 sb_rcv->sb_lastrecord = m;
3182 }
3183 }
3184
3185 SBLASTRECORDCHK(&so->so_rcv, "soreceive ctl");
3186 SBLASTMBUFCHK(&so->so_rcv, "soreceive ctl");
3187
3188 while (cm != NULL) {
3189 int cmsg_level;
3190 int cmsg_type;
3191
3192 cmn = cm->m_next;
3193 cm->m_next = NULL;
3194 cmsg_level = mtod(cm, struct cmsghdr *)->cmsg_level;
3195 cmsg_type = mtod(cm, struct cmsghdr *)->cmsg_type;
3196
3197 /*
3198 * Call the protocol to externalize SCM_RIGHTS message
3199 * and return the modified message to the caller upon
3200 * success. Otherwise, all other control messages are
3201 * returned unmodified to the caller. Note that we
3202 * only get into this loop if MSG_PEEK is not set.
3203 */
3204 if (pr->pr_domain->dom_externalize != NULL &&
3205 cmsg_level == SOL_SOCKET &&
3206 cmsg_type == SCM_RIGHTS) {
3207 /*
3208 * Release socket lock: see 3903171. This
3209 * would also allow more records to be appended
3210 * to the socket buffer. We still have SB_LOCK
3211 * set on it, so we can be sure that the head
3212 * of the mbuf chain won't change.
3213 */
3214 socket_unlock(so, 0);
3215 error = (*pr->pr_domain->dom_externalize)(cm);
3216 socket_lock(so, 0);
3217 } else {
3218 error = 0;
3219 }
3220
3221 if (controlp != NULL && error == 0) {
3222 *controlp = cm;
3223 controlp = &(*controlp)->m_next;
3224 } else {
3225 (void) m_free(cm);
3226 }
3227 cm = cmn;
3228 }
3229 /*
3230 * Update the value of nextrecord in case we received new
3231 * records when the socket was unlocked above for
3232 * externalizing SCM_RIGHTS.
3233 */
3234 if (m != NULL) {
3235 nextrecord = sb_rcv->sb_mb->m_nextpkt;
3236 } else {
3237 nextrecord = sb_rcv->sb_mb;
3238 }
3239
3240 done:
3241 *mp = m;
3242 *nextrecordp = nextrecord;
3243
3244 return error;
3245 }
3246
3247 /*
3248 * If we have less data than requested, block awaiting more
3249 * (subject to any timeout) if:
3250 * 1. the current count is less than the low water mark, or
3251 * 2. MSG_WAITALL is set, and it is possible to do the entire
3252 * receive operation at once if we block (resid <= hiwat).
3253 * 3. MSG_DONTWAIT is not set
3254 * If MSG_WAITALL is set but resid is larger than the receive buffer,
3255 * we have to do the receive in sections, and thus risk returning
3256 * a short count if a timeout or signal occurs after we start.
3257 */
3258 static boolean_t
3259 so_should_wait(struct socket *so, struct uio *uio, struct mbuf *m, int flags)
3260 {
3261 struct protosw *pr = so->so_proto;
3262
3263 /* No mbufs in the receive-queue? Wait! */
3264 if (m == NULL) {
3265 return true;
3266 }
3267
3268 /* Not enough data in the receive socket-buffer - we may have to wait */
3269 if ((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio_resid(uio) &&
3270 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0) {
3271 /*
3272 		 * Application did set the low-water mark, so we should wait for
3273 * this data to be present.
3274 */
3275 if (so->so_rcv.sb_cc < so->so_rcv.sb_lowat) {
3276 return true;
3277 }
3278
3279 /*
3280 * Application wants all the data - so let's try to do the
3281 * receive-operation at once by waiting for everything to
3282 * be there.
3283 */
3284 if ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat) {
3285 return true;
3286 }
3287 }
3288
3289 return false;
3290 }
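/*
 * The predicate above, restated as a single expression:
 *
 *	wait := (m == NULL) ||
 *	    (!MSG_DONTWAIT && sb_cc < uio_resid && m->m_nextpkt == NULL &&
 *	     !PR_ATOMIC &&
 *	     (sb_cc < sb_lowat || (MSG_WAITALL && uio_resid <= sb_hiwat)))
 */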
3291
3292 /*
3293 * Implement receive operations on a socket.
3294 * We depend on the way that records are added to the sockbuf
3295 * by sbappend*. In particular, each record (mbufs linked through m_next)
3296 * must begin with an address if the protocol so specifies,
3297 * followed by an optional mbuf or mbufs containing ancillary data,
3298 * and then zero or more mbufs of data.
3299 * In order to avoid blocking network interrupts for the entire time here,
3300 * we splx() while doing the actual copy to user space.
3301 * Although the sockbuf is locked, new data may still be appended,
3302 * and thus we must maintain consistency of the sockbuf during that time.
3303 *
3304 * The caller may receive the data as a single mbuf chain by supplying
3305 * an mbuf **mp0 for use in returning the chain. The uio is then used
3306 * only for the count in uio_resid.
3307 *
3308 * Returns: 0 Success
3309 * ENOBUFS
3310 * ENOTCONN
3311 * EWOULDBLOCK
3312 * uiomove:EFAULT
3313 * sblock:EWOULDBLOCK
3314 * sblock:EINTR
3315 * sbwait:EBADF
3316 * sbwait:EINTR
3317 * sodelayed_copy:EFAULT
3318 * <pru_rcvoob>:EINVAL[TCP]
3319 * <pru_rcvoob>:EWOULDBLOCK[TCP]
3320 * <pru_rcvoob>:???
3321 * <pr_domain->dom_externalize>:EMSGSIZE[AF_UNIX]
3322 * <pr_domain->dom_externalize>:ENOBUFS[AF_UNIX]
3323 * <pr_domain->dom_externalize>:???
3324 *
3325 * Notes: Additional return values from calls through <pru_rcvoob> and
3326 * <pr_domain->dom_externalize> depend on protocols other than
3327 * TCP or AF_UNIX, which are documented above.
3328 */
3329 int
3330 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
3331 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
3332 {
3333 mbuf_ref_t m;
3334 mbuf_ref_ref_t mp;
3335 mbuf_ref_t ml = NULL;
3336 mbuf_ref_t nextrecord, free_list;
3337 int flags, error, offset;
3338 user_ssize_t len;
3339 struct protosw *pr = so->so_proto;
3340 int moff, type = 0;
3341 user_ssize_t orig_resid = uio_resid(uio);
3342 user_ssize_t delayed_copy_len;
3343 int can_delay;
3344 struct proc *p = current_proc();
3345 boolean_t en_tracing = FALSE;
3346
3347 /*
3348 * Sanity check on the length passed by caller as we are making 'int'
3349 * comparisons
3350 */
3351 if (orig_resid < 0 || orig_resid > INT_MAX) {
3352 return EINVAL;
3353 }
3354
3355 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so,
3356 uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
3357 so->so_rcv.sb_hiwat);
3358
3359 socket_lock(so, 1);
3360 so_update_last_owner_locked(so, p);
3361 so_update_policy(so);
3362
3363 #ifdef MORE_LOCKING_DEBUG
3364 if (so->so_usecount == 1) {
3365 panic("%s: so=%x no other reference on socket", __func__, so);
3366 /* NOTREACHED */
3367 }
3368 #endif
3369 mp = mp0;
3370 if (psa != NULL) {
3371 *psa = NULL;
3372 }
3373 if (controlp != NULL) {
3374 *controlp = NULL;
3375 }
3376 if (flagsp != NULL) {
3377 flags = *flagsp & ~MSG_EOR;
3378 } else {
3379 flags = 0;
3380 }
3381
3382 /*
3383 * If a recv attempt is made on a previously-accepted socket
3384 * that has been marked as inactive (disconnected), reject
3385 * the request.
3386 */
3387 if (so->so_flags & SOF_DEFUNCT) {
3388 struct sockbuf *sb = &so->so_rcv;
3389
3390 error = ENOTCONN;
3391 		SODEFUNCTLOG("%s[%d, %s]: defunct so %llu [%d,%d] (%d)\n",
3392 __func__, proc_pid(p), proc_best_name(p),
3393 so->so_gencnt,
3394 SOCK_DOM(so), SOCK_TYPE(so), error);
3395 /*
3396 * This socket should have been disconnected and flushed
3397 * prior to being returned from sodefunct(); there should
3398 * be no data on its receive list, so panic otherwise.
3399 */
3400 if (so->so_state & SS_DEFUNCT) {
3401 sb_empty_assert(sb, __func__);
3402 }
3403 socket_unlock(so, 1);
3404 return error;
3405 }
3406
3407 if ((so->so_flags1 & SOF1_PRECONNECT_DATA) &&
3408 pr->pr_usrreqs->pru_preconnect) {
3409 /*
3410 	 * A user may set the CONNECT_RESUME_ON_READ_WRITE flag but not
3411 	 * call write() right after this. *If* the app calls a read
3412 	 * we do not want to block this read indefinitely. Thus,
3413 * we trigger a connect so that the session gets initiated.
3414 */
3415 error = (*pr->pr_usrreqs->pru_preconnect)(so);
3416
3417 if (error) {
3418 socket_unlock(so, 1);
3419 return error;
3420 }
3421 }
3422
3423 if (ENTR_SHOULDTRACE &&
3424 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
3425 /*
3426 * enable energy tracing for inet sockets that go over
3427 * non-loopback interfaces only.
3428 */
3429 struct inpcb *inp = sotoinpcb(so);
3430 if (inp->inp_last_outifp != NULL &&
3431 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
3432 en_tracing = TRUE;
3433 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_START,
3434 VM_KERNEL_ADDRPERM(so),
3435 ((so->so_state & SS_NBIO) ?
3436 kEnTrFlagNonBlocking : 0),
3437 (int64_t)orig_resid);
3438 }
3439 }
3440
3441 /*
3442 * When SO_WANTOOBFLAG is set we try to get out-of-band data
3443 	 * regardless of the flags argument. Here is the case where
3444 * out-of-band data is not inline.
3445 */
3446 if ((flags & MSG_OOB) ||
3447 ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3448 (so->so_options & SO_OOBINLINE) == 0 &&
3449 (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) {
3450 m = m_get(M_WAIT, MT_DATA);
3451 if (m == NULL) {
3452 socket_unlock(so, 1);
3453 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END,
3454 ENOBUFS, 0, 0, 0, 0);
3455 return ENOBUFS;
3456 }
3457 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
3458 if (error) {
3459 goto bad;
3460 }
3461 socket_unlock(so, 0);
3462 do {
3463 error = uiomove(mtod(m, caddr_t),
3464 imin((int)uio_resid(uio), m->m_len), uio);
3465 m = m_free(m);
3466 } while (uio_resid(uio) && error == 0 && m != NULL);
3467 socket_lock(so, 0);
3468 bad:
3469 if (m != NULL) {
3470 m_freem(m);
3471 }
3472
3473 if ((so->so_options & SO_WANTOOBFLAG) != 0) {
3474 if (error == EWOULDBLOCK || error == EINVAL) {
3475 /*
3476 * Let's try to get normal data:
3477 * EWOULDBLOCK: out-of-band data not
3478 					 * received yet. EINVAL: out-of-band data
3479 * already read.
3480 */
3481 error = 0;
3482 goto nooob;
3483 } else if (error == 0 && flagsp != NULL) {
3484 *flagsp |= MSG_OOB;
3485 }
3486 }
3487 socket_unlock(so, 1);
3488 if (en_tracing) {
3489 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3490 VM_KERNEL_ADDRPERM(so), 0,
3491 (int64_t)(orig_resid - uio_resid(uio)));
3492 }
3493 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3494 0, 0, 0, 0);
3495
3496 return error;
3497 }
3498 nooob:
3499 if (mp != NULL) {
3500 *mp = NULL;
3501 }
3502
3503 if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) {
3504 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
3505 }
3506
3507 free_list = NULL;
3508 delayed_copy_len = 0;
3509 restart:
3510 #ifdef MORE_LOCKING_DEBUG
3511 if (so->so_usecount <= 1) {
3512 printf("soreceive: sblock so=0x%llx ref=%d on socket\n",
3513 (uint64_t)DEBUG_KERNEL_ADDRPERM(so), so->so_usecount);
3514 }
3515 #endif
3516 /*
3517 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
3518 * and if so just return to the caller. This could happen when
3519 * soreceive() is called by a socket upcall function during the
3520 * time the socket is freed. The socket buffer would have been
3521 * locked across the upcall, therefore we cannot put this thread
3522 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
3523 * we may livelock), because the lock on the socket buffer will
3524 * only be released when the upcall routine returns to its caller.
3525 * Because the socket has been officially closed, there can be
3526 * no further read on it.
3527 *
3528 * A multipath subflow socket would have its SS_NOFDREF set by
3529 * default, so check for SOF_MP_SUBFLOW socket flag; when the
3530 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
3531 */
3532 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
3533 (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
3534 socket_unlock(so, 1);
3535 return 0;
3536 }
3537
3538 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
3539 if (error) {
3540 socket_unlock(so, 1);
3541 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3542 0, 0, 0, 0);
3543 if (en_tracing) {
3544 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3545 VM_KERNEL_ADDRPERM(so), 0,
3546 (int64_t)(orig_resid - uio_resid(uio)));
3547 }
3548 return error;
3549 }
3550
3551 m = so->so_rcv.sb_mb;
3552 if (so_should_wait(so, uio, m, flags)) {
3553 /*
3554 * Panic if we notice inconsistencies in the socket's
3555 * receive list; both sb_mb and sb_cc should correctly
3556 * reflect the contents of the list, otherwise we may
3557 * end up with false positives during select() or poll()
3558 * which could put the application in a bad state.
3559 */
3560 SB_MB_CHECK(&so->so_rcv);
3561
3562 if (so->so_error) {
3563 if (m != NULL) {
3564 goto dontblock;
3565 }
3566 error = so->so_error;
3567 if ((flags & MSG_PEEK) == 0) {
3568 so->so_error = 0;
3569 }
3570 goto release;
3571 }
3572 if (so->so_state & SS_CANTRCVMORE) {
3573 #if CONTENT_FILTER
3574 /*
3575 			 * Deal with half-closed connections
3576 */
3577 if ((so->so_state & SS_ISDISCONNECTED) == 0 &&
3578 cfil_sock_data_pending(&so->so_rcv) != 0) {
3579 CFIL_LOG(LOG_INFO,
3580 "so %llx ignore SS_CANTRCVMORE",
3581 (uint64_t)DEBUG_KERNEL_ADDRPERM(so));
3582 } else
3583 #endif /* CONTENT_FILTER */
3584 if (m != NULL) {
3585 goto dontblock;
3586 } else {
3587 goto release;
3588 }
3589 }
3590 for (; m != NULL; m = m->m_next) {
3591 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
3592 m = so->so_rcv.sb_mb;
3593 goto dontblock;
3594 }
3595 }
3596 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
3597 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
3598 error = ENOTCONN;
3599 goto release;
3600 }
3601 if (uio_resid(uio) == 0) {
3602 goto release;
3603 }
3604
3605 if ((so->so_state & SS_NBIO) ||
3606 (flags & (MSG_DONTWAIT | MSG_NBIO))) {
3607 error = EWOULDBLOCK;
3608 goto release;
3609 }
3610 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
3611 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
3612 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
3613 #if EVEN_MORE_LOCKING_DEBUG
3614 if (socket_debug) {
3615 printf("Waiting for socket data\n");
3616 }
3617 #endif
3618
3619 /*
3620 * Depending on the protocol (e.g. TCP), the following
3621 * might cause the socket lock to be dropped and later
3622 * be reacquired, and more data could have arrived and
3623 * have been appended to the receive socket buffer by
3624 		 * the time it returns. Therefore, we sleep in
3625 		 * sbwait() below only if the wait-condition still
3626 		 * holds.
3627 */
3628 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3629 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3630 }
3631
3632 error = 0;
3633 if (so_should_wait(so, uio, so->so_rcv.sb_mb, flags)) {
3634 error = sbwait(&so->so_rcv);
3635 }
3636
3637 #if EVEN_MORE_LOCKING_DEBUG
3638 if (socket_debug) {
3639 printf("SORECEIVE - sbwait returned %d\n", error);
3640 }
3641 #endif
3642 if (so->so_usecount < 1) {
3643 panic("%s: after 2nd sblock so=%p ref=%d on socket",
3644 __func__, so, so->so_usecount);
3645 /* NOTREACHED */
3646 }
3647 if (error) {
3648 socket_unlock(so, 1);
3649 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,
3650 0, 0, 0, 0);
3651 if (en_tracing) {
3652 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
3653 VM_KERNEL_ADDRPERM(so), 0,
3654 (int64_t)(orig_resid - uio_resid(uio)));
3655 }
3656 return error;
3657 }
3658 goto restart;
3659 }
3660 dontblock:
3661 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
3662 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
3663 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
3664 nextrecord = m->m_nextpkt;
3665
3666 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
3667 error = soreceive_addr(p, so, psa, NULL, flags, &m, &nextrecord,
3668 mp0 == NULL);
3669 if (error == ERESTART) {
3670 goto restart;
3671 } else if (error != 0) {
3672 goto release;
3673 }
3674 orig_resid = 0;
3675 }
3676
3677 /*
3678 * Process one or more MT_CONTROL mbufs present before any data mbufs
3679 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
3680 * just copy the data; if !MSG_PEEK, we call into the protocol to
3681 * perform externalization.
3682 */
3683 if (m != NULL && m->m_type == MT_CONTROL) {
3684 error = soreceive_ctl(so, controlp, flags, &m, &nextrecord);
3685 if (error != 0) {
3686 goto release;
3687 }
3688 orig_resid = 0;
3689 }
3690
3691 if (m != NULL) {
3692 if (!(flags & MSG_PEEK)) {
3693 /*
3694 * We get here because m points to an mbuf following
3695 * any MT_SONAME or MT_CONTROL mbufs which have been
3696 * processed above. In any case, m should be pointing
3697 * to the head of the mbuf chain, and the nextrecord
3698 * should be either NULL or equal to m->m_nextpkt.
3699 * See comments above about SB_LOCK.
3700 */
3701 if (m != so->so_rcv.sb_mb ||
3702 m->m_nextpkt != nextrecord) {
3703 panic("%s: post-control !sync so=%p m=%p "
3704 "nextrecord=%p\n", __func__, so, m,
3705 nextrecord);
3706 /* NOTREACHED */
3707 }
3708 if (nextrecord == NULL) {
3709 so->so_rcv.sb_lastrecord = m;
3710 }
3711 }
3712 type = m->m_type;
3713 if (type == MT_OOBDATA) {
3714 flags |= MSG_OOB;
3715 }
3716 } else {
3717 if (!(flags & MSG_PEEK)) {
3718 SB_EMPTY_FIXUP(&so->so_rcv);
3719 }
3720 }
3721 SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
3722 SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
3723
3724 moff = 0;
3725 offset = 0;
3726
3727 if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) {
3728 can_delay = 1;
3729 } else {
3730 can_delay = 0;
3731 }
3732
3733 while (m != NULL &&
3734 (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) {
3735 if (m->m_type == MT_OOBDATA) {
3736 if (type != MT_OOBDATA) {
3737 break;
3738 }
3739 } else if (type == MT_OOBDATA) {
3740 break;
3741 }
3742
3743 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
3744 break;
3745 }
3746 /*
3747 * Make sure to always set the MSG_OOB flag when getting
3748 * out-of-band data inline.
3749 */
3750 if ((so->so_options & SO_WANTOOBFLAG) != 0 &&
3751 (so->so_options & SO_OOBINLINE) != 0 &&
3752 (so->so_state & SS_RCVATMARK) != 0) {
3753 flags |= MSG_OOB;
3754 }
3755 so->so_state &= ~SS_RCVATMARK;
3756 len = uio_resid(uio) - delayed_copy_len;
3757 if (so->so_oobmark && len > so->so_oobmark - offset) {
3758 len = so->so_oobmark - offset;
3759 }
3760 if (len > m->m_len - moff) {
3761 len = m->m_len - moff;
3762 }
3763 /*
3764 * If mp is set, just pass back the mbufs.
3765 * Otherwise copy them out via the uio, then free.
3766 * The sockbuf must be consistent here (sb_mb points to the
3767 * current mbuf, m_nextpkt to the next record) when we drop priority;
3768 * we must note any additions to the sockbuf when we
3769 * block interrupts again.
3770 */
3771 if (mp == NULL) {
3772 SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
3773 SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
3774 if (can_delay && len == m->m_len) {
3775 /*
3776 * only delay the copy if we're consuming the
3777 * mbuf and we're NOT in MSG_PEEK mode
3778 * and we have enough data to make it worthwhile
3779 * to drop and retake the lock... can_delay
3780 * reflects the state of the two latter
3781 * constraints; moff should always be zero
3782 * in these cases
3783 */
3784 delayed_copy_len += len;
3785 } else {
3786 if (delayed_copy_len) {
3787 error = sodelayed_copy(so, uio,
3788 &free_list, &delayed_copy_len);
3789
3790 if (error) {
3791 goto release;
3792 }
3793 /*
3794 * can only get here if MSG_PEEK is not
3795 * set; therefore, m should point at the
3796 * head of the rcv queue; if it doesn't,
3797 * it means something drastically
3798 * changed while we were out from behind
3799 * the lock in sodelayed_copy. perhaps
3800 * a RST on the stream. in any event,
3801 * the stream has been interrupted. it's
3802 * probably best just to return whatever
3803 * data we've moved and let the caller
3804 * sort it out...
3805 */
3806 if (m != so->so_rcv.sb_mb) {
3807 break;
3808 }
3809 }
3810 socket_unlock(so, 0);
3811 error = uiomove(mtod(m, caddr_t) + moff,
3812 (int)len, uio);
3813 socket_lock(so, 0);
3814
3815 if (error) {
3816 goto release;
3817 }
3818 }
3819 } else {
3820 uio_setresid(uio, (uio_resid(uio) - len));
3821 }
3822 if (len == m->m_len - moff) {
3823 if (m->m_flags & M_EOR) {
3824 flags |= MSG_EOR;
3825 }
3826 if (flags & MSG_PEEK) {
3827 m = m->m_next;
3828 moff = 0;
3829 } else {
3830 nextrecord = m->m_nextpkt;
3831 sbfree(&so->so_rcv, m);
3832 m->m_nextpkt = NULL;
3833
3834 if (mp != NULL) {
3835 *mp = m;
3836 mp = &m->m_next;
3837 so->so_rcv.sb_mb = m = m->m_next;
3838 *mp = NULL;
3839 } else {
3840 if (free_list == NULL) {
3841 free_list = m;
3842 } else {
3843 ml->m_next = m;
3844 }
3845 ml = m;
3846 so->so_rcv.sb_mb = m = m->m_next;
3847 ml->m_next = NULL;
3848 }
3849 if (m != NULL) {
3850 m->m_nextpkt = nextrecord;
3851 if (nextrecord == NULL) {
3852 so->so_rcv.sb_lastrecord = m;
3853 }
3854 } else {
3855 so->so_rcv.sb_mb = nextrecord;
3856 SB_EMPTY_FIXUP(&so->so_rcv);
3857 }
3858 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
3859 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
3860 }
3861 } else {
3862 if (flags & MSG_PEEK) {
3863 moff += len;
3864 } else {
3865 if (mp != NULL) {
3866 int copy_flag;
3867
3868 if (flags & MSG_DONTWAIT) {
3869 copy_flag = M_DONTWAIT;
3870 } else {
3871 copy_flag = M_WAIT;
3872 }
3873 *mp = m_copym(m, 0, (int)len, copy_flag);
3874 /*
3875 * Failed to allocate an mbuf?
3876 * Adjust uio_resid back, it was
3877 * adjusted down by len bytes which
3878 * we didn't copy over.
3879 */
3880 if (*mp == NULL) {
3881 uio_setresid(uio,
3882 (uio_resid(uio) + len));
3883 break;
3884 }
3885 }
3886 m->m_data += len;
3887 m->m_len -= len;
3888 so->so_rcv.sb_cc -= len;
3889 }
3890 }
3891 if (so->so_oobmark) {
3892 if ((flags & MSG_PEEK) == 0) {
3893 so->so_oobmark -= len;
3894 if (so->so_oobmark == 0) {
3895 so->so_state |= SS_RCVATMARK;
3896 break;
3897 }
3898 } else {
3899 offset += len;
3900 if (offset == so->so_oobmark) {
3901 break;
3902 }
3903 }
3904 }
3905 if (flags & MSG_EOR) {
3906 break;
3907 }
3908 /*
3909 * If the MSG_WAITALL or MSG_WAITSTREAM flag is set
3910 * (for a non-atomic socket), we must not quit until
3911 * "uio->uio_resid == 0" or an error termination.
3912 * If a signal/timeout occurs, return with a short
3913 * count but without error. Keep sockbuf locked
3914 * against other readers.
3915 */
3916 while (flags & (MSG_WAITALL | MSG_WAITSTREAM) && m == NULL &&
3917 (uio_resid(uio) - delayed_copy_len) > 0 &&
3918 !sosendallatonce(so) && !nextrecord) {
3919 if (so->so_error || ((so->so_state & SS_CANTRCVMORE)
3920 #if CONTENT_FILTER
3921 && cfil_sock_data_pending(&so->so_rcv) == 0
3922 #endif /* CONTENT_FILTER */
3923 )) {
3924 goto release;
3925 }
3926
3927 /*
3928 * Depending on the protocol (e.g. TCP), the following
3929 * might cause the socket lock to be dropped and later
3930 * be reacquired, and more data could have arrived and
3931 * have been appended to the receive socket buffer by
3932 * the time it returns. Therefore, we sleep in
3933 * sbwait() below only if the socket buffer is
3934 * empty, in order to avoid a false sleep.
3935 */
3936 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL) {
3937 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
3938 }
3939
3940 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
3941 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
3942
3943 if (so->so_rcv.sb_mb == NULL && sbwait(&so->so_rcv)) {
3944 error = 0;
3945 goto release;
3946 }
3947 /*
3948 * We have to wait until after we get back from the sbwait
3949 * to do the copy, because we will drop the lock if we
3950 * have enough data that has been delayed. By dropping
3951 * the lock we open up a window allowing the netisr
3952 * thread to process the incoming packets and to change
3953 * the state of this socket. We're issuing the sbwait
3954 * because the socket is empty and we're expecting the
3955 * netisr thread to wake us up when more packets arrive;
3956 * if we allowed that processing to happen and then called
3957 * sbwait, we could stall forever with packets sitting in
3958 * the socket if no further packets arrive from the remote
3959 * side.
3960 *
3961 * We want to copy before we've collected all the data
3962 * to satisfy this request, to allow the copy to overlap
3963 * the incoming packet processing on an MP system.
3964 */
3965 if (delayed_copy_len > sorecvmincopy &&
3966 (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) {
3967 error = sodelayed_copy(so, uio,
3968 &free_list, &delayed_copy_len);
3969
3970 if (error) {
3971 goto release;
3972 }
3973 }
3974 m = so->so_rcv.sb_mb;
3975 if (m != NULL) {
3976 nextrecord = m->m_nextpkt;
3977 }
3978 SB_MB_CHECK(&so->so_rcv);
3979 }
3980 }
3981 #ifdef MORE_LOCKING_DEBUG
3982 if (so->so_usecount <= 1) {
3983 panic("%s: after big while so=%p ref=%d on socket",
3984 __func__, so, so->so_usecount);
3985 /* NOTREACHED */
3986 }
3987 #endif
3988
3989 if (m != NULL && pr->pr_flags & PR_ATOMIC) {
3990 if (so->so_options & SO_DONTTRUNC) {
3991 flags |= MSG_RCVMORE;
3992 } else {
3993 flags |= MSG_TRUNC;
3994 if ((flags & MSG_PEEK) == 0) {
3995 (void) sbdroprecord(&so->so_rcv);
3996 }
3997 }
3998 }
3999
4000 /*
4001 * pru_rcvd below (for TCP) may cause more data to be received
4002 * if the socket lock is dropped prior to sending the ACK; some
4003 * legacy OpenTransport applications don't handle this well
4004 * (if they receive less data than requested while MSG_HAVEMORE
4005 * is set), and so we set the flag now based on what we know
4006 * prior to calling pru_rcvd.
4007 */
4008 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
4009 flags |= MSG_HAVEMORE;
4010 }
4011
4012 if ((flags & MSG_PEEK) == 0) {
4013 if (m == NULL) {
4014 so->so_rcv.sb_mb = nextrecord;
4015 /*
4016 * First part is an inline SB_EMPTY_FIXUP(). Second
4017 * part makes sure sb_lastrecord is up-to-date if
4018 * there is still data in the socket buffer.
4019 */
4020 if (so->so_rcv.sb_mb == NULL) {
4021 so->so_rcv.sb_mbtail = NULL;
4022 so->so_rcv.sb_lastrecord = NULL;
4023 } else if (nextrecord->m_nextpkt == NULL) {
4024 so->so_rcv.sb_lastrecord = nextrecord;
4025 }
4026 SB_MB_CHECK(&so->so_rcv);
4027 }
4028 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
4029 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
4030 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) {
4031 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
4032 }
4033 }
4034
4035 if (delayed_copy_len) {
4036 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
4037 if (error) {
4038 goto release;
4039 }
4040 }
4041 if (free_list != NULL) {
4042 m_freem_list(free_list);
4043 free_list = NULL;
4044 }
4045
4046 if (orig_resid == uio_resid(uio) && orig_resid &&
4047 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
4048 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
4049 goto restart;
4050 }
4051
4052 if (flagsp != NULL) {
4053 *flagsp |= flags;
4054 }
4055 release:
4056 #ifdef MORE_LOCKING_DEBUG
4057 if (so->so_usecount <= 1) {
4058 panic("%s: release so=%p ref=%d on socket", __func__,
4059 so, so->so_usecount);
4060 /* NOTREACHED */
4061 }
4062 #endif
4063 if (delayed_copy_len) {
4064 error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len);
4065 }
4066
4067 if (free_list != NULL) {
4068 m_freem_list(free_list);
4069 }
4070
4071 sbunlock(&so->so_rcv, FALSE); /* will unlock socket */
4072
4073 if (en_tracing) {
4074 KERNEL_ENERGYTRACE(kEnTrActKernSockRead, DBG_FUNC_END,
4075 VM_KERNEL_ADDRPERM(so),
4076 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
4077 (int64_t)(orig_resid - uio_resid(uio)));
4078 }
4079 KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio),
4080 so->so_rcv.sb_cc, 0, error);
4081
4082 return error;
4083 }
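/*
 * Usage sketch: the MSG_WAITALL handling above is what lets a single
 * recv(2) on a stream socket block until the full request is
 * satisfied instead of returning a short read.  A minimal userspace
 * illustration, assuming "fd" is a connected TCP socket:
 *
 *	#include <sys/socket.h>
 *
 *	char buf[4096];
 *	ssize_t n = recv(fd, buf, sizeof(buf), MSG_WAITALL);
 *	// n == sizeof(buf) on success; per the comment above, a
 *	// signal or timeout yields a short count with no error.
 */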
4084
4085 /*
4086 * Returns: 0 Success
4087 * uiomove:EFAULT
4088 */
4089 static int
4090 sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list,
4091 user_ssize_t *resid)
4092 {
4093 int error = 0;
4094 struct mbuf *m;
4095
4096 m = *free_list;
4097
4098 socket_unlock(so, 0);
4099
4100 while (m != NULL && error == 0) {
4101 error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);
4102 m = m->m_next;
4103 }
4104 m_freem_list(*free_list);
4105
4106 *free_list = NULL;
4107 *resid = 0;
4108
4109 socket_lock(so, 0);
4110
4111 return error;
4112 }
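/*
 * Illustrative note: sodelayed_copy() is the batched flush for the
 * can_delay path in soreceive() above.  The pattern is:
 *
 *	socket_unlock(so, 0);	// drop the lock: uiomove() may fault
 *	uiomove(...);		// copy the accumulated chain out
 *	socket_lock(so, 0);	// state may have changed meanwhile
 *
 * Anything cached across the call (sb_mb, nextrecord, ...) must be
 * treated as stale on return, which is why soreceive() re-checks
 * m != so->so_rcv.sb_mb after each flush.
 */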
4113
4114 int
4115 soreceive_m_list(struct socket *so, u_int *pktcntp, struct mbuf **maddrp,
4116 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
4117 {
4118 mbuf_ref_t m;
4119 mbuf_ref_ref_t mp;
4120 mbuf_ref_t nextrecord;
4121 int flags, error;
4122 struct protosw *pr = so->so_proto;
4123 struct proc *p = current_proc();
4124 u_int npkts = 0;
4125 mbuf_ref_t free_list = NULL;
4126 int sblocked = 0;
4127
4128 /*
4129 * Sanity check on the parameters passed by caller
4130 */
4131 if (mp0 == NULL || pktcntp == NULL) {
4132 return EINVAL;
4133 }
4134 if (*pktcntp > SO_MAX_MSG_X || *pktcntp == 0) {
4135 return EINVAL;
4136 }
4137
4138 mp = mp0;
4139 *mp0 = NULL;
4140 if (controlp != NULL) {
4141 *controlp = NULL;
4142 }
4143 if (maddrp != NULL) {
4144 *maddrp = NULL;
4145 }
4146 if (flagsp != NULL) {
4147 flags = *flagsp;
4148 } else {
4149 flags = 0;
4150 }
4151
4152 KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_START, so,
4153 *pktcntp, so->so_rcv.sb_cc, so->so_rcv.sb_lowat,
4154 so->so_rcv.sb_hiwat);
4155
4156 socket_lock(so, 1);
4157 so_update_last_owner_locked(so, p);
4158 so_update_policy(so);
4159
4160 #if NECP
4161 so_update_necp_policy(so, NULL, NULL);
4162 #endif /* NECP */
4163
4164 /*
4165 * If a recv attempt is made on a previously-accepted socket
4166 * that has been marked as inactive (disconnected), reject
4167 * the request.
4168 */
4169 if (so->so_flags & SOF_DEFUNCT) {
4170 struct sockbuf *sb = &so->so_rcv;
4171
4172 error = ENOTCONN;
4173 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] (%d)\n",
4174 __func__, proc_pid(p), proc_best_name(p),
4175 so->so_gencnt,
4176 SOCK_DOM(so), SOCK_TYPE(so), error);
4177 /*
4178 * This socket should have been disconnected and flushed
4179 * prior to being returned from sodefunct(); there should
4180 * be no data on its receive list, so panic otherwise.
4181 */
4182 if (so->so_state & SS_DEFUNCT) {
4183 sb_empty_assert(sb, __func__);
4184 }
4185 goto release;
4186 }
4187
4188 *mp = NULL;
4189
4190 restart:
4191 /*
4192 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
4193 * and if so just return to the caller. This could happen when
4194 * soreceive() is called by a socket upcall function during the
4195 * time the socket is freed. The socket buffer would have been
4196 * locked across the upcall, therefore we cannot put this thread
4197 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
4198 * we may livelock), because the lock on the socket buffer will
4199 * only be released when the upcall routine returns to its caller.
4200 * Because the socket has been officially closed, there can be
4201 * no further read on it.
4202 */
4203 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
4204 (SS_NOFDREF | SS_CANTRCVMORE)) {
4205 error = 0;
4206 goto release;
4207 }
4208
4209 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
4210 if (error) {
4211 goto release;
4212 }
4213 sblocked = 1;
4214
4215 m = so->so_rcv.sb_mb;
4216 /*
4217 * Block awaiting more datagrams if needed
4218 */
4219 if (m == NULL || ((flags & MSG_DONTWAIT) == 0 &&
4220 so->so_rcv.sb_cc < so->so_rcv.sb_lowat)) {
4221 /*
4222 * Panic if we notice inconsistencies in the socket's
4223 * receive list; both sb_mb and sb_cc should correctly
4224 * reflect the contents of the list, otherwise we may
4225 * end up with false positives during select() or poll()
4226 * which could put the application in a bad state.
4227 */
4228 SB_MB_CHECK(&so->so_rcv);
4229
4230 if (so->so_error) {
4231 if (m != NULL) {
4232 goto dontblock;
4233 }
4234 error = so->so_error;
4235 if ((flags & MSG_PEEK) == 0) {
4236 so->so_error = 0;
4237 }
4238 goto release;
4239 }
4240 if (so->so_state & SS_CANTRCVMORE) {
4241 if (m != NULL) {
4242 goto dontblock;
4243 } else {
4244 goto release;
4245 }
4246 }
4247 for (; m != NULL; m = m->m_next) {
4248 if (m->m_flags & M_EOR) {
4249 m = so->so_rcv.sb_mb;
4250 goto dontblock;
4251 }
4252 }
4253 if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0 &&
4254 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
4255 error = ENOTCONN;
4256 goto release;
4257 }
4258 if ((so->so_state & SS_NBIO) ||
4259 (flags & (MSG_DONTWAIT | MSG_NBIO))) {
4260 error = EWOULDBLOCK;
4261 goto release;
4262 }
4263 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
4264 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
4265
4266 sbunlock(&so->so_rcv, TRUE); /* keep socket locked */
4267 sblocked = 0;
4268
4269 error = sbwait(&so->so_rcv);
4270 if (error != 0) {
4271 goto release;
4272 }
4273 goto restart;
4274 }
4275 dontblock:
4276 m = so->so_rcv.sb_mb;
4277 if (m == NULL) {
4278 goto release;
4279 }
4280
4281 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
4282 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
4283 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
4284 nextrecord = m->m_nextpkt;
4285
4286 if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) {
4287 mbuf_ref_t maddr = NULL;
4288
4289 error = soreceive_addr(p, so, NULL, &maddr, flags, &m,
4290 &nextrecord, 1);
4291 if (error == ERESTART) {
4292 goto restart;
4293 } else if (error != 0) {
4294 goto release;
4295 }
4296
4297 if (maddr != NULL) {
4298 maddr->m_nextpkt = NULL;
4299 maddr->m_next = NULL;
4300 if (maddrp != NULL) {
4301 *maddrp = maddr;
4302 maddrp = &maddr->m_nextpkt;
4303 } else {
4304 maddr->m_next = free_list;
4305 free_list = maddr;
4306 }
4307 }
4308 }
4309
4310 /*
4311 * Process one or more MT_CONTROL mbufs present before any data mbufs
4312 * in the first mbuf chain on the socket buffer.
4313 * We call into the protocol to perform externalization.
4314 */
4315 if (m != NULL && m->m_type == MT_CONTROL) {
4316 mbuf_ref_t control = NULL;
4317
4318 error = soreceive_ctl(so, &control, flags, &m, &nextrecord);
4319 if (error != 0) {
4320 goto release;
4321 }
4322 if (control != NULL) {
4323 control->m_nextpkt = NULL;
4324 control->m_next = NULL;
4325 if (controlp != NULL) {
4326 *controlp = control;
4327 controlp = &control->m_nextpkt;
4328 } else {
4329 control->m_next = free_list;
4330 free_list = control;
4331 }
4332 }
4333 }
4334
4335 /*
4336 * Link the packet to the list
4337 */
4338 if (m != NULL) {
4339 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
4340 panic("%s: m %p m_type %d != MT_DATA", __func__, m, m->m_type);
4341 }
4342 m->m_nextpkt = NULL;
4343 *mp = m;
4344 mp = &m->m_nextpkt;
4345 }
4346 while (m != NULL) {
4347 sbfree(&so->so_rcv, m);
4348
4349 m = m->m_next;
4350 }
4351
4352 so->so_rcv.sb_mb = nextrecord;
4353 /*
4354 * First part is an inline SB_EMPTY_FIXUP(). Second
4355 * part makes sure sb_lastrecord is up-to-date if
4356 * there is still data in the socket buffer.
4357 */
4358 if (so->so_rcv.sb_mb == NULL) {
4359 so->so_rcv.sb_mbtail = NULL;
4360 so->so_rcv.sb_lastrecord = NULL;
4361 } else if (nextrecord->m_nextpkt == NULL) {
4362 so->so_rcv.sb_lastrecord = nextrecord;
4363 }
4364 SB_MB_CHECK(&so->so_rcv);
4365
4366 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
4367 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
4368
4369 npkts += 1;
4370
4371 /*
4372 * We continue as long as we have fewer packets than requested
4373 * and the socket buffer is not empty
4374 */
4375 if (npkts < *pktcntp) {
4376 if (so->so_rcv.sb_mb != NULL) {
4377 goto dontblock;
4378 }
4379 if ((flags & MSG_WAITALL) != 0) {
4380 goto restart;
4381 }
4382 }
4383
4384 if (flagsp != NULL) {
4385 *flagsp |= flags;
4386 }
4387
4388 release:
4389 /*
4390 * pru_rcvd may cause more data to be received if the socket lock
4391 * is dropped so we set MSG_HAVEMORE now based on what we know.
4392 * That way the caller won't be surprised if it receives less data
4393 * than requested.
4394 */
4395 if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) {
4396 flags |= MSG_HAVEMORE;
4397 }
4398
4399 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
4400 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
4401 }
4402
4403 if (sblocked) {
4404 sbunlock(&so->so_rcv, FALSE); /* will unlock socket */
4405 } else {
4406 socket_unlock(so, 1);
4407 }
4408
4409 *pktcntp = npkts;
4410 /*
4411 * Amortize the cost of freeing the mbufs
4412 */
4413 if (free_list != NULL) {
4414 m_freem_list(free_list);
4415 }
4416
4417 KERNEL_DEBUG(DBG_FNC_SORECEIVE_LIST | DBG_FUNC_END, error,
4418 0, 0, 0, 0);
4419 return error;
4420 }
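/*
 * Usage sketch, assuming the Darwin-private recvmsg_x(2) interface
 * that is backed by soreceive_m_list(): the caller passes an array
 * of struct msghdr_x and receives up to that many datagrams in one
 * call.  The *pktcntp in/out parameter above mirrors the syscall's
 * count argument: capped at SO_MAX_MSG_X on input, set to the number
 * of packets delivered on output.
 *
 *	struct msghdr_x msgs[8] = {};
 *	struct iovec iov[8];
 *	char bufs[8][2048];
 *	for (int i = 0; i < 8; i++) {
 *		iov[i].iov_base = bufs[i];
 *		iov[i].iov_len = sizeof(bufs[i]);
 *		msgs[i].msg_iov = &iov[i];
 *		msgs[i].msg_iovlen = 1;
 *	}
 *	ssize_t n = recvmsg_x(fd, msgs, 8, 0);	// datagrams received
 */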
4421
4422 static int
4423 so_statistics_event_to_nstat_event(int64_t *input_options,
4424 uint64_t *nstat_event)
4425 {
4426 int error = 0;
4427 switch (*input_options) {
4428 case SO_STATISTICS_EVENT_ENTER_CELLFALLBACK:
4429 *nstat_event = NSTAT_EVENT_SRC_ENTER_CELLFALLBACK;
4430 break;
4431 case SO_STATISTICS_EVENT_EXIT_CELLFALLBACK:
4432 *nstat_event = NSTAT_EVENT_SRC_EXIT_CELLFALLBACK;
4433 break;
4434 #if (DEBUG || DEVELOPMENT)
4435 case SO_STATISTICS_EVENT_RESERVED_1:
4436 *nstat_event = NSTAT_EVENT_SRC_RESERVED_1;
4437 break;
4438 case SO_STATISTICS_EVENT_RESERVED_2:
4439 *nstat_event = NSTAT_EVENT_SRC_RESERVED_2;
4440 break;
4441 #endif /* (DEBUG || DEVELOPMENT) */
4442 default:
4443 error = EINVAL;
4444 break;
4445 }
4446 return error;
4447 }
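/*
 * Hedged sketch: this mapping backs the private SO_STATISTICS_EVENT
 * option handled in sosetoptlock() below, which copies in a 64-bit
 * event code and forwards the translated value to nstat_pcb_event().
 * Assuming a caller with the required privileges:
 *
 *	int64_t ev = SO_STATISTICS_EVENT_ENTER_CELLFALLBACK;
 *	(void) setsockopt(fd, SOL_SOCKET, SO_STATISTICS_EVENT,
 *	    &ev, sizeof(ev));
 */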
4448
4449 /*
4450 * Returns: 0 Success
4451 * EINVAL
4452 * ENOTCONN
4453 * <pru_shutdown>:EINVAL
4454 * <pru_shutdown>:EADDRNOTAVAIL[TCP]
4455 * <pru_shutdown>:ENOBUFS[TCP]
4456 * <pru_shutdown>:EMSGSIZE[TCP]
4457 * <pru_shutdown>:EHOSTUNREACH[TCP]
4458 * <pru_shutdown>:ENETUNREACH[TCP]
4459 * <pru_shutdown>:ENETDOWN[TCP]
4460 * <pru_shutdown>:ENOMEM[TCP]
4461 * <pru_shutdown>:EACCES[TCP]
4462 * <pru_shutdown>:EMSGSIZE[TCP]
4463 * <pru_shutdown>:ENOBUFS[TCP]
4464 * <pru_shutdown>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL]
4465 * <pru_shutdown>:??? [other protocol families]
4466 */
4467 int
4468 soshutdown(struct socket *so, int how)
4469 {
4470 int error;
4471
4472 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, how, 0, 0, 0, 0);
4473
4474 switch (how) {
4475 case SHUT_RD:
4476 case SHUT_WR:
4477 case SHUT_RDWR:
4478 socket_lock(so, 1);
4479 if ((so->so_state &
4480 (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
4481 error = ENOTCONN;
4482 } else {
4483 error = soshutdownlock(so, how);
4484 }
4485 socket_unlock(so, 1);
4486 break;
4487 default:
4488 error = EINVAL;
4489 break;
4490 }
4491
4492 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, how, error, 0, 0, 0);
4493
4494 return error;
4495 }
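/*
 * Usage sketch: the ENOTCONN check above is visible from userspace;
 * a half-close of the send side only succeeds on a connected socket:
 *
 *	#include <sys/socket.h>
 *
 *	if (shutdown(fd, SHUT_WR) == -1) {
 *		// ENOTCONN: socket not connected; EINVAL: bad "how",
 *		// per the switch above.
 *	}
 *	// The peer now reads EOF, while this side may still receive.
 */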
4496
4497 int
4498 soshutdownlock_final(struct socket *so, int how)
4499 {
4500 struct protosw *pr = so->so_proto;
4501 int error = 0;
4502
4503 sflt_notify(so, sock_evt_shutdown, &how);
4504
4505 if (how != SHUT_WR) {
4506 if ((so->so_state & SS_CANTRCVMORE) != 0) {
4507 /* read already shut down */
4508 error = ENOTCONN;
4509 goto done;
4510 }
4511 sorflush(so);
4512 }
4513 if (how != SHUT_RD) {
4514 if ((so->so_state & SS_CANTSENDMORE) != 0) {
4515 /* write already shut down */
4516 error = ENOTCONN;
4517 goto done;
4518 }
4519 error = (*pr->pr_usrreqs->pru_shutdown)(so);
4520 }
4521 done:
4522 KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN, how, 1, 0, 0, 0);
4523 return error;
4524 }
4525
4526 int
4527 soshutdownlock(struct socket *so, int how)
4528 {
4529 int error = 0;
4530
4531 #if CONTENT_FILTER
4532 /*
4533 * A content filter may delay the actual shutdown until it
4534 * has processed the pending data
4535 */
4536 if (so->so_flags & SOF_CONTENT_FILTER) {
4537 error = cfil_sock_shutdown(so, &how);
4538 if (error == EJUSTRETURN) {
4539 error = 0;
4540 goto done;
4541 } else if (error != 0) {
4542 goto done;
4543 }
4544 }
4545 #endif /* CONTENT_FILTER */
4546
4547 error = soshutdownlock_final(so, how);
4548
4549 done:
4550 return error;
4551 }
4552
4553 void
4554 sowflush(struct socket *so)
4555 {
4556 struct sockbuf *sb = &so->so_snd;
4557
4558 /*
4559 * Obtain lock on the socket buffer (SB_LOCK). This is required
4560 * to prevent the socket buffer from being unexpectedly altered
4561 * while it is used by another thread in socket send/receive.
4562 *
4563 * sblock() must not fail here, hence the assertion.
4564 */
4565 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4566 VERIFY(sb->sb_flags & SB_LOCK);
4567
4568 sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
4569 sb->sb_flags |= SB_DROP;
4570 sb->sb_upcall = NULL;
4571 sb->sb_upcallarg = NULL;
4572
4573 sbunlock(sb, TRUE); /* keep socket locked */
4574
4575 selthreadclear(&sb->sb_sel);
4576 sbrelease(sb);
4577 }
4578
4579 void
4580 sorflush(struct socket *so)
4581 {
4582 struct sockbuf *sb = &so->so_rcv;
4583 struct protosw *pr = so->so_proto;
4584 struct sockbuf asb;
4585 #ifdef notyet
4586 lck_mtx_t *mutex_held;
4587 /*
4588 * XXX: This code is currently commented out, because we may get here
4589 * as part of sofreelastref(), and at that time, pr_getlock() may no
4590 * longer be able to return us the lock; this will be fixed in future.
4591 */
4592 if (so->so_proto->pr_getlock != NULL) {
4593 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
4594 } else {
4595 mutex_held = so->so_proto->pr_domain->dom_mtx;
4596 }
4597
4598 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4599 #endif /* notyet */
4600
4601 sflt_notify(so, sock_evt_flush_read, NULL);
4602
4603 socantrcvmore(so);
4604
4605 /*
4606 * Obtain lock on the socket buffer (SB_LOCK). This is required
4607 * to prevent the socket buffer from being unexpectedly altered
4608 * while it is used by another thread in socket send/receive.
4609 *
4610 * sblock() must not fail here, hence the assertion.
4611 */
4612 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
4613 VERIFY(sb->sb_flags & SB_LOCK);
4614
4615 /*
4616 * Copy only the relevant fields from "sb" to "asb" which we
4617 * need for sbrelease() to function. In particular, skip
4618 * sb_sel as it contains the wait queue linkage, which would
4619 * wreak havoc if we were to issue selthreadclear() on "asb".
4620 * Make sure to not carry over SB_LOCK in "asb", as we need
4621 * to acquire it later as part of sbrelease().
4622 */
4623 bzero(&asb, sizeof(asb));
4624 asb.sb_cc = sb->sb_cc;
4625 asb.sb_hiwat = sb->sb_hiwat;
4626 asb.sb_mbcnt = sb->sb_mbcnt;
4627 asb.sb_mbmax = sb->sb_mbmax;
4628 asb.sb_ctl = sb->sb_ctl;
4629 asb.sb_lowat = sb->sb_lowat;
4630 asb.sb_mb = sb->sb_mb;
4631 asb.sb_mbtail = sb->sb_mbtail;
4632 asb.sb_lastrecord = sb->sb_lastrecord;
4633 asb.sb_so = sb->sb_so;
4634 asb.sb_flags = sb->sb_flags;
4635 asb.sb_flags &= ~(SB_LOCK | SB_SEL | SB_KNOTE | SB_UPCALL);
4636 asb.sb_flags |= SB_DROP;
4637
4638 /*
4639 * Ideally we'd bzero() these and preserve the ones we need;
4640 * but to do that we'd need to shuffle things around in the
4641 * sockbuf, and we can't do it now because there are KEXTS
4642 * that are directly referring to the socket structure.
4643 *
4644 * Setting SB_DROP acts as a barrier to prevent further appends.
4645 * Clearing SB_SEL is done for selthreadclear() below.
4646 */
4647 sb->sb_cc = 0;
4648 sb->sb_hiwat = 0;
4649 sb->sb_mbcnt = 0;
4650 sb->sb_mbmax = 0;
4651 sb->sb_ctl = 0;
4652 sb->sb_lowat = 0;
4653 sb->sb_mb = NULL;
4654 sb->sb_mbtail = NULL;
4655 sb->sb_lastrecord = NULL;
4656 sb->sb_timeo.tv_sec = 0;
4657 sb->sb_timeo.tv_usec = 0;
4658 sb->sb_upcall = NULL;
4659 sb->sb_upcallarg = NULL;
4660 sb->sb_flags &= ~(SB_SEL | SB_UPCALL);
4661 sb->sb_flags |= SB_DROP;
4662
4663 sbunlock(sb, TRUE); /* keep socket locked */
4664
4665 /*
4666 * Note that selthreadclear() is called on the original "sb" and
4667 * not the local "asb" because of the way wait queue linkage is
4668 * implemented. Given that selwakeup() may be triggered, SB_SEL
4669 * should no longer be set (cleared above.)
4670 */
4671 selthreadclear(&sb->sb_sel);
4672
4673 if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
4674 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
4675 }
4676
4677 sbrelease(&asb);
4678 }
4679
4680 /*
4681 * Perhaps this routine, and sooptcopyout(), below, ought to come in
4682 * an additional variant to handle the case where the option value needs
4683 * to be some kind of integer, but not a specific size.
4684 * In addition to their use here, these functions are also called by the
4685 * protocol-level pr_ctloutput() routines.
4686 *
4687 * Returns: 0 Success
4688 * EINVAL
4689 * copyin:EFAULT
4690 */
4691 int
4692 sooptcopyin(struct sockopt *sopt, void *__sized_by(len) buf, size_t len, size_t minlen)
4693 {
4694 size_t valsize;
4695
4696 /*
4697 * If the user gives us more than we wanted, we ignore it,
4698 * but if we don't get the minimum length the caller
4699 * wants, we return EINVAL. On success, sopt->sopt_valsize
4700 * is set to however much we actually retrieved.
4701 */
4702 if ((valsize = sopt->sopt_valsize) < minlen) {
4703 return EINVAL;
4704 }
4705 if (valsize > len) {
4706 sopt->sopt_valsize = valsize = len;
4707 }
4708
4709 if (sopt->sopt_p != kernproc) {
4710 return copyin(sopt->sopt_val, buf, valsize);
4711 }
4712
4713 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
4714 CAST_DOWN(caddr_t, sopt->sopt_val),
4715 valsize);
4716 bcopy(tmp, buf, valsize);
4717
4718 return 0;
4719 }
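/*
 * Typical call pattern, as used throughout this file: a fixed-size
 * option is fetched with len == minlen, so undersized input is
 * rejected with EINVAL while oversized input is silently truncated:
 *
 *	int optval;
 *	error = sooptcopyin(sopt, &optval, sizeof(optval),
 *	    sizeof(optval));
 *	if (error != 0) {
 *		goto out;
 *	}
 */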
4720
4721 /*
4722 * sooptcopyin_timeval
4723 * Copy in a timeval value into tv_p, and take into account whether the
4724 * calling process is 64-bit or 32-bit. Moved the sanity checking
4725 * code here so that we can verify the 64-bit tv_sec value before we lose
4726 * the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec.
4727 */
4728 static int
4729 sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p)
4730 {
4731 int error;
4732
4733 if (proc_is64bit(sopt->sopt_p)) {
4734 struct user64_timeval tv64;
4735
4736 if (sopt->sopt_valsize < sizeof(tv64)) {
4737 return EINVAL;
4738 }
4739
4740 sopt->sopt_valsize = sizeof(tv64);
4741 if (sopt->sopt_p != kernproc) {
4742 error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
4743 if (error != 0) {
4744 return error;
4745 }
4746 } else {
4747 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
4748 CAST_DOWN(caddr_t, sopt->sopt_val),
4749 sizeof(tv64));
4750 bcopy(tmp, &tv64, sizeof(tv64));
4751 }
4752 if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX ||
4753 tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
4754 return EDOM;
4755 }
4756
4757 tv_p->tv_sec = (__darwin_time_t)tv64.tv_sec;
4758 tv_p->tv_usec = tv64.tv_usec;
4759 } else {
4760 struct user32_timeval tv32;
4761
4762 if (sopt->sopt_valsize < sizeof(tv32)) {
4763 return EINVAL;
4764 }
4765
4766 sopt->sopt_valsize = sizeof(tv32);
4767 if (sopt->sopt_p != kernproc) {
4768 error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
4769 if (error != 0) {
4770 return error;
4771 }
4772 } else {
4773 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
4774 CAST_DOWN(caddr_t, sopt->sopt_val),
4775 sizeof(tv32));
4776 bcopy(tmp, &tv32, sizeof(tv32));
4777 }
4778 #ifndef __LP64__
4779 /*
4780 * K64todo "comparison is always false due to
4781 * limited range of data type"
4782 */
4783 if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX ||
4784 tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
4785 return EDOM;
4786 }
4787 #endif
4788 tv_p->tv_sec = tv32.tv_sec;
4789 tv_p->tv_usec = tv32.tv_usec;
4790 }
4791 return 0;
4792 }
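/*
 * Userspace view: both SO_SNDTIMEO and SO_RCVTIMEO are parsed by this
 * routine, so the EDOM bounds above (tv_usec in [0, 1000000)) apply
 * to plain setsockopt(2) calls.  A minimal sketch:
 *
 *	#include <sys/socket.h>
 *	#include <sys/time.h>
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO,
 *	    &tv, sizeof(tv)) == -1) {
 *		// EDOM: out-of-range timeval; EINVAL: short length
 *	}
 */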
4793
4794 int
4795 sooptcopyin_bindtodevice(struct sockopt *sopt, char * __sized_by(bufsize) buf, size_t bufsize)
4796 {
4797 #define MIN_BINDTODEVICE_NAME_SIZE 2
4798 size_t maxlen = bufsize - 1; /* the max string length that fits in the buffer */
4799
4800 if (bufsize < MIN_BINDTODEVICE_NAME_SIZE) {
4801 #if DEBUG || DEVELOPMENT
4802 os_log(OS_LOG_DEFAULT, "%s: bufsize %lu < MIN_BINDTODEVICE_NAME_SIZE %d",
4803 __func__, bufsize, MIN_BINDTODEVICE_NAME_SIZE);
4804 #endif /* DEBUG || DEVELOPMENT */
4805 return EINVAL;
4806 }
4807
4808 memset(buf, 0, bufsize);
4809
4810 /*
4811 * bufsize includes the terminating NUL because of the uncertainty whether
4812 * interface names are passed as strings or byte buffers.
4813 * If the user gives us more than the max string length, return EINVAL.
4814 * On success, sopt->sopt_valsize is not modified.
4815 */
4817 if (sopt->sopt_valsize > maxlen) {
4818 os_log(OS_LOG_DEFAULT, "%s: sopt_valsize %lu > maxlen %lu",
4819 __func__, sopt->sopt_valsize, maxlen);
4820 return EINVAL;
4821 }
4822
4823 if (sopt->sopt_p != kernproc) {
4824 return copyin(sopt->sopt_val, buf, sopt->sopt_valsize);
4825 } else {
4826 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
4827 CAST_DOWN(caddr_t, sopt->sopt_val),
4828 sopt->sopt_valsize);
4829 bcopy(tmp, buf, sopt->sopt_valsize);
4830 }
4831
4832 return 0;
4833 #undef MIN_BINDTODEVICE_NAME_SIZE
4834 }
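/*
 * Hedged sketch: this helper parses the SO_BINDTODEVICE value, which
 * is routed through pr_ctloutput in sosetoptlock()/sogetoptlock()
 * below.  The value is an interface name that may arrive either
 * NUL-terminated or as a bare byte buffer, hence the memset and the
 * bufsize - 1 cap above.  Assuming the usual calling convention:
 *
 *	const char ifname[] = "en0";
 *	(void) setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
 *	    ifname, sizeof(ifname));	// length may include the NUL
 */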
4835
4836 int
4837 soopt_cred_check(struct socket *so, int priv, boolean_t allow_root,
4838 boolean_t ignore_delegate)
4839 {
4840 kauth_cred_t cred = NULL;
4841 proc_t ep = PROC_NULL;
4842 uid_t uid;
4843 int error = 0;
4844
4845 if (ignore_delegate == false && so->so_flags & SOF_DELEGATED) {
4846 ep = proc_find(so->e_pid);
4847 if (ep) {
4848 cred = kauth_cred_proc_ref(ep);
4849 }
4850 }
4851
4852 uid = kauth_cred_getuid(cred ? cred : so->so_cred);
4853
4854 /* uid is 0 for root */
4855 if (uid != 0 || !allow_root) {
4856 error = priv_check_cred(cred ? cred : so->so_cred, priv, 0);
4857 }
4858 if (cred) {
4859 kauth_cred_unref(&cred);
4860 }
4861 if (ep != PROC_NULL) {
4862 proc_rele(ep);
4863 }
4864
4865 return error;
4866 }
4867
4868 /*
4869 * Returns: 0 Success
4870 * EINVAL
4871 * ENOPROTOOPT
4872 * ENOBUFS
4873 * EDOM
4874 * sooptcopyin:EINVAL
4875 * sooptcopyin:EFAULT
4876 * sooptcopyin_timeval:EINVAL
4877 * sooptcopyin_timeval:EFAULT
4878 * sooptcopyin_timeval:EDOM
4879 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
4880 * <pr_ctloutput>:???
4881 * sflt_attach_private:??? [whatever a filter author chooses]
4882 * <sf_setoption>:??? [whatever a filter author chooses]
4883 *
4884 * Notes: Other <pr_ctloutput> returns depend on the protocol family;
4885 * all <sf_setoption> returns depend on what the filter author causes
4886 * their filter to return.
4887 */
4888 int
4889 sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
4890 {
4891 int error, optval;
4892 int64_t long_optval;
4893 struct linger l;
4894 struct timeval tv;
4895
4896 if (sopt->sopt_dir != SOPT_SET) {
4897 sopt->sopt_dir = SOPT_SET;
4898 }
4899
4900 if (dolock) {
4901 socket_lock(so, 1);
4902 }
4903
4904 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
4905 (SS_CANTRCVMORE | SS_CANTSENDMORE) &&
4906 (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
4907 /* the socket has been shut down, no more sockopts */
4908 error = EINVAL;
4909 goto out;
4910 }
4911
4912 error = sflt_setsockopt(so, sopt);
4913 if (error != 0) {
4914 if (error == EJUSTRETURN) {
4915 error = 0;
4916 }
4917 goto out;
4918 }
4919
4920 if (sopt->sopt_level != SOL_SOCKET || sopt->sopt_name == SO_BINDTODEVICE) {
4921 if (so->so_proto != NULL &&
4922 so->so_proto->pr_ctloutput != NULL) {
4923 error = (*so->so_proto->pr_ctloutput)(so, sopt);
4924 goto out;
4925 }
4926 error = ENOPROTOOPT;
4927 } else {
4928 /*
4929 * Allow socket-level (SOL_SOCKET) options to be filtered by
4930 * the protocol layer, if needed. A zero value returned from
4931 * the handler means use default socket-level processing as
4932 * done by the rest of this routine. Otherwise, any other
4933 * return value indicates that the option is unsupported.
4934 */
4935 if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
4936 pru_socheckopt(so, sopt)) != 0) {
4937 goto out;
4938 }
4939
4940 error = 0;
4941 switch (sopt->sopt_name) {
4942 case SO_LINGER:
4943 case SO_LINGER_SEC: {
4944 error = sooptcopyin(sopt, &l, sizeof(l), sizeof(l));
4945 if (error != 0) {
4946 goto out;
4947 }
4948 /* Make sure to use sane values */
4949 if (sopt->sopt_name == SO_LINGER) {
4950 so->so_linger = (short)l.l_linger;
4951 } else {
4952 so->so_linger = (short)((long)l.l_linger * hz);
4953 }
4954 if (l.l_onoff != 0) {
4955 so->so_options |= SO_LINGER;
4956 } else {
4957 so->so_options &= ~SO_LINGER;
4958 }
4959 break;
4960 }
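/*
 * Note the unit difference above: SO_LINGER stores l_linger as given
 * (historically interpreted in clock ticks on this path), while the
 * Darwin-specific SO_LINGER_SEC scales by hz so callers can pass
 * seconds.  A hedged sketch:
 *
 *	struct linger l = { .l_onoff = 1, .l_linger = 2 };
 *	// close(2) may now block ~2s while unsent data drains:
 *	(void) setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC,
 *	    &l, sizeof(l));
 */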
4961 case SO_DEBUG:
4962 case SO_KEEPALIVE:
4963 case SO_DONTROUTE:
4964 case SO_USELOOPBACK:
4965 case SO_BROADCAST:
4966 case SO_REUSEADDR:
4967 case SO_REUSEPORT:
4968 case SO_OOBINLINE:
4969 case SO_TIMESTAMP:
4970 case SO_TIMESTAMP_MONOTONIC:
4971 case SO_TIMESTAMP_CONTINUOUS:
4972 case SO_DONTTRUNC:
4973 case SO_WANTMORE:
4974 case SO_WANTOOBFLAG:
4975 case SO_NOWAKEFROMSLEEP:
4976 case SO_NOAPNFALLBK:
4977 error = sooptcopyin(sopt, &optval, sizeof(optval),
4978 sizeof(optval));
4979 if (error != 0) {
4980 goto out;
4981 }
4982 if (optval) {
4983 so->so_options |= sopt->sopt_name;
4984 } else {
4985 so->so_options &= ~sopt->sopt_name;
4986 }
4987 #if SKYWALK
4988 inp_update_netns_flags(so);
4989 #endif /* SKYWALK */
4990 break;
4991
4992 case SO_SNDBUF:
4993 case SO_RCVBUF:
4994 case SO_SNDLOWAT:
4995 case SO_RCVLOWAT:
4996 error = sooptcopyin(sopt, &optval, sizeof(optval),
4997 sizeof(optval));
4998 if (error != 0) {
4999 goto out;
5000 }
5001
5002 /*
5003 * Values < 1 make no sense for any of these
5004 * options, so disallow them.
5005 */
5006 if (optval < 1) {
5007 error = EINVAL;
5008 goto out;
5009 }
5010
5011 switch (sopt->sopt_name) {
5012 case SO_SNDBUF:
5013 case SO_RCVBUF: {
5014 struct sockbuf *sb =
5015 (sopt->sopt_name == SO_SNDBUF) ?
5016 &so->so_snd : &so->so_rcv;
5017 if (sbreserve(sb, (u_int32_t)optval) == 0) {
5018 error = ENOBUFS;
5019 goto out;
5020 }
5021 sb->sb_flags |= SB_USRSIZE;
5022 sb->sb_flags &= ~SB_AUTOSIZE;
5023 sb->sb_idealsize = (u_int32_t)optval;
5024 break;
5025 }
5026 /*
5027 * Make sure the low-water is never greater than
5028 * the high-water.
5029 */
5030 case SO_SNDLOWAT: {
5031 int space = sbspace(&so->so_snd);
5032 uint32_t hiwat = so->so_snd.sb_hiwat;
5033
5034 if (so->so_snd.sb_flags & SB_UNIX) {
5035 struct unpcb *unp =
5036 (struct unpcb *)(so->so_pcb);
5037 if (unp != NULL &&
5038 unp->unp_conn != NULL) {
5039 struct socket *so2 = unp->unp_conn->unp_socket;
5040 hiwat += unp->unp_conn->unp_cc;
5041 space = sbspace(&so2->so_rcv);
5042 }
5043 }
5044
5045 so->so_snd.sb_lowat =
5046 (optval > hiwat) ?
5047 hiwat : optval;
5048
5049 if (space >= so->so_snd.sb_lowat) {
5050 sowwakeup(so);
5051 }
5052 break;
5053 }
5054 case SO_RCVLOWAT: {
5055 int64_t data_len;
5056 so->so_rcv.sb_lowat =
5057 (optval > so->so_rcv.sb_hiwat) ?
5058 so->so_rcv.sb_hiwat : optval;
5059 if (so->so_rcv.sb_flags & SB_UNIX) {
5060 struct unpcb *unp =
5061 (struct unpcb *)(so->so_pcb);
5062 if (unp != NULL &&
5063 unp->unp_conn != NULL) {
5064 struct socket *so2 = unp->unp_conn->unp_socket;
5065 data_len = so2->so_snd.sb_cc
5066 - so2->so_snd.sb_ctl;
5067 } else {
5068 data_len = so->so_rcv.sb_cc
5069 - so->so_rcv.sb_ctl;
5070 }
5071 } else {
5072 data_len = so->so_rcv.sb_cc
5073 - so->so_rcv.sb_ctl;
5074 }
5075
5076 if (data_len >= so->so_rcv.sb_lowat) {
5077 sorwakeup(so);
5078 }
5079 break;
5080 }
5081 }
5082 break;
5083
5084 case SO_SNDTIMEO:
5085 case SO_RCVTIMEO:
5086 error = sooptcopyin_timeval(sopt, &tv);
5087 if (error != 0) {
5088 goto out;
5089 }
5090
5091 switch (sopt->sopt_name) {
5092 case SO_SNDTIMEO:
5093 so->so_snd.sb_timeo = tv;
5094 break;
5095 case SO_RCVTIMEO:
5096 so->so_rcv.sb_timeo = tv;
5097 break;
5098 }
5099 break;
5100
5101 case SO_NKE: {
5102 struct so_nke nke;
5103
5104 error = sooptcopyin(sopt, &nke, sizeof(nke),
5105 sizeof(nke));
5106 if (error != 0) {
5107 goto out;
5108 }
5109
5110 error = sflt_attach_internal(so, nke.nke_handle);
5111 break;
5112 }
5113
5114 case SO_NOSIGPIPE:
5115 error = sooptcopyin(sopt, &optval, sizeof(optval),
5116 sizeof(optval));
5117 if (error != 0) {
5118 goto out;
5119 }
5120 if (optval != 0) {
5121 so->so_flags |= SOF_NOSIGPIPE;
5122 } else {
5123 so->so_flags &= ~SOF_NOSIGPIPE;
5124 }
5125 break;
5126
5127 case SO_NOADDRERR:
5128 error = sooptcopyin(sopt, &optval, sizeof(optval),
5129 sizeof(optval));
5130 if (error != 0) {
5131 goto out;
5132 }
5133 if (optval != 0) {
5134 so->so_flags |= SOF_NOADDRAVAIL;
5135 } else {
5136 so->so_flags &= ~SOF_NOADDRAVAIL;
5137 }
5138 break;
5139
5140 case SO_REUSESHAREUID:
5141 error = sooptcopyin(sopt, &optval, sizeof(optval),
5142 sizeof(optval));
5143 if (error != 0) {
5144 goto out;
5145 }
5146 if (optval != 0) {
5147 so->so_flags |= SOF_REUSESHAREUID;
5148 } else {
5149 so->so_flags &= ~SOF_REUSESHAREUID;
5150 }
5151 break;
5152
5153 case SO_NOTIFYCONFLICT:
5154 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
5155 error = EPERM;
5156 goto out;
5157 }
5158 error = sooptcopyin(sopt, &optval, sizeof(optval),
5159 sizeof(optval));
5160 if (error != 0) {
5161 goto out;
5162 }
5163 if (optval != 0) {
5164 so->so_flags |= SOF_NOTIFYCONFLICT;
5165 } else {
5166 so->so_flags &= ~SOF_NOTIFYCONFLICT;
5167 }
5168 break;
5169
5170 case SO_RESTRICTIONS:
5171 error = sooptcopyin(sopt, &optval, sizeof(optval),
5172 sizeof(optval));
5173 if (error != 0) {
5174 goto out;
5175 }
5176
5177 error = so_set_restrictions(so, optval);
5178 break;
5179
5180 case SO_AWDL_UNRESTRICTED:
5181 if (SOCK_DOM(so) != PF_INET &&
5182 SOCK_DOM(so) != PF_INET6) {
5183 error = EOPNOTSUPP;
5184 goto out;
5185 }
5186 error = sooptcopyin(sopt, &optval, sizeof(optval),
5187 sizeof(optval));
5188 if (error != 0) {
5189 goto out;
5190 }
5191 if (optval != 0) {
5192 error = soopt_cred_check(so,
5193 PRIV_NET_RESTRICTED_AWDL, false, false);
5194 if (error == 0) {
5195 inp_set_awdl_unrestricted(
5196 sotoinpcb(so));
5197 }
5198 } else {
5199 inp_clear_awdl_unrestricted(sotoinpcb(so));
5200 }
5201 break;
5202 case SO_INTCOPROC_ALLOW:
5203 if (SOCK_DOM(so) != PF_INET6) {
5204 error = EOPNOTSUPP;
5205 goto out;
5206 }
5207 error = sooptcopyin(sopt, &optval, sizeof(optval),
5208 sizeof(optval));
5209 if (error != 0) {
5210 goto out;
5211 }
5212 if (optval != 0 &&
5213 inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) {
5214 error = soopt_cred_check(so,
5215 PRIV_NET_RESTRICTED_INTCOPROC, false, false);
5216 if (error == 0) {
5217 inp_set_intcoproc_allowed(
5218 sotoinpcb(so));
5219 }
5220 } else if (optval == 0) {
5221 inp_clear_intcoproc_allowed(sotoinpcb(so));
5222 }
5223 break;
5224
5225 case SO_LABEL:
5226 error = EOPNOTSUPP;
5227 break;
5228
5229 case SO_UPCALLCLOSEWAIT:
5230 error = sooptcopyin(sopt, &optval, sizeof(optval),
5231 sizeof(optval));
5232 if (error != 0) {
5233 goto out;
5234 }
5235 if (optval != 0) {
5236 so->so_flags |= SOF_UPCALLCLOSEWAIT;
5237 } else {
5238 so->so_flags &= ~SOF_UPCALLCLOSEWAIT;
5239 }
5240 break;
5241
5242 case SO_RANDOMPORT:
5243 error = sooptcopyin(sopt, &optval, sizeof(optval),
5244 sizeof(optval));
5245 if (error != 0) {
5246 goto out;
5247 }
5248 if (optval != 0) {
5249 so->so_flags |= SOF_BINDRANDOMPORT;
5250 } else {
5251 so->so_flags &= ~SOF_BINDRANDOMPORT;
5252 }
5253 break;
5254
5255 case SO_NP_EXTENSIONS: {
5256 struct so_np_extensions sonpx;
5257
5258 error = sooptcopyin(sopt, &sonpx, sizeof(sonpx),
5259 sizeof(sonpx));
5260 if (error != 0) {
5261 goto out;
5262 }
5263 if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
5264 error = EINVAL;
5265 goto out;
5266 }
5267 /*
5268 * Only one bit defined for now
5269 */
5270 if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
5271 if ((sonpx.npx_flags & SONPX_SETOPTSHUT)) {
5272 so->so_flags |= SOF_NPX_SETOPTSHUT;
5273 } else {
5274 so->so_flags &= ~SOF_NPX_SETOPTSHUT;
5275 }
5276 }
5277 break;
5278 }
5279
5280 case SO_TRAFFIC_CLASS: {
5281 error = sooptcopyin(sopt, &optval, sizeof(optval),
5282 sizeof(optval));
5283 if (error != 0) {
5284 goto out;
5285 }
5286 if (optval >= SO_TC_NET_SERVICE_OFFSET) {
5287 int netsvc = optval - SO_TC_NET_SERVICE_OFFSET;
5288 error = so_set_net_service_type(so, netsvc);
5289 goto out;
5290 }
5291 error = so_set_traffic_class(so, optval);
5292 if (error != 0) {
5293 goto out;
5294 }
5295 so->so_flags1 &= ~SOF1_TC_NET_SERV_TYPE;
5296 so->so_netsvctype = _NET_SERVICE_TYPE_UNSPEC;
5297 break;
5298 }
5299
5300 case SO_RECV_TRAFFIC_CLASS: {
5301 error = sooptcopyin(sopt, &optval, sizeof(optval),
5302 sizeof(optval));
5303 if (error != 0) {
5304 goto out;
5305 }
5306 if (optval == 0) {
5307 so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
5308 } else {
5309 so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
5310 }
5311 break;
5312 }
5313
5314 #if (DEVELOPMENT || DEBUG)
5315 case SO_TRAFFIC_CLASS_DBG: {
5316 struct so_tcdbg so_tcdbg;
5317
5318 error = sooptcopyin(sopt, &so_tcdbg,
5319 sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
5320 if (error != 0) {
5321 goto out;
5322 }
5323 error = so_set_tcdbg(so, &so_tcdbg);
5324 if (error != 0) {
5325 goto out;
5326 }
5327 break;
5328 }
5329 #endif /* (DEVELOPMENT || DEBUG) */
5330
5331 case SO_PRIVILEGED_TRAFFIC_CLASS:
5332 error = priv_check_cred(kauth_cred_get(),
5333 PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
5334 if (error != 0) {
5335 goto out;
5336 }
5337 error = sooptcopyin(sopt, &optval, sizeof(optval),
5338 sizeof(optval));
5339 if (error != 0) {
5340 goto out;
5341 }
5342 if (optval == 0) {
5343 so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
5344 } else {
5345 so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
5346 }
5347 break;
5348
5349 #if (DEVELOPMENT || DEBUG)
5350 case SO_DEFUNCTIT:
5351 error = sosetdefunct(current_proc(), so, 0, FALSE);
5352 if (error == 0) {
5353 error = sodefunct(current_proc(), so, 0);
5354 }
5355
5356 break;
5357 #endif /* (DEVELOPMENT || DEBUG) */
5358
5359 case SO_DEFUNCTOK:
5360 error = sooptcopyin(sopt, &optval, sizeof(optval),
5361 sizeof(optval));
5362 if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
5363 if (error == 0) {
5364 error = EBADF;
5365 }
5366 goto out;
5367 }
5368 /*
5369 * Any process can set SO_DEFUNCTOK (clear
5370 * SOF_NODEFUNCT), but only root can clear
5371 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
5372 */
5373 if (optval == 0 &&
5374 kauth_cred_issuser(kauth_cred_get()) == 0) {
5375 error = EPERM;
5376 goto out;
5377 }
5378 if (optval) {
5379 so->so_flags &= ~SOF_NODEFUNCT;
5380 } else {
5381 so->so_flags |= SOF_NODEFUNCT;
5382 }
5383
5384 if (SOCK_DOM(so) == PF_INET ||
5385 SOCK_DOM(so) == PF_INET6) {
5386 char s[MAX_IPv6_STR_LEN];
5387 char d[MAX_IPv6_STR_LEN];
5388 struct inpcb *inp = sotoinpcb(so);
5389
5390 SODEFUNCTLOG("%s[%d, %s]: so 0x%llu "
5391 "[%s %s:%d -> %s:%d] is now marked "
5392 "as %seligible for "
5393 "defunct\n", __func__, proc_selfpid(),
5394 proc_best_name(current_proc()),
5395 so->so_gencnt,
5396 (SOCK_TYPE(so) == SOCK_STREAM) ?
5397 "TCP" : "UDP", inet_ntop(SOCK_DOM(so),
5398 ((SOCK_DOM(so) == PF_INET) ?
5399 (void *)&inp->inp_laddr.s_addr :
5400 (void *)&inp->in6p_laddr), s, sizeof(s)),
5401 ntohs(inp->in6p_lport),
5402 inet_ntop(SOCK_DOM(so),
5403 (SOCK_DOM(so) == PF_INET) ?
5404 (void *)&inp->inp_faddr.s_addr :
5405 (void *)&inp->in6p_faddr, d, sizeof(d)),
5406 ntohs(inp->in6p_fport),
5407 (so->so_flags & SOF_NODEFUNCT) ?
5408 "not " : "");
5409 } else {
5410 SODEFUNCTLOG("%s[%d, %s]: so 0x%llu [%d,%d] "
5411 "is now marked as %seligible for "
5412 "defunct\n",
5413 __func__, proc_selfpid(),
5414 proc_best_name(current_proc()),
5415 so->so_gencnt,
5416 SOCK_DOM(so), SOCK_TYPE(so),
5417 (so->so_flags & SOF_NODEFUNCT) ?
5418 "not " : "");
5419 }
5420 break;
5421
5422 case SO_ISDEFUNCT:
5423 /* This option is not settable */
5424 error = EINVAL;
5425 break;
5426
5427 case SO_OPPORTUNISTIC:
5428 error = sooptcopyin(sopt, &optval, sizeof(optval),
5429 sizeof(optval));
5430 if (error == 0) {
5431 error = so_set_opportunistic(so, optval);
5432 }
5433 break;
5434
5435 case SO_FLUSH:
5436 /* This option is handled by lower layer(s) */
5437 error = 0;
5438 break;
5439
5440 case SO_RECV_ANYIF:
5441 error = sooptcopyin(sopt, &optval, sizeof(optval),
5442 sizeof(optval));
5443 if (error == 0) {
5444 error = so_set_recv_anyif(so, optval);
5445 }
5446 break;
5447
5448 case SO_TRAFFIC_MGT_BACKGROUND: {
5449 /* This option is handled by lower layer(s) */
5450 error = 0;
5451 break;
5452 }
5453
5454 #if FLOW_DIVERT
5455 case SO_FLOW_DIVERT_TOKEN:
5456 error = flow_divert_token_set(so, sopt);
5457 break;
5458 #endif /* FLOW_DIVERT */
5459
5460
5461 case SO_DELEGATED:
5462 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
5463 sizeof(optval))) != 0) {
5464 break;
5465 }
5466
5467 error = so_set_effective_pid(so, optval, sopt->sopt_p, true);
5468 break;
5469
5470 case SO_DELEGATED_UUID: {
5471 uuid_t euuid;
5472
5473 if ((error = sooptcopyin(sopt, &euuid, sizeof(euuid),
5474 sizeof(euuid))) != 0) {
5475 break;
5476 }
5477
5478 error = so_set_effective_uuid(so, euuid, sopt->sopt_p, true);
5479 break;
5480 }
5481
5482 #if NECP
5483 case SO_NECP_ATTRIBUTES:
5484 if (SOCK_DOM(so) == PF_MULTIPATH) {
5485 /* Handled by MPTCP itself */
5486 break;
5487 }
5488
5489 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5490 error = EINVAL;
5491 goto out;
5492 }
5493
5494 error = necp_set_socket_attributes(&sotoinpcb(so)->inp_necp_attributes, sopt);
5495 break;
5496
5497 case SO_NECP_CLIENTUUID: {
5498 if (SOCK_DOM(so) == PF_MULTIPATH) {
5499 /* Handled by MPTCP itself */
5500 break;
5501 }
5502
5503 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5504 error = EINVAL;
5505 goto out;
5506 }
5507
5508 struct inpcb *inp = sotoinpcb(so);
5509 if (!uuid_is_null(inp->necp_client_uuid)) {
5510 // Clear out the old client UUID if present
5511 necp_inpcb_remove_cb(inp);
5512 }
5513
5514 error = sooptcopyin(sopt, &inp->necp_client_uuid,
5515 sizeof(uuid_t), sizeof(uuid_t));
5516 if (error != 0) {
5517 goto out;
5518 }
5519
5520 if (uuid_is_null(inp->necp_client_uuid)) {
5521 error = EINVAL;
5522 goto out;
5523 }
5524
5525 pid_t current_pid = proc_pid(current_proc());
5526 error = necp_client_register_socket_flow(current_pid,
5527 inp->necp_client_uuid, inp);
5528 if (error != 0) {
5529 uuid_clear(inp->necp_client_uuid);
5530 goto out;
5531 }
5532
5533 if (inp->inp_lport != 0) {
5534 // There is a bound local port, so this is not
5535 // a fresh socket. Assign to the client.
5536 necp_client_assign_from_socket(current_pid, inp->necp_client_uuid, inp);
5537 }
5538
5539 break;
5540 }
5541 case SO_NECP_LISTENUUID: {
5542 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5543 error = EINVAL;
5544 goto out;
5545 }
5546
5547 struct inpcb *inp = sotoinpcb(so);
5548 if (!uuid_is_null(inp->necp_client_uuid)) {
5549 error = EINVAL;
5550 goto out;
5551 }
5552
5553 error = sooptcopyin(sopt, &inp->necp_client_uuid,
5554 sizeof(uuid_t), sizeof(uuid_t));
5555 if (error != 0) {
5556 goto out;
5557 }
5558
5559 if (uuid_is_null(inp->necp_client_uuid)) {
5560 error = EINVAL;
5561 goto out;
5562 }
5563
5564 error = necp_client_register_socket_listener(proc_pid(current_proc()),
5565 inp->necp_client_uuid, inp);
5566 if (error != 0) {
5567 uuid_clear(inp->necp_client_uuid);
5568 goto out;
5569 }
5570
5571 // Mark that the port registration is held by NECP
5572 inp->inp_flags2 |= INP2_EXTERNAL_PORT;
5573
5574 break;
5575 }
5576
5577 case SO_RESOLVER_SIGNATURE: {
5578 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5579 error = EINVAL;
5580 goto out;
5581 }
5582 error = necp_set_socket_resolver_signature(sotoinpcb(so), sopt);
5583 break;
5584 }
5585 #endif /* NECP */
5586
5587 case SO_EXTENDED_BK_IDLE:
5588 error = sooptcopyin(sopt, &optval, sizeof(optval),
5589 sizeof(optval));
5590 if (error == 0) {
5591 error = so_set_extended_bk_idle(so, optval);
5592 }
5593 break;
5594
5595 case SO_MARK_CELLFALLBACK:
5596 error = sooptcopyin(sopt, &optval, sizeof(optval),
5597 sizeof(optval));
5598 if (error != 0) {
5599 goto out;
5600 }
5601 if (optval < 0) {
5602 error = EINVAL;
5603 goto out;
5604 }
5605 if (optval == 0) {
5606 so->so_flags1 &= ~SOF1_CELLFALLBACK;
5607 } else {
5608 so->so_flags1 |= SOF1_CELLFALLBACK;
5609 }
5610 break;
5611
5612 case SO_MARK_CELLFALLBACK_UUID:
5613 {
5614 struct so_mark_cellfallback_uuid_args args;
5615
5616 error = sooptcopyin(sopt, &args, sizeof(args),
5617 sizeof(args));
5618 if (error != 0) {
5619 goto out;
5620 }
5621 error = nstat_userland_mark_rnf_override(args.flow_uuid,
5622 args.flow_cellfallback);
5623 break;
5624 }
5625
5626 case SO_FALLBACK_MODE:
5627 error = sooptcopyin(sopt, &optval, sizeof(optval),
5628 sizeof(optval));
5629 if (error != 0) {
5630 goto out;
5631 }
5632 if (optval < SO_FALLBACK_MODE_NONE ||
5633 optval > SO_FALLBACK_MODE_PREFER) {
5634 error = EINVAL;
5635 goto out;
5636 }
5637 so->so_fallback_mode = (u_int8_t)optval;
5638 break;
5639
5640 case SO_MARK_KNOWN_TRACKER: {
5641 error = sooptcopyin(sopt, &optval, sizeof(optval),
5642 sizeof(optval));
5643 if (error != 0) {
5644 goto out;
5645 }
5646 if (optval < 0) {
5647 error = EINVAL;
5648 goto out;
5649 }
5650 if (optval == 0) {
5651 so->so_flags1 &= ~SOF1_KNOWN_TRACKER;
5652 } else {
5653 so->so_flags1 |= SOF1_KNOWN_TRACKER;
5654 }
5655 break;
5656 }
5657
5658 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED: {
5659 error = sooptcopyin(sopt, &optval, sizeof(optval),
5660 sizeof(optval));
5661 if (error != 0) {
5662 goto out;
5663 }
5664 if (optval < 0) {
5665 error = EINVAL;
5666 goto out;
5667 }
5668 if (optval == 0) {
5669 so->so_flags1 &= ~SOF1_TRACKER_NON_APP_INITIATED;
5670 } else {
5671 so->so_flags1 |= SOF1_TRACKER_NON_APP_INITIATED;
5672 }
5673 break;
5674 }
5675
5676 case SO_MARK_APPROVED_APP_DOMAIN: {
5677 error = sooptcopyin(sopt, &optval, sizeof(optval),
5678 sizeof(optval));
5679 if (error != 0) {
5680 goto out;
5681 }
5682 if (optval < 0) {
5683 error = EINVAL;
5684 goto out;
5685 }
5686 if (optval == 0) {
5687 so->so_flags1 &= ~SOF1_APPROVED_APP_DOMAIN;
5688 } else {
5689 so->so_flags1 |= SOF1_APPROVED_APP_DOMAIN;
5690 }
5691 break;
5692 }
5693
5694 case SO_STATISTICS_EVENT:
5695 error = sooptcopyin(sopt, &long_optval,
5696 sizeof(long_optval), sizeof(long_optval));
5697 if (error != 0) {
5698 goto out;
5699 }
5700 u_int64_t nstat_event = 0;
5701 error = so_statistics_event_to_nstat_event(
5702 &long_optval, &nstat_event);
5703 if (error != 0) {
5704 goto out;
5705 }
5706 nstat_pcb_event(sotoinpcb(so), nstat_event);
5707 break;
5708
5709 case SO_NET_SERVICE_TYPE: {
5710 error = sooptcopyin(sopt, &optval, sizeof(optval),
5711 sizeof(optval));
5712 if (error != 0) {
5713 goto out;
5714 }
5715 error = so_set_net_service_type(so, optval);
5716 break;
5717 }
5718
5719 case SO_QOSMARKING_POLICY_OVERRIDE:
5720 error = priv_check_cred(kauth_cred_get(),
5721 PRIV_NET_QOSMARKING_POLICY_OVERRIDE, 0);
5722 if (error != 0) {
5723 goto out;
5724 }
5725 error = sooptcopyin(sopt, &optval, sizeof(optval),
5726 sizeof(optval));
5727 if (error != 0) {
5728 goto out;
5729 }
5730 if (optval == 0) {
5731 so->so_flags1 &= ~SOF1_QOSMARKING_POLICY_OVERRIDE;
5732 } else {
5733 so->so_flags1 |= SOF1_QOSMARKING_POLICY_OVERRIDE;
5734 }
5735 break;
5736
5737 case SO_MPKL_SEND_INFO: {
5738 struct so_mpkl_send_info so_mpkl_send_info;
5739
5740 error = sooptcopyin(sopt, &so_mpkl_send_info,
5741 sizeof(struct so_mpkl_send_info), sizeof(struct so_mpkl_send_info));
5742 if (error != 0) {
5743 goto out;
5744 }
5745 uuid_copy(so->so_mpkl_send_uuid, so_mpkl_send_info.mpkl_uuid);
5746 so->so_mpkl_send_proto = so_mpkl_send_info.mpkl_proto;
5747
5748 if (uuid_is_null(so->so_mpkl_send_uuid) && so->so_mpkl_send_proto == 0) {
5749 so->so_flags1 &= ~SOF1_MPKL_SEND_INFO;
5750 } else {
5751 so->so_flags1 |= SOF1_MPKL_SEND_INFO;
5752 }
5753 break;
5754 }
5755 case SO_WANT_KEV_SOCKET_CLOSED: {
5756 error = sooptcopyin(sopt, &optval, sizeof(optval),
5757 sizeof(optval));
5758 if (error != 0) {
5759 goto out;
5760 }
5761 if (optval == 0) {
5762 so->so_flags1 &= ~SOF1_WANT_KEV_SOCK_CLOSED;
5763 } else {
5764 so->so_flags1 |= SOF1_WANT_KEV_SOCK_CLOSED;
5765 }
5766 break;
5767 }
5768 case SO_MARK_WAKE_PKT: {
5769 error = sooptcopyin(sopt, &optval, sizeof(optval),
5770 sizeof(optval));
5771 if (error != 0) {
5772 goto out;
5773 }
5774 if (optval == 0) {
5775 so->so_flags &= ~SOF_MARK_WAKE_PKT;
5776 } else {
5777 so->so_flags |= SOF_MARK_WAKE_PKT;
5778 }
5779 break;
5780 }
5781 case SO_RECV_WAKE_PKT: {
5782 error = sooptcopyin(sopt, &optval, sizeof(optval),
5783 sizeof(optval));
5784 if (error != 0) {
5785 goto out;
5786 }
5787 if (optval == 0) {
5788 so->so_flags &= ~SOF_RECV_WAKE_PKT;
5789 } else {
5790 so->so_flags |= SOF_RECV_WAKE_PKT;
5791 }
5792 break;
5793 }
5794 case SO_APPLICATION_ID: {
5795 so_application_id_t application_id = { 0 };
5796
5797 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
5798 error = EINVAL;
5799 goto out;
5800 }
5801 error = sooptcopyin(sopt, &application_id, sizeof(application_id),
5802 sizeof(application_id));
5803 if (error != 0) {
5804 goto out;
5805 }
5806
5807 // The caller's uid must match the socket owner's uid
5808 if (kauth_cred_getuid(so->so_cred) != application_id.uid) {
5809 error = EINVAL;
5810 printf("setsockopt: SO_APPLICATION_ID - wrong uid");
5811 goto out;
5812 }
5813 error = so_set_effective_uuid(so, application_id.effective_uuid, sopt->sopt_p, true);
5814 if (error != 0) {
5815 printf("setsockopt: SO_APPLICATION_ID - failed to set e_uuid");
5816 goto out;
5817 }
5818 if (application_id.persona_id != PERSONA_ID_NONE) {
5819 so->so_persona_id = application_id.persona_id;
5820 }
5821 break;
5822 }
5823 case SO_MARK_DOMAIN_INFO_SILENT:
5824 error = sooptcopyin(sopt, &optval, sizeof(optval),
5825 sizeof(optval));
5826 if (error != 0) {
5827 goto out;
5828 }
5829 if (optval < 0) {
5830 error = EINVAL;
5831 goto out;
5832 }
5833 if (optval == 0) {
5834 so->so_flags1 &= ~SOF1_DOMAIN_INFO_SILENT;
5835 } else {
5836 so->so_flags1 |= SOF1_DOMAIN_INFO_SILENT;
5837 }
5838 break;
5839
5840 default:
5841 error = ENOPROTOOPT;
5842 break;
5843 }
5844 if (error == 0 && so->so_proto != NULL &&
5845 so->so_proto->pr_ctloutput != NULL) {
5846 (void) so->so_proto->pr_ctloutput(so, sopt);
5847 }
5848 }
5849 out:
5850 if (dolock) {
5851 socket_unlock(so, 1);
5852 }
5853 return error;
5854 }
5855
5856 /* Helper routines for getsockopt */
5857 int
5858 sooptcopyout(struct sockopt *sopt, void *__sized_by(len) buf, size_t len)
5859 {
5860 int error;
5861 size_t valsize;
5862
5863 error = 0;
5864
5865 /*
5866 * Documented get behavior is that we always return a value,
5867 * possibly truncated to fit in the user's buffer.
5868 * Traditional behavior is that we always tell the user
5869 * precisely how much we copied, rather than something useful
5870 * like the total amount we had available for her.
5871 * Note that this interface is not idempotent; the entire answer must
5872 * be generated ahead of time.
5873 */
5874 valsize = MIN(len, sopt->sopt_valsize);
5875 sopt->sopt_valsize = valsize;
5876 if (sopt->sopt_valsize != 0 && sopt->sopt_val != USER_ADDR_NULL) {
5877 if (sopt->sopt_p != kernproc) {
5878 error = copyout(buf, sopt->sopt_val, valsize);
5879 } else {
5880 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
5881 CAST_DOWN(caddr_t, sopt->sopt_val),
5882 valsize);
5883 bcopy(buf, tmp, valsize);
5884 }
5885 }
5886 return error;
5887 }
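/*
 * Illustrative user-space sketch of the truncation behavior implemented
 * above (hypothetical variable names; not compiled as part of this file):
 * getsockopt(2) copies out at most the caller-supplied length and reports
 * how much was actually copied, never how much the kernel had available.
 *
 *	#include <sys/socket.h>
 *
 *	struct linger l;
 *	socklen_t optlen = sizeof(l);
 *	if (getsockopt(s, SOL_SOCKET, SO_LINGER, &l, &optlen) == 0) {
 *		// optlen now holds the number of bytes copied out,
 *		// possibly smaller than what was requested
 *	}
 */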
5888
5889 static int
5890 sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p)
5891 {
5892 int error;
5893 size_t len;
5894 struct user64_timeval tv64 = {};
5895 struct user32_timeval tv32 = {};
5896 const void * val;
5897 size_t valsize;
5898
5899 error = 0;
5900 if (proc_is64bit(sopt->sopt_p)) {
5901 len = sizeof(tv64);
5902 tv64.tv_sec = tv_p->tv_sec;
5903 tv64.tv_usec = tv_p->tv_usec;
5904 val = &tv64;
5905 } else {
5906 len = sizeof(tv32);
5907 tv32.tv_sec = (user32_time_t)tv_p->tv_sec;
5908 tv32.tv_usec = tv_p->tv_usec;
5909 val = &tv32;
5910 }
5911 valsize = MIN(len, sopt->sopt_valsize);
5912 sopt->sopt_valsize = valsize;
5913 if (sopt->sopt_val != USER_ADDR_NULL) {
5914 if (sopt->sopt_p != kernproc) {
5915 error = copyout(val, sopt->sopt_val, valsize);
5916 } else {
5917 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
5918 CAST_DOWN(caddr_t, sopt->sopt_val),
5919 valsize);
5920 bcopy(val, tmp, valsize);
5921 }
5922 }
5923 return error;
5924 }
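/*
 * User-space counterpart, as a sketch (hypothetical names): the kernel
 * selects the user32/user64 timeval layout that matches the calling
 * process, so a plain struct timeval works on either ABI.
 *
 *	struct timeval tv;
 *	socklen_t len = sizeof(tv);
 *	(void) getsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, &len);
 */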
5925
5926 /*
5927 * Return: 0 Success
5928 * ENOPROTOOPT
5929 * <pr_ctloutput>:EOPNOTSUPP[AF_UNIX]
5930 * <pr_ctloutput>:???
5931 * <sf_getoption>:???
5932 */
5933 int
5934 sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock)
5935 {
5936 int error, optval;
5937 struct linger l;
5938 struct timeval tv;
5939
5940 if (sopt->sopt_dir != SOPT_GET) {
5941 sopt->sopt_dir = SOPT_GET;
5942 }
5943
5944 if (dolock) {
5945 socket_lock(so, 1);
5946 }
5947
5948 error = sflt_getsockopt(so, sopt);
5949 if (error != 0) {
5950 if (error == EJUSTRETURN) {
5951 error = 0;
5952 }
5953 goto out;
5954 }
5955
5956 if (sopt->sopt_level != SOL_SOCKET || sopt->sopt_name == SO_BINDTODEVICE) {
5957 if (so->so_proto != NULL &&
5958 so->so_proto->pr_ctloutput != NULL) {
5959 error = (*so->so_proto->pr_ctloutput)(so, sopt);
5960 goto out;
5961 }
5962 error = ENOPROTOOPT;
5963 } else {
5964 /*
5965 * Allow socket-level (SOL_SOCKET) options to be filtered by
5966 * the protocol layer, if needed. A zero value returned from
5967 * the handler means use default socket-level processing as
5968 * done by the rest of this routine. Otherwise, any other
5969 * return value indicates that the option is unsupported.
5970 */
5971 if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs->
5972 pru_socheckopt(so, sopt)) != 0) {
5973 goto out;
5974 }
5975
5976 error = 0;
5977 switch (sopt->sopt_name) {
5978 case SO_LINGER:
5979 case SO_LINGER_SEC:
5980 l.l_onoff = ((so->so_options & SO_LINGER) ? 1 : 0);
5981 l.l_linger = (sopt->sopt_name == SO_LINGER) ?
5982 so->so_linger : so->so_linger / hz;
5983 error = sooptcopyout(sopt, &l, sizeof(l));
5984 break;
5985
5986 case SO_USELOOPBACK:
5987 case SO_DONTROUTE:
5988 case SO_DEBUG:
5989 case SO_KEEPALIVE:
5990 case SO_REUSEADDR:
5991 case SO_REUSEPORT:
5992 case SO_BROADCAST:
5993 case SO_OOBINLINE:
5994 case SO_TIMESTAMP:
5995 case SO_TIMESTAMP_MONOTONIC:
5996 case SO_TIMESTAMP_CONTINUOUS:
5997 case SO_DONTTRUNC:
5998 case SO_WANTMORE:
5999 case SO_WANTOOBFLAG:
6000 case SO_NOWAKEFROMSLEEP:
6001 case SO_NOAPNFALLBK:
6002 optval = so->so_options & sopt->sopt_name;
6003 integer:
6004 error = sooptcopyout(sopt, &optval, sizeof(optval));
6005 break;
6006
6007 case SO_TYPE:
6008 optval = so->so_type;
6009 goto integer;
6010
6011 case SO_NREAD:
6012 if (so->so_proto->pr_flags & PR_ATOMIC) {
6013 int pkt_total;
6014 struct mbuf *m1;
6015
6016 pkt_total = 0;
6017 m1 = so->so_rcv.sb_mb;
6018 while (m1 != NULL) {
6019 if (m_has_mtype(m1, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
6020 pkt_total += m1->m_len;
6021 }
6022 m1 = m1->m_next;
6023 }
6024 optval = pkt_total;
6025 } else {
6026 optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
6027 }
6028 goto integer;
6029
6030 case SO_NUMRCVPKT:
6031 if (so->so_proto->pr_flags & PR_ATOMIC) {
6032 int cnt = 0;
6033 struct mbuf *m1;
6034
6035 m1 = so->so_rcv.sb_mb;
6036 while (m1 != NULL) {
6037 cnt += 1;
6038 m1 = m1->m_nextpkt;
6039 }
6040 optval = cnt;
6041 goto integer;
6042 } else {
6043 error = ENOPROTOOPT;
6044 break;
6045 }
6046
6047 case SO_NWRITE:
6048 optval = so->so_snd.sb_cc;
6049 goto integer;
6050
6051 case SO_ERROR:
6052 optval = so->so_error;
6053 so->so_error = 0;
6054 goto integer;
6055
6056 case SO_SNDBUF: {
6057 u_int32_t hiwat = so->so_snd.sb_hiwat;
6058
6059 if (so->so_snd.sb_flags & SB_UNIX) {
6060 struct unpcb *unp =
6061 (struct unpcb *)(so->so_pcb);
6062 if (unp != NULL && unp->unp_conn != NULL) {
6063 hiwat += unp->unp_conn->unp_cc;
6064 }
6065 }
6066
6067 optval = hiwat;
6068 goto integer;
6069 }
6070 case SO_RCVBUF:
6071 optval = so->so_rcv.sb_hiwat;
6072 goto integer;
6073
6074 case SO_SNDLOWAT:
6075 optval = so->so_snd.sb_lowat;
6076 goto integer;
6077
6078 case SO_RCVLOWAT:
6079 optval = so->so_rcv.sb_lowat;
6080 goto integer;
6081
6082 case SO_SNDTIMEO:
6083 case SO_RCVTIMEO:
6084 tv = (sopt->sopt_name == SO_SNDTIMEO ?
6085 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
6086
6087 error = sooptcopyout_timeval(sopt, &tv);
6088 break;
6089
6090 case SO_NOSIGPIPE:
6091 optval = (so->so_flags & SOF_NOSIGPIPE);
6092 goto integer;
6093
6094 case SO_NOADDRERR:
6095 optval = (so->so_flags & SOF_NOADDRAVAIL);
6096 goto integer;
6097
6098 case SO_REUSESHAREUID:
6099 optval = (so->so_flags & SOF_REUSESHAREUID);
6100 goto integer;
6101
6103 case SO_NOTIFYCONFLICT:
6104 optval = (so->so_flags & SOF_NOTIFYCONFLICT);
6105 goto integer;
6106
6107 case SO_RESTRICTIONS:
6108 optval = so_get_restrictions(so);
6109 goto integer;
6110
6111 case SO_AWDL_UNRESTRICTED:
6112 if (SOCK_DOM(so) == PF_INET ||
6113 SOCK_DOM(so) == PF_INET6) {
6114 optval = inp_get_awdl_unrestricted(
6115 sotoinpcb(so));
6116 goto integer;
6117 } else {
6118 error = EOPNOTSUPP;
6119 }
6120 break;
6121
6122 case SO_INTCOPROC_ALLOW:
6123 if (SOCK_DOM(so) == PF_INET6) {
6124 optval = inp_get_intcoproc_allowed(
6125 sotoinpcb(so));
6126 goto integer;
6127 } else {
6128 error = EOPNOTSUPP;
6129 }
6130 break;
6131
6132 case SO_LABEL:
6133 error = EOPNOTSUPP;
6134 break;
6135
6136 case SO_PEERLABEL:
6137 error = EOPNOTSUPP;
6138 break;
6139
6140 #ifdef __APPLE_API_PRIVATE
6141 case SO_UPCALLCLOSEWAIT:
6142 optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
6143 goto integer;
6144 #endif
6145 case SO_RANDOMPORT:
6146 optval = (so->so_flags & SOF_BINDRANDOMPORT);
6147 goto integer;
6148
6149 case SO_NP_EXTENSIONS: {
6150 struct so_np_extensions sonpx = {};
6151
6152 sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ?
6153 SONPX_SETOPTSHUT : 0;
6154 sonpx.npx_mask = SONPX_MASK_VALID;
6155
6156 error = sooptcopyout(sopt, &sonpx,
6157 sizeof(struct so_np_extensions));
6158 break;
6159 }
6160
6161 case SO_TRAFFIC_CLASS:
6162 optval = so->so_traffic_class;
6163 goto integer;
6164
6165 case SO_RECV_TRAFFIC_CLASS:
6166 optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
6167 goto integer;
6168
6169 #if (DEVELOPMENT || DEBUG)
6170 case SO_TRAFFIC_CLASS_DBG:
6171 error = sogetopt_tcdbg(so, sopt);
6172 break;
6173 #endif /* (DEVELOPMENT || DEBUG) */
6174
6175 case SO_PRIVILEGED_TRAFFIC_CLASS:
6176 optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
6177 goto integer;
6178
6179 case SO_DEFUNCTOK:
6180 optval = !(so->so_flags & SOF_NODEFUNCT);
6181 goto integer;
6182
6183 case SO_ISDEFUNCT:
6184 optval = (so->so_flags & SOF_DEFUNCT);
6185 goto integer;
6186
6187 case SO_OPPORTUNISTIC:
6188 optval = so_get_opportunistic(so);
6189 goto integer;
6190
6191 case SO_FLUSH:
6192 /* This option is not gettable */
6193 error = EINVAL;
6194 break;
6195
6196 case SO_RECV_ANYIF:
6197 optval = so_get_recv_anyif(so);
6198 goto integer;
6199
6200 case SO_TRAFFIC_MGT_BACKGROUND:
6201 /* This option is handled by lower layer(s) */
6202 if (so->so_proto != NULL &&
6203 so->so_proto->pr_ctloutput != NULL) {
6204 (void) so->so_proto->pr_ctloutput(so, sopt);
6205 }
6206 break;
6207
6208 #if FLOW_DIVERT
6209 case SO_FLOW_DIVERT_TOKEN:
6210 error = flow_divert_token_get(so, sopt);
6211 break;
6212 #endif /* FLOW_DIVERT */
6213
6214 #if NECP
6215 case SO_NECP_ATTRIBUTES:
6216 if (SOCK_DOM(so) == PF_MULTIPATH) {
6217 /* Handled by MPTCP itself */
6218 break;
6219 }
6220
6221 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
6222 error = EINVAL;
6223 goto out;
6224 }
6225
6226 error = necp_get_socket_attributes(&sotoinpcb(so)->inp_necp_attributes, sopt);
6227 break;
6228
6229 case SO_NECP_CLIENTUUID: {
6230 uuid_t *ncu;
6231
6232 if (SOCK_DOM(so) == PF_MULTIPATH) {
6233 ncu = &mpsotomppcb(so)->necp_client_uuid;
6234 } else if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
6235 ncu = &sotoinpcb(so)->necp_client_uuid;
6236 } else {
6237 error = EINVAL;
6238 goto out;
6239 }
6240
6241 error = sooptcopyout(sopt, ncu, sizeof(uuid_t));
6242 break;
6243 }
6244
6245 case SO_NECP_LISTENUUID: {
6246 uuid_t *nlu;
6247
6248 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
6249 if (sotoinpcb(so)->inp_flags2 & INP2_EXTERNAL_PORT) {
6250 nlu = &sotoinpcb(so)->necp_client_uuid;
6251 } else {
6252 error = ENOENT;
6253 goto out;
6254 }
6255 } else {
6256 error = EINVAL;
6257 goto out;
6258 }
6259
6260 error = sooptcopyout(sopt, nlu, sizeof(uuid_t));
6261 break;
6262 }
6263
6264 case SO_RESOLVER_SIGNATURE: {
6265 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
6266 error = EINVAL;
6267 goto out;
6268 }
6269 error = necp_get_socket_resolver_signature(sotoinpcb(so), sopt);
6270 break;
6271 }
6272
6273 #endif /* NECP */
6274
6275 #if CONTENT_FILTER
6276 case SO_CFIL_SOCK_ID: {
6277 cfil_sock_id_t sock_id;
6278
6279 sock_id = cfil_sock_id_from_socket(so);
6280
6281 error = sooptcopyout(sopt, &sock_id,
6282 sizeof(cfil_sock_id_t));
6283 break;
6284 }
6285 #endif /* CONTENT_FILTER */
6286
6287 case SO_EXTENDED_BK_IDLE:
6288 optval = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED);
6289 goto integer;
6290 case SO_MARK_CELLFALLBACK:
6291 optval = ((so->so_flags1 & SOF1_CELLFALLBACK) > 0)
6292 ? 1 : 0;
6293 goto integer;
6294 case SO_FALLBACK_MODE:
6295 optval = so->so_fallback_mode;
6296 goto integer;
6297 case SO_MARK_KNOWN_TRACKER: {
6298 optval = ((so->so_flags1 & SOF1_KNOWN_TRACKER) > 0)
6299 ? 1 : 0;
6300 goto integer;
6301 }
6302 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED: {
6303 optval = ((so->so_flags1 & SOF1_TRACKER_NON_APP_INITIATED) > 0)
6304 ? 1 : 0;
6305 goto integer;
6306 }
6307 case SO_MARK_APPROVED_APP_DOMAIN: {
6308 optval = ((so->so_flags1 & SOF1_APPROVED_APP_DOMAIN) > 0)
6309 ? 1 : 0;
6310 goto integer;
6311 }
6312 case SO_NET_SERVICE_TYPE: {
6313 if ((so->so_flags1 & SOF1_TC_NET_SERV_TYPE)) {
6314 optval = so->so_netsvctype;
6315 } else {
6316 optval = NET_SERVICE_TYPE_BE;
6317 }
6318 goto integer;
6319 }
6320 case SO_NETSVC_MARKING_LEVEL:
6321 optval = so_get_netsvc_marking_level(so);
6322 goto integer;
6323
6324 case SO_MPKL_SEND_INFO: {
6325 struct so_mpkl_send_info so_mpkl_send_info;
6326
6327 uuid_copy(so_mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
6328 so_mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
6329 error = sooptcopyout(sopt, &so_mpkl_send_info,
6330 sizeof(struct so_mpkl_send_info));
6331 break;
6332 }
6333 case SO_MARK_WAKE_PKT:
6334 optval = (so->so_flags & SOF_MARK_WAKE_PKT);
6335 goto integer;
6336 case SO_RECV_WAKE_PKT:
6337 optval = (so->so_flags & SOF_RECV_WAKE_PKT);
6338 goto integer;
6339 case SO_APPLICATION_ID: {
6340 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
6341 error = EINVAL;
6342 goto out;
6343 }
6344 so_application_id_t application_id = { 0 };
6345 application_id.uid = kauth_cred_getuid(so->so_cred);
6346 uuid_copy(application_id.effective_uuid, !uuid_is_null(so->e_uuid) ? so->e_uuid : so->last_uuid);
6347 application_id.persona_id = so->so_persona_id;
6348 error = sooptcopyout(sopt, &application_id, sizeof(so_application_id_t));
6349 break;
6350 }
6351 case SO_MARK_DOMAIN_INFO_SILENT:
6352 optval = ((so->so_flags1 & SOF1_DOMAIN_INFO_SILENT) > 0)
6353 ? 1 : 0;
6354 goto integer;
6355 default:
6356 error = ENOPROTOOPT;
6357 break;
6358 }
6359 }
6360 out:
6361 if (dolock) {
6362 socket_unlock(so, 1);
6363 }
6364 return error;
6365 }
6366
6367 /*
6368 * The size limits on our soopt_getm are different from those on FreeBSD.
6369 * We limit the size of options to MCLBYTES. This will have to change
6370 * if we need to define options that need more space than MCLBYTES.
6371 */
6372 int
6373 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
6374 {
6375 struct mbuf *m, *m_prev;
6376 int sopt_size = (int)sopt->sopt_valsize;
6377 int how;
6378
6379 if (sopt_size <= 0 || sopt_size > MCLBYTES) {
6380 return EMSGSIZE;
6381 }
6382
6383 how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
6384 MGET(m, how, MT_DATA);
6385 if (m == NULL) {
6386 return ENOBUFS;
6387 }
6388 if (sopt_size > MLEN) {
6389 MCLGET(m, how);
6390 if ((m->m_flags & M_EXT) == 0) {
6391 m_free(m);
6392 return ENOBUFS;
6393 }
6394 m->m_len = min(MCLBYTES, sopt_size);
6395 } else {
6396 m->m_len = min(MLEN, sopt_size);
6397 }
6398 sopt_size -= m->m_len;
6399 *mp = m;
6400 m_prev = m;
6401
6402 while (sopt_size > 0) {
6403 MGET(m, how, MT_DATA);
6404 if (m == NULL) {
6405 m_freem(*mp);
6406 return ENOBUFS;
6407 }
6408 if (sopt_size > MLEN) {
6409 MCLGET(m, how);
6410 if ((m->m_flags & M_EXT) == 0) {
6411 m_freem(*mp);
6412 m_freem(m);
6413 return ENOBUFS;
6414 }
6415 m->m_len = min(MCLBYTES, sopt_size);
6416 } else {
6417 m->m_len = min(MLEN, sopt_size);
6418 }
6419 sopt_size -= m->m_len;
6420 m_prev->m_next = m;
6421 m_prev = m;
6422 }
6423 return 0;
6424 }
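/*
 * Worked example of the chain construction above, assuming the usual
 * MCLBYTES of 2048: for a 2100-byte option, the first mbuf is backed by
 * a cluster holding min(2048, 2100) = 2048 bytes, and the loop then
 * allocates one more plain mbuf for the remaining 52 bytes (which fits
 * within MLEN), giving a two-mbuf chain.
 */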
6425
6426 /* copyin sopt data into mbuf chain */
6427 int
6428 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
6429 {
6430 struct mbuf *m0 = m;
6431
6432 if (sopt->sopt_val == USER_ADDR_NULL) {
6433 return 0;
6434 }
6435 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
6436 if (sopt->sopt_p != kernproc) {
6437 int error;
6438
6439 error = copyin(sopt->sopt_val, mtod(m, char *),
6440 m->m_len);
6441 if (error != 0) {
6442 m_freem(m0);
6443 return error;
6444 }
6445 } else {
6446 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
6447 CAST_DOWN(caddr_t, sopt->sopt_val),
6448 m->m_len);
6449 bcopy(tmp, mtod(m, char *), m->m_len);
6450 }
6451 sopt->sopt_valsize -= m->m_len;
6452 sopt->sopt_val += m->m_len;
6453 m = m->m_next;
6454 }
6455 /* enough space should have been allocated at ip6_sooptmcopyin() */
6456 if (m != NULL) {
6457 panic("soopt_mcopyin");
6458 /* NOTREACHED */
6459 }
6460 return 0;
6461 }
6462
6463 /* copyout mbuf chain data into soopt */
6464 int
6465 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
6466 {
6467 struct mbuf *m0 = m;
6468 size_t valsize = 0;
6469
6470 if (sopt->sopt_val == USER_ADDR_NULL) {
6471 return 0;
6472 }
6473 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
6474 if (sopt->sopt_p != kernproc) {
6475 int error;
6476
6477 error = copyout(mtod(m, char *), sopt->sopt_val,
6478 m->m_len);
6479 if (error != 0) {
6480 m_freem(m0);
6481 return error;
6482 }
6483 } else {
6484 caddr_t tmp = __unsafe_forge_bidi_indexable(caddr_t,
6485 CAST_DOWN(caddr_t, sopt->sopt_val),
6486 m->m_len);
6487
6488 bcopy(mtod(m, char *), tmp, m->m_len);
6489 }
6490 sopt->sopt_valsize -= m->m_len;
6491 sopt->sopt_val += m->m_len;
6492 valsize += m->m_len;
6493 m = m->m_next;
6494 }
6495 if (m != NULL) {
6496 /* a large enough soopt buffer should be provided by user-land */
6497 m_freem(m0);
6498 return EINVAL;
6499 }
6500 sopt->sopt_valsize = valsize;
6501 return 0;
6502 }
6503
6504 void
6505 sohasoutofband(struct socket *so)
6506 {
6507 if (so->so_pgid < 0) {
6508 gsignal(-so->so_pgid, SIGURG);
6509 } else if (so->so_pgid > 0) {
6510 proc_signal(so->so_pgid, SIGURG);
6511 }
6512 selwakeup(&so->so_rcv.sb_sel);
6513 if (so->so_rcv.sb_flags & SB_KNOTE) {
6514 KNOTE(&so->so_rcv.sb_sel.si_note,
6515 (NOTE_OOB | SO_FILT_HINT_LOCKED));
6516 }
6517 }
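/*
 * Worked example of the so_pgid convention above (assumed values): with
 * so_pgid == -123, SIGURG is delivered to process group 123 via
 * gsignal(); with so_pgid == 456, it is delivered to process 456 via
 * proc_signal(); with so_pgid == 0, no signal is sent and only the
 * select/knote wakeups occur.
 */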
6518
6519 int
6520 sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql)
6521 {
6522 #pragma unused(cred)
6523 struct proc *p = current_proc();
6524 int revents = 0;
6525
6526 socket_lock(so, 1);
6527 so_update_last_owner_locked(so, PROC_NULL);
6528 so_update_policy(so);
6529
6530 if (events & (POLLIN | POLLRDNORM)) {
6531 if (soreadable(so)) {
6532 revents |= events & (POLLIN | POLLRDNORM);
6533 }
6534 }
6535
6536 if (events & (POLLOUT | POLLWRNORM)) {
6537 if (sowriteable(so)) {
6538 revents |= events & (POLLOUT | POLLWRNORM);
6539 }
6540 }
6541
6542 if (events & (POLLPRI | POLLRDBAND)) {
6543 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
6544 revents |= events & (POLLPRI | POLLRDBAND);
6545 }
6546 }
6547
6548 if (revents == 0) {
6549 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
6550 /*
6551 * Darwin sets the flag first,
6552 * BSD calls selrecord first
6553 */
6554 so->so_rcv.sb_flags |= SB_SEL;
6555 selrecord(p, &so->so_rcv.sb_sel, wql);
6556 }
6557
6558 if (events & (POLLOUT | POLLWRNORM)) {
6559 /*
6560 * Darwin sets the flag first,
6561 * BSD calls selrecord first
6562 */
6563 so->so_snd.sb_flags |= SB_SEL;
6564 selrecord(p, &so->so_snd.sb_sel, wql);
6565 }
6566 }
6567
6568 socket_unlock(so, 1);
6569 return revents;
6570 }
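/*
 * User-space sketch (hypothetical descriptor name) of how the checks
 * above surface through poll(2): POLLPRI/POLLRDBAND are reported when
 * out-of-band data is pending (so_oobmark set or SS_RCVATMARK).
 *
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = s, .events = POLLIN | POLLPRI };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI)) {
 *		// urgent/OOB data is available at the mark
 *	}
 */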
6571
6572 int
6573 soo_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev)
6574 {
6575 struct socket *so = (struct socket *)fp_get_data(fp);
6576 int result;
6577
6578 socket_lock(so, 1);
6579 so_update_last_owner_locked(so, PROC_NULL);
6580 so_update_policy(so);
6581
6582 switch (kn->kn_filter) {
6583 case EVFILT_READ:
6584 kn->kn_filtid = EVFILTID_SOREAD;
6585 break;
6586 case EVFILT_WRITE:
6587 kn->kn_filtid = EVFILTID_SOWRITE;
6588 break;
6589 case EVFILT_SOCK:
6590 kn->kn_filtid = EVFILTID_SCK;
6591 break;
6592 case EVFILT_EXCEPT:
6593 kn->kn_filtid = EVFILTID_SOEXCEPT;
6594 break;
6595 default:
6596 socket_unlock(so, 1);
6597 knote_set_error(kn, EINVAL);
6598 return 0;
6599 }
6600
6601 /*
6602 * call the appropriate sub-filter attach
6603 * with the socket still locked
6604 */
6605 result = knote_fops(kn)->f_attach(kn, kev);
6606
6607 socket_unlock(so, 1);
6608
6609 return result;
6610 }
6611
6612 static int
6613 filt_soread_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
6614 {
6615 int retval = 0;
6616 int64_t data = 0;
6617
6618 if (so->so_options & SO_ACCEPTCONN) {
6619 /*
6620 * Radar 6615193: handle the listen case dynamically
6621 * for the kqueue read filter. This allows listen() to be
6622 * called after registering the kqueue EVFILT_READ filter.
6623 */
6624
6625 retval = !TAILQ_EMPTY(&so->so_comp);
6626 data = so->so_qlen;
6627 goto out;
6628 }
6629
6630 /* socket isn't a listener */
6631 /*
6632 * NOTE_LOWAT specifies a new low water mark in data, i.e.
6633 * the bytes of protocol data. We therefore exclude any
6634 * control bytes.
6635 */
6636 data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
6637
6638 if (kn->kn_sfflags & NOTE_OOB) {
6639 if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
6640 kn->kn_fflags |= NOTE_OOB;
6641 data -= so->so_oobmark;
6642 retval = 1;
6643 goto out;
6644 }
6645 }
6646
6647 if ((so->so_state & SS_CANTRCVMORE)
6648 #if CONTENT_FILTER
6649 && cfil_sock_data_pending(&so->so_rcv) == 0
6650 #endif /* CONTENT_FILTER */
6651 ) {
6652 kn->kn_flags |= EV_EOF;
6653 kn->kn_fflags = so->so_error;
6654 retval = 1;
6655 goto out;
6656 }
6657
6658 if (so->so_error) { /* temporary udp error */
6659 retval = 1;
6660 goto out;
6661 }
6662
6663 int64_t lowwat = so->so_rcv.sb_lowat;
6664 /*
6665 * Ensure that when NOTE_LOWAT is used, the derived
6666 * low water mark is bounded by the socket's receive
6667 * buffer's high and low water marks.
6668 */
6669 if (kn->kn_sfflags & NOTE_LOWAT) {
6670 if (kn->kn_sdata > so->so_rcv.sb_hiwat) {
6671 lowwat = so->so_rcv.sb_hiwat;
6672 } else if (kn->kn_sdata > lowwat) {
6673 lowwat = kn->kn_sdata;
6674 }
6675 }
6676
6677 /*
6678 * While the `data` field is the amount of data to read,
6679 * 0-sized packets need to wake up the kqueue, see 58140856,
6680 * so we need to take control bytes into account too.
6681 */
6682 retval = (so->so_rcv.sb_cc >= lowwat);
6683
6684 out:
6685 if (retval && kev) {
6686 knote_fill_kevent(kn, kev, data);
6687 }
6688 return retval;
6689 }
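/*
 * Sketch of driving this filter from user space with NOTE_LOWAT
 * (hypothetical names).  The requested low water mark in kn_sdata is
 * clamped by the code above to the receive buffer's high water mark.
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev;
 *	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 *	// the knote fires once at least 128 bytes are queued
 *	// (or sb_hiwat, if that is smaller)
 */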
6690
6691 static int
6692 filt_sorattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6693 {
6694 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6695
6696 /* socket locked */
6697
6698 /*
6699 * If the caller explicitly asked for OOB results (e.g. poll())
6700 * from EVFILT_READ, then save that off in the hookid field
6701 * and reserve the kn_flags EV_OOBAND bit for output only.
6702 */
6703 if (kn->kn_filter == EVFILT_READ &&
6704 kn->kn_flags & EV_OOBAND) {
6705 kn->kn_flags &= ~EV_OOBAND;
6706 kn->kn_hook32 = EV_OOBAND;
6707 } else {
6708 kn->kn_hook32 = 0;
6709 }
6710 if (KNOTE_ATTACH(&so->so_rcv.sb_sel.si_note, kn)) {
6711 so->so_rcv.sb_flags |= SB_KNOTE;
6712 }
6713
6714 /* indicate if the event has already fired */
6715 return filt_soread_common(kn, NULL, so);
6716 }
6717
6718 static void
6719 filt_sordetach(struct knote *kn)
6720 {
6721 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6722
6723 socket_lock(so, 1);
6724 if (so->so_rcv.sb_flags & SB_KNOTE) {
6725 if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) {
6726 so->so_rcv.sb_flags &= ~SB_KNOTE;
6727 }
6728 }
6729 socket_unlock(so, 1);
6730 }
6731
6732 /*ARGSUSED*/
6733 static int
6734 filt_soread(struct knote *kn, long hint)
6735 {
6736 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6737 int retval;
6738
6739 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6740 socket_lock(so, 1);
6741 }
6742
6743 retval = filt_soread_common(kn, NULL, so);
6744
6745 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6746 socket_unlock(so, 1);
6747 }
6748
6749 return retval;
6750 }
6751
6752 static int
6753 filt_sortouch(struct knote *kn, struct kevent_qos_s *kev)
6754 {
6755 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6756 int retval;
6757
6758 socket_lock(so, 1);
6759
6760 /* save off the new input fflags and data */
6761 kn->kn_sfflags = kev->fflags;
6762 kn->kn_sdata = kev->data;
6763
6764 /* determine if changes result in fired events */
6765 retval = filt_soread_common(kn, NULL, so);
6766
6767 socket_unlock(so, 1);
6768
6769 return retval;
6770 }
6771
6772 static int
6773 filt_sorprocess(struct knote *kn, struct kevent_qos_s *kev)
6774 {
6775 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6776 int retval;
6777
6778 socket_lock(so, 1);
6779 retval = filt_soread_common(kn, kev, so);
6780 socket_unlock(so, 1);
6781
6782 return retval;
6783 }
6784
6785 int
6786 so_wait_for_if_feedback(struct socket *so)
6787 {
6788 if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) &&
6789 (so->so_state & SS_ISCONNECTED)) {
6790 struct inpcb *inp = sotoinpcb(so);
6791 if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
6792 return 1;
6793 }
6794 }
6795 return 0;
6796 }
6797
6798 static int
6799 filt_sowrite_common(struct knote *kn, struct kevent_qos_s *kev, struct socket *so)
6800 {
6801 int ret = 0;
6802 int64_t data = sbspace(&so->so_snd);
6803
6804 if (so->so_state & SS_CANTSENDMORE) {
6805 kn->kn_flags |= EV_EOF;
6806 kn->kn_fflags = so->so_error;
6807 ret = 1;
6808 goto out;
6809 }
6810
6811 if (so->so_error) { /* temporary udp error */
6812 ret = 1;
6813 goto out;
6814 }
6815
6816 if (!socanwrite(so)) {
6817 ret = 0;
6818 goto out;
6819 }
6820
6821 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
6822 ret = 1;
6823 goto out;
6824 }
6825
6826 int64_t lowwat = so->so_snd.sb_lowat;
6827 const int64_t hiwat = so->so_snd.sb_hiwat;
6828 /*
6829 * Deal with connected UNIX domain sockets which
6830 * rely on the fact that the sender's socket buffer is
6831 * actually the receiver's socket buffer.
6832 */
6833 if (SOCK_DOM(so) == PF_LOCAL) {
6834 struct unpcb *unp = sotounpcb(so);
6835 if (unp != NULL && unp->unp_conn != NULL &&
6836 unp->unp_conn->unp_socket != NULL) {
6837 struct socket *so2 = unp->unp_conn->unp_socket;
6838 /*
6839 * At this point we know that `so' is locked
6840 * and that `unp_conn` isn't going to change.
6841 * However, we don't lock `so2` because doing so
6842 * may require unlocking `so'
6843 * (see unp_get_locks_in_order()).
6844 *
6845 * Two cases can happen:
6846 *
6847 * 1) we return 1 and tell the application that
6848 * it can write. Meanwhile, another thread
6849 * fills up the socket buffer. This will either
6850 * lead to a blocking send or EWOULDBLOCK
6851 * which the application should deal with.
6852 * 2) we return 0 and tell the application that
6853 * the socket is not writable. Meanwhile,
6854 * another thread depletes the receive socket
6855 * buffer. In this case the application will
6856 * be woken up by sb_notify().
6857 *
6858 * MIN() is required because otherwise sosendcheck()
6859 * may return EWOULDBLOCK since it only considers
6860 * so->so_snd.
6861 */
6862 data = MIN(data, sbspace(&so2->so_rcv));
6863 }
6864 }
6865
6866 if (kn->kn_sfflags & NOTE_LOWAT) {
6867 if (kn->kn_sdata > hiwat) {
6868 lowwat = hiwat;
6869 } else if (kn->kn_sdata > lowwat) {
6870 lowwat = kn->kn_sdata;
6871 }
6872 }
6873
6874 if (data > 0 && data >= lowwat) {
6875 if ((so->so_flags & SOF_NOTSENT_LOWAT)
6876 #if (DEBUG || DEVELOPMENT)
6877 && so_notsent_lowat_check == 1
6878 #endif /* DEBUG || DEVELOPMENT */
6879 ) {
6880 if ((SOCK_DOM(so) == PF_INET ||
6881 SOCK_DOM(so) == PF_INET6) &&
6882 so->so_type == SOCK_STREAM) {
6883 ret = tcp_notsent_lowat_check(so);
6884 }
6885 #if MPTCP
6886 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
6887 (SOCK_PROTO(so) == IPPROTO_TCP)) {
6888 ret = mptcp_notsent_lowat_check(so);
6889 }
6890 #endif
6891 else {
6892 ret = 1;
6893 goto out;
6894 }
6895 } else {
6896 ret = 1;
6897 }
6898 }
6899 if (so_wait_for_if_feedback(so)) {
6900 ret = 0;
6901 }
6902
6903 out:
6904 if (ret && kev) {
6905 knote_fill_kevent(kn, kev, data);
6906 }
6907 return ret;
6908 }
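/*
 * Worked example of the PF_LOCAL clamp above, with assumed numbers: if
 * this socket's send buffer reports 8 KiB of space but the connected
 * peer's receive buffer has only 1 KiB free, `data` becomes 1 KiB.
 * This matches what sosendcheck() would allow and avoids advertising
 * writability that an actual send could not honor.
 */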
6909
6910 static int
6911 filt_sowattach(struct knote *kn, __unused struct kevent_qos_s *kev)
6912 {
6913 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6914
6915 /* socket locked */
6916 if (KNOTE_ATTACH(&so->so_snd.sb_sel.si_note, kn)) {
6917 so->so_snd.sb_flags |= SB_KNOTE;
6918 }
6919
6920 /* determine if it has already fired */
6921 return filt_sowrite_common(kn, NULL, so);
6922 }
6923
6924 static void
6925 filt_sowdetach(struct knote *kn)
6926 {
6927 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6928 socket_lock(so, 1);
6929
6930 if (so->so_snd.sb_flags & SB_KNOTE) {
6931 if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) {
6932 so->so_snd.sb_flags &= ~SB_KNOTE;
6933 }
6934 }
6935 socket_unlock(so, 1);
6936 }
6937
6938 /*ARGSUSED*/
6939 static int
6940 filt_sowrite(struct knote *kn, long hint)
6941 {
6942 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6943 int ret;
6944
6945 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6946 socket_lock(so, 1);
6947 }
6948
6949 ret = filt_sowrite_common(kn, NULL, so);
6950
6951 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
6952 socket_unlock(so, 1);
6953 }
6954
6955 return ret;
6956 }
6957
6958 static int
6959 filt_sowtouch(struct knote *kn, struct kevent_qos_s *kev)
6960 {
6961 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6962 int ret;
6963
6964 socket_lock(so, 1);
6965
6966 /* save off the new input fflags and data */
6967 kn->kn_sfflags = kev->fflags;
6968 kn->kn_sdata = kev->data;
6969
6970 /* determine if these changes result in a triggered event */
6971 ret = filt_sowrite_common(kn, NULL, so);
6972
6973 socket_unlock(so, 1);
6974
6975 return ret;
6976 }
6977
6978 static int
6979 filt_sowprocess(struct knote *kn, struct kevent_qos_s *kev)
6980 {
6981 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
6982 int ret;
6983
6984 socket_lock(so, 1);
6985 ret = filt_sowrite_common(kn, kev, so);
6986 socket_unlock(so, 1);
6987
6988 return ret;
6989 }
6990
6991 static int
6992 filt_sockev_common(struct knote *kn, struct kevent_qos_s *kev,
6993 struct socket *so, long ev_hint)
6994 {
6995 int ret = 0;
6996 int64_t data = 0;
6997 uint32_t level_trigger = 0;
6998
6999 if (ev_hint & SO_FILT_HINT_CONNRESET) {
7000 kn->kn_fflags |= NOTE_CONNRESET;
7001 }
7002 if (ev_hint & SO_FILT_HINT_TIMEOUT) {
7003 kn->kn_fflags |= NOTE_TIMEOUT;
7004 }
7005 if (ev_hint & SO_FILT_HINT_NOSRCADDR) {
7006 kn->kn_fflags |= NOTE_NOSRCADDR;
7007 }
7008 if (ev_hint & SO_FILT_HINT_IFDENIED) {
7009 kn->kn_fflags |= NOTE_IFDENIED;
7010 }
7011 if (ev_hint & SO_FILT_HINT_KEEPALIVE) {
7012 kn->kn_fflags |= NOTE_KEEPALIVE;
7013 }
7014 if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) {
7015 kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO;
7016 }
7017 if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) {
7018 kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO;
7019 }
7020 if ((ev_hint & SO_FILT_HINT_CONNECTED) ||
7021 (so->so_state & SS_ISCONNECTED)) {
7022 kn->kn_fflags |= NOTE_CONNECTED;
7023 level_trigger |= NOTE_CONNECTED;
7024 }
7025 if ((ev_hint & SO_FILT_HINT_DISCONNECTED) ||
7026 (so->so_state & SS_ISDISCONNECTED)) {
7027 kn->kn_fflags |= NOTE_DISCONNECTED;
7028 level_trigger |= NOTE_DISCONNECTED;
7029 }
7030 if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) {
7031 if (so->so_proto != NULL &&
7032 (so->so_proto->pr_flags & PR_EVCONNINFO)) {
7033 kn->kn_fflags |= NOTE_CONNINFO_UPDATED;
7034 }
7035 }
7036 if ((ev_hint & SO_FILT_HINT_NOTIFY_ACK) ||
7037 tcp_notify_ack_active(so)) {
7038 kn->kn_fflags |= NOTE_NOTIFY_ACK;
7039 }
7040 if (ev_hint & SO_FILT_HINT_WAKE_PKT) {
7041 kn->kn_fflags |= NOTE_WAKE_PKT;
7042 }
7043
7044 if ((so->so_state & SS_CANTRCVMORE)
7045 #if CONTENT_FILTER
7046 && cfil_sock_data_pending(&so->so_rcv) == 0
7047 #endif /* CONTENT_FILTER */
7048 ) {
7049 kn->kn_fflags |= NOTE_READCLOSED;
7050 level_trigger |= NOTE_READCLOSED;
7051 }
7052
7053 if (so->so_state & SS_CANTSENDMORE) {
7054 kn->kn_fflags |= NOTE_WRITECLOSED;
7055 level_trigger |= NOTE_WRITECLOSED;
7056 }
7057
7058 if ((ev_hint & SO_FILT_HINT_SUSPEND) ||
7059 (so->so_flags & SOF_SUSPENDED)) {
7060 kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
7061
7062 /* If resume event was delivered before, reset it */
7063 kn->kn_hook32 &= ~NOTE_RESUME;
7064
7065 kn->kn_fflags |= NOTE_SUSPEND;
7066 level_trigger |= NOTE_SUSPEND;
7067 }
7068
7069 if ((ev_hint & SO_FILT_HINT_RESUME) ||
7070 (so->so_flags & SOF_SUSPENDED) == 0) {
7071 kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME);
7072
7073 /* If suspend event was delivered before, reset it */
7074 kn->kn_hook32 &= ~NOTE_SUSPEND;
7075
7076 kn->kn_fflags |= NOTE_RESUME;
7077 level_trigger |= NOTE_RESUME;
7078 }
7079
7080 if (so->so_error != 0) {
7081 ret = 1;
7082 data = so->so_error;
7083 kn->kn_flags |= EV_EOF;
7084 } else {
7085 u_int32_t data32 = 0;
7086 get_sockev_state(so, &data32);
7087 data = data32;
7088 }
7089
7090 /* Reset any events that are not requested on this knote */
7091 kn->kn_fflags &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
7092 level_trigger &= (kn->kn_sfflags & EVFILT_SOCK_ALL_MASK);
7093
7094 /* Find the level-triggered events that have already been delivered */
7095 level_trigger &= kn->kn_hook32;
7096 level_trigger &= EVFILT_SOCK_LEVEL_TRIGGER_MASK;
7097
7098 /* Do not deliver level-triggered events more than once */
7099 if ((kn->kn_fflags & ~level_trigger) != 0) {
7100 ret = 1;
7101 }
7102
7103 if (ret && kev) {
7104 /*
7105 * Store the state of the events being delivered. This
7106 * state can be used to deliver level-triggered events
7107 * at least once and still avoid waking up the application
7108 * multiple times as long as the event is active.
7109 */
7110 if (kn->kn_fflags != 0) {
7111 kn->kn_hook32 |= (kn->kn_fflags &
7112 EVFILT_SOCK_LEVEL_TRIGGER_MASK);
7113 }
7114
7115 /*
7116 * NOTE_RESUME and NOTE_SUSPEND are an exception: deliver
7117 * only one of them, and remember which one was
7118 * delivered last.
7119 */
7120 if (kn->kn_fflags & NOTE_SUSPEND) {
7121 kn->kn_hook32 &= ~NOTE_RESUME;
7122 }
7123 if (kn->kn_fflags & NOTE_RESUME) {
7124 kn->kn_hook32 &= ~NOTE_SUSPEND;
7125 }
7126
7127 knote_fill_kevent(kn, kev, data);
7128 }
7129 return ret;
7130 }
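/*
 * User-space sketch of an EVFILT_SOCK consumer served by this filter
 * (EVFILT_SOCK and its NOTE_* constants are Apple-private; names here
 * are otherwise hypothetical).  Level-triggered bits such as
 * NOTE_CONNECTED are latched in kn_hook32 so they are delivered once
 * per state change rather than on every poll.
 *
 *	struct kevent kev;
 *	EV_SET(&kev, s, EVFILT_SOCK, EV_ADD,
 *	    NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_SUSPEND | NOTE_RESUME,
 *	    0, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 */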
7131
7132 static int
7133 filt_sockattach(struct knote *kn, __unused struct kevent_qos_s *kev)
7134 {
7135 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
7136
7137 /* socket locked */
7138 kn->kn_hook32 = 0;
7139 if (KNOTE_ATTACH(&so->so_klist, kn)) {
7140 so->so_flags |= SOF_KNOTE;
7141 }
7142
7143 /* determine if the event has already fired */
7144 return filt_sockev_common(kn, NULL, so, 0);
7145 }
7146
7147 static void
7148 filt_sockdetach(struct knote *kn)
7149 {
7150 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
7151 socket_lock(so, 1);
7152
7153 if ((so->so_flags & SOF_KNOTE) != 0) {
7154 if (KNOTE_DETACH(&so->so_klist, kn)) {
7155 so->so_flags &= ~SOF_KNOTE;
7156 }
7157 }
7158 socket_unlock(so, 1);
7159 }
7160
7161 static int
7162 filt_sockev(struct knote *kn, long hint)
7163 {
7164 int ret = 0, locked = 0;
7165 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
7166 long ev_hint = (hint & SO_FILT_HINT_EV);
7167
7168 if ((hint & SO_FILT_HINT_LOCKED) == 0) {
7169 socket_lock(so, 1);
7170 locked = 1;
7171 }
7172
7173 ret = filt_sockev_common(kn, NULL, so, ev_hint);
7174
7175 if (locked) {
7176 socket_unlock(so, 1);
7177 }
7178
7179 return ret;
7180 }
7181
7184 /*
7185 * filt_socktouch - update event state
7186 */
7187 static int
7188 filt_socktouch(
7189 struct knote *kn,
7190 struct kevent_qos_s *kev)
7191 {
7192 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
7193 uint32_t changed_flags;
7194 int ret;
7195
7196 socket_lock(so, 1);
7197
7198 /* save off the [result] data and fflags */
7199 changed_flags = (kn->kn_sfflags ^ kn->kn_hook32);
7200
7201 /* save off the new input fflags and data */
7202 kn->kn_sfflags = kev->fflags;
7203 kn->kn_sdata = kev->data;
7204
7205 /* restrict the current results to the (smaller?) set of new interest */
7206 /*
7207 * For compatibility with previous implementations, we leave kn_fflags
7208 * as they were before.
7209 */
7210 //kn->kn_fflags &= kev->fflags;
7211
7212 /*
7213 * Since we keep track of events that are already
7214 * delivered, if any of those events are not requested
7215 * anymore the state related to them can be reset
7216 */
7217 kn->kn_hook32 &= ~(changed_flags & EVFILT_SOCK_LEVEL_TRIGGER_MASK);
7218
7219 /* determine if we have events to deliver */
7220 ret = filt_sockev_common(kn, NULL, so, 0);
7221
7222 socket_unlock(so, 1);
7223
7224 return ret;
7225 }
7226
7227 /*
7228 * filt_sockprocess - query event fired state and return data
7229 */
7230 static int
7231 filt_sockprocess(struct knote *kn, struct kevent_qos_s *kev)
7232 {
7233 struct socket *so = (struct socket *)fp_get_data(kn->kn_fp);
7234 int ret = 0;
7235
7236 socket_lock(so, 1);
7237
7238 ret = filt_sockev_common(kn, kev, so, 0);
7239
7240 socket_unlock(so, 1);
7241
7242 return ret;
7243 }
7244
7245 void
7246 get_sockev_state(struct socket *so, u_int32_t *statep)
7247 {
7248 u_int32_t state = *(statep);
7249
7250 /*
7251 * If the state variable was already set by a previous event,
7252 * leave it as is.
7253 */
7254 if (state != 0) {
7255 return;
7256 }
7257
7258 if (so->so_state & SS_ISCONNECTED) {
7259 state |= SOCKEV_CONNECTED;
7260 } else {
7261 state &= ~(SOCKEV_CONNECTED);
7262 }
7263 state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0);
7264 *(statep) = state;
7265 }
7266
7267 #define SO_LOCK_HISTORY_STR_LEN \
7268 (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1)
7269
7270 __private_extern__ const char *
7271 solockhistory_nr(struct socket *so)
7272 {
7273 size_t n = 0;
7274 int i;
7275 static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
7276
7277 bzero(lock_history_str, sizeof(lock_history_str));
7278 for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
7279 n += scnprintf(lock_history_str + n,
7280 SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ",
7281 so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
7282 so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]);
7283 }
7284 return __unsafe_null_terminated_from_indexable(lock_history_str);
7285 }
7286
7287 lck_mtx_t *
7288 socket_getlock(struct socket *so, int flags)
7289 {
7290 if (so->so_proto->pr_getlock != NULL) {
7291 return (*so->so_proto->pr_getlock)(so, flags);
7292 } else {
7293 return so->so_proto->pr_domain->dom_mtx;
7294 }
7295 }
7296
7297 void
7298 socket_lock(struct socket *so, int refcount)
7299 {
7300 void *__single lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
7301
7302 if (so->so_proto->pr_lock) {
7303 (*so->so_proto->pr_lock)(so, refcount, lr_saved);
7304 } else {
7305 #ifdef MORE_LOCKING_DEBUG
7306 LCK_MTX_ASSERT(so->so_proto->pr_domain->dom_mtx,
7307 LCK_MTX_ASSERT_NOTOWNED);
7308 #endif
7309 lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
7310 if (refcount) {
7311 so->so_usecount++;
7312 }
7313 so->lock_lr[so->next_lock_lr] = lr_saved;
7314 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
7315 }
7316 }
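/*
 * Typical in-kernel pairing of the lock entry points above, as a
 * sketch: the refcount argument ties the lock operation to the
 * socket's use count.
 *
 *	socket_lock(so, 1);	// lock and take a use-count reference
 *	// ... operate on the socket ...
 *	socket_unlock(so, 1);	// unlock and drop the reference;
 *				// the last drop frees the socket
 */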
7317
7318 void
7319 socket_lock_assert_owned(struct socket *so)
7320 {
7321 lck_mtx_t *mutex_held;
7322
7323 if (so->so_proto->pr_getlock != NULL) {
7324 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
7325 } else {
7326 mutex_held = so->so_proto->pr_domain->dom_mtx;
7327 }
7328
7329 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7330 }
7331
7332 int
7333 socket_try_lock(struct socket *so)
7334 {
7335 lck_mtx_t *mtx;
7336
7337 if (so->so_proto->pr_getlock != NULL) {
7338 mtx = (*so->so_proto->pr_getlock)(so, 0);
7339 } else {
7340 mtx = so->so_proto->pr_domain->dom_mtx;
7341 }
7342
7343 return lck_mtx_try_lock(mtx);
7344 }
7345
7346 void
7347 socket_unlock(struct socket *so, int refcount)
7348 {
7349 lck_mtx_t *mutex_held;
7350 void *__single lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
7351
7352 if (so == NULL || so->so_proto == NULL) {
7353 panic("%s: null so_proto so=%p", __func__, so);
7354 /* NOTREACHED */
7355 }
7356
7357 if (so->so_proto->pr_unlock) {
7358 (*so->so_proto->pr_unlock)(so, refcount, lr_saved);
7359 } else {
7360 mutex_held = so->so_proto->pr_domain->dom_mtx;
7361 #ifdef MORE_LOCKING_DEBUG
7362 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7363 #endif
7364 so->unlock_lr[so->next_unlock_lr] = lr_saved;
7365 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
7366
7367 if (refcount) {
7368 if (so->so_usecount <= 0) {
7369 panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
7370 "lrh=%s", __func__, so->so_usecount, so,
7371 SOCK_DOM(so), so->so_type,
7372 SOCK_PROTO(so), solockhistory_nr(so));
7373 /* NOTREACHED */
7374 }
7375
7376 so->so_usecount--;
7377 if (so->so_usecount == 0) {
7378 sofreelastref(so, 1);
7379 }
7380 }
7381 lck_mtx_unlock(mutex_held);
7382 }
7383 }
7384
7385 /* Called with socket locked, will unlock socket */
7386 void
7387 sofree(struct socket *so)
7388 {
7389 lck_mtx_t *mutex_held;
7390
7391 if (so->so_proto->pr_getlock != NULL) {
7392 mutex_held = (*so->so_proto->pr_getlock)(so, 0);
7393 } else {
7394 mutex_held = so->so_proto->pr_domain->dom_mtx;
7395 }
7396 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
7397
7398 sofreelastref(so, 0);
7399 }
7400
7401 void
7402 soreference(struct socket *so)
7403 {
7404 socket_lock(so, 1); /* lock and take one reference on the socket */
7405 socket_unlock(so, 0); /* unlock only */
7406 }
7407
7408 void
7409 sodereference(struct socket *so)
7410 {
7411 socket_lock(so, 0);
7412 socket_unlock(so, 1);
7413 }
7414
7415 /*
7416 * Set or clear SOF_MULTIPAGES on the socket to enable or disable the
7417 * possibility of using jumbo clusters. The caller must hold
7418 * the socket lock.
7419 */
7420 void
7421 somultipages(struct socket *so, boolean_t set)
7422 {
7423 if (set) {
7424 so->so_flags |= SOF_MULTIPAGES;
7425 } else {
7426 so->so_flags &= ~SOF_MULTIPAGES;
7427 }
7428 }
7429
7430 void
7431 soif2kcl(struct socket *so, boolean_t set)
7432 {
7433 if (set) {
7434 so->so_flags1 |= SOF1_IF_2KCL;
7435 } else {
7436 so->so_flags1 &= ~SOF1_IF_2KCL;
7437 }
7438 }
7439
7440 int
7441 so_isdstlocal(struct socket *so)
7442 {
7443 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7444
7445 if (SOCK_DOM(so) == PF_INET) {
7446 return inaddr_local(inp->inp_faddr);
7447 } else if (SOCK_DOM(so) == PF_INET6) {
7448 return in6addr_local(&inp->in6p_faddr);
7449 }
7450
7451 return 0;
7452 }
7453
7454 int
7455 sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
7456 {
7457 struct sockbuf *rcv, *snd;
7458 int err = 0, defunct;
7459
7460 rcv = &so->so_rcv;
7461 snd = &so->so_snd;
7462
7463 defunct = (so->so_flags & SOF_DEFUNCT);
7464 if (defunct) {
7465 if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) {
7466 panic("%s: SB_DROP not set", __func__);
7467 /* NOTREACHED */
7468 }
7469 goto done;
7470 }
7471
7472 if (so->so_flags & SOF_NODEFUNCT) {
7473 if (noforce) {
7474 err = EOPNOTSUPP;
7475 if (p != PROC_NULL) {
7476 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7477 "name %s level %d) so 0x%llu [%d,%d] "
7478 "is not eligible for defunct "
7479 "(%d)\n", __func__, proc_selfpid(),
7480 proc_best_name(current_proc()), proc_pid(p),
7481 proc_best_name(p), level,
7482 so->so_gencnt,
7483 SOCK_DOM(so), SOCK_TYPE(so), err);
7484 }
7485 return err;
7486 }
7487 so->so_flags &= ~SOF_NODEFUNCT;
7488 if (p != PROC_NULL) {
7489 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7490 "name %s level %d) so 0x%llu [%d,%d] "
7491 "defunct by force "
7492 "(%d)\n", __func__, proc_selfpid(),
7493 proc_best_name(current_proc()), proc_pid(p),
7494 proc_best_name(p), level,
7495 so->so_gencnt,
7496 SOCK_DOM(so), SOCK_TYPE(so), err);
7497 }
7498 } else if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
7499 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7500 struct ifnet *ifp = inp->inp_last_outifp;
7501
7502 if (ifp && IFNET_IS_CELLULAR(ifp)) {
7503 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nocell);
7504 } else if (so->so_flags & SOF_DELEGATED) {
7505 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
7506 } else if (soextbkidlestat.so_xbkidle_time == 0) {
7507 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_notime);
7508 } else if (noforce && p != PROC_NULL) {
7509 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_active);
7510
7511 so->so_flags1 |= SOF1_EXTEND_BK_IDLE_INPROG;
7512 so->so_extended_bk_start = net_uptime();
7513 OSBitOrAtomic(P_LXBKIDLEINPROG, &p->p_ladvflag);
7514
7515 inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
7516
7517 err = EOPNOTSUPP;
7518 SODEFUNCTLOG("%s[%d, %s]: (target pid %d "
7519 "name %s level %d) so 0x%llu [%d,%d] "
7520 "extend bk idle "
7521 "(%d)\n", __func__, proc_selfpid(),
7522 proc_best_name(current_proc()), proc_pid(p),
7523 proc_best_name(p), level,
7524 so->so_gencnt,
7525 SOCK_DOM(so), SOCK_TYPE(so), err);
7526 return err;
7527 } else {
7528 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_forced);
7529 }
7530 }
7531
7532 so->so_flags |= SOF_DEFUNCT;
7533
7534 /* Prevent further data from being appended to the socket buffers */
7535 snd->sb_flags |= SB_DROP;
7536 rcv->sb_flags |= SB_DROP;
7537
7538 /* Flush any existing data in the socket buffers */
7539 if (rcv->sb_cc != 0) {
7540 rcv->sb_flags &= ~SB_SEL;
7541 selthreadclear(&rcv->sb_sel);
7542 sbrelease(rcv);
7543 }
7544 if (snd->sb_cc != 0) {
7545 snd->sb_flags &= ~SB_SEL;
7546 selthreadclear(&snd->sb_sel);
7547 sbrelease(snd);
7548 }
7549
7550 done:
7551 if (p != PROC_NULL) {
7552 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
7553 "so 0x%llu [%d,%d] %s defunct%s\n", __func__,
7554 proc_selfpid(), proc_best_name(current_proc()),
7555 proc_pid(p), proc_best_name(p), level,
7556 so->so_gencnt, SOCK_DOM(so),
7557 SOCK_TYPE(so), defunct ? "is already" : "marked as",
7558 (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
7559 " extbkidle" : "");
7560 }
7561 return err;
7562 }
7563
7564 int
7565 sodefunct(struct proc *p, struct socket *so, int level)
7566 {
7567 struct sockbuf *rcv, *snd;
7568
7569 if (!(so->so_flags & SOF_DEFUNCT)) {
7570 panic("%s improperly called", __func__);
7571 /* NOTREACHED */
7572 }
7573 if (so->so_state & SS_DEFUNCT) {
7574 goto done;
7575 }
7576
7577 rcv = &so->so_rcv;
7578 snd = &so->so_snd;
7579
7580 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7581 char s[MAX_IPv6_STR_LEN];
7582 char d[MAX_IPv6_STR_LEN];
7583 struct inpcb *inp = sotoinpcb(so);
7584
7585 if (p != PROC_NULL) {
7586 SODEFUNCTLOG(
7587 "%s[%d, %s]: (target pid %d name %s level %d) "
7588 "so 0x%llu [%s %s:%d -> %s:%d] is now defunct "
7589 "[rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, "
7590 " snd_fl 0x%x]\n", __func__,
7591 proc_selfpid(), proc_best_name(current_proc()),
7592 proc_pid(p), proc_best_name(p), level,
7593 so->so_gencnt,
7594 (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP",
7595 inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ?
7596 (void *)&inp->inp_laddr.s_addr :
7597 (void *)&inp->in6p_laddr),
7598 s, sizeof(s)), ntohs(inp->in6p_lport),
7599 inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ?
7600 (void *)&inp->inp_faddr.s_addr :
7601 (void *)&inp->in6p_faddr,
7602 d, sizeof(d)), ntohs(inp->in6p_fport),
7603 (uint32_t)rcv->sb_sel.si_flags,
7604 (uint32_t)snd->sb_sel.si_flags,
7605 rcv->sb_flags, snd->sb_flags);
7606 }
7607 } else if (p != PROC_NULL) {
7608 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s level %d) "
7609 "so 0x%llu [%d,%d] is now defunct [rcv_si 0x%x, "
7610 "snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", __func__,
7611 proc_selfpid(), proc_best_name(current_proc()),
7612 proc_pid(p), proc_best_name(p), level,
7613 so->so_gencnt,
7614 SOCK_DOM(so), SOCK_TYPE(so),
7615 (uint32_t)rcv->sb_sel.si_flags,
7616 (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags,
7617 snd->sb_flags);
7618 }
7619
7620 /*
7621 * First tell the protocol the flow is defunct
7622 */
7623 (void) (*so->so_proto->pr_usrreqs->pru_defunct)(so);
7624
7625 /*
7626 * Unwedge threads blocked on sbwait() and sb_lock().
7627 */
7628 sbwakeup(rcv);
7629 sbwakeup(snd);
7630
7631 so->so_flags1 |= SOF1_DEFUNCTINPROG;
7632 if (rcv->sb_flags & SB_LOCK) {
7633 sbunlock(rcv, TRUE); /* keep socket locked */
7634 }
7635 if (snd->sb_flags & SB_LOCK) {
7636 sbunlock(snd, TRUE); /* keep socket locked */
7637 }
7638 /*
7639 * Flush the buffers and disconnect. We explicitly call shutdown
7640 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
7641 * states are set for the socket. This would also flush out data
7642 * hanging off the receive list of this socket.
7643 */
7644 (void) soshutdownlock_final(so, SHUT_RD);
7645 (void) soshutdownlock_final(so, SHUT_WR);
7646 (void) sodisconnectlocked(so);
7647
7648 /*
7649 * Explicitly handle connectionless-protocol disconnection
7650 * and release any remaining data in the socket buffers.
7651 */
7652 if (!(so->so_state & SS_ISDISCONNECTED)) {
7653 (void) soisdisconnected(so);
7654 }
7655
7656 if (so->so_error == 0) {
7657 so->so_error = EBADF;
7658 }
7659
7660 if (rcv->sb_cc != 0) {
7661 rcv->sb_flags &= ~SB_SEL;
7662 selthreadclear(&rcv->sb_sel);
7663 sbrelease(rcv);
7664 }
7665 if (snd->sb_cc != 0) {
7666 snd->sb_flags &= ~SB_SEL;
7667 selthreadclear(&snd->sb_sel);
7668 sbrelease(snd);
7669 }
7670 so->so_state |= SS_DEFUNCT;
7671 OSIncrementAtomicLong((volatile long *)&sodefunct_calls);
7672
7673 done:
7674 return 0;
7675 }
7676
7677 int
7678 soresume(struct proc *p, struct socket *so, int locked)
7679 {
7680 if (locked == 0) {
7681 socket_lock(so, 1);
7682 }
7683
7684 if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG) {
7685 SODEFUNCTLOG("%s[%d, %s]: (target pid %d name %s) so 0x%llu "
7686 "[%d,%d] resumed from bk idle\n",
7687 __func__, proc_selfpid(), proc_best_name(current_proc()),
7688 proc_pid(p), proc_best_name(p),
7689 so->so_gencnt,
7690 SOCK_DOM(so), SOCK_TYPE(so));
7691
7692 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
7693 so->so_extended_bk_start = 0;
7694 OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
7695
7696 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resumed);
7697 OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
7698 VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
7699 }
7700 if (locked == 0) {
7701 socket_unlock(so, 1);
7702 }
7703
7704 return 0;
7705 }
7706
7707 /*
7708 * Does not attempt to account for sockets that are delegated from
7709 * the current process
7710 */
7711 int
7712 so_set_extended_bk_idle(struct socket *so, int optval)
7713 {
7714 int error = 0;
7715
7716 if ((SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) ||
7717 SOCK_PROTO(so) != IPPROTO_TCP) {
7718 OSDecrementAtomic(&soextbkidlestat.so_xbkidle_notsupp);
7719 error = EOPNOTSUPP;
7720 } else if (optval == 0) {
7721 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_WANTED;
7722
7723 soresume(current_proc(), so, 1);
7724 } else {
7725 struct proc *p = current_proc();
7726 struct fileproc *fp;
7727 int count = 0;
7728
7729 /*
7730 * Unlock the socket to avoid a lock ordering issue with
7731 * the proc fd table lock
7732 */
7733 socket_unlock(so, 0);
7734
7735 proc_fdlock(p);
7736 fdt_foreach(fp, p) {
7737 struct socket *so2;
7738
7739 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
7740 continue;
7741 }
7742
7743 so2 = (struct socket *)fp_get_data(fp);
7744 if (so != so2 &&
7745 so2->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
7746 count++;
7747 }
7748 if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
7749 break;
7750 }
7751 }
7752 proc_fdunlock(p);
7753
7754 socket_lock(so, 0);
7755
7756 if (count >= soextbkidlestat.so_xbkidle_maxperproc) {
7757 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_toomany);
7758 error = EBUSY;
7759 } else if (so->so_flags & SOF_DELEGATED) {
7760 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_nodlgtd);
7761 error = EBUSY;
7762 } else {
7763 so->so_flags1 |= SOF1_EXTEND_BK_IDLE_WANTED;
7764 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_wantok);
7765 }
7766 SODEFUNCTLOG("%s[%d, %s]: so 0x%llu [%d,%d] "
7767 "%s marked for extended bk idle\n",
7768 __func__, proc_selfpid(), proc_best_name(current_proc()),
7769 so->so_gencnt,
7770 SOCK_DOM(so), SOCK_TYPE(so),
7771 (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ?
7772 "is" : "not");
7773 }
7774
7775 return error;
7776 }
7777
7778 static void
7779 so_stop_extended_bk_idle(struct socket *so)
7780 {
7781 so->so_flags1 &= ~SOF1_EXTEND_BK_IDLE_INPROG;
7782 so->so_extended_bk_start = 0;
7783
7784 OSDecrementAtomic(&soextbkidlestat.so_xbkidle_active);
7785 VERIFY(soextbkidlestat.so_xbkidle_active >= 0);
7786 /*
7787 * Force defunct
7788 */
7789 sosetdefunct(current_proc(), so,
7790 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
7791 if (so->so_flags & SOF_DEFUNCT) {
7792 sodefunct(current_proc(), so,
7793 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
7794 }
7795 }
7796
7797 void
7798 so_drain_extended_bk_idle(struct socket *so)
7799 {
7800 if (so && (so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
7801 /*
7802 * Only penalize sockets that have outstanding data
7803 */
7804 if (so->so_rcv.sb_cc || so->so_snd.sb_cc) {
7805 so_stop_extended_bk_idle(so);
7806
7807 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_drained);
7808 }
7809 }
7810 }
7811
7812 /*
7813 * Return value tells whether the socket is still in extended background idle
7814 */
7815 int
7816 so_check_extended_bk_idle_time(struct socket *so)
7817 {
7818 int ret = 1;
7819
7820 if ((so->so_flags1 & SOF1_EXTEND_BK_IDLE_INPROG)) {
7821 SODEFUNCTLOG("%s[%d, %s]: so 0x%llu [%d,%d]\n",
7822 __func__, proc_selfpid(), proc_best_name(current_proc()),
7823 so->so_gencnt,
7824 SOCK_DOM(so), SOCK_TYPE(so));
7825 if (net_uptime() - so->so_extended_bk_start >
7826 soextbkidlestat.so_xbkidle_time) {
7827 so_stop_extended_bk_idle(so);
7828
7829 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_expired);
7830
7831 ret = 0;
7832 } else {
7833 struct inpcb *inp = (struct inpcb *)so->so_pcb;
7834
7835 inpcb_timer_sched(inp->inp_pcbinfo, INPCB_TIMER_LAZY);
7836 OSIncrementAtomic(&soextbkidlestat.so_xbkidle_resched);
7837 }
7838 }
7839
7840 return ret;
7841 }
7842
7843 void
7844 resume_proc_sockets(proc_t p)
7845 {
7846 if (p->p_ladvflag & P_LXBKIDLEINPROG) {
7847 struct fileproc *fp;
7848 struct socket *so;
7849
7850 proc_fdlock(p);
7851 fdt_foreach(fp, p) {
7852 if (FILEGLOB_DTYPE(fp->fp_glob) != DTYPE_SOCKET) {
7853 continue;
7854 }
7855
7856 so = (struct socket *)fp_get_data(fp);
7857 (void) soresume(p, so, 0);
7858 }
7859 proc_fdunlock(p);
7860
7861 OSBitAndAtomic(~P_LXBKIDLEINPROG, &p->p_ladvflag);
7862 }
7863 }
7864
7865 __private_extern__ int
7866 so_set_recv_anyif(struct socket *so, int optval)
7867 {
7868 int ret = 0;
7869
7870 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7871 if (optval) {
7872 sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
7873 } else {
7874 sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
7875 }
7876 #if SKYWALK
7877 inp_update_netns_flags(so);
7878 #endif /* SKYWALK */
7879 }
7880
7882 return ret;
7883 }
7884
7885 __private_extern__ int
7886 so_get_recv_anyif(struct socket *so)
7887 {
7888 int ret = 0;
7889
7890 if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
7891 ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0;
7892 }
7893
7894 return ret;
7895 }
7896
7897 int
7898 so_set_restrictions(struct socket *so, uint32_t vals)
7899 {
7900 int nocell_old, nocell_new;
7901 int noexpensive_old, noexpensive_new;
7902 int noconstrained_old, noconstrained_new;
7903
7904 /*
7905 * Deny-type restrictions are trapdoors; once set they cannot be
7906 * unset for the lifetime of the socket. This allows them to be
7907 * issued by a framework on behalf of the application without
7908 * having to worry that they can be undone.
7909 *
7910 * Note here that socket-level restrictions overrides any protocol
7911 * level restrictions. For instance, SO_RESTRICT_DENY_CELLULAR
7912 * socket restriction issued on the socket has a higher precendence
7913 * than INP_NO_IFT_CELLULAR. The latter is affected by the UUID
7914 * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only,
7915 * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued.
7916 */
	nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_old = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_old = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);
	so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN |
	    SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR |
	    SO_RESTRICT_DENY_EXPENSIVE | SO_RESTRICT_DENY_CONSTRAINED));
	nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR);
	noexpensive_new = (so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE);
	noconstrained_new = (so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED);

	/* we can only set, not clear restrictions */
	if ((nocell_new - nocell_old) == 0 &&
	    (noexpensive_new - noexpensive_old) == 0 &&
	    (noconstrained_new - noconstrained_old) == 0) {
		return 0;
	}
	if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) {
		if (nocell_new - nocell_old != 0) {
			/*
			 * if deny cellular is now set, do what's needed
			 * for INPCB
			 */
			inp_set_nocellular(sotoinpcb(so));
		}
		if (noexpensive_new - noexpensive_old != 0) {
			inp_set_noexpensive(sotoinpcb(so));
		}
		if (noconstrained_new - noconstrained_old != 0) {
			inp_set_noconstrained(sotoinpcb(so));
		}
	}

	if (SOCK_DOM(so) == PF_MULTIPATH) {
		mptcp_set_restrictions(so);
	}

	return 0;
}

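/*
 * Illustrative sketch (not part of this file): the trapdoor semantics
 * above are what a framework relies on when it issues the private
 * SO_RESTRICTIONS option on an application's behalf. A hedged sketch;
 * the option and flag names are the private kernel definitions and may
 * not be exposed to third-party code:
 *
 *	uint32_t deny = SO_RESTRICT_DENY_CELLULAR;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RESTRICTIONS, &deny, sizeof(deny));
 *
 *	// A later attempt to clear the bit is silently ineffective:
 *	// so_set_restrictions() only ORs new deny bits in, so once
 *	// SO_RESTRICT_DENY_CELLULAR is set it stays set for the
 *	// lifetime of the socket.
 *	deny = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_RESTRICTIONS, &deny, sizeof(deny));
 */
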
uint32_t
so_get_restrictions(struct socket *so)
{
	return so->so_restrictions & (SO_RESTRICT_DENY_IN |
	       SO_RESTRICT_DENY_OUT |
	       SO_RESTRICT_DENY_CELLULAR | SO_RESTRICT_DENY_EXPENSIVE);
}

int
so_set_effective_pid(struct socket *so, int epid, struct proc *p, boolean_t check_cred)
{
	struct proc *ep = PROC_NULL;
	int error = 0;

	/* pid 0 is reserved for kernel */
	if (epid == 0) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the pid is the same as
	 * the process's own pid, then proceed. Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred && (epid != so->last_pid || epid != proc_pid(p))) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/* Find the process that corresponds to the effective pid */
	if ((ep = proc_find(epid)) == PROC_NULL) {
		error = ESRCH;
		goto done;
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared. Note that we check
	 * the passed-in proc rather than calling proc_selfpid(),
	 * as we need to check the process issuing the socket option
	 * which could be kernproc. Given that we don't allow 0 for
	 * effective pid, it means that a delegated in-kernel socket
	 * stays delegated during its lifetime (which is probably OK.)
	 */
	if (epid == proc_pid(p)) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		so->e_upid = proc_uniqueid(ep);
		so->e_pid = proc_pid(ep);
		proc_getexecutableuuid(ep, so->e_uuid, sizeof(so->e_uuid));

#if defined(XNU_TARGET_OS_OSX)
		if (ep->p_responsible_pid != so->e_pid) {
			proc_t rp = proc_find(ep->p_responsible_pid);
			if (rp != PROC_NULL) {
				proc_getexecutableuuid(rp, so->so_ruuid, sizeof(so->so_ruuid));
				so->so_rpid = ep->p_responsible_pid;
				proc_rele(rp);
			} else {
				uuid_clear(so->so_ruuid);
				so->so_rpid = -1;
			}
		}
#endif
	}
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, ep);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_string_t buf;

		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "euuid %s%s\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    so->e_pid, proc_name_address(ep), buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) "
		    "ERROR (%d)\n", __func__, proc_name_address(p),
		    proc_pid(p), (uint64_t)DEBUG_KERNEL_ADDRPERM(so),
		    SOCK_DOM(so), SOCK_TYPE(so),
		    epid, (ep == PROC_NULL) ? "PROC_NULL" :
		    proc_name_address(ep), error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	if (ep != PROC_NULL) {
		proc_rele(ep);
	}

	return error;
}

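/*
 * Illustrative sketch (not part of this file): so_set_effective_pid()
 * implements the private SO_DELEGATED socket option, used by daemons
 * that open sockets on behalf of another process so that traffic is
 * attributed, and policy applied, to the delegate. A hedged sketch;
 * the option is private and normally requires the
 * PRIV_NET_PRIVILEGED_SOCKET_DELEGATE privilege checked above:
 *
 *	pid_t app_pid = ...;	// pid of the app we act on behalf of
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_DELEGATED,
 *	    &app_pid, sizeof(app_pid)) == -1) {
 *		// EACCES here typically means the caller lacks the
 *		// delegate privilege and is not the socket's owner.
 *		perror("setsockopt(SO_DELEGATED)");
 *	}
 */
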
int
so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p, boolean_t check_cred)
{
	uuid_string_t buf;
	uuid_t uuid;
	int error = 0;

	/* UUID must not be all-zeroes (reserved for kernel) */
	if (uuid_is_null(euuid)) {
		error = EINVAL;
		goto done;
	}

	/*
	 * If this is an in-kernel socket, prevent its delegate
	 * association from changing unless the socket option is
	 * coming from within the kernel itself.
	 */
	if (so->last_pid == 0 && p != kernproc) {
		error = EACCES;
		goto done;
	}

	/* Get the UUID of the issuing process */
	proc_getexecutableuuid(p, uuid, sizeof(uuid));

	/*
	 * If this is issued by a process that's recorded as the
	 * real owner of the socket, or if the uuid is the same as
	 * the process's own uuid, then proceed. Otherwise ensure
	 * that the issuing process has the necessary privileges.
	 */
	if (check_cred &&
	    (uuid_compare(euuid, so->last_uuid) != 0 ||
	    uuid_compare(euuid, uuid) != 0)) {
		if ((error = priv_check_cred(kauth_cred_get(),
		    PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) {
			error = EACCES;
			goto done;
		}
	}

	/*
	 * If a process tries to delegate the socket to itself, then
	 * there's really nothing to do; treat it as a way for the
	 * delegate association to be cleared. Note that we check
	 * the uuid of the passed-in proc rather than that of the
	 * current process, as we need to check the process issuing
	 * the socket option which could be kernproc itself. Given
	 * that we don't allow 0 for effective uuid, it means that
	 * a delegated in-kernel socket stays delegated during its
	 * lifetime (which is okay.)
	 */
	if (uuid_compare(euuid, uuid) == 0) {
		so->so_flags &= ~SOF_DELEGATED;
		so->e_upid = 0;
		so->e_pid = 0;
		uuid_clear(so->e_uuid);
	} else {
		so->so_flags |= SOF_DELEGATED;
		/*
		 * Unlike so_set_effective_pid(), we only have the UUID
		 * here and the process ID is not known. Inherit the
		 * real {pid,upid} of the socket.
		 */
		so->e_upid = so->last_upid;
		so->e_pid = so->last_pid;
		uuid_copy(so->e_uuid, euuid);
	}
	/*
	 * The following will clear the effective process name as it's the same
	 * as the real process
	 */
	if (so->so_proto != NULL && so->so_proto->pr_update_last_owner != NULL) {
		(*so->so_proto->pr_update_last_owner)(so, NULL, NULL);
	}
done:
	if (error == 0 && net_io_policy_log) {
		uuid_unparse(so->e_uuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), so->e_pid, buf,
		    ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : ""));
	} else if (error != 0 && net_io_policy_log) {
		uuid_unparse(euuid, buf);
		log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s "
		    "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p),
		    (uint64_t)DEBUG_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), buf, error);
	}

	/* Update this socket's policy upon success */
	if (error == 0) {
		so->so_policy_gencnt *= -1;
		so_update_policy(so);
#if NECP
		so_update_necp_policy(so, NULL, NULL);
#endif /* NECP */
	}

	return error;
}

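/*
 * Illustrative sketch (not part of this file): the UUID flavor above
 * backs the private SO_DELEGATED_UUID option, for callers that know the
 * delegate app's executable UUID but not a live pid. A hedged sketch
 * under the same privilege assumptions as SO_DELEGATED; the UUID string
 * is a placeholder:
 *
 *	uuid_t app_uuid;
 *
 *	uuid_parse("f81d4fae-7dec-11d0-a765-00a0c91e6bf6", app_uuid);
 *	if (setsockopt(fd, SOL_SOCKET, SO_DELEGATED_UUID,
 *	    app_uuid, sizeof(uuid_t)) == -1) {
 *		perror("setsockopt(SO_DELEGATED_UUID)");
 *	}
 */
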
void
netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	/*
	 * A netpolicy event always starts with a netpolicy_event_data
	 * structure, but the caller can provide for a longer event
	 * structure to post, depending on the event code.
	 */
	VERIFY(ev_data != NULL && ev_datalen >= sizeof(*ev_data));

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}

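/*
 * Illustrative sketch (not part of this file): in-kernel callers embed
 * a netpolicy_event_data as the first member of a larger event struct
 * and pass the whole thing here. A hedged sketch modeled on the
 * interface-denied event; the field names beyond ev_data are
 * assumptions:
 *
 *	struct kev_netpolicy_ifdenied ev_ifdenied;
 *
 *	bzero(&ev_ifdenied, sizeof(ev_ifdenied));
 *	ev_ifdenied.ev_data.epid = so->e_pid;
 *	uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
 *	netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
 *	    sizeof(ev_ifdenied));
 */
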
void
socket_post_kev_msg(uint32_t ev_code,
    struct kev_socket_event_data *ev_data,
    uint32_t ev_datalen)
{
	struct kev_msg ev_msg;

	bzero(&ev_msg, sizeof(ev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_SOCKET_SUBCLASS;
	ev_msg.event_code = ev_code;

	ev_msg.dv[0].data_ptr = ev_data;
	ev_msg.dv[0].data_length = ev_datalen;

	kev_post_msg(&ev_msg);
}

void
socket_post_kev_msg_closed(struct socket *so)
{
	struct kev_socket_closed ev = {};
	struct sockaddr *__single socksa = NULL, *__single peersa = NULL;
	int err;

	if ((so->so_flags1 & SOF1_WANT_KEV_SOCK_CLOSED) == 0) {
		return;
	}
	err = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &socksa);
	if (err == 0) {
		err = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so,
		    &peersa);
		if (err == 0) {
			SOCKADDR_COPY(socksa, &ev.ev_data.kev_sockname,
			    min(socksa->sa_len,
			    sizeof(ev.ev_data.kev_sockname)));
			SOCKADDR_COPY(peersa, &ev.ev_data.kev_peername,
			    min(peersa->sa_len,
			    sizeof(ev.ev_data.kev_peername)));
			socket_post_kev_msg(KEV_SOCKET_CLOSED,
			    &ev.ev_data, sizeof(ev));
		}
	}
	free_sockaddr(socksa);
	free_sockaddr(peersa);
}

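/*
 * Illustrative sketch (not part of this file): a user-space client can
 * receive KEV_SOCKET_CLOSED by opening a kernel-event socket and
 * filtering on the socket subclass. A hedged sketch; the kev interface
 * lives in <sys/kern_event.h> and the subclass constant is private:
 *
 *	int kfd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code = KEV_VENDOR_APPLE,
 *		.kev_class = KEV_NETWORK_CLASS,
 *		.kev_subclass = KEV_SOCKET_SUBCLASS,
 *	};
 *
 *	ioctl(kfd, SIOCSKEVFILT, &req);
 *	// read() now yields struct kern_event_msg records whose
 *	// event_code may be KEV_SOCKET_CLOSED, posted above for
 *	// sockets that opted in via SOF1_WANT_KEV_SOCK_CLOSED.
 */
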
__attribute__((noinline, cold, not_tail_called, noreturn))
__private_extern__ int
assfail(const char *a, const char *f, int l)
{
	panic("assertion failed: %s, file: %s, line: %d", a, f, l);
	/* NOTREACHED */
	__builtin_unreachable();
}
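
/*
 * Illustrative note (not part of this file): assfail() is the slow-path
 * target of the BSD assertion macros used throughout this file, e.g.
 * the VERIFY() calls above. A hedged sketch of the general shape of
 * such a macro (the exact definition lives in the libkern headers and
 * may differ):
 *
 *	#define VERIFY(EX) \
 *		((void)((EX) || assfail(#EX, __FILE__, __LINE__)))
 */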