1 /*
2 * Copyright (c) 1998-2020, 2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1988, 1990, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
62 */
63 /*
64 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
65 * support for mandatory and extensible security protections. This notice
66 * is included in support of clause 2.2 (b) of the Apple Public License,
67 * Version 2.0.
68 */
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/domain.h>
73 #include <sys/kernel.h>
74 #include <sys/proc_internal.h>
75 #include <sys/kauth.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80 #include <sys/stat.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/signalvar.h>
84 #include <sys/sysctl.h>
85 #include <sys/syslog.h>
86 #include <sys/unpcb.h>
87 #include <sys/ev.h>
88 #include <kern/locks.h>
89 #include <kern/uipc_domain.h>
90 #include <kern/uipc_socket.h>
91 #include <net/route.h>
92 #include <net/content_filter.h>
93 #include <netinet/in.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/tcp_var.h>
96 #include <sys/kdebug.h>
97 #include <libkern/OSAtomic.h>
98 #include <net/droptap.h>
99
100 #if CONFIG_MACF
101 #include <security/mac_framework.h>
102 #endif
103
104 #include <mach/vm_param.h>
105
106 #if MPTCP
107 #include <netinet/mptcp_var.h>
108 #endif
109
110 #include <net/sockaddr_utils.h>
111
112 extern uint32_t net_wake_pkt_debug;
113
114 #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4)
115 #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5)
116
117 SYSCTL_DECL(_kern_ipc);
118
119 __private_extern__ u_int32_t net_io_policy_throttle_best_effort = 0;
120 SYSCTL_INT(_kern_ipc, OID_AUTO, throttle_best_effort,
121 CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttle_best_effort, 0, "");
122
123 static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *);
124 static struct socket *sonewconn_internal(struct socket *, int);
125 static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *,
126 struct mbuf *);
127 static void soevent_ifdenied(struct socket *);
128
129 static int sbappendrecord_common(struct sockbuf *sb, struct mbuf *m0, boolean_t nodrop);
130 static int sbappend_common(struct sockbuf *sb, struct mbuf *m, boolean_t nodrop);
131
132 /*
133 * Primitive routines for operating on sockets and socket buffers
134 */
135 int soqlimitcompat = 1;
136 static int soqlencomp = 0;
137
138 /*
139 * Based on the number of mbuf clusters configured, high_sb_max and sb_max can
140 * get scaled up or down to suit that memory configuration. high_sb_max is a
141 * higher limit on sb_max that is checked when sb_max gets set through sysctl.
142 */
143 uint32_t sb_max = SB_MAX;
144 uint32_t high_sb_max = SB_MAX;
145
146 static uint32_t sb_efficiency = 8; /* parameter for sbreserve() */
147
148 uint32_t net_io_policy_log = 0; /* log socket policy changes */
149 #if CONFIG_PROC_UUID_POLICY
150 uint32_t net_io_policy_uuid = 1; /* enable UUID socket policy */
151 #endif /* CONFIG_PROC_UUID_POLICY */
152
153 /*
154 * Procedures to manipulate state flags of socket
155 * and do appropriate wakeups. Normal sequence from the
156 * active (originating) side is that soisconnecting() is
157 * called during processing of connect() call,
158 * resulting in an eventual call to soisconnected() if/when the
159 * connection is established. When the connection is torn down
160 * soisdisconnecting() is called during processing of disconnect() call,
161 * and soisdisconnected() is called when the connection to the peer
162 * is totally severed. The semantics of these routines are such that
163 * connectionless protocols can call soisconnected() and soisdisconnected()
164 * only, bypassing the in-progress calls when setting up a ``connection''
165 * takes no time.
166 *
167 * From the passive side, a socket is created with
168 * two queues of sockets: so_incomp for connections in progress
169 * and so_comp for connections already made and awaiting user acceptance.
170 * As a protocol is preparing incoming connections, it creates a socket
171 * structure queued on so_incomp by calling sonewconn(). When the connection
172 * is established, soisconnected() is called, and transfers the
173 * socket structure to so_comp, making it available to accept().
174 *
175 * If a socket is closed with sockets on either
176 * so_incomp or so_comp, these sockets are dropped.
177 *
178 * If higher level protocols are implemented in
179 * the kernel, the wakeups done here will sometimes
180 * cause software-interrupt process scheduling.
181 */
182 void
soisconnecting(struct socket * so)183 soisconnecting(struct socket *so)
184 {
185 so->so_state &= ~(SS_ISCONNECTED | SS_ISDISCONNECTING);
186 so->so_state |= SS_ISCONNECTING;
187
188 sflt_notify(so, sock_evt_connecting, NULL);
189 }
190
191 void
soisconnected(struct socket * so)192 soisconnected(struct socket *so)
193 {
194 /*
195 * If socket is subject to filter and is pending initial verdict,
196 * delay marking socket as connected and do not present the connected
197 * socket to user just yet.
198 */
199 if (cfil_sock_connected_pending_verdict(so)) {
200 return;
201 }
202
203 so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING);
204 so->so_state |= SS_ISCONNECTED;
205
206 soreserve_preconnect(so, 0);
207
208 sflt_notify(so, sock_evt_connected, NULL);
209
210 if (so->so_head != NULL && (so->so_state & SS_INCOMP)) {
211 struct socket *head = so->so_head;
212 int locked = 0;
213
214 /*
215 * Enforce lock order when the protocol has per socket locks
216 */
217 if (head->so_proto->pr_getlock != NULL) {
218 socket_lock(head, 1);
219 so_acquire_accept_list(head, so);
220 locked = 1;
221 }
222 if (so->so_head == head && (so->so_state & SS_INCOMP)) {
223 so->so_state &= ~SS_INCOMP;
224 so->so_state |= SS_COMP;
225 TAILQ_REMOVE(&head->so_incomp, so, so_list);
226 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
227 head->so_incqlen--;
228
229 /*
230 * We have to release the accept list in
231 * case a socket callback calls sock_accept()
232 */
233 if (locked != 0) {
234 so_release_accept_list(head);
235 socket_unlock(so, 0);
236 }
237 sorwakeup(head);
238 wakeup_one((caddr_t)&head->so_timeo);
239
240 if (locked != 0) {
241 socket_unlock(head, 1);
242 socket_lock(so, 0);
243 }
244 } else if (locked != 0) {
245 so_release_accept_list(head);
246 socket_unlock(head, 1);
247 }
248 } else {
249 wakeup((caddr_t)&so->so_timeo);
250 sorwakeup(so);
251 sowwakeup(so);
252 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNECTED |
253 SO_FILT_HINT_CONNINFO_UPDATED);
254 }
255 }
256
257 boolean_t
socanwrite(struct socket * so)258 socanwrite(struct socket *so)
259 {
260 return (so->so_state & SS_ISCONNECTED) ||
261 !(so->so_proto->pr_flags & PR_CONNREQUIRED) ||
262 (so->so_flags1 & SOF1_PRECONNECT_DATA);
263 }
264
265 void
soisdisconnecting(struct socket * so)266 soisdisconnecting(struct socket *so)
267 {
268 so->so_state &= ~SS_ISCONNECTING;
269 so->so_state |= (SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE);
270 soevent(so, SO_FILT_HINT_LOCKED);
271 sflt_notify(so, sock_evt_disconnecting, NULL);
272 wakeup((caddr_t)&so->so_timeo);
273 sowwakeup(so);
274 sorwakeup(so);
275 }
276
277 void
soisdisconnected(struct socket * so)278 soisdisconnected(struct socket *so)
279 {
280 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
281 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
282 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
283 SO_FILT_HINT_CONNINFO_UPDATED);
284 sflt_notify(so, sock_evt_disconnected, NULL);
285 wakeup((caddr_t)&so->so_timeo);
286 sowwakeup(so);
287 sorwakeup(so);
288
289 #if CONTENT_FILTER
290 /* Notify content filters as soon as we cannot send/receive data */
291 cfil_sock_notify_shutdown(so, SHUT_RDWR);
292 #endif /* CONTENT_FILTER */
293 }
294
295 /*
296 * This function will issue a wakeup like soisdisconnected but it will not
297 * notify the socket filters. This will avoid unlocking the socket
298 * in the midst of closing it.
299 */
300 void
sodisconnectwakeup(struct socket * so)301 sodisconnectwakeup(struct socket *so)
302 {
303 so->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
304 so->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);
305 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED |
306 SO_FILT_HINT_CONNINFO_UPDATED);
307 wakeup((caddr_t)&so->so_timeo);
308 sowwakeup(so);
309 sorwakeup(so);
310
311 #if CONTENT_FILTER
312 /* Notify content filters as soon as we cannot send/receive data */
313 cfil_sock_notify_shutdown(so, SHUT_RDWR);
314 #endif /* CONTENT_FILTER */
315 }
316
317 /*
318 * When an attempt at a new connection is noted on a socket
319 * which accepts connections, sonewconn is called. If the
320 * connection is possible (subject to space constraints, etc.)
321 * then we allocate a new structure, propoerly linked into the
322 * data structure of the original socket, and return this.
323 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
324 */
325 static struct socket *
sonewconn_internal(struct socket * head,int connstatus)326 sonewconn_internal(struct socket *head, int connstatus)
327 {
328 int so_qlen, error = 0;
329 struct socket *so;
330 lck_mtx_t *mutex_held;
331
332 if (head->so_proto->pr_getlock != NULL) {
333 mutex_held = (*head->so_proto->pr_getlock)(head, 0);
334 } else {
335 mutex_held = head->so_proto->pr_domain->dom_mtx;
336 }
337 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
338
339 if (!soqlencomp) {
340 /*
341 * This is the default case; so_qlen represents the
342 * sum of both incomplete and completed queues.
343 */
344 so_qlen = head->so_qlen;
345 } else {
346 /*
347 * When kern.ipc.soqlencomp is set to 1, so_qlen
348 * represents only the completed queue. Since we
349 * cannot let the incomplete queue goes unbounded
350 * (in case of SYN flood), we cap the incomplete
351 * queue length to at most somaxconn, and use that
352 * as so_qlen so that we fail immediately below.
353 */
354 so_qlen = head->so_qlen - head->so_incqlen;
355 if (head->so_incqlen > somaxconn) {
356 so_qlen = somaxconn;
357 }
358 }
359
360 if (so_qlen >=
361 (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2))) {
362 return NULL;
363 }
364 if (proto_memacct_hardlimit(head->so_proto)) {
365 return NULL;
366 }
367 so = soalloc();
368 if (so == NULL) {
369 return NULL;
370 }
371
372 so->so_type = head->so_type;
373 so->so_family = head->so_family;
374 so->so_protocol = head->so_protocol;
375 so->so_options = head->so_options & ~SO_ACCEPTCONN;
376 so->so_linger = head->so_linger;
377 so->so_state = head->so_state | SS_NOFDREF;
378 so->so_proto = head->so_proto;
379 so->so_timeo = head->so_timeo;
380 so->so_pgid = head->so_pgid;
381 kauth_cred_ref(head->so_cred);
382 so->so_cred = head->so_cred;
383 so->so_persona_id = head->so_persona_id;
384 so->last_pid = head->last_pid;
385 so->last_upid = head->last_upid;
386 memcpy(so->last_uuid, head->last_uuid, sizeof(so->last_uuid));
387 if (head->so_flags & SOF_DELEGATED) {
388 so->e_pid = head->e_pid;
389 so->e_upid = head->e_upid;
390 memcpy(so->e_uuid, head->e_uuid, sizeof(so->e_uuid));
391 }
392 /* inherit socket options stored in so_flags */
393 so->so_flags = head->so_flags &
394 (SOF_NOSIGPIPE | SOF_NOADDRAVAIL | SOF_REUSESHAREUID |
395 SOF_NOTIFYCONFLICT | SOF_BINDRANDOMPORT | SOF_NPX_SETOPTSHUT |
396 SOF_NODEFUNCT | SOF_PRIVILEGED_TRAFFIC_CLASS | SOF_NOTSENT_LOWAT |
397 SOF_DELEGATED);
398 so->so_flags1 |= SOF1_INBOUND;
399 so->so_usecount = 1;
400 so->next_lock_lr = 0;
401 so->next_unlock_lr = 0;
402
403 so->so_rcv.sb_flags |= SB_RECV; /* XXX */
404 so->so_rcv.sb_so = so->so_snd.sb_so = so;
405
406 /* inherit traffic management properties of listener */
407 so->so_flags1 |=
408 head->so_flags1 & (SOF1_TRAFFIC_MGT_SO_BACKGROUND | SOF1_TC_NET_SERV_TYPE |
409 SOF1_QOSMARKING_ALLOWED | SOF1_QOSMARKING_POLICY_OVERRIDE);
410 so->so_background_thread = head->so_background_thread;
411 so->so_traffic_class = head->so_traffic_class;
412 so->so_netsvctype = head->so_netsvctype;
413
414 proto_memacct_add(so->so_proto, sizeof(struct socket));
415
416 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
417 sodealloc(so);
418 return NULL;
419 }
420 so->so_rcv.sb_flags |= (head->so_rcv.sb_flags & SB_USRSIZE);
421 so->so_snd.sb_flags |= (head->so_snd.sb_flags & SB_USRSIZE);
422
423 /*
424 * Must be done with head unlocked to avoid deadlock
425 * for protocol with per socket mutexes.
426 */
427 if (head->so_proto->pr_unlock) {
428 socket_unlock(head, 0);
429 }
430 if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
431 error) {
432 sodealloc(so);
433 if (head->so_proto->pr_unlock) {
434 socket_lock(head, 0);
435 }
436 return NULL;
437 }
438 if (head->so_proto->pr_unlock) {
439 socket_lock(head, 0);
440 /*
441 * Radar 7385998 Recheck that the head is still accepting
442 * to avoid race condition when head is getting closed.
443 */
444 if ((head->so_options & SO_ACCEPTCONN) == 0) {
445 so->so_state &= ~SS_NOFDREF;
446 soclose(so);
447 return NULL;
448 }
449 }
450
451 if (so->so_proto->pr_copy_last_owner != NULL) {
452 (*so->so_proto->pr_copy_last_owner)(so, head);
453 }
454 os_atomic_inc(&so->so_proto->pr_domain->dom_refs, relaxed);
455
456 /* Insert in head appropriate lists */
457 so_acquire_accept_list(head, NULL);
458
459 so->so_head = head;
460
461 /*
462 * Since this socket is going to be inserted into the incomp
463 * queue, it can be picked up by another thread in
464 * tcp_dropdropablreq to get dropped before it is setup..
465 * To prevent this race, set in-progress flag which can be
466 * cleared later
467 */
468 so->so_flags |= SOF_INCOMP_INPROGRESS;
469
470 if (connstatus) {
471 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
472 so->so_state |= SS_COMP;
473 } else {
474 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
475 so->so_state |= SS_INCOMP;
476 head->so_incqlen++;
477 }
478 head->so_qlen++;
479
480 so_release_accept_list(head);
481
482 /* Attach socket filters for this protocol */
483 sflt_initsock(so);
484
485 if (connstatus) {
486 so->so_state |= (short)connstatus;
487 sorwakeup(head);
488 wakeup((caddr_t)&head->so_timeo);
489 }
490 return so;
491 }
492
493
494 struct socket *
sonewconn(struct socket * head,int connstatus,const struct sockaddr * from)495 sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
496 {
497 int error = sflt_connectin(head, from);
498 if (error) {
499 return NULL;
500 }
501
502 return sonewconn_internal(head, connstatus);
503 }
504
505 /*
506 * Socantsendmore indicates that no more data will be sent on the
507 * socket; it would normally be applied to a socket when the user
508 * informs the system that no more data is to be sent, by the protocol
509 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
510 * will be received, and will normally be applied to the socket by a
511 * protocol when it detects that the peer will send no more data.
512 * Data queued for reading in the socket may yet be read.
513 */
514
515 void
socantsendmore(struct socket * so)516 socantsendmore(struct socket *so)
517 {
518 so->so_state |= SS_CANTSENDMORE;
519 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTSENDMORE);
520 sflt_notify(so, sock_evt_cantsendmore, NULL);
521 sowwakeup(so);
522 }
523
524 void
socantrcvmore(struct socket * so)525 socantrcvmore(struct socket *so)
526 {
527 so->so_state |= SS_CANTRCVMORE;
528 soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE);
529 sflt_notify(so, sock_evt_cantrecvmore, NULL);
530 sorwakeup(so);
531 }
532
533 /*
534 * Wait for data to arrive at/drain from a socket buffer.
535 */
536 int
sbwait(struct sockbuf * sb)537 sbwait(struct sockbuf *sb)
538 {
539 boolean_t nointr = (sb->sb_flags & SB_NOINTR);
540 void *__single lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
541 struct socket *so = sb->sb_so;
542 lck_mtx_t *mutex_held;
543 struct timespec ts;
544 int error = 0;
545
546 if (so == NULL) {
547 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p",
548 __func__, sb, sb->sb_flags, lr_saved);
549 /* NOTREACHED */
550 } else if (so->so_usecount < 1) {
551 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
552 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
553 so->so_usecount, lr_saved, solockhistory_nr(so));
554 /* NOTREACHED */
555 }
556
557 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
558 error = EBADF;
559 if (so->so_flags & SOF_DEFUNCT) {
560 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
561 "(%d)\n", __func__, proc_selfpid(),
562 proc_best_name(current_proc()),
563 so->so_gencnt,
564 SOCK_DOM(so), SOCK_TYPE(so), error);
565 }
566 return error;
567 }
568
569 if (so->so_proto->pr_getlock != NULL) {
570 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
571 } else {
572 mutex_held = so->so_proto->pr_domain->dom_mtx;
573 }
574
575 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
576
577 ts.tv_sec = sb->sb_timeo.tv_sec;
578 ts.tv_nsec = sb->sb_timeo.tv_usec * 1000;
579
580 sb->sb_waiters++;
581 VERIFY(sb->sb_waiters != 0);
582
583 error = msleep((caddr_t)&sb->sb_cc, mutex_held,
584 nointr ? PSOCK : PSOCK | PCATCH,
585 nointr ? "sbwait_nointr" : "sbwait", &ts);
586
587 VERIFY(sb->sb_waiters != 0);
588 sb->sb_waiters--;
589
590 if (so->so_usecount < 1) {
591 panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
592 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
593 so->so_usecount, lr_saved, solockhistory_nr(so));
594 /* NOTREACHED */
595 }
596
597 if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
598 error = EBADF;
599 if (so->so_flags & SOF_DEFUNCT) {
600 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
601 "(%d)\n", __func__, proc_selfpid(),
602 proc_best_name(current_proc()),
603 so->so_gencnt,
604 SOCK_DOM(so), SOCK_TYPE(so), error);
605 }
606 }
607
608 return error;
609 }
610
611 void
sbwakeup(struct sockbuf * sb)612 sbwakeup(struct sockbuf *sb)
613 {
614 if (sb->sb_waiters > 0) {
615 wakeup((caddr_t)&sb->sb_cc);
616 }
617 }
618
619 /*
620 * Wakeup processes waiting on a socket buffer.
621 * Do asynchronous notification via SIGIO
622 * if the socket has the SS_ASYNC flag set.
623 */
624 void
sowakeup(struct socket * so,struct sockbuf * sb,struct socket * so2)625 sowakeup(struct socket *so, struct sockbuf *sb, struct socket *so2)
626 {
627 if (so->so_flags & SOF_DEFUNCT) {
628 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] si 0x%x, "
629 "fl 0x%x [%s]\n", __func__, proc_selfpid(),
630 proc_best_name(current_proc()),
631 so->so_gencnt, SOCK_DOM(so),
632 SOCK_TYPE(so), (uint32_t)sb->sb_sel.si_flags, sb->sb_flags,
633 (sb->sb_flags & SB_RECV) ? "rcv" : "snd");
634 }
635
636 sb->sb_flags &= ~SB_SEL;
637 selwakeup(&sb->sb_sel);
638 sbwakeup(sb);
639 if (so->so_state & SS_ASYNC) {
640 if (so->so_pgid < 0) {
641 gsignal(-so->so_pgid, SIGIO);
642 } else if (so->so_pgid > 0) {
643 proc_signal(so->so_pgid, SIGIO);
644 }
645 }
646 if (sb->sb_flags & SB_KNOTE) {
647 KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED);
648 }
649 if (sb->sb_flags & SB_UPCALL) {
650 void (*sb_upcall)(struct socket *, void *, int);
651 caddr_t __single sb_upcallarg;
652 int lock = !(sb->sb_flags & SB_UPCALL_LOCK);
653
654 sb_upcall = sb->sb_upcall;
655 sb_upcallarg = sb->sb_upcallarg;
656 /* Let close know that we're about to do an upcall */
657 so->so_upcallusecount++;
658
659 if (lock) {
660 if (so2) {
661 struct unpcb *unp = sotounpcb(so2);
662 unp->unp_flags |= UNP_DONTDISCONNECT;
663 unp->rw_thrcount++;
664
665 socket_unlock(so2, 0);
666 }
667 socket_unlock(so, 0);
668 }
669 (*sb_upcall)(so, sb_upcallarg, M_DONTWAIT);
670 if (lock) {
671 if (so2 && so > so2) {
672 struct unpcb *unp;
673 socket_lock(so2, 0);
674
675 unp = sotounpcb(so2);
676 unp->rw_thrcount--;
677 if (unp->rw_thrcount == 0) {
678 unp->unp_flags &= ~UNP_DONTDISCONNECT;
679 wakeup(unp);
680 }
681 }
682
683 socket_lock(so, 0);
684
685 if (so2 && so < so2) {
686 struct unpcb *unp;
687 socket_lock(so2, 0);
688
689 unp = sotounpcb(so2);
690 unp->rw_thrcount--;
691 if (unp->rw_thrcount == 0) {
692 unp->unp_flags &= ~UNP_DONTDISCONNECT;
693 wakeup(unp);
694 }
695 }
696 }
697
698 so->so_upcallusecount--;
699 /* Tell close that it's safe to proceed */
700 if ((so->so_flags & SOF_CLOSEWAIT) &&
701 so->so_upcallusecount == 0) {
702 wakeup((caddr_t)&so->so_upcallusecount);
703 }
704 }
705 #if CONTENT_FILTER
706 /*
707 * Trap disconnection events for content filters
708 */
709 if ((so->so_flags & SOF_CONTENT_FILTER) != 0) {
710 if ((sb->sb_flags & SB_RECV)) {
711 if (so->so_state & (SS_CANTRCVMORE)) {
712 cfil_sock_notify_shutdown(so, SHUT_RD);
713 }
714 } else {
715 if (so->so_state & (SS_CANTSENDMORE)) {
716 cfil_sock_notify_shutdown(so, SHUT_WR);
717 }
718 }
719 }
720 #endif /* CONTENT_FILTER */
721 }
722
723 /*
724 * Socket buffer (struct sockbuf) utility routines.
725 *
726 * Each socket contains two socket buffers: one for sending data and
727 * one for receiving data. Each buffer contains a queue of mbufs,
728 * information about the number of mbufs and amount of data in the
729 * queue, and other fields allowing select() statements and notification
730 * on data availability to be implemented.
731 *
732 * Data stored in a socket buffer is maintained as a list of records.
733 * Each record is a list of mbufs chained together with the m_next
734 * field. Records are chained together with the m_nextpkt field. The upper
735 * level routine soreceive() expects the following conventions to be
736 * observed when placing information in the receive buffer:
737 *
738 * 1. If the protocol requires each message be preceded by the sender's
739 * name, then a record containing that name must be present before
740 * any associated data (mbuf's must be of type MT_SONAME).
741 * 2. If the protocol supports the exchange of ``access rights'' (really
742 * just additional data associated with the message), and there are
743 * ``rights'' to be received, then a record containing this data
744 * should be present (mbuf's must be of type MT_RIGHTS).
745 * 3. If a name or rights record exists, then it must be followed by
746 * a data record, perhaps of zero length.
747 *
748 * Before using a new socket structure it is first necessary to reserve
749 * buffer space to the socket, by calling sbreserve(). This should commit
750 * some of the available buffer space in the system buffer pool for the
751 * socket (currently, it does nothing but enforce limits). The space
752 * should be released by calling sbrelease() when the socket is destroyed.
753 */
754
755 /*
756 * Returns: 0 Success
757 * ENOBUFS
758 */
759 int
soreserve(struct socket * so,uint32_t sndcc,uint32_t rcvcc)760 soreserve(struct socket *so, uint32_t sndcc, uint32_t rcvcc)
761 {
762 if (sbreserve(&so->so_snd, sndcc) == 0) {
763 goto bad;
764 } else {
765 so->so_snd.sb_idealsize = sndcc;
766 }
767
768 if (sbreserve(&so->so_rcv, rcvcc) == 0) {
769 goto bad2;
770 } else {
771 so->so_rcv.sb_idealsize = rcvcc;
772 }
773
774 if (so->so_rcv.sb_lowat == 0) {
775 so->so_rcv.sb_lowat = 1;
776 }
777 if (so->so_snd.sb_lowat == 0) {
778 so->so_snd.sb_lowat = MCLBYTES;
779 }
780 if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) {
781 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
782 }
783 return 0;
784 bad2:
785 so->so_snd.sb_flags &= ~SB_SEL;
786 selthreadclear(&so->so_snd.sb_sel);
787 sbrelease(&so->so_snd);
788 bad:
789 return ENOBUFS;
790 }
791
792 void
soreserve_preconnect(struct socket * so,unsigned int pre_cc)793 soreserve_preconnect(struct socket *so, unsigned int pre_cc)
794 {
795 /* As of now, same bytes for both preconnect read and write */
796 so->so_snd.sb_preconn_hiwat = pre_cc;
797 so->so_rcv.sb_preconn_hiwat = pre_cc;
798 }
799
800 /*
801 * Allot mbufs to a sockbuf.
802 * Attempt to scale mbmax so that mbcnt doesn't become limiting
803 * if buffering efficiency is near the normal case.
804 */
805 int
sbreserve(struct sockbuf * sb,uint32_t cc)806 sbreserve(struct sockbuf *sb, uint32_t cc)
807 {
808 if (cc > sb_max) {
809 /* We would not end up changing sb_cc, so return 0 */
810 if (sb->sb_hiwat == sb_max) {
811 return 0;
812 }
813 cc = sb_max;
814 }
815 if (cc > sb->sb_hiwat && (sb->sb_flags & SB_LIMITED)) {
816 return 0;
817 }
818 sb->sb_hiwat = cc;
819 sb->sb_mbmax = cc * sb_efficiency;
820 if (sb->sb_lowat > sb->sb_hiwat) {
821 sb->sb_lowat = sb->sb_hiwat;
822 }
823 return 1;
824 }
825
826 /*
827 * Free mbufs held by a socket, and reserved mbuf space.
828 */
829 /* WARNING needs to do selthreadclear() before calling this */
830 void
sbrelease(struct sockbuf * sb)831 sbrelease(struct sockbuf *sb)
832 {
833 sbflush(sb);
834 sb->sb_hiwat = 0;
835 sb->sb_mbmax = 0;
836 }
837
838 void
so_update_tx_data_stats(struct socket * so,uint32_t num_pkts,uint32_t space)839 so_update_tx_data_stats(struct socket *so, uint32_t num_pkts, uint32_t space)
840 {
841 so->so_tc_stats[SO_STATS_DATA].txpackets += num_pkts;
842 so->so_tc_stats[SO_STATS_DATA].txbytes += space;
843 }
844
845 static void
sb_update_data_stats(struct sockbuf * sb,uint32_t space)846 sb_update_data_stats(struct sockbuf *sb, uint32_t space)
847 {
848 if (sb->sb_flags & SB_RECV) {
849 sb->sb_so->so_tc_stats[SO_STATS_DATA].rxpackets += 1;
850 sb->sb_so->so_tc_stats[SO_STATS_DATA].rxbytes += space;
851 } else {
852 sb->sb_so->so_tc_stats[SO_STATS_DATA].txpackets += 1;
853 sb->sb_so->so_tc_stats[SO_STATS_DATA].txbytes += space;
854 }
855 }
856
857 static void
sb_update_no_space_stats(struct sockbuf * sb,uint32_t space)858 sb_update_no_space_stats(struct sockbuf *sb, uint32_t space)
859 {
860 if (sb->sb_flags & SB_RECV) {
861 sb->sb_so->so_tc_stats[SO_STATS_SBNOSPACE].rxpackets += 1;
862 sb->sb_so->so_tc_stats[SO_STATS_SBNOSPACE].rxbytes += space;
863 } else {
864 sb->sb_so->so_tc_stats[SO_STATS_SBNOSPACE].txpackets += 1;
865 sb->sb_so->so_tc_stats[SO_STATS_SBNOSPACE].txbytes += space;
866 }
867 }
868
869 /*
870 * Routines to add and remove
871 * data from an mbuf queue.
872 *
873 * The routines sbappend() or sbappendrecord() are normally called to
874 * append new mbufs to a socket buffer, after checking that adequate
875 * space is available, comparing the function sbspace() with the amount
876 * of data to be added. sbappendrecord() differs from sbappend() in
877 * that data supplied is treated as the beginning of a new record.
878 * To place a sender's address, optional access rights, and data in a
879 * socket receive buffer, sbappendaddr() should be used. To place
880 * access rights and data in a socket receive buffer, sbappendrights()
881 * should be used. In either case, the new data begins a new record.
882 * Note that unlike sbappend() and sbappendrecord(), these routines check
883 * for the caller that there will be enough space to store the data.
884 * Each fails if there is not enough space, or if it cannot find mbufs
885 * to store additional information in.
886 *
887 * Reliable protocols may use the socket send buffer to hold data
888 * awaiting acknowledgement. Data is normally copied from a socket
889 * send buffer in a protocol with m_copy for output to a peer,
890 * and then removing the data from the socket buffer with sbdrop()
891 * or sbdroprecord() when the data is acknowledged by the peer.
892 */
893
894 /*
895 * Append mbuf chain m to the last record in the
896 * socket buffer sb. The additional space associated
897 * the mbuf chain is recorded in sb. Empty mbufs are
898 * discarded and mbufs are compacted where possible.
899 */
900 static int
sbappend_common(struct sockbuf * sb,struct mbuf * m,boolean_t nodrop)901 sbappend_common(struct sockbuf *sb, struct mbuf *m, boolean_t nodrop)
902 {
903 struct socket *__single so = sb->sb_so;
904 struct soflow_hash_entry *__single dgram_flow_entry = NULL;
905
906 if (m == NULL || (sb->sb_flags & SB_DROP)) {
907 if (m != NULL && !nodrop) {
908 m_freem(m);
909 }
910 return 0;
911 }
912
913 SBLASTRECORDCHK(sb, "sbappend 1");
914
915 if (sb->sb_lastrecord != NULL && (sb->sb_mbtail->m_flags & M_EOR)) {
916 return sbappendrecord_common(sb, m, nodrop);
917 }
918
919 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
920 ASSERT(nodrop == FALSE);
921
922 if (NEED_DGRAM_FLOW_TRACKING(so)) {
923 dgram_flow_entry = soflow_get_flow(so, NULL, NULL, NULL,
924 m != NULL ? m_length(m) : 0, SOFLOW_DIRECTION_INBOUND,
925 (m != NULL && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0);
926 }
927
928 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
929 int error = sflt_data_in(so, NULL, &m, NULL, 0);
930 SBLASTRECORDCHK(sb, "sbappend 2");
931
932 #if CONTENT_FILTER
933 if (error == 0) {
934 error = cfil_sock_data_in(so, NULL, m, NULL, 0, dgram_flow_entry);
935 }
936 #endif /* CONTENT_FILTER */
937
938 if (error != 0) {
939 if (error != EJUSTRETURN) {
940 m_freem(m);
941 }
942 if (dgram_flow_entry != NULL) {
943 soflow_free_flow(dgram_flow_entry);
944 }
945 return 0;
946 }
947 } else if (m) {
948 m->m_flags &= ~M_SKIPCFIL;
949 }
950
951 if (dgram_flow_entry != NULL) {
952 soflow_free_flow(dgram_flow_entry);
953 }
954 }
955
956 /* If this is the first record, it's also the last record */
957 if (sb->sb_lastrecord == NULL) {
958 sb->sb_lastrecord = m;
959 }
960
961 sbcompress(sb, m, sb->sb_mbtail);
962 SBLASTRECORDCHK(sb, "sbappend 3");
963 return 1;
964 }
965
966 int
sbappend(struct sockbuf * sb,struct mbuf * m)967 sbappend(struct sockbuf *sb, struct mbuf *m)
968 {
969 return sbappend_common(sb, m, FALSE);
970 }
971
972 int
sbappend_nodrop(struct sockbuf * sb,struct mbuf * m)973 sbappend_nodrop(struct sockbuf *sb, struct mbuf *m)
974 {
975 return sbappend_common(sb, m, TRUE);
976 }
977
978 /*
979 * Similar to sbappend, except that this is optimized for stream sockets.
980 */
981 int
sbappendstream(struct sockbuf * sb,struct mbuf * m)982 sbappendstream(struct sockbuf *sb, struct mbuf *m)
983 {
984 struct soflow_hash_entry *__single dgram_flow_entry = NULL;
985 struct socket *__single so = sb->sb_so;
986
987 if (m == NULL || (sb->sb_flags & SB_DROP)) {
988 if (m != NULL) {
989 m_freem(m);
990 }
991 return 0;
992 }
993
994 if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
995 panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p",
996 m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
997 /* NOTREACHED */
998 }
999
1000 SBLASTMBUFCHK(sb, __func__);
1001
1002 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1003 if (NEED_DGRAM_FLOW_TRACKING(so)) {
1004 dgram_flow_entry = soflow_get_flow(so, NULL, NULL, NULL,
1005 m != NULL ? m_length(m) : 0, SOFLOW_DIRECTION_INBOUND,
1006 (m != NULL && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0);
1007 }
1008
1009 if (sb->sb_flags & SB_RECV && !(m && m->m_flags & M_SKIPCFIL)) {
1010 int error = sflt_data_in(so, NULL, &m, NULL, 0);
1011 SBLASTRECORDCHK(sb, "sbappendstream 1");
1012
1013 #if CONTENT_FILTER
1014 if (error == 0) {
1015 error = cfil_sock_data_in(so, NULL, m, NULL, 0, dgram_flow_entry);
1016 }
1017 #endif /* CONTENT_FILTER */
1018
1019 if (error != 0) {
1020 if (error != EJUSTRETURN) {
1021 m_freem(m);
1022 }
1023 if (dgram_flow_entry != NULL) {
1024 soflow_free_flow(dgram_flow_entry);
1025 }
1026 return 0;
1027 }
1028 } else if (m) {
1029 m->m_flags &= ~M_SKIPCFIL;
1030 }
1031
1032 if (dgram_flow_entry != NULL) {
1033 soflow_free_flow(dgram_flow_entry);
1034 }
1035 }
1036
1037 sbcompress(sb, m, sb->sb_mbtail);
1038 sb->sb_lastrecord = sb->sb_mb;
1039 SBLASTRECORDCHK(sb, "sbappendstream 2");
1040 return 1;
1041 }
1042
1043 void
sblastrecordchk(struct sockbuf * sb,const char * where)1044 sblastrecordchk(struct sockbuf *sb, const char *where)
1045 {
1046 struct mbuf *m = sb->sb_mb;
1047
1048 while (m && m->m_nextpkt) {
1049 m = m->m_nextpkt;
1050 }
1051
1052 if (m != sb->sb_lastrecord) {
1053 printf("sblastrecordchk: mb 0x%llx lastrecord 0x%llx "
1054 "last 0x%llx\n",
1055 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1056 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_lastrecord),
1057 (uint64_t)VM_KERNEL_ADDRPERM(m));
1058 printf("packet chain:\n");
1059 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1060 printf("\t0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(m));
1061 }
1062 panic("sblastrecordchk from %s", where);
1063 }
1064 }
1065
1066 void
sblastmbufchk(struct sockbuf * sb,const char * where)1067 sblastmbufchk(struct sockbuf *sb, const char *where)
1068 {
1069 struct mbuf *m = sb->sb_mb;
1070 struct mbuf *n;
1071
1072 while (m && m->m_nextpkt) {
1073 m = m->m_nextpkt;
1074 }
1075
1076 while (m && m->m_next) {
1077 m = m->m_next;
1078 }
1079
1080 if (m != sb->sb_mbtail) {
1081 printf("sblastmbufchk: mb 0x%llx mbtail 0x%llx last 0x%llx\n",
1082 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mb),
1083 (uint64_t)VM_KERNEL_ADDRPERM(sb->sb_mbtail),
1084 (uint64_t)VM_KERNEL_ADDRPERM(m));
1085 printf("packet tree:\n");
1086 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
1087 printf("\t");
1088 for (n = m; n != NULL; n = n->m_next) {
1089 printf("0x%llx ",
1090 (uint64_t)VM_KERNEL_ADDRPERM(n));
1091 }
1092 printf("\n");
1093 }
1094 panic("sblastmbufchk from %s", where);
1095 }
1096 }
1097
1098 /*
1099 * Similar to sbappend, except the mbuf chain begins a new record.
1100 */
1101 static int
sbappendrecord_common(struct sockbuf * sb,struct mbuf * m0,boolean_t nodrop)1102 sbappendrecord_common(struct sockbuf *sb, struct mbuf *m0, boolean_t nodrop)
1103 {
1104 struct soflow_hash_entry *__single dgram_flow_entry = NULL;
1105 struct socket *__single so = sb->sb_so;
1106 struct mbuf *m;
1107 int space = 0;
1108
1109 if (m0 == NULL || (sb->sb_flags & SB_DROP)) {
1110 if (m0 != NULL && nodrop == FALSE) {
1111 m_freem(m0);
1112 }
1113 return 0;
1114 }
1115
1116 for (m = m0; m != NULL; m = m->m_next) {
1117 space += m->m_len;
1118 }
1119
1120 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1121 sb_update_no_space_stats(sb, space);
1122
1123 if (nodrop == FALSE) {
1124 m_freem(m0);
1125 }
1126 return 0;
1127 }
1128
1129 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1130 ASSERT(nodrop == FALSE);
1131
1132 if (NEED_DGRAM_FLOW_TRACKING(so)) {
1133 dgram_flow_entry = soflow_get_flow(so, NULL, NULL, NULL,
1134 m0 != NULL ? m_length(m0) : 0, SOFLOW_DIRECTION_INBOUND,
1135 (m0 != NULL && m0->m_pkthdr.rcvif) ? m0->m_pkthdr.rcvif->if_index : 0);
1136 }
1137
1138 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1139 int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
1140 sock_data_filt_flag_record);
1141
1142 #if CONTENT_FILTER
1143 if (error == 0) {
1144 error = cfil_sock_data_in(sb->sb_so, NULL, m0, NULL, 0, dgram_flow_entry);
1145 }
1146 #endif /* CONTENT_FILTER */
1147
1148 if (error != 0) {
1149 SBLASTRECORDCHK(sb, "sbappendrecord 1");
1150 if (error != EJUSTRETURN) {
1151 m_freem(m0);
1152 }
1153 if (dgram_flow_entry != NULL) {
1154 soflow_free_flow(dgram_flow_entry);
1155 }
1156 return 0;
1157 }
1158 } else if (m0) {
1159 m0->m_flags &= ~M_SKIPCFIL;
1160 }
1161
1162 if (dgram_flow_entry != NULL) {
1163 soflow_free_flow(dgram_flow_entry);
1164 }
1165 }
1166
1167 /*
1168 * Note this permits zero length records.
1169 */
1170 sballoc(sb, m0);
1171 SBLASTRECORDCHK(sb, "sbappendrecord 2");
1172 if (sb->sb_lastrecord != NULL) {
1173 sb->sb_lastrecord->m_nextpkt = m0;
1174 } else {
1175 sb->sb_mb = m0;
1176 }
1177 sb->sb_lastrecord = m0;
1178 sb->sb_mbtail = m0;
1179
1180 m = m0->m_next;
1181 m0->m_next = 0;
1182 if (m && (m0->m_flags & M_EOR)) {
1183 m0->m_flags &= ~M_EOR;
1184 m->m_flags |= M_EOR;
1185 }
1186 sbcompress(sb, m, m0);
1187 SBLASTRECORDCHK(sb, "sbappendrecord 3");
1188 return 1;
1189 }
1190
1191 int
sbappendrecord(struct sockbuf * sb,struct mbuf * m0)1192 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1193 {
1194 return sbappendrecord_common(sb, m0, FALSE);
1195 }
1196
1197 int
sbappendrecord_nodrop(struct sockbuf * sb,struct mbuf * m0)1198 sbappendrecord_nodrop(struct sockbuf *sb, struct mbuf *m0)
1199 {
1200 return sbappendrecord_common(sb, m0, TRUE);
1201 }
1202
1203 /*
1204 * Concatenate address (optional), control (optional) and data into one
1205 * single mbuf chain. If sockbuf *sb is passed in, space check will be
1206 * performed.
1207 *
1208 * Returns: mbuf chain pointer if succeeded, NULL if failed
1209 */
1210 struct mbuf *
sbconcat_mbufs(struct sockbuf * sb,struct sockaddr * asa,struct mbuf * m0,struct mbuf * control)1211 sbconcat_mbufs(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)
1212 {
1213 struct mbuf *m = NULL, *n = NULL;
1214 int space = 0;
1215
1216 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1217 panic("sbconcat_mbufs");
1218 }
1219
1220 if (m0) {
1221 space += m0->m_pkthdr.len;
1222 }
1223 for (n = control; n; n = n->m_next) {
1224 space += n->m_len;
1225 if (n->m_next == 0) { /* keep pointer to last control buf */
1226 break;
1227 }
1228 }
1229
1230 if (asa != NULL) {
1231 static_assert(sizeof(asa->sa_len) == sizeof(__uint8_t));
1232 if (MLEN <= UINT8_MAX && asa->sa_len > MLEN) {
1233 return NULL;
1234 }
1235 space += asa->sa_len;
1236 }
1237
1238 if (sb != NULL && space > sbspace(sb)) {
1239 sb_update_no_space_stats(sb, space);
1240
1241 return NULL;
1242 }
1243
1244 if (n) {
1245 n->m_next = m0; /* concatenate data to control */
1246 } else {
1247 control = m0;
1248 }
1249
1250 if (asa != NULL) {
1251 MGET(m, M_DONTWAIT, MT_SONAME);
1252 if (m == 0) {
1253 if (n) {
1254 /* unchain control and data if necessary */
1255 n->m_next = NULL;
1256 }
1257 return NULL;
1258 }
1259 m->m_len = asa->sa_len;
1260 SOCKADDR_COPY(asa, mtod(m, caddr_t), asa->sa_len);
1261
1262 m->m_next = control;
1263 } else {
1264 m = control;
1265 }
1266
1267 return m;
1268 }
1269
1270 /*
1271 * Queue mbuf chain to the receive queue of a socket.
1272 * Parameter space is the total len of the mbuf chain.
1273 * If passed in, sockbuf space will be checked.
1274 *
1275 * Returns: 0 Invalid mbuf chain
1276 * 1 Success
1277 */
1278 int
sbappendchain(struct sockbuf * sb,struct mbuf * m)1279 sbappendchain(struct sockbuf *sb, struct mbuf *m)
1280 {
1281 struct mbuf *n, *nlast;
1282 int space = 0;
1283
1284 if (m == NULL) {
1285 return 0;
1286 }
1287
1288 for (n = m; n->m_next != NULL; n = n->m_next) {
1289 space += n->m_len;
1290 sballoc(sb, n);
1291 }
1292 space += n->m_len;
1293 sballoc(sb, n);
1294 nlast = n;
1295
1296 if (sb->sb_lastrecord != NULL) {
1297 sb->sb_lastrecord->m_nextpkt = m;
1298 } else {
1299 sb->sb_mb = m;
1300 }
1301 sb->sb_lastrecord = m;
1302 sb->sb_mbtail = nlast;
1303
1304 sb_update_data_stats(sb, space);
1305
1306 SBLASTMBUFCHK(sb, __func__);
1307 SBLASTRECORDCHK(sb, "sbappendadddr 2");
1308 return 1;
1309 }
1310
1311 /*
1312 * Returns: 0 Error: No space/out of mbufs/etc.
1313 * 1 Success
1314 *
1315 * Imputed: (*error_out) errno for error
1316 * ENOBUFS
1317 * sflt_data_in:??? [whatever a filter author chooses]
1318 */
1319 int
sbappendaddr(struct sockbuf * sb,struct sockaddr * asa,struct mbuf * m0,struct mbuf * control,int * error_out)1320 sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
1321 struct mbuf *control, int *error_out)
1322 {
1323 int result = 0;
1324 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1325 struct mbuf *mbuf_chain = NULL;
1326 struct soflow_hash_entry *__single dgram_flow_entry = NULL;
1327 struct socket *__single so = sb->sb_so;
1328
1329 if (error_out) {
1330 *error_out = 0;
1331 }
1332
1333 if (m0 && (m0->m_flags & M_PKTHDR) == 0) {
1334 panic("sbappendaddrorfree");
1335 }
1336
1337 if (sb->sb_flags & SB_DROP) {
1338 if (m0 != NULL) {
1339 m_freem(m0);
1340 }
1341 if (control != NULL && !sb_unix) {
1342 m_freem(control);
1343 }
1344 if (error_out != NULL) {
1345 *error_out = EINVAL;
1346 }
1347 return 0;
1348 }
1349
1350 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1351 /* Call socket data in filters */
1352
1353 if (NEED_DGRAM_FLOW_TRACKING(so)) {
1354 dgram_flow_entry = soflow_get_flow(so, NULL, asa, control,
1355 m0 != NULL ? m_length(m0) : 0, SOFLOW_DIRECTION_INBOUND,
1356 (m0 != NULL && m0->m_pkthdr.rcvif) ? m0->m_pkthdr.rcvif->if_index : 0);
1357 }
1358
1359 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1360 int error;
1361 error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
1362 SBLASTRECORDCHK(sb, __func__);
1363
1364 #if CONTENT_FILTER
1365 if (error == 0) {
1366 error = cfil_sock_data_in(sb->sb_so, asa, m0, control,
1367 0, dgram_flow_entry);
1368 }
1369 #endif /* CONTENT_FILTER */
1370
1371 if (error) {
1372 if (error != EJUSTRETURN) {
1373 if (m0) {
1374 m_freem(m0);
1375 }
1376 if (control != NULL && !sb_unix) {
1377 m_freem(control);
1378 }
1379 if (error_out) {
1380 *error_out = error;
1381 }
1382 }
1383 if (dgram_flow_entry != NULL) {
1384 soflow_free_flow(dgram_flow_entry);
1385 }
1386 return 0;
1387 }
1388 } else if (m0) {
1389 m0->m_flags &= ~M_SKIPCFIL;
1390 }
1391
1392 if (dgram_flow_entry != NULL) {
1393 soflow_free_flow(dgram_flow_entry);
1394 }
1395 }
1396
1397 mbuf_chain = sbconcat_mbufs(sb, asa, m0, control);
1398 SBLASTRECORDCHK(sb, "sbappendadddr 1");
1399 result = sbappendchain(sb, mbuf_chain);
1400 if (result == 0) {
1401 if (m0) {
1402 m_freem(m0);
1403 }
1404 if (control != NULL && !sb_unix) {
1405 m_freem(control);
1406 }
1407 if (error_out) {
1408 *error_out = ENOBUFS;
1409 }
1410 }
1411
1412 return result;
1413 }
1414
1415 inline boolean_t
is_cmsg_valid(struct mbuf * control,struct cmsghdr * cmsg)1416 is_cmsg_valid(struct mbuf *control, struct cmsghdr *cmsg)
1417 {
1418 if (cmsg == NULL) {
1419 return FALSE;
1420 }
1421
1422 if (cmsg->cmsg_len < sizeof(struct cmsghdr)) {
1423 return FALSE;
1424 }
1425
1426 if ((uintptr_t)control->m_data >= (uintptr_t)cmsg + cmsg->cmsg_len) {
1427 return FALSE;
1428 }
1429
1430 if ((uintptr_t)control->m_data + control->m_len <
1431 (uintptr_t)cmsg + cmsg->cmsg_len) {
1432 return FALSE;
1433 }
1434
1435 return TRUE;
1436 }
1437
1438 static int
sbappendcontrol_internal(struct sockbuf * sb,struct mbuf * m0,struct mbuf * control)1439 sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0,
1440 struct mbuf *control)
1441 {
1442 struct mbuf *m, *mlast, *n;
1443 int space = 0;
1444
1445 if (control == 0) {
1446 panic("sbappendcontrol");
1447 }
1448
1449 for (m = control;; m = m->m_next) {
1450 space += m->m_len;
1451 if (m->m_next == 0) {
1452 break;
1453 }
1454 }
1455 n = m; /* save pointer to last control buffer */
1456 for (m = m0; m; m = m->m_next) {
1457 space += m->m_len;
1458 }
1459 if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) {
1460 sb_update_no_space_stats(sb, space);
1461
1462 return 0;
1463 }
1464 n->m_next = m0; /* concatenate data to control */
1465 SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1466
1467 for (m = control; m->m_next != NULL; m = m->m_next) {
1468 sballoc(sb, m);
1469 }
1470 sballoc(sb, m);
1471 mlast = m;
1472
1473 if (sb->sb_lastrecord != NULL) {
1474 sb->sb_lastrecord->m_nextpkt = control;
1475 } else {
1476 sb->sb_mb = control;
1477 }
1478 sb->sb_lastrecord = control;
1479 sb->sb_mbtail = mlast;
1480
1481 sb_update_data_stats(sb, space);
1482
1483 SBLASTMBUFCHK(sb, __func__);
1484 SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1485 return 1;
1486 }
1487
1488 int
sbappendcontrol(struct sockbuf * sb,struct mbuf * m0,struct mbuf * control,int * error_out)1489 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
1490 int *error_out)
1491 {
1492 struct soflow_hash_entry *__single dgram_flow_entry = NULL;
1493 struct socket *__single so = sb->sb_so;
1494 int result = 0;
1495 boolean_t sb_unix = (sb->sb_flags & SB_UNIX);
1496
1497 if (error_out) {
1498 *error_out = 0;
1499 }
1500
1501 if (sb->sb_flags & SB_DROP) {
1502 if (m0 != NULL) {
1503 m_freem(m0);
1504 }
1505 if (control != NULL && !sb_unix) {
1506 m_freem(control);
1507 }
1508 if (error_out != NULL) {
1509 *error_out = EINVAL;
1510 }
1511 return 0;
1512 }
1513
1514 if (SOCK_DOM(sb->sb_so) == PF_INET || SOCK_DOM(sb->sb_so) == PF_INET6) {
1515 if (NEED_DGRAM_FLOW_TRACKING(so)) {
1516 dgram_flow_entry = soflow_get_flow(so, NULL, NULL, control,
1517 m0 != NULL ? m_length(m0) : 0, SOFLOW_DIRECTION_INBOUND,
1518 (m0 != NULL && m0->m_pkthdr.rcvif) ? m0->m_pkthdr.rcvif->if_index : 0);
1519 }
1520
1521 if (sb->sb_flags & SB_RECV && !(m0 && m0->m_flags & M_SKIPCFIL)) {
1522 int error;
1523
1524 error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
1525 SBLASTRECORDCHK(sb, __func__);
1526
1527 #if CONTENT_FILTER
1528 if (error == 0) {
1529 error = cfil_sock_data_in(sb->sb_so, NULL, m0, control,
1530 0, dgram_flow_entry);
1531 }
1532 #endif /* CONTENT_FILTER */
1533
1534 if (error) {
1535 if (error != EJUSTRETURN) {
1536 if (m0) {
1537 m_freem(m0);
1538 }
1539 if (control != NULL && !sb_unix) {
1540 m_freem(control);
1541 }
1542 if (error_out) {
1543 *error_out = error;
1544 }
1545 }
1546 if (dgram_flow_entry != NULL) {
1547 soflow_free_flow(dgram_flow_entry);
1548 }
1549 return 0;
1550 }
1551 } else if (m0) {
1552 m0->m_flags &= ~M_SKIPCFIL;
1553 }
1554
1555 if (dgram_flow_entry != NULL) {
1556 soflow_free_flow(dgram_flow_entry);
1557 }
1558 }
1559
1560 result = sbappendcontrol_internal(sb, m0, control);
1561 if (result == 0) {
1562 if (m0) {
1563 m_freem(m0);
1564 }
1565 if (control != NULL && !sb_unix) {
1566 m_freem(control);
1567 }
1568 if (error_out) {
1569 *error_out = ENOBUFS;
1570 }
1571 }
1572
1573 return result;
1574 }
1575
1576 /*
1577 * TCP streams have Multipath TCP support or are regular TCP sockets.
1578 */
1579 int
sbappendstream_rcvdemux(struct socket * so,struct mbuf * m)1580 sbappendstream_rcvdemux(struct socket *so, struct mbuf *m)
1581 {
1582 int ret = 0;
1583
1584 if ((m != NULL) &&
1585 m_pktlen(m) <= 0 &&
1586 !((so->so_flags & SOF_MP_SUBFLOW) &&
1587 (m->m_flags & M_PKTHDR) &&
1588 (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
1589 m_freem(m);
1590 return ret;
1591 }
1592
1593 #if MPTCP
1594 if (so->so_flags & SOF_MP_SUBFLOW) {
1595 return sbappendmptcpstream_rcv(&so->so_rcv, m);
1596 } else
1597 #endif /* MPTCP */
1598 {
1599 return sbappendstream(&so->so_rcv, m);
1600 }
1601 }
1602
#if MPTCP
/*
 * Append m to the receive buffer sb of an MPTCP subflow socket.
 * sb must have both SB_RECV and SB_NOCOMPRESS set, and m (if any)
 * must carry M_PKTHDR.
 *
 * Returns 1 if the chain was appended, or if it was a zero-length
 * PKTF_MPTCP_DFIN packet that was handed to mptcp_input(); 0 if
 * there was nothing to append or the chain was dropped.
 */
int
sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m)
{
	struct socket *so = sb->sb_so;

	VERIFY(m == NULL || (m->m_flags & M_PKTHDR));
	/* SB_NOCOMPRESS must be set prevent loss of M_PKTHDR data */
	VERIFY((sb->sb_flags & (SB_RECV | SB_NOCOMPRESS)) ==
	    (SB_RECV | SB_NOCOMPRESS));

	if (m == NULL || m_pktlen(m) == 0 || (sb->sb_flags & SB_DROP) ||
	    (so->so_state & SS_CANTRCVMORE)) {
		if (m == NULL) {
			return 0;
		}
		/* A zero-length DFIN packet is passed on rather than dropped */
		if ((m->m_flags & M_PKTHDR) &&
		    m_pktlen(m) == 0 &&
		    (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
			mptcp_input(tptomptp(sototcpcb(so))->mpt_mpte, m);
			return 1;
		}
		m_freem(m);
		return 0;
	}
	/* the socket is not closed, so SOF_MP_SUBFLOW must be set */
	VERIFY(so->so_flags & SOF_MP_SUBFLOW);

	if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) {
		panic("%s: nexpkt %p || mb %p != lastrecord %p", __func__,
		    m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord);
		/* NOTREACHED */
	}

	SBLASTMBUFCHK(sb, __func__);

	/* No filter support (SB_RECV) on mptcp subflow sockets */

	sbcompress(sb, m, sb->sb_mbtail);
	/* A stream buffer holds a single record: head and last coincide */
	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
	return 1;
}
#endif /* MPTCP */
1645
1646 /*
1647 * Compress mbuf chain m into the socket
1648 * buffer sb following mbuf n. If n
1649 * is null, the buffer is presumed empty.
1650 */
1651 static inline void
sbcompress(struct sockbuf * sb,struct mbuf * m,struct mbuf * n)1652 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1653 {
1654 int eor = 0, compress = (!(sb->sb_flags & SB_NOCOMPRESS));
1655 struct mbuf *o;
1656 int space = 0;
1657
1658 if (m == NULL) {
1659 /* There is nothing to compress; just update the tail */
1660 for (; n->m_next != NULL; n = n->m_next) {
1661 ;
1662 }
1663 sb->sb_mbtail = n;
1664 goto done;
1665 }
1666
1667 while (m != NULL) {
1668 space += m->m_len;
1669 eor |= m->m_flags & M_EOR;
1670 if (compress && m->m_len == 0 && (eor == 0 ||
1671 (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) {
1672 if (sb->sb_lastrecord == m) {
1673 sb->sb_lastrecord = m->m_next;
1674 }
1675 m = m_free(m);
1676 continue;
1677 }
1678 if (compress && n != NULL && (n->m_flags & M_EOR) == 0 &&
1679 m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1680 m->m_len <= M_TRAILINGSPACE(n) &&
1681 n->m_type == m->m_type) {
1682 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1683 (unsigned)m->m_len);
1684 n->m_len += m->m_len;
1685 sb->sb_cc += m->m_len;
1686 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
1687 sb->sb_ctl += m->m_len;
1688 }
1689
1690 /* update send byte count */
1691 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1692 inp_incr_sndbytes_total(sb->sb_so,
1693 m->m_len);
1694 inp_incr_sndbytes_unsent(sb->sb_so,
1695 m->m_len);
1696 }
1697 m = m_free(m);
1698 continue;
1699 }
1700 if (compress && n != NULL && (n->m_flags & M_EOR) == 0 &&
1701 proto_memacct_limited(sb->sb_so->so_proto) &&
1702 n->m_type == m->m_type) {
1703 int tocopy = min((int)M_TRAILINGSPACE(n), m->m_len);
1704 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
1705 tocopy);
1706 n->m_len += tocopy;
1707 sb->sb_cc += tocopy;
1708 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
1709 sb->sb_ctl += m->m_len;
1710 }
1711
1712 /* update send byte count */
1713 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1714 inp_incr_sndbytes_total(sb->sb_so,
1715 m->m_len);
1716 inp_incr_sndbytes_unsent(sb->sb_so,
1717 m->m_len);
1718 }
1719
1720 if (tocopy < m->m_len) {
1721 memmove(mtod(m, caddr_t),
1722 mtod(m, caddr_t) + tocopy, m->m_len - tocopy);
1723
1724 m->m_len -= tocopy;
1725 } else {
1726 m = m_free(m);
1727 continue;
1728 }
1729 }
1730 if (n != NULL) {
1731 n->m_next = m;
1732 } else {
1733 sb->sb_mb = m;
1734 }
1735 sb->sb_mbtail = m;
1736 sballoc(sb, m);
1737 n = m;
1738 m->m_flags &= ~M_EOR;
1739 m = m->m_next;
1740 n->m_next = NULL;
1741 }
1742 if (eor != 0) {
1743 if (n != NULL) {
1744 n->m_flags |= M_EOR;
1745 } else {
1746 printf("semi-panic: sbcompress\n");
1747 }
1748 }
1749 sb_update_data_stats(sb, space);
1750 done:
1751 SBLASTMBUFCHK(sb, __func__);
1752 }
1753
1754 void
sb_empty_assert(struct sockbuf * sb,const char * where)1755 sb_empty_assert(struct sockbuf *sb, const char *where)
1756 {
1757 if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 &&
1758 sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) {
1759 panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p "
1760 "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc,
1761 sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail,
1762 sb->sb_lastrecord);
1763 /* NOTREACHED */
1764 }
1765 }
1766
1767 /*
1768 * Free all mbufs in a sockbuf.
1769 * Check that all resources are reclaimed.
1770 */
1771 void
sbflush(struct sockbuf * sb)1772 sbflush(struct sockbuf *sb)
1773 {
1774 void *__single lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
1775 struct socket *so = sb->sb_so;
1776
1777 /* so_usecount may be 0 if we get here from sofreelastref() */
1778 if (so == NULL) {
1779 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p",
1780 __func__, sb, sb->sb_flags, lr_saved);
1781 /* NOTREACHED */
1782 } else if (so->so_usecount < 0) {
1783 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
1784 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
1785 so->so_usecount, lr_saved, solockhistory_nr(so));
1786 /* NOTREACHED */
1787 }
1788
1789 /*
1790 * Obtain lock on the socket buffer (SB_LOCK). This is required
1791 * to prevent the socket buffer from being unexpectedly altered
1792 * while it is used by another thread in socket send/receive.
1793 *
1794 * sblock() must not fail here, hence the assertion.
1795 */
1796 (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT);
1797 VERIFY(sb->sb_flags & SB_LOCK);
1798
1799 while (sb->sb_mbcnt > 0) {
1800 /*
1801 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
1802 * we would loop forever. Panic instead.
1803 */
1804 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) {
1805 break;
1806 }
1807 sbdrop(sb, (int)sb->sb_cc);
1808 }
1809
1810 if (sb->sb_flags & SB_SENDHEAD) {
1811 sb->sb_sendhead = NULL;
1812 }
1813
1814 sb_empty_assert(sb, __func__);
1815 sbunlock(sb, TRUE); /* keep socket locked */
1816 }
1817
1818 /*
1819 * Drop data from (the front of) a sockbuf.
1820 * use m_freem_list to free the mbuf structures
1821 * under a single lock... this is done by pruning
1822 * the top of the tree from the body by keeping track
1823 * of where we get to in the tree and then zeroing the
1824 * two pertinent pointers m_nextpkt and m_next
1825 * the socket buffer is then updated to point at the new
1826 * top of the tree and the pruned area is released via
1827 * m_freem_list.
1828 */
1829 void
sbdrop(struct sockbuf * sb,int len)1830 sbdrop(struct sockbuf *sb, int len)
1831 {
1832 struct mbuf *m, *free_list, *ml;
1833 struct mbuf *next, *last;
1834
1835 next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
1836 #if MPTCP
1837 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
1838 ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) ||
1839 (SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH) &&
1840 SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))) &&
1841 !(sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
1842 mptcp_preproc_sbdrop(sb->sb_so, m, (unsigned int)len);
1843 }
1844 if (m != NULL && len > 0 && !(sb->sb_flags & SB_RECV) &&
1845 (sb->sb_so->so_flags & SOF_MP_SUBFLOW) &&
1846 (sb->sb_so->so_flags1 & SOF1_POST_FALLBACK_SYNC)) {
1847 mptcp_fallback_sbdrop(sb->sb_so, m, len);
1848 }
1849 #endif /* MPTCP */
1850 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0);
1851
1852 free_list = last = m;
1853 ml = (struct mbuf *)0;
1854
1855 if (sb->sb_flags & SB_SENDHEAD) {
1856 sb->sb_sendoff -= MIN(len, sb->sb_sendoff);
1857 }
1858
1859 while (len > 0) {
1860 if (m == NULL) {
1861 if (next == NULL) {
1862 /*
1863 * We have reached the end of the mbuf chain before
1864 * freeing the requested amount of data.
1865 * Since there is no data left, zero the counts
1866 * and exit the loop.
1867 */
1868 len = 0;
1869 sb->sb_cc = 0;
1870 sb->sb_mbcnt = 0;
1871 break;
1872 }
1873 m = last = next;
1874 next = m->m_nextpkt;
1875 continue;
1876 }
1877 if (m->m_len > len) {
1878 m->m_len -= len;
1879 m->m_data += len;
1880 sb->sb_cc -= len;
1881 /* update the send byte count */
1882 if (sb->sb_flags & SB_SNDBYTE_CNT) {
1883 inp_decr_sndbytes_total(sb->sb_so, len);
1884 }
1885 if (sb->sb_flags & SB_SENDHEAD) {
1886 if (sb->sb_sendhead == m) {
1887 sb->sb_sendhead = NULL;
1888 }
1889 }
1890 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
1891 sb->sb_ctl -= len;
1892 }
1893 break;
1894 }
1895 len -= m->m_len;
1896 sbfree(sb, m);
1897
1898 ml = m;
1899 m = m->m_next;
1900 }
1901 while (m && m->m_len == 0) {
1902 sbfree(sb, m);
1903
1904 ml = m;
1905 m = m->m_next;
1906 }
1907 if (ml) {
1908 ml->m_next = (struct mbuf *)0;
1909 last->m_nextpkt = (struct mbuf *)0;
1910 m_freem_list(free_list);
1911 }
1912 if (m) {
1913 sb->sb_mb = m;
1914 m->m_nextpkt = next;
1915 } else {
1916 sb->sb_mb = next;
1917 }
1918
1919 /*
1920 * First part is an inline SB_EMPTY_FIXUP(). Second part
1921 * makes sure sb_lastrecord is up-to-date if we dropped
1922 * part of the last record.
1923 */
1924 m = sb->sb_mb;
1925 if (m == NULL) {
1926 sb->sb_mbtail = NULL;
1927 sb->sb_lastrecord = NULL;
1928 } else if (m->m_nextpkt == NULL) {
1929 sb->sb_lastrecord = m;
1930 }
1931
1932 #if CONTENT_FILTER
1933 cfil_sock_buf_update(sb);
1934 #endif /* CONTENT_FILTER */
1935
1936 KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0);
1937 }
1938
1939 /*
1940 * Drop a record off the front of a sockbuf
1941 * and move the next record to the front.
1942 */
1943 void
sbdroprecord(struct sockbuf * sb)1944 sbdroprecord(struct sockbuf *sb)
1945 {
1946 struct mbuf *m, *mn;
1947
1948 m = sb->sb_mb;
1949 if (m) {
1950 sb->sb_mb = m->m_nextpkt;
1951 do {
1952 sbfree(sb, m);
1953 MFREE(m, mn);
1954 m = mn;
1955 } while (m);
1956 }
1957 SB_EMPTY_FIXUP(sb);
1958 }
1959
1960 /*
1961 * Create a "control" mbuf containing the specified data
1962 * with the specified type for presentation on a socket buffer.
1963 */
1964 struct mbuf *
sbcreatecontrol(caddr_t __sized_by (size)p,int size,int type,int level)1965 sbcreatecontrol(caddr_t __sized_by(size) p, int size, int type, int level)
1966 {
1967 struct cmsghdr *cp;
1968 struct mbuf *m;
1969
1970 if (CMSG_SPACE((u_int)size) > MLEN) {
1971 return (struct mbuf *)NULL;
1972 }
1973 if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) {
1974 return (struct mbuf *)NULL;
1975 }
1976 cp = mtod(m, struct cmsghdr *);
1977 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
1978 /* XXX check size? */
1979 (void) memcpy(CMSG_DATA(cp), p, size);
1980 m->m_len = (int32_t)CMSG_SPACE(size);
1981 cp->cmsg_len = CMSG_LEN(size);
1982 cp->cmsg_level = level;
1983 cp->cmsg_type = type;
1984 return m;
1985 }
1986
1987 struct mbuf **
sbcreatecontrol_mbuf(caddr_t __sized_by (size)p,int size,int type,int level,struct mbuf ** mp)1988 sbcreatecontrol_mbuf(caddr_t __sized_by(size) p, int size, int type, int level, struct mbuf **mp)
1989 {
1990 struct mbuf *m;
1991 struct cmsghdr *cp;
1992
1993 if (*mp == NULL) {
1994 *mp = sbcreatecontrol(p, size, type, level);
1995 return mp;
1996 }
1997
1998 if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) {
1999 mp = &(*mp)->m_next;
2000 *mp = sbcreatecontrol(p, size, type, level);
2001 return mp;
2002 }
2003
2004 m = *mp;
2005
2006 cp = (struct cmsghdr *)(void *)(mtod(m, char *) + m->m_len);
2007 /* CMSG_SPACE ensures 32-bit alignment */
2008 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2009 m->m_len += (int32_t)CMSG_SPACE(size);
2010
2011 /* XXX check size? */
2012 (void) memcpy(CMSG_DATA(cp), p, size);
2013 cp->cmsg_len = CMSG_LEN(size);
2014 cp->cmsg_level = level;
2015 cp->cmsg_type = type;
2016
2017 return mp;
2018 }
2019
2020
2021 /*
2022 * Some routines that return EOPNOTSUPP for entry points that are not
2023 * supported by a protocol. Fill in as needed.
2024 */
/* Placeholder abort entry point: ignores its argument, returns EOPNOTSUPP. */
int
pru_abort_notsupp(struct socket *so)
{
	(void)so;

	return EOPNOTSUPP;
}
2031
/* Placeholder accept entry point: ignores its arguments, returns EOPNOTSUPP. */
int
pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
{
	(void)so;
	(void)nam;

	return EOPNOTSUPP;
}
2038
/* Placeholder attach entry point: ignores its arguments, returns EOPNOTSUPP. */
int
pru_attach_notsupp(struct socket *so, int proto, struct proc *p)
{
	(void)so;
	(void)proto;
	(void)p;

	return EOPNOTSUPP;
}
2045
/* Placeholder bind entry point: ignores its arguments, returns EOPNOTSUPP. */
int
pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	(void)so;
	(void)nam;
	(void)p;

	return EOPNOTSUPP;
}
2052
/* Placeholder connect entry point: ignores its arguments, returns EOPNOTSUPP. */
int
pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	(void)so;
	(void)nam;
	(void)p;

	return EOPNOTSUPP;
}
2059
/* Placeholder connect2 entry point: ignores both sockets, returns EOPNOTSUPP. */
int
pru_connect2_notsupp(struct socket *so1, struct socket *so2)
{
	(void)so1;
	(void)so2;

	return EOPNOTSUPP;
}
2066
2067 int
pru_connectx_notsupp(struct socket * so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uint32_t flags,void * arg,uint32_t arglen,struct uio * uio,user_ssize_t * bytes_written)2068 pru_connectx_notsupp(struct socket *so, struct sockaddr *src,
2069 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
2070 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
2071 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
2072 {
2073 #pragma unused(so, src, dst, p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written)
2074 return EOPNOTSUPP;
2075 }
2076
2077 int
pru_control_notsupp(struct socket * so,u_long cmd,caddr_t __sized_by (IOCPARM_LEN (cmd))data,struct ifnet * ifp,struct proc * p)2078 pru_control_notsupp(struct socket *so,
2079 u_long cmd, caddr_t __sized_by(IOCPARM_LEN(cmd)) data,
2080 struct ifnet *ifp, struct proc *p)
2081 {
2082 #pragma unused(so, cmd, data, ifp, p)
2083 return EOPNOTSUPP;
2084 }
2085
/* Placeholder detach entry point: ignores its argument, returns EOPNOTSUPP. */
int
pru_detach_notsupp(struct socket *so)
{
	(void)so;

	return EOPNOTSUPP;
}
2092
/* Placeholder disconnect entry point: ignores its argument, returns EOPNOTSUPP. */
int
pru_disconnect_notsupp(struct socket *so)
{
	(void)so;

	return EOPNOTSUPP;
}
2099
2100 int
pru_disconnectx_notsupp(struct socket * so,sae_associd_t aid,sae_connid_t cid)2101 pru_disconnectx_notsupp(struct socket *so, sae_associd_t aid, sae_connid_t cid)
2102 {
2103 #pragma unused(so, aid, cid)
2104 return EOPNOTSUPP;
2105 }
2106
/*
 * Fallback pru_listen entry installed by pru_sanitize() when a protocol
 * supplies none.  Ignores every argument and fails with EOPNOTSUPP.
 */
int
pru_listen_notsupp(struct socket *so, struct proc *p)
{
	(void)so;
	(void)p;

	return EOPNOTSUPP;
}
2113
/*
 * Fallback pru_peeraddr entry installed by pru_sanitize() when a protocol
 * supplies none.  Leaves *nam untouched and fails with EOPNOTSUPP.
 */
int
pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	(void)so;
	(void)nam;

	return EOPNOTSUPP;
}
2120
/*
 * Fallback pru_rcvd entry installed by pru_sanitize() when a protocol
 * supplies none.  Ignores every argument and fails with EOPNOTSUPP.
 */
int
pru_rcvd_notsupp(struct socket *so, int flags)
{
	(void)so;
	(void)flags;

	return EOPNOTSUPP;
}
2127
/*
 * Fallback pru_rcvoob entry installed by pru_sanitize() when a protocol
 * supplies none.  Ignores every argument and fails with EOPNOTSUPP.
 */
int
pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
{
	(void)so;
	(void)m;
	(void)flags;

	return EOPNOTSUPP;
}
2134
/*
 * Fallback pru_send entry installed by pru_sanitize() when a protocol
 * supplies none.  Ignores every argument (the mbufs are not touched
 * here) and fails with EOPNOTSUPP.
 */
int
pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *addr, struct mbuf *control, struct proc *p)
{
	(void)so;
	(void)flags;
	(void)m;
	(void)addr;
	(void)control;
	(void)p;

	return EOPNOTSUPP;
}
2142
2143 int
pru_send_list_notsupp(struct socket * so,struct mbuf * m,u_int * pktcnt,int flags)2144 pru_send_list_notsupp(struct socket *so, struct mbuf *m, u_int *pktcnt,
2145 int flags)
2146 {
2147 #pragma unused(so, m, pktcnt, flags)
2148 return EOPNOTSUPP;
2149 }
2150
2151 /*
2152 * This isn't really a ``null'' operation, but it's the default one
2153 * and doesn't do anything destructive.
2154 */
2155 int
pru_sense_null(struct socket * so,void * ub,int isstat64)2156 pru_sense_null(struct socket *so, void *ub, int isstat64)
2157 {
2158 if (isstat64 != 0) {
2159 struct stat64 *sb64;
2160
2161 sb64 = (struct stat64 *)ub;
2162 sb64->st_blksize = so->so_snd.sb_hiwat;
2163 } else {
2164 struct stat *sb;
2165
2166 sb = (struct stat *)ub;
2167 sb->st_blksize = so->so_snd.sb_hiwat;
2168 }
2169
2170 return 0;
2171 }
2172
/*
 * Fallback pru_sosend entry installed by pru_sanitize() when a protocol
 * supplies none.  Ignores every argument and fails with EOPNOTSUPP.
 */
int
pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	(void)so;
	(void)addr;
	(void)uio;
	(void)top;
	(void)control;
	(void)flags;

	return EOPNOTSUPP;
}
2180
2181 int
pru_sosend_list_notsupp(struct socket * so,struct mbuf * m,size_t total_len,u_int * pktcnt,int flags)2182 pru_sosend_list_notsupp(struct socket *so, struct mbuf *m, size_t total_len, u_int *pktcnt, int flags)
2183 {
2184 #pragma unused(so, m, total_len, pktcnt, flags)
2185 return EOPNOTSUPP;
2186 }
2187
/*
 * Fallback pru_soreceive entry installed by pru_sanitize() when a
 * protocol supplies none.  None of the output pointers is written;
 * the call fails with EOPNOTSUPP.
 */
int
pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	(void)so;
	(void)paddr;
	(void)uio;
	(void)mp0;
	(void)controlp;
	(void)flagsp;

	return EOPNOTSUPP;
}
2195
/*
 * Fallback pru_shutdown entry installed by pru_sanitize() when a protocol
 * supplies none.  Ignores the socket and fails with EOPNOTSUPP.
 */
int
pru_shutdown_notsupp(struct socket *so)
{
	(void)so;

	return EOPNOTSUPP;
}
2202
/*
 * Fallback pru_sockaddr entry installed by pru_sanitize() when a protocol
 * supplies none.  Leaves *nam untouched and fails with EOPNOTSUPP.
 */
int
pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
{
	(void)so;
	(void)nam;

	return EOPNOTSUPP;
}
2209
2210 int
pru_sopoll_notsupp(struct socket * so,int events,kauth_cred_t cred,void * wql)2211 pru_sopoll_notsupp(struct socket *so, int events, kauth_cred_t cred, void *wql)
2212 {
2213 #pragma unused(so, events, cred, wql)
2214 return EOPNOTSUPP;
2215 }
2216
/*
 * Default pru_socheckopt entry: by default every socket option is allowed
 * for both set and get, so this ignores its arguments and returns 0.
 */
int
pru_socheckopt_null(struct socket *so, struct sockopt *sopt)
{
	(void)so;
	(void)sopt;

	return 0;
}
2226
/*
 * Default pru_preconnect entry installed by pru_sanitize(): does nothing
 * and reports success (0).
 */
static int
pru_preconnect_null(struct socket *so)
{
	(void)so;

	return 0;
}
2233
/*
 * Default pru_defunct entry installed by pru_sanitize(): does nothing
 * and reports success (0).
 */
static int
pru_defunct_null(struct socket *so)
{
	(void)so;

	return 0;
}
2240
2241
2242 void
pru_sanitize(struct pr_usrreqs * pru)2243 pru_sanitize(struct pr_usrreqs *pru)
2244 {
2245 #define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar)
2246 DEFAULT(pru->pru_abort, pru_abort_notsupp);
2247 DEFAULT(pru->pru_accept, pru_accept_notsupp);
2248 DEFAULT(pru->pru_attach, pru_attach_notsupp);
2249 DEFAULT(pru->pru_bind, pru_bind_notsupp);
2250 DEFAULT(pru->pru_connect, pru_connect_notsupp);
2251 DEFAULT(pru->pru_connect2, pru_connect2_notsupp);
2252 DEFAULT(pru->pru_connectx, pru_connectx_notsupp);
2253 DEFAULT(pru->pru_control, pru_control_notsupp);
2254 DEFAULT(pru->pru_detach, pru_detach_notsupp);
2255 DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp);
2256 DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp);
2257 DEFAULT(pru->pru_listen, pru_listen_notsupp);
2258 DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp);
2259 DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp);
2260 DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp);
2261 DEFAULT(pru->pru_send, pru_send_notsupp);
2262 DEFAULT(pru->pru_send_list, pru_send_list_notsupp);
2263 DEFAULT(pru->pru_sense, pru_sense_null);
2264 DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp);
2265 DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp);
2266 DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp);
2267 DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp);
2268 DEFAULT(pru->pru_sosend, pru_sosend_notsupp);
2269 DEFAULT(pru->pru_sosend_list, pru_sosend_list_notsupp);
2270 DEFAULT(pru->pru_socheckopt, pru_socheckopt_null);
2271 DEFAULT(pru->pru_preconnect, pru_preconnect_null);
2272 DEFAULT(pru->pru_defunct, pru_defunct_null);
2273 #undef DEFAULT
2274 }
2275
2276 /*
2277 * The following are macros on BSD and functions on Darwin
2278 */
2279
2280 /*
2281 * Do we need to notify the other side when I/O is possible?
2282 */
2283
2284 int
sb_notify(struct sockbuf * sb)2285 sb_notify(struct sockbuf *sb)
2286 {
2287 return sb->sb_waiters > 0 ||
2288 (sb->sb_flags & (SB_SEL | SB_ASYNC | SB_UPCALL | SB_KNOTE));
2289 }
2290
2291 /*
2292 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
2293 * This is problematical if the fields are unsigned, as the space might
2294 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
2295 * overflow and return 0.
2296 */
2297 int
sbspace(struct sockbuf * sb)2298 sbspace(struct sockbuf *sb)
2299 {
2300 int pending = 0;
2301 int space;
2302
2303 if (sb->sb_flags & SB_KCTL) {
2304 space = (int)(sb->sb_hiwat - sb->sb_cc);
2305 } else {
2306 space = imin((int)(sb->sb_hiwat - sb->sb_cc),
2307 (int)(sb->sb_mbmax - sb->sb_mbcnt));
2308 }
2309 if (sb->sb_preconn_hiwat != 0) {
2310 space = imin((int)(sb->sb_preconn_hiwat - sb->sb_cc), space);
2311 }
2312
2313 if (space < 0) {
2314 space = 0;
2315 }
2316
2317 /* Compensate for data being processed by content filters */
2318 #if CONTENT_FILTER
2319 pending = cfil_sock_data_space(sb);
2320 #endif /* CONTENT_FILTER */
2321 if (pending > space) {
2322 space = 0;
2323 } else {
2324 space -= pending;
2325 }
2326
2327 return space;
2328 }
2329
2330 /* do we have to send all at once on a socket? */
2331 int
sosendallatonce(struct socket * so)2332 sosendallatonce(struct socket *so)
2333 {
2334 return so->so_proto->pr_flags & PR_ATOMIC;
2335 }
2336
2337 /* can we read something from so? */
2338 int
soreadable(struct socket * so)2339 soreadable(struct socket *so)
2340 {
2341 return so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
2342 ((so->so_state & SS_CANTRCVMORE)
2343 #if CONTENT_FILTER
2344 && cfil_sock_data_pending(&so->so_rcv) == 0
2345 #endif /* CONTENT_FILTER */
2346 ) ||
2347 so->so_comp.tqh_first || so->so_error;
2348 }
2349
2350 /* can we write something to so? */
2351
2352 int
sowriteable(struct socket * so)2353 sowriteable(struct socket *so)
2354 {
2355 if ((so->so_state & SS_CANTSENDMORE) ||
2356 so->so_error > 0) {
2357 return 1;
2358 }
2359 if (so_wait_for_if_feedback(so) || !socanwrite(so)) {
2360 return 0;
2361 }
2362 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2363 return 1;
2364 }
2365
2366 int64_t data = sbspace(&so->so_snd);
2367 int64_t lowat = so->so_snd.sb_lowat;
2368 /*
2369 * Deal with connected UNIX domain sockets which
2370 * rely on the fact that the sender's socket buffer is
2371 * actually the receiver's socket buffer.
2372 */
2373 if (SOCK_DOM(so) == PF_LOCAL) {
2374 struct unpcb *unp = sotounpcb(so);
2375 if (unp != NULL && unp->unp_conn != NULL &&
2376 unp->unp_conn->unp_socket != NULL) {
2377 struct socket *so2 = unp->unp_conn->unp_socket;
2378 /*
2379 * At this point we know that `so' is locked
2380 * and that `unp_conn` isn't going to change.
2381 * However, we don't lock `so2` because doing so
2382 * may require unlocking `so'
2383 * (see unp_get_locks_in_order()).
2384 *
2385 * Two cases can happen:
2386 *
2387 * 1) we return 1 and tell the application that
2388 * it can write. Meanwhile, another thread
2389 * fills up the socket buffer. This will either
2390 * lead to a blocking send or EWOULDBLOCK
2391 * which the application should deal with.
2392 * 2) we return 0 and tell the application that
2393 * the socket is not writable. Meanwhile,
2394 * another thread depletes the receive socket
2395 * buffer. In this case the application will
2396 * be woken up by sb_notify().
2397 *
2398 * MIN() is required because otherwise sosendcheck()
2399 * may return EWOULDBLOCK since it only considers
2400 * so->so_snd.
2401 */
2402 data = MIN(data, sbspace(&so2->so_rcv));
2403 }
2404 }
2405
2406 if (data >= lowat) {
2407 if (so->so_flags & SOF_NOTSENT_LOWAT) {
2408 if ((SOCK_DOM(so) == PF_INET6 ||
2409 SOCK_DOM(so) == PF_INET) &&
2410 so->so_type == SOCK_STREAM) {
2411 return tcp_notsent_lowat_check(so);
2412 }
2413 #if MPTCP
2414 else if ((SOCK_DOM(so) == PF_MULTIPATH) &&
2415 (SOCK_PROTO(so) == IPPROTO_TCP)) {
2416 return mptcp_notsent_lowat_check(so);
2417 }
2418 #endif
2419 else {
2420 return 1;
2421 }
2422 } else {
2423 return 1;
2424 }
2425 }
2426 return 0;
2427 }
2428
2429 /* adjust counters in sb reflecting allocation of m */
2430
2431 void
sballoc(struct sockbuf * sb,struct mbuf * m)2432 sballoc(struct sockbuf *sb, struct mbuf *m)
2433 {
2434 int mbcnt = m_capacity(m);
2435
2436 sb->sb_cc += m->m_len;
2437 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
2438 sb->sb_ctl += m->m_len;
2439 }
2440
2441 sb->sb_mbcnt += mbcnt;
2442 proto_memacct_add(sb->sb_so->so_proto, mbcnt);
2443
2444 /*
2445 * If data is being added to the send socket buffer,
2446 * update the send byte count
2447 */
2448 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2449 inp_incr_sndbytes_total(sb->sb_so, m->m_len);
2450 inp_incr_sndbytes_unsent(sb->sb_so, m->m_len);
2451 }
2452 }
2453
2454 /* adjust counters in sb reflecting freeing of m */
2455 void
sbfree(struct sockbuf * sb,struct mbuf * m)2456 sbfree(struct sockbuf *sb, struct mbuf *m)
2457 {
2458 int mbcnt = m_capacity(m);
2459
2460 sb->sb_cc -= m->m_len;
2461 if (!m_has_mtype(m, MTF_DATA | MTF_HEADER | MTF_OOBDATA)) {
2462 sb->sb_ctl -= m->m_len;
2463 }
2464
2465 sb->sb_mbcnt -= mbcnt;
2466 proto_memacct_sub(sb->sb_so->so_proto, mbcnt);
2467
2468 /*
2469 * If data is being removed from the send socket buffer,
2470 * update the send byte count
2471 */
2472 if (sb->sb_flags & SB_SNDBYTE_CNT) {
2473 inp_decr_sndbytes_total(sb->sb_so, m->m_len);
2474 }
2475
2476 if (sb->sb_flags & SB_SENDHEAD) {
2477 if (m == sb->sb_sendhead) {
2478 sb->sb_sendhead = NULL;
2479 }
2480 }
2481 }
2482
2483 /*
2484 * Set lock on sockbuf sb; sleep if lock is already held.
2485 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
2486 * Returns error without lock if sleep is interrupted.
2487 */
2488 int
sblock(struct sockbuf * sb,uint32_t flags)2489 sblock(struct sockbuf *sb, uint32_t flags)
2490 {
2491 boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR));
2492 void *__single lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
2493 struct socket *so = sb->sb_so;
2494 void * wchan;
2495 int error = 0;
2496 thread_t __single tp = current_thread();
2497
2498 VERIFY((flags & SBL_VALID) == flags);
2499
2500 /* so_usecount may be 0 if we get here from sofreelastref() */
2501 if (so == NULL) {
2502 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p",
2503 __func__, sb, sb->sb_flags, lr_saved);
2504 /* NOTREACHED */
2505 } else if (so->so_usecount < 0) {
2506 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2507 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2508 so->so_usecount, lr_saved, solockhistory_nr(so));
2509 /* NOTREACHED */
2510 }
2511
2512 /*
2513 * The content filter thread must hold the sockbuf lock
2514 */
2515 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2516 /*
2517 * Don't panic if we are defunct because SB_LOCK has
2518 * been cleared by sodefunct()
2519 */
2520 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
2521 panic("%s: SB_LOCK not held for %p",
2522 __func__, sb);
2523 }
2524
2525 /* Keep the sockbuf locked */
2526 return 0;
2527 }
2528
2529 if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) {
2530 return EWOULDBLOCK;
2531 }
2532 /*
2533 * We may get here from sorflush(), in which case "sb" may not
2534 * point to the real socket buffer. Use the actual socket buffer
2535 * address from the socket instead.
2536 */
2537 wchan = (sb->sb_flags & SB_RECV) ?
2538 &so->so_rcv.sb_flags : &so->so_snd.sb_flags;
2539
2540 /*
2541 * A content filter thread has exclusive access to the sockbuf
2542 * until it clears the
2543 */
2544 while ((sb->sb_flags & SB_LOCK) ||
2545 ((so->so_flags & SOF_CONTENT_FILTER) &&
2546 sb->sb_cfil_thread != NULL)) {
2547 lck_mtx_t *mutex_held;
2548
2549 /*
2550 * XXX: This code should be moved up above outside of this loop;
2551 * however, we may get here as part of sofreelastref(), and
2552 * at that time pr_getlock() may no longer be able to return
2553 * us the lock. This will be fixed in future.
2554 */
2555 if (so->so_proto->pr_getlock != NULL) {
2556 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2557 } else {
2558 mutex_held = so->so_proto->pr_domain->dom_mtx;
2559 }
2560
2561 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2562
2563 sb->sb_wantlock++;
2564 VERIFY(sb->sb_wantlock != 0);
2565
2566 error = msleep(wchan, mutex_held,
2567 nointr ? PSOCK : PSOCK | PCATCH,
2568 nointr ? "sb_lock_nointr" : "sb_lock", NULL);
2569
2570 VERIFY(sb->sb_wantlock != 0);
2571 sb->sb_wantlock--;
2572
2573 if (error == 0 && (so->so_flags & SOF_DEFUNCT) &&
2574 !(flags & SBL_IGNDEFUNCT)) {
2575 error = EBADF;
2576 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llu [%d,%d] "
2577 "(%d)\n", __func__, proc_selfpid(),
2578 proc_best_name(current_proc()),
2579 so->so_gencnt,
2580 SOCK_DOM(so), SOCK_TYPE(so), error);
2581 }
2582
2583 if (error != 0) {
2584 return error;
2585 }
2586 }
2587 sb->sb_flags |= SB_LOCK;
2588 return 0;
2589 }
2590
2591 /*
2592 * Release lock on sockbuf sb
2593 */
2594 void
sbunlock(struct sockbuf * sb,boolean_t keeplocked)2595 sbunlock(struct sockbuf *sb, boolean_t keeplocked)
2596 {
2597 void *__single lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
2598 struct socket *so = sb->sb_so;
2599 thread_t __single tp = current_thread();
2600
2601 /* so_usecount may be 0 if we get here from sofreelastref() */
2602 if (so == NULL) {
2603 panic("%s: null so, sb=%p sb_flags=0x%x lr=%p",
2604 __func__, sb, sb->sb_flags, lr_saved);
2605 /* NOTREACHED */
2606 } else if (so->so_usecount < 0) {
2607 panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p "
2608 "lrh= %s\n", __func__, sb, sb->sb_flags, so,
2609 so->so_usecount, lr_saved, solockhistory_nr(so));
2610 /* NOTREACHED */
2611 }
2612
2613 /*
2614 * The content filter thread must hold the sockbuf lock
2615 */
2616 if ((so->so_flags & SOF_CONTENT_FILTER) && sb->sb_cfil_thread == tp) {
2617 /*
2618 * Don't panic if we are defunct because SB_LOCK has
2619 * been cleared by sodefunct()
2620 */
2621 if (!(so->so_flags & SOF_DEFUNCT) &&
2622 !(sb->sb_flags & SB_LOCK) &&
2623 !(so->so_state & SS_DEFUNCT) &&
2624 !(so->so_flags1 & SOF1_DEFUNCTINPROG)) {
2625 panic("%s: SB_LOCK not held for %p",
2626 __func__, sb);
2627 }
2628 /* Keep the sockbuf locked and proceed */
2629 } else {
2630 VERIFY((sb->sb_flags & SB_LOCK) ||
2631 (so->so_state & SS_DEFUNCT) ||
2632 (so->so_flags1 & SOF1_DEFUNCTINPROG));
2633
2634 sb->sb_flags &= ~SB_LOCK;
2635
2636 if (sb->sb_wantlock > 0) {
2637 /*
2638 * We may get here from sorflush(), in which case "sb"
2639 * may not point to the real socket buffer. Use the
2640 * actual socket buffer address from the socket instead.
2641 */
2642 wakeup((sb->sb_flags & SB_RECV) ? &so->so_rcv.sb_flags :
2643 &so->so_snd.sb_flags);
2644 }
2645 }
2646
2647 if (!keeplocked) { /* unlock on exit */
2648 if (so->so_flags & SOF_MP_SUBFLOW || SOCK_DOM(so) == PF_MULTIPATH) {
2649 (*so->so_proto->pr_unlock)(so, 1, lr_saved);
2650 } else {
2651 lck_mtx_t *mutex_held;
2652
2653 if (so->so_proto->pr_getlock != NULL) {
2654 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
2655 } else {
2656 mutex_held = so->so_proto->pr_domain->dom_mtx;
2657 }
2658
2659 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
2660
2661 VERIFY(so->so_usecount > 0);
2662 so->so_usecount--;
2663 so->unlock_lr[so->next_unlock_lr] = lr_saved;
2664 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2665 lck_mtx_unlock(mutex_held);
2666 }
2667 }
2668 }
2669
2670 void
sorwakeup(struct socket * so)2671 sorwakeup(struct socket *so)
2672 {
2673 if (sb_notify(&so->so_rcv)) {
2674 sowakeup(so, &so->so_rcv, NULL);
2675 }
2676 }
2677
2678 void
sowwakeup(struct socket * so)2679 sowwakeup(struct socket *so)
2680 {
2681 if (sb_notify(&so->so_snd)) {
2682 sowakeup(so, &so->so_snd, NULL);
2683 }
2684 }
2685
2686 static void
soevupcall(struct socket * so,uint32_t hint)2687 soevupcall(struct socket *so, uint32_t hint)
2688 {
2689 if (so->so_event != NULL) {
2690 caddr_t __single so_eventarg = so->so_eventarg;
2691
2692 hint &= so->so_eventmask;
2693 if (hint != 0) {
2694 so->so_event(so, so_eventarg, hint);
2695 }
2696 }
2697 }
2698
2699 void
soevent(struct socket * so,uint32_t hint)2700 soevent(struct socket *so, uint32_t hint)
2701 {
2702 if (net_wake_pkt_debug > 0 && (hint & SO_FILT_HINT_WAKE_PKT)) {
2703 os_log(wake_packet_log_handle, "soevents: SO_FILT_HINT_WAKE_PKT so_gencnt: %llu",
2704 so->so_gencnt);
2705 }
2706
2707 if (so->so_flags & SOF_KNOTE) {
2708 KNOTE(&so->so_klist, hint);
2709 }
2710
2711 soevupcall(so, hint);
2712
2713 /*
2714 * Don't post an event if this a subflow socket or
2715 * the app has opted out of using cellular interface
2716 */
2717 if ((hint & SO_FILT_HINT_IFDENIED) &&
2718 !(so->so_flags & SOF_MP_SUBFLOW) &&
2719 !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR) &&
2720 !(so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE) &&
2721 !(so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
2722 soevent_ifdenied(so);
2723 }
2724 }
2725
2726 static void
soevent_ifdenied(struct socket * so)2727 soevent_ifdenied(struct socket *so)
2728 {
2729 struct kev_netpolicy_ifdenied ev_ifdenied;
2730
2731 bzero(&ev_ifdenied, sizeof(ev_ifdenied));
2732 /*
2733 * The event consumer is interested about the effective {upid,pid,uuid}
2734 * info which can be different than the those related to the process
2735 * that recently performed a system call on the socket, i.e. when the
2736 * socket is delegated.
2737 */
2738 if (so->so_flags & SOF_DELEGATED) {
2739 ev_ifdenied.ev_data.eupid = so->e_upid;
2740 ev_ifdenied.ev_data.epid = so->e_pid;
2741 uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid);
2742 } else {
2743 ev_ifdenied.ev_data.eupid = so->last_upid;
2744 ev_ifdenied.ev_data.epid = so->last_pid;
2745 uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid);
2746 }
2747
2748 if (++so->so_ifdenied_notifies > 1) {
2749 /*
2750 * Allow for at most one kernel event to be generated per
2751 * socket; so_ifdenied_notifies is reset upon changes in
2752 * the UUID policy. See comments in inp_update_policy.
2753 */
2754 if (net_io_policy_log) {
2755 uuid_string_t buf;
2756
2757 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2758 log(LOG_DEBUG, "%s[%d]: so %llu [%d,%d] epid %llu "
2759 "euuid %s%s has %d redundant events supressed\n",
2760 __func__, so->last_pid,
2761 so->so_gencnt, SOCK_DOM(so),
2762 SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf,
2763 ((so->so_flags & SOF_DELEGATED) ?
2764 " [delegated]" : ""), so->so_ifdenied_notifies);
2765 }
2766 } else {
2767 if (net_io_policy_log) {
2768 uuid_string_t buf;
2769
2770 uuid_unparse(ev_ifdenied.ev_data.euuid, buf);
2771 log(LOG_DEBUG, "%s[%d]: so %llu [%d,%d] epid %llu "
2772 "euuid %s%s event posted\n", __func__,
2773 so->last_pid, so->so_gencnt,
2774 SOCK_DOM(so), SOCK_TYPE(so),
2775 ev_ifdenied.ev_data.epid, buf,
2776 ((so->so_flags & SOF_DELEGATED) ?
2777 " [delegated]" : ""));
2778 }
2779 netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data,
2780 sizeof(ev_ifdenied));
2781 }
2782 }
2783
2784 /*
2785 * Make a copy of a sockaddr in a malloced buffer of type SONAME.
2786 */
2787 struct sockaddr *
dup_sockaddr(struct sockaddr * sa,int canwait)2788 dup_sockaddr(struct sockaddr *sa, int canwait)
2789 {
2790 struct sockaddr *sa2;
2791
2792 sa2 = SA(alloc_sockaddr(sa->sa_len, canwait ? Z_WAITOK : Z_NOWAIT));
2793 if (sa2 != NULL) {
2794 SOCKADDR_COPY(sa, sa2, sa->sa_len);
2795 }
2796 return sa2;
2797 }
2798
2799 /*
2800 * Create an external-format (``xsocket'') structure using the information
2801 * in the kernel-format socket structure pointed to by so. This is done
2802 * to reduce the spew of irrelevant information over this interface,
2803 * to isolate user code from changes in the kernel structure, and
2804 * potentially to provide information-hiding if we decide that
2805 * some of this information should be hidden from users.
2806 */
2807 void
sotoxsocket(struct socket * so,struct xsocket * xso)2808 sotoxsocket(struct socket *so, struct xsocket *xso)
2809 {
2810 xso->xso_len = sizeof(*xso);
2811 xso->xso_so = (_XSOCKET_PTR(struct socket *))VM_KERNEL_ADDRHASH(so);
2812 xso->so_type = so->so_type;
2813 xso->so_options = (short)(so->so_options & 0xffff);
2814 xso->so_linger = so->so_linger;
2815 xso->so_state = so->so_state;
2816 xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRHASH(so->so_pcb);
2817 if (so->so_proto) {
2818 xso->xso_protocol = SOCK_PROTO(so);
2819 xso->xso_family = SOCK_DOM(so);
2820 } else {
2821 xso->xso_protocol = xso->xso_family = 0;
2822 }
2823 xso->so_qlen = so->so_qlen;
2824 xso->so_incqlen = so->so_incqlen;
2825 xso->so_qlimit = so->so_qlimit;
2826 xso->so_timeo = so->so_timeo;
2827 xso->so_error = so->so_error;
2828 xso->so_pgid = so->so_pgid;
2829 xso->so_oobmark = so->so_oobmark;
2830 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2831 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
2832 xso->so_uid = kauth_cred_getuid(so->so_cred);
2833 }
2834
2835
2836 #if XNU_TARGET_OS_OSX
2837
2838 void
sotoxsocket64(struct socket * so,struct xsocket64 * xso)2839 sotoxsocket64(struct socket *so, struct xsocket64 *xso)
2840 {
2841 xso->xso_len = sizeof(*xso);
2842 xso->xso_so = (u_int64_t)VM_KERNEL_ADDRHASH(so);
2843 xso->so_type = so->so_type;
2844 xso->so_options = (short)(so->so_options & 0xffff);
2845 xso->so_linger = so->so_linger;
2846 xso->so_state = so->so_state;
2847 xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRHASH(so->so_pcb);
2848 if (so->so_proto) {
2849 xso->xso_protocol = SOCK_PROTO(so);
2850 xso->xso_family = SOCK_DOM(so);
2851 } else {
2852 xso->xso_protocol = xso->xso_family = 0;
2853 }
2854 xso->so_qlen = so->so_qlen;
2855 xso->so_incqlen = so->so_incqlen;
2856 xso->so_qlimit = so->so_qlimit;
2857 xso->so_timeo = so->so_timeo;
2858 xso->so_error = so->so_error;
2859 xso->so_pgid = so->so_pgid;
2860 xso->so_oobmark = so->so_oobmark;
2861 sbtoxsockbuf(&so->so_snd, &xso->so_snd);
2862 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
2863 xso->so_uid = kauth_cred_getuid(so->so_cred);
2864 }
2865
2866 #endif /* XNU_TARGET_OS_OSX */
2867
2868 /*
2869 * This does the same for sockbufs. Note that the xsockbuf structure,
2870 * since it is always embedded in a socket, does not include a self
2871 * pointer nor a length. We make this entry point public in case
2872 * some other mechanism needs it.
2873 */
2874 void
sbtoxsockbuf(struct sockbuf * sb,struct xsockbuf * xsb)2875 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
2876 {
2877 xsb->sb_cc = sb->sb_cc;
2878 xsb->sb_hiwat = sb->sb_hiwat;
2879 xsb->sb_mbcnt = sb->sb_mbcnt;
2880 xsb->sb_mbmax = sb->sb_mbmax;
2881 xsb->sb_lowat = sb->sb_lowat;
2882 xsb->sb_flags = (short)sb->sb_flags;
2883 xsb->sb_timeo = (short)
2884 ((sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick);
2885 if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
2886 xsb->sb_timeo = 1;
2887 }
2888 }
2889
2890 /*
2891 * Based on the policy set by an all knowing decison maker, throttle sockets
2892 * that either have been marked as belonging to "background" process.
2893 */
2894 inline int
soisthrottled(struct socket * so)2895 soisthrottled(struct socket *so)
2896 {
2897 return so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND;
2898 }
2899
2900 inline int
soisprivilegedtraffic(struct socket * so)2901 soisprivilegedtraffic(struct socket *so)
2902 {
2903 return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0;
2904 }
2905
2906 inline int
soissrcbackground(struct socket * so)2907 soissrcbackground(struct socket *so)
2908 {
2909 return (so->so_flags1 & SOF1_TRAFFIC_MGT_SO_BACKGROUND) ||
2910 IS_SO_TC_BACKGROUND(so->so_traffic_class);
2911 }
2912
2913 inline int
soissrcrealtime(struct socket * so)2914 soissrcrealtime(struct socket *so)
2915 {
2916 return so->so_traffic_class >= SO_TC_AV &&
2917 so->so_traffic_class <= SO_TC_VO;
2918 }
2919
2920 inline int
soissrcbesteffort(struct socket * so)2921 soissrcbesteffort(struct socket *so)
2922 {
2923 return so->so_traffic_class == SO_TC_BE ||
2924 so->so_traffic_class == SO_TC_RD ||
2925 so->so_traffic_class == SO_TC_OAM;
2926 }
2927
2928 void
soclearfastopen(struct socket * so)2929 soclearfastopen(struct socket *so)
2930 {
2931 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
2932 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
2933 }
2934
2935 if (so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
2936 so->so_flags1 &= ~SOF1_DATA_IDEMPOTENT;
2937 }
2938 }
2939
/*
 * Socket event callback that deliberately does nothing; all arguments
 * are ignored.
 */
void
sonullevent(struct socket *so, void *arg, uint32_t hint)
{
	(void)so;
	(void)arg;
	(void)hint;
}
2945
2946 /*
2947 * Here is the definition of some of the basic objects in the kern.ipc
2948 * branch of the MIB.
2949 */
2950 SYSCTL_NODE(_kern, KERN_IPC, ipc,
2951 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "IPC");
2952
2953 /* Check that the maximum socket buffer size is within a range */
2954
2955 static int
2956 sysctl_sb_max SYSCTL_HANDLER_ARGS
2957 {
2958 #pragma unused(oidp, arg1, arg2)
2959 u_int32_t new_value;
2960 int changed = 0;
2961 int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t),
2962 &new_value, &changed);
2963 if (!error && changed) {
2964 if (new_value > LOW_SB_MAX && new_value <= high_sb_max) {
2965 sb_max = new_value;
2966 } else {
2967 error = ERANGE;
2968 }
2969 }
2970 return error;
2971 }
2972
2973 SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
2974 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2975 &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
2976
2977 SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor,
2978 CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, "");
2979
2980 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters,
2981 CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, "");
2982
2983 SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes,
2984 CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");
2985
2986 SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat,
2987 CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1,
2988 "Enable socket queue limit compatibility");
2989
2990 /*
2991 * Hack alert -- rdar://33572856
2992 * A loopback test we cannot change was failing because it sets
2993 * SO_SENDTIMEO to 5 seconds and that's also the value
2994 * of the minimum persist timer. Because of the persist timer,
2995 * the connection was not idle for 5 seconds and SO_SNDTIMEO
2996 * was not triggering at 5 seconds causing the test failure.
2997 * As a workaround we check the sysctl soqlencomp the test is already
2998 * setting to set disable auto tuning of the receive buffer.
2999 */
3000
3001 extern u_int32_t tcp_do_autorcvbuf;
3002
3003 static int
3004 sysctl_soqlencomp SYSCTL_HANDLER_ARGS
3005 {
3006 #pragma unused(oidp, arg1, arg2)
3007 u_int32_t new_value;
3008 int changed = 0;
3009 int error = sysctl_io_number(req, soqlencomp, sizeof(u_int32_t),
3010 &new_value, &changed);
3011 if (!error && changed) {
3012 soqlencomp = new_value;
3013 if (new_value != 0) {
3014 tcp_do_autorcvbuf = 0;
3015 tcptv_persmin_val = 6 * TCP_RETRANSHZ;
3016 }
3017 }
3018 return error;
3019 }
3020 SYSCTL_PROC(_kern_ipc, OID_AUTO, soqlencomp,
3021 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3022 &soqlencomp, 0, &sysctl_soqlencomp, "IU", "");
3023
3024 SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy");
3025
3026 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
3027 &net_io_policy_log, 0, "");
3028
3029 #if CONFIG_PROC_UUID_POLICY
3030 SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED,
3031 &net_io_policy_uuid, 0, "");
3032 #endif /* CONFIG_PROC_UUID_POLICY */
3033