1 /*
2 * Copyright (c) 2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/domain.h>
30 #include <sys/socket.h>
31 #include <sys/protosw.h>
32 #include <sys/mcache.h>
33 #include <sys/systm.h>
34 #include <sys/sysctl.h>
35 #include <sys/random.h>
36 #include <sys/mbuf.h>
37 #include <sys/vsock_domain.h>
38 #include <sys/vsock_transport.h>
39 #include <kern/task.h>
40 #include <kern/zalloc.h>
41 #include <kern/locks.h>
42 #include <machine/atomic.h>
43
44 #define sotovsockpcb(so) ((struct vsockpcb *)(so)->so_pcb)
45
46 #define VSOCK_PORT_RESERVED 1024
47
48 /* VSock Protocol Globals */
49
50 static struct vsock_transport * _Atomic the_vsock_transport = NULL;
51 static ZONE_DEFINE(vsockpcb_zone, "vsockpcbzone",
52 sizeof(struct vsockpcb), ZC_NONE);
53 static LCK_GRP_DECLARE(vsock_lock_grp, "vsock");
54 static struct vsockpcbinfo vsockinfo;
55
56 static uint32_t vsock_sendspace = VSOCK_MAX_PACKET_SIZE * 8;
57 static uint32_t vsock_recvspace = VSOCK_MAX_PACKET_SIZE * 8;
58
59 /* VSock PCB Helpers */
60
61 static uint32_t
vsock_get_peer_space(struct vsockpcb * pcb)62 vsock_get_peer_space(struct vsockpcb *pcb)
63 {
64 return pcb->peer_buf_alloc - (pcb->tx_cnt - pcb->peer_fwd_cnt);
65 }
66
67 static struct vsockpcb *
vsock_get_matching_pcb(struct vsock_address src,struct vsock_address dst)68 vsock_get_matching_pcb(struct vsock_address src, struct vsock_address dst)
69 {
70 struct vsockpcb *preferred = NULL;
71 struct vsockpcb *match = NULL;
72 struct vsockpcb *pcb = NULL;
73
74 lck_rw_lock_shared(&vsockinfo.bound_lock);
75 LIST_FOREACH(pcb, &vsockinfo.bound, bound) {
76 // Source cid and port must match. Only destination port must match. (Allows for a changing CID during migration)
77 socket_lock(pcb->so, 1);
78 if ((pcb->so->so_state & SS_ISCONNECTED || pcb->so->so_state & SS_ISCONNECTING) &&
79 pcb->local_address.cid == src.cid && pcb->local_address.port == src.port &&
80 pcb->remote_address.port == dst.port) {
81 preferred = pcb;
82 break;
83 } else if ((pcb->local_address.cid == src.cid || pcb->local_address.cid == VMADDR_CID_ANY) &&
84 pcb->local_address.port == src.port) {
85 match = pcb;
86 }
87 socket_unlock(pcb->so, 1);
88 }
89 if (!preferred && match) {
90 socket_lock(match->so, 1);
91 preferred = match;
92 }
93 lck_rw_done(&vsockinfo.bound_lock);
94
95 return preferred;
96 }
97
98 static errno_t
vsock_bind_address_if_free(struct vsockpcb * pcb,uint32_t local_cid,uint32_t local_port,uint32_t remote_cid,uint32_t remote_port)99 vsock_bind_address_if_free(struct vsockpcb *pcb, uint32_t local_cid, uint32_t local_port, uint32_t remote_cid, uint32_t remote_port)
100 {
101 socket_lock_assert_owned(pcb->so);
102
103 // Privileged ports.
104 if (local_port != VMADDR_PORT_ANY && local_port < VSOCK_PORT_RESERVED &&
105 current_task() != kernel_task && proc_suser(current_proc()) != 0) {
106 return EACCES;
107 }
108
109 bool taken = false;
110 const bool check_remote = (remote_cid != VMADDR_CID_ANY && remote_port != VMADDR_PORT_ANY);
111
112 struct vsockpcb *pcb_match = NULL;
113
114 socket_unlock(pcb->so, 0);
115 lck_rw_lock_exclusive(&vsockinfo.bound_lock);
116 LIST_FOREACH(pcb_match, &vsockinfo.bound, bound) {
117 socket_lock(pcb_match->so, 1);
118 if (pcb == pcb_match ||
119 (!check_remote && pcb_match->local_address.port == local_port) ||
120 (check_remote && pcb_match->local_address.port == local_port &&
121 pcb_match->remote_address.cid == remote_cid && pcb_match->remote_address.port == remote_port)) {
122 socket_unlock(pcb_match->so, 1);
123 taken = true;
124 break;
125 }
126 socket_unlock(pcb_match->so, 1);
127 }
128 socket_lock(pcb->so, 0);
129 if (!taken) {
130 pcb->local_address = (struct vsock_address) { .cid = local_cid, .port = local_port };
131 pcb->remote_address = (struct vsock_address) { .cid = remote_cid, .port = remote_port };
132 LIST_INSERT_HEAD(&vsockinfo.bound, pcb, bound);
133 }
134 lck_rw_done(&vsockinfo.bound_lock);
135
136 return taken ? EADDRINUSE : 0;
137 }
138
139 static errno_t
vsock_bind_address(struct vsockpcb * pcb,struct vsock_address laddr,struct vsock_address raddr)140 vsock_bind_address(struct vsockpcb *pcb, struct vsock_address laddr, struct vsock_address raddr)
141 {
142 if (!pcb) {
143 return EINVAL;
144 }
145
146 socket_lock_assert_owned(pcb->so);
147
148 // Certain CIDs are reserved.
149 if (laddr.cid == VMADDR_CID_HYPERVISOR || laddr.cid == VMADDR_CID_RESERVED || laddr.cid == VMADDR_CID_HOST) {
150 return EADDRNOTAVAIL;
151 }
152
153 // Remote address must be fully specified or not specified at all.
154 if ((raddr.cid == VMADDR_CID_ANY) ^ (raddr.port == VMADDR_PORT_ANY)) {
155 return EINVAL;
156 }
157
158 // Cannot bind if already bound.
159 if (pcb->local_address.port != VMADDR_PORT_ANY) {
160 return EINVAL;
161 }
162
163 uint32_t transport_cid;
164 struct vsock_transport *transport = pcb->transport;
165 errno_t error = transport->get_cid(transport->provider, &transport_cid);
166 if (error) {
167 return error;
168 }
169
170 // Local CID must be this transport's CID or any.
171 if (laddr.cid != transport_cid && laddr.cid != VMADDR_CID_ANY) {
172 return EINVAL;
173 }
174
175 if (laddr.port != VMADDR_PORT_ANY) {
176 error = vsock_bind_address_if_free(pcb, laddr.cid, laddr.port, raddr.cid, raddr.port);
177 } else {
178 socket_unlock(pcb->so, 0);
179 lck_mtx_lock(&vsockinfo.port_lock);
180 socket_lock(pcb->so, 0);
181
182 const uint32_t first = VSOCK_PORT_RESERVED;
183 const uint32_t last = VMADDR_PORT_ANY - 1;
184 uint32_t count = last - first + 1;
185 uint32_t *last_port = &vsockinfo.last_port;
186
187 if (pcb->so->so_flags & SOF_BINDRANDOMPORT) {
188 uint32_t random = 0;
189 read_frandom(&random, sizeof(random));
190 *last_port = first + (random % count);
191 }
192
193 do {
194 if (count == 0) {
195 lck_mtx_unlock(&vsockinfo.port_lock);
196 return EADDRNOTAVAIL;
197 }
198 count--;
199
200 ++*last_port;
201 if (*last_port < first || *last_port > last) {
202 *last_port = first;
203 }
204
205 error = vsock_bind_address_if_free(pcb, laddr.cid, *last_port, raddr.cid, raddr.port);
206 } while (error);
207
208 lck_mtx_unlock(&vsockinfo.port_lock);
209 }
210
211 return error;
212 }
213
214 static void
vsock_unbind_pcb_locked(struct vsockpcb * pcb,bool is_locked)215 vsock_unbind_pcb_locked(struct vsockpcb *pcb, bool is_locked)
216 {
217 if (!pcb) {
218 return;
219 }
220
221 struct socket *so = pcb->so;
222 socket_lock_assert_owned(so);
223
224 // Bail if disconnect and already unbound.
225 if (so->so_state & SS_ISDISCONNECTED) {
226 assert(pcb->bound.le_next == NULL);
227 assert(pcb->bound.le_prev == NULL);
228 return;
229 }
230
231 if (!is_locked) {
232 socket_unlock(so, 0);
233 lck_rw_lock_exclusive(&vsockinfo.bound_lock);
234 socket_lock(so, 0);
235
236 // Case where some other thread also called unbind() on this socket while waiting to acquire its lock.
237 if (!pcb->bound.le_prev) {
238 soisdisconnected(so);
239 lck_rw_done(&vsockinfo.bound_lock);
240 return;
241 }
242 }
243
244 soisdisconnected(so);
245
246 LIST_REMOVE(pcb, bound);
247 pcb->bound.le_next = NULL;
248 pcb->bound.le_prev = NULL;
249
250 if (!is_locked) {
251 lck_rw_done(&vsockinfo.bound_lock);
252 }
253 }
254
255 static void
vsock_unbind_pcb(struct vsockpcb * pcb)256 vsock_unbind_pcb(struct vsockpcb *pcb)
257 {
258 vsock_unbind_pcb_locked(pcb, false);
259 }
260
261 static struct sockaddr *
vsock_new_sockaddr(struct vsock_address * address)262 vsock_new_sockaddr(struct vsock_address *address)
263 {
264 if (!address) {
265 return NULL;
266 }
267
268 struct sockaddr_vm *addr;
269 addr = (struct sockaddr_vm *)alloc_sockaddr(sizeof(*addr),
270 Z_WAITOK | Z_NOFAIL);
271
272 addr->svm_family = AF_VSOCK;
273 addr->svm_port = address->port;
274 addr->svm_cid = address->cid;
275
276 return (struct sockaddr *)addr;
277 }
278
279 static errno_t
vsock_pcb_send_message(struct vsockpcb * pcb,enum vsock_operation operation,mbuf_t m)280 vsock_pcb_send_message(struct vsockpcb *pcb, enum vsock_operation operation, mbuf_t m)
281 {
282 if (!pcb) {
283 if (m != NULL) {
284 mbuf_freem_list(m);
285 }
286 return EINVAL;
287 }
288
289 socket_lock_assert_owned(pcb->so);
290
291 errno_t error;
292
293 struct vsock_address dst = pcb->remote_address;
294 if (dst.cid == VMADDR_CID_ANY || dst.port == VMADDR_PORT_ANY) {
295 if (m != NULL) {
296 mbuf_freem_list(m);
297 }
298 return EINVAL;
299 }
300
301 struct vsock_address src = pcb->local_address;
302 if (src.cid == VMADDR_CID_ANY) {
303 uint32_t transport_cid;
304 struct vsock_transport *transport = pcb->transport;
305 error = transport->get_cid(transport->provider, &transport_cid);
306 if (error) {
307 if (m != NULL) {
308 mbuf_freem_list(m);
309 }
310 return error;
311 }
312 src.cid = transport_cid;
313 }
314
315 uint32_t buf_alloc = pcb->so->so_rcv.sb_hiwat;
316 uint32_t fwd_cnt = pcb->fwd_cnt;
317
318 if (src.cid == dst.cid) {
319 pcb->last_buf_alloc = buf_alloc;
320 pcb->last_fwd_cnt = fwd_cnt;
321
322 socket_unlock(pcb->so, 0);
323 error = vsock_put_message(src, dst, operation, buf_alloc, fwd_cnt, m);
324 socket_lock(pcb->so, 0);
325 } else {
326 struct vsock_transport *transport = pcb->transport;
327 error = transport->put_message(transport->provider, src, dst, operation, buf_alloc, fwd_cnt, m);
328
329 if (!error) {
330 pcb->last_buf_alloc = buf_alloc;
331 pcb->last_fwd_cnt = fwd_cnt;
332 }
333 }
334
335 return error;
336 }
337
338 static errno_t
vsock_pcb_reset_address(struct vsock_address src,struct vsock_address dst)339 vsock_pcb_reset_address(struct vsock_address src, struct vsock_address dst)
340 {
341 if (dst.cid == VMADDR_CID_ANY || dst.port == VMADDR_PORT_ANY) {
342 return EINVAL;
343 }
344
345 errno_t error = 0;
346 struct vsock_transport *transport = NULL;
347
348 if (src.cid == VMADDR_CID_ANY) {
349 transport = os_atomic_load(&the_vsock_transport, relaxed);
350 if (transport == NULL) {
351 return ENODEV;
352 }
353
354 uint32_t transport_cid;
355 error = transport->get_cid(transport->provider, &transport_cid);
356 if (error) {
357 return error;
358 }
359 src.cid = transport_cid;
360 }
361
362 if (src.cid == dst.cid) {
363 // Reset both sockets.
364 struct vsockpcb *pcb = vsock_get_matching_pcb(src, dst);
365 if (pcb) {
366 socket_lock_assert_owned(pcb->so);
367 vsock_unbind_pcb(pcb);
368 socket_unlock(pcb->so, 1);
369 }
370 } else {
371 if (!transport) {
372 transport = os_atomic_load(&the_vsock_transport, relaxed);
373 if (transport == NULL) {
374 return ENODEV;
375 }
376 }
377 error = transport->put_message(transport->provider, src, dst, VSOCK_RESET, 0, 0, NULL);
378 }
379
380 return error;
381 }
382
383 static errno_t
vsock_pcb_safe_reset_address(struct vsockpcb * pcb,struct vsock_address src,struct vsock_address dst)384 vsock_pcb_safe_reset_address(struct vsockpcb *pcb, struct vsock_address src, struct vsock_address dst)
385 {
386 if (pcb) {
387 socket_lock_assert_owned(pcb->so);
388 socket_unlock(pcb->so, 0);
389 }
390 errno_t error = vsock_pcb_reset_address(src, dst);
391 if (pcb) {
392 socket_lock(pcb->so, 0);
393 }
394 return error;
395 }
396
397 static errno_t
vsock_pcb_connect(struct vsockpcb * pcb)398 vsock_pcb_connect(struct vsockpcb *pcb)
399 {
400 return vsock_pcb_send_message(pcb, VSOCK_REQUEST, NULL);
401 }
402
403 static errno_t
vsock_pcb_respond(struct vsockpcb * pcb)404 vsock_pcb_respond(struct vsockpcb *pcb)
405 {
406 return vsock_pcb_send_message(pcb, VSOCK_RESPONSE, NULL);
407 }
408
409 static errno_t
vsock_pcb_send(struct vsockpcb * pcb,mbuf_t m)410 vsock_pcb_send(struct vsockpcb *pcb, mbuf_t m)
411 {
412 return vsock_pcb_send_message(pcb, VSOCK_PAYLOAD, m);
413 }
414
415 static errno_t
vsock_pcb_shutdown_send(struct vsockpcb * pcb)416 vsock_pcb_shutdown_send(struct vsockpcb *pcb)
417 {
418 return vsock_pcb_send_message(pcb, VSOCK_SHUTDOWN_SEND, NULL);
419 }
420
421 static errno_t
vsock_pcb_reset(struct vsockpcb * pcb)422 vsock_pcb_reset(struct vsockpcb *pcb)
423 {
424 return vsock_pcb_send_message(pcb, VSOCK_RESET, NULL);
425 }
426
427 static errno_t
vsock_pcb_credit_update(struct vsockpcb * pcb)428 vsock_pcb_credit_update(struct vsockpcb *pcb)
429 {
430 return vsock_pcb_send_message(pcb, VSOCK_CREDIT_UPDATE, NULL);
431 }
432
433 static errno_t
vsock_pcb_credit_request(struct vsockpcb * pcb)434 vsock_pcb_credit_request(struct vsockpcb *pcb)
435 {
436 return vsock_pcb_send_message(pcb, VSOCK_CREDIT_REQUEST, NULL);
437 }
438
439 static errno_t
vsock_disconnect_pcb_common(struct vsockpcb * pcb,bool is_locked)440 vsock_disconnect_pcb_common(struct vsockpcb *pcb, bool is_locked)
441 {
442 socket_lock_assert_owned(pcb->so);
443 vsock_unbind_pcb_locked(pcb, is_locked);
444 return vsock_pcb_reset(pcb);
445 }
446
447 static errno_t
vsock_disconnect_pcb_locked(struct vsockpcb * pcb)448 vsock_disconnect_pcb_locked(struct vsockpcb *pcb)
449 {
450 return vsock_disconnect_pcb_common(pcb, true);
451 }
452
453 static errno_t
vsock_disconnect_pcb(struct vsockpcb * pcb)454 vsock_disconnect_pcb(struct vsockpcb *pcb)
455 {
456 return vsock_disconnect_pcb_common(pcb, false);
457 }
458
459 static errno_t
vsock_sockaddr_vm_validate(struct vsockpcb * pcb,struct sockaddr_vm * addr)460 vsock_sockaddr_vm_validate(struct vsockpcb *pcb, struct sockaddr_vm *addr)
461 {
462 if (!pcb || !pcb->so || !addr) {
463 return EINVAL;
464 }
465
466 // Validate address length.
467 if (addr->svm_len < sizeof(struct sockaddr_vm)) {
468 return EINVAL;
469 }
470
471 // Validate address family.
472 if (addr->svm_family != AF_UNSPEC && addr->svm_family != AF_VSOCK) {
473 return EAFNOSUPPORT;
474 }
475
476 // Only stream is supported currently.
477 if (pcb->so->so_type != SOCK_STREAM) {
478 return EAFNOSUPPORT;
479 }
480
481 return 0;
482 }
483
484 /* VSock Receive Handlers */
485
486 static errno_t
vsock_put_message_connected(struct vsockpcb * pcb,enum vsock_operation op,mbuf_t m)487 vsock_put_message_connected(struct vsockpcb *pcb, enum vsock_operation op, mbuf_t m)
488 {
489 socket_lock_assert_owned(pcb->so);
490
491 errno_t error = 0;
492
493 switch (op) {
494 case VSOCK_SHUTDOWN:
495 socantsendmore(pcb->so);
496 socantrcvmore(pcb->so);
497 break;
498 case VSOCK_SHUTDOWN_RECEIVE:
499 socantsendmore(pcb->so);
500 break;
501 case VSOCK_SHUTDOWN_SEND:
502 socantrcvmore(pcb->so);
503 break;
504 case VSOCK_PAYLOAD:
505 // Add data to the receive queue then wakeup any reading threads.
506 error = !sbappendstream(&pcb->so->so_rcv, m);
507 if (!error) {
508 sorwakeup(pcb->so);
509 }
510 break;
511 case VSOCK_RESET:
512 vsock_unbind_pcb(pcb);
513 break;
514 default:
515 error = ENOTSUP;
516 break;
517 }
518
519 return error;
520 }
521
522 static errno_t
vsock_put_message_connecting(struct vsockpcb * pcb,enum vsock_operation op)523 vsock_put_message_connecting(struct vsockpcb *pcb, enum vsock_operation op)
524 {
525 socket_lock_assert_owned(pcb->so);
526
527 errno_t error = 0;
528
529 switch (op) {
530 case VSOCK_RESPONSE:
531 soisconnected(pcb->so);
532 break;
533 case VSOCK_RESET:
534 pcb->so->so_error = EAGAIN;
535 error = vsock_disconnect_pcb(pcb);
536 break;
537 default:
538 vsock_disconnect_pcb(pcb);
539 error = ENOTSUP;
540 break;
541 }
542
543 return error;
544 }
545
546 static errno_t
vsock_put_message_listening(struct vsockpcb * pcb,enum vsock_operation op,struct vsock_address src,struct vsock_address dst)547 vsock_put_message_listening(struct vsockpcb *pcb, enum vsock_operation op, struct vsock_address src, struct vsock_address dst)
548 {
549 socket_lock_assert_owned(pcb->so);
550
551 struct sockaddr_vm addr;
552 struct socket *so2 = NULL;
553 struct vsockpcb *pcb2 = NULL;
554
555 errno_t error = 0;
556
557 switch (op) {
558 case VSOCK_REQUEST:
559 addr = (struct sockaddr_vm) {
560 .svm_len = sizeof(addr),
561 .svm_family = AF_VSOCK,
562 .svm_reserved1 = 0,
563 .svm_port = pcb->local_address.port,
564 .svm_cid = pcb->local_address.cid
565 };
566 so2 = sonewconn(pcb->so, 0, (struct sockaddr *)&addr);
567 if (!so2) {
568 // It is likely that the backlog is full. Deny this request.
569 vsock_pcb_safe_reset_address(pcb, dst, src);
570 error = ECONNREFUSED;
571 break;
572 }
573
574 pcb2 = sotovsockpcb(so2);
575 if (!pcb2) {
576 error = EINVAL;
577 goto done;
578 }
579
580 error = vsock_bind_address(pcb2, dst, src);
581 if (error) {
582 goto done;
583 }
584
585 error = vsock_pcb_respond(pcb2);
586 if (error) {
587 goto done;
588 }
589
590 soisconnected(so2);
591
592 done:
593 if (error) {
594 if (pcb2) {
595 vsock_unbind_pcb(pcb2);
596 } else {
597 soisdisconnected(so2);
598 }
599 socket_unlock(so2, 1);
600 vsock_pcb_reset_address(dst, src);
601 } else {
602 socket_unlock(so2, 0);
603 }
604 socket_lock(pcb->so, 0);
605
606 break;
607 case VSOCK_RESET:
608 error = vsock_pcb_safe_reset_address(pcb, dst, src);
609 break;
610 default:
611 vsock_pcb_safe_reset_address(pcb, dst, src);
612 error = ENOTSUP;
613 break;
614 }
615
616 return error;
617 }
618
619 /* VSock Transport */
620
621 errno_t
vsock_add_transport(struct vsock_transport * transport)622 vsock_add_transport(struct vsock_transport *transport)
623 {
624 if (transport == NULL || transport->provider == NULL) {
625 return EINVAL;
626 }
627 if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport, NULL, transport, acq_rel)) {
628 return EEXIST;
629 }
630 return 0;
631 }
632
633 errno_t
vsock_remove_transport(struct vsock_transport * transport)634 vsock_remove_transport(struct vsock_transport *transport)
635 {
636 if (!os_atomic_cmpxchg((void * volatile *)&the_vsock_transport, transport, NULL, acq_rel)) {
637 return ENODEV;
638 }
639 return 0;
640 }
641
642 errno_t
vsock_reset_transport(struct vsock_transport * transport)643 vsock_reset_transport(struct vsock_transport *transport)
644 {
645 if (transport == NULL) {
646 return EINVAL;
647 }
648
649 errno_t error = 0;
650 struct vsockpcb *pcb = NULL;
651 struct vsockpcb *tmp_pcb = NULL;
652
653 lck_rw_lock_exclusive(&vsockinfo.bound_lock);
654 LIST_FOREACH_SAFE(pcb, &vsockinfo.bound, bound, tmp_pcb) {
655 // Disconnect this transport's sockets. Listen and bind sockets must stay alive.
656 socket_lock(pcb->so, 1);
657 if (pcb->transport == transport && pcb->so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) {
658 errno_t dc_error = vsock_disconnect_pcb_locked(pcb);
659 if (dc_error && !error) {
660 error = dc_error;
661 }
662 }
663 socket_unlock(pcb->so, 1);
664 }
665 lck_rw_done(&vsockinfo.bound_lock);
666
667 return error;
668 }
669
670 errno_t
vsock_put_message(struct vsock_address src,struct vsock_address dst,enum vsock_operation op,uint32_t buf_alloc,uint32_t fwd_cnt,mbuf_t m)671 vsock_put_message(struct vsock_address src, struct vsock_address dst, enum vsock_operation op, uint32_t buf_alloc, uint32_t fwd_cnt, mbuf_t m)
672 {
673 struct vsockpcb *pcb = vsock_get_matching_pcb(dst, src);
674 if (!pcb) {
675 if (op != VSOCK_RESET) {
676 vsock_pcb_reset_address(dst, src);
677 }
678 if (m != NULL) {
679 mbuf_freem_list(m);
680 }
681 return EINVAL;
682 }
683
684 socket_lock_assert_owned(pcb->so);
685
686 struct socket *so = pcb->so;
687 errno_t error = 0;
688
689 // Check if the peer's buffer has changed. Update our view of the peer's forwarded bytes.
690 int buffers_changed = (pcb->peer_buf_alloc != buf_alloc) || (pcb->peer_fwd_cnt) != fwd_cnt;
691 pcb->peer_buf_alloc = buf_alloc;
692 pcb->peer_fwd_cnt = fwd_cnt;
693
694 // Peer's buffer has enough space for the next packet. Notify any threads waiting for space.
695 if (buffers_changed && vsock_get_peer_space(pcb) >= pcb->waiting_send_size) {
696 sowwakeup(so);
697 }
698
699 switch (op) {
700 case VSOCK_CREDIT_REQUEST:
701 error = vsock_pcb_credit_update(pcb);
702 break;
703 case VSOCK_CREDIT_UPDATE:
704 break;
705 default:
706 if (so->so_state & SS_ISCONNECTED) {
707 error = vsock_put_message_connected(pcb, op, m);
708 m = NULL;
709 } else if (so->so_state & SS_ISCONNECTING) {
710 error = vsock_put_message_connecting(pcb, op);
711 } else if (so->so_options & SO_ACCEPTCONN) {
712 error = vsock_put_message_listening(pcb, op, src, dst);
713 } else {
714 // Reset the connection for other states such as 'disconnecting'.
715 error = vsock_disconnect_pcb(pcb);
716 if (!error) {
717 error = ENODEV;
718 }
719 }
720 break;
721 }
722 socket_unlock(so, 1);
723
724 if (m != NULL) {
725 mbuf_freem_list(m);
726 }
727
728 return error;
729 }
730
731 /* VSock Sysctl */
732
733 static int
734 vsock_pcblist SYSCTL_HANDLER_ARGS
735 {
736 #pragma unused(oidp,arg2)
737
738 int error;
739
740 // Only stream is supported.
741 if ((intptr_t)arg1 != SOCK_STREAM) {
742 return EINVAL;
743 }
744
745 // Get the generation count and the count of all vsock sockets.
746 lck_rw_lock_shared(&vsockinfo.all_lock);
747 uint64_t n = vsockinfo.all_pcb_count;
748 vsock_gen_t gen_count = vsockinfo.vsock_gencnt;
749 lck_rw_done(&vsockinfo.all_lock);
750
751 const size_t xpcb_len = sizeof(struct xvsockpcb);
752 struct xvsockpgen xvg;
753
754 /*
755 * The process of preparing the PCB list is too time-consuming and
756 * resource-intensive to repeat twice on every request.
757 */
758 if (req->oldptr == USER_ADDR_NULL) {
759 req->oldidx = (size_t)(2 * sizeof(xvg) + (n + n / 8) * xpcb_len);
760 return 0;
761 }
762
763 if (req->newptr != USER_ADDR_NULL) {
764 return EPERM;
765 }
766
767 bzero(&xvg, sizeof(xvg));
768 xvg.xvg_len = sizeof(xvg);
769 xvg.xvg_count = n;
770 xvg.xvg_gen = gen_count;
771 xvg.xvg_sogen = so_gencnt;
772 error = SYSCTL_OUT(req, &xvg, sizeof(xvg));
773 if (error) {
774 return error;
775 }
776
777 // Return if no sockets exist.
778 if (n == 0) {
779 return 0;
780 }
781
782 lck_rw_lock_shared(&vsockinfo.all_lock);
783
784 n = 0;
785 struct vsockpcb *pcb = NULL;
786 TAILQ_FOREACH(pcb, &vsockinfo.all, all) {
787 // Bail if there is not enough user buffer for this next socket.
788 if (req->oldlen - req->oldidx - sizeof(xvg) < xpcb_len) {
789 break;
790 }
791
792 // Populate the socket structure.
793 socket_lock(pcb->so, 1);
794 if (pcb->vsock_gencnt <= gen_count) {
795 struct xvsockpcb xpcb;
796 bzero(&xpcb, xpcb_len);
797 xpcb.xv_len = xpcb_len;
798 xpcb.xv_vsockpp = (uint64_t)VM_KERNEL_ADDRHASH(pcb);
799 xpcb.xvp_local_cid = pcb->local_address.cid;
800 xpcb.xvp_local_port = pcb->local_address.port;
801 xpcb.xvp_remote_cid = pcb->remote_address.cid;
802 xpcb.xvp_remote_port = pcb->remote_address.port;
803 xpcb.xvp_rxcnt = pcb->fwd_cnt;
804 xpcb.xvp_txcnt = pcb->tx_cnt;
805 xpcb.xvp_peer_rxhiwat = pcb->peer_buf_alloc;
806 xpcb.xvp_peer_rxcnt = pcb->peer_fwd_cnt;
807 xpcb.xvp_last_pid = pcb->so->last_pid;
808 xpcb.xvp_gencnt = pcb->vsock_gencnt;
809 if (pcb->so) {
810 sotoxsocket(pcb->so, &xpcb.xv_socket);
811 }
812 socket_unlock(pcb->so, 1);
813
814 error = SYSCTL_OUT(req, &xpcb, xpcb_len);
815 if (error != 0) {
816 break;
817 }
818 n++;
819 } else {
820 socket_unlock(pcb->so, 1);
821 }
822 }
823
824 // Update the generation count to match the sockets being returned.
825 gen_count = vsockinfo.vsock_gencnt;
826
827 lck_rw_done(&vsockinfo.all_lock);
828
829 if (!error) {
830 /*
831 * Give the user an updated idea of our state.
832 * If the generation differs from what we told
833 * her before, she knows that something happened
834 * while we were processing this request, and it
835 * might be necessary to retry.
836 */
837 bzero(&xvg, sizeof(xvg));
838 xvg.xvg_len = sizeof(xvg);
839 xvg.xvg_count = n;
840 xvg.xvg_gen = gen_count;
841 xvg.xvg_sogen = so_gencnt;
842 error = SYSCTL_OUT(req, &xvg, sizeof(xvg));
843 }
844
845 return error;
846 }
847
848 #ifdef SYSCTL_DECL
849 SYSCTL_NODE(_net, OID_AUTO, vsock, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "vsock");
850 SYSCTL_UINT(_net_vsock, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
851 &vsock_sendspace, 0, "Maximum outgoing vsock datagram size");
852 SYSCTL_UINT(_net_vsock, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
853 &vsock_recvspace, 0, "Maximum incoming vsock datagram size");
854 SYSCTL_PROC(_net_vsock, OID_AUTO, pcblist,
855 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
856 (caddr_t)(long)SOCK_STREAM, 0, vsock_pcblist, "S,xvsockpcb",
857 "List of active vsock sockets");
858
859 SYSCTL_UINT(_net_vsock, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
860 (u_int *)&vsockinfo.all_pcb_count, 0, "");
861 #endif
862
863 /* VSock Protocol */
864
865 static int
vsock_attach(struct socket * so,int proto,struct proc * p)866 vsock_attach(struct socket *so, int proto, struct proc *p)
867 {
868 #pragma unused(proto, p)
869
870 // Reserve send and receive buffers.
871 errno_t error = soreserve(so, vsock_sendspace, vsock_recvspace);
872 if (error) {
873 return error;
874 }
875
876 // Attach should only be run once per socket.
877 struct vsockpcb *pcb = sotovsockpcb(so);
878 if (pcb) {
879 return EINVAL;
880 }
881
882 // Get the transport for this socket.
883 struct vsock_transport *transport = os_atomic_load(&the_vsock_transport, relaxed);
884 if (transport == NULL) {
885 return ENODEV;
886 }
887
888 // Initialize the vsock protocol control block.
889 pcb = zalloc_flags(vsockpcb_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
890 pcb->so = so;
891 pcb->transport = transport;
892 pcb->local_address = (struct vsock_address) {
893 .cid = VMADDR_CID_ANY,
894 .port = VMADDR_PORT_ANY
895 };
896 pcb->remote_address = (struct vsock_address) {
897 .cid = VMADDR_CID_ANY,
898 .port = VMADDR_PORT_ANY
899 };
900 so->so_pcb = pcb;
901
902 // Tell the transport that this socket has attached.
903 error = transport->attach_socket(transport->provider);
904 if (error) {
905 zfree(vsockpcb_zone, pcb);
906 so->so_pcb = NULL;
907 return error;
908 }
909
910 // Add to the list of all vsock sockets.
911 lck_rw_lock_exclusive(&vsockinfo.all_lock);
912 TAILQ_INSERT_TAIL(&vsockinfo.all, pcb, all);
913 vsockinfo.all_pcb_count++;
914 pcb->vsock_gencnt = ++vsockinfo.vsock_gencnt;
915 lck_rw_done(&vsockinfo.all_lock);
916
917 return 0;
918 }
919
920 static int
vsock_control(struct socket * so,u_long cmd,caddr_t __sized_by (IOCPARM_LEN (cmd))data,struct ifnet * ifp,struct proc * p)921 vsock_control(struct socket *so, u_long cmd, caddr_t __sized_by(IOCPARM_LEN(cmd)) data, struct ifnet *ifp, struct proc *p)
922 {
923 #pragma unused(ifp)
924
925 VERIFY(so != NULL || p == kernproc);
926
927 if (cmd != IOCTL_VM_SOCKETS_GET_LOCAL_CID) {
928 return EINVAL;
929 }
930
931 struct vsock_transport *transport;
932 if (so) {
933 struct vsockpcb *pcb = sotovsockpcb(so);
934 if (pcb == NULL) {
935 return EINVAL;
936 }
937 transport = pcb->transport;
938 } else {
939 transport = os_atomic_load(&the_vsock_transport, relaxed);
940 }
941
942 if (transport == NULL) {
943 return ENODEV;
944 }
945
946 uint32_t transport_cid;
947 errno_t error = transport->get_cid(transport->provider, &transport_cid);
948 if (error) {
949 return error;
950 }
951
952 memcpy(data, &transport_cid, sizeof(transport_cid));
953
954 return 0;
955 }
956
957 static int
vsock_detach(struct socket * so)958 vsock_detach(struct socket *so)
959 {
960 struct vsockpcb *pcb = sotovsockpcb(so);
961 if (pcb == NULL) {
962 return EINVAL;
963 }
964
965 vsock_unbind_pcb(pcb);
966
967 // Tell the transport that this socket has detached.
968 struct vsock_transport *transport = pcb->transport;
969 errno_t error = transport->detach_socket(transport->provider);
970 if (error) {
971 return error;
972 }
973
974 // Remove from the list of all vsock sockets.
975 lck_rw_lock_exclusive(&vsockinfo.all_lock);
976 TAILQ_REMOVE(&vsockinfo.all, pcb, all);
977 pcb->all.tqe_next = NULL;
978 pcb->all.tqe_prev = NULL;
979 vsockinfo.all_pcb_count--;
980 vsockinfo.vsock_gencnt++;
981 lck_rw_done(&vsockinfo.all_lock);
982
983 // Mark this socket for deallocation.
984 so->so_flags |= SOF_PCBCLEARING;
985
986 return 0;
987 }
988
989 static int
vsock_abort(struct socket * so)990 vsock_abort(struct socket *so)
991 {
992 return vsock_detach(so);
993 }
994
995 static int
vsock_bind(struct socket * so,struct sockaddr * nam,struct proc * p)996 vsock_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
997 {
998 #pragma unused(p)
999
1000 struct vsockpcb *pcb = sotovsockpcb(so);
1001 if (pcb == NULL) {
1002 return EINVAL;
1003 }
1004
1005 struct sockaddr_vm *addr = (struct sockaddr_vm *)nam;
1006
1007 errno_t error = vsock_sockaddr_vm_validate(pcb, addr);
1008 if (error) {
1009 return error;
1010 }
1011
1012 struct vsock_address laddr = (struct vsock_address) {
1013 .cid = addr->svm_cid,
1014 .port = addr->svm_port,
1015 };
1016
1017 struct vsock_address raddr = (struct vsock_address) {
1018 .cid = VMADDR_CID_ANY,
1019 .port = VMADDR_PORT_ANY,
1020 };
1021
1022 error = vsock_bind_address(pcb, laddr, raddr);
1023 if (error) {
1024 return error;
1025 }
1026
1027 return 0;
1028 }
1029
1030 static int
vsock_listen(struct socket * so,struct proc * p)1031 vsock_listen(struct socket *so, struct proc *p)
1032 {
1033 #pragma unused(p)
1034
1035 struct vsockpcb *pcb = sotovsockpcb(so);
1036 if (pcb == NULL) {
1037 return EINVAL;
1038 }
1039
1040 // Only stream is supported currently.
1041 if (so->so_type != SOCK_STREAM) {
1042 return EAFNOSUPPORT;
1043 }
1044
1045 struct vsock_address *addr = &pcb->local_address;
1046
1047 if (addr->port == VMADDR_CID_ANY) {
1048 return EFAULT;
1049 }
1050
1051 struct vsock_transport *transport = pcb->transport;
1052 uint32_t transport_cid;
1053 errno_t error = transport->get_cid(transport->provider, &transport_cid);
1054 if (error) {
1055 return error;
1056 }
1057
1058 // Can listen on the transport's cid or any.
1059 if (addr->cid != transport_cid && addr->cid != VMADDR_CID_ANY) {
1060 return EFAULT;
1061 }
1062
1063 return 0;
1064 }
1065
1066 static int
vsock_accept(struct socket * so,struct sockaddr ** nam)1067 vsock_accept(struct socket *so, struct sockaddr **nam)
1068 {
1069 struct vsockpcb *pcb = sotovsockpcb(so);
1070 if (pcb == NULL) {
1071 return EINVAL;
1072 }
1073
1074 // Do not accept disconnected sockets.
1075 if (so->so_state & SS_ISDISCONNECTED) {
1076 return ECONNABORTED;
1077 }
1078
1079 *nam = vsock_new_sockaddr(&pcb->remote_address);
1080
1081 return 0;
1082 }
1083
1084 static int
vsock_connect(struct socket * so,struct sockaddr * nam,struct proc * p)1085 vsock_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
1086 {
1087 #pragma unused(p)
1088
1089 struct vsockpcb *pcb = sotovsockpcb(so);
1090 if (pcb == NULL) {
1091 return EINVAL;
1092 }
1093
1094 struct sockaddr_vm *addr = (struct sockaddr_vm *)nam;
1095
1096 errno_t error = vsock_sockaddr_vm_validate(pcb, addr);
1097 if (error) {
1098 return error;
1099 }
1100
1101 uint32_t transport_cid;
1102 struct vsock_transport *transport = pcb->transport;
1103 error = transport->get_cid(transport->provider, &transport_cid);
1104 if (error) {
1105 return error;
1106 }
1107
1108 // Only supporting connections to the host, hypervisor, or self for now.
1109 if (addr->svm_cid != VMADDR_CID_HOST &&
1110 addr->svm_cid != VMADDR_CID_HYPERVISOR &&
1111 addr->svm_cid != transport_cid) {
1112 return EFAULT;
1113 }
1114
1115 soisconnecting(so);
1116
1117 // Set the remote and local address.
1118 struct vsock_address remote_addr = (struct vsock_address) {
1119 .cid = addr->svm_cid,
1120 .port = addr->svm_port,
1121 };
1122
1123 struct vsock_address local_addr = (struct vsock_address) {
1124 .cid = transport_cid,
1125 .port = VMADDR_PORT_ANY,
1126 };
1127
1128 // Bind to the address.
1129 error = vsock_bind_address(pcb, local_addr, remote_addr);
1130 if (error) {
1131 goto cleanup;
1132 }
1133
1134 // Attempt a connection using the socket's transport.
1135 error = vsock_pcb_connect(pcb);
1136 if (error) {
1137 goto cleanup;
1138 }
1139
1140 if ((so->so_state & SS_ISCONNECTED) == 0) {
1141 // Don't wait for peer's response if non-blocking.
1142 if (so->so_state & SS_NBIO) {
1143 goto done;
1144 }
1145
1146 struct timespec ts = (struct timespec) {
1147 .tv_sec = so->so_snd.sb_timeo.tv_sec,
1148 .tv_nsec = so->so_snd.sb_timeo.tv_usec * 1000,
1149 };
1150
1151 lck_mtx_t *mutex_held;
1152 if (so->so_proto->pr_getlock != NULL) {
1153 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1154 } else {
1155 mutex_held = so->so_proto->pr_domain->dom_mtx;
1156 }
1157
1158 // Wait until we receive a response to the connect request.
1159 error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH, "vsock_connect", &ts);
1160 if (error) {
1161 if (error == EAGAIN) {
1162 error = ETIMEDOUT;
1163 }
1164 goto cleanup;
1165 }
1166 }
1167
1168 cleanup:
1169 if (so->so_error && !error) {
1170 error = so->so_error;
1171 so->so_error = 0;
1172 }
1173 if (!error) {
1174 error = !(so->so_state & SS_ISCONNECTED);
1175 }
1176 if (error) {
1177 vsock_unbind_pcb(pcb);
1178 }
1179
1180 done:
1181 return error;
1182 }
1183
1184 static int
vsock_disconnect(struct socket * so)1185 vsock_disconnect(struct socket *so)
1186 {
1187 struct vsockpcb *pcb = sotovsockpcb(so);
1188 if (pcb == NULL) {
1189 return EINVAL;
1190 }
1191
1192 return vsock_disconnect_pcb(pcb);
1193 }
1194
1195 static int
vsock_sockaddr(struct socket * so,struct sockaddr ** nam)1196 vsock_sockaddr(struct socket *so, struct sockaddr **nam)
1197 {
1198 struct vsockpcb *pcb = sotovsockpcb(so);
1199 if (pcb == NULL) {
1200 return EINVAL;
1201 }
1202
1203 *nam = vsock_new_sockaddr(&pcb->local_address);
1204
1205 return 0;
1206 }
1207
1208 static int
vsock_peeraddr(struct socket * so,struct sockaddr ** nam)1209 vsock_peeraddr(struct socket *so, struct sockaddr **nam)
1210 {
1211 struct vsockpcb *pcb = sotovsockpcb(so);
1212 if (pcb == NULL) {
1213 return EINVAL;
1214 }
1215
1216 *nam = vsock_new_sockaddr(&pcb->remote_address);
1217
1218 return 0;
1219 }
1220
1221 static int
vsock_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,proc_t p)1222 vsock_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, proc_t p)
1223 {
1224 #pragma unused(flags, nam, p)
1225
1226 struct vsockpcb *pcb = sotovsockpcb(so);
1227 if (pcb == NULL || m == NULL) {
1228 return EINVAL;
1229 }
1230
1231 if (control != NULL) {
1232 m_freem(control);
1233 return EOPNOTSUPP;
1234 }
1235
1236 // Ensure this socket is connected.
1237 if ((so->so_state & SS_ISCONNECTED) == 0) {
1238 if (m != NULL) {
1239 mbuf_freem_list(m);
1240 }
1241 return EPERM;
1242 }
1243
1244 errno_t error;
1245
1246 // rdar://84098487 (SEED: Web: Virtio-socket sent data lost after 128KB)
1247 // For writes larger than the default `sosendmaxchain` of 65536, vsock_send() is called multiple times per write().
1248 // Only the first call to vsock_send() is passed a valid mbuf packet, while subsequent calls are not marked as a packet
1249 // with a valid length. We should mark all mbufs as a packet and set the correct packet length so that the downstream
1250 // socket transport layer can correctly generate physical segments.
1251 if (!(mbuf_flags(m) & MBUF_PKTHDR)) {
1252 if (!(mbuf_flags(m) & M_EXT)) {
1253 struct mbuf *header = NULL;
1254 MGETHDR(header, M_WAITOK, MT_HEADER);
1255 if (header == NULL) {
1256 if (m != NULL) {
1257 mbuf_freem_list(m);
1258 }
1259 return ENOBUFS;
1260 }
1261 header->m_next = m;
1262 m = header;
1263 } else {
1264 mbuf_setflags(m, mbuf_flags(m) | MBUF_PKTHDR);
1265 }
1266
1267 size_t len = 0;
1268 struct mbuf *next = m;
1269 while (next) {
1270 len += mbuf_len(next);
1271 next = mbuf_next(next);
1272 }
1273 mbuf_pkthdr_setlen(m, len);
1274 }
1275
1276 const size_t len = mbuf_pkthdr_len(m);
1277 uint32_t free_space = vsock_get_peer_space(pcb);
1278
1279 // Ensure the peer has enough space in their receive buffer.
1280 while (len > free_space) {
1281 // Record the number of free peer bytes necessary before we can send.
1282 if (len > pcb->waiting_send_size) {
1283 pcb->waiting_send_size = len;
1284 }
1285
1286 // Send a credit request.
1287 error = vsock_pcb_credit_request(pcb);
1288 if (error) {
1289 if (m != NULL) {
1290 mbuf_freem_list(m);
1291 }
1292 return error;
1293 }
1294
1295 // Check again in case free space was automatically updated in loopback case.
1296 free_space = vsock_get_peer_space(pcb);
1297 if (len <= free_space) {
1298 pcb->waiting_send_size = 0;
1299 break;
1300 }
1301
1302 // Bail if this is a non-blocking socket.
1303 if (so->so_state & SS_NBIO) {
1304 if (m != NULL) {
1305 mbuf_freem_list(m);
1306 }
1307 return EWOULDBLOCK;
1308 }
1309
1310 // Wait until our peer has enough free space in their receive buffer.
1311 error = sbwait(&so->so_snd);
1312 pcb->waiting_send_size = 0;
1313 if (error) {
1314 if (m != NULL) {
1315 mbuf_freem_list(m);
1316 }
1317 return error;
1318 }
1319
1320 // Bail if an error occured or we can't send more.
1321 if (so->so_state & SS_CANTSENDMORE) {
1322 if (m != NULL) {
1323 mbuf_freem_list(m);
1324 }
1325 return EPIPE;
1326 } else if (so->so_error) {
1327 error = so->so_error;
1328 so->so_error = 0;
1329 if (m != NULL) {
1330 mbuf_freem_list(m);
1331 }
1332 return error;
1333 }
1334
1335 free_space = vsock_get_peer_space(pcb);
1336 }
1337
1338 // Send a payload over the transport.
1339 error = vsock_pcb_send(pcb, m);
1340 if (error) {
1341 return error;
1342 }
1343
1344 pcb->tx_cnt += len;
1345
1346 return 0;
1347 }
1348
1349 static int
vsock_shutdown(struct socket * so)1350 vsock_shutdown(struct socket *so)
1351 {
1352 struct vsockpcb *pcb = sotovsockpcb(so);
1353 if (pcb == NULL) {
1354 return EINVAL;
1355 }
1356
1357 socantsendmore(so);
1358
1359 // Tell peer we will no longer send.
1360 errno_t error = vsock_pcb_shutdown_send(pcb);
1361 if (error) {
1362 return error;
1363 }
1364
1365 return 0;
1366 }
1367
1368 static int
vsock_soreceive(struct socket * so,struct sockaddr ** psa,struct uio * uio,struct mbuf ** mp0,struct mbuf ** controlp,int * flagsp)1369 vsock_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
1370 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1371 {
1372 struct vsockpcb *pcb = sotovsockpcb(so);
1373 if (pcb == NULL) {
1374 return EINVAL;
1375 }
1376
1377 user_ssize_t length = uio_resid(uio);
1378 int result = soreceive(so, psa, uio, mp0, controlp, flagsp);
1379 length -= uio_resid(uio);
1380
1381 socket_lock(so, 1);
1382
1383 pcb->fwd_cnt += length;
1384
1385 const uint32_t threshold = VSOCK_MAX_PACKET_SIZE;
1386
1387 // Send a credit update if is possible that the peer will no longer send.
1388 if ((pcb->fwd_cnt - pcb->last_fwd_cnt + threshold) >= pcb->last_buf_alloc) {
1389 errno_t error = vsock_pcb_credit_update(pcb);
1390 if (!result && error) {
1391 result = error;
1392 }
1393 }
1394
1395 socket_unlock(so, 1);
1396
1397 return result;
1398 }
1399
1400 static struct pr_usrreqs vsock_usrreqs = {
1401 .pru_abort = vsock_abort,
1402 .pru_attach = vsock_attach,
1403 .pru_control = vsock_control,
1404 .pru_detach = vsock_detach,
1405 .pru_bind = vsock_bind,
1406 .pru_listen = vsock_listen,
1407 .pru_accept = vsock_accept,
1408 .pru_connect = vsock_connect,
1409 .pru_disconnect = vsock_disconnect,
1410 .pru_send = vsock_send,
1411 .pru_shutdown = vsock_shutdown,
1412 .pru_sockaddr = vsock_sockaddr,
1413 .pru_peeraddr = vsock_peeraddr,
1414 .pru_sosend = sosend,
1415 .pru_soreceive = vsock_soreceive,
1416 };
1417
1418 static void
vsock_init(struct protosw * pp,struct domain * dp)1419 vsock_init(struct protosw *pp, struct domain *dp)
1420 {
1421 #pragma unused(dp)
1422
1423 static int vsock_initialized = 0;
1424 VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
1425 if (!os_atomic_cmpxchg((volatile int *)&vsock_initialized, 0, 1, acq_rel)) {
1426 return;
1427 }
1428
1429 // Setup VSock protocol info struct.
1430 lck_rw_init(&vsockinfo.all_lock, &vsock_lock_grp, LCK_ATTR_NULL);
1431 lck_rw_init(&vsockinfo.bound_lock, &vsock_lock_grp, LCK_ATTR_NULL);
1432 lck_mtx_init(&vsockinfo.port_lock, &vsock_lock_grp, LCK_ATTR_NULL);
1433 TAILQ_INIT(&vsockinfo.all);
1434 LIST_INIT(&vsockinfo.bound);
1435 vsockinfo.last_port = VMADDR_PORT_ANY;
1436 }
1437
1438 static int
vsock_sofreelastref(struct socket * so,int dealloc)1439 vsock_sofreelastref(struct socket *so, int dealloc)
1440 {
1441 socket_lock_assert_owned(so);
1442
1443 struct vsockpcb *pcb = sotovsockpcb(so);
1444 if (pcb != NULL) {
1445 zfree(vsockpcb_zone, pcb);
1446 }
1447
1448 so->so_pcb = NULL;
1449 sofreelastref(so, dealloc);
1450
1451 return 0;
1452 }
1453
1454 static int
vsock_unlock(struct socket * so,int refcount,void * lr_saved)1455 vsock_unlock(struct socket *so, int refcount, void *lr_saved)
1456 {
1457 lck_mtx_t *mutex_held = so->so_proto->pr_domain->dom_mtx;
1458 #ifdef MORE_LOCKING_DEBUG
1459 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1460 #endif
1461 so->unlock_lr[so->next_unlock_lr] = lr_saved;
1462 so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1463
1464 if (refcount) {
1465 if (so->so_usecount <= 0) {
1466 panic("%s: bad refcount=%d so=%p (%d, %d, %d) "
1467 "lrh=%s", __func__, so->so_usecount, so,
1468 SOCK_DOM(so), so->so_type,
1469 SOCK_PROTO(so), solockhistory_nr(so));
1470 /* NOTREACHED */
1471 }
1472
1473 so->so_usecount--;
1474 if (so->so_usecount == 0) {
1475 vsock_sofreelastref(so, 1);
1476 }
1477 }
1478 lck_mtx_unlock(mutex_held);
1479
1480 return 0;
1481 }
1482
1483 static struct protosw vsocksw[] = {
1484 {
1485 .pr_type = SOCK_STREAM,
1486 .pr_protocol = 0,
1487 .pr_flags = PR_CONNREQUIRED | PR_WANTRCVD,
1488 .pr_init = vsock_init,
1489 .pr_unlock = vsock_unlock,
1490 .pr_usrreqs = &vsock_usrreqs,
1491 }
1492 };
1493
1494 static const int vsock_proto_count = (sizeof(vsocksw) / sizeof(struct protosw));
1495
1496 /* VSock Domain */
1497
1498 static struct domain *vsock_domain = NULL;
1499
1500 static void
vsock_dinit(struct domain * dp)1501 vsock_dinit(struct domain *dp)
1502 {
1503 // The VSock domain is initialized with a singleton pattern.
1504 VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
1505 VERIFY(vsock_domain == NULL);
1506 vsock_domain = dp;
1507
1508 // Add protocols and initialize.
1509 for (int i = 0; i < vsock_proto_count; i++) {
1510 net_add_proto((struct protosw *)&vsocksw[i], dp, 1);
1511 }
1512 }
1513
1514 struct domain vsockdomain_s = {
1515 .dom_family = PF_VSOCK,
1516 .dom_name = "vsock",
1517 .dom_init = vsock_dinit,
1518 .dom_maxrtkey = sizeof(struct sockaddr_vm),
1519 .dom_protohdrlen = sizeof(struct sockaddr_vm),
1520 };
1521