1 /*
2 * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)rtsock.c 8.5 (Berkeley) 11/2/94
61 */
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kauth.h>
66 #include <sys/kernel.h>
67 #include <sys/proc.h>
68 #include <sys/malloc.h>
69 #include <sys/mbuf.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/domain.h>
73 #include <sys/protosw.h>
74 #include <sys/syslog.h>
75 #include <sys/mcache.h>
76 #include <kern/locks.h>
77 #include <kern/uipc_domain.h>
78 #include <sys/codesign.h>
79
80 #include <net/if.h>
81 #include <net/route.h>
82 #include <net/dlil.h>
83 #include <net/raw_cb.h>
84 #include <net/net_sysctl.h>
85
86 #include <netinet/in.h>
87 #include <netinet/in_var.h>
88 #include <netinet/in_arp.h>
89 #include <netinet/ip.h>
90 #include <netinet/ip6.h>
91 #include <netinet6/nd6.h>
92
93 #include <net/sockaddr_utils.h>
94
95 #include <IOKit/IOBSD.h>
96
/* Global routing statistics and the PF_ROUTE domain (defined elsewhere). */
extern struct rtstat_64 rtstat;
extern struct domain routedomain_s;
static struct domain *routedomain = NULL;

/*
 * Well-known source/destination addresses used when routing messages are
 * looped back to PF_ROUTE listeners, plus an all-zero AF_INET sockaddr.
 */
static struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr sa_zero = { .sa_len = sizeof(sa_zero), .sa_family = AF_INET, .sa_data = { 0, } };

/*
 * Counters of attached routing sockets, maintained with relaxed atomics
 * in rts_attach()/rts_detach(); any_count gates loopback of messages.
 */
struct route_cb {
	u_int32_t ip_count;     /* attached w/ AF_INET */
	u_int32_t ip6_count;    /* attached w/ AF_INET6 */
	u_int32_t any_count;    /* total attached */
};

static struct route_cb route_cb;

/*
 * State carried through the sysctl tree walkers (sysctl_dumpentry() et al):
 * a scratch buffer of w_tmemsize bytes plus the request being serviced.
 */
struct walkarg {
	int     w_tmemsize;
	int     w_op, w_arg;
	caddr_t w_tmem __sized_by(w_tmemsize);
	struct sysctl_req *w_req;
};

typedef struct walkarg * __single walkarg_ref_t;
121
/* Forward declarations: pr_usrreqs handlers for PF_ROUTE sockets. */
static void route_dinit(struct domain *);
static int rts_abort(struct socket *);
static int rts_attach(struct socket *, int, struct proc *);
static int rts_bind(struct socket *, struct sockaddr *, struct proc *);
static int rts_connect(struct socket *, struct sockaddr *, struct proc *);
static int rts_detach(struct socket *);
static int rts_disconnect(struct socket *);
static int rts_peeraddr(struct socket *, struct sockaddr **);
static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *,
    struct mbuf *, struct proc *);
static int rts_shutdown(struct socket *);
static int rts_sockaddr(struct socket *, struct sockaddr **);

/* Forward declarations: routing-message processing and sysctl helpers. */
static int route_output(struct mbuf *, struct socket *);
static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
    struct sockaddr *, unsigned int);
static int rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX]);
static struct mbuf *rt_msg1(u_char, struct rt_addrinfo *);
static int rt_msg2(u_char, struct rt_addrinfo *, caddr_t __indexable, struct walkarg *,
    kauth_cred_t *);
static int sysctl_dumpentry(struct radix_node *rn, void *vw);
static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
static int sysctl_iflist(int af, struct walkarg *w);
static int sysctl_iflist2(int af, struct walkarg *w);
static int sysctl_rtstat(struct sysctl_req *);
static int sysctl_rtstat_64(struct sysctl_req *);
static int sysctl_rttrash(struct sysctl_req *);
static int sysctl_rtsock SYSCTL_HANDLER_ARGS;

/* net.routetable: read-only dump of the routing table. */
SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED,
    sysctl_rtsock, "");

/* net.route: parent node for routing tunables. */
SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "routing");
157
/*
 * Round x DOWN to a multiple of 1024 (P2ALIGN truncates to the power-of-2
 * boundary); x is assumed to be positive.
 */
#define ALIGN_BYTES(x) do { \
	x = (uint32_t)P2ALIGN(x, 1024); \
} while(0)

/* Round a up to the next multiple of sizeof(uint32_t); 0 maps to 4. */
#define ROUNDUP32(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) : \
	sizeof (uint32_t))


/* True when the route has an interface address with a valid ifa_addr. */
#define RT_HAS_IFADDR(rt) \
	((rt)->rt_ifa != NULL && (rt)->rt_ifa->ifa_addr != NULL)
170
171 /*
172 * It really doesn't make any sense at all for this code to share much
173 * with raw_usrreq.c, since its functionality is so restricted. XXX
174 */
175 static int
rts_abort(struct socket * so)176 rts_abort(struct socket *so)
177 {
178 return raw_usrreqs.pru_abort(so);
179 }
180
181 /* pru_accept is EOPNOTSUPP */
182
183 static int
rts_attach(struct socket * so,int proto,struct proc * p)184 rts_attach(struct socket *so, int proto, struct proc *p)
185 {
186 #pragma unused(p)
187 struct rawcb *rp;
188 int error;
189
190 VERIFY(so->so_pcb == NULL);
191
192 rp = kalloc_type(struct rawcb, Z_WAITOK_ZERO_NOFAIL);
193 so->so_pcb = (caddr_t)rp;
194 /* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
195 error = raw_attach(so, proto);
196 rp = sotorawcb(so);
197 if (error) {
198 kfree_type(struct rawcb, rp);
199 so->so_pcb = NULL;
200 so->so_flags |= SOF_PCBCLEARING;
201 return error;
202 }
203
204 switch (rp->rcb_proto.sp_protocol) {
205 case AF_INET:
206 os_atomic_inc(&route_cb.ip_count, relaxed);
207 break;
208 case AF_INET6:
209 os_atomic_inc(&route_cb.ip6_count, relaxed);
210 break;
211 }
212 rp->rcb_faddr = &route_src;
213 os_atomic_inc(&route_cb.any_count, relaxed);
214 /* the socket is already locked when we enter rts_attach */
215 soisconnected(so);
216 so->so_options |= SO_USELOOPBACK;
217 return 0;
218 }
219
220 static int
rts_bind(struct socket * so,struct sockaddr * nam,struct proc * p)221 rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
222 {
223 return raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */
224 }
225
226 static int
rts_connect(struct socket * so,struct sockaddr * nam,struct proc * p)227 rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
228 {
229 return raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */
230 }
231
232 /* pru_connect2 is EOPNOTSUPP */
233 /* pru_control is EOPNOTSUPP */
234
235 static int
rts_detach(struct socket * so)236 rts_detach(struct socket *so)
237 {
238 struct rawcb *rp = sotorawcb(so);
239
240 VERIFY(rp != NULL);
241
242 switch (rp->rcb_proto.sp_protocol) {
243 case AF_INET:
244 os_atomic_dec(&route_cb.ip_count, relaxed);
245 break;
246 case AF_INET6:
247 os_atomic_dec(&route_cb.ip6_count, relaxed);
248 break;
249 }
250 os_atomic_dec(&route_cb.any_count, relaxed);
251 return raw_usrreqs.pru_detach(so);
252 }
253
254 static int
rts_disconnect(struct socket * so)255 rts_disconnect(struct socket *so)
256 {
257 return raw_usrreqs.pru_disconnect(so);
258 }
259
260 /* pru_listen is EOPNOTSUPP */
261
262 static int
rts_peeraddr(struct socket * so,struct sockaddr ** nam)263 rts_peeraddr(struct socket *so, struct sockaddr **nam)
264 {
265 return raw_usrreqs.pru_peeraddr(so, nam);
266 }
267
268 /* pru_rcvd is EOPNOTSUPP */
269 /* pru_rcvoob is EOPNOTSUPP */
270
271 static int
rts_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)272 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
273 struct mbuf *control, struct proc *p)
274 {
275 return raw_usrreqs.pru_send(so, flags, m, nam, control, p);
276 }
277
278 /* pru_sense is null */
279
280 static int
rts_shutdown(struct socket * so)281 rts_shutdown(struct socket *so)
282 {
283 return raw_usrreqs.pru_shutdown(so);
284 }
285
286 static int
rts_sockaddr(struct socket * so,struct sockaddr ** nam)287 rts_sockaddr(struct socket *so, struct sockaddr **nam)
288 {
289 return raw_usrreqs.pru_sockaddr(so, nam);
290 }
291
/*
 * User-request dispatch table for PF_ROUTE sockets; handlers not listed
 * here fall back to the protocol defaults (mostly EOPNOTSUPP).
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort =            rts_abort,
	.pru_attach =           rts_attach,
	.pru_bind =             rts_bind,
	.pru_connect =          rts_connect,
	.pru_detach =           rts_detach,
	.pru_disconnect =       rts_disconnect,
	.pru_peeraddr =         rts_peeraddr,
	.pru_send =             rts_send,
	.pru_shutdown =         rts_shutdown,
	.pru_sockaddr =         rts_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
306
/*
 * View a routing-message buffer as a struct rt_msghdr pointer.
 * -Wcast-align is suppressed for the cast; callers pass buffers obtained
 * from kalloc_data(), which are presumably suitably aligned — NOTE(review):
 * confirm no caller passes an unaligned offset into a buffer.
 */
static struct rt_msghdr *
__attribute__((always_inline))
__stateful_pure
_rtm_hdr(caddr_t rtm_data __header_indexable)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"
	return (struct rt_msghdr*)rtm_data;
#pragma clang diagnostic pop
}
317
/*
 * route_output: process one routing message (RTM_ADD/DELETE/GET/CHANGE/LOCK)
 * received on a PF_ROUTE socket.
 *
 * The message is copied out of the mbuf chain into a private buffer
 * (rtm_buf/rtm_len, viewed through the RTM macro), validated, and the
 * requested routing-table operation is performed under rnh_lock.  The
 * (possibly rewritten) message is then copied back into the mbuf and either
 * delivered only to this socket (RTM_GET_SILENT) or broadcast to all
 * PF_ROUTE listeners via raw_input().
 *
 * The socket lock is dropped while rnh_lock is held and reacquired before
 * the reply is queued.  Returns 0 or an errno; the same error is also
 * reflected in rtm_errno of the looped-back message.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
	size_t rtm_len = 0;
	caddr_t rtm_buf __counted_by(rtm_len) = NULL;
	caddr_t rtm_tmpbuf;
	/* RTM is the header view of the current message buffer */
#define RTM _rtm_hdr(rtm_buf)
	rtentry_ref_t rt = NULL;
	rtentry_ref_t saved_nrt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	struct sockaddr tiny_sa_storage[RTAX_MAX];
	int len, error = 0;
	sa_family_t dst_sa_family = 0;
	struct ifnet *ifp = NULL;
	struct sockaddr_in dst_in, gate_in;
	int sendonlytoself = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct rawcb *rp = NULL;
	boolean_t is_router = FALSE;
#define senderr(e) { error = (e); goto flush; }
	if (m == NULL || ((m->m_len < sizeof(intptr_t)) &&
	    (m = m_pullup(m, sizeof(intptr_t))) == NULL)) {
		return ENOBUFS;
	}
	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * Unlock the socket (but keep a reference) it won't be
	 * accessed until raw_input appends to it.
	 */
	socket_unlock(so, 0);
	lck_mtx_lock(rnh_lock);

	/* The claimed message length must match the mbuf payload exactly. */
	len = m->m_pkthdr.len;
	if (len < sizeof(*RTM) ||
	    len != mtod(m, struct rt_msghdr_prelude *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	/*
	 * Allocate the buffer for the message. First we allocate
	 * a temporary buffer, and if successful, set the pointers.
	 */
	rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
	if (rtm_tmpbuf == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	rtm_len = (size_t)len;
	rtm_buf = rtm_tmpbuf;
	rtm_tmpbuf = NULL;


	m_copydata(m, 0, len, rtm_buf);

	if (RTM->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}

	/*
	 * Silent version of RTM_GET for Reachabiltiy APIs. We may change
	 * all RTM_GETs to be silent in the future, so this is private for now.
	 */
	if (RTM->rtm_type == RTM_GET_SILENT) {
		if (!(so->so_options & SO_USELOOPBACK)) {
			senderr(EINVAL);
		}
		sendonlytoself = 1;
		RTM->rtm_type = RTM_GET;
	}

	/*
	 * Perform permission checking, only privileged sockets
	 * may perform operations other than RTM_GET
	 */
	if (RTM->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPERM);
	}

	RTM->rtm_pid = proc_selfpid();
	info.rti_addrs = RTM->rtm_addrs;

	/* Parse the sockaddrs that follow the header into info.rti_info[]. */
	if (rt_xaddrs(rtm_buf + sizeof(struct rt_msghdr), rtm_buf + rtm_len, &info, tiny_sa_storage)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) {
		senderr(EINVAL);
	}

	/*
	 * Normalize a short/odd-sized AF_INET destination into a full-size,
	 * zeroed sockaddr_in so downstream code can rely on its layout.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
	    info.rti_info[RTAX_DST]->sa_len != sizeof(struct sockaddr_in)) {
		/* At minimum, we need up to sin_addr */
		if (info.rti_info[RTAX_DST]->sa_len <
		    offsetof(struct sockaddr_in, sin_zero)) {
			senderr(EINVAL);
		}

		SOCKADDR_ZERO(&dst_in, sizeof(dst_in));
		dst_in.sin_len = sizeof(dst_in);
		dst_in.sin_family = AF_INET;
		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
		info.rti_info[RTAX_DST] = SA(&dst_in);
		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
	} else if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    info.rti_info[RTAX_DST]->sa_len < sizeof(struct sockaddr_in6)) {
		senderr(EINVAL);
	}

	/* Same normalization for the gateway, if one was supplied. */
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
		    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof(struct sockaddr_in)) {
			/* At minimum, we need up to sin_addr */
			if (info.rti_info[RTAX_GATEWAY]->sa_len <
			    offsetof(struct sockaddr_in, sin_zero)) {
				senderr(EINVAL);
			}

			SOCKADDR_ZERO(&gate_in, sizeof(gate_in));
			gate_in.sin_len = sizeof(gate_in);
			gate_in.sin_family = AF_INET;
			gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
			gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
			info.rti_info[RTAX_GATEWAY] = SA(&gate_in);
		} else if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET6 &&
		    info.rti_info[RTAX_GATEWAY]->sa_len < sizeof(struct sockaddr_in6)) {
			senderr(EINVAL);
		}
	}

	/*
	 * Replace the genmask with the canonical, interned copy from the
	 * mask radix tree so rt_genmask pointer comparisons work later.
	 */
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		struct sockaddr *genmask = SA(info.rti_info[RTAX_GENMASK]);
		void *genmask_bytes = __SA_UTILS_CONV_TO_BYTES(genmask);
		t = rn_addmask(genmask_bytes, 0, 1);
		if (t != NULL && SOCKADDR_CMP(genmask, rn_get_key(t), genmask->sa_len) == 0) {
			info.rti_info[RTAX_GENMASK] = SA(rn_get_key(t));
		} else {
			senderr(ENOBUFS);
		}
	}

	/*
	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
	 */
	if (RTM->rtm_flags & RTF_IFSCOPE) {
		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
		    info.rti_info[RTAX_DST]->sa_family != AF_INET6) {
			senderr(EINVAL);
		}
		ifscope = RTM->rtm_index;
	}
	/*
	 * Block changes on INTCOPROC interfaces.
	 */
	if (ifscope != IFSCOPE_NONE) {
		unsigned int intcoproc_scope = 0;
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (IFNET_IS_INTCOPROC(ifp)) {
				intcoproc_scope = ifp->if_index;
				break;
			}
		}
		ifnet_head_done();
		/* pid 0 (kernel-originated requests) is exempt */
		if (intcoproc_scope == ifscope && proc_getpid(current_proc()) != 0) {
			senderr(EINVAL);
		}
	}
	/*
	 * Require entitlement to change management interfaces
	 */
	if (management_control_unrestricted == false && if_management_interface_check_needed == true &&
	    ifscope != IFSCOPE_NONE && proc_getpid(current_proc()) != 0) {
		bool is_management = false;

		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(ifscope)) {
			ifp = ifindex2ifnet[ifscope];
			if (ifp != NULL && IFNET_IS_MANAGEMENT(ifp)) {
				is_management = true;
			}
		}
		ifnet_head_done();

		if (is_management && !IOCurrentTaskHasEntitlement(MANAGEMENT_CONTROL_ENTITLEMENT)) {
			senderr(EINVAL);
		}
	}

	/*
	 * RTF_PROXY can only be set internally from within the kernel.
	 */
	if (RTM->rtm_flags & RTF_PROXY) {
		senderr(EINVAL);
	}

	/*
	 * For AF_INET, always zero out the embedded scope ID. If this is
	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
	 * flag and the corresponding rtm_index value. This is to prevent
	 * false interpretation of the scope ID because it's using the sin_zero
	 * field, which might not be properly cleared by the requestor.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
	}
	/*
	 * For scope-embedded IPv6 addresses with no explicit scope id,
	 * lift the scope out of the address bytes into sin6_scope_id.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    SIN6(info.rti_info[RTAX_DST])->sin6_scope_id == 0) {
		SIN6(info.rti_info[RTAX_DST])->sin6_scope_id = ntohs(SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1]);
		SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1] = 0;
	}

	switch (RTM->rtm_type) {
	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL) {
			senderr(EINVAL);
		}

		error = rtrequest_scoped_locked(RTM_ADD,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0 && saved_nrt != NULL) {
			RT_LOCK(saved_nrt);
			/*
			 * If the route request specified an interface with
			 * IFA and/or IFP, we set the requested interface on
			 * the route with rt_setif.  It would be much better
			 * to do this inside rtrequest, but that would
			 * require passing the desired interface, in some
			 * form, to rtrequest.  Since rtrequest is called in
			 * so many places (roughly 40 in our source), adding
			 * a parameter is to much for us to swallow; this is
			 * something for the FreeBSD developers to tackle.
			 * Instead, we let rtrequest compute whatever
			 * interface it wants, then come in behind it and
			 * stick in the interface that we really want.  This
			 * works reasonably well except when rtrequest can't
			 * figure out what interface to use (with
			 * ifa_withroute) and returns ENETUNREACH.  Ideally
			 * it shouldn't matter if rtrequest can't figure out
			 * the interface if we're going to explicitly set it
			 * ourselves anyway.  But practically we can't
			 * recover here because rtrequest will not do any of
			 * the work necessary to add the route if it can't
			 * find an interface.  As long as there is a default
			 * route that leads to some interface, rtrequest will
			 * find an interface, so this problem should be
			 * rarely encountered.
			 * [email protected]
			 */
			rt_setif(saved_nrt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);
			(void)rt_setmetrics(RTM->rtm_inits, &RTM->rtm_rmx, saved_nrt);
			saved_nrt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			saved_nrt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			if ((saved_nrt->rt_flags & (RTF_UP | RTF_LLINFO)) ==
			    (RTF_UP | RTF_LLINFO)) {
				rt_lookup_qset_id(saved_nrt, false);
			}
			RT_REMREF_LOCKED(saved_nrt);
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		error = rtrequest_scoped_locked(RTM_DELETE,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0) {
			/* report the deleted route back to the requestor */
			rt = saved_nrt;
			RT_LOCK(rt);
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL) {
			senderr(EAFNOSUPPORT);
		}
		/*
		 * Lookup the best match based on the key-mask pair;
		 * callee adds a reference and checks for root node.
		 */
		rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
		    info.rti_info[RTAX_NETMASK], rnh, ifscope);
		if (rt == NULL) {
			senderr(ESRCH);
		}
		RT_LOCK(rt);

		/*
		 * Holding rnh_lock here prevents the possibility of
		 * ifa from changing (e.g. in_ifinit), so it is safe
		 * to access its ifa_addr (down below) without locking.
		 */
		switch (RTM->rtm_type) {
		case RTM_GET: {
			kauth_cred_t cred __single;
			kauth_cred_t* credp;
			struct ifaddr *ifa2;
			/*
			 * The code below serves both the `RTM_GET'
			 * and the `RTM_DELETE' requests.
			 */
report:
			cred = current_cached_proc_cred(PROC_NULL);
			credp = &cred;

			ifa2 = NULL;
			RT_LOCK_ASSERT_HELD(rt);
			info.rti_info[RTAX_DST] = rt_key(rt);
			dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (RTM->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp != NULL) {
					ifnet_lock_shared(ifp);
					ifa2 = ifp->if_lladdr;
					info.rti_info[RTAX_IFP] = ifa2->ifa_addr;
					ifa_addref(ifa2);
					ifnet_lock_done(ifp);
					info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
					RTM->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				RTM->rtm_index = ifp->if_index;
			}

			/*
			 * Determine the length required for the routing information
			 * report.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			len = rt_msg2(RTM->rtm_type, &info, NULL, NULL, credp);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Allocate output message for the routing information report.
			 */
			VERIFY(rtm_tmpbuf == NULL);
			rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
			if (rtm_tmpbuf == NULL) {
				RT_UNLOCK(rt);
				if (ifa2 != NULL) {
					ifa_remref(ifa2);
				}
				senderr(ENOBUFS);
			}

			/*
			 * Create the header for the output message, based
			 * on the request message header and the current routing information.
			 */
			struct rt_msghdr *out_rtm = _rtm_hdr(rtm_tmpbuf);
			bcopy(RTM, out_rtm, sizeof(struct rt_msghdr));
			out_rtm->rtm_flags = rt->rt_flags;
			rt_getmetrics(rt, &out_rtm->rtm_rmx);
			out_rtm->rtm_addrs = info.rti_addrs;

			/*
			 * Populate the body of the output message.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			(void) rt_msg2(out_rtm->rtm_type, &info, rtm_tmpbuf,
			    NULL, &cred);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Replace the "main" routing message with the output message
			 * we have constructed.
			 */
			kfree_data_counted_by(rtm_buf, rtm_len);
			rtm_len = len;
			rtm_buf = rtm_tmpbuf;
			rtm_tmpbuf = NULL;

			if (ifa2 != NULL) {
				ifa_remref(ifa2);
			}

			break;
		}

		case RTM_CHANGE:
			is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;

			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
			    info.rti_info[RTAX_GATEWAY]))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			/*
			 * If they tried to change things but didn't specify
			 * the required gateway, then just use the old one.
			 * This can happen if the user tries to change the
			 * flags on the default route without changing the
			 * default gateway. Changing flags still doesn't work.
			 */
			if ((rt->rt_flags & RTF_GATEWAY) &&
			    info.rti_info[RTAX_GATEWAY] == NULL) {
				info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			}

			/*
			 * On Darwin, we call rt_setif which contains the
			 * equivalent to the code found at this very spot
			 * in BSD.
			 */
			rt_setif(rt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);

			if ((error = rt_setmetrics(RTM->rtm_inits,
			    &RTM->rtm_rmx, rt))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			if (info.rti_info[RTAX_GENMASK]) {
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			}

			/*
			 * Enqueue work item to invoke callback for this route entry
			 * This may not be needed always, but for now issue it anytime
			 * RTM_CHANGE gets called.
			 */
			route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, TRUE);
			/*
			 * If the route is for a router, walk the tree to send refresh
			 * event to protocol cloned entries
			 */
			if (is_router) {
				struct route_event rt_ev;
				route_event_init(&rt_ev, rt, NULL, ROUTE_ENTRY_REFRESH);
				RT_UNLOCK(rt);
				(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
				RT_LOCK(rt);
			}
			OS_FALLTHROUGH;
		case RTM_LOCK:
			rt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			break;
		}
		RT_UNLOCK(rt);
		break;
	default:
		senderr(EOPNOTSUPP);
	}
flush:
	/* Reflect the outcome of the operation into the message header. */
	if (RTM != NULL) {
		if (error) {
			RTM->rtm_errno = error;
		} else {
			RTM->rtm_flags |= RTF_DONE;
		}
	}
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		rtfree_locked(rt);
	}
	lck_mtx_unlock(rnh_lock);

	/* relock the socket now */
	socket_lock(so, 0);
	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			kfree_data_counted_by(rtm_buf, rtm_len);
			m_freem(m);
			return error;
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	/* Copy the (possibly replaced) message back into the mbuf chain. */
	if (rtm_buf != NULL) {
		m_copyback(m, 0, RTM->rtm_msglen, rtm_buf);
		if (m->m_pkthdr.len < RTM->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > RTM->rtm_msglen) {
			m_adj(m, RTM->rtm_msglen - m->m_pkthdr.len);
		}
		kfree_data_counted_by(rtm_buf, rtm_len);
	}
	if (sendonlytoself && m != NULL) {
		error = 0;
		if (sbappendaddr(&so->so_rcv, &route_src, m,
		    NULL, &error) != 0) {
			sorwakeup(so);
		}
		if (error) {
			return error;
		}
	} else {
		struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
		if (rp != NULL) {
			rp->rcb_proto.sp_family = 0; /* Avoid us */
		}
		if (dst_sa_family != 0) {
			route_proto.sp_protocol = dst_sa_family;
		}
		if (m != NULL) {
			socket_unlock(so, 0);
			raw_input(m, &route_proto, &route_src, &route_dst);
			socket_lock(so, 0);
		}
		if (rp != NULL) {
			rp->rcb_proto.sp_family = PF_ROUTE;
		}
	}
	return error;
#undef RTM /* was defined to _rtm_hdr(rtm_buf) */
}
876
877 void
rt_setexpire(struct rtentry * rt,uint64_t expiry)878 rt_setexpire(struct rtentry *rt, uint64_t expiry)
879 {
880 /* set both rt_expire and rmx_expire */
881 rt->rt_expire = expiry;
882 if (expiry) {
883 rt->rt_rmx.rmx_expire =
884 (int32_t)(expiry + rt->base_calendartime -
885 rt->base_uptime);
886 } else {
887 rt->rt_rmx.rmx_expire = 0;
888 }
889 }
890
891 static int
rt_setmetrics(u_int32_t which,struct rt_metrics * in,struct rtentry * out)892 rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
893 {
894 if (!(which & RTV_REFRESH_HOST)) {
895 struct timeval caltime;
896 getmicrotime(&caltime);
897 #define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
898 metric(RTV_RPIPE, rmx_recvpipe);
899 metric(RTV_SPIPE, rmx_sendpipe);
900 metric(RTV_SSTHRESH, rmx_ssthresh);
901 metric(RTV_RTT, rmx_rtt);
902 metric(RTV_RTTVAR, rmx_rttvar);
903 metric(RTV_HOPCOUNT, rmx_hopcount);
904 metric(RTV_MTU, rmx_mtu);
905 metric(RTV_EXPIRE, rmx_expire);
906 #undef metric
907 if (out->rt_rmx.rmx_expire > 0) {
908 /* account for system time change */
909 getmicrotime(&caltime);
910 out->base_calendartime +=
911 NET_CALCULATE_CLOCKSKEW(caltime,
912 out->base_calendartime,
913 net_uptime(), out->base_uptime);
914 rt_setexpire(out,
915 out->rt_rmx.rmx_expire -
916 out->base_calendartime +
917 out->base_uptime);
918 } else {
919 rt_setexpire(out, 0);
920 }
921
922 VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
923 VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
924 } else {
925 /* Only RTV_REFRESH_HOST must be set */
926 if ((which & ~RTV_REFRESH_HOST) ||
927 (out->rt_flags & RTF_STATIC) ||
928 !(out->rt_flags & RTF_LLINFO)) {
929 return EINVAL;
930 }
931
932 if (out->rt_llinfo_refresh == NULL) {
933 return ENOTSUP;
934 }
935
936 out->rt_llinfo_refresh(out);
937 }
938 return 0;
939 }
940
/*
 * Copy a route entry's metrics into `out' for reporting, converting the
 * uptime-based rt_expire into a calendar-time rmx_expire.  Note that this
 * also updates in->base_calendartime in place to absorb any clock skew
 * since the route was stamped.
 */
static void
rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
{
	struct timeval caltime;

	/* rt_expire and rmx_expire must be zero/non-zero together */
	VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
	VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);

	*out = in->rt_rmx;

	if (in->rt_expire != 0) {
		/* account for system time change */
		getmicrotime(&caltime);

		in->base_calendartime +=
		    NET_CALCULATE_CLOCKSKEW(caltime,
		    in->base_calendartime, net_uptime(), in->base_uptime);

		out->rmx_expire = (int32_t)(in->base_calendartime +
		    in->rt_expire - in->base_uptime);
	} else {
		out->rmx_expire = 0;
	}
}
965
966 /*
967 * Set route's interface given info.rti_info[RTAX_IFP],
968 * info.rti_info[RTAX_IFA], and gateway.
969 */
static void
rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
    struct sockaddr *Gate, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct ifnet *ifp = NULL;
	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

	/* Caller must hold rnh_lock and the route's lock */
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK_ASSERT_HELD(rt);

	/* Don't update a defunct route */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return;
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);

	/*
	 * New gateway could require new ifaddr, ifp; flags may also
	 * be different; ifp may be specified by ll sockaddr when
	 * protocol address is ambiguous.
	 *
	 * Candidate selection order: Ifpaddr as a network match first,
	 * then Ifpaddr as an interface name, then Ifaaddr as an exact
	 * address, and finally a route lookup using Gate.
	 */
	if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
	    (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
		ifa_remref(ifa);
		ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
	} else {
		if (ifa != NULL) {
			ifa_remref(ifa);
			ifa = NULL;
		}
		if (Ifpaddr && (ifp = if_withname(Ifpaddr))) {
			if (Gate) {
				ifa = ifaof_ifpforaddr(Gate, ifp);
			} else {
				/* No gateway: fall back to the interface's first address */
				ifnet_lock_shared(ifp);
				ifa = TAILQ_FIRST(&ifp->if_addrhead);
				if (ifa != NULL) {
					ifa_addref(ifa);
				}
				ifnet_lock_done(ifp);
			}
		} else if (Ifaaddr &&
		    (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
			ifp = ifa->ifa_ifp;
		} else if (Gate != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, since holding
			 * rnh_lock here prevents another thread from calling
			 * rt_setgate() on this route. We cannot hold the
			 * lock across ifa_ifwithroute since the lookup done
			 * by that routine may point to the same route.
			 */
			RT_UNLOCK(rt);
			if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
			    rt_key(rt), Gate, ifscope)) != NULL) {
				ifp = ifa->ifa_ifp;
			}
			RT_LOCK(rt);
			/* Don't update a defunct route */
			if (rt->rt_flags & RTF_CONDEMNED) {
				if (ifa != NULL) {
					ifa_remref(ifa);
				}
				/* Release extra ref */
				RT_REMREF_LOCKED(rt);
				return;
			}
		}
	}

	/* trigger route cache reevaluation */
	if (rt_key(rt)->sa_family == AF_INET) {
		routegenid_inet_update();
	} else if (rt_key(rt)->sa_family == AF_INET6) {
		routegenid_inet6_update();
	}

	if (ifa != NULL) {
		struct ifaddr *oifa = rt->rt_ifa;
		/* Reattach only if the resolved ifaddr actually changed */
		if (oifa != ifa) {
			if (oifa != NULL) {
				/* Let the old ifaddr detach its per-route state */
				IFA_LOCK_SPIN(oifa);
				ifa_rtrequest = oifa->ifa_rtrequest;
				IFA_UNLOCK(oifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_DELETE, rt, Gate);
				}
			}
			rtsetifa(rt, ifa);

			if (rt->rt_ifp != ifp) {
				/*
				 * Purge any link-layer info caching.
				 */
				if (rt->rt_llinfo_purge != NULL) {
					rt->rt_llinfo_purge(rt);
				}

				/*
				 * Adjust route ref count for the interfaces.
				 */
				if (rt->rt_if_ref_fn != NULL) {
					rt->rt_if_ref_fn(ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
			}
			rt->rt_ifp = ifp;
			/*
			 * If this is the (non-scoped) default route, record
			 * the interface index used for the primary ifscope.
			 */
			if (rt_primary_default(rt, rt_key(rt))) {
				set_primary_ifscope(rt_key(rt)->sa_family,
				    rt->rt_ifp->if_index);
			}
			/*
			 * If rmx_mtu is not locked, update it
			 * to the MTU used by the new interface.
			 */
			if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
				rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
				if (rt_key(rt)->sa_family == AF_INET &&
				    INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
					rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
					/* Further adjust the size for CLAT46 expansion */
					rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
				}
			}

			/* Let the new ifaddr attach its per-route state */
			if (rt->rt_ifa != NULL) {
				IFA_LOCK_SPIN(rt->rt_ifa);
				ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
				IFA_UNLOCK(rt->rt_ifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_ADD, rt, Gate);
				}
			}
			ifa_remref(ifa);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return;
		}
		/* Same ifaddr as before; just drop the lookup reference */
		ifa_remref(ifa);
		ifa = NULL;
	}

	/* XXX: to reset gateway to correct value, at RTM_CHANGE */
	if (rt->rt_ifa != NULL) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
		IFA_UNLOCK(rt->rt_ifa);
		if (ifa_rtrequest != NULL) {
			ifa_rtrequest(RTM_ADD, rt, Gate);
		}
	}

	/*
	 * Workaround for local address routes pointing to the loopback
	 * interface added by configd, until <rdar://problem/12970142>.
	 */
	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
	    (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) {
		ifa = ifa_ifwithaddr(rt_key(rt));
		if (ifa != NULL) {
			if (ifa != rt->rt_ifa) {
				rtsetifa(rt, ifa);
			}
			ifa_remref(ifa);
		}
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
}
1151
1152 /*
1153 * Extract the addresses of the passed sockaddrs.
1154 *
1155 * Do a little sanity checking so as to avoid bad memory references.
1156 * This data is derived straight from userland. Some of the data
1157 * anomalies are unrecoverable; for others we substitute the anomalous
1158 * user data with a sanitized replacement.
1159 *
1160 * Details on the input anomalies:
1161 *
1162 * 1. Unrecoverable input anomalies (retcode == EINVAL)
1163 * The function returns EINVAL.
1164 * 1.1. Truncated sockaddrs at the end of the user-provided buffer.
1165 * 1.2. Unparseable sockaddr header (`0 < .sa_len && .sa_len < 2').
1166 * 1.3. Sockaddrs that won't fit `struct sockaddr_storage'.
1167 *
1168 * 2. Recoverable input anomalies (retcode == 0):
1169 * The below anomalies would lead to a malformed `struct sockaddr *'
1170 * pointers. Any attempt to pass such malformed pointers to a function
1171 * or to assign those to another variable will cause a trap
1172 * when the `-fbounds-safety' feature is enabled.
1173 *
 * To mitigate the malformed pointers problem, we substitute the malformed
 * user data with well-formed sockaddrs.
 *
 * 2.1. Sockaddrs with `.sa_len == 0' (aka "zero-length" sockaddrs).
 *      We substitute those with a pointer to the `sa_zero' global
 *      variable.
1180 * 2.2. Sockaddrs with `.sa_len < 16' (a.k.a. "tiny" sockaddrs).
1181 * We copy the contents of "tiny" sockaddrs to a location
1182 * inside the `xtra_storage' parameter, and substitute
1183 * the pointer into the user-provided data with the location
1184 * in `xtra_storage'.
1185 */
static int
rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX])
{
	struct sockaddr *sa;
	int i, next_tiny_sa = 0;

	/* Start from a clean slate: zero the spill storage and the output table */
	for (i = 0; i < RTAX_MAX; i++) {
		SOCKADDR_ZERO(&xtra_storage[i], sizeof(struct sockaddr));
	}
	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));

	/* Walk the RTAX slots advertised in rti_addrs, consuming the buffer */
	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		if ((rtinfo->rti_addrs & (1 << i)) == 0) {
			continue;
		}

		/*
		 * We expect the memory pointed to by `cp' to contain a valid socket address.
		 * However, there are no guarantees that our expectations are correct,
		 * since the buffer is passed from the user-space.
		 * In particular, the socket address may be corrupted or truncated.
		 * If we attempt to interpret the contents of the memory pointed to by `cp'
		 * as a valid socket address, we may end up in a situation where the end
		 * of the presumed socket address exceeds the end of the input buffer:
		 *
		 *    +-------------------------------+
		 *    |          user buffer          |
		 *    +-------------------------------+
		 *    cp ^                      cplim ^
		 *       +-----------------------+
		 *       | (struct sockaddr *)cp |
		 *       +-----------------------+
		 *
		 * In such case, we are likely to panic with the `-fbounds-safety' trap,
		 * while the desired behavior is to return `ENOENT'.
		 *
		 * Because of the above concern, we can not optimistically cast the pointer
		 * `cp' to `struct sockaddr*' until we have validated that the contents
		 * of the memory can be safely interpreted as a socket address.
		 *
		 * Instead, we start by examining the expected length of the socket address,
		 * which is guaranteed to be located at the first byte, and perform several
		 * sanity checks, before interpreting the memory as a valid socket address.
		 */
		uint8_t next_sa_len = *cp;

		/*
		 * Is the user-provided sockaddr truncated?
		 */
		if ((cp + next_sa_len) > cplim) {
			return EINVAL;
		}

		/*
		 * Will the user-provided sockaddr fit the sockaddr storage?
		 */
		if (next_sa_len > sizeof(struct sockaddr_storage)) {
			return EINVAL;
		}

		/*
		 * there are no more.. quit now
		 * If there are more bits, they are in error.
		 * I've seen this. route(1) can evidently generate these.
		 * This causes kernel to core dump.
		 * for compatibility, If we see this, point to a safe address.
		 */
		if (next_sa_len == 0) {
			rtinfo->rti_info[i] = &sa_zero;
			return 0; /* should be EINVAL but for compat */
		}

		/*
		 * Check for the minimal length.
		 */
		if (next_sa_len < offsetof(struct sockaddr, sa_data)) {
			return EINVAL;
		}

		/*
		 * Check whether we are looking at a "tiny" sockaddr,
		 * and if so, copy the contents to the xtra storage.
		 * See the comment to this function for the details
		 * on "tiny" sockaddrs and the xtra storage.
		 */
		if (next_sa_len < sizeof(struct sockaddr)) {
			sa = &xtra_storage[next_tiny_sa++];
			SOCKADDR_COPY(cp, sa, next_sa_len);
		} else {
			sa = SA(cp);
		}

		/*
		 * From this point on we can safely use `sa'.
		 */

		/* accept it */
		rtinfo->rti_info[i] = sa;
		const uint32_t rounded_sa_len = ROUNDUP32(sa->sa_len);
		if (cp + rounded_sa_len > cplim) {
			break;
		} else {
			cp += rounded_sa_len;
			/*
			 * NOTE(review): this self-assignment looks redundant,
			 * but `cp' and `cplim' form an `__ended_by' dependent
			 * pair and `-fbounds-safety' appears to require both
			 * members to be assigned together — confirm before
			 * removing.
			 */
			cplim = cplim;
		}
	}
	return 0;
}
1294
/*
 * Build a routing message of the given type in a freshly-allocated mbuf,
 * appending the scrubbed sockaddrs present in `rtinfo' after the fixed
 * header.  Returns NULL on allocation failure or length mismatch; on
 * success `rtinfo->rti_addrs' reflects the addresses actually included.
 */
static struct mbuf *
rt_msg1(u_char type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr_common *rtmh;
	int32_t *rtm_buf; /* int32 to preserve the alignment. */
	struct mbuf *m;
	int i;
	int len, dlen, off;

	/* Pick the fixed header size for this message type */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Grab an mbuf; upgrade to a cluster if the header won't fit MHLEN */
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL) {
		return NULL;
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	rtm_buf = mtod(m, int32_t *);
	bzero(rtm_buf, len);
	/* Sockaddrs are appended after the fixed header, 32-bit aligned */
	off = len;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		static_assert(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		m_copyback(m, off, dlen, __SA_UTILS_CONV_TO_BYTES(sa));
		len = off + dlen;
		off += ROUNDUP32(dlen);
	}
	/* m_copyback would have grown the mbuf on failure to match; verify */
	if (m->m_pkthdr.len != len) {
		m_freem(m);
		return NULL;
	}
	rtmh = (struct rt_msghdr_common *)rtm_buf;
	rtmh->rtm_msglen = (u_short)len;
	rtmh->rtm_version = RTM_VERSION;
	rtmh->rtm_type = type;
	return m;
}
1383
/*
 * Size and/or serialize a routing message of the given type into `cp'.
 * When `cp' is NULL this performs a sizing pass only; if a walkarg `w'
 * is supplied, a temporary buffer (w_tmem) is (re)allocated to fit and
 * the routine restarts via the `again' label to fill it.  Returns the
 * total message length in bytes.  `credp' gates MAC-address scrubbing
 * of gateway/interface sockaddrs.
 */
static int
rt_msg2(u_char type, struct rt_addrinfo *rtinfo, caddr_t cp __header_indexable, struct walkarg *w,
    kauth_cred_t* credp)
{
	int i;
	int len, dlen, rlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	/* Pick the fixed header size for this message type */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	case RTM_IFINFO2:
		len = sizeof(struct if_msghdr2);
		break;

	case RTM_NEWMADDR2:
		len = sizeof(struct ifma_msghdr2);
		break;

	case RTM_GET_EXT:
		len = sizeof(struct rt_msghdr_ext);
		break;

	case RTM_GET2:
		len = sizeof(struct rt_msghdr2);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Remember where the header starts; addresses follow it */
	cp0 = cp;
	if (cp0) {
		cp += len;
	}
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		static_assert(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;
		case RTAX_GATEWAY:
		case RTAX_IFP:
			/* Scrub link-layer info unless the caller is privileged */
			sa = rtm_scrub(type, i, NULL, sa, &ssbuf,
			    sizeof(ssbuf), credp);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		rlen = ROUNDUP32(dlen);
		if (cp) {
			/* Copy the sockaddr and zero-fill the alignment padding */
			SOCKADDR_COPY(sa, cp, dlen);
			if (dlen != rlen) {
				bzero(cp + dlen, rlen - dlen);
			}
			cp += rlen;
		}
		len += rlen;
	}
	/* Sizing pass done: grow the walker's scratch buffer and refill */
	if (cp == NULL && w != NULL && !second_time) {
		walkarg_ref_t rw = w;

		if (rw->w_req != NULL) {
			if (rw->w_tmemsize < len) {
				if (rw->w_tmem != NULL) {
					kfree_data_sized_by(rw->w_tmem, rw->w_tmemsize);
				}
				caddr_t new_tmem = (caddr_t)kalloc_data(len, Z_ZERO | Z_WAITOK);
				if (new_tmem != NULL) {
					rw->w_tmemsize = len;
					rw->w_tmem = new_tmem;
				}
			}
			if (rw->w_tmem != NULL) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			}
		}
	}
	if (cp) {
		struct rt_msghdr_common *rtmh = (struct rt_msghdr_common *)(void *)cp0;

		rtmh->rtm_version = RTM_VERSION;
		rtmh->rtm_type = type;
		rtmh->rtm_msglen = (u_short)len;
	}
	return len;
}
1508
1509 /*
1510 * This routine is called to generate a message from the routing
1511 * socket indicating that a redirect has occurred, a routing lookup
1512 * has failed, or that a protocol has detected timeouts to a particular
1513 * destination.
1514 */
1515 void
rt_missmsg(u_char type,struct rt_addrinfo * rtinfo,int flags,int error)1516 rt_missmsg(u_char type, struct rt_addrinfo *rtinfo, int flags, int error)
1517 {
1518 struct rt_msghdr_common *rtmh;
1519 struct mbuf *m;
1520 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1521 struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1522
1523 if (route_cb.any_count == 0) {
1524 return;
1525 }
1526 m = rt_msg1(type, rtinfo);
1527 if (m == NULL) {
1528 return;
1529 }
1530 rtmh = mtod(m, struct rt_msghdr_common *);
1531 rtmh->rtm_flags = RTF_DONE | flags;
1532 rtmh->rtm_errno = error;
1533 rtmh->rtm_addrs = rtinfo->rti_addrs;
1534 route_proto.sp_family = sa ? sa->sa_family : 0;
1535 raw_input(m, &route_proto, &route_src, &route_dst);
1536 }
1537
1538 /*
1539 * This routine is called to generate a message from the routing
1540 * socket indicating that the status of a network interface has changed.
1541 */
1542 void
rt_ifmsg(struct ifnet * ifp)1543 rt_ifmsg(struct ifnet *ifp)
1544 {
1545 struct if_msghdr *ifm;
1546 struct mbuf *m;
1547 struct rt_addrinfo info;
1548 struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1549
1550 if (route_cb.any_count == 0) {
1551 return;
1552 }
1553 bzero((caddr_t)&info, sizeof(info));
1554 m = rt_msg1(RTM_IFINFO, &info);
1555 if (m == NULL) {
1556 return;
1557 }
1558 ifm = mtod(m, struct if_msghdr *);
1559 ifm->ifm_index = ifp->if_index;
1560 ifm->ifm_flags = (u_short)ifp->if_flags;
1561 if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
1562 ifm->ifm_addrs = 0;
1563 raw_input(m, &route_proto, &route_src, &route_dst);
1564 }
1565
1566 /*
1567 * This is called to generate messages from the routing socket
1568 * indicating a network interface has had addresses associated with it.
1569 * if we ever reverse the logic and replace messages TO the routing
1570 * socket indicate a request to configure interfaces, then it will
1571 * be unnecessary as the routing socket will automatically generate
1572 * copies of it.
1573 *
1574 * Since this is coming from the interface, it is expected that the
1575 * interface will be locked. Caller must hold rnh_lock and rt_lock.
1576 */
void
rt_newaddrmsg(u_char cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{
	struct rt_addrinfo info;
	struct sockaddr *sa = 0;
	int pass;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifa->ifa_ifp;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (route_cb.any_count == 0) {
		return;
	}

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);
	/*
	 * Two passes: for RTM_ADD the address message goes out first and the
	 * route message second; for RTM_DELETE the order is reversed.
	 */
	for (pass = 1; pass < 3; pass++) {
		bzero((caddr_t)&info, sizeof(info));
		if ((cmd == RTM_ADD && pass == 1) ||
		    (cmd == RTM_DELETE && pass == 2)) {
			struct ifa_msghdr *ifam;
			u_char ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

			/* Lock ifp for if_lladdr */
			ifnet_lock_shared(ifp);
			IFA_LOCK(ifa);
			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold its
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			if ((m = rt_msg1(ncmd, &info)) == NULL) {
				IFA_UNLOCK(ifa);
				ifnet_lock_done(ifp);
				continue;
			}
			IFA_UNLOCK(ifa);
			ifnet_lock_done(ifp);
			ifam = mtod(m, struct ifa_msghdr *);
			ifam->ifam_index = ifp->if_index;
			IFA_LOCK_SPIN(ifa);
			ifam->ifam_metric = ifa->ifa_metric;
			ifam->ifam_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			ifam->ifam_addrs = info.rti_addrs;
		}
		if ((cmd == RTM_ADD && pass == 2) ||
		    (cmd == RTM_DELETE && pass == 1)) {
			struct rt_msghdr *rtm;

			if (rt == NULL) {
				continue;
			}
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_DST] = sa = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if ((m = rt_msg1(cmd, &info)) == NULL) {
				continue;
			}
			rtm = mtod(m, struct rt_msghdr *);
			rtm->rtm_index = ifp->if_index;
			rtm->rtm_flags |= rt->rt_flags;
			rtm->rtm_errno = error;
			rtm->rtm_addrs = info.rti_addrs;
		}
		/* Deliver to listeners of the relevant address family */
		route_proto.sp_protocol = sa ? sa->sa_family : 0;
		raw_input(m, &route_proto, &route_src, &route_dst);
	}
}
1654
1655 /*
1656 * This is the analogue to the rt_newaddrmsg which performs the same
1657 * function but for multicast group memberhips. This is easier since
1658 * there is no route state to worry about.
1659 */
void
rt_newmaddrmsg(u_char cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* No routing-socket listeners: nothing to report */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Lock ifp for if_lladdr */
	ifnet_lock_shared(ifp);
	bzero((caddr_t)&info, sizeof(info));
	IFMA_LOCK(ifma);
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	/* lladdr doesn't need lock */
	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;

	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
	    ifma->ifma_ll->ifma_addr : NULL;
	if ((m = rt_msg1(cmd, &info)) == NULL) {
		IFMA_UNLOCK(ifma);
		ifnet_lock_done(ifp);
		return;
	}
	ifmam = mtod(m, struct ifma_msghdr *);
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	/* Deliver to listeners of the membership's address family */
	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1700
1701 const char *
rtm2str(int cmd)1702 rtm2str(int cmd)
1703 {
1704 const char *c __null_terminated = "RTM_?";
1705
1706 switch (cmd) {
1707 case RTM_ADD:
1708 c = "RTM_ADD";
1709 break;
1710 case RTM_DELETE:
1711 c = "RTM_DELETE";
1712 break;
1713 case RTM_CHANGE:
1714 c = "RTM_CHANGE";
1715 break;
1716 case RTM_GET:
1717 c = "RTM_GET";
1718 break;
1719 case RTM_LOSING:
1720 c = "RTM_LOSING";
1721 break;
1722 case RTM_REDIRECT:
1723 c = "RTM_REDIRECT";
1724 break;
1725 case RTM_MISS:
1726 c = "RTM_MISS";
1727 break;
1728 case RTM_LOCK:
1729 c = "RTM_LOCK";
1730 break;
1731 case RTM_OLDADD:
1732 c = "RTM_OLDADD";
1733 break;
1734 case RTM_OLDDEL:
1735 c = "RTM_OLDDEL";
1736 break;
1737 case RTM_RESOLVE:
1738 c = "RTM_RESOLVE";
1739 break;
1740 case RTM_NEWADDR:
1741 c = "RTM_NEWADDR";
1742 break;
1743 case RTM_DELADDR:
1744 c = "RTM_DELADDR";
1745 break;
1746 case RTM_IFINFO:
1747 c = "RTM_IFINFO";
1748 break;
1749 case RTM_NEWMADDR:
1750 c = "RTM_NEWMADDR";
1751 break;
1752 case RTM_DELMADDR:
1753 c = "RTM_DELMADDR";
1754 break;
1755 case RTM_GET_SILENT:
1756 c = "RTM_GET_SILENT";
1757 break;
1758 case RTM_IFINFO2:
1759 c = "RTM_IFINFO2";
1760 break;
1761 case RTM_NEWMADDR2:
1762 c = "RTM_NEWMADDR2";
1763 break;
1764 case RTM_GET2:
1765 c = "RTM_GET2";
1766 break;
1767 case RTM_GET_EXT:
1768 c = "RTM_GET_EXT";
1769 break;
1770 }
1771
1772 return c;
1773 }
1774
1775 /*
1776 * This is used in dumping the kernel table via sysctl().
1777 */
/*
 * Radix-tree walker callback: serialize one route entry into the
 * sysctl output buffer as an RTM_GET (or RTM_GET2 for NET_RT_DUMP2)
 * message.  Returns 0 to continue the walk, or a SYSCTL_OUT error.
 */
static int
sysctl_dumpentry(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;
	kauth_cred_t *credp;

	cred = current_cached_proc_cred(PROC_NULL);
	credp = &cred;

	RT_LOCK(rt);
	/* Flag-filtered dumps: skip routes that don't match w_arg */
	if ((w->w_op == NET_RT_FLAGS || w->w_op == NET_RT_FLAGS_PRIV) &&
	    !(rt->rt_flags & w->w_arg)) {
		goto done;
	}

	/*
	 * If the matching route has RTF_LLINFO set, then we can skip scrubbing the MAC
	 * only if the outgoing interface is not loopback and the process has entitlement
	 * for neighbor cache read.
	 */
	if (w->w_op == NET_RT_FLAGS_PRIV && (rt->rt_flags & RTF_LLINFO)) {
		if (rt->rt_ifp != lo_ifp &&
		    (route_op_entitlement_check(NULL, cred, ROUTE_OP_READ, TRUE) == 0)) {
			credp = NULL;
		}
	}

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
	if (RT_HAS_IFADDR(rt)) {
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	if (w->w_op != NET_RT_DUMP2) {
		/* rt_msg2 sizes the message and fills w->w_tmem */
		size = rt_msg2(RTM_GET, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr *rtm =
			    (struct rt_msghdr *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_pid = 0;
			rtm->rtm_seq = 0;
			rtm->rtm_errno = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	} else {
		/* NET_RT_DUMP2 uses the extended rt_msghdr2 layout */
		size = rt_msg2(RTM_GET2, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr2 *rtm =
			    (struct rt_msghdr2 *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_refcnt = rt->rt_refcnt;
			if (rt->rt_parent) {
				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
			} else {
				rtm->rtm_parentflags = 0;
			}
			rtm->rtm_reserved = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1860
1861 /*
1862 * This is used for dumping extended information from route entries.
1863 */
/*
 * Radix-tree walker callback for the extended dump: serialize one route
 * entry as an RTM_GET_EXT message, including reachability info obtained
 * from rt_llinfo_get_ri (or defaults when that hook is absent).
 */
static int
sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;

	cred = current_cached_proc_cred(PROC_NULL);

	RT_LOCK(rt);
	/* Flag-filtered dumps: skip routes that don't match w_arg */
	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
		goto done;
	}
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;

	/* rt_msg2 sizes the message and fills w->w_tmem */
	size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred);
	if (w->w_req != NULL && w->w_tmem != NULL) {
		struct rt_msghdr_ext *ertm =
		    (struct rt_msghdr_ext *)(void *)w->w_tmem;

		ertm->rtm_flags = rt->rt_flags;
		ertm->rtm_use = rt->rt_use;
		rt_getmetrics(rt, &ertm->rtm_rmx);
		ertm->rtm_index = rt->rt_ifp->if_index;
		ertm->rtm_pid = 0;
		ertm->rtm_seq = 0;
		ertm->rtm_errno = 0;
		ertm->rtm_addrs = info.rti_addrs;
		if (rt->rt_llinfo_get_ri == NULL) {
			/* No link-layer hook: report unknown reachability */
			bzero(&ertm->rtm_ri, sizeof(ertm->rtm_ri));
			ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
			ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
			ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
		} else {
			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
		}
		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1913
1914 static boolean_t
should_include_clat46(void)1915 should_include_clat46(void)
1916 {
1917 #define CLAT46_ENTITLEMENT "com.apple.private.route.iflist.include-clat46"
1918 return IOCurrentTaskHasEntitlement(CLAT46_ENTITLEMENT);
1919 }
1920
1921 static boolean_t
is_clat46_address(struct ifaddr * ifa)1922 is_clat46_address(struct ifaddr *ifa)
1923 {
1924 boolean_t is_clat46 = FALSE;
1925
1926 if (ifa->ifa_addr->sa_family == AF_INET6) {
1927 struct in6_ifaddr *ifa6 = ifatoia6(ifa);
1928
1929 is_clat46 = (ifa6->ia6_flags & IN6_IFF_CLAT46) != 0;
1930 }
1931 return is_clat46;
1932 }
1933
1934 /*
1935 * rdar://9307819
1936 * To avoid to call copyout() while holding locks and to cause problems
1937 * in the paging path, sysctl_iflist() and sysctl_iflist2() contstruct
1938 * the list in two passes. In the first pass we compute the total
1939 * length of the data we are going to copyout, then we release
1940 * all locks to allocate a temporary buffer that gets filled
1941 * in the second pass.
1942 *
1943 * Note that we are verifying the assumption that kalloc() returns a buffer
1944 * that is at least 32 bits aligned and that the messages and addresses are
1945 * 32 bits aligned.
1946 */
/*
 * sysctl handler: dump all interfaces (and their addresses of family
 * `af', or all families when af == 0) as RTM_IFINFO / RTM_NEWADDR
 * messages.  Pass 0 only sizes the output; pass 1 fills a temporary
 * buffer which is then copied out in one SYSCTL_OUT call.
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct rt_addrinfo info;
	int error = 0;
	int pass = 0;
	size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
	char *total_buffer = NULL, *cp = NULL;
	kauth_cred_t cred __single;
	boolean_t include_clat46 = FALSE;
	boolean_t include_clat46_valid = FALSE;

	cred = current_cached_proc_cred(PROC_NULL);

	bzero((caddr_t)&info, sizeof(info));

	for (pass = 0; pass < 2; pass++) {
		ifnet_head_lock_shared();

		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (error) {
				break;
			}
			/* Non-zero w_arg restricts the dump to one interface index */
			if (w->w_arg && w->w_arg != ifp->if_index) {
				continue;
			}
			ifnet_lock_shared(ifp);
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold the ifa
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			ifa = ifp->if_lladdr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred);
			if (pass == 0) {
				/* Sizing pass: accumulate, guarding against overflow */
				if (os_add_overflow(total_len, len, &total_len)) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
			} else {
				struct if_msghdr *ifm;

				/* Fill pass: never write past the sized buffer */
				if (current_len + len > total_len) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
				len = rt_msg2(RTM_IFINFO, &info,
				    (caddr_t)cp, NULL, &cred);
				info.rti_info[RTAX_IFP] = NULL;

				ifm = (struct if_msghdr *)(void *)cp;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = (u_short)ifp->if_flags;
				if_data_internal_to_if_data(ifp, &ifp->if_data,
				    &ifm->ifm_data);
				ifm->ifm_addrs = info.rti_addrs;
				/*
				 * <rdar://problem/32940901>
				 * Round bytes only for non-platform
				 */
				if (!csproc_get_platform_binary(w->w_req->p)) {
					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
				}

				cp += len;
				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
				current_len += len;
				VERIFY(current_len <= total_len);
			}
			/* Emit one RTM_NEWADDR per remaining address on the interface */
			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
				boolean_t is_clat46;

				IFA_LOCK(ifa);
				if (af && af != ifa->ifa_addr->sa_family) {
					IFA_UNLOCK(ifa);
					continue;
				}
				/* CLAT46 addresses are hidden unless the task is entitled */
				is_clat46 = is_clat46_address(ifa);
				if (is_clat46) {
					if (!include_clat46_valid) {
						/* Check the entitlement once and cache the result */
						include_clat46_valid = TRUE;
						include_clat46 =
						    should_include_clat46();
					}
					if (!include_clat46) {
						IFA_UNLOCK(ifa);
						continue;
					}
				}
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
				    &cred);
				if (pass == 0) {
					if (os_add_overflow(total_len, len, &total_len)) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
				} else {
					struct ifa_msghdr *ifam;

					if (current_len + len > total_len) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
					len = rt_msg2(RTM_NEWADDR, &info,
					    (caddr_t)cp, NULL, &cred);

					ifam = (struct ifa_msghdr *)(void *)cp;
					ifam->ifam_index =
					    ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;

					cp += len;
					VERIFY(IS_P2ALIGNED(cp,
					    sizeof(u_int32_t)));
					current_len += len;
					VERIFY(current_len <= total_len);
				}
				IFA_UNLOCK(ifa);
			}
			ifnet_lock_done(ifp);
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			    info.rti_info[RTAX_BRD] = NULL;
		}

		ifnet_head_done();

		if (error != 0) {
			if (error == ENOBUFS) {
				printf("%s: current_len (%lu) + len (%lu) > "
				    "total_len (%lu)\n", __func__, current_len,
				    len, total_len);
			}
			break;
		}

		if (pass == 0) {
			/* Better to return zero length buffer than ENOBUFS */
			if (total_len == 0) {
				total_len = 1;
			}
			/* Add 12.5% slack for interfaces added between passes */
			total_len += total_len >> 3;
			total_buffer_len = total_len;
			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
			if (total_buffer == NULL) {
				printf("%s: kalloc_data(%lu) failed\n", __func__,
				    total_len);
				error = ENOBUFS;
				break;
			}
			cp = total_buffer;
			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
		} else {
			/* Locks dropped: safe to copy out to userland now */
			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
			if (error) {
				break;
			}
		}
	}

	if (total_buffer != NULL) {
		kfree_data(total_buffer, total_buffer_len);
	}

	return error;
}
2127
2128 static int
sysctl_iflist2(int af,struct walkarg * w)2129 sysctl_iflist2(int af, struct walkarg *w)
2130 {
2131 struct ifnet *ifp;
2132 struct ifaddr *ifa;
2133 struct rt_addrinfo info;
2134 int error = 0;
2135 int pass = 0;
2136 size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
2137 char *total_buffer = NULL, *cp = NULL;
2138 kauth_cred_t cred __single;
2139 boolean_t include_clat46 = FALSE;
2140 boolean_t include_clat46_valid = FALSE;
2141
2142 cred = current_cached_proc_cred(PROC_NULL);
2143
2144 bzero((caddr_t)&info, sizeof(info));
2145
2146 for (pass = 0; pass < 2; pass++) {
2147 struct ifmultiaddr *ifma;
2148
2149 ifnet_head_lock_shared();
2150
2151 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
2152 if (error) {
2153 break;
2154 }
2155 if (w->w_arg && w->w_arg != ifp->if_index) {
2156 continue;
2157 }
2158 ifnet_lock_shared(ifp);
2159 /*
2160 * Holding ifnet lock here prevents the link address
2161 * from changing contents, so no need to hold the ifa
2162 * lock. The link address is always present; it's
2163 * never freed.
2164 */
2165 ifa = ifp->if_lladdr;
2166 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2167 len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred);
2168 if (pass == 0) {
2169 if (os_add_overflow(total_len, len, &total_len)) {
2170 ifnet_lock_done(ifp);
2171 error = ENOBUFS;
2172 break;
2173 }
2174 } else {
2175 struct if_msghdr2 *ifm;
2176
2177 if (current_len + len > total_len) {
2178 ifnet_lock_done(ifp);
2179 error = ENOBUFS;
2180 break;
2181 }
2182 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2183 len = rt_msg2(RTM_IFINFO2, &info,
2184 (caddr_t)cp, NULL, &cred);
2185 info.rti_info[RTAX_IFP] = NULL;
2186
2187 ifm = (struct if_msghdr2 *)(void *)cp;
2188 ifm->ifm_addrs = info.rti_addrs;
2189 ifm->ifm_flags = (u_short)ifp->if_flags;
2190 ifm->ifm_index = ifp->if_index;
2191 ifm->ifm_snd_len = IFCQ_LEN(ifp->if_snd);
2192 ifm->ifm_snd_maxlen = IFCQ_MAXLEN(ifp->if_snd);
2193 ifm->ifm_snd_drops =
2194 (int)ifp->if_snd->ifcq_dropcnt.packets;
2195 ifm->ifm_timer = ifp->if_timer;
2196 if_data_internal_to_if_data64(ifp,
2197 &ifp->if_data, &ifm->ifm_data);
2198 /*
2199 * <rdar://problem/32940901>
2200 * Round bytes only for non-platform
2201 */
2202 if (!csproc_get_platform_binary(w->w_req->p)) {
2203 ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
2204 ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
2205 }
2206
2207 cp += len;
2208 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2209 current_len += len;
2210 VERIFY(current_len <= total_len);
2211 }
2212 while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
2213 boolean_t is_clat46;
2214
2215 IFA_LOCK(ifa);
2216 if (af && af != ifa->ifa_addr->sa_family) {
2217 IFA_UNLOCK(ifa);
2218 continue;
2219 }
2220 is_clat46 = is_clat46_address(ifa);
2221 if (is_clat46) {
2222 if (!include_clat46_valid) {
2223 include_clat46_valid = TRUE;
2224 include_clat46 =
2225 should_include_clat46();
2226 }
2227 if (!include_clat46) {
2228 IFA_UNLOCK(ifa);
2229 continue;
2230 }
2231 }
2232 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2233 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2234 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2235 len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
2236 &cred);
2237 if (pass == 0) {
2238 if (os_add_overflow(total_len, len, &total_len)) {
2239 IFA_UNLOCK(ifa);
2240 error = ENOBUFS;
2241 break;
2242 }
2243 } else {
2244 struct ifa_msghdr *ifam;
2245
2246 if (current_len + len > total_len) {
2247 IFA_UNLOCK(ifa);
2248 error = ENOBUFS;
2249 break;
2250 }
2251 len = rt_msg2(RTM_NEWADDR, &info,
2252 (caddr_t)cp, NULL, &cred);
2253
2254 ifam = (struct ifa_msghdr *)(void *)cp;
2255 ifam->ifam_index =
2256 ifa->ifa_ifp->if_index;
2257 ifam->ifam_flags = ifa->ifa_flags;
2258 ifam->ifam_metric = ifa->ifa_metric;
2259 ifam->ifam_addrs = info.rti_addrs;
2260
2261 cp += len;
2262 VERIFY(IS_P2ALIGNED(cp,
2263 sizeof(u_int32_t)));
2264 current_len += len;
2265 VERIFY(current_len <= total_len);
2266 }
2267 IFA_UNLOCK(ifa);
2268 }
2269 if (error) {
2270 ifnet_lock_done(ifp);
2271 break;
2272 }
2273
2274 for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
2275 ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
2276 struct ifaddr *ifa0;
2277
2278 IFMA_LOCK(ifma);
2279 if (af && af != ifma->ifma_addr->sa_family) {
2280 IFMA_UNLOCK(ifma);
2281 continue;
2282 }
2283 bzero((caddr_t)&info, sizeof(info));
2284 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2285 /*
2286 * Holding ifnet lock here prevents the link
2287 * address from changing contents, so no need
2288 * to hold the ifa0 lock. The link address is
2289 * always present; it's never freed.
2290 */
2291 ifa0 = ifp->if_lladdr;
2292 info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
2293 if (ifma->ifma_ll != NULL) {
2294 info.rti_info[RTAX_GATEWAY] =
2295 ifma->ifma_ll->ifma_addr;
2296 }
2297 len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL,
2298 &cred);
2299 if (pass == 0) {
2300 total_len += len;
2301 } else {
2302 struct ifma_msghdr2 *ifmam;
2303
2304 if (current_len + len > total_len) {
2305 IFMA_UNLOCK(ifma);
2306 error = ENOBUFS;
2307 break;
2308 }
2309 len = rt_msg2(RTM_NEWMADDR2, &info,
2310 (caddr_t)cp, NULL, &cred);
2311
2312 ifmam =
2313 (struct ifma_msghdr2 *)(void *)cp;
2314 ifmam->ifmam_addrs = info.rti_addrs;
2315 ifmam->ifmam_flags = 0;
2316 ifmam->ifmam_index =
2317 ifma->ifma_ifp->if_index;
2318 ifmam->ifmam_refcount =
2319 ifma->ifma_reqcnt;
2320
2321 cp += len;
2322 VERIFY(IS_P2ALIGNED(cp,
2323 sizeof(u_int32_t)));
2324 current_len += len;
2325 }
2326 IFMA_UNLOCK(ifma);
2327 }
2328 ifnet_lock_done(ifp);
2329 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2330 info.rti_info[RTAX_BRD] = NULL;
2331 }
2332 ifnet_head_done();
2333
2334 if (error) {
2335 if (error == ENOBUFS) {
2336 printf("%s: current_len (%lu) + len (%lu) > "
2337 "total_len (%lu)\n", __func__, current_len,
2338 len, total_len);
2339 }
2340 break;
2341 }
2342
2343 if (pass == 0) {
2344 /* Better to return zero length buffer than ENOBUFS */
2345 if (total_len == 0) {
2346 total_len = 1;
2347 }
2348 total_len += total_len >> 3;
2349 total_buffer_len = total_len;
2350 total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
2351 if (total_buffer == NULL) {
2352 printf("%s: kalloc_data(%lu) failed\n", __func__,
2353 total_len);
2354 error = ENOBUFS;
2355 break;
2356 }
2357 cp = total_buffer;
2358 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2359 } else {
2360 error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
2361 if (error) {
2362 break;
2363 }
2364 }
2365 }
2366
2367 if (total_buffer != NULL) {
2368 kfree_data(total_buffer, total_buffer_len);
2369 }
2370
2371 return error;
2372 }
2373
2374
2375 static int
sysctl_rtstat(struct sysctl_req * req)2376 sysctl_rtstat(struct sysctl_req *req)
2377 {
2378 struct rtstat rtstat_compat = { 0 };
2379
2380 #define RTSTAT_COMPAT(_field) rtstat_compat._field = rtstat._field < SHRT_MAX ? (short)rtstat._field : SHRT_MAX
2381 RTSTAT_COMPAT(rts_badredirect);
2382 RTSTAT_COMPAT(rts_dynamic);
2383 RTSTAT_COMPAT(rts_newgateway);
2384 RTSTAT_COMPAT(rts_unreach);
2385 RTSTAT_COMPAT(rts_wildcard);
2386 RTSTAT_COMPAT(rts_badrtgwroute);
2387 #undef RTSTAT_TO_COMPAT
2388
2389 return SYSCTL_OUT(req, &rtstat_compat, sizeof(struct rtstat));
2390 }
2391
2392 static int
sysctl_rtstat_64(struct sysctl_req * req)2393 sysctl_rtstat_64(struct sysctl_req *req)
2394 {
2395 return SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat_64));
2396 }
2397
2398 static int
sysctl_rttrash(struct sysctl_req * req)2399 sysctl_rttrash(struct sysctl_req *req)
2400 {
2401 return SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
2402 }
2403
2404 static int
2405 sysctl_rtsock SYSCTL_HANDLER_ARGS
2406 {
2407 #pragma unused(oidp)
2408 DECLARE_SYSCTL_HANDLER_ARG_ARRAY(int, 4, name, namelen);
2409 struct radix_node_head *rnh;
2410 int i, error = EINVAL;
2411 u_char af;
2412 struct walkarg w;
2413
2414 name++;
2415 namelen--;
2416 if (req->newptr) {
2417 return EPERM;
2418 }
2419 af = (u_char)name[0];
2420 Bzero(&w, sizeof(w));
2421 w.w_op = name[1];
2422 w.w_arg = name[2];
2423 w.w_req = req;
2424
2425 switch (w.w_op) {
2426 case NET_RT_DUMP:
2427 case NET_RT_DUMP2:
2428 case NET_RT_FLAGS:
2429 case NET_RT_FLAGS_PRIV:
2430 lck_mtx_lock(rnh_lock);
2431 for (i = 1; i <= AF_MAX; i++) {
2432 if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2433 (error = rnh->rnh_walktree(rnh,
2434 sysctl_dumpentry, &w))) {
2435 break;
2436 }
2437 }
2438 lck_mtx_unlock(rnh_lock);
2439 break;
2440 case NET_RT_DUMPX:
2441 case NET_RT_DUMPX_FLAGS:
2442 lck_mtx_lock(rnh_lock);
2443 for (i = 1; i <= AF_MAX; i++) {
2444 if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2445 (error = rnh->rnh_walktree(rnh,
2446 sysctl_dumpentry_ext, &w))) {
2447 break;
2448 }
2449 }
2450 lck_mtx_unlock(rnh_lock);
2451 break;
2452 case NET_RT_IFLIST:
2453 error = sysctl_iflist(af, &w);
2454 break;
2455 case NET_RT_IFLIST2:
2456 error = sysctl_iflist2(af, &w);
2457 break;
2458 case NET_RT_STAT:
2459 error = sysctl_rtstat(req);
2460 break;
2461 case NET_RT_STAT_64:
2462 error = sysctl_rtstat_64(req);
2463 break;
2464 case NET_RT_TRASH:
2465 error = sysctl_rttrash(req);
2466 break;
2467 }
2468 if (w.w_tmem != NULL) {
2469 kfree_data_sized_by(w.w_tmem, w.w_tmemsize);
2470 }
2471 return error;
2472 }
2473
/*
 * Definitions of protocols supported in the ROUTE domain.
 * A single SOCK_RAW entry: routing sockets send through
 * route_output() and receive control events via raw_ctlinput().
 */
static struct protosw routesw[] = {
	{
		.pr_type = SOCK_RAW,
		.pr_protocol = 0,
		.pr_flags = PR_ATOMIC | PR_ADDR,
		.pr_output = route_output,
		.pr_ctlinput = raw_ctlinput,
		.pr_usrreqs = &route_usrreqs,
	}
};

/* Number of entries in routesw[] (currently 1). */
static int route_proto_count = (sizeof(routesw) / sizeof(struct protosw));

/*
 * The PF_ROUTE domain descriptor; the domain framework calls
 * route_dinit() to register the protocol switch entries above.
 */
struct domain routedomain_s = {
	.dom_family = PF_ROUTE,
	.dom_name = "route",
	.dom_init = route_dinit,
};
2495
2496 static void
route_dinit(struct domain * dp)2497 route_dinit(struct domain *dp)
2498 {
2499 struct protosw *pr;
2500 int i;
2501
2502 VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
2503 VERIFY(routedomain == NULL);
2504
2505 routedomain = dp;
2506
2507 for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) {
2508 net_add_proto(pr, dp, 1);
2509 }
2510
2511 route_init();
2512 }
2513