1 /*
2 * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)rtsock.c 8.5 (Berkeley) 11/2/94
61 */
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kauth.h>
66 #include <sys/kernel.h>
67 #include <sys/proc.h>
68 #include <sys/malloc.h>
69 #include <sys/mbuf.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/domain.h>
73 #include <sys/protosw.h>
74 #include <sys/syslog.h>
75 #include <sys/mcache.h>
76 #include <kern/locks.h>
77 #include <sys/codesign.h>
78
79 #include <net/if.h>
80 #include <net/route.h>
81 #include <net/dlil.h>
82 #include <net/raw_cb.h>
83 #include <net/net_sysctl.h>
84
85 #include <netinet/in.h>
86 #include <netinet/in_var.h>
87 #include <netinet/in_arp.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip6.h>
90 #include <netinet6/nd6.h>
91
92 #include <net/sockaddr_utils.h>
93
94 #include <IOKit/IOBSD.h>
95
extern struct rtstat_64 rtstat;         /* global routing statistics */
extern struct domain routedomain_s;
static struct domain *routedomain = NULL;

/*
 * Canonical source/destination sockaddrs used when looping routing
 * messages back up to PF_ROUTE listeners via raw_input().
 */
static struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr sa_zero = { .sa_len = sizeof(sa_zero), .sa_family = AF_INET, .sa_data = { 0, } };

/*
 * Counts of routing sockets currently attached, broken out by the
 * protocol they attached with.  Updated atomically from rts_attach()
 * and rts_detach(); any_count is consulted to decide whether a
 * message has any listener at all.
 */
struct route_cb {
	u_int32_t ip_count;     /* attached w/ AF_INET */
	u_int32_t ip6_count;    /* attached w/ AF_INET6 */
	u_int32_t any_count;    /* total attached */
};

static struct route_cb route_cb;

/*
 * Per-request walker state threaded through the sysctl dump routines
 * (sysctl_dumpentry, sysctl_iflist, etc.).
 */
struct walkarg {
	int w_tmemsize;                 /* size of the w_tmem scratch buffer */
	int w_op, w_arg;                /* sysctl operation and its argument */
	caddr_t w_tmem __sized_by(w_tmemsize);  /* scratch buffer for message assembly */
	struct sysctl_req *w_req;       /* the sysctl request being serviced */
};

typedef struct walkarg * __single walkarg_ref_t;
120
121 static void route_dinit(struct domain *);
122 static int rts_abort(struct socket *);
123 static int rts_attach(struct socket *, int, struct proc *);
124 static int rts_bind(struct socket *, struct sockaddr *, struct proc *);
125 static int rts_connect(struct socket *, struct sockaddr *, struct proc *);
126 static int rts_detach(struct socket *);
127 static int rts_disconnect(struct socket *);
128 static int rts_peeraddr(struct socket *, struct sockaddr **);
129 static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *,
130 struct mbuf *, struct proc *);
131 static int rts_shutdown(struct socket *);
132 static int rts_sockaddr(struct socket *, struct sockaddr **);
133
134 static int route_output(struct mbuf *, struct socket *);
135 static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
136 static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
137 static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
138 struct sockaddr *, unsigned int);
139 static int rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX]);
140 static struct mbuf *rt_msg1(u_char, struct rt_addrinfo *);
141 static int rt_msg2(u_char, struct rt_addrinfo *, caddr_t __indexable, struct walkarg *,
142 kauth_cred_t *);
143 static int sysctl_dumpentry(struct radix_node *rn, void *vw);
144 static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
145 static int sysctl_iflist(int af, struct walkarg *w);
146 static int sysctl_iflist2(int af, struct walkarg *w);
147 static int sysctl_rtstat(struct sysctl_req *);
148 static int sysctl_rtstat_64(struct sysctl_req *);
149 static int sysctl_rttrash(struct sysctl_req *);
150 static int sysctl_rtsock SYSCTL_HANDLER_ARGS;
151
152 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED,
153 sysctl_rtsock, "");
154
155 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "routing");
156
/*
 * Align x down to a multiple of 1024 (only power of 2), assuming x is
 * positive.  Note P2ALIGN truncates; it never rounds up.
 */
#define ALIGN_BYTES(x) do { \
	x = (uint32_t)P2ALIGN(x, 1024); \
} while(0)

/* Round a length up to the next multiple of sizeof(uint32_t); 0 maps to 4. */
#define ROUNDUP32(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) : \
	sizeof (uint32_t))

/* True when the route carries a usable interface address. */
#define RT_HAS_IFADDR(rt) \
	((rt)->rt_ifa != NULL && (rt)->rt_ifa->ifa_addr != NULL)
169
170 /*
171 * It really doesn't make any sense at all for this code to share much
172 * with raw_usrreq.c, since its functionality is so restricted. XXX
173 */
174 static int
rts_abort(struct socket * so)175 rts_abort(struct socket *so)
176 {
177 return raw_usrreqs.pru_abort(so);
178 }
179
180 /* pru_accept is EOPNOTSUPP */
181
/*
 * Attach a new routing socket: allocate the raw PCB, register with the
 * raw protocol layer, bump the per-family listener counts, and mark the
 * socket connected.  Returns 0 on success or the raw_attach() error.
 */
static int
rts_attach(struct socket *so, int proto, struct proc *p)
{
#pragma unused(p)
	struct rawcb *rp;
	int error;

	VERIFY(so->so_pcb == NULL);

	/* Z_WAITOK_ZERO_NOFAIL: allocation blocks but cannot fail. */
	rp = kalloc_type(struct rawcb, Z_WAITOK_ZERO_NOFAIL);
	so->so_pcb = (caddr_t)rp;
	/* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
	error = raw_attach(so, proto);
	rp = sotorawcb(so);
	if (error) {
		/* Undo the allocation and flag the PCB as being torn down. */
		kfree_type(struct rawcb, rp);
		so->so_pcb = NULL;
		so->so_flags |= SOF_PCBCLEARING;
		return error;
	}

	/* Track attached sockets per address family for message delivery. */
	switch (rp->rcb_proto.sp_protocol) {
	case AF_INET:
		os_atomic_inc(&route_cb.ip_count, relaxed);
		break;
	case AF_INET6:
		os_atomic_inc(&route_cb.ip6_count, relaxed);
		break;
	}
	rp->rcb_faddr = &route_src;
	os_atomic_inc(&route_cb.any_count, relaxed);
	/* the socket is already locked when we enter rts_attach */
	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;
	return 0;
}
218
219 static int
rts_bind(struct socket * so,struct sockaddr * nam,struct proc * p)220 rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
221 {
222 return raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */
223 }
224
225 static int
rts_connect(struct socket * so,struct sockaddr * nam,struct proc * p)226 rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
227 {
228 return raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */
229 }
230
231 /* pru_connect2 is EOPNOTSUPP */
232 /* pru_control is EOPNOTSUPP */
233
234 static int
rts_detach(struct socket * so)235 rts_detach(struct socket *so)
236 {
237 struct rawcb *rp = sotorawcb(so);
238
239 VERIFY(rp != NULL);
240
241 switch (rp->rcb_proto.sp_protocol) {
242 case AF_INET:
243 os_atomic_dec(&route_cb.ip_count, relaxed);
244 break;
245 case AF_INET6:
246 os_atomic_dec(&route_cb.ip6_count, relaxed);
247 break;
248 }
249 os_atomic_dec(&route_cb.any_count, relaxed);
250 return raw_usrreqs.pru_detach(so);
251 }
252
253 static int
rts_disconnect(struct socket * so)254 rts_disconnect(struct socket *so)
255 {
256 return raw_usrreqs.pru_disconnect(so);
257 }
258
259 /* pru_listen is EOPNOTSUPP */
260
261 static int
rts_peeraddr(struct socket * so,struct sockaddr ** nam)262 rts_peeraddr(struct socket *so, struct sockaddr **nam)
263 {
264 return raw_usrreqs.pru_peeraddr(so, nam);
265 }
266
267 /* pru_rcvd is EOPNOTSUPP */
268 /* pru_rcvoob is EOPNOTSUPP */
269
270 static int
rts_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)271 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
272 struct mbuf *control, struct proc *p)
273 {
274 return raw_usrreqs.pru_send(so, flags, m, nam, control, p);
275 }
276
277 /* pru_sense is null */
278
279 static int
rts_shutdown(struct socket * so)280 rts_shutdown(struct socket *so)
281 {
282 return raw_usrreqs.pru_shutdown(so);
283 }
284
285 static int
rts_sockaddr(struct socket * so,struct sockaddr ** nam)286 rts_sockaddr(struct socket *so, struct sockaddr **nam)
287 {
288 return raw_usrreqs.pru_sockaddr(so, nam);
289 }
290
/*
 * User-request dispatch table for PF_ROUTE sockets.  Operations not
 * listed here (accept, connect2, control, listen, rcvd, rcvoob, sense)
 * fall through to the protosw defaults, which return EOPNOTSUPP.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort = rts_abort,
	.pru_attach = rts_attach,
	.pru_bind = rts_bind,
	.pru_connect = rts_connect,
	.pru_detach = rts_detach,
	.pru_disconnect = rts_disconnect,
	.pru_peeraddr = rts_peeraddr,
	.pru_send = rts_send,
	.pru_shutdown = rts_shutdown,
	.pru_sockaddr = rts_sockaddr,
	.pru_sosend = sosend,           /* generic socket send/receive paths */
	.pru_soreceive = soreceive,
};
305
/*
 * Reinterpret a raw message buffer as a routing message header.
 * Centralizing the cast here keeps the -Wcast-align suppression in
 * exactly one place; callers guarantee the buffer is adequately
 * aligned (it comes from kalloc_data).
 */
static struct rt_msghdr *
__attribute__((always_inline))
__stateful_pure
_rtm_hdr(caddr_t rtm_data __header_indexable)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"
	return (struct rt_msghdr*)rtm_data;
#pragma clang diagnostic pop
}
316
/*
 * Process a routing message written to a PF_ROUTE socket (RTM_ADD,
 * RTM_DELETE, RTM_GET, RTM_CHANGE, RTM_LOCK).  The mbuf chain "m" holds
 * the user's rt_msghdr plus trailing sockaddrs; "so" is the sending
 * socket, which arrives locked.
 *
 * Locking: the socket lock is dropped for the duration of the routing
 * table work (rnh_lock is held instead) and retaken before the reply is
 * delivered.  The reply is either appended only to the sender
 * (RTM_GET_SILENT) or broadcast to all PF_ROUTE listeners via raw_input().
 *
 * Returns 0 or an errno; on error the errno is also echoed back to
 * listeners in rtm_errno.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
	/* Heap copy of the request (and later, the reply); RTM views it. */
	size_t rtm_len = 0;
	caddr_t rtm_buf __counted_by(rtm_len) = NULL;
	caddr_t rtm_tmpbuf;
#define RTM _rtm_hdr(rtm_buf)
	rtentry_ref_t rt = NULL;
	rtentry_ref_t saved_nrt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	struct sockaddr tiny_sa_storage[RTAX_MAX];
	int len, error = 0;
	sa_family_t dst_sa_family = 0;
	struct ifnet *ifp = NULL;
	struct sockaddr_in dst_in, gate_in;
	int sendonlytoself = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct rawcb *rp = NULL;
	boolean_t is_router = FALSE;
#define senderr(e) { error = (e); goto flush; }
	if (m == NULL || ((m->m_len < sizeof(intptr_t)) &&
	    (m = m_pullup(m, sizeof(intptr_t))) == NULL)) {
		return ENOBUFS;
	}
	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * Unlock the socket (but keep a reference) it won't be
	 * accessed until raw_input appends to it.
	 */
	socket_unlock(so, 0);
	lck_mtx_lock(rnh_lock);

	/* Message length must match what the header itself claims. */
	len = m->m_pkthdr.len;
	if (len < sizeof(*RTM) ||
	    len != mtod(m, struct rt_msghdr_prelude *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	/*
	 * Allocate the buffer for the message. First we allocate
	 * a temporary buffer, and if successful, set the pointers.
	 */
	rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
	if (rtm_tmpbuf == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	rtm_len = (size_t)len;
	rtm_buf = rtm_tmpbuf;
	rtm_tmpbuf = NULL;


	m_copydata(m, 0, len, rtm_buf);

	if (RTM->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}

	/*
	 * Silent version of RTM_GET for Reachability APIs. We may change
	 * all RTM_GETs to be silent in the future, so this is private for now.
	 */
	if (RTM->rtm_type == RTM_GET_SILENT) {
		if (!(so->so_options & SO_USELOOPBACK)) {
			senderr(EINVAL);
		}
		sendonlytoself = 1;
		RTM->rtm_type = RTM_GET;
	}

	/*
	 * Perform permission checking, only privileged sockets
	 * may perform operations other than RTM_GET
	 */
	if (RTM->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPERM);
	}

	RTM->rtm_pid = proc_selfpid();
	info.rti_addrs = RTM->rtm_addrs;

	/* Parse the sockaddrs that trail the header into info.rti_info[]. */
	if (rt_xaddrs(rtm_buf + sizeof(struct rt_msghdr), rtm_buf + rtm_len, &info, tiny_sa_storage)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}

	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) {
		senderr(EINVAL);
	}

	/*
	 * Normalize a short/long AF_INET destination into a full-sized,
	 * zeroed sockaddr_in on the stack (dst_in) so downstream code can
	 * rely on sa_len == sizeof(struct sockaddr_in).
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
	    info.rti_info[RTAX_DST]->sa_len != sizeof(struct sockaddr_in)) {
		/* At minimum, we need up to sin_addr */
		if (info.rti_info[RTAX_DST]->sa_len <
		    offsetof(struct sockaddr_in, sin_zero)) {
			senderr(EINVAL);
		}

		SOCKADDR_ZERO(&dst_in, sizeof(dst_in));
		dst_in.sin_len = sizeof(dst_in);
		dst_in.sin_family = AF_INET;
		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
		info.rti_info[RTAX_DST] = SA(&dst_in);
		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
	} else if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    info.rti_info[RTAX_DST]->sa_len < sizeof(struct sockaddr_in6)) {
		senderr(EINVAL);
	}

	/* Same normalization for the gateway, using gate_in. */
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
		    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof(struct sockaddr_in)) {
			/* At minimum, we need up to sin_addr */
			if (info.rti_info[RTAX_GATEWAY]->sa_len <
			    offsetof(struct sockaddr_in, sin_zero)) {
				senderr(EINVAL);
			}

			SOCKADDR_ZERO(&gate_in, sizeof(gate_in));
			gate_in.sin_len = sizeof(gate_in);
			gate_in.sin_family = AF_INET;
			gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
			gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
			info.rti_info[RTAX_GATEWAY] = SA(&gate_in);
		} else if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET6 &&
		    info.rti_info[RTAX_GATEWAY]->sa_len < sizeof(struct sockaddr_in6)) {
			senderr(EINVAL);
		}
	}

	/*
	 * Replace a caller-supplied genmask with the canonical interned
	 * copy held by the mask radix tree, so pointer identity holds.
	 */
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		struct sockaddr *genmask = SA(info.rti_info[RTAX_GENMASK]);
		void *genmask_bytes = __SA_UTILS_CONV_TO_BYTES(genmask);
		t = rn_addmask(genmask_bytes, 0, 1);
		if (t != NULL && SOCKADDR_CMP(genmask, rn_get_key(t), genmask->sa_len) == 0) {
			info.rti_info[RTAX_GENMASK] = SA(rn_get_key(t));
		} else {
			senderr(ENOBUFS);
		}
	}

	/*
	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
	 */
	if (RTM->rtm_flags & RTF_IFSCOPE) {
		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
		    info.rti_info[RTAX_DST]->sa_family != AF_INET6) {
			senderr(EINVAL);
		}
		ifscope = RTM->rtm_index;
	}
	/*
	 * Block changes on INTCOPROC interfaces.
	 */
	if (ifscope != IFSCOPE_NONE) {
		unsigned int intcoproc_scope = 0;
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (IFNET_IS_INTCOPROC(ifp)) {
				intcoproc_scope = ifp->if_index;
				break;
			}
		}
		ifnet_head_done();
		/* pid 0 (kernel-originated requests) is exempt */
		if (intcoproc_scope == ifscope && proc_getpid(current_proc()) != 0) {
			senderr(EINVAL);
		}
	}
	/*
	 * Require entitlement to change management interfaces
	 */
	if (management_control_unrestricted == false && if_management_interface_check_needed == true &&
	    ifscope != IFSCOPE_NONE && proc_getpid(current_proc()) != 0) {
		bool is_management = false;

		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(ifscope)) {
			ifp = ifindex2ifnet[ifscope];
			if (ifp != NULL && IFNET_IS_MANAGEMENT(ifp)) {
				is_management = true;
			}
		}
		ifnet_head_done();

		if (is_management && !IOCurrentTaskHasEntitlement(MANAGEMENT_CONTROL_ENTITLEMENT)) {
			senderr(EINVAL);
		}
	}

	/*
	 * RTF_PROXY can only be set internally from within the kernel.
	 */
	if (RTM->rtm_flags & RTF_PROXY) {
		senderr(EINVAL);
	}

	/*
	 * For AF_INET, always zero out the embedded scope ID. If this is
	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
	 * flag and the corresponding rtm_index value. This is to prevent
	 * false interpretation of the scope ID because it's using the sin_zero
	 * field, which might not be properly cleared by the requestor.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
	}
	/*
	 * For KAME-style scoped IPv6 addresses, lift the scope ID embedded
	 * in s6_addr16[1] into sin6_scope_id and clear it from the address.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    SIN6(info.rti_info[RTAX_DST])->sin6_scope_id == 0) {
		SIN6(info.rti_info[RTAX_DST])->sin6_scope_id = ntohs(SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1]);
		SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1] = 0;
	}

	/* Dispatch on the request type; rnh_lock is held throughout. */
	switch (RTM->rtm_type) {
	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL) {
			senderr(EINVAL);
		}

		error = rtrequest_scoped_locked(RTM_ADD,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0 && saved_nrt != NULL) {
			RT_LOCK(saved_nrt);
			/*
			 * If the route request specified an interface with
			 * IFA and/or IFP, we set the requested interface on
			 * the route with rt_setif. It would be much better
			 * to do this inside rtrequest, but that would
			 * require passing the desired interface, in some
			 * form, to rtrequest. Since rtrequest is called in
			 * so many places (roughly 40 in our source), adding
			 * a parameter is too much for us to swallow; this is
			 * something for the FreeBSD developers to tackle.
			 * Instead, we let rtrequest compute whatever
			 * interface it wants, then come in behind it and
			 * stick in the interface that we really want. This
			 * works reasonably well except when rtrequest can't
			 * figure out what interface to use (with
			 * ifa_withroute) and returns ENETUNREACH. Ideally
			 * it shouldn't matter if rtrequest can't figure out
			 * the interface if we're going to explicitly set it
			 * ourselves anyway. But practically we can't
			 * recover here because rtrequest will not do any of
			 * the work necessary to add the route if it can't
			 * find an interface. As long as there is a default
			 * route that leads to some interface, rtrequest will
			 * find an interface, so this problem should be
			 * rarely encountered.
			 * [email protected]
			 */
			rt_setif(saved_nrt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);
			(void)rt_setmetrics(RTM->rtm_inits, &RTM->rtm_rmx, saved_nrt);
			saved_nrt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			saved_nrt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			RT_REMREF_LOCKED(saved_nrt);
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		error = rtrequest_scoped_locked(RTM_DELETE,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], RTM->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0) {
			/* Report the deleted entry back to the caller. */
			rt = saved_nrt;
			RT_LOCK(rt);
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL) {
			senderr(EAFNOSUPPORT);
		}
		/*
		 * Lookup the best match based on the key-mask pair;
		 * callee adds a reference and checks for root node.
		 */
		rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
		    info.rti_info[RTAX_NETMASK], rnh, ifscope);
		if (rt == NULL) {
			senderr(ESRCH);
		}
		RT_LOCK(rt);

		/*
		 * Holding rnh_lock here prevents the possibility of
		 * ifa from changing (e.g. in_ifinit), so it is safe
		 * to access its ifa_addr (down below) without locking.
		 */
		switch (RTM->rtm_type) {
		case RTM_GET: {
			kauth_cred_t cred __single;
			kauth_cred_t* credp;
			struct ifaddr *ifa2;
			/*
			 * The code below serves both the `RTM_GET'
			 * and the `RTM_DELETE' requests.
			 */
report:
			cred = current_cached_proc_cred(PROC_NULL);
			credp = &cred;

			ifa2 = NULL;
			RT_LOCK_ASSERT_HELD(rt);
			/* Point info at the live entry's sockaddrs. */
			info.rti_info[RTAX_DST] = rt_key(rt);
			dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (RTM->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp != NULL) {
					ifnet_lock_shared(ifp);
					/* Hold ifa2 so its addr stays valid after unlock. */
					ifa2 = ifp->if_lladdr;
					info.rti_info[RTAX_IFP] = ifa2->ifa_addr;
					ifa_addref(ifa2);
					ifnet_lock_done(ifp);
					info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
					RTM->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				RTM->rtm_index = ifp->if_index;
			}

			/*
			 * Determine the length required for the routing information
			 * report.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			len = rt_msg2(RTM->rtm_type, &info, NULL, NULL, credp);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Allocate output message for the routing information report.
			 */
			VERIFY(rtm_tmpbuf == NULL);
			rtm_tmpbuf = kalloc_data(len, Z_WAITOK);
			if (rtm_tmpbuf == NULL) {
				RT_UNLOCK(rt);
				if (ifa2 != NULL) {
					ifa_remref(ifa2);
				}
				senderr(ENOBUFS);
			}

			/*
			 * Create the header for the output message, based
			 * on the request message header and the current routing information.
			 */
			struct rt_msghdr *out_rtm = _rtm_hdr(rtm_tmpbuf);
			bcopy(RTM, out_rtm, sizeof(struct rt_msghdr));
			out_rtm->rtm_flags = rt->rt_flags;
			rt_getmetrics(rt, &out_rtm->rtm_rmx);
			out_rtm->rtm_addrs = info.rti_addrs;

			/*
			 * Populate the body of the output message.
			 */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			(void) rt_msg2(out_rtm->rtm_type, &info, rtm_tmpbuf,
			    NULL, &cred);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}

			/*
			 * Replace the "main" routing message with the output message
			 * we have constructed.
			 */
			kfree_data_counted_by(rtm_buf, rtm_len);
			rtm_len = len;
			rtm_buf = rtm_tmpbuf;
			rtm_tmpbuf = NULL;

			if (ifa2 != NULL) {
				ifa_remref(ifa2);
			}

			break;
		}

		case RTM_CHANGE:
			is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;

			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
			    info.rti_info[RTAX_GATEWAY]))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			/*
			 * If they tried to change things but didn't specify
			 * the required gateway, then just use the old one.
			 * This can happen if the user tries to change the
			 * flags on the default route without changing the
			 * default gateway. Changing flags still doesn't work.
			 */
			if ((rt->rt_flags & RTF_GATEWAY) &&
			    info.rti_info[RTAX_GATEWAY] == NULL) {
				info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			}

			/*
			 * On Darwin, we call rt_setif which contains the
			 * equivalent to the code found at this very spot
			 * in BSD.
			 */
			rt_setif(rt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);

			if ((error = rt_setmetrics(RTM->rtm_inits,
			    &RTM->rtm_rmx, rt))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			if (info.rti_info[RTAX_GENMASK]) {
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			}

			/*
			 * Enqueue work item to invoke callback for this route entry
			 * This may not be needed always, but for now issue it anytime
			 * RTM_CHANGE gets called.
			 */
			route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, TRUE);
			/*
			 * If the route is for a router, walk the tree to send refresh
			 * event to protocol cloned entries
			 */
			if (is_router) {
				struct route_event rt_ev;
				route_event_init(&rt_ev, rt, NULL, ROUTE_ENTRY_REFRESH);
				RT_UNLOCK(rt);
				(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
				RT_LOCK(rt);
			}
			OS_FALLTHROUGH;
		case RTM_LOCK:
			rt->rt_rmx.rmx_locks &= ~(RTM->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (RTM->rtm_inits & RTM->rtm_rmx.rmx_locks);
			break;
		}
		RT_UNLOCK(rt);
		break;
	default:
		senderr(EOPNOTSUPP);
	}
flush:
	/* Stamp the outcome into the reply header (if we still have one). */
	if (RTM != NULL) {
		if (error) {
			RTM->rtm_errno = error;
		} else {
			RTM->rtm_flags |= RTF_DONE;
		}
	}
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		rtfree_locked(rt);
	}
	lck_mtx_unlock(rnh_lock);

	/* relock the socket now */
	socket_lock(so, 0);
	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			/* No other listeners: nothing to deliver. */
			kfree_data_counted_by(rtm_buf, rtm_len);
			m_freem(m);
			return error;
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	/* Copy the (possibly rewritten) reply back into the mbuf chain. */
	if (rtm_buf != NULL) {
		m_copyback(m, 0, RTM->rtm_msglen, rtm_buf);
		if (m->m_pkthdr.len < RTM->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > RTM->rtm_msglen) {
			m_adj(m, RTM->rtm_msglen - m->m_pkthdr.len);
		}
		kfree_data_counted_by(rtm_buf, rtm_len);
	}
	if (sendonlytoself && m != NULL) {
		/* RTM_GET_SILENT: deliver only to the requesting socket. */
		error = 0;
		if (sbappendaddr(&so->so_rcv, &route_src, m,
		    NULL, &error) != 0) {
			sorwakeup(so);
		}
		if (error) {
			return error;
		}
	} else {
		/* Broadcast to every PF_ROUTE listener (except rp, masked below). */
		struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
		if (rp != NULL) {
			rp->rcb_proto.sp_family = 0; /* Avoid us */
		}
		if (dst_sa_family != 0) {
			route_proto.sp_protocol = dst_sa_family;
		}
		if (m != NULL) {
			socket_unlock(so, 0);
			raw_input(m, &route_proto, &route_src, &route_dst);
			socket_lock(so, 0);
		}
		if (rp != NULL) {
			rp->rcb_proto.sp_family = PF_ROUTE;
		}
	}
	return error;
#undef RTM /* was defined to _rtm_hdr(rtm_buf) */
}
871
872 void
rt_setexpire(struct rtentry * rt,uint64_t expiry)873 rt_setexpire(struct rtentry *rt, uint64_t expiry)
874 {
875 /* set both rt_expire and rmx_expire */
876 rt->rt_expire = expiry;
877 if (expiry) {
878 rt->rt_rmx.rmx_expire =
879 (int32_t)(expiry + rt->base_calendartime -
880 rt->base_uptime);
881 } else {
882 rt->rt_rmx.rmx_expire = 0;
883 }
884 }
885
886 static int
rt_setmetrics(u_int32_t which,struct rt_metrics * in,struct rtentry * out)887 rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
888 {
889 if (!(which & RTV_REFRESH_HOST)) {
890 struct timeval caltime;
891 getmicrotime(&caltime);
892 #define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
893 metric(RTV_RPIPE, rmx_recvpipe);
894 metric(RTV_SPIPE, rmx_sendpipe);
895 metric(RTV_SSTHRESH, rmx_ssthresh);
896 metric(RTV_RTT, rmx_rtt);
897 metric(RTV_RTTVAR, rmx_rttvar);
898 metric(RTV_HOPCOUNT, rmx_hopcount);
899 metric(RTV_MTU, rmx_mtu);
900 metric(RTV_EXPIRE, rmx_expire);
901 #undef metric
902 if (out->rt_rmx.rmx_expire > 0) {
903 /* account for system time change */
904 getmicrotime(&caltime);
905 out->base_calendartime +=
906 NET_CALCULATE_CLOCKSKEW(caltime,
907 out->base_calendartime,
908 net_uptime(), out->base_uptime);
909 rt_setexpire(out,
910 out->rt_rmx.rmx_expire -
911 out->base_calendartime +
912 out->base_uptime);
913 } else {
914 rt_setexpire(out, 0);
915 }
916
917 VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
918 VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
919 } else {
920 /* Only RTV_REFRESH_HOST must be set */
921 if ((which & ~RTV_REFRESH_HOST) ||
922 (out->rt_flags & RTF_STATIC) ||
923 !(out->rt_flags & RTF_LLINFO)) {
924 return EINVAL;
925 }
926
927 if (out->rt_llinfo_refresh == NULL) {
928 return ENOTSUP;
929 }
930
931 out->rt_llinfo_refresh(out);
932 }
933 return 0;
934 }
935
936 static void
rt_getmetrics(struct rtentry * in,struct rt_metrics * out)937 rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
938 {
939 struct timeval caltime;
940
941 VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
942 VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);
943
944 *out = in->rt_rmx;
945
946 if (in->rt_expire != 0) {
947 /* account for system time change */
948 getmicrotime(&caltime);
949
950 in->base_calendartime +=
951 NET_CALCULATE_CLOCKSKEW(caltime,
952 in->base_calendartime, net_uptime(), in->base_uptime);
953
954 out->rmx_expire = (int32_t)(in->base_calendartime +
955 in->rt_expire - in->base_uptime);
956 } else {
957 out->rmx_expire = 0;
958 }
959 }
960
961 /*
962 * Set route's interface given info.rti_info[RTAX_IFP],
963 * info.rti_info[RTAX_IFA], and gateway.
964 */
/*
 * Set the interface (and interface address) of route `rt' from the
 * optional hints: Ifpaddr (link-level sockaddr naming an ifp), Ifaaddr
 * (protocol address on an interface) and Gate (gateway), scoped by
 * `ifscope'.  Caller holds rnh_lock and rt's lock.
 */
static void
rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
    struct sockaddr *Gate, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct ifnet *ifp = NULL;
	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK_ASSERT_HELD(rt);

	/* Don't update a defunct route */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return;
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);

	/*
	 * New gateway could require new ifaddr, ifp; flags may also
	 * be different; ifp may be specified by ll sockaddr when
	 * protocol address is ambiguous.
	 */
	if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
	    (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
		/* Found the ifp; re-resolve the best ifaddr on it. */
		ifa_remref(ifa);
		ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
	} else {
		/* Drop any reference picked up by the failed lookup above. */
		if (ifa != NULL) {
			ifa_remref(ifa);
			ifa = NULL;
		}
		if (Ifpaddr && (ifp = if_withname(Ifpaddr))) {
			if (Gate) {
				ifa = ifaof_ifpforaddr(Gate, ifp);
			} else {
				/* No gateway hint: use the ifp's first address. */
				ifnet_lock_shared(ifp);
				ifa = TAILQ_FIRST(&ifp->if_addrhead);
				if (ifa != NULL) {
					ifa_addref(ifa);
				}
				ifnet_lock_done(ifp);
			}
		} else if (Ifaaddr &&
		    (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
			ifp = ifa->ifa_ifp;
		} else if (Gate != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, since holding
			 * rnh_lock here prevents another thread from calling
			 * rt_setgate() on this route.  We cannot hold the
			 * lock across ifa_ifwithroute since the lookup done
			 * by that routine may point to the same route.
			 */
			RT_UNLOCK(rt);
			if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
			    rt_key(rt), Gate, ifscope)) != NULL) {
				ifp = ifa->ifa_ifp;
			}
			RT_LOCK(rt);
			/* Don't update a defunct route */
			if (rt->rt_flags & RTF_CONDEMNED) {
				if (ifa != NULL) {
					ifa_remref(ifa);
				}
				/* Release extra ref */
				RT_REMREF_LOCKED(rt);
				return;
			}
		}
	}

	/* trigger route cache reevaluation */
	if (rt_key(rt)->sa_family == AF_INET) {
		routegenid_inet_update();
	} else if (rt_key(rt)->sa_family == AF_INET6) {
		routegenid_inet6_update();
	}

	if (ifa != NULL) {
		struct ifaddr *oifa = rt->rt_ifa;
		if (oifa != ifa) {
			/*
			 * Switching ifaddr: notify the old one that the
			 * route is being detached before re-pointing it.
			 */
			if (oifa != NULL) {
				IFA_LOCK_SPIN(oifa);
				ifa_rtrequest = oifa->ifa_rtrequest;
				IFA_UNLOCK(oifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_DELETE, rt, Gate);
				}
			}
			rtsetifa(rt, ifa);

			if (rt->rt_ifp != ifp) {
				/*
				 * Purge any link-layer info caching.
				 */
				if (rt->rt_llinfo_purge != NULL) {
					rt->rt_llinfo_purge(rt);
				}

				/*
				 * Adjust route ref count for the interfaces.
				 */
				if (rt->rt_if_ref_fn != NULL) {
					rt->rt_if_ref_fn(ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
			}
			rt->rt_ifp = ifp;
			/*
			 * If this is the (non-scoped) default route, record
			 * the interface index used for the primary ifscope.
			 */
			if (rt_primary_default(rt, rt_key(rt))) {
				set_primary_ifscope(rt_key(rt)->sa_family,
				    rt->rt_ifp->if_index);
			}
			/*
			 * If rmx_mtu is not locked, update it
			 * to the MTU used by the new interface.
			 */
			if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
				rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
				if (rt_key(rt)->sa_family == AF_INET &&
				    INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
					rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
					/* Further adjust the size for CLAT46 expansion */
					rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
				}
			}

			/* Let the new ifaddr initialize its route state. */
			if (rt->rt_ifa != NULL) {
				IFA_LOCK_SPIN(rt->rt_ifa);
				ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
				IFA_UNLOCK(rt->rt_ifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_ADD, rt, Gate);
				}
			}
			ifa_remref(ifa);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return;
		}
		ifa_remref(ifa);
		ifa = NULL;
	}

	/* XXX: to reset gateway to correct value, at RTM_CHANGE */
	if (rt->rt_ifa != NULL) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
		IFA_UNLOCK(rt->rt_ifa);
		if (ifa_rtrequest != NULL) {
			ifa_rtrequest(RTM_ADD, rt, Gate);
		}
	}

	/*
	 * Workaround for local address routes pointing to the loopback
	 * interface added by configd, until <rdar://problem/12970142>.
	 */
	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
	    (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) {
		ifa = ifa_ifwithaddr(rt_key(rt));
		if (ifa != NULL) {
			if (ifa != rt->rt_ifa) {
				rtsetifa(rt, ifa);
			}
			ifa_remref(ifa);
		}
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
}
1146
1147 /*
1148 * Extract the addresses of the passed sockaddrs.
1149 *
1150 * Do a little sanity checking so as to avoid bad memory references.
1151 * This data is derived straight from userland. Some of the data
1152 * anomalies are unrecoverable; for others we substitute the anomalous
1153 * user data with a sanitized replacement.
1154 *
1155 * Details on the input anomalies:
1156 *
1157 * 1. Unrecoverable input anomalies (retcode == EINVAL)
1158 * The function returns EINVAL.
1159 * 1.1. Truncated sockaddrs at the end of the user-provided buffer.
1160 * 1.2. Unparseable sockaddr header (`0 < .sa_len && .sa_len < 2').
1161 * 1.3. Sockaddrs that won't fit `struct sockaddr_storage'.
1162 *
1163 * 2. Recoverable input anomalies (retcode == 0):
1164 * The below anomalies would lead to a malformed `struct sockaddr *'
1165 * pointers. Any attempt to pass such malformed pointers to a function
1166 * or to assign those to another variable will cause a trap
1167 * when the `-fbounds-safety' feature is enabled.
1168 *
1169 * To mitigate the malformed pointers problem, we substitute the malformed
1170 * user data with a well-formed sockaddrs.
1171 *
 * 2.1. Sockaddrs with `.sa_len == 0' (aka "zero-length" sockaddrs).
1173 * We substitute those with a pointer to the `sa_data' global
1174 * variable.
1175 * 2.2. Sockaddrs with `.sa_len < 16' (a.k.a. "tiny" sockaddrs).
1176 * We copy the contents of "tiny" sockaddrs to a location
1177 * inside the `xtra_storage' parameter, and substitute
1178 * the pointer into the user-provided data with the location
1179 * in `xtra_storage'.
1180 */
static int
rt_xaddrs(caddr_t cp __ended_by(cplim), caddr_t cplim, struct rt_addrinfo *rtinfo, struct sockaddr xtra_storage[RTAX_MAX])
{
	struct sockaddr *sa;
	int i, next_tiny_sa = 0;

	/* Start from a clean slate: zero the scratch sockaddrs and results. */
	for (i = 0; i < RTAX_MAX; i++) {
		SOCKADDR_ZERO(&xtra_storage[i], sizeof(struct sockaddr));
	}
	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));

	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		if ((rtinfo->rti_addrs & (1 << i)) == 0) {
			continue;
		}

		/*
		 * We expect the memory pointed to by `cp' to contain a valid socket address.
		 * However, there are no guarantees that our expectations are correct,
		 * since the buffer is passed from the user-space.
		 * In particular, the socket address may be corrupted or truncated.
		 * If we attempt to interpret the contents of the memory pointed to by `cp'
		 * as a valid socket address, we may end up in a situation where the end
		 * of the presumed socket address exceeds the end of the input buffer:
		 *
		 *    +-------------------------------+
		 *    |          user buffer          |
		 *    +-------------------------------+
		 *  cp ^                        cplim ^
		 *                +-----------------------+
		 *                | (struct sockaddr *)cp |
		 *                +-----------------------+
		 *
		 * In such case, we are likely to panic with the `-fbounds-safety' trap,
		 * while the desired behavior is to return `ENOENT'.
		 *
		 * Because of the above concern, we can not optimistically cast the pointer
		 * `cp' to `struct sockaddr*' until we have validated that the contents
		 * of the memory can be safely interpreted as a socket address.
		 *
		 * Instead, we start by examining the expected length of the socket address,
		 * which is guaranteed to be located at the first byte, and perform several
		 * sanity checks, before interpreting the memory as a valid socket address.
		 */
		uint8_t next_sa_len = *cp;

		/*
		 * Is the user-provided sockaddr truncated?
		 */
		if ((cp + next_sa_len) > cplim) {
			return EINVAL;
		}

		/*
		 * Will the user-provided sockaddr fit the sockaddr storage?
		 */
		if (next_sa_len > sizeof(struct sockaddr_storage)) {
			return EINVAL;
		}

		/*
		 * A zero-length sockaddr means there are no more; quit now.
		 * If there are more bits set in rti_addrs, they are in error.
		 * I've seen this. route(1) can evidently generate these.
		 * This causes kernel to core dump.
		 * For compatibility, if we see this, point to a safe address
		 * instead of failing.
		 */
		if (next_sa_len == 0) {
			rtinfo->rti_info[i] = &sa_zero;
			return 0; /* should be EINVAL but for compat */
		}

		/*
		 * Check for the minimal length.
		 */
		if (next_sa_len < offsetof(struct sockaddr, sa_data)) {
			return EINVAL;
		}

		/*
		 * Check whether we are looking at a "tiny" sockaddr,
		 * and if so, copy the contents to the xtra storage.
		 * See the comment to this function for the details
		 * on "tiny" sockaddrs and the xtra storage.
		 */
		if (next_sa_len < sizeof(struct sockaddr)) {
			sa = &xtra_storage[next_tiny_sa++];
			SOCKADDR_COPY(cp, sa, next_sa_len);
		} else {
			sa = SA(cp);
		}

		/*
		 * From this point on we can safely use `sa'.
		 */

		/* accept it */
		rtinfo->rti_info[i] = sa;
		const uint32_t rounded_sa_len = ROUNDUP32(sa->sa_len);
		if (cp + rounded_sa_len > cplim) {
			break;
		} else {
			cp += rounded_sa_len;
			/*
			 * NOTE(review): the self-assignment below appears to
			 * keep `cplim' paired with the `__ended_by' pointer
			 * `cp' when `-fbounds-safety' is enabled — presumably
			 * required for the grouped bounds assignment; confirm
			 * before removing.
			 */
			cplim = cplim;
		}
	}
	return 0;
}
1289
/*
 * Build a routing-socket message of `type' in a freshly allocated mbuf:
 * a fixed-size, zeroed header followed by each sockaddr present in
 * rtinfo->rti_info[], padded to 32-bit boundaries.  Also sets the
 * corresponding bits in rtinfo->rti_addrs.  Returns NULL if allocation
 * fails or the final length does not match the mbuf's packet length.
 */
static struct mbuf *
rt_msg1(u_char type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr_common *rtmh;
	int32_t *rtm_buf; /* int32 to preserve the alignment. */
	struct mbuf *m;
	int i;
	int len, dlen, off;

	/* Select the header size that matches the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Grab an mbuf; upgrade to a cluster if the header won't fit. */
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL) {
		return NULL;
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	rtm_buf = mtod(m, int32_t *);
	bzero(rtm_buf, len);
	/* Append each present sockaddr after the header. */
	off = len;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		m_copyback(m, off, dlen, __SA_UTILS_CONV_TO_BYTES(sa));
		len = off + dlen;
		off += ROUNDUP32(dlen);
	}
	/* m_copyback() may have grown the mbuf on failure; sanity check. */
	if (m->m_pkthdr.len != len) {
		m_freem(m);
		return NULL;
	}
	rtmh = (struct rt_msghdr_common *)rtm_buf;
	rtmh->rtm_msglen = (u_short)len;
	rtmh->rtm_version = RTM_VERSION;
	rtmh->rtm_type = type;
	return m;
}
1378
/*
 * Compute (and optionally emit) a routing message of `type' built from
 * `rtinfo'.  If `cp' is non-NULL the message is written there; otherwise
 * only the required length is computed.  When `cp' is NULL and a walker
 * `w' is supplied, its temporary buffer is (re)allocated to fit and the
 * routine restarts once to fill it in.  Returns the message length.
 */
static int
rt_msg2(u_char type, struct rt_addrinfo *rtinfo, caddr_t cp __header_indexable, struct walkarg *w,
    kauth_cred_t* credp)
{
	int i;
	int len, dlen, rlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	/* Select the header size that matches the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	case RTM_IFINFO2:
		len = sizeof(struct if_msghdr2);
		break;

	case RTM_NEWMADDR2:
		len = sizeof(struct ifma_msghdr2);
		break;

	case RTM_GET_EXT:
		len = sizeof(struct rt_msghdr_ext);
		break;

	case RTM_GET2:
		len = sizeof(struct rt_msghdr2);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Remember the start of the buffer; skip past the header. */
	cp0 = cp;
	if (cp0) {
		cp += len;
	}
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;
		case RTAX_GATEWAY:
		case RTAX_IFP:
			sa = rtm_scrub(type, i, NULL, sa, &ssbuf,
			    sizeof(ssbuf), credp);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		rlen = ROUNDUP32(dlen);
		if (cp) {
			SOCKADDR_COPY(sa, cp, dlen);
			if (dlen != rlen) {
				/* zero the alignment padding */
				bzero(cp + dlen, rlen - dlen);
			}
			cp += rlen;
		}
		len += rlen;
	}
	/* Sizing pass finished: grow the walker's buffer and go around once. */
	if (cp == NULL && w != NULL && !second_time) {
		walkarg_ref_t rw = w;

		if (rw->w_req != NULL) {
			if (rw->w_tmemsize < len) {
				if (rw->w_tmem != NULL) {
					kfree_data_sized_by(rw->w_tmem, rw->w_tmemsize);
				}
				caddr_t new_tmem = (caddr_t)kalloc_data(len, Z_ZERO | Z_WAITOK);
				if (new_tmem != NULL) {
					rw->w_tmemsize = len;
					rw->w_tmem = new_tmem;
				}
			}
			if (rw->w_tmem != NULL) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			}
		}
	}
	/* Fill in the common header now that the total length is known. */
	if (cp) {
		struct rt_msghdr_common *rtmh = (struct rt_msghdr_common *)(void *)cp0;

		rtmh->rtm_version = RTM_VERSION;
		rtmh->rtm_type = type;
		rtmh->rtm_msglen = (u_short)len;
	}
	return len;
}
1503
1504 /*
1505 * This routine is called to generate a message from the routing
1506 * socket indicating that a redirect has occurred, a routing lookup
1507 * has failed, or that a protocol has detected timeouts to a particular
1508 * destination.
1509 */
1510 void
rt_missmsg(u_char type,struct rt_addrinfo * rtinfo,int flags,int error)1511 rt_missmsg(u_char type, struct rt_addrinfo *rtinfo, int flags, int error)
1512 {
1513 struct rt_msghdr_common *rtmh;
1514 struct mbuf *m;
1515 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1516 struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1517
1518 if (route_cb.any_count == 0) {
1519 return;
1520 }
1521 m = rt_msg1(type, rtinfo);
1522 if (m == NULL) {
1523 return;
1524 }
1525 rtmh = mtod(m, struct rt_msghdr_common *);
1526 rtmh->rtm_flags = RTF_DONE | flags;
1527 rtmh->rtm_errno = error;
1528 rtmh->rtm_addrs = rtinfo->rti_addrs;
1529 route_proto.sp_family = sa ? sa->sa_family : 0;
1530 raw_input(m, &route_proto, &route_src, &route_dst);
1531 }
1532
1533 /*
1534 * This routine is called to generate a message from the routing
1535 * socket indicating that the status of a network interface has changed.
1536 */
/*
 * Broadcast an RTM_IFINFO message for `ifp' to all routing-socket
 * listeners, carrying the interface's index, flags and statistics.
 */
void
rt_ifmsg(struct ifnet *ifp)
{
	struct if_msghdr *ifm;
	struct mbuf *m;
	struct rt_addrinfo info;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* Nothing to do when nobody is listening. */
	if (route_cb.any_count == 0) {
		return;
	}
	/* No sockaddrs accompany an IFINFO message. */
	bzero((caddr_t)&info, sizeof(info));
	m = rt_msg1(RTM_IFINFO, &info);
	if (m == NULL) {
		return;
	}
	ifm = mtod(m, struct if_msghdr *);
	ifm->ifm_index = ifp->if_index;
	ifm->ifm_flags = (u_short)ifp->if_flags;
	if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
	ifm->ifm_addrs = 0;
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1560
1561 /*
1562 * This is called to generate messages from the routing socket
1563 * indicating a network interface has had addresses associated with it.
1564 * if we ever reverse the logic and replace messages TO the routing
1565 * socket indicate a request to configure interfaces, then it will
1566 * be unnecessary as the routing socket will automatically generate
1567 * copies of it.
1568 *
1569 * Since this is coming from the interface, it is expected that the
1570 * interface will be locked. Caller must hold rnh_lock and rt_lock.
1571 */
void
rt_newaddrmsg(u_char cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{
	struct rt_addrinfo info;
	struct sockaddr *sa = 0;
	int pass;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifa->ifa_ifp;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (route_cb.any_count == 0) {
		return;
	}

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);
	/*
	 * Two passes: the address message and the route message are sent
	 * in opposite order for ADD vs DELETE (address first on ADD,
	 * route first on DELETE).
	 */
	for (pass = 1; pass < 3; pass++) {
		bzero((caddr_t)&info, sizeof(info));
		if ((cmd == RTM_ADD && pass == 1) ||
		    (cmd == RTM_DELETE && pass == 2)) {
			struct ifa_msghdr *ifam;
			u_char ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

			/* Lock ifp for if_lladdr */
			ifnet_lock_shared(ifp);
			IFA_LOCK(ifa);
			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold its
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			if ((m = rt_msg1(ncmd, &info)) == NULL) {
				IFA_UNLOCK(ifa);
				ifnet_lock_done(ifp);
				continue;
			}
			IFA_UNLOCK(ifa);
			ifnet_lock_done(ifp);
			ifam = mtod(m, struct ifa_msghdr *);
			ifam->ifam_index = ifp->if_index;
			IFA_LOCK_SPIN(ifa);
			ifam->ifam_metric = ifa->ifa_metric;
			ifam->ifam_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			ifam->ifam_addrs = info.rti_addrs;
		}
		if ((cmd == RTM_ADD && pass == 2) ||
		    (cmd == RTM_DELETE && pass == 1)) {
			struct rt_msghdr *rtm;

			if (rt == NULL) {
				continue;
			}
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_DST] = sa = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if ((m = rt_msg1(cmd, &info)) == NULL) {
				continue;
			}
			rtm = mtod(m, struct rt_msghdr *);
			rtm->rtm_index = ifp->if_index;
			rtm->rtm_flags |= rt->rt_flags;
			rtm->rtm_errno = error;
			rtm->rtm_addrs = info.rti_addrs;
		}
		/* Tag with the family of the sockaddr placed in this pass. */
		route_proto.sp_protocol = sa ? sa->sa_family : 0;
		raw_input(m, &route_proto, &route_src, &route_dst);
	}
}
1649
1650 /*
1651 * This is the analogue to the rt_newaddrmsg which performs the same
 * function but for multicast group memberships. This is easier since
1653 * there is no route state to worry about.
1654 */
void
rt_newmaddrmsg(u_char cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* Nothing to do when nobody is listening. */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Lock ifp for if_lladdr */
	ifnet_lock_shared(ifp);
	bzero((caddr_t)&info, sizeof(info));
	IFMA_LOCK(ifma);
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	/* lladdr doesn't need lock */
	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;

	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
	    ifma->ifma_ll->ifma_addr : NULL;
	if ((m = rt_msg1(cmd, &info)) == NULL) {
		IFMA_UNLOCK(ifma);
		ifnet_lock_done(ifp);
		return;
	}
	ifmam = mtod(m, struct ifma_msghdr *);
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1695
1696 const char *
rtm2str(int cmd)1697 rtm2str(int cmd)
1698 {
1699 const char *c __null_terminated = "RTM_?";
1700
1701 switch (cmd) {
1702 case RTM_ADD:
1703 c = "RTM_ADD";
1704 break;
1705 case RTM_DELETE:
1706 c = "RTM_DELETE";
1707 break;
1708 case RTM_CHANGE:
1709 c = "RTM_CHANGE";
1710 break;
1711 case RTM_GET:
1712 c = "RTM_GET";
1713 break;
1714 case RTM_LOSING:
1715 c = "RTM_LOSING";
1716 break;
1717 case RTM_REDIRECT:
1718 c = "RTM_REDIRECT";
1719 break;
1720 case RTM_MISS:
1721 c = "RTM_MISS";
1722 break;
1723 case RTM_LOCK:
1724 c = "RTM_LOCK";
1725 break;
1726 case RTM_OLDADD:
1727 c = "RTM_OLDADD";
1728 break;
1729 case RTM_OLDDEL:
1730 c = "RTM_OLDDEL";
1731 break;
1732 case RTM_RESOLVE:
1733 c = "RTM_RESOLVE";
1734 break;
1735 case RTM_NEWADDR:
1736 c = "RTM_NEWADDR";
1737 break;
1738 case RTM_DELADDR:
1739 c = "RTM_DELADDR";
1740 break;
1741 case RTM_IFINFO:
1742 c = "RTM_IFINFO";
1743 break;
1744 case RTM_NEWMADDR:
1745 c = "RTM_NEWMADDR";
1746 break;
1747 case RTM_DELMADDR:
1748 c = "RTM_DELMADDR";
1749 break;
1750 case RTM_GET_SILENT:
1751 c = "RTM_GET_SILENT";
1752 break;
1753 case RTM_IFINFO2:
1754 c = "RTM_IFINFO2";
1755 break;
1756 case RTM_NEWMADDR2:
1757 c = "RTM_NEWMADDR2";
1758 break;
1759 case RTM_GET2:
1760 c = "RTM_GET2";
1761 break;
1762 case RTM_GET_EXT:
1763 c = "RTM_GET_EXT";
1764 break;
1765 }
1766
1767 return c;
1768 }
1769
1770 /*
1771 * This is used in dumping the kernel table via sysctl().
1772 */
/*
 * Emit one routing-table entry (RTM_GET or RTM_GET2 format) into the
 * sysctl walker `vw'.  Invoked for each radix node during table dumps.
 */
static int
sysctl_dumpentry(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;
	kauth_cred_t *credp;

	cred = current_cached_proc_cred(PROC_NULL);
	credp = &cred;

	RT_LOCK(rt);
	/* Skip entries that don't match the requested flags filter. */
	if ((w->w_op == NET_RT_FLAGS || w->w_op == NET_RT_FLAGS_PRIV) &&
	    !(rt->rt_flags & w->w_arg)) {
		goto done;
	}

	/*
	 * If the matching route has RTF_LLINFO set, then we can skip scrubbing the MAC
	 * only if the outgoing interface is not loopback and the process has entitlement
	 * for neighbor cache read.
	 */
	if (w->w_op == NET_RT_FLAGS_PRIV && (rt->rt_flags & RTF_LLINFO)) {
		if (rt->rt_ifp != lo_ifp &&
		    (route_op_entitlement_check(NULL, cred, ROUTE_OP_READ, TRUE) == 0)) {
			/* NULL cred tells rt_msg2/rtm_scrub not to scrub */
			credp = NULL;
		}
	}

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
	if (RT_HAS_IFADDR(rt)) {
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	if (w->w_op != NET_RT_DUMP2) {
		/* Classic rt_msghdr format. */
		size = rt_msg2(RTM_GET, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr *rtm =
			    (struct rt_msghdr *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_pid = 0;
			rtm->rtm_seq = 0;
			rtm->rtm_errno = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	} else {
		/* Extended rt_msghdr2 format (includes refcnt/parent flags). */
		size = rt_msg2(RTM_GET2, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr2 *rtm =
			    (struct rt_msghdr2 *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_refcnt = rt->rt_refcnt;
			if (rt->rt_parent) {
				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
			} else {
				rtm->rtm_parentflags = 0;
			}
			rtm->rtm_reserved = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1855
1856 /*
1857 * This is used for dumping extended information from route entries.
1858 */
/*
 * Emit one routing-table entry in extended (RTM_GET_EXT) format,
 * including link-quality/reachability info when the route provides a
 * rt_llinfo_get_ri callback.
 */
static int
sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
{
	walkarg_ref_t w = vw;
	rtentry_ref_t rt = rn_rtentry(rn);
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred __single;

	cred = current_cached_proc_cred(PROC_NULL);

	RT_LOCK(rt);
	/* Skip entries that don't match the requested flags filter. */
	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
		goto done;
	}
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;

	size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred);
	if (w->w_req != NULL && w->w_tmem != NULL) {
		struct rt_msghdr_ext *ertm =
		    (struct rt_msghdr_ext *)(void *)w->w_tmem;

		ertm->rtm_flags = rt->rt_flags;
		ertm->rtm_use = rt->rt_use;
		rt_getmetrics(rt, &ertm->rtm_rmx);
		ertm->rtm_index = rt->rt_ifp->if_index;
		ertm->rtm_pid = 0;
		ertm->rtm_seq = 0;
		ertm->rtm_errno = 0;
		ertm->rtm_addrs = info.rti_addrs;
		if (rt->rt_llinfo_get_ri == NULL) {
			/* No link info available: report "unknown" values. */
			bzero(&ertm->rtm_ri, sizeof(ertm->rtm_ri));
			ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
			ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
			ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
		} else {
			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
		}
		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
	}

done:
	RT_UNLOCK(rt);
	return error;
}
1908
/*
 * Returns TRUE when the current task holds the entitlement that allows
 * CLAT46 addresses to be included in interface-list sysctl output.
 */
static boolean_t
should_include_clat46(void)
{
#define CLAT46_ENTITLEMENT "com.apple.private.route.iflist.include-clat46"
	return IOCurrentTaskHasEntitlement(CLAT46_ENTITLEMENT);
}
1915
1916 static boolean_t
is_clat46_address(struct ifaddr * ifa)1917 is_clat46_address(struct ifaddr *ifa)
1918 {
1919 boolean_t is_clat46 = FALSE;
1920
1921 if (ifa->ifa_addr->sa_family == AF_INET6) {
1922 struct in6_ifaddr *ifa6 = ifatoia6(ifa);
1923
1924 is_clat46 = (ifa6->ia6_flags & IN6_IFF_CLAT46) != 0;
1925 }
1926 return is_clat46;
1927 }
1928
1929 /*
1930 * rdar://9307819
1931 * To avoid to call copyout() while holding locks and to cause problems
 * in the paging path, sysctl_iflist() and sysctl_iflist2() construct
1933 * the list in two passes. In the first pass we compute the total
1934 * length of the data we are going to copyout, then we release
1935 * all locks to allocate a temporary buffer that gets filled
1936 * in the second pass.
1937 *
1938 * Note that we are verifying the assumption that kalloc() returns a buffer
1939 * that is at least 32 bits aligned and that the messages and addresses are
1940 * 32 bits aligned.
1941 */
/*
 * Dump the interface (and interface-address) list for family `af' into
 * the sysctl walker `w'.  Two-pass scheme (see comment above): pass 0
 * sizes the output with all locks held; pass 1 fills a temporary buffer
 * which is then copied out.
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct rt_addrinfo info;
	int error = 0;
	int pass = 0;
	size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
	char *total_buffer = NULL, *cp = NULL;
	kauth_cred_t cred __single;
	boolean_t include_clat46 = FALSE;
	boolean_t include_clat46_valid = FALSE;

	cred = current_cached_proc_cred(PROC_NULL);

	bzero((caddr_t)&info, sizeof(info));

	for (pass = 0; pass < 2; pass++) {
		ifnet_head_lock_shared();

		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (error) {
				break;
			}
			/* w_arg non-zero restricts the dump to one ifindex. */
			if (w->w_arg && w->w_arg != ifp->if_index) {
				continue;
			}
			ifnet_lock_shared(ifp);
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold the ifa
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			ifa = ifp->if_lladdr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred);
			if (pass == 0) {
				/* Sizing pass: only accumulate the length. */
				if (os_add_overflow(total_len, len, &total_len)) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
			} else {
				struct if_msghdr *ifm;

				if (current_len + len > total_len) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
				len = rt_msg2(RTM_IFINFO, &info,
				    (caddr_t)cp, NULL, &cred);
				info.rti_info[RTAX_IFP] = NULL;

				ifm = (struct if_msghdr *)(void *)cp;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = (u_short)ifp->if_flags;
				if_data_internal_to_if_data(ifp, &ifp->if_data,
				    &ifm->ifm_data);
				ifm->ifm_addrs = info.rti_addrs;
				/*
				 * <rdar://problem/32940901>
				 * Round bytes only for non-platform
				 */
				if (!csproc_get_platform_binary(w->w_req->p)) {
					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
				}

				cp += len;
				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
				current_len += len;
				VERIFY(current_len <= total_len);
			}
			/* Walk the remaining addresses on this interface. */
			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
				boolean_t is_clat46;

				IFA_LOCK(ifa);
				if (af && af != ifa->ifa_addr->sa_family) {
					IFA_UNLOCK(ifa);
					continue;
				}
				is_clat46 = is_clat46_address(ifa);
				if (is_clat46) {
					/* Entitlement check is cached per call. */
					if (!include_clat46_valid) {
						include_clat46_valid = TRUE;
						include_clat46 =
						    should_include_clat46();
					}
					if (!include_clat46) {
						IFA_UNLOCK(ifa);
						continue;
					}
				}
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
				    &cred);
				if (pass == 0) {
					if (os_add_overflow(total_len, len, &total_len)) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
				} else {
					struct ifa_msghdr *ifam;

					if (current_len + len > total_len) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
					len = rt_msg2(RTM_NEWADDR, &info,
					    (caddr_t)cp, NULL, &cred);

					ifam = (struct ifa_msghdr *)(void *)cp;
					ifam->ifam_index =
					    ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;

					cp += len;
					VERIFY(IS_P2ALIGNED(cp,
					    sizeof(u_int32_t)));
					current_len += len;
					VERIFY(current_len <= total_len);
				}
				IFA_UNLOCK(ifa);
			}
			ifnet_lock_done(ifp);
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			    info.rti_info[RTAX_BRD] = NULL;
		}

		ifnet_head_done();

		if (error != 0) {
			if (error == ENOBUFS) {
				printf("%s: current_len (%lu) + len (%lu) > "
				    "total_len (%lu)\n", __func__, current_len,
				    len, total_len);
			}
			break;
		}

		if (pass == 0) {
			/* Better to return zero length buffer than ENOBUFS */
			if (total_len == 0) {
				total_len = 1;
			}
			/* Add 1/8th slack for addresses added mid-flight. */
			total_len += total_len >> 3;
			total_buffer_len = total_len;
			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
			if (total_buffer == NULL) {
				printf("%s: kalloc_data(%lu) failed\n", __func__,
				    total_len);
				error = ENOBUFS;
				break;
			}
			cp = total_buffer;
			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
		} else {
			/* Fill pass done: copy the buffer out to userland. */
			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
			if (error) {
				break;
			}
		}
	}

	if (total_buffer != NULL) {
		kfree_data(total_buffer, total_buffer_len);
	}

	return error;
}
2122
2123 static int
sysctl_iflist2(int af,struct walkarg * w)2124 sysctl_iflist2(int af, struct walkarg *w)
2125 {
2126 struct ifnet *ifp;
2127 struct ifaddr *ifa;
2128 struct rt_addrinfo info;
2129 int error = 0;
2130 int pass = 0;
2131 size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
2132 char *total_buffer = NULL, *cp = NULL;
2133 kauth_cred_t cred __single;
2134 boolean_t include_clat46 = FALSE;
2135 boolean_t include_clat46_valid = FALSE;
2136
2137 cred = current_cached_proc_cred(PROC_NULL);
2138
2139 bzero((caddr_t)&info, sizeof(info));
2140
2141 for (pass = 0; pass < 2; pass++) {
2142 struct ifmultiaddr *ifma;
2143
2144 ifnet_head_lock_shared();
2145
2146 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
2147 if (error) {
2148 break;
2149 }
2150 if (w->w_arg && w->w_arg != ifp->if_index) {
2151 continue;
2152 }
2153 ifnet_lock_shared(ifp);
2154 /*
2155 * Holding ifnet lock here prevents the link address
2156 * from changing contents, so no need to hold the ifa
2157 * lock. The link address is always present; it's
2158 * never freed.
2159 */
2160 ifa = ifp->if_lladdr;
2161 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2162 len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred);
2163 if (pass == 0) {
2164 if (os_add_overflow(total_len, len, &total_len)) {
2165 ifnet_lock_done(ifp);
2166 error = ENOBUFS;
2167 break;
2168 }
2169 } else {
2170 struct if_msghdr2 *ifm;
2171
2172 if (current_len + len > total_len) {
2173 ifnet_lock_done(ifp);
2174 error = ENOBUFS;
2175 break;
2176 }
2177 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2178 len = rt_msg2(RTM_IFINFO2, &info,
2179 (caddr_t)cp, NULL, &cred);
2180 info.rti_info[RTAX_IFP] = NULL;
2181
2182 ifm = (struct if_msghdr2 *)(void *)cp;
2183 ifm->ifm_addrs = info.rti_addrs;
2184 ifm->ifm_flags = (u_short)ifp->if_flags;
2185 ifm->ifm_index = ifp->if_index;
2186 ifm->ifm_snd_len = IFCQ_LEN(ifp->if_snd);
2187 ifm->ifm_snd_maxlen = IFCQ_MAXLEN(ifp->if_snd);
2188 ifm->ifm_snd_drops =
2189 (int)ifp->if_snd->ifcq_dropcnt.packets;
2190 ifm->ifm_timer = ifp->if_timer;
2191 if_data_internal_to_if_data64(ifp,
2192 &ifp->if_data, &ifm->ifm_data);
2193 /*
2194 * <rdar://problem/32940901>
2195 * Round bytes only for non-platform
2196 */
2197 if (!csproc_get_platform_binary(w->w_req->p)) {
2198 ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
2199 ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
2200 }
2201
2202 cp += len;
2203 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2204 current_len += len;
2205 VERIFY(current_len <= total_len);
2206 }
2207 while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
2208 boolean_t is_clat46;
2209
2210 IFA_LOCK(ifa);
2211 if (af && af != ifa->ifa_addr->sa_family) {
2212 IFA_UNLOCK(ifa);
2213 continue;
2214 }
2215 is_clat46 = is_clat46_address(ifa);
2216 if (is_clat46) {
2217 if (!include_clat46_valid) {
2218 include_clat46_valid = TRUE;
2219 include_clat46 =
2220 should_include_clat46();
2221 }
2222 if (!include_clat46) {
2223 IFA_UNLOCK(ifa);
2224 continue;
2225 }
2226 }
2227 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2228 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2229 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2230 len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
2231 &cred);
2232 if (pass == 0) {
2233 if (os_add_overflow(total_len, len, &total_len)) {
2234 IFA_UNLOCK(ifa);
2235 error = ENOBUFS;
2236 break;
2237 }
2238 } else {
2239 struct ifa_msghdr *ifam;
2240
2241 if (current_len + len > total_len) {
2242 IFA_UNLOCK(ifa);
2243 error = ENOBUFS;
2244 break;
2245 }
2246 len = rt_msg2(RTM_NEWADDR, &info,
2247 (caddr_t)cp, NULL, &cred);
2248
2249 ifam = (struct ifa_msghdr *)(void *)cp;
2250 ifam->ifam_index =
2251 ifa->ifa_ifp->if_index;
2252 ifam->ifam_flags = ifa->ifa_flags;
2253 ifam->ifam_metric = ifa->ifa_metric;
2254 ifam->ifam_addrs = info.rti_addrs;
2255
2256 cp += len;
2257 VERIFY(IS_P2ALIGNED(cp,
2258 sizeof(u_int32_t)));
2259 current_len += len;
2260 VERIFY(current_len <= total_len);
2261 }
2262 IFA_UNLOCK(ifa);
2263 }
2264 if (error) {
2265 ifnet_lock_done(ifp);
2266 break;
2267 }
2268
2269 for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
2270 ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
2271 struct ifaddr *ifa0;
2272
2273 IFMA_LOCK(ifma);
2274 if (af && af != ifma->ifma_addr->sa_family) {
2275 IFMA_UNLOCK(ifma);
2276 continue;
2277 }
2278 bzero((caddr_t)&info, sizeof(info));
2279 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2280 /*
2281 * Holding ifnet lock here prevents the link
2282 * address from changing contents, so no need
2283 * to hold the ifa0 lock. The link address is
2284 * always present; it's never freed.
2285 */
2286 ifa0 = ifp->if_lladdr;
2287 info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
2288 if (ifma->ifma_ll != NULL) {
2289 info.rti_info[RTAX_GATEWAY] =
2290 ifma->ifma_ll->ifma_addr;
2291 }
2292 len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL,
2293 &cred);
2294 if (pass == 0) {
2295 total_len += len;
2296 } else {
2297 struct ifma_msghdr2 *ifmam;
2298
2299 if (current_len + len > total_len) {
2300 IFMA_UNLOCK(ifma);
2301 error = ENOBUFS;
2302 break;
2303 }
2304 len = rt_msg2(RTM_NEWMADDR2, &info,
2305 (caddr_t)cp, NULL, &cred);
2306
2307 ifmam =
2308 (struct ifma_msghdr2 *)(void *)cp;
2309 ifmam->ifmam_addrs = info.rti_addrs;
2310 ifmam->ifmam_flags = 0;
2311 ifmam->ifmam_index =
2312 ifma->ifma_ifp->if_index;
2313 ifmam->ifmam_refcount =
2314 ifma->ifma_reqcnt;
2315
2316 cp += len;
2317 VERIFY(IS_P2ALIGNED(cp,
2318 sizeof(u_int32_t)));
2319 current_len += len;
2320 }
2321 IFMA_UNLOCK(ifma);
2322 }
2323 ifnet_lock_done(ifp);
2324 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2325 info.rti_info[RTAX_BRD] = NULL;
2326 }
2327 ifnet_head_done();
2328
2329 if (error) {
2330 if (error == ENOBUFS) {
2331 printf("%s: current_len (%lu) + len (%lu) > "
2332 "total_len (%lu)\n", __func__, current_len,
2333 len, total_len);
2334 }
2335 break;
2336 }
2337
2338 if (pass == 0) {
2339 /* Better to return zero length buffer than ENOBUFS */
2340 if (total_len == 0) {
2341 total_len = 1;
2342 }
2343 total_len += total_len >> 3;
2344 total_buffer_len = total_len;
2345 total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
2346 if (total_buffer == NULL) {
2347 printf("%s: kalloc_data(%lu) failed\n", __func__,
2348 total_len);
2349 error = ENOBUFS;
2350 break;
2351 }
2352 cp = total_buffer;
2353 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2354 } else {
2355 error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
2356 if (error) {
2357 break;
2358 }
2359 }
2360 }
2361
2362 if (total_buffer != NULL) {
2363 kfree_data(total_buffer, total_buffer_len);
2364 }
2365
2366 return error;
2367 }
2368
2369
2370 static int
sysctl_rtstat(struct sysctl_req * req)2371 sysctl_rtstat(struct sysctl_req *req)
2372 {
2373 struct rtstat rtstat_compat = { 0 };
2374
2375 #define RTSTAT_COMPAT(_field) rtstat_compat._field = rtstat._field < SHRT_MAX ? (short)rtstat._field : SHRT_MAX
2376 RTSTAT_COMPAT(rts_badredirect);
2377 RTSTAT_COMPAT(rts_dynamic);
2378 RTSTAT_COMPAT(rts_newgateway);
2379 RTSTAT_COMPAT(rts_unreach);
2380 RTSTAT_COMPAT(rts_wildcard);
2381 RTSTAT_COMPAT(rts_badrtgwroute);
2382 #undef RTSTAT_TO_COMPAT
2383
2384 return SYSCTL_OUT(req, &rtstat_compat, sizeof(struct rtstat));
2385 }
2386
2387 static int
sysctl_rtstat_64(struct sysctl_req * req)2388 sysctl_rtstat_64(struct sysctl_req *req)
2389 {
2390 return SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat_64));
2391 }
2392
2393 static int
sysctl_rttrash(struct sysctl_req * req)2394 sysctl_rttrash(struct sysctl_req *req)
2395 {
2396 return SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
2397 }
2398
/*
 * PF_ROUTE sysctl dispatcher: after the leading oid component, name[]
 * carries the address family (0 == all), the operation (w_op), and an
 * optional per-op argument such as an interface index (w_arg).
 * Dispatches to the table-dump, interface-list, or statistics handlers.
 * Write requests are rejected with EPERM; unknown ops return EINVAL.
 */
static int
sysctl_rtsock SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	DECLARE_SYSCTL_HANDLER_ARG_ARRAY(int, 4, name, namelen);
	struct radix_node_head *rnh;
	int i, error = EINVAL;
	u_char af;
	struct walkarg w;

	/* Skip the oid component; the rest are operation arguments. */
	name++;
	namelen--;
	if (req->newptr) {
		/* This sysctl is read-only. */
		return EPERM;
	}
	af = (u_char)name[0];
	Bzero(&w, sizeof(w));
	w.w_op = name[1];
	w.w_arg = name[2];
	w.w_req = req;

	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_DUMP2:
	case NET_RT_FLAGS:
	case NET_RT_FLAGS_PRIV:
		/* Walk each routing table matching af (0 == all families). */
		lck_mtx_lock(rnh_lock);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
			    (error = rnh->rnh_walktree(rnh,
			    sysctl_dumpentry, &w))) {
				break;
			}
		}
		lck_mtx_unlock(rnh_lock);
		break;
	case NET_RT_DUMPX:
	case NET_RT_DUMPX_FLAGS:
		/* Same walk, but emits extended-format route entries. */
		lck_mtx_lock(rnh_lock);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
			    (error = rnh->rnh_walktree(rnh,
			    sysctl_dumpentry_ext, &w))) {
				break;
			}
		}
		lck_mtx_unlock(rnh_lock);
		break;
	case NET_RT_IFLIST:
		error = sysctl_iflist(af, &w);
		break;
	case NET_RT_IFLIST2:
		error = sysctl_iflist2(af, &w);
		break;
	case NET_RT_STAT:
		error = sysctl_rtstat(req);
		break;
	case NET_RT_STAT_64:
		error = sysctl_rtstat_64(req);
		break;
	case NET_RT_TRASH:
		error = sysctl_rttrash(req);
		break;
	}
	/* Release the scratch buffer the tree walkers may have grown. */
	if (w.w_tmem != NULL) {
		kfree_data_sized_by(w.w_tmem, w.w_tmemsize);
	}
	return error;
}
2468
2469 /*
2470 * Definitions of protocols supported in the ROUTE domain.
2471 */
/* Single raw-socket protocol entry: atomic records that carry addresses. */
static struct protosw routesw[] = {
	{
		.pr_type = SOCK_RAW,
		.pr_protocol = 0,
		.pr_flags = PR_ATOMIC | PR_ADDR,
		.pr_output = route_output,
		.pr_ctlinput = raw_ctlinput,
		.pr_usrreqs = &route_usrreqs,
	}
};

/* Number of entries in routesw[]. */
static int route_proto_count = (sizeof(routesw) / sizeof(struct protosw));
2484
/* The PF_ROUTE domain; route_dinit is invoked when the domain attaches. */
struct domain routedomain_s = {
	.dom_family = PF_ROUTE,
	.dom_name = "route",
	.dom_init = route_dinit,
};
2490
2491 static void
route_dinit(struct domain * dp)2492 route_dinit(struct domain *dp)
2493 {
2494 struct protosw *pr;
2495 int i;
2496
2497 VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
2498 VERIFY(routedomain == NULL);
2499
2500 routedomain = dp;
2501
2502 for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) {
2503 net_add_proto(pr, dp, 1);
2504 }
2505
2506 route_init();
2507 }
2508