1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)rtsock.c 8.5 (Berkeley) 11/2/94
61 */
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kauth.h>
66 #include <sys/kernel.h>
67 #include <sys/sysctl.h>
68 #include <sys/proc.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/syslog.h>
76 #include <sys/mcache.h>
77 #include <kern/locks.h>
78 #include <sys/codesign.h>
79
80 #include <net/if.h>
81 #include <net/route.h>
82 #include <net/dlil.h>
83 #include <net/raw_cb.h>
84 #include <netinet/in.h>
85 #include <netinet/in_var.h>
86 #include <netinet/in_arp.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip6.h>
89 #include <netinet6/nd6.h>
90
/* Global routing statistics, maintained by the routing code elsewhere. */
extern struct rtstat rtstat;
extern struct domain routedomain_s;
/* Cached pointer to the PF_ROUTE domain; set up at domain init time. */
static struct domain *routedomain = NULL;

/*
 * Canonical source/destination addresses stamped on routing messages
 * looped back to listeners via raw_input().  sa_len == 2 covers only
 * sa_len + sa_family (no address payload).
 */
static struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr sa_zero = { .sa_len = sizeof(sa_zero), .sa_family = AF_INET, .sa_data = { 0, } };

/*
 * Per-family counts of attached routing sockets; consulted to decide
 * whether anyone is listening before building/looping back messages.
 * Updated with atomic_add_32 (no lock held).
 */
struct route_cb {
	u_int32_t ip_count;     /* attached w/ AF_INET */
	u_int32_t ip6_count;    /* attached w/ AF_INET6 */
	u_int32_t any_count;    /* total attached */
};

static struct route_cb route_cb;

/*
 * Per-request walk context passed through the sysctl dump callbacks
 * (sysctl_dumpentry*, sysctl_iflist*).
 */
struct walkarg {
	int w_tmemsize;         /* size of the temporary message buffer */
	int w_op, w_arg;        /* sysctl op and its argument */
	caddr_t w_tmem;         /* temporary message build buffer */
	struct sysctl_req *w_req; /* the sysctl request to copy results into */
};

static void route_dinit(struct domain *);
static int rts_abort(struct socket *);
static int rts_attach(struct socket *, int, struct proc *);
static int rts_bind(struct socket *, struct sockaddr *, struct proc *);
static int rts_connect(struct socket *, struct sockaddr *, struct proc *);
static int rts_detach(struct socket *);
static int rts_disconnect(struct socket *);
static int rts_peeraddr(struct socket *, struct sockaddr **);
static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *,
    struct mbuf *, struct proc *);
static int rts_shutdown(struct socket *);
static int rts_sockaddr(struct socket *, struct sockaddr **);

static int route_output(struct mbuf *, struct socket *);
static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
    struct sockaddr *, unsigned int);
static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
static struct mbuf *rt_msg1(u_char, struct rt_addrinfo *);
static int rt_msg2(u_char, struct rt_addrinfo *, caddr_t, struct walkarg *,
    kauth_cred_t *);
static int sysctl_dumpentry(struct radix_node *rn, void *vw);
static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
static int sysctl_iflist(int af, struct walkarg *w);
static int sysctl_iflist2(int af, struct walkarg *w);
static int sysctl_rtstat(struct sysctl_req *);
static int sysctl_rttrash(struct sysctl_req *);
static int sysctl_rtsock SYSCTL_HANDLER_ARGS;

SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED,
    sysctl_rtsock, "");

SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "routing");

/* Align x to 1024 (only power of 2) assuming x is positive */
#define ALIGN_BYTES(x) do {                                             \
	x = (uint32_t)P2ALIGN(x, 1024);                                 \
} while(0)

/* Round a byte count up to a 32-bit boundary; 0 rounds to sizeof(uint32_t). */
#define ROUNDUP32(a)                                                    \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) :        \
	sizeof (uint32_t))

/* Advance cursor x past sockaddr n, keeping 32-bit alignment. */
#define ADVANCE32(x, n)                                                 \
	(x += ROUNDUP32((n)->sa_len))

/* True if the route has an interface address with a valid ifa_addr. */
#define RT_HAS_IFADDR(rt)                                               \
	((rt)->rt_ifa != NULL && (rt)->rt_ifa->ifa_addr != NULL)
163
164 /*
165 * It really doesn't make any sense at all for this code to share much
166 * with raw_usrreq.c, since its functionality is so restricted. XXX
167 */
168 static int
rts_abort(struct socket * so)169 rts_abort(struct socket *so)
170 {
171 return raw_usrreqs.pru_abort(so);
172 }
173
174 /* pru_accept is EOPNOTSUPP */
175
static int
rts_attach(struct socket *so, int proto, struct proc *p)
{
#pragma unused(p)
	struct rawcb *rp;
	int error;

	/*
	 * Attach a raw control block to a newly created routing socket.
	 * The caller holds the socket lock on entry (see comment below).
	 */
	VERIFY(so->so_pcb == NULL);

	rp = kalloc_type(struct rawcb, Z_WAITOK_ZERO_NOFAIL);
	so->so_pcb = (caddr_t)rp;
	/* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
	error = raw_attach(so, proto);
	rp = sotorawcb(so);
	if (error) {
		/* Undo the allocation; mark the pcb as being torn down. */
		kfree_type(struct rawcb, rp);
		so->so_pcb = NULL;
		so->so_flags |= SOF_PCBCLEARING;
		return error;
	}

	/* Count this listener per address family; balanced in rts_detach(). */
	switch (rp->rcb_proto.sp_protocol) {
	case AF_INET:
		atomic_add_32(&route_cb.ip_count, 1);
		break;
	case AF_INET6:
		atomic_add_32(&route_cb.ip6_count, 1);
		break;
	}
	/* Routing sockets are implicitly "connected" to the kernel. */
	rp->rcb_faddr = &route_src;
	atomic_add_32(&route_cb.any_count, 1);
	/* the socket is already locked when we enter rts_attach */
	soisconnected(so);
	/* Loop our own messages back to us by default. */
	so->so_options |= SO_USELOOPBACK;
	return 0;
}
212
213 static int
rts_bind(struct socket * so,struct sockaddr * nam,struct proc * p)214 rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
215 {
216 return raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */
217 }
218
219 static int
rts_connect(struct socket * so,struct sockaddr * nam,struct proc * p)220 rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
221 {
222 return raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */
223 }
224
225 /* pru_connect2 is EOPNOTSUPP */
226 /* pru_control is EOPNOTSUPP */
227
228 static int
rts_detach(struct socket * so)229 rts_detach(struct socket *so)
230 {
231 struct rawcb *rp = sotorawcb(so);
232
233 VERIFY(rp != NULL);
234
235 switch (rp->rcb_proto.sp_protocol) {
236 case AF_INET:
237 atomic_add_32(&route_cb.ip_count, -1);
238 break;
239 case AF_INET6:
240 atomic_add_32(&route_cb.ip6_count, -1);
241 break;
242 }
243 atomic_add_32(&route_cb.any_count, -1);
244 return raw_usrreqs.pru_detach(so);
245 }
246
247 static int
rts_disconnect(struct socket * so)248 rts_disconnect(struct socket *so)
249 {
250 return raw_usrreqs.pru_disconnect(so);
251 }
252
253 /* pru_listen is EOPNOTSUPP */
254
255 static int
rts_peeraddr(struct socket * so,struct sockaddr ** nam)256 rts_peeraddr(struct socket *so, struct sockaddr **nam)
257 {
258 return raw_usrreqs.pru_peeraddr(so, nam);
259 }
260
261 /* pru_rcvd is EOPNOTSUPP */
262 /* pru_rcvoob is EOPNOTSUPP */
263
264 static int
rts_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)265 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
266 struct mbuf *control, struct proc *p)
267 {
268 return raw_usrreqs.pru_send(so, flags, m, nam, control, p);
269 }
270
271 /* pru_sense is null */
272
273 static int
rts_shutdown(struct socket * so)274 rts_shutdown(struct socket *so)
275 {
276 return raw_usrreqs.pru_shutdown(so);
277 }
278
279 static int
rts_sockaddr(struct socket * so,struct sockaddr ** nam)280 rts_sockaddr(struct socket *so, struct sockaddr **nam)
281 {
282 return raw_usrreqs.pru_sockaddr(so, nam);
283 }
284
/*
 * User-request vector for PF_ROUTE sockets.  Entries not listed here
 * (accept, connect2, control, listen, rcvd, rcvoob, sense) fall back to
 * the protocol-switch defaults (mostly EOPNOTSUPP); see comments above.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort = rts_abort,
	.pru_attach = rts_attach,
	.pru_bind = rts_bind,
	.pru_connect = rts_connect,
	.pru_detach = rts_detach,
	.pru_disconnect = rts_disconnect,
	.pru_peeraddr = rts_peeraddr,
	.pru_send = rts_send,
	.pru_shutdown = rts_shutdown,
	.pru_sockaddr = rts_sockaddr,
	.pru_sosend = sosend,       /* generic socket send/receive paths */
	.pru_soreceive = soreceive,
};
299
/*
 * route_output - process one routing message (RTM_ADD, RTM_DELETE,
 * RTM_GET, RTM_CHANGE, RTM_LOCK, RTM_GET_SILENT) received from a
 * PF_ROUTE socket.
 *
 * m:  mbuf chain containing the rt_msghdr plus trailing sockaddrs
 *     (consumed: freed or handed to raw_input on all paths).
 * so: the sending socket; locked on entry and on return, but unlocked
 *     around the routing-table work and around raw_input().
 *
 * Returns 0 or an errno; errors are also reflected back to listeners
 * in rtm_errno of the looped-back message.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
	struct rt_msghdr *rtm = NULL;
	size_t rtm_len = 0;
	struct rtentry *rt = NULL;
	struct rtentry *saved_nrt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	int len, error = 0;
	sa_family_t dst_sa_family = 0;
	struct ifnet *ifp = NULL;
	struct sockaddr_in dst_in, gate_in;
	int sendonlytoself = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct rawcb *rp = NULL;
	boolean_t is_router = FALSE;
/* Record the error and jump to the common cleanup/reply path. */
#define senderr(e) { error = (e); goto flush; }
	if (m == NULL || ((m->m_len < sizeof(intptr_t)) &&
	    (m = m_pullup(m, sizeof(intptr_t))) == NULL)) {
		return ENOBUFS;
	}
	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * Unlock the socket (but keep a reference) it won't be
	 * accessed until raw_input appends to it.
	 */
	socket_unlock(so, 0);
	lck_mtx_lock(rnh_lock);

	/* The message must be complete and self-consistent in length. */
	len = m->m_pkthdr.len;
	if (len < sizeof(*rtm) ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	/* Copy the message into a private buffer we can modify/extend. */
	rtm = kalloc_data(len, Z_WAITOK);
	if (rtm == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	rtm_len = (size_t)len;
	m_copydata(m, 0, len, (caddr_t)rtm);
	if (rtm->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}

	/*
	 * Silent version of RTM_GET for Reachabiltiy APIs. We may change
	 * all RTM_GETs to be silent in the future, so this is private for now.
	 */
	if (rtm->rtm_type == RTM_GET_SILENT) {
		if (!(so->so_options & SO_USELOOPBACK)) {
			senderr(EINVAL);
		}
		sendonlytoself = 1;
		rtm->rtm_type = RTM_GET;
	}

	/*
	 * Perform permission checking, only privileged sockets
	 * may perform operations other than RTM_GET
	 */
	if (rtm->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPERM);
	}

	rtm->rtm_pid = proc_selfpid();
	info.rti_addrs = rtm->rtm_addrs;
	/* Parse the sockaddrs that follow the header (userland data). */
	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) {
		senderr(EINVAL);
	}

	/*
	 * Normalize a short AF_INET destination into a full, zeroed
	 * sockaddr_in on the stack so downstream code can rely on sa_len.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
	    info.rti_info[RTAX_DST]->sa_len != sizeof(struct sockaddr_in)) {
		/* At minimum, we need up to sin_addr */
		if (info.rti_info[RTAX_DST]->sa_len <
		    offsetof(struct sockaddr_in, sin_zero)) {
			senderr(EINVAL);
		}
		bzero(&dst_in, sizeof(dst_in));
		dst_in.sin_len = sizeof(dst_in);
		dst_in.sin_family = AF_INET;
		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
		info.rti_info[RTAX_DST] = (struct sockaddr *)&dst_in;
		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
	} else if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    info.rti_info[RTAX_DST]->sa_len < sizeof(struct sockaddr_in6)) {
		senderr(EINVAL);
	}

	/* Same normalization for a short AF_INET gateway. */
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
		    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof(struct sockaddr_in)) {
			/* At minimum, we need up to sin_addr */
			if (info.rti_info[RTAX_GATEWAY]->sa_len <
			    offsetof(struct sockaddr_in, sin_zero)) {
				senderr(EINVAL);
			}
			bzero(&gate_in, sizeof(gate_in));
			gate_in.sin_len = sizeof(gate_in);
			gate_in.sin_family = AF_INET;
			gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
			gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
			info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gate_in;
		} else if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET6 &&
		    info.rti_info[RTAX_GATEWAY]->sa_len < sizeof(struct sockaddr_in6)) {
			senderr(EINVAL);
		}
	}

	/*
	 * Intern the genmask in the mask radix tree so we can keep a
	 * stable pointer to it in the route entry.
	 */
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		t = rn_addmask((caddr_t)info.rti_info[RTAX_GENMASK], 0, 1);
		if (t != NULL && Bcmp(info.rti_info[RTAX_GENMASK],
		    t->rn_key, *(u_char *)info.rti_info[RTAX_GENMASK]) == 0) {
			info.rti_info[RTAX_GENMASK] =
			    (struct sockaddr *)(t->rn_key);
		} else {
			senderr(ENOBUFS);
		}
	}

	/*
	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
	 */
	if (rtm->rtm_flags & RTF_IFSCOPE) {
		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
		    info.rti_info[RTAX_DST]->sa_family != AF_INET6) {
			senderr(EINVAL);
		}
		ifscope = rtm->rtm_index;
	}
	/*
	 * Block changes on INTCOPROC interfaces.
	 */
	if (ifscope) {
		unsigned int intcoproc_scope = 0;
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (IFNET_IS_INTCOPROC(ifp)) {
				intcoproc_scope = ifp->if_index;
				break;
			}
		}
		ifnet_head_done();
		/* Only the kernel (pid 0) may touch the co-processor scope. */
		if (intcoproc_scope == ifscope && proc_getpid(current_proc()) != 0) {
			senderr(EINVAL);
		}
	}

	/*
	 * RTF_PROXY can only be set internally from within the kernel.
	 */
	if (rtm->rtm_flags & RTF_PROXY) {
		senderr(EINVAL);
	}

	/*
	 * For AF_INET, always zero out the embedded scope ID.  If this is
	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
	 * flag and the corresponding rtm_index value.  This is to prevent
	 * false interpretation of the scope ID because it's using the sin_zero
	 * field, which might not be properly cleared by the requestor.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
	}
	/*
	 * For scoped IPv6 addresses with an embedded (KAME-style) scope in
	 * s6_addr16[1] but no sin6_scope_id, move the scope out of the
	 * address bytes into sin6_scope_id.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    SIN6(info.rti_info[RTAX_DST])->sin6_scope_id == 0) {
		SIN6(info.rti_info[RTAX_DST])->sin6_scope_id = ntohs(SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1]);
		SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1] = 0;
	}

	switch (rtm->rtm_type) {
	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL) {
			senderr(EINVAL);
		}

		error = rtrequest_scoped_locked(RTM_ADD,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0 && saved_nrt != NULL) {
			RT_LOCK(saved_nrt);
			/*
			 * If the route request specified an interface with
			 * IFA and/or IFP, we set the requested interface on
			 * the route with rt_setif.  It would be much better
			 * to do this inside rtrequest, but that would
			 * require passing the desired interface, in some
			 * form, to rtrequest.  Since rtrequest is called in
			 * so many places (roughly 40 in our source), adding
			 * a parameter is to much for us to swallow; this is
			 * something for the FreeBSD developers to tackle.
			 * Instead, we let rtrequest compute whatever
			 * interface it wants, then come in behind it and
			 * stick in the interface that we really want.  This
			 * works reasonably well except when rtrequest can't
			 * figure out what interface to use (with
			 * ifa_withroute) and returns ENETUNREACH.  Ideally
			 * it shouldn't matter if rtrequest can't figure out
			 * the interface if we're going to explicitly set it
			 * ourselves anyway.  But practically we can't
			 * recover here because rtrequest will not do any of
			 * the work necessary to add the route if it can't
			 * find an interface.  As long as there is a default
			 * route that leads to some interface, rtrequest will
			 * find an interface, so this problem should be
			 * rarely encountered.
			 * [email protected]
			 */
			rt_setif(saved_nrt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);
			(void)rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt);
			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
			saved_nrt->rt_rmx.rmx_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			RT_REMREF_LOCKED(saved_nrt);
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		error = rtrequest_scoped_locked(RTM_DELETE,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0) {
			/* Report the deleted entry back to the caller. */
			rt = saved_nrt;
			RT_LOCK(rt);
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL) {
			senderr(EAFNOSUPPORT);
		}
		/*
		 * Lookup the best match based on the key-mask pair;
		 * callee adds a reference and checks for root node.
		 */
		rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
		    info.rti_info[RTAX_NETMASK], rnh, ifscope);
		if (rt == NULL) {
			senderr(ESRCH);
		}
		RT_LOCK(rt);

		/*
		 * Holding rnh_lock here prevents the possibility of
		 * ifa from changing (e.g. in_ifinit), so it is safe
		 * to access its ifa_addr (down below) without locking.
		 */
		switch (rtm->rtm_type) {
		case RTM_GET: {
			kauth_cred_t cred;
			kauth_cred_t* credp;
			struct ifaddr *ifa2;
report:
			/* Entered with rt locked and referenced (also via
			 * the RTM_DELETE goto above). */
			cred = kauth_cred_proc_ref(current_proc());
			credp = &cred;

			ifa2 = NULL;
			RT_LOCK_ASSERT_HELD(rt);
			info.rti_info[RTAX_DST] = rt_key(rt);
			dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp != NULL) {
					/* Hold a ref on the link-level ifaddr
					 * while building the reply. */
					ifnet_lock_shared(ifp);
					ifa2 = ifp->if_lladdr;
					info.rti_info[RTAX_IFP] =
					    ifa2->ifa_addr;
					IFA_ADDREF(ifa2);
					ifnet_lock_done(ifp);
					info.rti_info[RTAX_IFA] =
					    rt->rt_ifa->ifa_addr;
					rtm->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				rtm->rtm_index = ifp->if_index;
			}
			/* First rt_msg2 pass (NULL cp) just sizes the reply. */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL, credp);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}
			struct rt_msghdr *out_rtm;
			out_rtm = kalloc_data(len, Z_WAITOK);
			if (out_rtm == NULL) {
				RT_UNLOCK(rt);
				if (ifa2 != NULL) {
					IFA_REMREF(ifa2);
				}
				senderr(ENOBUFS);
			}
			/* Second pass fills the new, larger buffer. */
			Bcopy(rtm, out_rtm, sizeof(struct rt_msghdr));
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			(void) rt_msg2(out_rtm->rtm_type, &info, (caddr_t)out_rtm,
			    NULL, &cred);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}
			/* Replace the request buffer with the reply buffer. */
			kfree_data(rtm, rtm_len);
			rtm = out_rtm;
			rtm_len = len;
			rtm->rtm_flags = rt->rt_flags;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_addrs = info.rti_addrs;
			if (ifa2 != NULL) {
				IFA_REMREF(ifa2);
			}

			kauth_cred_unref(&cred);
			break;
		}

		case RTM_CHANGE:
			is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;

			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
			    info.rti_info[RTAX_GATEWAY]))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			/*
			 * If they tried to change things but didn't specify
			 * the required gateway, then just use the old one.
			 * This can happen if the user tries to change the
			 * flags on the default route without changing the
			 * default gateway.  Changing flags still doesn't work.
			 */
			if ((rt->rt_flags & RTF_GATEWAY) &&
			    info.rti_info[RTAX_GATEWAY] == NULL) {
				info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			}

			/*
			 * On Darwin, we call rt_setif which contains the
			 * equivalent to the code found at this very spot
			 * in BSD.
			 */
			rt_setif(rt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);

			if ((error = rt_setmetrics(rtm->rtm_inits,
			    &rtm->rtm_rmx, rt))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			if (info.rti_info[RTAX_GENMASK]) {
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			}

			/*
			 * Enqueue work item to invoke callback for this route entry
			 * This may not be needed always, but for now issue it anytime
			 * RTM_CHANGE gets called.
			 */
			route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, TRUE);
			/*
			 * If the route is for a router, walk the tree to send refresh
			 * event to protocol cloned entries
			 */
			if (is_router) {
				struct route_event rt_ev;
				route_event_init(&rt_ev, rt, NULL, ROUTE_ENTRY_REFRESH);
				RT_UNLOCK(rt);
				(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
				RT_LOCK(rt);
			}
			OS_FALLTHROUGH;
		case RTM_LOCK:
			/* Update which metrics are locked against kernel updates. */
			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			break;
		}
		RT_UNLOCK(rt);
		break;
	default:
		senderr(EOPNOTSUPP);
	}
flush:
	/* Common exit: stamp the result into the reply (if we have one). */
	if (rtm != NULL) {
		if (error) {
			rtm->rtm_errno = error;
		} else {
			rtm->rtm_flags |= RTF_DONE;
		}
	}
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		rtfree_locked(rt);
	}
	lck_mtx_unlock(rnh_lock);

	/* relock the socket now */
	socket_lock(so, 0);
	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			/* No other listeners; drop everything and return. */
			kfree_data(rtm, rtm_len);
			m_freem(m);
			return error;
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	/* Copy the (possibly replaced) reply back into the mbuf chain. */
	if (rtm != NULL) {
		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
		if (m->m_pkthdr.len < rtm->rtm_msglen) {
			/* m_copyback could not grow the chain; give up. */
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > rtm->rtm_msglen) {
			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
		}
		kfree_data(rtm, rtm_len);
	}
	if (sendonlytoself && m != NULL) {
		/* RTM_GET_SILENT: deliver only to the requesting socket. */
		error = 0;
		if (sbappendaddr(&so->so_rcv, &route_src, m,
		    NULL, &error) != 0) {
			sorwakeup(so);
		}
		if (error) {
			return error;
		}
	} else {
		/* Broadcast to all PF_ROUTE listeners (minus us if rp set). */
		struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
		if (rp != NULL) {
			rp->rcb_proto.sp_family = 0; /* Avoid us */
		}
		if (dst_sa_family != 0) {
			route_proto.sp_protocol = dst_sa_family;
		}
		if (m != NULL) {
			socket_unlock(so, 0);
			raw_input(m, &route_proto, &route_src, &route_dst);
			socket_lock(so, 0);
		}
		if (rp != NULL) {
			rp->rcb_proto.sp_family = PF_ROUTE;
		}
	}
	return error;
}
789
790 void
rt_setexpire(struct rtentry * rt,uint64_t expiry)791 rt_setexpire(struct rtentry *rt, uint64_t expiry)
792 {
793 /* set both rt_expire and rmx_expire */
794 rt->rt_expire = expiry;
795 if (expiry) {
796 rt->rt_rmx.rmx_expire =
797 (int32_t)(expiry + rt->base_calendartime -
798 rt->base_uptime);
799 } else {
800 rt->rt_rmx.rmx_expire = 0;
801 }
802 }
803
/*
 * rt_setmetrics - apply user-requested metric updates to a route.
 *
 * which: bitmask of RTV_* flags selecting which fields of 'in' to copy.
 * in:    user-supplied metrics.
 * out:   the route entry to update.
 *
 * Two modes: normal metric copy (RTV_REFRESH_HOST clear), or a
 * link-layer refresh request (RTV_REFRESH_HOST set, alone).
 * Returns 0 on success, EINVAL/ENOTSUP on a bad refresh request.
 */
static int
rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
{
	if (!(which & RTV_REFRESH_HOST)) {
		struct timeval caltime;
		/* NOTE(review): this result is unused; getmicrotime() is
		 * called again below before it is consumed. */
		getmicrotime(&caltime);
/* Copy field e from 'in' when flag f is requested. */
#define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
		metric(RTV_RPIPE, rmx_recvpipe);
		metric(RTV_SPIPE, rmx_sendpipe);
		metric(RTV_SSTHRESH, rmx_ssthresh);
		metric(RTV_RTT, rmx_rtt);
		metric(RTV_RTTVAR, rmx_rttvar);
		metric(RTV_HOPCOUNT, rmx_hopcount);
		metric(RTV_MTU, rmx_mtu);
		metric(RTV_EXPIRE, rmx_expire);
#undef metric
		if (out->rt_rmx.rmx_expire > 0) {
			/* account for system time change */
			getmicrotime(&caltime);
			out->base_calendartime +=
			    NET_CALCULATE_CLOCKSKEW(caltime,
			    out->base_calendartime,
			    net_uptime(), out->base_uptime);
			/* Convert the calendar-based rmx_expire back into
			 * the uptime-based rt_expire. */
			rt_setexpire(out,
			    out->rt_rmx.rmx_expire -
			    out->base_calendartime +
			    out->base_uptime);
		} else {
			rt_setexpire(out, 0);
		}

		/* Both expiry representations must agree on zero/non-zero. */
		VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
		VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
	} else {
		/* Only RTV_REFRESH_HOST must be set */
		if ((which & ~RTV_REFRESH_HOST) ||
		    (out->rt_flags & RTF_STATIC) ||
		    !(out->rt_flags & RTF_LLINFO)) {
			return EINVAL;
		}

		if (out->rt_llinfo_refresh == NULL) {
			return ENOTSUP;
		}

		/* Ask the link layer (e.g. ARP/ND) to refresh this entry. */
		out->rt_llinfo_refresh(out);
	}
	return 0;
}
853
/*
 * rt_getmetrics - snapshot a route's metrics for reporting to userland,
 * converting the uptime-based rt_expire into a calendar-time rmx_expire.
 * Note: updates in->base_calendartime in place to compensate for any
 * wall-clock changes since the route was stamped.
 */
static void
rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
{
	struct timeval caltime;

	/* Both expiry representations must agree on zero/non-zero. */
	VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
	VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);

	*out = in->rt_rmx;

	if (in->rt_expire != 0) {
		/* account for system time change */
		getmicrotime(&caltime);

		in->base_calendartime +=
		    NET_CALCULATE_CLOCKSKEW(caltime,
		    in->base_calendartime, net_uptime(), in->base_uptime);

		/* Convert uptime-based expiry to calendar time. */
		out->rmx_expire = (int32_t)(in->base_calendartime +
		    in->rt_expire - in->base_uptime);
	} else {
		out->rmx_expire = 0;
	}
}
878
879 /*
880 * Set route's interface given info.rti_info[RTAX_IFP],
881 * info.rti_info[RTAX_IFA], and gateway.
882 */
/*
 * Set route's interface given info.rti_info[RTAX_IFP],
 * info.rti_info[RTAX_IFA], and gateway.
 *
 * rt:      route to update; caller holds rnh_lock and the RT lock, and
 *          holds a reference on rt.
 * Ifpaddr: optional link-level sockaddr naming the interface.
 * Ifaaddr: optional interface address.
 * Gate:    optional gateway address.
 * ifscope: interface scope for scoped lookups (IFSCOPE_NONE if unscoped).
 */
static void
rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
    struct sockaddr *Gate, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct ifnet *ifp = NULL;
	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK_ASSERT_HELD(rt);

	/* Don't update a defunct route */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return;
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);

	/*
	 * New gateway could require new ifaddr, ifp; flags may also
	 * be different; ifp may be specified by ll sockaddr when
	 * protocol address is ambiguous.
	 */
	if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
	    (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
		/* Prefer an address on that interface matching IFA/gateway. */
		IFA_REMREF(ifa);
		ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
	} else {
		if (ifa != NULL) {
			IFA_REMREF(ifa);
			ifa = NULL;
		}
		if (Ifpaddr && (ifp = if_withname(Ifpaddr))) {
			if (Gate) {
				ifa = ifaof_ifpforaddr(Gate, ifp);
			} else {
				/* No gateway: fall back to the interface's
				 * first address. */
				ifnet_lock_shared(ifp);
				ifa = TAILQ_FIRST(&ifp->if_addrhead);
				if (ifa != NULL) {
					IFA_ADDREF(ifa);
				}
				ifnet_lock_done(ifp);
			}
		} else if (Ifaaddr &&
		    (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
			ifp = ifa->ifa_ifp;
		} else if (Gate != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, since holding
			 * rnh_lock here prevents another thread from calling
			 * rt_setgate() on this route.  We cannot hold the
			 * lock across ifa_ifwithroute since the lookup done
			 * by that routine may point to the same route.
			 */
			RT_UNLOCK(rt);
			if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
			    rt_key(rt), Gate, ifscope)) != NULL) {
				ifp = ifa->ifa_ifp;
			}
			RT_LOCK(rt);
			/* Don't update a defunct route */
			if (rt->rt_flags & RTF_CONDEMNED) {
				if (ifa != NULL) {
					IFA_REMREF(ifa);
				}
				/* Release extra ref */
				RT_REMREF_LOCKED(rt);
				return;
			}
		}
	}

	/* trigger route cache reevaluation */
	if (rt_key(rt)->sa_family == AF_INET) {
		routegenid_inet_update();
	} else if (rt_key(rt)->sa_family == AF_INET6) {
		routegenid_inet6_update();
	}

	if (ifa != NULL) {
		struct ifaddr *oifa = rt->rt_ifa;
		if (oifa != ifa) {
			/* Notify the old ifaddr that it is losing the route. */
			if (oifa != NULL) {
				IFA_LOCK_SPIN(oifa);
				ifa_rtrequest = oifa->ifa_rtrequest;
				IFA_UNLOCK(oifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_DELETE, rt, Gate);
				}
			}
			rtsetifa(rt, ifa);

			if (rt->rt_ifp != ifp) {
				/*
				 * Purge any link-layer info caching.
				 */
				if (rt->rt_llinfo_purge != NULL) {
					rt->rt_llinfo_purge(rt);
				}

				/*
				 * Adjust route ref count for the interfaces.
				 */
				if (rt->rt_if_ref_fn != NULL) {
					rt->rt_if_ref_fn(ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
			}
			rt->rt_ifp = ifp;
			/*
			 * If this is the (non-scoped) default route, record
			 * the interface index used for the primary ifscope.
			 */
			if (rt_primary_default(rt, rt_key(rt))) {
				set_primary_ifscope(rt_key(rt)->sa_family,
				    rt->rt_ifp->if_index);
			}
			/*
			 * If rmx_mtu is not locked, update it
			 * to the MTU used by the new interface.
			 */
			if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
				rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
				if (rt_key(rt)->sa_family == AF_INET &&
				    INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
					rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
					/* Further adjust the size for CLAT46 expansion */
					rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
				}
			}

			/* Notify the new ifaddr that it gained the route. */
			if (rt->rt_ifa != NULL) {
				IFA_LOCK_SPIN(rt->rt_ifa);
				ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
				IFA_UNLOCK(rt->rt_ifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_ADD, rt, Gate);
				}
			}
			IFA_REMREF(ifa);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return;
		}
		IFA_REMREF(ifa);
		ifa = NULL;
	}

	/* XXX: to reset gateway to correct value, at RTM_CHANGE */
	if (rt->rt_ifa != NULL) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
		IFA_UNLOCK(rt->rt_ifa);
		if (ifa_rtrequest != NULL) {
			ifa_rtrequest(RTM_ADD, rt, Gate);
		}
	}

	/*
	 * Workaround for local address routes pointing to the loopback
	 * interface added by configd, until <rdar://problem/12970142>.
	 */
	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
	    (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) {
		ifa = ifa_ifwithaddr(rt_key(rt));
		if (ifa != NULL) {
			if (ifa != rt->rt_ifa) {
				rtsetifa(rt, ifa);
			}
			IFA_REMREF(ifa);
		}
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
}
1064
/*
 * Extract the addresses of the passed sockaddrs into rtinfo->rti_info[].
 * Do a little sanity checking so as to avoid bad memory references.
 * This data is derived straight from userland.
 *
 * Returns 0 on success (rti_info[] slots point into the caller's buffer),
 * or EINVAL if a sockaddr overruns the buffer or has a bogus length.
 */
static int
rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
{
	struct sockaddr *sa;
	int i;

	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
	/* Visit each RTAX_* slot whose presence bit is set in rti_addrs. */
	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		if ((rtinfo->rti_addrs & (1 << i)) == 0) {
			continue;
		}
		sa = (struct sockaddr *)cp;
		/*
		 * It won't fit.
		 */
		if ((cp + sa->sa_len) > cplim) {
			return EINVAL;
		}
		/* Reject lengths larger than any valid sockaddr can be. */
		if (sa->sa_len > sizeof(struct sockaddr_storage)) {
			return EINVAL;
		}
		/*
		 * there are no more.. quit now
		 * If there are more bits, they are in error.
		 * I've seen this. route(1) can evidently generate these.
		 * This causes kernel to core dump.
		 * for compatibility, If we see this, point to a safe address.
		 */
		if (sa->sa_len == 0) {
			rtinfo->rti_info[i] = &sa_zero;
			return 0; /* should be EINVAL but for compat */
		}
		/* Too short to even hold the fixed sockaddr header. */
		if (sa->sa_len < offsetof(struct sockaddr, sa_data)) {
			return EINVAL;
		}
		/* accept it */
		rtinfo->rti_info[i] = sa;
		/* Step to the next 32-bit-aligned sockaddr in the buffer. */
		ADVANCE32(cp, sa);
	}
	return 0;
}
1111
/*
 * Build a routing-socket message of the given type in a freshly allocated
 * mbuf: a fixed-size header (chosen by 'type') followed by each sockaddr
 * present in rtinfo, each 32-bit aligned.  Sets the corresponding presence
 * bits in rtinfo->rti_addrs as a side effect.
 *
 * Returns the mbuf on success, or NULL if allocation fails or the message
 * could not be fully assembled.
 */
static struct mbuf *
rt_msg1(u_char type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr *rtm;
	struct mbuf *m;
	int i;
	int len, dlen, off;

	/* Pick the fixed header size appropriate for the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Get an mbuf; upgrade to a cluster if the header exceeds MHLEN. */
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL) {
		return NULL;
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	rtm = mtod(m, struct rt_msghdr *);
	bzero((caddr_t)rtm, len);
	off = len;
	/* Append each present sockaddr after the header. */
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accomodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			/* Prefer DST as the scrub hint; fall back to IFA. */
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		m_copyback(m, off, dlen, (caddr_t)sa);
		/* 'len' tracks the true end; 'off' is rounded for alignment. */
		len = off + dlen;
		off += ROUNDUP32(dlen);
	}
	/*
	 * If m_copyback() could not extend the chain, the packet length
	 * will not match what we appended; discard the partial message.
	 */
	if (m->m_pkthdr.len != len) {
		m_freem(m);
		return NULL;
	}
	rtm->rtm_msglen = (u_short)len;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	return m;
}
1198
/*
 * Build a routing-socket message of the given type into the buffer 'cp'
 * (when non-NULL): a fixed-size header followed by each sockaddr present
 * in rtinfo, each 32-bit aligned and scrubbed.  Returns the total message
 * length in bytes.
 *
 * When 'cp' is NULL the function only computes the required length.  If a
 * walkarg 'w' with a pending request is also supplied, a scratch buffer
 * (w->w_tmem) large enough for the message is (re)allocated and the
 * function restarts once to fill it in (the "second_time" pass).
 *
 * 'credp' lets rtm_scrub() redact link-layer info in GATEWAY/IFP
 * sockaddrs based on the requesting process's entitlements.
 */
static int
rt_msg2(u_char type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w,
    kauth_cred_t* credp)
{
	int i;
	int len, dlen, rlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	/* Pick the fixed header size appropriate for the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	case RTM_IFINFO2:
		len = sizeof(struct if_msghdr2);
		break;

	case RTM_NEWMADDR2:
		len = sizeof(struct ifma_msghdr2);
		break;

	case RTM_GET_EXT:
		len = sizeof(struct rt_msghdr_ext);
		break;

	case RTM_GET2:
		len = sizeof(struct rt_msghdr2);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	cp0 = cp;
	if (cp0) {
		cp += len;	/* sockaddrs start right after the header */
	}
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accomodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			/* Prefer DST as the scrub hint; fall back to IFA. */
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;
		case RTAX_GATEWAY:
		case RTAX_IFP:
			/* May redact link-layer data based on credential. */
			sa = rtm_scrub(type, i, NULL, sa, &ssbuf,
			    sizeof(ssbuf), credp);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		rlen = ROUNDUP32(dlen);	/* keep each sockaddr 32-bit aligned */
		if (cp) {
			bcopy((caddr_t)sa, cp, (size_t)dlen);
			if (dlen != rlen) {
				/* zero the alignment padding */
				bzero(cp + dlen, rlen - dlen);
			}
			cp += rlen;
		}
		len += rlen;
	}
	/*
	 * Length-only pass on behalf of a sysctl walker: (re)allocate the
	 * scratch buffer if needed, then restart to actually fill it in.
	 */
	if (cp == NULL && w != NULL && !second_time) {
		struct walkarg *rw = w;

		if (rw->w_req != NULL) {
			if (rw->w_tmemsize < len) {
				if (rw->w_tmem != NULL) {
					kfree_data(rw->w_tmem, rw->w_tmemsize);
				}
				rw->w_tmem = (caddr_t) kalloc_data(len, Z_ZERO | Z_WAITOK);
				if (rw->w_tmem != NULL) {
					rw->w_tmemsize = len;
				}
			}
			if (rw->w_tmem != NULL) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			}
		}
	}
	if (cp) {
		/* Finalize the header now that the total length is known. */
		struct rt_msghdr *rtm = (struct rt_msghdr *)(void *)cp0;

		rtm->rtm_version = RTM_VERSION;
		rtm->rtm_type = type;
		rtm->rtm_msglen = (u_short)len;
	}
	return len;
}
1322
1323 /*
1324 * This routine is called to generate a message from the routing
1325 * socket indicating that a redirect has occurred, a routing lookup
1326 * has failed, or that a protocol has detected timeouts to a particular
1327 * destination.
1328 */
1329 void
rt_missmsg(u_char type,struct rt_addrinfo * rtinfo,int flags,int error)1330 rt_missmsg(u_char type, struct rt_addrinfo *rtinfo, int flags, int error)
1331 {
1332 struct rt_msghdr *rtm;
1333 struct mbuf *m;
1334 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1335 struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1336
1337 if (route_cb.any_count == 0) {
1338 return;
1339 }
1340 m = rt_msg1(type, rtinfo);
1341 if (m == NULL) {
1342 return;
1343 }
1344 rtm = mtod(m, struct rt_msghdr *);
1345 rtm->rtm_flags = RTF_DONE | flags;
1346 rtm->rtm_errno = error;
1347 rtm->rtm_addrs = rtinfo->rti_addrs;
1348 route_proto.sp_family = sa ? sa->sa_family : 0;
1349 raw_input(m, &route_proto, &route_src, &route_dst);
1350 }
1351
1352 /*
1353 * This routine is called to generate a message from the routing
1354 * socket indicating that the status of a network interface has changed.
1355 */
1356 void
rt_ifmsg(struct ifnet * ifp)1357 rt_ifmsg(struct ifnet *ifp)
1358 {
1359 struct if_msghdr *ifm;
1360 struct mbuf *m;
1361 struct rt_addrinfo info;
1362 struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1363
1364 if (route_cb.any_count == 0) {
1365 return;
1366 }
1367 bzero((caddr_t)&info, sizeof(info));
1368 m = rt_msg1(RTM_IFINFO, &info);
1369 if (m == NULL) {
1370 return;
1371 }
1372 ifm = mtod(m, struct if_msghdr *);
1373 ifm->ifm_index = ifp->if_index;
1374 ifm->ifm_flags = (u_short)ifp->if_flags;
1375 if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
1376 ifm->ifm_addrs = 0;
1377 raw_input(m, &route_proto, &route_src, &route_dst);
1378 }
1379
/*
 * This is called to generate messages from the routing socket
 * indicating a network interface has had addresses associated with it.
 * if we ever reverse the logic and replace messages TO the routing
 * socket indicate a request to configure interfaces, then it will
 * be unnecessary as the routing socket will automatically generate
 * copies of it.
 *
 * Since this is coming from the interface, it is expected that the
 * interface will be locked. Caller must hold rnh_lock and rt_lock.
 *
 * Two messages are emitted per call: an address message
 * (RTM_NEWADDR/RTM_DELADDR) and a route message (RTM_ADD/RTM_DELETE).
 * For RTM_ADD the address message is sent first; for RTM_DELETE the
 * route message is sent first, mirroring the order in which the
 * underlying operations take effect.
 */
void
rt_newaddrmsg(u_char cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{
	struct rt_addrinfo info;
	struct sockaddr *sa = 0;
	int pass;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifa->ifa_ifp;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (route_cb.any_count == 0) {
		return;
	}

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);
	for (pass = 1; pass < 3; pass++) {
		bzero((caddr_t)&info, sizeof(info));
		if ((cmd == RTM_ADD && pass == 1) ||
		    (cmd == RTM_DELETE && pass == 2)) {
			/* Address message: RTM_NEWADDR or RTM_DELADDR. */
			struct ifa_msghdr *ifam;
			u_char ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

			/* Lock ifp for if_lladdr */
			ifnet_lock_shared(ifp);
			IFA_LOCK(ifa);
			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold its
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			if ((m = rt_msg1(ncmd, &info)) == NULL) {
				IFA_UNLOCK(ifa);
				ifnet_lock_done(ifp);
				continue;	/* skip raw_input for this pass */
			}
			IFA_UNLOCK(ifa);
			ifnet_lock_done(ifp);
			ifam = mtod(m, struct ifa_msghdr *);
			ifam->ifam_index = ifp->if_index;
			IFA_LOCK_SPIN(ifa);
			ifam->ifam_metric = ifa->ifa_metric;
			ifam->ifam_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			ifam->ifam_addrs = info.rti_addrs;
		}
		if ((cmd == RTM_ADD && pass == 2) ||
		    (cmd == RTM_DELETE && pass == 1)) {
			/* Route message: RTM_ADD or RTM_DELETE. */
			struct rt_msghdr *rtm;

			if (rt == NULL) {
				continue;
			}
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_DST] = sa = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if ((m = rt_msg1(cmd, &info)) == NULL) {
				continue;	/* skip raw_input for this pass */
			}
			rtm = mtod(m, struct rt_msghdr *);
			rtm->rtm_index = ifp->if_index;
			rtm->rtm_flags |= rt->rt_flags;
			rtm->rtm_errno = error;
			rtm->rtm_addrs = info.rti_addrs;
		}
		/* Tag with the address family for raw_input() filtering. */
		route_proto.sp_protocol = sa ? sa->sa_family : 0;
		raw_input(m, &route_proto, &route_src, &route_dst);
	}
}
1468
/*
 * This is the analogue to the rt_newaddrmsg which performs the same
 * function but for multicast group memberhips. This is easier since
 * there is no route state to worry about.
 *
 * 'cmd' is RTM_NEWMADDR or RTM_DELMADDR.
 */
void
rt_newmaddrmsg(u_char cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	if (route_cb.any_count == 0) {
		return;
	}

	/* Lock ifp for if_lladdr */
	ifnet_lock_shared(ifp);
	bzero((caddr_t)&info, sizeof(info));
	IFMA_LOCK(ifma);
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	/* lladdr doesn't need lock */
	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;

	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
	    ifma->ifma_ll->ifma_addr : NULL;
	if ((m = rt_msg1(cmd, &info)) == NULL) {
		IFMA_UNLOCK(ifma);
		ifnet_lock_done(ifp);
		return;
	}
	ifmam = mtod(m, struct ifma_msghdr *);
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	/* Tag with the group's address family for raw_input() filtering. */
	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1514
1515 const char *
rtm2str(int cmd)1516 rtm2str(int cmd)
1517 {
1518 const char *c = "RTM_?";
1519
1520 switch (cmd) {
1521 case RTM_ADD:
1522 c = "RTM_ADD";
1523 break;
1524 case RTM_DELETE:
1525 c = "RTM_DELETE";
1526 break;
1527 case RTM_CHANGE:
1528 c = "RTM_CHANGE";
1529 break;
1530 case RTM_GET:
1531 c = "RTM_GET";
1532 break;
1533 case RTM_LOSING:
1534 c = "RTM_LOSING";
1535 break;
1536 case RTM_REDIRECT:
1537 c = "RTM_REDIRECT";
1538 break;
1539 case RTM_MISS:
1540 c = "RTM_MISS";
1541 break;
1542 case RTM_LOCK:
1543 c = "RTM_LOCK";
1544 break;
1545 case RTM_OLDADD:
1546 c = "RTM_OLDADD";
1547 break;
1548 case RTM_OLDDEL:
1549 c = "RTM_OLDDEL";
1550 break;
1551 case RTM_RESOLVE:
1552 c = "RTM_RESOLVE";
1553 break;
1554 case RTM_NEWADDR:
1555 c = "RTM_NEWADDR";
1556 break;
1557 case RTM_DELADDR:
1558 c = "RTM_DELADDR";
1559 break;
1560 case RTM_IFINFO:
1561 c = "RTM_IFINFO";
1562 break;
1563 case RTM_NEWMADDR:
1564 c = "RTM_NEWMADDR";
1565 break;
1566 case RTM_DELMADDR:
1567 c = "RTM_DELMADDR";
1568 break;
1569 case RTM_GET_SILENT:
1570 c = "RTM_GET_SILENT";
1571 break;
1572 case RTM_IFINFO2:
1573 c = "RTM_IFINFO2";
1574 break;
1575 case RTM_NEWMADDR2:
1576 c = "RTM_NEWMADDR2";
1577 break;
1578 case RTM_GET2:
1579 c = "RTM_GET2";
1580 break;
1581 case RTM_GET_EXT:
1582 c = "RTM_GET_EXT";
1583 break;
1584 }
1585
1586 return c;
1587 }
1588
/*
 * This is used in dumping the kernel table via sysctl().
 *
 * Per-route callback invoked by rnh_walktree(); formats one rtentry as
 * an RTM_GET (or RTM_GET2 for NET_RT_DUMP2) message into the walkarg's
 * scratch buffer and copies it out to the requesting process.
 */
static int
sysctl_dumpentry(struct radix_node *rn, void *vw)
{
	struct walkarg *w = vw;
	struct rtentry *rt = (struct rtentry *)rn;
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred;
	kauth_cred_t *credp;

	cred = kauth_cred_proc_ref(current_proc());
	credp = &cred;

	RT_LOCK(rt);
	/* For flag-filtered dumps, skip routes without the requested bits. */
	if ((w->w_op == NET_RT_FLAGS || w->w_op == NET_RT_FLAGS_PRIV) &&
	    !(rt->rt_flags & w->w_arg)) {
		goto done;
	}

	/*
	 * If the matching route has RTF_LLINFO set, then we can skip scrubbing the MAC
	 * only if the outgoing interface is not loopback and the process has entitlement
	 * for neighbor cache read.
	 */
	if (w->w_op == NET_RT_FLAGS_PRIV && (rt->rt_flags & RTF_LLINFO)) {
		if (rt->rt_ifp != lo_ifp &&
		    (route_op_entitlement_check(NULL, cred, ROUTE_OP_READ, TRUE) == 0)) {
			/* NULL credential disables link-layer scrubbing. */
			credp = NULL;
		}
	}

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
	if (RT_HAS_IFADDR(rt)) {
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	if (w->w_op != NET_RT_DUMP2) {
		/* Classic rt_msghdr format. */
		size = rt_msg2(RTM_GET, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr *rtm =
			    (struct rt_msghdr *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_pid = 0;
			rtm->rtm_seq = 0;
			rtm->rtm_errno = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	} else {
		/* Extended rt_msghdr2 format (refcount, parent flags). */
		size = rt_msg2(RTM_GET2, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr2 *rtm =
			    (struct rt_msghdr2 *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_refcnt = rt->rt_refcnt;
			if (rt->rt_parent) {
				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
			} else {
				rtm->rtm_parentflags = 0;
			}
			rtm->rtm_reserved = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	}

done:
	RT_UNLOCK(rt);
	kauth_cred_unref(&cred);
	return error;
}
1675
/*
 * This is used for dumping extended information from route entries.
 *
 * Per-route callback invoked by rnh_walktree() for NET_RT_DUMPX /
 * NET_RT_DUMPX_FLAGS; formats one rtentry as an RTM_GET_EXT message
 * (including link reachability info) and copies it out.
 */
static int
sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
{
	struct walkarg *w = vw;
	struct rtentry *rt = (struct rtentry *)rn;
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred;

	cred = kauth_cred_proc_ref(current_proc());

	RT_LOCK(rt);
	/* For flag-filtered dumps, skip routes without the requested bits. */
	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
		goto done;
	}
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;

	size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred);
	if (w->w_req != NULL && w->w_tmem != NULL) {
		struct rt_msghdr_ext *ertm =
		    (struct rt_msghdr_ext *)(void *)w->w_tmem;

		ertm->rtm_flags = rt->rt_flags;
		ertm->rtm_use = rt->rt_use;
		rt_getmetrics(rt, &ertm->rtm_rmx);
		ertm->rtm_index = rt->rt_ifp->if_index;
		ertm->rtm_pid = 0;
		ertm->rtm_seq = 0;
		ertm->rtm_errno = 0;
		ertm->rtm_addrs = info.rti_addrs;
		/* Fill reachability info, or defaults if unsupported. */
		if (rt->rt_llinfo_get_ri == NULL) {
			bzero(&ertm->rtm_ri, sizeof(ertm->rtm_ri));
			ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
			ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
			ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
		} else {
			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
		}
		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
	}

done:
	RT_UNLOCK(rt);
	kauth_cred_unref(&cred);
	return error;
}
1729
/*
 * rdar://9307819
 * To avoid to call copyout() while holding locks and to cause problems
 * in the paging path, sysctl_iflist() and sysctl_iflist2() contstruct
 * the list in two passes. In the first pass we compute the total
 * length of the data we are going to copyout, then we release
 * all locks to allocate a temporary buffer that gets filled
 * in the second pass.
 *
 * Note that we are verifying the assumption that kalloc() returns a buffer
 * that is at least 32 bits aligned and that the messages and addresses are
 * 32 bits aligned.
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct rt_addrinfo info;
	int error = 0;
	int pass = 0;
	size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
	char *total_buffer = NULL, *cp = NULL;
	kauth_cred_t cred;

	cred = kauth_cred_proc_ref(current_proc());

	bzero((caddr_t)&info, sizeof(info));

	/* Pass 0 sizes the output; pass 1 fills the allocated buffer. */
	for (pass = 0; pass < 2; pass++) {
		ifnet_head_lock_shared();

		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (error) {
				break;
			}
			/* Non-zero w_arg restricts the dump to one ifindex. */
			if (w->w_arg && w->w_arg != ifp->if_index) {
				continue;
			}
			ifnet_lock_shared(ifp);
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold the ifa
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			ifa = ifp->if_lladdr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred);
			if (pass == 0) {
				if (os_add_overflow(total_len, len, &total_len)) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
			} else {
				struct if_msghdr *ifm;

				/* Guard against the tables growing since pass 0. */
				if (current_len + len > total_len) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
				len = rt_msg2(RTM_IFINFO, &info,
				    (caddr_t)cp, NULL, &cred);
				info.rti_info[RTAX_IFP] = NULL;

				ifm = (struct if_msghdr *)(void *)cp;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = (u_short)ifp->if_flags;
				if_data_internal_to_if_data(ifp, &ifp->if_data,
				    &ifm->ifm_data);
				ifm->ifm_addrs = info.rti_addrs;
				/*
				 * <rdar://problem/32940901>
				 * Round bytes only for non-platform
				 */
				if (!csproc_get_platform_binary(w->w_req->p)) {
					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
				}

				cp += len;
				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
				current_len += len;
				VERIFY(current_len <= total_len);
			}
			/* Walk the remaining (non-link) addresses on ifp. */
			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
				IFA_LOCK(ifa);
				if (af && af != ifa->ifa_addr->sa_family) {
					IFA_UNLOCK(ifa);
					continue;
				}
				/* Never expose CLAT46 translation addresses. */
				if (ifa->ifa_addr->sa_family == AF_INET6 &&
				    (((struct in6_ifaddr *)ifa)->ia6_flags &
				    IN6_IFF_CLAT46) != 0) {
					IFA_UNLOCK(ifa);
					continue;
				}
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
				    &cred);
				if (pass == 0) {
					if (os_add_overflow(total_len, len, &total_len)) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
				} else {
					struct ifa_msghdr *ifam;

					if (current_len + len > total_len) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
					len = rt_msg2(RTM_NEWADDR, &info,
					    (caddr_t)cp, NULL, &cred);

					ifam = (struct ifa_msghdr *)(void *)cp;
					ifam->ifam_index =
					    ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;

					cp += len;
					VERIFY(IS_P2ALIGNED(cp,
					    sizeof(u_int32_t)));
					current_len += len;
					VERIFY(current_len <= total_len);
				}
				IFA_UNLOCK(ifa);
			}
			ifnet_lock_done(ifp);
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			    info.rti_info[RTAX_BRD] = NULL;
		}

		ifnet_head_done();

		if (error != 0) {
			if (error == ENOBUFS) {
				printf("%s: current_len (%lu) + len (%lu) > "
				    "total_len (%lu)\n", __func__, current_len,
				    len, total_len);
			}
			break;
		}

		if (pass == 0) {
			/* Better to return zero length buffer than ENOBUFS */
			if (total_len == 0) {
				total_len = 1;
			}
			/* Pad by 1/8th in case the lists grow under us. */
			total_len += total_len >> 3;
			total_buffer_len = total_len;
			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
			if (total_buffer == NULL) {
				printf("%s: kalloc_data(%lu) failed\n", __func__,
				    total_len);
				error = ENOBUFS;
				break;
			}
			cp = total_buffer;
			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
		} else {
			/* Copy the filled buffer out in a single call. */
			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
			if (error) {
				break;
			}
		}
	}

	if (total_buffer != NULL) {
		kfree_data(total_buffer, total_buffer_len);
	}

	kauth_cred_unref(&cred);
	return error;
}
1914
1915 static int
sysctl_iflist2(int af,struct walkarg * w)1916 sysctl_iflist2(int af, struct walkarg *w)
1917 {
1918 struct ifnet *ifp;
1919 struct ifaddr *ifa;
1920 struct rt_addrinfo info;
1921 int error = 0;
1922 int pass = 0;
1923 size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
1924 char *total_buffer = NULL, *cp = NULL;
1925 kauth_cred_t cred;
1926
1927 cred = kauth_cred_proc_ref(current_proc());
1928
1929 bzero((caddr_t)&info, sizeof(info));
1930
1931 for (pass = 0; pass < 2; pass++) {
1932 struct ifmultiaddr *ifma;
1933
1934 ifnet_head_lock_shared();
1935
1936 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1937 if (error) {
1938 break;
1939 }
1940 if (w->w_arg && w->w_arg != ifp->if_index) {
1941 continue;
1942 }
1943 ifnet_lock_shared(ifp);
1944 /*
1945 * Holding ifnet lock here prevents the link address
1946 * from changing contents, so no need to hold the ifa
1947 * lock. The link address is always present; it's
1948 * never freed.
1949 */
1950 ifa = ifp->if_lladdr;
1951 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1952 len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred);
1953 if (pass == 0) {
1954 if (os_add_overflow(total_len, len, &total_len)) {
1955 ifnet_lock_done(ifp);
1956 error = ENOBUFS;
1957 break;
1958 }
1959 } else {
1960 struct if_msghdr2 *ifm;
1961
1962 if (current_len + len > total_len) {
1963 ifnet_lock_done(ifp);
1964 error = ENOBUFS;
1965 break;
1966 }
1967 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1968 len = rt_msg2(RTM_IFINFO2, &info,
1969 (caddr_t)cp, NULL, &cred);
1970 info.rti_info[RTAX_IFP] = NULL;
1971
1972 ifm = (struct if_msghdr2 *)(void *)cp;
1973 ifm->ifm_addrs = info.rti_addrs;
1974 ifm->ifm_flags = (u_short)ifp->if_flags;
1975 ifm->ifm_index = ifp->if_index;
1976 ifm->ifm_snd_len = IFCQ_LEN(ifp->if_snd);
1977 ifm->ifm_snd_maxlen = IFCQ_MAXLEN(ifp->if_snd);
1978 ifm->ifm_snd_drops =
1979 (int)ifp->if_snd->ifcq_dropcnt.packets;
1980 ifm->ifm_timer = ifp->if_timer;
1981 if_data_internal_to_if_data64(ifp,
1982 &ifp->if_data, &ifm->ifm_data);
1983 /*
1984 * <rdar://problem/32940901>
1985 * Round bytes only for non-platform
1986 */
1987 if (!csproc_get_platform_binary(w->w_req->p)) {
1988 ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
1989 ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
1990 }
1991
1992 cp += len;
1993 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
1994 current_len += len;
1995 VERIFY(current_len <= total_len);
1996 }
1997 while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
1998 IFA_LOCK(ifa);
1999 if (af && af != ifa->ifa_addr->sa_family) {
2000 IFA_UNLOCK(ifa);
2001 continue;
2002 }
2003 if (ifa->ifa_addr->sa_family == AF_INET6 &&
2004 (((struct in6_ifaddr *)ifa)->ia6_flags &
2005 IN6_IFF_CLAT46) != 0) {
2006 IFA_UNLOCK(ifa);
2007 continue;
2008 }
2009
2010 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2011 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2012 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2013 len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
2014 &cred);
2015 if (pass == 0) {
2016 if (os_add_overflow(total_len, len, &total_len)) {
2017 IFA_UNLOCK(ifa);
2018 error = ENOBUFS;
2019 break;
2020 }
2021 } else {
2022 struct ifa_msghdr *ifam;
2023
2024 if (current_len + len > total_len) {
2025 IFA_UNLOCK(ifa);
2026 error = ENOBUFS;
2027 break;
2028 }
2029 len = rt_msg2(RTM_NEWADDR, &info,
2030 (caddr_t)cp, NULL, &cred);
2031
2032 ifam = (struct ifa_msghdr *)(void *)cp;
2033 ifam->ifam_index =
2034 ifa->ifa_ifp->if_index;
2035 ifam->ifam_flags = ifa->ifa_flags;
2036 ifam->ifam_metric = ifa->ifa_metric;
2037 ifam->ifam_addrs = info.rti_addrs;
2038
2039 cp += len;
2040 VERIFY(IS_P2ALIGNED(cp,
2041 sizeof(u_int32_t)));
2042 current_len += len;
2043 VERIFY(current_len <= total_len);
2044 }
2045 IFA_UNLOCK(ifa);
2046 }
2047 if (error) {
2048 ifnet_lock_done(ifp);
2049 break;
2050 }
2051
2052 for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
2053 ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
2054 struct ifaddr *ifa0;
2055
2056 IFMA_LOCK(ifma);
2057 if (af && af != ifma->ifma_addr->sa_family) {
2058 IFMA_UNLOCK(ifma);
2059 continue;
2060 }
2061 bzero((caddr_t)&info, sizeof(info));
2062 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2063 /*
2064 * Holding ifnet lock here prevents the link
2065 * address from changing contents, so no need
2066 * to hold the ifa0 lock. The link address is
2067 * always present; it's never freed.
2068 */
2069 ifa0 = ifp->if_lladdr;
2070 info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
2071 if (ifma->ifma_ll != NULL) {
2072 info.rti_info[RTAX_GATEWAY] =
2073 ifma->ifma_ll->ifma_addr;
2074 }
2075 len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL,
2076 &cred);
2077 if (pass == 0) {
2078 total_len += len;
2079 } else {
2080 struct ifma_msghdr2 *ifmam;
2081
2082 if (current_len + len > total_len) {
2083 IFMA_UNLOCK(ifma);
2084 error = ENOBUFS;
2085 break;
2086 }
2087 len = rt_msg2(RTM_NEWMADDR2, &info,
2088 (caddr_t)cp, NULL, &cred);
2089
2090 ifmam =
2091 (struct ifma_msghdr2 *)(void *)cp;
2092 ifmam->ifmam_addrs = info.rti_addrs;
2093 ifmam->ifmam_flags = 0;
2094 ifmam->ifmam_index =
2095 ifma->ifma_ifp->if_index;
2096 ifmam->ifmam_refcount =
2097 ifma->ifma_reqcnt;
2098
2099 cp += len;
2100 VERIFY(IS_P2ALIGNED(cp,
2101 sizeof(u_int32_t)));
2102 current_len += len;
2103 }
2104 IFMA_UNLOCK(ifma);
2105 }
2106 ifnet_lock_done(ifp);
2107 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2108 info.rti_info[RTAX_BRD] = NULL;
2109 }
2110 ifnet_head_done();
2111
2112 if (error) {
2113 if (error == ENOBUFS) {
2114 printf("%s: current_len (%lu) + len (%lu) > "
2115 "total_len (%lu)\n", __func__, current_len,
2116 len, total_len);
2117 }
2118 break;
2119 }
2120
2121 if (pass == 0) {
2122 /* Better to return zero length buffer than ENOBUFS */
2123 if (total_len == 0) {
2124 total_len = 1;
2125 }
2126 total_len += total_len >> 3;
2127 total_buffer_len = total_len;
2128 total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
2129 if (total_buffer == NULL) {
2130 printf("%s: kalloc_data(%lu) failed\n", __func__,
2131 total_len);
2132 error = ENOBUFS;
2133 break;
2134 }
2135 cp = total_buffer;
2136 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2137 } else {
2138 error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
2139 if (error) {
2140 break;
2141 }
2142 }
2143 }
2144
2145 if (total_buffer != NULL) {
2146 kfree_data(total_buffer, total_buffer_len);
2147 }
2148
2149 kauth_cred_unref(&cred);
2150 return error;
2151 }
2152
2153
2154 static int
sysctl_rtstat(struct sysctl_req * req)2155 sysctl_rtstat(struct sysctl_req *req)
2156 {
2157 return SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat));
2158 }
2159
2160 static int
sysctl_rttrash(struct sysctl_req * req)2161 sysctl_rttrash(struct sysctl_req *req)
2162 {
2163 return SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
2164 }
2165
2166 static int
2167 sysctl_rtsock SYSCTL_HANDLER_ARGS
2168 {
2169 #pragma unused(oidp)
2170 int *name = (int *)arg1;
2171 u_int namelen = arg2;
2172 struct radix_node_head *rnh;
2173 int i, error = EINVAL;
2174 u_char af;
2175 struct walkarg w;
2176
2177 name++;
2178 namelen--;
2179 if (req->newptr) {
2180 return EPERM;
2181 }
2182 if (namelen != 3) {
2183 return EINVAL;
2184 }
2185 af = (u_char)name[0];
2186 Bzero(&w, sizeof(w));
2187 w.w_op = name[1];
2188 w.w_arg = name[2];
2189 w.w_req = req;
2190
2191 switch (w.w_op) {
2192 case NET_RT_DUMP:
2193 case NET_RT_DUMP2:
2194 case NET_RT_FLAGS:
2195 case NET_RT_FLAGS_PRIV:
2196 lck_mtx_lock(rnh_lock);
2197 for (i = 1; i <= AF_MAX; i++) {
2198 if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2199 (error = rnh->rnh_walktree(rnh,
2200 sysctl_dumpentry, &w))) {
2201 break;
2202 }
2203 }
2204 lck_mtx_unlock(rnh_lock);
2205 break;
2206 case NET_RT_DUMPX:
2207 case NET_RT_DUMPX_FLAGS:
2208 lck_mtx_lock(rnh_lock);
2209 for (i = 1; i <= AF_MAX; i++) {
2210 if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2211 (error = rnh->rnh_walktree(rnh,
2212 sysctl_dumpentry_ext, &w))) {
2213 break;
2214 }
2215 }
2216 lck_mtx_unlock(rnh_lock);
2217 break;
2218 case NET_RT_IFLIST:
2219 error = sysctl_iflist(af, &w);
2220 break;
2221 case NET_RT_IFLIST2:
2222 error = sysctl_iflist2(af, &w);
2223 break;
2224 case NET_RT_STAT:
2225 error = sysctl_rtstat(req);
2226 break;
2227 case NET_RT_TRASH:
2228 error = sysctl_rttrash(req);
2229 break;
2230 }
2231 if (w.w_tmem != NULL) {
2232 kfree_data(w.w_tmem, w.w_tmemsize);
2233 }
2234 return error;
2235 }
2236
2237 /*
2238 * Definitions of protocols supported in the ROUTE domain.
2239 */
2240 static struct protosw routesw[] = {
2241 {
2242 .pr_type = SOCK_RAW,
2243 .pr_protocol = 0,
2244 .pr_flags = PR_ATOMIC | PR_ADDR,
2245 .pr_output = route_output,
2246 .pr_ctlinput = raw_ctlinput,
2247 .pr_usrreqs = &route_usrreqs,
2248 }
2249 };
2250
2251 static int route_proto_count = (sizeof(routesw) / sizeof(struct protosw));
2252
/*
 * ROUTE (PF_ROUTE) domain descriptor.  dom_init points at route_dinit()
 * below, which registers routesw[] and finishes domain initialization.
 */
struct domain routedomain_s = {
	.dom_family = PF_ROUTE,
	.dom_name = "route",
	.dom_init = route_dinit,
};
2258
2259 static void
route_dinit(struct domain * dp)2260 route_dinit(struct domain *dp)
2261 {
2262 struct protosw *pr;
2263 int i;
2264
2265 VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
2266 VERIFY(routedomain == NULL);
2267
2268 routedomain = dp;
2269
2270 for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) {
2271 net_add_proto(pr, dp, 1);
2272 }
2273
2274 route_init();
2275 }
2276