1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)rtsock.c 8.5 (Berkeley) 11/2/94
61 */
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kauth.h>
66 #include <sys/kernel.h>
67 #include <sys/sysctl.h>
68 #include <sys/proc.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/syslog.h>
76 #include <sys/mcache.h>
77 #include <kern/locks.h>
78 #include <sys/codesign.h>
79
80 #include <net/if.h>
81 #include <net/route.h>
82 #include <net/dlil.h>
83 #include <net/raw_cb.h>
84 #include <netinet/in.h>
85 #include <netinet/in_var.h>
86 #include <netinet/in_arp.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip6.h>
89 #include <netinet6/nd6.h>
90
91 #include <IOKit/IOBSD.h>
92
93 extern struct rtstat rtstat;
94 extern struct domain routedomain_s;
95 static struct domain *routedomain = NULL;
96
/* Dummy destination/source addresses used when looping messages back to PF_ROUTE listeners */
static struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
/* All-zero AF_INET sockaddr template */
static struct sockaddr sa_zero = { .sa_len = sizeof(sa_zero), .sa_family = AF_INET, .sa_data = { 0, } };
100
/*
 * Counters of currently-attached routing sockets, broken out by the
 * protocol they were created with.  Updated with relaxed atomics from
 * rts_attach()/rts_detach(); any_count is consulted in route_output()
 * to decide whether anyone else is listening.
 */
struct route_cb {
	u_int32_t ip_count;     /* attached w/ AF_INET */
	u_int32_t ip6_count;    /* attached w/ AF_INET6 */
	u_int32_t any_count;    /* total attached */
};

static struct route_cb route_cb;
108
/*
 * Per-request context threaded through the sysctl dump walkers
 * (sysctl_dumpentry, sysctl_iflist, ...).
 */
struct walkarg {
	int w_tmemsize;         /* size of the w_tmem scratch buffer */
	int w_op, w_arg;        /* sysctl op (NET_RT_*) and its argument */
	caddr_t w_tmem;         /* scratch buffer for composing messages */
	struct sysctl_req *w_req; /* the sysctl request to copy results into */
};
115
116 static void route_dinit(struct domain *);
117 static int rts_abort(struct socket *);
118 static int rts_attach(struct socket *, int, struct proc *);
119 static int rts_bind(struct socket *, struct sockaddr *, struct proc *);
120 static int rts_connect(struct socket *, struct sockaddr *, struct proc *);
121 static int rts_detach(struct socket *);
122 static int rts_disconnect(struct socket *);
123 static int rts_peeraddr(struct socket *, struct sockaddr **);
124 static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *,
125 struct mbuf *, struct proc *);
126 static int rts_shutdown(struct socket *);
127 static int rts_sockaddr(struct socket *, struct sockaddr **);
128
129 static int route_output(struct mbuf *, struct socket *);
130 static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
131 static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
132 static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
133 struct sockaddr *, unsigned int);
134 static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
135 static struct mbuf *rt_msg1(u_char, struct rt_addrinfo *);
136 static int rt_msg2(u_char, struct rt_addrinfo *, caddr_t, struct walkarg *,
137 kauth_cred_t *);
138 static int sysctl_dumpentry(struct radix_node *rn, void *vw);
139 static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
140 static int sysctl_iflist(int af, struct walkarg *w);
141 static int sysctl_iflist2(int af, struct walkarg *w);
142 static int sysctl_rtstat(struct sysctl_req *);
143 static int sysctl_rttrash(struct sysctl_req *);
144 static int sysctl_rtsock SYSCTL_HANDLER_ARGS;
145
146 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED,
147 sysctl_rtsock, "");
148
149 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "routing");
150
/* Align x to 1024 (only power of 2) assuming x is positive */
#define ALIGN_BYTES(x) do { \
	x = (uint32_t)P2ALIGN(x, 1024); \
} while(0)

/* Round a up to the next multiple of 4 bytes; 0 rounds to 4 */
#define ROUNDUP32(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) : \
	sizeof (uint32_t))

/* Advance cursor x past sockaddr n, keeping 32-bit alignment */
#define ADVANCE32(x, n) \
	(x += ROUNDUP32((n)->sa_len))

/* True if route rt carries a usable interface address */
#define RT_HAS_IFADDR(rt) \
	((rt)->rt_ifa != NULL && (rt)->rt_ifa->ifa_addr != NULL)
165
166 /*
167 * It really doesn't make any sense at all for this code to share much
168 * with raw_usrreq.c, since its functionality is so restricted. XXX
169 */
170 static int
rts_abort(struct socket * so)171 rts_abort(struct socket *so)
172 {
173 return raw_usrreqs.pru_abort(so);
174 }
175
176 /* pru_accept is EOPNOTSUPP */
177
/*
 * Attach a new PF_ROUTE socket.
 *
 * Allocates the raw control block, attaches it via raw_attach() (which
 * is used directly instead of raw_usrreqs.pru_attach because the latter
 * enforces SS_PRIV; unprivileged sockets may still RTM_GET), bumps the
 * per-protocol listener counters, and marks the socket connected with
 * loopback of its own messages enabled by default.
 *
 * Returns 0 on success or the error from raw_attach(); on failure the
 * control block is freed and the socket left with no pcb.
 */
static int
rts_attach(struct socket *so, int proto, struct proc *p)
{
#pragma unused(p)
	struct rawcb *rp;
	int error;

	VERIFY(so->so_pcb == NULL);

	rp = kalloc_type(struct rawcb, Z_WAITOK_ZERO_NOFAIL);
	so->so_pcb = (caddr_t)rp;
	/* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
	error = raw_attach(so, proto);
	rp = sotorawcb(so);
	if (error) {
		kfree_type(struct rawcb, rp);
		so->so_pcb = NULL;
		so->so_flags |= SOF_PCBCLEARING;
		return error;
	}

	/* Track listeners per address family for routing-event delivery */
	switch (rp->rcb_proto.sp_protocol) {
	case AF_INET:
		os_atomic_inc(&route_cb.ip_count, relaxed);
		break;
	case AF_INET6:
		os_atomic_inc(&route_cb.ip6_count, relaxed);
		break;
	}
	rp->rcb_faddr = &route_src;
	os_atomic_inc(&route_cb.any_count, relaxed);
	/* the socket is already locked when we enter rts_attach */
	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;
	return 0;
}
214
215 static int
rts_bind(struct socket * so,struct sockaddr * nam,struct proc * p)216 rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
217 {
218 return raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */
219 }
220
221 static int
rts_connect(struct socket * so,struct sockaddr * nam,struct proc * p)222 rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
223 {
224 return raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */
225 }
226
227 /* pru_connect2 is EOPNOTSUPP */
228 /* pru_control is EOPNOTSUPP */
229
/*
 * Detach a PF_ROUTE socket: drop the per-protocol and total listener
 * counters bumped in rts_attach(), then let the raw layer tear down
 * the control block.
 */
static int
rts_detach(struct socket *so)
{
	struct rawcb *rp = sotorawcb(so);

	VERIFY(rp != NULL);

	switch (rp->rcb_proto.sp_protocol) {
	case AF_INET:
		os_atomic_dec(&route_cb.ip_count, relaxed);
		break;
	case AF_INET6:
		os_atomic_dec(&route_cb.ip6_count, relaxed);
		break;
	}
	os_atomic_dec(&route_cb.any_count, relaxed);
	return raw_usrreqs.pru_detach(so);
}
248
249 static int
rts_disconnect(struct socket * so)250 rts_disconnect(struct socket *so)
251 {
252 return raw_usrreqs.pru_disconnect(so);
253 }
254
255 /* pru_listen is EOPNOTSUPP */
256
257 static int
rts_peeraddr(struct socket * so,struct sockaddr ** nam)258 rts_peeraddr(struct socket *so, struct sockaddr **nam)
259 {
260 return raw_usrreqs.pru_peeraddr(so, nam);
261 }
262
263 /* pru_rcvd is EOPNOTSUPP */
264 /* pru_rcvoob is EOPNOTSUPP */
265
266 static int
rts_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)267 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
268 struct mbuf *control, struct proc *p)
269 {
270 return raw_usrreqs.pru_send(so, flags, m, nam, control, p);
271 }
272
273 /* pru_sense is null */
274
275 static int
rts_shutdown(struct socket * so)276 rts_shutdown(struct socket *so)
277 {
278 return raw_usrreqs.pru_shutdown(so);
279 }
280
281 static int
rts_sockaddr(struct socket * so,struct sockaddr ** nam)282 rts_sockaddr(struct socket *so, struct sockaddr **nam)
283 {
284 return raw_usrreqs.pru_sockaddr(so, nam);
285 }
286
/*
 * User-request dispatch table for PF_ROUTE sockets.  Entries not
 * listed here (accept, connect2, control, listen, rcvd, rcvoob,
 * sense) are EOPNOTSUPP or null in the raw-socket defaults.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort =            rts_abort,
	.pru_attach =           rts_attach,
	.pru_bind =             rts_bind,
	.pru_connect =          rts_connect,
	.pru_detach =           rts_detach,
	.pru_disconnect =       rts_disconnect,
	.pru_peeraddr =         rts_peeraddr,
	.pru_send =             rts_send,
	.pru_shutdown =         rts_shutdown,
	.pru_sockaddr =         rts_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
301
/*
 * route_output - process one routing message (RTM_ADD, RTM_DELETE,
 * RTM_GET, RTM_CHANGE, RTM_LOCK, RTM_GET_SILENT) written to a PF_ROUTE
 * socket.
 *
 * The message arrives as an mbuf chain `m' from socket `so'.  The socket
 * lock is held on entry; it is dropped while rnh_lock (the routing-table
 * lock) is held for the actual table manipulation, then re-acquired to
 * deliver the reply.  Replies (and error echoes) are looped back either
 * only to the sender (RTM_GET_SILENT) or broadcast to all PF_ROUTE
 * listeners via raw_input().
 *
 * Returns 0 on success or an errno; the same errno is also echoed back
 * to listeners in rtm_errno.  Always consumes `m'.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
	struct rt_msghdr *rtm = NULL;
	size_t rtm_len = 0;
	struct rtentry *rt = NULL;
	struct rtentry *saved_nrt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	int len, error = 0;
	sa_family_t dst_sa_family = 0;
	struct ifnet *ifp = NULL;
	struct sockaddr_in dst_in, gate_in;
	int sendonlytoself = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct rawcb *rp = NULL;
	boolean_t is_router = FALSE;
/* Record the error and jump to the reply/cleanup path */
#define senderr(e) { error = (e); goto flush; }
	if (m == NULL || ((m->m_len < sizeof(intptr_t)) &&
	    (m = m_pullup(m, sizeof(intptr_t))) == NULL)) {
		return ENOBUFS;
	}
	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * Unlock the socket (but keep a reference) it won't be
	 * accessed until raw_input appends to it.
	 */
	socket_unlock(so, 0);
	lck_mtx_lock(rnh_lock);

	len = m->m_pkthdr.len;
	if (len < sizeof(*rtm) ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	/* Copy the message into a private buffer we can rewrite in place */
	rtm = kalloc_data(len, Z_WAITOK);
	if (rtm == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	rtm_len = (size_t)len;
	m_copydata(m, 0, len, (caddr_t)rtm);
	if (rtm->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}

	/*
	 * Silent version of RTM_GET for Reachabiltiy APIs. We may change
	 * all RTM_GETs to be silent in the future, so this is private for now.
	 */
	if (rtm->rtm_type == RTM_GET_SILENT) {
		if (!(so->so_options & SO_USELOOPBACK)) {
			senderr(EINVAL);
		}
		sendonlytoself = 1;
		rtm->rtm_type = RTM_GET;
	}

	/*
	 * Perform permission checking, only privileged sockets
	 * may perform operations other than RTM_GET
	 */
	if (rtm->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPERM);
	}

	rtm->rtm_pid = proc_selfpid();
	info.rti_addrs = rtm->rtm_addrs;
	/* Parse the sockaddrs that follow the header into info.rti_info[] */
	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) {
		senderr(EINVAL);
	}

	/*
	 * Normalize undersized/oversized AF_INET destinations into a
	 * properly-sized local sockaddr_in; reject short AF_INET6 ones.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
	    info.rti_info[RTAX_DST]->sa_len != sizeof(struct sockaddr_in)) {
		/* At minimum, we need up to sin_addr */
		if (info.rti_info[RTAX_DST]->sa_len <
		    offsetof(struct sockaddr_in, sin_zero)) {
			senderr(EINVAL);
		}
		bzero(&dst_in, sizeof(dst_in));
		dst_in.sin_len = sizeof(dst_in);
		dst_in.sin_family = AF_INET;
		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
		info.rti_info[RTAX_DST] = (struct sockaddr *)&dst_in;
		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
	} else if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    info.rti_info[RTAX_DST]->sa_len < sizeof(struct sockaddr_in6)) {
		senderr(EINVAL);
	}

	/* Same normalization for the gateway, if one was supplied */
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
		    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof(struct sockaddr_in)) {
			/* At minimum, we need up to sin_addr */
			if (info.rti_info[RTAX_GATEWAY]->sa_len <
			    offsetof(struct sockaddr_in, sin_zero)) {
				senderr(EINVAL);
			}
			bzero(&gate_in, sizeof(gate_in));
			gate_in.sin_len = sizeof(gate_in);
			gate_in.sin_family = AF_INET;
			gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
			gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
			info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gate_in;
		} else if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET6 &&
		    info.rti_info[RTAX_GATEWAY]->sa_len < sizeof(struct sockaddr_in6)) {
			senderr(EINVAL);
		}
	}

	/* Intern the genmask into the mask tree so it can be shared */
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		t = rn_addmask((caddr_t)info.rti_info[RTAX_GENMASK], 0, 1);
		if (t != NULL && Bcmp(info.rti_info[RTAX_GENMASK],
		    t->rn_key, *(u_char *)info.rti_info[RTAX_GENMASK]) == 0) {
			info.rti_info[RTAX_GENMASK] =
			    (struct sockaddr *)(t->rn_key);
		} else {
			senderr(ENOBUFS);
		}
	}

	/*
	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
	 */
	if (rtm->rtm_flags & RTF_IFSCOPE) {
		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
		    info.rti_info[RTAX_DST]->sa_family != AF_INET6) {
			senderr(EINVAL);
		}
		ifscope = rtm->rtm_index;
	}
	/*
	 * Block changes on INTCOPROC interfaces.
	 */
	if (ifscope != IFSCOPE_NONE) {
		unsigned int intcoproc_scope = 0;
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (IFNET_IS_INTCOPROC(ifp)) {
				intcoproc_scope = ifp->if_index;
				break;
			}
		}
		ifnet_head_done();
		/* only the kernel (pid 0) may touch co-processor routes */
		if (intcoproc_scope == ifscope && proc_getpid(current_proc()) != 0) {
			senderr(EINVAL);
		}
	}
	/*
	 * Require entitlement to change management interfaces
	 */
	if (management_control_unrestricted == false && if_management_interface_check_needed == true &&
	    ifscope != IFSCOPE_NONE && proc_getpid(current_proc()) != 0) {
		bool is_management = false;

		ifnet_head_lock_shared();
		if (IF_INDEX_IN_RANGE(ifscope)) {
			if (IFNET_IS_MANAGEMENT(ifindex2ifnet[ifscope])) {
				is_management = true;
			}
		}
		ifnet_head_done();

		if (is_management && !IOCurrentTaskHasEntitlement(MANAGEMENT_CONTROL_ENTITLEMENT)) {
			senderr(EINVAL);
		}
	}

	/*
	 * RTF_PROXY can only be set internally from within the kernel.
	 */
	if (rtm->rtm_flags & RTF_PROXY) {
		senderr(EINVAL);
	}

	/*
	 * For AF_INET, always zero out the embedded scope ID. If this is
	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
	 * flag and the corresponding rtm_index value. This is to prevent
	 * false interpretation of the scope ID because it's using the sin_zero
	 * field, which might not be properly cleared by the requestor.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
	}
	/* For scoped v6 addresses, move an embedded scope into sin6_scope_id */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    SIN6(info.rti_info[RTAX_DST])->sin6_scope_id == 0) {
		SIN6(info.rti_info[RTAX_DST])->sin6_scope_id = ntohs(SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1]);
		SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1] = 0;
	}

	switch (rtm->rtm_type) {
	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL) {
			senderr(EINVAL);
		}

		error = rtrequest_scoped_locked(RTM_ADD,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0 && saved_nrt != NULL) {
			RT_LOCK(saved_nrt);
			/*
			 * If the route request specified an interface with
			 * IFA and/or IFP, we set the requested interface on
			 * the route with rt_setif.  It would be much better
			 * to do this inside rtrequest, but that would
			 * require passing the desired interface, in some
			 * form, to rtrequest.  Since rtrequest is called in
			 * so many places (roughly 40 in our source), adding
			 * a parameter is to much for us to swallow; this is
			 * something for the FreeBSD developers to tackle.
			 * Instead, we let rtrequest compute whatever
			 * interface it wants, then come in behind it and
			 * stick in the interface that we really want.  This
			 * works reasonably well except when rtrequest can't
			 * figure out what interface to use (with
			 * ifa_withroute) and returns ENETUNREACH.  Ideally
			 * it shouldn't matter if rtrequest can't figure out
			 * the interface if we're going to explicitly set it
			 * ourselves anyway.  But practically we can't
			 * recover here because rtrequest will not do any of
			 * the work necessary to add the route if it can't
			 * find an interface.  As long as there is a default
			 * route that leads to some interface, rtrequest will
			 * find an interface, so this problem should be
			 * rarely encountered.
			 * [email protected]
			 */
			rt_setif(saved_nrt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);
			(void)rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt);
			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
			saved_nrt->rt_rmx.rmx_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			RT_REMREF_LOCKED(saved_nrt);
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		error = rtrequest_scoped_locked(RTM_DELETE,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0) {
			/* report the deleted entry back to the caller */
			rt = saved_nrt;
			RT_LOCK(rt);
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL) {
			senderr(EAFNOSUPPORT);
		}
		/*
		 * Lookup the best match based on the key-mask pair;
		 * callee adds a reference and checks for root node.
		 */
		rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
		    info.rti_info[RTAX_NETMASK], rnh, ifscope);
		if (rt == NULL) {
			senderr(ESRCH);
		}
		RT_LOCK(rt);

		/*
		 * Holding rnh_lock here prevents the possibility of
		 * ifa from changing (e.g. in_ifinit), so it is safe
		 * to access its ifa_addr (down below) without locking.
		 */
		switch (rtm->rtm_type) {
		case RTM_GET: {
			kauth_cred_t cred;
			kauth_cred_t* credp;
			struct ifaddr *ifa2;
report:
			cred = kauth_cred_proc_ref(current_proc());
			credp = &cred;

			ifa2 = NULL;
			RT_LOCK_ASSERT_HELD(rt);
			info.rti_info[RTAX_DST] = rt_key(rt);
			dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp != NULL) {
					ifnet_lock_shared(ifp);
					ifa2 = ifp->if_lladdr;
					info.rti_info[RTAX_IFP] =
					    ifa2->ifa_addr;
					IFA_ADDREF(ifa2);
					ifnet_lock_done(ifp);
					info.rti_info[RTAX_IFA] =
					    rt->rt_ifa->ifa_addr;
					rtm->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				rtm->rtm_index = ifp->if_index;
			}
			/* first rt_msg2 pass computes the reply length only */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL, credp);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}
			struct rt_msghdr *out_rtm;
			out_rtm = kalloc_data(len, Z_WAITOK);
			if (out_rtm == NULL) {
				RT_UNLOCK(rt);
				if (ifa2 != NULL) {
					IFA_REMREF(ifa2);
				}
				senderr(ENOBUFS);
			}
			Bcopy(rtm, out_rtm, sizeof(struct rt_msghdr));
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			/* second pass fills in the reply buffer */
			(void) rt_msg2(out_rtm->rtm_type, &info, (caddr_t)out_rtm,
			    NULL, &cred);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}
			/* replace the request with the composed reply */
			kfree_data(rtm, rtm_len);
			rtm = out_rtm;
			rtm_len = len;
			rtm->rtm_flags = rt->rt_flags;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_addrs = info.rti_addrs;
			if (ifa2 != NULL) {
				IFA_REMREF(ifa2);
			}

			kauth_cred_unref(&cred);
			break;
		}

		case RTM_CHANGE:
			is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;

			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
			    info.rti_info[RTAX_GATEWAY]))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			/*
			 * If they tried to change things but didn't specify
			 * the required gateway, then just use the old one.
			 * This can happen if the user tries to change the
			 * flags on the default route without changing the
			 * default gateway.  Changing flags still doesn't work.
			 */
			if ((rt->rt_flags & RTF_GATEWAY) &&
			    info.rti_info[RTAX_GATEWAY] == NULL) {
				info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			}

			/*
			 * On Darwin, we call rt_setif which contains the
			 * equivalent to the code found at this very spot
			 * in BSD.
			 */
			rt_setif(rt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);

			if ((error = rt_setmetrics(rtm->rtm_inits,
			    &rtm->rtm_rmx, rt))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			if (info.rti_info[RTAX_GENMASK]) {
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			}

			/*
			 * Enqueue work item to invoke callback for this route entry
			 * This may not be needed always, but for now issue it anytime
			 * RTM_CHANGE gets called.
			 */
			route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, TRUE);
			/*
			 * If the route is for a router, walk the tree to send refresh
			 * event to protocol cloned entries
			 */
			if (is_router) {
				struct route_event rt_ev;
				route_event_init(&rt_ev, rt, NULL, ROUTE_ENTRY_REFRESH);
				RT_UNLOCK(rt);
				(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
				RT_LOCK(rt);
			}
			OS_FALLTHROUGH;
		case RTM_LOCK:
			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			break;
		}
		RT_UNLOCK(rt);
		break;
	default:
		senderr(EOPNOTSUPP);
	}
flush:
	/* Echo status back in the message header, then deliver/loop back */
	if (rtm != NULL) {
		if (error) {
			rtm->rtm_errno = error;
		} else {
			rtm->rtm_flags |= RTF_DONE;
		}
	}
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		rtfree_locked(rt);
	}
	lck_mtx_unlock(rnh_lock);

	/* relock the socket now */
	socket_lock(so, 0);
	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			/* no other listeners: nothing to deliver */
			kfree_data(rtm, rtm_len);
			m_freem(m);
			return error;
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	if (rtm != NULL) {
		/* copy the (possibly rewritten) reply back into the mbuf chain */
		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
		if (m->m_pkthdr.len < rtm->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > rtm->rtm_msglen) {
			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
		}
		kfree_data(rtm, rtm_len);
	}
	if (sendonlytoself && m != NULL) {
		/* RTM_GET_SILENT: deliver to the requesting socket only */
		error = 0;
		if (sbappendaddr(&so->so_rcv, &route_src, m,
		    NULL, &error) != 0) {
			sorwakeup(so);
		}
		if (error) {
			return error;
		}
	} else {
		struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
		if (rp != NULL) {
			rp->rcb_proto.sp_family = 0; /* Avoid us */
		}
		if (dst_sa_family != 0) {
			route_proto.sp_protocol = dst_sa_family;
		}
		if (m != NULL) {
			socket_unlock(so, 0);
			raw_input(m, &route_proto, &route_src, &route_dst);
			socket_lock(so, 0);
		}
		if (rp != NULL) {
			rp->rcb_proto.sp_family = PF_ROUTE;
		}
	}
	return error;
}
810
811 void
rt_setexpire(struct rtentry * rt,uint64_t expiry)812 rt_setexpire(struct rtentry *rt, uint64_t expiry)
813 {
814 /* set both rt_expire and rmx_expire */
815 rt->rt_expire = expiry;
816 if (expiry) {
817 rt->rt_rmx.rmx_expire =
818 (int32_t)(expiry + rt->base_calendartime -
819 rt->base_uptime);
820 } else {
821 rt->rt_rmx.rmx_expire = 0;
822 }
823 }
824
825 static int
rt_setmetrics(u_int32_t which,struct rt_metrics * in,struct rtentry * out)826 rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
827 {
828 if (!(which & RTV_REFRESH_HOST)) {
829 struct timeval caltime;
830 getmicrotime(&caltime);
831 #define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
832 metric(RTV_RPIPE, rmx_recvpipe);
833 metric(RTV_SPIPE, rmx_sendpipe);
834 metric(RTV_SSTHRESH, rmx_ssthresh);
835 metric(RTV_RTT, rmx_rtt);
836 metric(RTV_RTTVAR, rmx_rttvar);
837 metric(RTV_HOPCOUNT, rmx_hopcount);
838 metric(RTV_MTU, rmx_mtu);
839 metric(RTV_EXPIRE, rmx_expire);
840 #undef metric
841 if (out->rt_rmx.rmx_expire > 0) {
842 /* account for system time change */
843 getmicrotime(&caltime);
844 out->base_calendartime +=
845 NET_CALCULATE_CLOCKSKEW(caltime,
846 out->base_calendartime,
847 net_uptime(), out->base_uptime);
848 rt_setexpire(out,
849 out->rt_rmx.rmx_expire -
850 out->base_calendartime +
851 out->base_uptime);
852 } else {
853 rt_setexpire(out, 0);
854 }
855
856 VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
857 VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
858 } else {
859 /* Only RTV_REFRESH_HOST must be set */
860 if ((which & ~RTV_REFRESH_HOST) ||
861 (out->rt_flags & RTF_STATIC) ||
862 !(out->rt_flags & RTF_LLINFO)) {
863 return EINVAL;
864 }
865
866 if (out->rt_llinfo_refresh == NULL) {
867 return ENOTSUP;
868 }
869
870 out->rt_llinfo_refresh(out);
871 }
872 return 0;
873 }
874
/*
 * Copy route `in's metrics into `out', converting the uptime-based
 * rt_expire into a calendar-time rmx_expire for userland.
 *
 * NOTE: this also updates in->base_calendartime to absorb any system
 * (calendar) clock skew since the route was created, so the function
 * has a side effect on the route entry itself.
 */
static void
rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
{
	struct timeval caltime;

	/* rt_expire and rmx_expire must agree on zero/non-zero */
	VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
	VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);

	*out = in->rt_rmx;

	if (in->rt_expire != 0) {
		/* account for system time change */
		getmicrotime(&caltime);

		in->base_calendartime +=
		    NET_CALCULATE_CLOCKSKEW(caltime,
		    in->base_calendartime, net_uptime(), in->base_uptime);

		out->rmx_expire = (int32_t)(in->base_calendartime +
		    in->rt_expire - in->base_uptime);
	} else {
		out->rmx_expire = 0;
	}
}
899
900 /*
901 * Set route's interface given info.rti_info[RTAX_IFP],
902 * info.rti_info[RTAX_IFA], and gateway.
903 */
904 static void
rt_setif(struct rtentry * rt,struct sockaddr * Ifpaddr,struct sockaddr * Ifaaddr,struct sockaddr * Gate,unsigned int ifscope)905 rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
906 struct sockaddr *Gate, unsigned int ifscope)
907 {
908 struct ifaddr *ifa = NULL;
909 struct ifnet *ifp = NULL;
910 void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);
911
912 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
913
914 RT_LOCK_ASSERT_HELD(rt);
915
916 /* Don't update a defunct route */
917 if (rt->rt_flags & RTF_CONDEMNED) {
918 return;
919 }
920
921 /* Add an extra ref for ourselves */
922 RT_ADDREF_LOCKED(rt);
923
924 /* Become a regular mutex, just in case */
925 RT_CONVERT_LOCK(rt);
926
927 /*
928 * New gateway could require new ifaddr, ifp; flags may also
929 * be different; ifp may be specified by ll sockaddr when
930 * protocol address is ambiguous.
931 */
932 if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
933 (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
934 IFA_REMREF(ifa);
935 ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
936 } else {
937 if (ifa != NULL) {
938 IFA_REMREF(ifa);
939 ifa = NULL;
940 }
941 if (Ifpaddr && (ifp = if_withname(Ifpaddr))) {
942 if (Gate) {
943 ifa = ifaof_ifpforaddr(Gate, ifp);
944 } else {
945 ifnet_lock_shared(ifp);
946 ifa = TAILQ_FIRST(&ifp->if_addrhead);
947 if (ifa != NULL) {
948 IFA_ADDREF(ifa);
949 }
950 ifnet_lock_done(ifp);
951 }
952 } else if (Ifaaddr &&
953 (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
954 ifp = ifa->ifa_ifp;
955 } else if (Gate != NULL) {
956 /*
957 * Safe to drop rt_lock and use rt_key, since holding
958 * rnh_lock here prevents another thread from calling
959 * rt_setgate() on this route. We cannot hold the
960 * lock across ifa_ifwithroute since the lookup done
961 * by that routine may point to the same route.
962 */
963 RT_UNLOCK(rt);
964 if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
965 rt_key(rt), Gate, ifscope)) != NULL) {
966 ifp = ifa->ifa_ifp;
967 }
968 RT_LOCK(rt);
969 /* Don't update a defunct route */
970 if (rt->rt_flags & RTF_CONDEMNED) {
971 if (ifa != NULL) {
972 IFA_REMREF(ifa);
973 }
974 /* Release extra ref */
975 RT_REMREF_LOCKED(rt);
976 return;
977 }
978 }
979 }
980
981 /* trigger route cache reevaluation */
982 if (rt_key(rt)->sa_family == AF_INET) {
983 routegenid_inet_update();
984 } else if (rt_key(rt)->sa_family == AF_INET6) {
985 routegenid_inet6_update();
986 }
987
988 if (ifa != NULL) {
989 struct ifaddr *oifa = rt->rt_ifa;
990 if (oifa != ifa) {
991 if (oifa != NULL) {
992 IFA_LOCK_SPIN(oifa);
993 ifa_rtrequest = oifa->ifa_rtrequest;
994 IFA_UNLOCK(oifa);
995 if (ifa_rtrequest != NULL) {
996 ifa_rtrequest(RTM_DELETE, rt, Gate);
997 }
998 }
999 rtsetifa(rt, ifa);
1000
1001 if (rt->rt_ifp != ifp) {
1002 /*
1003 * Purge any link-layer info caching.
1004 */
1005 if (rt->rt_llinfo_purge != NULL) {
1006 rt->rt_llinfo_purge(rt);
1007 }
1008
1009 /*
1010 * Adjust route ref count for the interfaces.
1011 */
1012 if (rt->rt_if_ref_fn != NULL) {
1013 rt->rt_if_ref_fn(ifp, 1);
1014 rt->rt_if_ref_fn(rt->rt_ifp, -1);
1015 }
1016 }
1017 rt->rt_ifp = ifp;
1018 /*
1019 * If this is the (non-scoped) default route, record
1020 * the interface index used for the primary ifscope.
1021 */
1022 if (rt_primary_default(rt, rt_key(rt))) {
1023 set_primary_ifscope(rt_key(rt)->sa_family,
1024 rt->rt_ifp->if_index);
1025 }
1026 /*
1027 * If rmx_mtu is not locked, update it
1028 * to the MTU used by the new interface.
1029 */
1030 if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
1031 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
1032 if (rt_key(rt)->sa_family == AF_INET &&
1033 INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
1034 rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
1035 /* Further adjust the size for CLAT46 expansion */
1036 rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
1037 }
1038 }
1039
1040 if (rt->rt_ifa != NULL) {
1041 IFA_LOCK_SPIN(rt->rt_ifa);
1042 ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
1043 IFA_UNLOCK(rt->rt_ifa);
1044 if (ifa_rtrequest != NULL) {
1045 ifa_rtrequest(RTM_ADD, rt, Gate);
1046 }
1047 }
1048 IFA_REMREF(ifa);
1049 /* Release extra ref */
1050 RT_REMREF_LOCKED(rt);
1051 return;
1052 }
1053 IFA_REMREF(ifa);
1054 ifa = NULL;
1055 }
1056
1057 /* XXX: to reset gateway to correct value, at RTM_CHANGE */
1058 if (rt->rt_ifa != NULL) {
1059 IFA_LOCK_SPIN(rt->rt_ifa);
1060 ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
1061 IFA_UNLOCK(rt->rt_ifa);
1062 if (ifa_rtrequest != NULL) {
1063 ifa_rtrequest(RTM_ADD, rt, Gate);
1064 }
1065 }
1066
1067 /*
1068 * Workaround for local address routes pointing to the loopback
1069 * interface added by configd, until <rdar://problem/12970142>.
1070 */
1071 if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
1072 (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) {
1073 ifa = ifa_ifwithaddr(rt_key(rt));
1074 if (ifa != NULL) {
1075 if (ifa != rt->rt_ifa) {
1076 rtsetifa(rt, ifa);
1077 }
1078 IFA_REMREF(ifa);
1079 }
1080 }
1081
1082 /* Release extra ref */
1083 RT_REMREF_LOCKED(rt);
1084 }
1085
1086 /*
1087 * Extract the addresses of the passed sockaddrs.
1088 * Do a little sanity checking so as to avoid bad memory references.
1089 * This data is derived straight from userland.
1090 */
static int
rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
{
	struct sockaddr *sa;
	int i;

	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
	/*
	 * Walk the RTAX slots in order; rti_addrs is a bitmask telling
	 * which sockaddrs are actually present in the byte stream.
	 * The stream is packed with each sockaddr rounded up to a
	 * 32-bit boundary (see ADVANCE32 below).
	 */
	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		if ((rtinfo->rti_addrs & (1 << i)) == 0) {
			continue;
		}
		sa = (struct sockaddr *)cp;
		/*
		 * It won't fit.
		 */
		if ((cp + sa->sa_len) > cplim) {
			return EINVAL;
		}
		/* Reject lengths larger than any sockaddr we can hold. */
		if (sa->sa_len > sizeof(struct sockaddr_storage)) {
			return EINVAL;
		}
		/*
		 * there are no more.. quit now
		 * If there are more bits, they are in error.
		 * I've seen this. route(1) can evidently generate these.
		 * This causes kernel to core dump.
		 * for compatibility, If we see this, point to a safe address.
		 */
		if (sa->sa_len == 0) {
			rtinfo->rti_info[i] = &sa_zero;
			return 0; /* should be EINVAL but for compat */
		}
		/* Too short to contain even the fixed sockaddr header. */
		if (sa->sa_len < offsetof(struct sockaddr, sa_data)) {
			return EINVAL;
		}
		/* accept it */
		rtinfo->rti_info[i] = sa;
		ADVANCE32(cp, sa);
	}
	return 0;
}
1132
/*
 * Build a routing-socket message of the given type into a freshly
 * allocated mbuf: a fixed-size header (chosen by type) followed by the
 * sockaddrs present in rtinfo, each padded to 32-bit alignment.
 * Updates rtinfo->rti_addrs with the mask of addresses emitted.
 * Returns NULL on allocation failure or if the accounting check at the
 * end fails.  The caller owns the returned mbuf.
 */
static struct mbuf *
rt_msg1(u_char type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr *rtm;
	struct mbuf *m;
	int i;
	int len, dlen, off;

	/* Pick the fixed header size that matches the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		/* Header doesn't fit in a plain mbuf; attach a cluster. */
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL) {
		return NULL;
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	rtm = mtod(m, struct rt_msghdr *);
	bzero((caddr_t)rtm, len);
	off = len;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accomodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		m_copyback(m, off, dlen, (caddr_t)sa);
		/* len tracks the exact byte count; off includes padding. */
		len = off + dlen;
		off += ROUNDUP32(dlen);
	}
	/*
	 * If the mbuf length no longer matches our accounting, the
	 * copyback did not go as expected; drop the message.
	 */
	if (m->m_pkthdr.len != len) {
		m_freem(m);
		return NULL;
	}
	rtm->rtm_msglen = (u_short)len;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	return m;
}
1219
/*
 * Compute the size of -- and, when `cp' is non-NULL, build at `cp' --
 * a routing-socket message of `type' carrying the sockaddrs in
 * `rtinfo'.  When cp is NULL and a walkarg is supplied, the walkarg's
 * temporary buffer is (re)allocated to fit and the loop is re-run via
 * the `again' label to build the message there.  Addresses are
 * scrubbed of embedded interface scope, and of link-layer details the
 * caller's credential is not entitled to see, before being copied.
 * Returns the total (32-bit-rounded) message length.
 */
static int
rt_msg2(u_char type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w,
    kauth_cred_t* credp)
{
	int i;
	int len, dlen, rlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	/* Pick the fixed header size that matches the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	case RTM_IFINFO2:
		len = sizeof(struct if_msghdr2);
		break;

	case RTM_NEWMADDR2:
		len = sizeof(struct ifma_msghdr2);
		break;

	case RTM_GET_EXT:
		len = sizeof(struct rt_msghdr_ext);
		break;

	case RTM_GET2:
		len = sizeof(struct rt_msghdr2);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	cp0 = cp;
	if (cp0) {
		/* Addresses are laid out right after the fixed header. */
		cp += len;
	}
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accomodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;
		case RTAX_GATEWAY:
		case RTAX_IFP:
			/* Credential decides whether link-layer info is shown. */
			sa = rtm_scrub(type, i, NULL, sa, &ssbuf,
			    sizeof(ssbuf), credp);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		rlen = ROUNDUP32(dlen);
		if (cp) {
			bcopy((caddr_t)sa, cp, (size_t)dlen);
			if (dlen != rlen) {
				/* Zero out the alignment padding. */
				bzero(cp + dlen, rlen - dlen);
			}
			cp += rlen;
		}
		len += rlen;
	}
	/*
	 * Size-only pass with a walkarg: grow the walkarg's temporary
	 * buffer if needed, then run the loop once more to actually
	 * build the message into it.
	 */
	if (cp == NULL && w != NULL && !second_time) {
		struct walkarg *rw = w;

		if (rw->w_req != NULL) {
			if (rw->w_tmemsize < len) {
				if (rw->w_tmem != NULL) {
					kfree_data(rw->w_tmem, rw->w_tmemsize);
				}
				rw->w_tmem = (caddr_t) kalloc_data(len, Z_ZERO | Z_WAITOK);
				if (rw->w_tmem != NULL) {
					rw->w_tmemsize = len;
				}
			}
			if (rw->w_tmem != NULL) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			}
		}
	}
	if (cp) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)(void *)cp0;

		rtm->rtm_version = RTM_VERSION;
		rtm->rtm_type = type;
		rtm->rtm_msglen = (u_short)len;
	}
	return len;
}
1343
1344 /*
1345 * This routine is called to generate a message from the routing
1346 * socket indicating that a redirect has occurred, a routing lookup
1347 * has failed, or that a protocol has detected timeouts to a particular
1348 * destination.
1349 */
1350 void
rt_missmsg(u_char type,struct rt_addrinfo * rtinfo,int flags,int error)1351 rt_missmsg(u_char type, struct rt_addrinfo *rtinfo, int flags, int error)
1352 {
1353 struct rt_msghdr *rtm;
1354 struct mbuf *m;
1355 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1356 struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1357
1358 if (route_cb.any_count == 0) {
1359 return;
1360 }
1361 m = rt_msg1(type, rtinfo);
1362 if (m == NULL) {
1363 return;
1364 }
1365 rtm = mtod(m, struct rt_msghdr *);
1366 rtm->rtm_flags = RTF_DONE | flags;
1367 rtm->rtm_errno = error;
1368 rtm->rtm_addrs = rtinfo->rti_addrs;
1369 route_proto.sp_family = sa ? sa->sa_family : 0;
1370 raw_input(m, &route_proto, &route_src, &route_dst);
1371 }
1372
1373 /*
1374 * This routine is called to generate a message from the routing
1375 * socket indicating that the status of a network interface has changed.
1376 */
1377 void
rt_ifmsg(struct ifnet * ifp)1378 rt_ifmsg(struct ifnet *ifp)
1379 {
1380 struct if_msghdr *ifm;
1381 struct mbuf *m;
1382 struct rt_addrinfo info;
1383 struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1384
1385 if (route_cb.any_count == 0) {
1386 return;
1387 }
1388 bzero((caddr_t)&info, sizeof(info));
1389 m = rt_msg1(RTM_IFINFO, &info);
1390 if (m == NULL) {
1391 return;
1392 }
1393 ifm = mtod(m, struct if_msghdr *);
1394 ifm->ifm_index = ifp->if_index;
1395 ifm->ifm_flags = (u_short)ifp->if_flags;
1396 if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
1397 ifm->ifm_addrs = 0;
1398 raw_input(m, &route_proto, &route_src, &route_dst);
1399 }
1400
1401 /*
1402 * This is called to generate messages from the routing socket
1403 * indicating a network interface has had addresses associated with it.
1404 * if we ever reverse the logic and replace messages TO the routing
1405 * socket indicate a request to configure interfaces, then it will
1406 * be unnecessary as the routing socket will automatically generate
1407 * copies of it.
1408 *
1409 * Since this is coming from the interface, it is expected that the
1410 * interface will be locked. Caller must hold rnh_lock and rt_lock.
1411 */
void
rt_newaddrmsg(u_char cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{
	struct rt_addrinfo info;
	struct sockaddr *sa = 0;
	int pass;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifa->ifa_ifp;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (route_cb.any_count == 0) {
		return;
	}

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);
	/*
	 * Two messages are generated: one for the address
	 * (RTM_NEWADDR/RTM_DELADDR) and one for the route
	 * (RTM_ADD/RTM_DELETE).  The pass conditions below order them
	 * so the address message precedes the route message for an
	 * add, and follows it for a delete.
	 */
	for (pass = 1; pass < 3; pass++) {
		bzero((caddr_t)&info, sizeof(info));
		if ((cmd == RTM_ADD && pass == 1) ||
		    (cmd == RTM_DELETE && pass == 2)) {
			struct ifa_msghdr *ifam;
			u_char ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

			/* Lock ifp for if_lladdr */
			ifnet_lock_shared(ifp);
			IFA_LOCK(ifa);
			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold its
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			if ((m = rt_msg1(ncmd, &info)) == NULL) {
				IFA_UNLOCK(ifa);
				ifnet_lock_done(ifp);
				continue;
			}
			IFA_UNLOCK(ifa);
			ifnet_lock_done(ifp);
			ifam = mtod(m, struct ifa_msghdr *);
			ifam->ifam_index = ifp->if_index;
			/* Re-take the ifa lock briefly for the mutable fields. */
			IFA_LOCK_SPIN(ifa);
			ifam->ifam_metric = ifa->ifa_metric;
			ifam->ifam_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			ifam->ifam_addrs = info.rti_addrs;
		}
		if ((cmd == RTM_ADD && pass == 2) ||
		    (cmd == RTM_DELETE && pass == 1)) {
			struct rt_msghdr *rtm;

			if (rt == NULL) {
				continue;
			}
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_DST] = sa = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if ((m = rt_msg1(cmd, &info)) == NULL) {
				continue;
			}
			rtm = mtod(m, struct rt_msghdr *);
			rtm->rtm_index = ifp->if_index;
			rtm->rtm_flags |= rt->rt_flags;
			rtm->rtm_errno = error;
			rtm->rtm_addrs = info.rti_addrs;
		}
		route_proto.sp_protocol = sa ? sa->sa_family : 0;
		raw_input(m, &route_proto, &route_src, &route_dst);
	}
}
1489
/*
 * This is the analogue to rt_newaddrmsg, which performs the same
 * function but for multicast group memberships. This is easier since
 * there is no route state to worry about.
 */
void
rt_newmaddrmsg(u_char cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* Nobody is listening on routing sockets; skip the work. */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Lock ifp for if_lladdr */
	ifnet_lock_shared(ifp);
	bzero((caddr_t)&info, sizeof(info));
	IFMA_LOCK(ifma);
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	/* lladdr doesn't need lock */
	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;

	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
	    ifma->ifma_ll->ifma_addr : NULL;
	if ((m = rt_msg1(cmd, &info)) == NULL) {
		IFMA_UNLOCK(ifma);
		ifnet_lock_done(ifp);
		return;
	}
	ifmam = mtod(m, struct ifma_msghdr *);
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
	/* Drop the locks before handing the mbuf to the input path. */
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1535
1536 const char *
rtm2str(int cmd)1537 rtm2str(int cmd)
1538 {
1539 const char *c = "RTM_?";
1540
1541 switch (cmd) {
1542 case RTM_ADD:
1543 c = "RTM_ADD";
1544 break;
1545 case RTM_DELETE:
1546 c = "RTM_DELETE";
1547 break;
1548 case RTM_CHANGE:
1549 c = "RTM_CHANGE";
1550 break;
1551 case RTM_GET:
1552 c = "RTM_GET";
1553 break;
1554 case RTM_LOSING:
1555 c = "RTM_LOSING";
1556 break;
1557 case RTM_REDIRECT:
1558 c = "RTM_REDIRECT";
1559 break;
1560 case RTM_MISS:
1561 c = "RTM_MISS";
1562 break;
1563 case RTM_LOCK:
1564 c = "RTM_LOCK";
1565 break;
1566 case RTM_OLDADD:
1567 c = "RTM_OLDADD";
1568 break;
1569 case RTM_OLDDEL:
1570 c = "RTM_OLDDEL";
1571 break;
1572 case RTM_RESOLVE:
1573 c = "RTM_RESOLVE";
1574 break;
1575 case RTM_NEWADDR:
1576 c = "RTM_NEWADDR";
1577 break;
1578 case RTM_DELADDR:
1579 c = "RTM_DELADDR";
1580 break;
1581 case RTM_IFINFO:
1582 c = "RTM_IFINFO";
1583 break;
1584 case RTM_NEWMADDR:
1585 c = "RTM_NEWMADDR";
1586 break;
1587 case RTM_DELMADDR:
1588 c = "RTM_DELMADDR";
1589 break;
1590 case RTM_GET_SILENT:
1591 c = "RTM_GET_SILENT";
1592 break;
1593 case RTM_IFINFO2:
1594 c = "RTM_IFINFO2";
1595 break;
1596 case RTM_NEWMADDR2:
1597 c = "RTM_NEWMADDR2";
1598 break;
1599 case RTM_GET2:
1600 c = "RTM_GET2";
1601 break;
1602 case RTM_GET_EXT:
1603 c = "RTM_GET_EXT";
1604 break;
1605 }
1606
1607 return c;
1608 }
1609
1610 /*
1611 * This is used in dumping the kernel table via sysctl().
1612 */
/*
 * Per-route callback for rnh_walktree(): emits one RTM_GET (or
 * RTM_GET2 for NET_RT_DUMP2 walks) message for the route into the
 * sysctl request carried by the walkarg.  Flag-filtered walks skip
 * routes whose flags don't intersect w_arg.
 */
static int
sysctl_dumpentry(struct radix_node *rn, void *vw)
{
	struct walkarg *w = vw;
	struct rtentry *rt = (struct rtentry *)rn;
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred;
	kauth_cred_t *credp;

	cred = kauth_cred_proc_ref(current_proc());
	credp = &cred;

	RT_LOCK(rt);
	if ((w->w_op == NET_RT_FLAGS || w->w_op == NET_RT_FLAGS_PRIV) &&
	    !(rt->rt_flags & w->w_arg)) {
		goto done;
	}

	/*
	 * If the matching route has RTF_LLINFO set, then we can skip scrubbing the MAC
	 * only if the outgoing interface is not loopback and the process has entitlement
	 * for neighbor cache read.
	 */
	if (w->w_op == NET_RT_FLAGS_PRIV && (rt->rt_flags & RTF_LLINFO)) {
		if (rt->rt_ifp != lo_ifp &&
		    (route_op_entitlement_check(NULL, cred, ROUTE_OP_READ, TRUE) == 0)) {
			/* NULL credential tells rt_msg2() not to scrub. */
			credp = NULL;
		}
	}

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
	if (RT_HAS_IFADDR(rt)) {
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	if (w->w_op != NET_RT_DUMP2) {
		/* rt_msg2() builds the message into w->w_tmem. */
		size = rt_msg2(RTM_GET, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr *rtm =
			    (struct rt_msghdr *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_pid = 0;
			rtm->rtm_seq = 0;
			rtm->rtm_errno = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	} else {
		/* Extended dump: rt_msghdr2 additionally reports refcnt/parent. */
		size = rt_msg2(RTM_GET2, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr2 *rtm =
			    (struct rt_msghdr2 *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_refcnt = rt->rt_refcnt;
			if (rt->rt_parent) {
				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
			} else {
				rtm->rtm_parentflags = 0;
			}
			rtm->rtm_reserved = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	}

done:
	RT_UNLOCK(rt);
	kauth_cred_unref(&cred);
	return error;
}
1696
1697 /*
1698 * This is used for dumping extended information from route entries.
1699 */
/*
 * Per-route callback for extended (NET_RT_DUMPX*) walks: emits one
 * RTM_GET_EXT message, including link-quality/reachability info when
 * the route's link layer provides it.
 */
static int
sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
{
	struct walkarg *w = vw;
	struct rtentry *rt = (struct rtentry *)rn;
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred;

	cred = kauth_cred_proc_ref(current_proc());

	RT_LOCK(rt);
	/* Flag-filtered walk: skip routes that don't match w_arg. */
	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
		goto done;
	}
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;

	/* rt_msg2() builds the message into w->w_tmem. */
	size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred);
	if (w->w_req != NULL && w->w_tmem != NULL) {
		struct rt_msghdr_ext *ertm =
		    (struct rt_msghdr_ext *)(void *)w->w_tmem;

		ertm->rtm_flags = rt->rt_flags;
		ertm->rtm_use = rt->rt_use;
		rt_getmetrics(rt, &ertm->rtm_rmx);
		ertm->rtm_index = rt->rt_ifp->if_index;
		ertm->rtm_pid = 0;
		ertm->rtm_seq = 0;
		ertm->rtm_errno = 0;
		ertm->rtm_addrs = info.rti_addrs;
		if (rt->rt_llinfo_get_ri == NULL) {
			/* No link-layer info hook; report unknown quality. */
			bzero(&ertm->rtm_ri, sizeof(ertm->rtm_ri));
			ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
			ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
			ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
		} else {
			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
		}
		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
	}

done:
	RT_UNLOCK(rt);
	kauth_cred_unref(&cred);
	return error;
}
1750
/*
 * rdar://9307819
 * To avoid calling copyout() while holding locks, and to avoid causing
 * problems in the paging path, sysctl_iflist() and sysctl_iflist2()
 * construct the list in two passes. In the first pass we compute the
 * total length of the data we are going to copy out, then we release
 * all locks to allocate a temporary buffer that gets filled
 * in the second pass.
 *
 * Note that we are verifying the assumption that kalloc() returns a buffer
 * that is at least 32 bits aligned and that the messages and addresses are
 * 32 bits aligned.
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct rt_addrinfo info;
	int error = 0;
	int pass = 0;
	size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
	char *total_buffer = NULL, *cp = NULL;
	kauth_cred_t cred;

	cred = kauth_cred_proc_ref(current_proc());

	bzero((caddr_t)&info, sizeof(info));

	/* Pass 0 sizes the output; pass 1 fills the temporary buffer. */
	for (pass = 0; pass < 2; pass++) {
		ifnet_head_lock_shared();

		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (error) {
				break;
			}
			/* Nonzero w_arg restricts the dump to one interface. */
			if (w->w_arg && w->w_arg != ifp->if_index) {
				continue;
			}
			ifnet_lock_shared(ifp);
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold the ifa
			 * lock. The link address is always present; it's
			 * never freed.
			 */
			ifa = ifp->if_lladdr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred);
			if (pass == 0) {
				if (os_add_overflow(total_len, len, &total_len)) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
			} else {
				struct if_msghdr *ifm;

				/* The interface list may have grown between passes. */
				if (current_len + len > total_len) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
				len = rt_msg2(RTM_IFINFO, &info,
				    (caddr_t)cp, NULL, &cred);
				info.rti_info[RTAX_IFP] = NULL;

				ifm = (struct if_msghdr *)(void *)cp;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = (u_short)ifp->if_flags;
				if_data_internal_to_if_data(ifp, &ifp->if_data,
				    &ifm->ifm_data);
				ifm->ifm_addrs = info.rti_addrs;
				/*
				 * <rdar://problem/32940901>
				 * Round bytes only for non-platform
				 */
				if (!csproc_get_platform_binary(w->w_req->p)) {
					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
				}

				cp += len;
				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
				current_len += len;
				VERIFY(current_len <= total_len);
			}
			/* One RTM_NEWADDR record per matching address. */
			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
				IFA_LOCK(ifa);
				if (af && af != ifa->ifa_addr->sa_family) {
					IFA_UNLOCK(ifa);
					continue;
				}
				/* CLAT46 addresses are excluded from the dump. */
				if (ifa->ifa_addr->sa_family == AF_INET6 &&
				    (((struct in6_ifaddr *)ifa)->ia6_flags &
				    IN6_IFF_CLAT46) != 0) {
					IFA_UNLOCK(ifa);
					continue;
				}
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
				    &cred);
				if (pass == 0) {
					if (os_add_overflow(total_len, len, &total_len)) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
				} else {
					struct ifa_msghdr *ifam;

					if (current_len + len > total_len) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
					len = rt_msg2(RTM_NEWADDR, &info,
					    (caddr_t)cp, NULL, &cred);

					ifam = (struct ifa_msghdr *)(void *)cp;
					ifam->ifam_index =
					    ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;

					cp += len;
					VERIFY(IS_P2ALIGNED(cp,
					    sizeof(u_int32_t)));
					current_len += len;
					VERIFY(current_len <= total_len);
				}
				IFA_UNLOCK(ifa);
			}
			ifnet_lock_done(ifp);
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			    info.rti_info[RTAX_BRD] = NULL;
		}

		ifnet_head_done();

		if (error != 0) {
			if (error == ENOBUFS) {
				printf("%s: current_len (%lu) + len (%lu) > "
				    "total_len (%lu)\n", __func__, current_len,
				    len, total_len);
			}
			break;
		}

		if (pass == 0) {
			/* Better to return zero length buffer than ENOBUFS */
			if (total_len == 0) {
				total_len = 1;
			}
			/*
			 * Pad the estimate by 1/8th, presumably to absorb
			 * addresses added between the two passes --
			 * NOTE(review): intent inferred, confirm.
			 */
			total_len += total_len >> 3;
			total_buffer_len = total_len;
			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
			if (total_buffer == NULL) {
				printf("%s: kalloc_data(%lu) failed\n", __func__,
				    total_len);
				error = ENOBUFS;
				break;
			}
			cp = total_buffer;
			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
		} else {
			/* Copy the assembled records out in one shot. */
			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
			if (error) {
				break;
			}
		}
	}

	if (total_buffer != NULL) {
		kfree_data(total_buffer, total_buffer_len);
	}

	kauth_cred_unref(&cred);
	return error;
}
1935
1936 static int
sysctl_iflist2(int af,struct walkarg * w)1937 sysctl_iflist2(int af, struct walkarg *w)
1938 {
1939 struct ifnet *ifp;
1940 struct ifaddr *ifa;
1941 struct rt_addrinfo info;
1942 int error = 0;
1943 int pass = 0;
1944 size_t len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
1945 char *total_buffer = NULL, *cp = NULL;
1946 kauth_cred_t cred;
1947
1948 cred = kauth_cred_proc_ref(current_proc());
1949
1950 bzero((caddr_t)&info, sizeof(info));
1951
1952 for (pass = 0; pass < 2; pass++) {
1953 struct ifmultiaddr *ifma;
1954
1955 ifnet_head_lock_shared();
1956
1957 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1958 if (error) {
1959 break;
1960 }
1961 if (w->w_arg && w->w_arg != ifp->if_index) {
1962 continue;
1963 }
1964 ifnet_lock_shared(ifp);
1965 /*
1966 * Holding ifnet lock here prevents the link address
1967 * from changing contents, so no need to hold the ifa
1968 * lock. The link address is always present; it's
1969 * never freed.
1970 */
1971 ifa = ifp->if_lladdr;
1972 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1973 len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred);
1974 if (pass == 0) {
1975 if (os_add_overflow(total_len, len, &total_len)) {
1976 ifnet_lock_done(ifp);
1977 error = ENOBUFS;
1978 break;
1979 }
1980 } else {
1981 struct if_msghdr2 *ifm;
1982
1983 if (current_len + len > total_len) {
1984 ifnet_lock_done(ifp);
1985 error = ENOBUFS;
1986 break;
1987 }
1988 info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1989 len = rt_msg2(RTM_IFINFO2, &info,
1990 (caddr_t)cp, NULL, &cred);
1991 info.rti_info[RTAX_IFP] = NULL;
1992
1993 ifm = (struct if_msghdr2 *)(void *)cp;
1994 ifm->ifm_addrs = info.rti_addrs;
1995 ifm->ifm_flags = (u_short)ifp->if_flags;
1996 ifm->ifm_index = ifp->if_index;
1997 ifm->ifm_snd_len = IFCQ_LEN(ifp->if_snd);
1998 ifm->ifm_snd_maxlen = IFCQ_MAXLEN(ifp->if_snd);
1999 ifm->ifm_snd_drops =
2000 (int)ifp->if_snd->ifcq_dropcnt.packets;
2001 ifm->ifm_timer = ifp->if_timer;
2002 if_data_internal_to_if_data64(ifp,
2003 &ifp->if_data, &ifm->ifm_data);
2004 /*
2005 * <rdar://problem/32940901>
2006 * Round bytes only for non-platform
2007 */
2008 if (!csproc_get_platform_binary(w->w_req->p)) {
2009 ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
2010 ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
2011 }
2012
2013 cp += len;
2014 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2015 current_len += len;
2016 VERIFY(current_len <= total_len);
2017 }
2018 while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
2019 IFA_LOCK(ifa);
2020 if (af && af != ifa->ifa_addr->sa_family) {
2021 IFA_UNLOCK(ifa);
2022 continue;
2023 }
2024 if (ifa->ifa_addr->sa_family == AF_INET6 &&
2025 (((struct in6_ifaddr *)ifa)->ia6_flags &
2026 IN6_IFF_CLAT46) != 0) {
2027 IFA_UNLOCK(ifa);
2028 continue;
2029 }
2030
2031 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2032 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2033 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2034 len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
2035 &cred);
2036 if (pass == 0) {
2037 if (os_add_overflow(total_len, len, &total_len)) {
2038 IFA_UNLOCK(ifa);
2039 error = ENOBUFS;
2040 break;
2041 }
2042 } else {
2043 struct ifa_msghdr *ifam;
2044
2045 if (current_len + len > total_len) {
2046 IFA_UNLOCK(ifa);
2047 error = ENOBUFS;
2048 break;
2049 }
2050 len = rt_msg2(RTM_NEWADDR, &info,
2051 (caddr_t)cp, NULL, &cred);
2052
2053 ifam = (struct ifa_msghdr *)(void *)cp;
2054 ifam->ifam_index =
2055 ifa->ifa_ifp->if_index;
2056 ifam->ifam_flags = ifa->ifa_flags;
2057 ifam->ifam_metric = ifa->ifa_metric;
2058 ifam->ifam_addrs = info.rti_addrs;
2059
2060 cp += len;
2061 VERIFY(IS_P2ALIGNED(cp,
2062 sizeof(u_int32_t)));
2063 current_len += len;
2064 VERIFY(current_len <= total_len);
2065 }
2066 IFA_UNLOCK(ifa);
2067 }
2068 if (error) {
2069 ifnet_lock_done(ifp);
2070 break;
2071 }
2072
2073 for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
2074 ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
2075 struct ifaddr *ifa0;
2076
2077 IFMA_LOCK(ifma);
2078 if (af && af != ifma->ifma_addr->sa_family) {
2079 IFMA_UNLOCK(ifma);
2080 continue;
2081 }
2082 bzero((caddr_t)&info, sizeof(info));
2083 info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2084 /*
2085 * Holding ifnet lock here prevents the link
2086 * address from changing contents, so no need
2087 * to hold the ifa0 lock. The link address is
2088 * always present; it's never freed.
2089 */
2090 ifa0 = ifp->if_lladdr;
2091 info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
2092 if (ifma->ifma_ll != NULL) {
2093 info.rti_info[RTAX_GATEWAY] =
2094 ifma->ifma_ll->ifma_addr;
2095 }
2096 len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL,
2097 &cred);
2098 if (pass == 0) {
2099 total_len += len;
2100 } else {
2101 struct ifma_msghdr2 *ifmam;
2102
2103 if (current_len + len > total_len) {
2104 IFMA_UNLOCK(ifma);
2105 error = ENOBUFS;
2106 break;
2107 }
2108 len = rt_msg2(RTM_NEWMADDR2, &info,
2109 (caddr_t)cp, NULL, &cred);
2110
2111 ifmam =
2112 (struct ifma_msghdr2 *)(void *)cp;
2113 ifmam->ifmam_addrs = info.rti_addrs;
2114 ifmam->ifmam_flags = 0;
2115 ifmam->ifmam_index =
2116 ifma->ifma_ifp->if_index;
2117 ifmam->ifmam_refcount =
2118 ifma->ifma_reqcnt;
2119
2120 cp += len;
2121 VERIFY(IS_P2ALIGNED(cp,
2122 sizeof(u_int32_t)));
2123 current_len += len;
2124 }
2125 IFMA_UNLOCK(ifma);
2126 }
2127 ifnet_lock_done(ifp);
2128 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2129 info.rti_info[RTAX_BRD] = NULL;
2130 }
2131 ifnet_head_done();
2132
2133 if (error) {
2134 if (error == ENOBUFS) {
2135 printf("%s: current_len (%lu) + len (%lu) > "
2136 "total_len (%lu)\n", __func__, current_len,
2137 len, total_len);
2138 }
2139 break;
2140 }
2141
2142 if (pass == 0) {
2143 /* Better to return zero length buffer than ENOBUFS */
2144 if (total_len == 0) {
2145 total_len = 1;
2146 }
2147 total_len += total_len >> 3;
2148 total_buffer_len = total_len;
2149 total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
2150 if (total_buffer == NULL) {
2151 printf("%s: kalloc_data(%lu) failed\n", __func__,
2152 total_len);
2153 error = ENOBUFS;
2154 break;
2155 }
2156 cp = total_buffer;
2157 VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2158 } else {
2159 error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
2160 if (error) {
2161 break;
2162 }
2163 }
2164 }
2165
2166 if (total_buffer != NULL) {
2167 kfree_data(total_buffer, total_buffer_len);
2168 }
2169
2170 kauth_cred_unref(&cred);
2171 return error;
2172 }
2173
2174
2175 static int
sysctl_rtstat(struct sysctl_req * req)2176 sysctl_rtstat(struct sysctl_req *req)
2177 {
2178 return SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat));
2179 }
2180
2181 static int
sysctl_rttrash(struct sysctl_req * req)2182 sysctl_rttrash(struct sysctl_req *req)
2183 {
2184 return SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
2185 }
2186
2187 static int
2188 sysctl_rtsock SYSCTL_HANDLER_ARGS
2189 {
2190 #pragma unused(oidp)
2191 int *name = (int *)arg1;
2192 u_int namelen = arg2;
2193 struct radix_node_head *rnh;
2194 int i, error = EINVAL;
2195 u_char af;
2196 struct walkarg w;
2197
2198 name++;
2199 namelen--;
2200 if (req->newptr) {
2201 return EPERM;
2202 }
2203 if (namelen != 3) {
2204 return EINVAL;
2205 }
2206 af = (u_char)name[0];
2207 Bzero(&w, sizeof(w));
2208 w.w_op = name[1];
2209 w.w_arg = name[2];
2210 w.w_req = req;
2211
2212 switch (w.w_op) {
2213 case NET_RT_DUMP:
2214 case NET_RT_DUMP2:
2215 case NET_RT_FLAGS:
2216 case NET_RT_FLAGS_PRIV:
2217 lck_mtx_lock(rnh_lock);
2218 for (i = 1; i <= AF_MAX; i++) {
2219 if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2220 (error = rnh->rnh_walktree(rnh,
2221 sysctl_dumpentry, &w))) {
2222 break;
2223 }
2224 }
2225 lck_mtx_unlock(rnh_lock);
2226 break;
2227 case NET_RT_DUMPX:
2228 case NET_RT_DUMPX_FLAGS:
2229 lck_mtx_lock(rnh_lock);
2230 for (i = 1; i <= AF_MAX; i++) {
2231 if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
2232 (error = rnh->rnh_walktree(rnh,
2233 sysctl_dumpentry_ext, &w))) {
2234 break;
2235 }
2236 }
2237 lck_mtx_unlock(rnh_lock);
2238 break;
2239 case NET_RT_IFLIST:
2240 error = sysctl_iflist(af, &w);
2241 break;
2242 case NET_RT_IFLIST2:
2243 error = sysctl_iflist2(af, &w);
2244 break;
2245 case NET_RT_STAT:
2246 error = sysctl_rtstat(req);
2247 break;
2248 case NET_RT_TRASH:
2249 error = sysctl_rttrash(req);
2250 break;
2251 }
2252 if (w.w_tmem != NULL) {
2253 kfree_data(w.w_tmem, w.w_tmemsize);
2254 }
2255 return error;
2256 }
2257
2258 /*
2259 * Definitions of protocols supported in the ROUTE domain.
2260 */
/*
 * Protocol switch for the ROUTE domain: a single raw-socket entry.
 * PR_ATOMIC | PR_ADDR marks messages as atomic records carrying addresses.
 * route_output handles messages sent on routing sockets; raw_ctlinput
 * receives control input; route_usrreqs supplies the user-request vector.
 */
static struct protosw routesw[] = {
	{
		.pr_type = SOCK_RAW,
		.pr_protocol = 0,
		.pr_flags = PR_ATOMIC | PR_ADDR,
		.pr_output = route_output,
		.pr_ctlinput = raw_ctlinput,
		.pr_usrreqs = &route_usrreqs,
	}
};
2271
2272 static int route_proto_count = (sizeof(routesw) / sizeof(struct protosw));
2273
/*
 * The PF_ROUTE domain descriptor; route_dinit is invoked by the domain
 * framework to attach the protocols above and finish initialization.
 */
struct domain routedomain_s = {
	.dom_family = PF_ROUTE,
	.dom_name = "route",
	.dom_init = route_dinit,
};
2279
2280 static void
route_dinit(struct domain * dp)2281 route_dinit(struct domain *dp)
2282 {
2283 struct protosw *pr;
2284 int i;
2285
2286 VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
2287 VERIFY(routedomain == NULL);
2288
2289 routedomain = dp;
2290
2291 for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) {
2292 net_add_proto(pr, dp, 1);
2293 }
2294
2295 route_init();
2296 }
2297