xref: /xnu-8796.121.2/bsd/net/rtsock.c (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1988, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)rtsock.c	8.5 (Berkeley) 11/2/94
61  */
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kauth.h>
66 #include <sys/kernel.h>
67 #include <sys/sysctl.h>
68 #include <sys/proc.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/syslog.h>
76 #include <sys/mcache.h>
77 #include <kern/locks.h>
78 #include <sys/codesign.h>
79 
80 #include <net/if.h>
81 #include <net/route.h>
82 #include <net/dlil.h>
83 #include <net/raw_cb.h>
84 #include <netinet/in.h>
85 #include <netinet/in_var.h>
86 #include <netinet/in_arp.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip6.h>
89 #include <netinet6/nd6.h>
90 
extern struct rtstat rtstat;
extern struct domain routedomain_s;
static struct domain *routedomain = NULL;

/*
 * Fixed pseudo-addresses used when looping routing messages back to
 * PF_ROUTE listeners via raw_input(); sa_zero is an all-zero AF_INET
 * sockaddr used as a placeholder where a sockaddr is required.
 */
static struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, .sa_data = { 0, } };
static struct sockaddr sa_zero   = { .sa_len = sizeof(sa_zero), .sa_family = AF_INET, .sa_data = { 0, } };

/*
 * Counters of routing sockets currently attached, broken out by the
 * protocol they asked for; updated atomically in rts_attach/rts_detach.
 */
struct route_cb {
	u_int32_t       ip_count;       /* attached w/ AF_INET */
	u_int32_t       ip6_count;      /* attached w/ AF_INET6 */
	u_int32_t       any_count;      /* total attached */
};

static struct route_cb route_cb;

/*
 * Per-request state threaded through the routing-table sysctl walkers
 * (sysctl_dumpentry, sysctl_iflist, etc.).
 */
struct walkarg {
	int     w_tmemsize;             /* size of w_tmem scratch buffer */
	int     w_op, w_arg;            /* sysctl operation and its argument */
	caddr_t w_tmem;                 /* scratch buffer for building messages */
	struct sysctl_req *w_req;       /* the sysctl request being served */
};
113 
/* Forward declarations for the PF_ROUTE pr_usrreqs entry points. */
static void route_dinit(struct domain *);
static int rts_abort(struct socket *);
static int rts_attach(struct socket *, int, struct proc *);
static int rts_bind(struct socket *, struct sockaddr *, struct proc *);
static int rts_connect(struct socket *, struct sockaddr *, struct proc *);
static int rts_detach(struct socket *);
static int rts_disconnect(struct socket *);
static int rts_peeraddr(struct socket *, struct sockaddr **);
static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *,
    struct mbuf *, struct proc *);
static int rts_shutdown(struct socket *);
static int rts_sockaddr(struct socket *, struct sockaddr **);

/* Message handling and sysctl support for the routing socket. */
static int route_output(struct mbuf *, struct socket *);
static int rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
static void rt_getmetrics(struct rtentry *, struct rt_metrics *);
static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
    struct sockaddr *, unsigned int);
static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
static struct mbuf *rt_msg1(u_char, struct rt_addrinfo *);
static int rt_msg2(u_char, struct rt_addrinfo *, caddr_t, struct walkarg *,
    kauth_cred_t *);
static int sysctl_dumpentry(struct radix_node *rn, void *vw);
static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
static int sysctl_iflist(int af, struct walkarg *w);
static int sysctl_iflist2(int af, struct walkarg *w);
static int sysctl_rtstat(struct sysctl_req *);
static int sysctl_rttrash(struct sysctl_req *);
static int sysctl_rtsock SYSCTL_HANDLER_ARGS;

/* net.routetable: read-only dump of the routing tables. */
SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED,
    sysctl_rtsock, "");

/* net.route: container node for routing tunables. */
SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "routing");
148 
/* Align x to 1024 (only power of 2) assuming x is positive */
#define ALIGN_BYTES(x) do {                                             \
	x = (uint32_t)P2ALIGN(x, 1024);                         \
} while(0)

/* Round a sockaddr length up to a 32-bit boundary (minimum 4 bytes). */
#define ROUNDUP32(a)                                                    \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) :        \
	sizeof (uint32_t))

/* Advance cursor x past sockaddr n, preserving 32-bit alignment. */
#define ADVANCE32(x, n)                                                 \
	(x += ROUNDUP32((n)->sa_len))

/* True when the route has a usable interface address to report. */
#define RT_HAS_IFADDR(rt)                                               \
	((rt)->rt_ifa != NULL && (rt)->rt_ifa->ifa_addr != NULL)
163 
164 /*
165  * It really doesn't make any sense at all for this code to share much
166  * with raw_usrreq.c, since its functionality is so restricted.  XXX
167  */
/* pru_abort for PF_ROUTE sockets: delegate to the generic raw-socket code. */
static int
rts_abort(struct socket *so)
{
	return raw_usrreqs.pru_abort(so);
}
173 
174 /* pru_accept is EOPNOTSUPP */
175 
/*
 * pru_attach for PF_ROUTE sockets.  Allocates and attaches the raw PCB,
 * bumps the per-family listener counters, and marks the socket connected
 * to the fixed route_src address so sends need no explicit destination.
 * The socket is already locked by the caller.
 */
static int
rts_attach(struct socket *so, int proto, struct proc *p)
{
#pragma unused(p)
	struct rawcb *rp;
	int error;

	VERIFY(so->so_pcb == NULL);

	rp = kalloc_type(struct rawcb, Z_WAITOK_ZERO_NOFAIL);
	so->so_pcb = (caddr_t)rp;
	/* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
	error = raw_attach(so, proto);
	rp = sotorawcb(so);
	if (error) {
		kfree_type(struct rawcb, rp);
		so->so_pcb = NULL;
		so->so_flags |= SOF_PCBCLEARING;
		return error;
	}

	/* count this listener under the family it asked for */
	switch (rp->rcb_proto.sp_protocol) {
	case AF_INET:
		atomic_add_32(&route_cb.ip_count, 1);
		break;
	case AF_INET6:
		atomic_add_32(&route_cb.ip6_count, 1);
		break;
	}
	rp->rcb_faddr = &route_src;
	atomic_add_32(&route_cb.any_count, 1);
	/* the socket is already locked when we enter rts_attach */
	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;
	return 0;
}
212 
/* pru_bind: delegate to the raw-socket code (always fails with EINVAL). */
static int
rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */
}
218 
/* pru_connect: delegate to the raw-socket code (always fails with EINVAL). */
static int
rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	return raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */
}
224 
225 /* pru_connect2 is EOPNOTSUPP */
226 /* pru_control is EOPNOTSUPP */
227 
/*
 * pru_detach for PF_ROUTE sockets: decrement the per-family listener
 * counters (mirroring rts_attach) and hand off to the raw-socket code
 * to tear down the PCB.
 */
static int
rts_detach(struct socket *so)
{
	struct rawcb *rp = sotorawcb(so);

	VERIFY(rp != NULL);

	switch (rp->rcb_proto.sp_protocol) {
	case AF_INET:
		atomic_add_32(&route_cb.ip_count, -1);
		break;
	case AF_INET6:
		atomic_add_32(&route_cb.ip6_count, -1);
		break;
	}
	atomic_add_32(&route_cb.any_count, -1);
	return raw_usrreqs.pru_detach(so);
}
246 
/* pru_disconnect: delegate to the generic raw-socket code. */
static int
rts_disconnect(struct socket *so)
{
	return raw_usrreqs.pru_disconnect(so);
}
252 
253 /* pru_listen is EOPNOTSUPP */
254 
/* pru_peeraddr: delegate to the generic raw-socket code. */
static int
rts_peeraddr(struct socket *so, struct sockaddr **nam)
{
	return raw_usrreqs.pru_peeraddr(so, nam);
}
260 
261 /* pru_rcvd is EOPNOTSUPP */
262 /* pru_rcvoob is EOPNOTSUPP */
263 
/* pru_send: delegate to the raw-socket code, which calls route_output. */
static int
rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct proc *p)
{
	return raw_usrreqs.pru_send(so, flags, m, nam, control, p);
}
270 
271 /* pru_sense is null */
272 
/* pru_shutdown: delegate to the generic raw-socket code. */
static int
rts_shutdown(struct socket *so)
{
	return raw_usrreqs.pru_shutdown(so);
}
278 
/* pru_sockaddr: delegate to the generic raw-socket code. */
static int
rts_sockaddr(struct socket *so, struct sockaddr **nam)
{
	return raw_usrreqs.pru_sockaddr(so, nam);
}
284 
/*
 * pr_usrreqs vector for PF_ROUTE sockets.  Entries not listed here
 * (accept, connect2, control, listen, rcvd, rcvoob, sense) are
 * EOPNOTSUPP or null per the comments above.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort =            rts_abort,
	.pru_attach =           rts_attach,
	.pru_bind =             rts_bind,
	.pru_connect =          rts_connect,
	.pru_detach =           rts_detach,
	.pru_disconnect =       rts_disconnect,
	.pru_peeraddr =         rts_peeraddr,
	.pru_send =             rts_send,
	.pru_shutdown =         rts_shutdown,
	.pru_sockaddr =         rts_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
299 
/*
 * Process one routing message (RTM_ADD/DELETE/GET/CHANGE/LOCK) written to
 * a PF_ROUTE socket.  The request in mbuf chain 'm' is validated and
 * copied into a kernel buffer, the operation is performed while holding
 * rnh_lock, and the (possibly rewritten) message is echoed back: to all
 * listeners via raw_input(), or only to the requesting socket when
 * RTM_GET_SILENT was used.  The socket lock is dropped for the duration
 * of the routing-table work and re-taken before delivery.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so)
{
	struct rt_msghdr *rtm = NULL;
	size_t rtm_len = 0;
	struct rtentry *rt = NULL;
	struct rtentry *saved_nrt = NULL;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	int len, error = 0;
	sa_family_t dst_sa_family = 0;
	struct ifnet *ifp = NULL;
	struct sockaddr_in dst_in, gate_in;
	int sendonlytoself = 0;
	unsigned int ifscope = IFSCOPE_NONE;
	struct rawcb *rp = NULL;
	boolean_t is_router = FALSE;
/* record the error and take the common exit path */
#define senderr(e) { error = (e); goto flush; }
	if (m == NULL || ((m->m_len < sizeof(intptr_t)) &&
	    (m = m_pullup(m, sizeof(intptr_t))) == NULL)) {
		return ENOBUFS;
	}
	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * Unlock the socket (but keep a reference) it won't be
	 * accessed until raw_input appends to it.
	 */
	socket_unlock(so, 0);
	lck_mtx_lock(rnh_lock);

	/* header must be complete and internally consistent with pkt length */
	len = m->m_pkthdr.len;
	if (len < sizeof(*rtm) ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	rtm = kalloc_data(len, Z_WAITOK);
	if (rtm == NULL) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(ENOBUFS);
	}
	rtm_len = (size_t)len;
	m_copydata(m, 0, len, (caddr_t)rtm);
	if (rtm->rtm_version != RTM_VERSION) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPROTONOSUPPORT);
	}

	/*
	 * Silent version of RTM_GET for Reachability APIs. We may change
	 * all RTM_GETs to be silent in the future, so this is private for now.
	 */
	if (rtm->rtm_type == RTM_GET_SILENT) {
		if (!(so->so_options & SO_USELOOPBACK)) {
			senderr(EINVAL);
		}
		sendonlytoself = 1;
		rtm->rtm_type = RTM_GET;
	}

	/*
	 * Perform permission checking, only privileged sockets
	 * may perform operations other than RTM_GET
	 */
	if (rtm->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EPERM);
	}

	rtm->rtm_pid = proc_selfpid();
	info.rti_addrs = rtm->rtm_addrs;
	/* parse the sockaddrs that follow the header into info.rti_info[] */
	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
		info.rti_info[RTAX_DST] = NULL;
		senderr(EINVAL);
	}
	if (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) {
		senderr(EINVAL);
	}

	/*
	 * Normalize short/odd-length AF_INET destinations into a full
	 * sockaddr_in on the stack; reject undersized AF_INET6 ones.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
	    info.rti_info[RTAX_DST]->sa_len != sizeof(struct sockaddr_in)) {
		/* At minimum, we need up to sin_addr */
		if (info.rti_info[RTAX_DST]->sa_len <
		    offsetof(struct sockaddr_in, sin_zero)) {
			senderr(EINVAL);
		}
		bzero(&dst_in, sizeof(dst_in));
		dst_in.sin_len = sizeof(dst_in);
		dst_in.sin_family = AF_INET;
		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
		info.rti_info[RTAX_DST] = (struct sockaddr *)&dst_in;
		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
	} else if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    info.rti_info[RTAX_DST]->sa_len < sizeof(struct sockaddr_in6)) {
		senderr(EINVAL);
	}

	/* same normalization for the gateway, if one was supplied */
	if (info.rti_info[RTAX_GATEWAY] != NULL) {
		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
		    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof(struct sockaddr_in)) {
			/* At minimum, we need up to sin_addr */
			if (info.rti_info[RTAX_GATEWAY]->sa_len <
			    offsetof(struct sockaddr_in, sin_zero)) {
				senderr(EINVAL);
			}
			bzero(&gate_in, sizeof(gate_in));
			gate_in.sin_len = sizeof(gate_in);
			gate_in.sin_family = AF_INET;
			gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
			gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
			info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gate_in;
		} else if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET6 &&
		    info.rti_info[RTAX_GATEWAY]->sa_len < sizeof(struct sockaddr_in6)) {
			senderr(EINVAL);
		}
	}

	/* intern the genmask in the mask tree so it can be shared by ref */
	if (info.rti_info[RTAX_GENMASK]) {
		struct radix_node *t;
		t = rn_addmask((caddr_t)info.rti_info[RTAX_GENMASK], 0, 1);
		if (t != NULL && Bcmp(info.rti_info[RTAX_GENMASK],
		    t->rn_key, *(u_char *)info.rti_info[RTAX_GENMASK]) == 0) {
			info.rti_info[RTAX_GENMASK] =
			    (struct sockaddr *)(t->rn_key);
		} else {
			senderr(ENOBUFS);
		}
	}

	/*
	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
	 */
	if (rtm->rtm_flags & RTF_IFSCOPE) {
		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
		    info.rti_info[RTAX_DST]->sa_family != AF_INET6) {
			senderr(EINVAL);
		}
		ifscope = rtm->rtm_index;
	}
	/*
	 * Block changes on INTCOPROC interfaces.
	 */
	if (ifscope) {
		unsigned int intcoproc_scope = 0;
		ifnet_head_lock_shared();
		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (IFNET_IS_INTCOPROC(ifp)) {
				intcoproc_scope = ifp->if_index;
				break;
			}
		}
		ifnet_head_done();
		/* only the kernel (pid 0) may touch routes scoped to it */
		if (intcoproc_scope == ifscope && proc_getpid(current_proc()) != 0) {
			senderr(EINVAL);
		}
	}

	/*
	 * RTF_PROXY can only be set internally from within the kernel.
	 */
	if (rtm->rtm_flags & RTF_PROXY) {
		senderr(EINVAL);
	}

	/*
	 * For AF_INET, always zero out the embedded scope ID.  If this is
	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
	 * flag and the corresponding rtm_index value.  This is to prevent
	 * false interpretation of the scope ID because it's using the sin_zero
	 * field, which might not be properly cleared by the requestor.
	 */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
	}
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) {
		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
	}
	/* move a KAME-style embedded v6 scope into sin6_scope_id */
	if (info.rti_info[RTAX_DST]->sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(&SIN6(info.rti_info[RTAX_DST])->sin6_addr) &&
	    SIN6(info.rti_info[RTAX_DST])->sin6_scope_id == 0) {
		SIN6(info.rti_info[RTAX_DST])->sin6_scope_id = ntohs(SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1]);
		SIN6(info.rti_info[RTAX_DST])->sin6_addr.s6_addr16[1] = 0;
	}

	switch (rtm->rtm_type) {
	case RTM_ADD:
		if (info.rti_info[RTAX_GATEWAY] == NULL) {
			senderr(EINVAL);
		}

		error = rtrequest_scoped_locked(RTM_ADD,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0 && saved_nrt != NULL) {
			RT_LOCK(saved_nrt);
			/*
			 * If the route request specified an interface with
			 * IFA and/or IFP, we set the requested interface on
			 * the route with rt_setif.  It would be much better
			 * to do this inside rtrequest, but that would
			 * require passing the desired interface, in some
			 * form, to rtrequest.  Since rtrequest is called in
			 * so many places (roughly 40 in our source), adding
			 * a parameter is to much for us to swallow; this is
			 * something for the FreeBSD developers to tackle.
			 * Instead, we let rtrequest compute whatever
			 * interface it wants, then come in behind it and
			 * stick in the interface that we really want.  This
			 * works reasonably well except when rtrequest can't
			 * figure out what interface to use (with
			 * ifa_withroute) and returns ENETUNREACH.  Ideally
			 * it shouldn't matter if rtrequest can't figure out
			 * the interface if we're going to explicitly set it
			 * ourselves anyway.  But practically we can't
			 * recover here because rtrequest will not do any of
			 * the work necessary to add the route if it can't
			 * find an interface.  As long as there is a default
			 * route that leads to some interface, rtrequest will
			 * find an interface, so this problem should be
			 * rarely encountered.
			 * [email protected]
			 */
			rt_setif(saved_nrt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);
			(void)rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt);
			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
			saved_nrt->rt_rmx.rmx_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
			RT_REMREF_LOCKED(saved_nrt);
			RT_UNLOCK(saved_nrt);
		}
		break;

	case RTM_DELETE:
		error = rtrequest_scoped_locked(RTM_DELETE,
		    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
		    info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt,
		    ifscope);
		if (error == 0) {
			/* report the deleted entry back to the caller */
			rt = saved_nrt;
			RT_LOCK(rt);
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
		if (rnh == NULL) {
			senderr(EAFNOSUPPORT);
		}
		/*
		 * Lookup the best match based on the key-mask pair;
		 * callee adds a reference and checks for root node.
		 */
		rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
		    info.rti_info[RTAX_NETMASK], rnh, ifscope);
		if (rt == NULL) {
			senderr(ESRCH);
		}
		RT_LOCK(rt);

		/*
		 * Holding rnh_lock here prevents the possibility of
		 * ifa from changing (e.g. in_ifinit), so it is safe
		 * to access its ifa_addr (down below) without locking.
		 */
		switch (rtm->rtm_type) {
		case RTM_GET: {
			kauth_cred_t cred;
			kauth_cred_t* credp;
			struct ifaddr *ifa2;
report:
			cred = kauth_cred_proc_ref(current_proc());
			credp = &cred;

			ifa2 = NULL;
			RT_LOCK_ASSERT_HELD(rt);
			/* fill info with the entry's current addresses */
			info.rti_info[RTAX_DST] = rt_key(rt);
			dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				ifp = rt->rt_ifp;
				if (ifp != NULL) {
					ifnet_lock_shared(ifp);
					ifa2 = ifp->if_lladdr;
					info.rti_info[RTAX_IFP] =
					    ifa2->ifa_addr;
					IFA_ADDREF(ifa2);
					ifnet_lock_done(ifp);
					info.rti_info[RTAX_IFA] =
					    rt->rt_ifa->ifa_addr;
					rtm->rtm_index = ifp->if_index;
				} else {
					info.rti_info[RTAX_IFP] = NULL;
					info.rti_info[RTAX_IFA] = NULL;
				}
			} else if ((ifp = rt->rt_ifp) != NULL) {
				rtm->rtm_index = ifp->if_index;
			}
			/* first rt_msg2 pass: size the reply message */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL, credp);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}
			struct rt_msghdr *out_rtm;
			out_rtm = kalloc_data(len, Z_WAITOK);
			if (out_rtm == NULL) {
				RT_UNLOCK(rt);
				if (ifa2 != NULL) {
					IFA_REMREF(ifa2);
				}
				senderr(ENOBUFS);
			}
			Bcopy(rtm, out_rtm, sizeof(struct rt_msghdr));
			/* second rt_msg2 pass: serialize into out_rtm */
			if (ifa2 != NULL) {
				IFA_LOCK(ifa2);
			}
			(void) rt_msg2(out_rtm->rtm_type, &info, (caddr_t)out_rtm,
			    NULL, &cred);
			if (ifa2 != NULL) {
				IFA_UNLOCK(ifa2);
			}
			kfree_data(rtm, rtm_len);
			rtm = out_rtm;
			rtm_len = len;
			rtm->rtm_flags = rt->rt_flags;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_addrs = info.rti_addrs;
			if (ifa2 != NULL) {
				IFA_REMREF(ifa2);
			}

			kauth_cred_unref(&cred);
			break;
		}

		case RTM_CHANGE:
			is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;

			if (info.rti_info[RTAX_GATEWAY] != NULL &&
			    (error = rt_setgate(rt, rt_key(rt),
			    info.rti_info[RTAX_GATEWAY]))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			/*
			 * If they tried to change things but didn't specify
			 * the required gateway, then just use the old one.
			 * This can happen if the user tries to change the
			 * flags on the default route without changing the
			 * default gateway. Changing flags still doesn't work.
			 */
			if ((rt->rt_flags & RTF_GATEWAY) &&
			    info.rti_info[RTAX_GATEWAY] == NULL) {
				info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			}

			/*
			 * On Darwin, we call rt_setif which contains the
			 * equivalent to the code found at this very spot
			 * in BSD.
			 */
			rt_setif(rt,
			    info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA],
			    info.rti_info[RTAX_GATEWAY], ifscope);

			if ((error = rt_setmetrics(rtm->rtm_inits,
			    &rtm->rtm_rmx, rt))) {
				int tmp = error;
				RT_UNLOCK(rt);
				senderr(tmp);
			}
			if (info.rti_info[RTAX_GENMASK]) {
				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
			}

			/*
			 * Enqueue work item to invoke callback for this route entry
			 * This may not be needed always, but for now issue it anytime
			 * RTM_CHANGE gets called.
			 */
			route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, TRUE);
			/*
			 * If the route is for a router, walk the tree to send refresh
			 * event to protocol cloned entries
			 */
			if (is_router) {
				struct route_event rt_ev;
				route_event_init(&rt_ev, rt, NULL, ROUTE_ENTRY_REFRESH);
				RT_UNLOCK(rt);
				(void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
				RT_LOCK(rt);
			}
			OS_FALLTHROUGH;
		case RTM_LOCK:
			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			break;
		}
		RT_UNLOCK(rt);
		break;
	default:
		senderr(EOPNOTSUPP);
	}
flush:
	/* mark the outgoing message with the outcome of the operation */
	if (rtm != NULL) {
		if (error) {
			rtm->rtm_errno = error;
		} else {
			rtm->rtm_flags |= RTF_DONE;
		}
	}
	if (rt != NULL) {
		RT_LOCK_ASSERT_NOTHELD(rt);
		rtfree_locked(rt);
	}
	lck_mtx_unlock(rnh_lock);

	/* relock the socket now */
	socket_lock(so, 0);
	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			kfree_data(rtm, rtm_len);
			m_freem(m);
			return error;
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	if (rtm != NULL) {
		/* copy the reply back into the mbuf chain and trim to fit */
		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
		if (m->m_pkthdr.len < rtm->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > rtm->rtm_msglen) {
			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
		}
		kfree_data(rtm, rtm_len);
	}
	if (sendonlytoself && m != NULL) {
		/* RTM_GET_SILENT: deliver only to the requesting socket */
		error = 0;
		if (sbappendaddr(&so->so_rcv, &route_src, m,
		    NULL, &error) != 0) {
			sorwakeup(so);
		}
		if (error) {
			return error;
		}
	} else {
		struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
		if (rp != NULL) {
			rp->rcb_proto.sp_family = 0; /* Avoid us */
		}
		if (dst_sa_family != 0) {
			route_proto.sp_protocol = dst_sa_family;
		}
		if (m != NULL) {
			socket_unlock(so, 0);
			raw_input(m, &route_proto, &route_src, &route_dst);
			socket_lock(so, 0);
		}
		if (rp != NULL) {
			rp->rcb_proto.sp_family = PF_ROUTE;
		}
	}
	return error;
}
789 
790 void
rt_setexpire(struct rtentry * rt,uint64_t expiry)791 rt_setexpire(struct rtentry *rt, uint64_t expiry)
792 {
793 	/* set both rt_expire and rmx_expire */
794 	rt->rt_expire = expiry;
795 	if (expiry) {
796 		rt->rt_rmx.rmx_expire =
797 		    (int32_t)(expiry + rt->base_calendartime -
798 		    rt->base_uptime);
799 	} else {
800 		rt->rt_rmx.rmx_expire = 0;
801 	}
802 }
803 
/*
 * Apply the caller-supplied metrics in 'in', selected by the 'which'
 * bitmask, to route 'out'.  The RTV_REFRESH_HOST bit is special: when
 * set, no metrics are copied; instead the route's link-layer refresh
 * callback is invoked (only valid alone, and only for non-static
 * RTF_LLINFO host routes).
 *
 * Returns 0 on success, EINVAL/ENOTSUP for invalid refresh requests.
 */
static int
rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
{
	if (!(which & RTV_REFRESH_HOST)) {
		struct timeval caltime;
		/* NOTE(review): this result is unused; getmicrotime is
		 * called again below before it is consumed. */
		getmicrotime(&caltime);
/* copy field 'e' from 'in' when bit 'f' is set in 'which' */
#define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
		metric(RTV_RPIPE, rmx_recvpipe);
		metric(RTV_SPIPE, rmx_sendpipe);
		metric(RTV_SSTHRESH, rmx_ssthresh);
		metric(RTV_RTT, rmx_rtt);
		metric(RTV_RTTVAR, rmx_rttvar);
		metric(RTV_HOPCOUNT, rmx_hopcount);
		metric(RTV_MTU, rmx_mtu);
		metric(RTV_EXPIRE, rmx_expire);
#undef metric
		if (out->rt_rmx.rmx_expire > 0) {
			/* account for system time change */
			getmicrotime(&caltime);
			out->base_calendartime +=
			    NET_CALCULATE_CLOCKSKEW(caltime,
			    out->base_calendartime,
			    net_uptime(), out->base_uptime);
			/* convert calendar-time expiry back to uptime */
			rt_setexpire(out,
			    out->rt_rmx.rmx_expire -
			    out->base_calendartime +
			    out->base_uptime);
		} else {
			rt_setexpire(out, 0);
		}

		/* both expire fields must be zero or both non-zero */
		VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
		VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
	} else {
		/* Only RTV_REFRESH_HOST must be set */
		if ((which & ~RTV_REFRESH_HOST) ||
		    (out->rt_flags & RTF_STATIC) ||
		    !(out->rt_flags & RTF_LLINFO)) {
			return EINVAL;
		}

		if (out->rt_llinfo_refresh == NULL) {
			return ENOTSUP;
		}

		out->rt_llinfo_refresh(out);
	}
	return 0;
}
853 
/*
 * Copy route 'in's metrics into 'out' for reporting to userland,
 * converting the uptime-based rt_expire into a calendar-time
 * rmx_expire and compensating for any wall-clock adjustment since
 * the route's baseline timestamps were recorded.
 */
static void
rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
{
	struct timeval caltime;

	/* both expire fields must be zero or both non-zero */
	VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
	VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);

	*out = in->rt_rmx;

	if (in->rt_expire != 0) {
		/* account for system time change */
		getmicrotime(&caltime);

		in->base_calendartime +=
		    NET_CALCULATE_CLOCKSKEW(caltime,
		    in->base_calendartime, net_uptime(), in->base_uptime);

		out->rmx_expire = (int32_t)(in->base_calendartime +
		    in->rt_expire - in->base_uptime);
	} else {
		out->rmx_expire = 0;
	}
}
878 
/*
 * Set route's interface given info.rti_info[RTAX_IFP],
 * info.rti_info[RTAX_IFA], and gateway.
 *
 * Called with rnh_lock and rt_lock held.  Resolves an (ifa, ifp) pair
 * from whichever of the three sockaddrs are supplied, then rebinds the
 * route to that pair, issuing the old ifa's RTM_DELETE and the new
 * ifa's RTM_ADD callbacks and keeping per-interface route refcounts,
 * primary-ifscope bookkeeping, and rmx_mtu in sync.
 */
static void
rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
    struct sockaddr *Gate, unsigned int ifscope)
{
	struct ifaddr *ifa = NULL;
	struct ifnet *ifp = NULL;
	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK_ASSERT_HELD(rt);

	/* Don't update a defunct route */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return;
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);

	/*
	 * New gateway could require new ifaddr, ifp; flags may also
	 * be different; ifp may be specified by ll sockaddr when
	 * protocol address is ambiguous.
	 *
	 * Each lookup below returns "ifa" with a reference held; it is
	 * either consumed by rtsetifa() path below or released here.
	 */
	if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
	    (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
		IFA_REMREF(ifa);
		ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
	} else {
		if (ifa != NULL) {
			IFA_REMREF(ifa);
			ifa = NULL;
		}
		if (Ifpaddr && (ifp = if_withname(Ifpaddr))) {
			if (Gate) {
				ifa = ifaof_ifpforaddr(Gate, ifp);
			} else {
				/* No gateway: fall back to the first address */
				ifnet_lock_shared(ifp);
				ifa = TAILQ_FIRST(&ifp->if_addrhead);
				if (ifa != NULL) {
					IFA_ADDREF(ifa);
				}
				ifnet_lock_done(ifp);
			}
		} else if (Ifaaddr &&
		    (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) {
			ifp = ifa->ifa_ifp;
		} else if (Gate != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, since holding
			 * rnh_lock here prevents another thread from calling
			 * rt_setgate() on this route.  We cannot hold the
			 * lock across ifa_ifwithroute since the lookup done
			 * by that routine may point to the same route.
			 */
			RT_UNLOCK(rt);
			if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags,
			    rt_key(rt), Gate, ifscope)) != NULL) {
				ifp = ifa->ifa_ifp;
			}
			RT_LOCK(rt);
			/* Don't update a defunct route */
			if (rt->rt_flags & RTF_CONDEMNED) {
				if (ifa != NULL) {
					IFA_REMREF(ifa);
				}
				/* Release extra ref */
				RT_REMREF_LOCKED(rt);
				return;
			}
		}
	}

	/* trigger route cache reevaluation */
	if (rt_key(rt)->sa_family == AF_INET) {
		routegenid_inet_update();
	} else if (rt_key(rt)->sa_family == AF_INET6) {
		routegenid_inet6_update();
	}

	if (ifa != NULL) {
		struct ifaddr *oifa = rt->rt_ifa;
		if (oifa != ifa) {
			/* Give the departing ifa a chance to clean up */
			if (oifa != NULL) {
				IFA_LOCK_SPIN(oifa);
				ifa_rtrequest = oifa->ifa_rtrequest;
				IFA_UNLOCK(oifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_DELETE, rt, Gate);
				}
			}
			rtsetifa(rt, ifa);

			if (rt->rt_ifp != ifp) {
				/*
				 * Purge any link-layer info caching.
				 */
				if (rt->rt_llinfo_purge != NULL) {
					rt->rt_llinfo_purge(rt);
				}

				/*
				 * Adjust route ref count for the interfaces.
				 */
				if (rt->rt_if_ref_fn != NULL) {
					rt->rt_if_ref_fn(ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
			}
			rt->rt_ifp = ifp;
			/*
			 * If this is the (non-scoped) default route, record
			 * the interface index used for the primary ifscope.
			 */
			if (rt_primary_default(rt, rt_key(rt))) {
				set_primary_ifscope(rt_key(rt)->sa_family,
				    rt->rt_ifp->if_index);
			}
			/*
			 * If rmx_mtu is not locked, update it
			 * to the MTU used by the new interface.
			 */
			if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
				rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
				if (rt_key(rt)->sa_family == AF_INET &&
				    INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
					rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
					/* Further adjust the size for CLAT46 expansion */
					rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
				}
			}

			/* Notify the new ifa that it now owns this route */
			if (rt->rt_ifa != NULL) {
				IFA_LOCK_SPIN(rt->rt_ifa);
				ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
				IFA_UNLOCK(rt->rt_ifa);
				if (ifa_rtrequest != NULL) {
					ifa_rtrequest(RTM_ADD, rt, Gate);
				}
			}
			IFA_REMREF(ifa);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return;
		}
		/* ifa unchanged; drop the lookup reference */
		IFA_REMREF(ifa);
		ifa = NULL;
	}

	/* XXX: to reset gateway to correct value, at RTM_CHANGE */
	if (rt->rt_ifa != NULL) {
		IFA_LOCK_SPIN(rt->rt_ifa);
		ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
		IFA_UNLOCK(rt->rt_ifa);
		if (ifa_rtrequest != NULL) {
			ifa_rtrequest(RTM_ADD, rt, Gate);
		}
	}

	/*
	 * Workaround for local address routes pointing to the loopback
	 * interface added by configd, until <rdar://problem/12970142>.
	 */
	if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
	    (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) {
		ifa = ifa_ifwithaddr(rt_key(rt));
		if (ifa != NULL) {
			if (ifa != rt->rt_ifa) {
				rtsetifa(rt, ifa);
			}
			IFA_REMREF(ifa);
		}
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
}
1064 
/*
 * Extract the addresses of the passed sockaddrs.
 * Do a little sanity checking so as to avoid bad memory references.
 * This data is derived straight from userland.
 *
 * Walks the buffer [cp, cplim) and, for each index whose bit is set in
 * rtinfo->rti_addrs, records a pointer to the corresponding sockaddr in
 * rtinfo->rti_info[].  Returns 0 on success, EINVAL on malformed input.
 * The recorded pointers alias the caller's buffer; nothing is copied.
 */
static int
rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
{
	struct sockaddr *sa;
	int i;

	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		if ((rtinfo->rti_addrs & (1 << i)) == 0) {
			continue;
		}
		sa = (struct sockaddr *)cp;
		/*
		 * It won't fit.
		 */
		if ((cp + sa->sa_len) > cplim) {
			return EINVAL;
		}
		/* Reject lengths larger than any legitimate sockaddr */
		if (sa->sa_len > sizeof(struct sockaddr_storage)) {
			return EINVAL;
		}
		/*
		 * there are no more.. quit now
		 * If there are more bits, they are in error.
		 * I've seen this. route(1) can evidently generate these.
		 * This causes kernel to core dump.
		 * for compatibility, If we see this, point to a safe address.
		 */
		if (sa->sa_len == 0) {
			rtinfo->rti_info[i] = &sa_zero;
			return 0; /* should be EINVAL but for compat */
		}
		/* Too short to even contain sa_len/sa_family */
		if (sa->sa_len < offsetof(struct sockaddr, sa_data)) {
			return EINVAL;
		}
		/* accept it */
		rtinfo->rti_info[i] = sa;
		ADVANCE32(cp, sa);
	}
	return 0;
}
1111 
/*
 * Build a routing message of the given type in a freshly allocated mbuf,
 * appending (32-bit aligned) each sockaddr present in "rtinfo" after the
 * fixed-size header.  Updates rtinfo->rti_addrs to reflect the addresses
 * actually included.  Returns NULL if allocation fails or the message
 * would not fit; on success the caller owns the returned mbuf.
 */
static struct mbuf *
rt_msg1(u_char type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr *rtm;
	struct mbuf *m;
	int i;
	int len, dlen, off;

	/* Header size depends on the message type */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		/* Header won't fit in a plain mbuf; attach a cluster */
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL) {
		return NULL;
	}
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	rtm = mtod(m, struct rt_msghdr *);
	bzero((caddr_t)rtm, len);
	off = len;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		/* m_copyback() grows the mbuf chain as needed */
		m_copyback(m, off, dlen, (caddr_t)sa);
		len = off + dlen;
		off += ROUNDUP32(dlen);
	}
	/* If m_copyback() could not extend the chain, lengths disagree */
	if (m->m_pkthdr.len != len) {
		m_freem(m);
		return NULL;
	}
	rtm->rtm_msglen = (u_short)len;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	return m;
}
1198 
/*
 * Compute, and optionally write, a routing message of the given type.
 *
 * If "cp" is non-NULL the message (header plus 32-bit aligned sockaddrs)
 * is written there and the header is finalized.  If "cp" is NULL the first
 * pass only computes the total length; when a walkarg "w" is supplied, a
 * per-walk temporary buffer (w->w_tmem) is then (re)allocated to that
 * length and the routine loops back to fill it.  Returns the message
 * length in bytes in either case.
 *
 * "credp" (may be NULL) is passed through to rtm_scrub() to decide
 * whether link-layer data must be scrubbed for this caller.
 */
static int
rt_msg2(u_char type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w,
    kauth_cred_t* credp)
{
	int i;
	int len, dlen, rlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	/* Header size depends on the message type */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;

	case RTM_DELMADDR:
	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;

	case RTM_IFINFO2:
		len = sizeof(struct if_msghdr2);
		break;

	case RTM_NEWMADDR2:
		len = sizeof(struct ifma_msghdr2);
		break;

	case RTM_GET_EXT:
		len = sizeof(struct rt_msghdr_ext);
		break;

	case RTM_GET2:
		len = sizeof(struct rt_msghdr2);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}
	/* Remember where the header starts; addresses follow it */
	cp0 = cp;
	if (cp0) {
		cp += len;
	}
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa, *hint;
		uint8_t ssbuf[SOCK_MAXADDRLEN + 1];

		/*
		 * Make sure to accommodate the largest possible size of sa_len.
		 */
		_CASSERT(sizeof(ssbuf) == (SOCK_MAXADDRLEN + 1));

		if ((sa = rtinfo->rti_info[i]) == NULL) {
			continue;
		}

		switch (i) {
		case RTAX_DST:
		case RTAX_NETMASK:
			if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) {
				hint = rtinfo->rti_info[RTAX_IFA];
			}

			/* Scrub away any trace of embedded interface scope */
			sa = rtm_scrub(type, i, hint, sa, &ssbuf,
			    sizeof(ssbuf), NULL);
			break;
		case RTAX_GATEWAY:
		case RTAX_IFP:
			/* May scrub link-layer data based on caller's cred */
			sa = rtm_scrub(type, i, NULL, sa, &ssbuf,
			    sizeof(ssbuf), credp);
			break;

		default:
			break;
		}

		rtinfo->rti_addrs |= (1 << i);
		dlen = sa->sa_len;
		rlen = ROUNDUP32(dlen);
		if (cp) {
			bcopy((caddr_t)sa, cp, (size_t)dlen);
			if (dlen != rlen) {
				/* Zero the alignment padding */
				bzero(cp + dlen, rlen - dlen);
			}
			cp += rlen;
		}
		len += rlen;
	}
	/* First pass with a walkarg: size w_tmem, then fill it */
	if (cp == NULL && w != NULL && !second_time) {
		struct walkarg *rw = w;

		if (rw->w_req != NULL) {
			if (rw->w_tmemsize < len) {
				if (rw->w_tmem != NULL) {
					kfree_data(rw->w_tmem, rw->w_tmemsize);
				}
				rw->w_tmem = (caddr_t) kalloc_data(len, Z_ZERO | Z_WAITOK);
				if (rw->w_tmem != NULL) {
					rw->w_tmemsize = len;
				}
			}
			if (rw->w_tmem != NULL) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			}
		}
	}
	if (cp) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)(void *)cp0;

		rtm->rtm_version = RTM_VERSION;
		rtm->rtm_type = type;
		rtm->rtm_msglen = (u_short)len;
	}
	return len;
}
1322 
1323 /*
1324  * This routine is called to generate a message from the routing
1325  * socket indicating that a redirect has occurred, a routing lookup
1326  * has failed, or that a protocol has detected timeouts to a particular
1327  * destination.
1328  */
1329 void
rt_missmsg(u_char type,struct rt_addrinfo * rtinfo,int flags,int error)1330 rt_missmsg(u_char type, struct rt_addrinfo *rtinfo, int flags, int error)
1331 {
1332 	struct rt_msghdr *rtm;
1333 	struct mbuf *m;
1334 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1335 	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };
1336 
1337 	if (route_cb.any_count == 0) {
1338 		return;
1339 	}
1340 	m = rt_msg1(type, rtinfo);
1341 	if (m == NULL) {
1342 		return;
1343 	}
1344 	rtm = mtod(m, struct rt_msghdr *);
1345 	rtm->rtm_flags = RTF_DONE | flags;
1346 	rtm->rtm_errno = error;
1347 	rtm->rtm_addrs = rtinfo->rti_addrs;
1348 	route_proto.sp_family = sa ? sa->sa_family : 0;
1349 	raw_input(m, &route_proto, &route_src, &route_dst);
1350 }
1351 
/*
 * This routine is called to generate a message from the routing
 * socket indicating that the status of a network interface has changed.
 */
void
rt_ifmsg(struct ifnet *ifp)
{
	struct if_msghdr *ifm;
	struct mbuf *m;
	struct rt_addrinfo info;
	struct  sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* Nothing to do if no routing sockets are listening */
	if (route_cb.any_count == 0) {
		return;
	}
	/* RTM_IFINFO carries no addresses, only the if_msghdr */
	bzero((caddr_t)&info, sizeof(info));
	m = rt_msg1(RTM_IFINFO, &info);
	if (m == NULL) {
		return;
	}
	ifm = mtod(m, struct if_msghdr *);
	ifm->ifm_index = ifp->if_index;
	ifm->ifm_flags = (u_short)ifp->if_flags;
	if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
	ifm->ifm_addrs = 0;
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1379 
/*
 * This is called to generate messages from the routing socket
 * indicating a network interface has had addresses associated with it.
 * if we ever reverse the logic and replace messages TO the routing
 * socket indicate a request to configure interfaces, then it will
 * be unnecessary as the routing socket will automatically generate
 * copies of it.
 *
 * Since this is coming from the interface, it is expected that the
 * interface will be locked.  Caller must hold rnh_lock and rt_lock.
 *
 * Two messages are emitted per call: an ifa message (RTM_NEWADDR or
 * RTM_DELADDR) and the route message (cmd), ordered so that for
 * RTM_ADD the address precedes the route, and for RTM_DELETE the
 * route precedes the address.
 * NOTE(review): callers are expected to pass only RTM_ADD or
 * RTM_DELETE; any other cmd would reach raw_input() with m == NULL.
 */
void
rt_newaddrmsg(u_char cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{
	struct rt_addrinfo info;
	struct sockaddr *sa = 0;
	int pass;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifa->ifa_ifp;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/* Nothing to do if no routing sockets are listening */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Become a regular mutex, just in case */
	RT_CONVERT_LOCK(rt);
	for (pass = 1; pass < 3; pass++) {
		bzero((caddr_t)&info, sizeof(info));
		if ((cmd == RTM_ADD && pass == 1) ||
		    (cmd == RTM_DELETE && pass == 2)) {
			struct ifa_msghdr *ifam;
			u_char ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;

			/* Lock ifp for if_lladdr */
			ifnet_lock_shared(ifp);
			IFA_LOCK(ifa);
			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold its
			 * lock.  The link address is always present; it's
			 * never freed.
			 */
			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			if ((m = rt_msg1(ncmd, &info)) == NULL) {
				IFA_UNLOCK(ifa);
				ifnet_lock_done(ifp);
				continue;
			}
			IFA_UNLOCK(ifa);
			ifnet_lock_done(ifp);
			ifam = mtod(m, struct ifa_msghdr *);
			ifam->ifam_index = ifp->if_index;
			IFA_LOCK_SPIN(ifa);
			ifam->ifam_metric = ifa->ifa_metric;
			ifam->ifam_flags = ifa->ifa_flags;
			IFA_UNLOCK(ifa);
			ifam->ifam_addrs = info.rti_addrs;
		}
		if ((cmd == RTM_ADD && pass == 2) ||
		    (cmd == RTM_DELETE && pass == 1)) {
			struct rt_msghdr *rtm;

			if (rt == NULL) {
				continue;
			}
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_DST] = sa = rt_key(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if ((m = rt_msg1(cmd, &info)) == NULL) {
				continue;
			}
			rtm = mtod(m, struct rt_msghdr *);
			rtm->rtm_index = ifp->if_index;
			rtm->rtm_flags |= rt->rt_flags;
			rtm->rtm_errno = error;
			rtm->rtm_addrs = info.rti_addrs;
		}
		/* Per-family delivery filter goes in sp_protocol */
		route_proto.sp_protocol = sa ? sa->sa_family : 0;
		raw_input(m, &route_proto, &route_src, &route_dst);
	}
}
1468 
/*
 * This is the analogue to the rt_newaddrmsg which performs the same
 * function but for multicast group memberships.  This is easier since
 * there is no route state to worry about.
 */
void
rt_newmaddrmsg(u_char cmd, struct ifmultiaddr *ifma)
{
	struct rt_addrinfo info;
	struct mbuf *m = 0;
	struct ifnet *ifp = ifma->ifma_ifp;
	struct ifma_msghdr *ifmam;
	struct sockproto route_proto = { .sp_family = PF_ROUTE, .sp_protocol = 0 };

	/* Nothing to do if no routing sockets are listening */
	if (route_cb.any_count == 0) {
		return;
	}

	/* Lock ifp for if_lladdr */
	ifnet_lock_shared(ifp);
	bzero((caddr_t)&info, sizeof(info));
	IFMA_LOCK(ifma);
	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
	/* lladdr doesn't need lock */
	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;

	/*
	 * If a link-layer address is present, present it as a ``gateway''
	 * (similarly to how ARP entries, e.g., are presented).
	 */
	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
	    ifma->ifma_ll->ifma_addr : NULL;
	if ((m = rt_msg1(cmd, &info)) == NULL) {
		IFMA_UNLOCK(ifma);
		ifnet_lock_done(ifp);
		return;
	}
	ifmam = mtod(m, struct ifma_msghdr *);
	ifmam->ifmam_index = ifp->if_index;
	ifmam->ifmam_addrs = info.rti_addrs;
	/* Per-family delivery filter goes in sp_protocol */
	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
	IFMA_UNLOCK(ifma);
	ifnet_lock_done(ifp);
	raw_input(m, &route_proto, &route_src, &route_dst);
}
1514 
1515 const char *
rtm2str(int cmd)1516 rtm2str(int cmd)
1517 {
1518 	const char *c = "RTM_?";
1519 
1520 	switch (cmd) {
1521 	case RTM_ADD:
1522 		c = "RTM_ADD";
1523 		break;
1524 	case RTM_DELETE:
1525 		c = "RTM_DELETE";
1526 		break;
1527 	case RTM_CHANGE:
1528 		c = "RTM_CHANGE";
1529 		break;
1530 	case RTM_GET:
1531 		c = "RTM_GET";
1532 		break;
1533 	case RTM_LOSING:
1534 		c = "RTM_LOSING";
1535 		break;
1536 	case RTM_REDIRECT:
1537 		c = "RTM_REDIRECT";
1538 		break;
1539 	case RTM_MISS:
1540 		c = "RTM_MISS";
1541 		break;
1542 	case RTM_LOCK:
1543 		c = "RTM_LOCK";
1544 		break;
1545 	case RTM_OLDADD:
1546 		c = "RTM_OLDADD";
1547 		break;
1548 	case RTM_OLDDEL:
1549 		c = "RTM_OLDDEL";
1550 		break;
1551 	case RTM_RESOLVE:
1552 		c = "RTM_RESOLVE";
1553 		break;
1554 	case RTM_NEWADDR:
1555 		c = "RTM_NEWADDR";
1556 		break;
1557 	case RTM_DELADDR:
1558 		c = "RTM_DELADDR";
1559 		break;
1560 	case RTM_IFINFO:
1561 		c = "RTM_IFINFO";
1562 		break;
1563 	case RTM_NEWMADDR:
1564 		c = "RTM_NEWMADDR";
1565 		break;
1566 	case RTM_DELMADDR:
1567 		c = "RTM_DELMADDR";
1568 		break;
1569 	case RTM_GET_SILENT:
1570 		c = "RTM_GET_SILENT";
1571 		break;
1572 	case RTM_IFINFO2:
1573 		c = "RTM_IFINFO2";
1574 		break;
1575 	case RTM_NEWMADDR2:
1576 		c = "RTM_NEWMADDR2";
1577 		break;
1578 	case RTM_GET2:
1579 		c = "RTM_GET2";
1580 		break;
1581 	case RTM_GET_EXT:
1582 		c = "RTM_GET_EXT";
1583 		break;
1584 	}
1585 
1586 	return c;
1587 }
1588 
/*
 * This is used in dumping the kernel table via sysctl().
 *
 * Radix-tree walker callback: emits one RTM_GET (or RTM_GET2 for
 * NET_RT_DUMP2 walks) message per route entry into the sysctl request
 * held in the walkarg.  Returns 0 to continue the walk, or the
 * SYSCTL_OUT error to stop it.
 */
static int
sysctl_dumpentry(struct radix_node *rn, void *vw)
{
	struct walkarg *w = vw;
	struct rtentry *rt = (struct rtentry *)rn;
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred;
	kauth_cred_t *credp;

	cred = kauth_cred_proc_ref(current_proc());
	credp = &cred;

	RT_LOCK(rt);
	/* Flags-filtered walks skip entries without the requested flags */
	if ((w->w_op == NET_RT_FLAGS || w->w_op == NET_RT_FLAGS_PRIV) &&
	    !(rt->rt_flags & w->w_arg)) {
		goto done;
	}

	/*
	 * If the matching route has RTF_LLINFO set, then we can skip scrubbing the MAC
	 * only if the outgoing interface is not loopback and the process has entitlement
	 * for neighbor cache read.
	 */
	if (w->w_op == NET_RT_FLAGS_PRIV && (rt->rt_flags & RTF_LLINFO)) {
		if (rt->rt_ifp != lo_ifp &&
		    (route_op_entitlement_check(NULL, cred, ROUTE_OP_READ, TRUE) == 0)) {
			/* NULL cred tells rt_msg2()/rtm_scrub() not to scrub */
			credp = NULL;
		}
	}

	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
	if (RT_HAS_IFADDR(rt)) {
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	}

	if (w->w_op != NET_RT_DUMP2) {
		/* rt_msg2() sizes and fills w->w_tmem; we patch the header */
		size = rt_msg2(RTM_GET, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr *rtm =
			    (struct rt_msghdr *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_pid = 0;
			rtm->rtm_seq = 0;
			rtm->rtm_errno = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	} else {
		size = rt_msg2(RTM_GET2, &info, NULL, w, credp);
		if (w->w_req != NULL && w->w_tmem != NULL) {
			struct rt_msghdr2 *rtm =
			    (struct rt_msghdr2 *)(void *)w->w_tmem;

			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_use = rt->rt_use;
			rt_getmetrics(rt, &rtm->rtm_rmx);
			rtm->rtm_index = rt->rt_ifp->if_index;
			rtm->rtm_refcnt = rt->rt_refcnt;
			if (rt->rt_parent) {
				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
			} else {
				rtm->rtm_parentflags = 0;
			}
			rtm->rtm_reserved = 0;
			rtm->rtm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
		}
	}

done:
	RT_UNLOCK(rt);
	kauth_cred_unref(&cred);
	return error;
}
1675 
/*
 * This is used for dumping extended information from route entries.
 *
 * Radix-tree walker callback for NET_RT_DUMPX/NET_RT_DUMPX_FLAGS:
 * emits one RTM_GET_EXT message (including link reachability info)
 * per route entry into the sysctl request held in the walkarg.
 */
static int
sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
{
	struct walkarg *w = vw;
	struct rtentry *rt = (struct rtentry *)rn;
	int error = 0, size;
	struct rt_addrinfo info;
	kauth_cred_t cred;

	cred = kauth_cred_proc_ref(current_proc());

	RT_LOCK(rt);
	/* Flags-filtered walks skip entries without the requested flags */
	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
		goto done;
	}
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;

	/* rt_msg2() sizes and fills w->w_tmem; we patch the header */
	size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred);
	if (w->w_req != NULL && w->w_tmem != NULL) {
		struct rt_msghdr_ext *ertm =
		    (struct rt_msghdr_ext *)(void *)w->w_tmem;

		ertm->rtm_flags = rt->rt_flags;
		ertm->rtm_use = rt->rt_use;
		rt_getmetrics(rt, &ertm->rtm_rmx);
		ertm->rtm_index = rt->rt_ifp->if_index;
		ertm->rtm_pid = 0;
		ertm->rtm_seq = 0;
		ertm->rtm_errno = 0;
		ertm->rtm_addrs = info.rti_addrs;
		if (rt->rt_llinfo_get_ri == NULL) {
			/* No link-layer info provider: report unknowns */
			bzero(&ertm->rtm_ri, sizeof(ertm->rtm_ri));
			ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN;
			ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF;
			ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN;
		} else {
			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
		}
		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
	}

done:
	RT_UNLOCK(rt);
	kauth_cred_unref(&cred);
	return error;
}
1729 
/*
 * rdar://9307819
 * To avoid to call copyout() while holding locks and to cause problems
 * in the paging path, sysctl_iflist() and sysctl_iflist2() construct
 * the list in two passes. In the first pass we compute the total
 * length of the data we are going to copyout, then we release
 * all locks to allocate a temporary buffer that gets filled
 * in the second pass.
 *
 * Note that we are verifying the assumption that kalloc() returns a buffer
 * that is at least 32 bits aligned and that the messages and addresses are
 * 32 bits aligned.
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct  rt_addrinfo info;
	int     error = 0;
	int     pass = 0;
	size_t  len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
	char    *total_buffer = NULL, *cp = NULL;
	kauth_cred_t cred;

	cred = kauth_cred_proc_ref(current_proc());

	bzero((caddr_t)&info, sizeof(info));

	/* pass 0 computes total_len; pass 1 fills the allocated buffer */
	for (pass = 0; pass < 2; pass++) {
		ifnet_head_lock_shared();

		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
			if (error) {
				break;
			}
			/* Non-zero w_arg restricts the dump to one interface */
			if (w->w_arg && w->w_arg != ifp->if_index) {
				continue;
			}
			ifnet_lock_shared(ifp);
			/*
			 * Holding ifnet lock here prevents the link address
			 * from changing contents, so no need to hold the ifa
			 * lock.  The link address is always present; it's
			 * never freed.
			 */
			ifa = ifp->if_lladdr;
			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
			len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred);
			if (pass == 0) {
				if (os_add_overflow(total_len, len, &total_len)) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
			} else {
				struct if_msghdr *ifm;

				/* Interfaces added since pass 0 may not fit */
				if (current_len + len > total_len) {
					ifnet_lock_done(ifp);
					error = ENOBUFS;
					break;
				}
				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
				len = rt_msg2(RTM_IFINFO, &info,
				    (caddr_t)cp, NULL, &cred);
				info.rti_info[RTAX_IFP] = NULL;

				ifm = (struct if_msghdr *)(void *)cp;
				ifm->ifm_index = ifp->if_index;
				ifm->ifm_flags = (u_short)ifp->if_flags;
				if_data_internal_to_if_data(ifp, &ifp->if_data,
				    &ifm->ifm_data);
				ifm->ifm_addrs = info.rti_addrs;
				/*
				 * <rdar://problem/32940901>
				 * Round bytes only for non-platform
				 */
				if (!csproc_get_platform_binary(w->w_req->p)) {
					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
				}

				cp += len;
				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
				current_len += len;
				VERIFY(current_len <= total_len);
			}
			/* Emit one RTM_NEWADDR per remaining address */
			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
				IFA_LOCK(ifa);
				/* Honor the address-family filter, if any */
				if (af && af != ifa->ifa_addr->sa_family) {
					IFA_UNLOCK(ifa);
					continue;
				}
				/* Hide synthesized CLAT46 addresses */
				if (ifa->ifa_addr->sa_family == AF_INET6 &&
				    (((struct in6_ifaddr *)ifa)->ia6_flags &
				    IN6_IFF_CLAT46) != 0) {
					IFA_UNLOCK(ifa);
					continue;
				}
				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
				    &cred);
				if (pass == 0) {
					if (os_add_overflow(total_len, len, &total_len)) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
				} else {
					struct ifa_msghdr *ifam;

					if (current_len + len > total_len) {
						IFA_UNLOCK(ifa);
						error = ENOBUFS;
						break;
					}
					len = rt_msg2(RTM_NEWADDR, &info,
					    (caddr_t)cp, NULL, &cred);

					ifam = (struct ifa_msghdr *)(void *)cp;
					ifam->ifam_index =
					    ifa->ifa_ifp->if_index;
					ifam->ifam_flags = ifa->ifa_flags;
					ifam->ifam_metric = ifa->ifa_metric;
					ifam->ifam_addrs = info.rti_addrs;

					cp += len;
					VERIFY(IS_P2ALIGNED(cp,
					    sizeof(u_int32_t)));
					current_len += len;
					VERIFY(current_len <= total_len);
				}
				IFA_UNLOCK(ifa);
			}
			ifnet_lock_done(ifp);
			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			    info.rti_info[RTAX_BRD] = NULL;
		}

		ifnet_head_done();

		if (error != 0) {
			if (error == ENOBUFS) {
				printf("%s: current_len (%lu) + len (%lu) > "
				    "total_len (%lu)\n", __func__, current_len,
				    len, total_len);
			}
			break;
		}

		if (pass == 0) {
			/* Better to return zero length buffer than ENOBUFS */
			if (total_len == 0) {
				total_len = 1;
			}
			/* Add 12.5% slack for addresses added between passes */
			total_len += total_len >> 3;
			total_buffer_len = total_len;
			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
			if (total_buffer == NULL) {
				printf("%s: kalloc_data(%lu) failed\n", __func__,
				    total_len);
				error = ENOBUFS;
				break;
			}
			cp = total_buffer;
			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
		} else {
			/* Copy out with no locks held */
			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
			if (error) {
				break;
			}
		}
	}

	if (total_buffer != NULL) {
		kfree_data(total_buffer, total_buffer_len);
	}

	kauth_cred_unref(&cred);
	return error;
}
1914 
1915 static int
sysctl_iflist2(int af,struct walkarg * w)1916 sysctl_iflist2(int af, struct walkarg *w)
1917 {
1918 	struct ifnet *ifp;
1919 	struct ifaddr *ifa;
1920 	struct  rt_addrinfo info;
1921 	int     error = 0;
1922 	int     pass = 0;
1923 	size_t  len = 0, total_len = 0, total_buffer_len = 0, current_len = 0;
1924 	char    *total_buffer = NULL, *cp = NULL;
1925 	kauth_cred_t cred;
1926 
1927 	cred = kauth_cred_proc_ref(current_proc());
1928 
1929 	bzero((caddr_t)&info, sizeof(info));
1930 
1931 	for (pass = 0; pass < 2; pass++) {
1932 		struct ifmultiaddr *ifma;
1933 
1934 		ifnet_head_lock_shared();
1935 
1936 		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1937 			if (error) {
1938 				break;
1939 			}
1940 			if (w->w_arg && w->w_arg != ifp->if_index) {
1941 				continue;
1942 			}
1943 			ifnet_lock_shared(ifp);
1944 			/*
1945 			 * Holding ifnet lock here prevents the link address
1946 			 * from changing contents, so no need to hold the ifa
1947 			 * lock.  The link address is always present; it's
1948 			 * never freed.
1949 			 */
1950 			ifa = ifp->if_lladdr;
1951 			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1952 			len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred);
1953 			if (pass == 0) {
1954 				if (os_add_overflow(total_len, len, &total_len)) {
1955 					ifnet_lock_done(ifp);
1956 					error = ENOBUFS;
1957 					break;
1958 				}
1959 			} else {
1960 				struct if_msghdr2 *ifm;
1961 
1962 				if (current_len + len > total_len) {
1963 					ifnet_lock_done(ifp);
1964 					error = ENOBUFS;
1965 					break;
1966 				}
1967 				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1968 				len = rt_msg2(RTM_IFINFO2, &info,
1969 				    (caddr_t)cp, NULL, &cred);
1970 				info.rti_info[RTAX_IFP] = NULL;
1971 
1972 				ifm = (struct if_msghdr2 *)(void *)cp;
1973 				ifm->ifm_addrs = info.rti_addrs;
1974 				ifm->ifm_flags = (u_short)ifp->if_flags;
1975 				ifm->ifm_index = ifp->if_index;
1976 				ifm->ifm_snd_len = IFCQ_LEN(ifp->if_snd);
1977 				ifm->ifm_snd_maxlen = IFCQ_MAXLEN(ifp->if_snd);
1978 				ifm->ifm_snd_drops =
1979 				    (int)ifp->if_snd->ifcq_dropcnt.packets;
1980 				ifm->ifm_timer = ifp->if_timer;
1981 				if_data_internal_to_if_data64(ifp,
1982 				    &ifp->if_data, &ifm->ifm_data);
1983 				/*
1984 				 * <rdar://problem/32940901>
1985 				 * Round bytes only for non-platform
1986 				 */
1987 				if (!csproc_get_platform_binary(w->w_req->p)) {
1988 					ALIGN_BYTES(ifm->ifm_data.ifi_ibytes);
1989 					ALIGN_BYTES(ifm->ifm_data.ifi_obytes);
1990 				}
1991 
1992 				cp += len;
1993 				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
1994 				current_len += len;
1995 				VERIFY(current_len <= total_len);
1996 			}
1997 			while ((ifa = ifa->ifa_link.tqe_next) != NULL) {
1998 				IFA_LOCK(ifa);
1999 				if (af && af != ifa->ifa_addr->sa_family) {
2000 					IFA_UNLOCK(ifa);
2001 					continue;
2002 				}
2003 				if (ifa->ifa_addr->sa_family == AF_INET6 &&
2004 				    (((struct in6_ifaddr *)ifa)->ia6_flags &
2005 				    IN6_IFF_CLAT46) != 0) {
2006 					IFA_UNLOCK(ifa);
2007 					continue;
2008 				}
2009 
2010 				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2011 				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2012 				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2013 				len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL,
2014 				    &cred);
2015 				if (pass == 0) {
2016 					if (os_add_overflow(total_len, len, &total_len)) {
2017 						IFA_UNLOCK(ifa);
2018 						error = ENOBUFS;
2019 						break;
2020 					}
2021 				} else {
2022 					struct ifa_msghdr *ifam;
2023 
2024 					if (current_len + len > total_len) {
2025 						IFA_UNLOCK(ifa);
2026 						error = ENOBUFS;
2027 						break;
2028 					}
2029 					len = rt_msg2(RTM_NEWADDR, &info,
2030 					    (caddr_t)cp, NULL, &cred);
2031 
2032 					ifam = (struct ifa_msghdr *)(void *)cp;
2033 					ifam->ifam_index =
2034 					    ifa->ifa_ifp->if_index;
2035 					ifam->ifam_flags = ifa->ifa_flags;
2036 					ifam->ifam_metric = ifa->ifa_metric;
2037 					ifam->ifam_addrs = info.rti_addrs;
2038 
2039 					cp += len;
2040 					VERIFY(IS_P2ALIGNED(cp,
2041 					    sizeof(u_int32_t)));
2042 					current_len += len;
2043 					VERIFY(current_len <= total_len);
2044 				}
2045 				IFA_UNLOCK(ifa);
2046 			}
2047 			if (error) {
2048 				ifnet_lock_done(ifp);
2049 				break;
2050 			}
2051 
2052 			for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
2053 			    ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
2054 				struct ifaddr *ifa0;
2055 
2056 				IFMA_LOCK(ifma);
2057 				if (af && af != ifma->ifma_addr->sa_family) {
2058 					IFMA_UNLOCK(ifma);
2059 					continue;
2060 				}
2061 				bzero((caddr_t)&info, sizeof(info));
2062 				info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2063 				/*
2064 				 * Holding ifnet lock here prevents the link
2065 				 * address from changing contents, so no need
2066 				 * to hold the ifa0 lock.  The link address is
2067 				 * always present; it's never freed.
2068 				 */
2069 				ifa0 = ifp->if_lladdr;
2070 				info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
2071 				if (ifma->ifma_ll != NULL) {
2072 					info.rti_info[RTAX_GATEWAY] =
2073 					    ifma->ifma_ll->ifma_addr;
2074 				}
2075 				len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL,
2076 				    &cred);
2077 				if (pass == 0) {
2078 					total_len += len;
2079 				} else {
2080 					struct ifma_msghdr2 *ifmam;
2081 
2082 					if (current_len + len > total_len) {
2083 						IFMA_UNLOCK(ifma);
2084 						error = ENOBUFS;
2085 						break;
2086 					}
2087 					len = rt_msg2(RTM_NEWMADDR2, &info,
2088 					    (caddr_t)cp, NULL, &cred);
2089 
2090 					ifmam =
2091 					    (struct ifma_msghdr2 *)(void *)cp;
2092 					ifmam->ifmam_addrs = info.rti_addrs;
2093 					ifmam->ifmam_flags = 0;
2094 					ifmam->ifmam_index =
2095 					    ifma->ifma_ifp->if_index;
2096 					ifmam->ifmam_refcount =
2097 					    ifma->ifma_reqcnt;
2098 
2099 					cp += len;
2100 					VERIFY(IS_P2ALIGNED(cp,
2101 					    sizeof(u_int32_t)));
2102 					current_len += len;
2103 				}
2104 				IFMA_UNLOCK(ifma);
2105 			}
2106 			ifnet_lock_done(ifp);
2107 			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2108 			    info.rti_info[RTAX_BRD] = NULL;
2109 		}
2110 		ifnet_head_done();
2111 
2112 		if (error) {
2113 			if (error == ENOBUFS) {
2114 				printf("%s: current_len (%lu) + len (%lu) > "
2115 				    "total_len (%lu)\n", __func__, current_len,
2116 				    len, total_len);
2117 			}
2118 			break;
2119 		}
2120 
2121 		if (pass == 0) {
2122 			/* Better to return zero length buffer than ENOBUFS */
2123 			if (total_len == 0) {
2124 				total_len = 1;
2125 			}
2126 			total_len += total_len >> 3;
2127 			total_buffer_len = total_len;
2128 			total_buffer = (char *) kalloc_data(total_len, Z_ZERO | Z_WAITOK);
2129 			if (total_buffer == NULL) {
2130 				printf("%s: kalloc_data(%lu) failed\n", __func__,
2131 				    total_len);
2132 				error = ENOBUFS;
2133 				break;
2134 			}
2135 			cp = total_buffer;
2136 			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
2137 		} else {
2138 			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
2139 			if (error) {
2140 				break;
2141 			}
2142 		}
2143 	}
2144 
2145 	if (total_buffer != NULL) {
2146 		kfree_data(total_buffer, total_buffer_len);
2147 	}
2148 
2149 	kauth_cred_unref(&cred);
2150 	return error;
2151 }
2152 
2153 
2154 static int
sysctl_rtstat(struct sysctl_req * req)2155 sysctl_rtstat(struct sysctl_req *req)
2156 {
2157 	return SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat));
2158 }
2159 
/*
 * NET_RT_TRASH handler: copy the rttrash counter (routes not yet
 * freed) out to the sysctl caller.
 */
static int
sysctl_rttrash(struct sysctl_req *req)
{
	return SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
}
2165 
/*
 * Top-level PF_ROUTE sysctl dispatcher.  After skipping the leading
 * oid component, name[0] is an address-family filter (0 == all
 * families), name[1] selects the operation (NET_RT_*) and name[2] is
 * an operation-specific argument passed through in w.w_arg (e.g. an
 * interface index for the IFLIST operations).
 */
static int
sysctl_rtsock SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int     *name = (int *)arg1;
	u_int   namelen = arg2;
	struct radix_node_head *rnh;
	int     i, error = EINVAL;
	u_char  af;
	struct  walkarg w;

	/* Skip the oid component; exactly three name ints must remain. */
	name++;
	namelen--;
	/* This node is read-only; reject attempts to write. */
	if (req->newptr) {
		return EPERM;
	}
	if (namelen != 3) {
		return EINVAL;
	}
	af = (u_char)name[0];
	Bzero(&w, sizeof(w));
	w.w_op = name[1];
	w.w_arg = name[2];
	w.w_req = req;

	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_DUMP2:
	case NET_RT_FLAGS:
	case NET_RT_FLAGS_PRIV:
		/*
		 * Walk each routing table matching the af filter under
		 * rnh_lock; stop on the first walker error.
		 */
		lck_mtx_lock(rnh_lock);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
			    (error = rnh->rnh_walktree(rnh,
			    sysctl_dumpentry, &w))) {
				break;
			}
		}
		lck_mtx_unlock(rnh_lock);
		break;
	case NET_RT_DUMPX:
	case NET_RT_DUMPX_FLAGS:
		/* Same walk, but emitting the extended entry format. */
		lck_mtx_lock(rnh_lock);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
			    (error = rnh->rnh_walktree(rnh,
			    sysctl_dumpentry_ext, &w))) {
				break;
			}
		}
		lck_mtx_unlock(rnh_lock);
		break;
	case NET_RT_IFLIST:
		error = sysctl_iflist(af, &w);
		break;
	case NET_RT_IFLIST2:
		error = sysctl_iflist2(af, &w);
		break;
	case NET_RT_STAT:
		error = sysctl_rtstat(req);
		break;
	case NET_RT_TRASH:
		error = sysctl_rttrash(req);
		break;
	}
	/* Release the scratch message buffer a walker may have allocated. */
	if (w.w_tmem != NULL) {
		kfree_data(w.w_tmem, w.w_tmemsize);
	}
	return error;
}
2236 
2237 /*
2238  * Definitions of protocols supported in the ROUTE domain.
2239  */
/* Single raw-socket protocol entry; output goes through route_output(). */
static struct protosw routesw[] = {
	{
		.pr_type =              SOCK_RAW,
		.pr_protocol =          0,
		.pr_flags =             PR_ATOMIC | PR_ADDR,
		.pr_output =            route_output,
		.pr_ctlinput =          raw_ctlinput,
		.pr_usrreqs =           &route_usrreqs,
	}
};

/* Number of entries in routesw[]. */
static int route_proto_count = (sizeof(routesw) / sizeof(struct protosw));
2252 
/* PF_ROUTE domain descriptor; route_dinit() runs at domain attach. */
struct domain routedomain_s = {
	.dom_family =           PF_ROUTE,
	.dom_name =             "route",
	.dom_init =             route_dinit,
};
2258 
2259 static void
route_dinit(struct domain * dp)2260 route_dinit(struct domain *dp)
2261 {
2262 	struct protosw *pr;
2263 	int i;
2264 
2265 	VERIFY(!(dp->dom_flags & DOM_INITIALIZED));
2266 	VERIFY(routedomain == NULL);
2267 
2268 	routedomain = dp;
2269 
2270 	for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) {
2271 		net_add_proto(pr, dp, 1);
2272 	}
2273 
2274 	route_init();
2275 }
2276